package metadatagenerator;

import ins.namespace.*;
import java.io.*;
import java.util.*;


//-----------------------------------------------
/**
 * Contract implemented by every parser that converts one field
 * of a bibtex entry into a name-spec fragment.
 * @author Magdalena Balazinska
 */
interface IFieldParser {

    /**
     * Identifies the field this parser handles.
     *
     * @return the field name
     */
    String fieldName();

    /**
     * Consumes the tokens of one field and builds its name-spec.
     *
     * @param tokenizer source of the field's tokens
     * @param prefix    text an implementation may place before the result;
     *                  the implementations in this file add it only for
     *                  nested fields
     * @param suffix    text an implementation may place after the result,
     *                  under the same condition as the prefix
     * @return the name-spec built for the field
     * @throws IOException if reading from the tokenizer fails
     */
    StringBuffer parse(StreamTokenizer tokenizer, String prefix, String suffix)
        throws IOException;

}

//-----------------------------------------------
/**
 * By default all fields are parsed into name-specs
 * of the form [attrib=word1][attrib=word2], etc.
 * Subclasses customise the result by overriding {@link #attributeName()},
 * {@link #fieldName()}, {@link #isNested()} and {@link #ignoreMe(String)}.
 * @author Magdalena Balazinska
 */
class FieldParser implements IFieldParser {

    /** Words that never produce a pair; only the presence of the key is checked. */
    Hashtable useless = new Hashtable();

    /** Maps a word to the replacement string whose words are emitted instead. */
    Hashtable substitution = new Hashtable();

    /**
     * @param u table of words to skip (see {@link #useless})
     * @param s table of word substitutions (see {@link #substitution})
     */
    public FieldParser(Hashtable u, Hashtable s) {
        useless = u;
        substitution = s;
    }

    /**
     * This method returns true if the field name-spec should be nested within
     * the entries name-spec. When it does, {@link #parse} wraps its result
     * in the prefix and suffix it was given.
     */
    public boolean isNested() {
        return false;
    }

    /**
     * Hook letting subclasses drop individual words.
     *
     * @param token the cleaned word
     * @return true if no pair should be produced for the word
     */
    public boolean ignoreMe(String token) {
        return false;
    }

    /**
     * Makes one av pair from the word read.
     * If the word has an entry in the substitution table, makes one av pair
     * for every whitespace-separated word of the substituted string instead,
     * leaving out the words listed in the useless table.
     *
     * @param value the word read from the field
     * @return the concatenated [attribute=value] pairs; may be empty when
     *         every substituted word is useless
     */
    public String makeOneAVPair(String value) {

        StringBuffer namespec = new StringBuffer();
        String attribute = attributeName();
        String realvalue = (String) substitution.get(value);

        if (realvalue == null) {
            appendPair(namespec, attribute, value);
            return namespec.toString();
        }

        StringTokenizer words = new StringTokenizer(realvalue);
        while (words.hasMoreTokens()) {
            String keyword = words.nextToken();
            if (useless.get(keyword) == null) {
                appendPair(namespec, attribute, keyword);
            }
        }
        return namespec.toString();
    }

    /** Appends a single [attribute=value] pair to the buffer. */
    private static void appendPair(StringBuffer target, String attribute,
                                   String value) {
        target.append("[").append(attribute).append("=").append(value).append("]");
    }

    /**
     * Creates name-specifier for a field.
     * Words are read until the first end-of-line token seen at nesting
     * level zero, or until the end of the input. Each word is cleaned and,
     * unless it is empty, useless or ignored, turned into av pairs.
     *
     * While the field is read, braces are treated as word characters so
     * that they stay attached to the neighbouring word and are stripped by
     * {@link #clean}. They are switched back to ordinary characters before
     * returning, also when reading fails.
     *
     * NOTE(review): because braces reach the word branch here, the nesting
     * counter only moves if a subclass overrides the quote detectors; the
     * field therefore ends at the first end-of-line token. End-of-line
     * tokens are presumably enabled by the caller - confirm.
     *
     * @param tokenizer source of the field's tokens
     * @param prefix    text placed before the pairs of a nested field
     * @param suffix    text placed after the pairs of a nested field
     * @return the name-spec of the field
     * @throws java.io.IOException if reading from the tokenizer fails
     */
    public StringBuffer parse(StreamTokenizer tokenizer, String prefix,
                              String suffix) throws java.io.IOException {

        StringBuffer namespec = new StringBuffer();
        boolean nestedField = isNested();
        if (nestedField) {
            namespec.append(prefix);
        }

        tokenizer.wordChars('{', '{');
        tokenizer.wordChars('}', '}');
        try {
            int nestedLevel = 0;
            while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
                if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
                    String token = clean(tokenizer.sval);
                    // A word made only of braces/backslashes is empty once
                    // cleaned and must not yield an "[attribute=]" pair.
                    if (token.length() > 0
                            && useless.get(token) == null
                            && !ignoreMe(token)) {
                        namespec.append(makeOneAVPair(token));
                    }
                } else if (beginQuote(tokenizer)) {
                    nestedLevel++;
                } else if (endQuote(tokenizer)) {
                    nestedLevel--;
                } else if (nestedLevel == 0 && end(tokenizer)) {
                    break;
                }
            }
        } finally {
            // Always hand the tokenizer back with braces as ordinary
            // characters, even if nextToken() threw.
            tokenizer.ordinaryChar('{');
            tokenizer.ordinaryChar('}');
        }

        if (nestedField) {
            namespec.append(suffix);
        }
        return namespec;
    }

    /**
     * Removes every opening brace, closing brace and backslash from a word.
     *
     * @param token the raw word
     * @return the word without those characters; may be empty
     */
    public String clean(String token) {

        StringBuffer kept = new StringBuffer(token.length());
        for (int i = 0; i < token.length(); i++) {
            char c = token.charAt(i);
            if (c != '{' && c != '}' && c != '\\') {
                kept.append(c);
            }
        }
        return kept.toString();
    }

    /** @return the name of the field handled by this parser */
    public String fieldName() {
        return "Generic field";
    }

    /** @return the attribute name written into every av pair */
    public String attributeName() {
        return "attribute";
    }

    /**
     * Tells whether the current token is an opening brace.
     * The token type is compared directly, so the text of a word or of a
     * quoted string can no longer be mistaken for a brace.
     *
     * @param tokenizer tokenizer positioned on the token to test
     * @return true if the current token is the ordinary character '{'
     */
    public boolean beginQuote(StreamTokenizer tokenizer) {
        return tokenizer.ttype == '{';
    }

    /**
     * Tells whether the current token is a closing brace.
     *
     * @param tokenizer tokenizer positioned on the token to test
     * @return true if the current token is the ordinary character '}'
     */
    public boolean endQuote(StreamTokenizer tokenizer) {
        return tokenizer.ttype == '}';
    }

    /**
     * Tells whether the current token is an end-of-line token.
     *
     * @param tokenizer tokenizer positioned on the token to test
     * @return true if the current token type is TT_EOL
     */
    public boolean end(StreamTokenizer tokenizer) {
        return tokenizer.ttype == StreamTokenizer.TT_EOL;
    }

}

//-----------------------------------------------
/**
 * Parser for the "author" field; pairs are written with the
 * "authorword" attribute.
 */
class Author extends FieldParser {

    /**
     * @param uselessWords  table passed unchanged to FieldParser
     * @param substitutions table passed unchanged to FieldParser
     */
    public Author(Hashtable uselessWords, Hashtable substitutions) {
        super(uselessWords, substitutions);
    }

    /** @return the attribute name used for author words */
    public String attributeName() {
        return "authorword";
    }

    /** @return the name of the field handled by this parser */
    public String fieldName() {
        return "author";
    }

    /**
     * For authors, we want to ignore initials: any token that
     * contains a period is skipped.
     *
     * @param token the word to test
     * @return true if the token contains a '.'
     */
    public boolean ignoreMe(String token) {
        return token.indexOf('.') >= 0;
    }
}


/**
 * Parser for the "title" field; pairs are written with the
 * "titleword" attribute.
 */
class Title extends FieldParser {

    /**
     * @param uselessWords  table passed unchanged to FieldParser
     * @param substitutions table passed unchanged to FieldParser
     */
    public Title(Hashtable uselessWords, Hashtable substitutions) {
        super(uselessWords, substitutions);
    }

    /** @return the attribute name used for title words */
    public String attributeName() {
        return "titleword";
    }

    /** @return the name of the field handled by this parser */
    public String fieldName() {
        return "title";
    }
}

/**
 * Parser for the "year" field; the field name and the attribute
 * name are both "year".
 */
class Year extends FieldParser {

    /**
     * @param uselessWords  table passed unchanged to FieldParser
     * @param substitutions table passed unchanged to FieldParser
     */
    public Year(Hashtable uselessWords, Hashtable substitutions) {
        super(uselessWords, substitutions);
    }

    /** @return the attribute name used in the generated pairs */
    public String attributeName() {
        return "year";
    }

    /** @return the name of the field handled by this parser */
    public String fieldName() {
        return "year";
    }
}

/**
 * Parser for the "journal" field; pairs are written with the
 * "journalword" attribute and the field is reported as nested.
 */
class Journal extends FieldParser {

    /**
     * @param uselessWords  table passed unchanged to FieldParser
     * @param substitutions table passed unchanged to FieldParser
     */
    public Journal(Hashtable uselessWords, Hashtable substitutions) {
        super(uselessWords, substitutions);
    }

    /** @return always true: this field's name-spec is nested */
    public boolean isNested() {
        return true;
    }

    /** @return the attribute name used for journal words */
    public String attributeName() {
        return "journalword";
    }

    /** @return the name of the field handled by this parser */
    public String fieldName() {
        return "journal";
    }
}

/**
 * Parser for the "booktitle" field; pairs are written with the
 * "booktitleword" attribute and the field is reported as nested.
 */
class Booktitle extends FieldParser {

    /**
     * @param uselessWords  table passed unchanged to FieldParser
     * @param substitutions table passed unchanged to FieldParser
     */
    public Booktitle(Hashtable uselessWords, Hashtable substitutions) {
        super(uselessWords, substitutions);
    }

    /** @return always true: this field's name-spec is nested */
    public boolean isNested() {
        return true;
    }

    /** @return the attribute name used for booktitle words */
    public String attributeName() {
        return "booktitleword";
    }

    /** @return the name of the field handled by this parser */
    public String fieldName() {
        return "booktitle";
    }
}

/**
 * Parser for the "school" field; pairs are written with the
 * "schoolword" attribute and the field is reported as nested.
 */
class School extends FieldParser {

    /**
     * @param uselessWords  table passed unchanged to FieldParser
     * @param substitutions table passed unchanged to FieldParser
     */
    public School(Hashtable uselessWords, Hashtable substitutions) {
        super(uselessWords, substitutions);
    }

    /** @return always true: this field's name-spec is nested */
    public boolean isNested() {
        return true;
    }

    /** @return the attribute name used for school words */
    public String attributeName() {
        return "schoolword";
    }

    /** @return the name of the field handled by this parser */
    public String fieldName() {
        return "school";
    }
}

/**
 * Parser for the "institution" field; pairs are written with the
 * "institutionword" attribute and the field is reported as nested.
 */
class Institution extends FieldParser {

    /**
     * @param uselessWords  table passed unchanged to FieldParser
     * @param substitutions table passed unchanged to FieldParser
     */
    public Institution(Hashtable uselessWords, Hashtable substitutions) {
        super(uselessWords, substitutions);
    }

    /** @return always true: this field's name-spec is nested */
    public boolean isNested() {
        return true;
    }

    /** @return the attribute name used for institution words */
    public String attributeName() {
        return "institutionword";
    }

    /** @return the name of the field handled by this parser */
    public String fieldName() {
        return "institution";
    }
}

/**
 * Parser for the "howpublished" field. Unlike the default parser, the
 * whole field value is collected into one string and emitted as a single
 * av pair; the field is reported as nested.
 */
class Howpublished extends FieldParser {

    /** Marker that identifies a URL command inside a token. */
    private static final String URL_MARKER = "\\url";

    /**
     * @param u table of words to skip, passed unchanged to FieldParser
     * @param s table of substitutions, passed unchanged to FieldParser
     */
    public Howpublished(Hashtable u, Hashtable s) {
        super(u, s);
    }

    /** @return the name of the field handled by this parser */
    public String fieldName() {
        return "howpublished";
    }

    /** @return the attribute name used in the generated pair */
    public String attributeName() {
        return "howpublished";
    }

    /** @return always true: this field's name-spec is nested */
    public boolean isNested() {
        return true;
    }

    /**
     * The token carrying the URL marker is not part of the value.
     *
     * @param token the word to test
     * @return true if the token contains the URL marker
     */
    public boolean ignoreMe(String token) {
        return containsUrlMarker(token);
    }

    /**
     * @param token the word to test
     * @return true if the token contains the URL marker
     */
    public boolean isURL(String token) {
        return containsUrlMarker(token);
    }

    /** Shared test behind {@link #ignoreMe} and {@link #isURL}. */
    private static boolean containsUrlMarker(String token) {
        return token.indexOf(URL_MARKER) != -1;
    }

    /**
     * The howpublished field is interpreted as one long string
     * since it often represents a URL.
     *
     * Words are concatenated, separated by single spaces until a URL marker
     * has been seen and without separators afterwards; colon tokens are
     * copied into the value. Reading stops at the first end-of-line token
     * seen outside braces, or at the end of the input. In both cases the
     * collected value is cleaned, trimmed and, if not empty, emitted as one
     * av pair, so a field that ends at the end of the input is not lost and
     * no separator space leaks into the pair.
     *
     * NOTE(review): assumes the caller configured the tokenizer so that
     * braces and ':' arrive as ordinary characters and end-of-line tokens
     * are reported - confirm against the caller.
     *
     * @param tokenizer source of the field's tokens
     * @param prefix    text placed before the pair
     * @param suffix    text placed after the pair
     * @return the name-spec of the field
     * @throws java.io.IOException if reading from the tokenizer fails
     */
    public StringBuffer parse(StreamTokenizer tokenizer, String prefix,
                              String suffix) throws java.io.IOException {

        StringBuffer namespec = new StringBuffer();
        StringBuffer fullStringValue = new StringBuffer();
        boolean nestedField = isNested();
        boolean isUrl = false;
        if (nestedField) {
            namespec.append(prefix);
        }
        int nested = 0;

        while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
            if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
                String token = tokenizer.sval;
                if (isURL(token)) {
                    isUrl = true;
                }
                if (!ignoreMe(token)) {
                    fullStringValue.append(token);
                    // The parts of a URL are glued back together; ordinary
                    // words are separated by one space.
                    if (!isUrl) {
                        fullStringValue.append(" ");
                    }
                }
            } else if (tokenizer.ttype == ':') {
                fullStringValue.append(":");
            } else if (beginQuote(tokenizer)) {
                nested++;
            } else if (endQuote(tokenizer)) {
                nested--;
            } else if (nested == 0 && end(tokenizer)) {
                break;
            }
        }

        // trim() removes the separator left behind the last word.
        String value = clean(fullStringValue.toString()).trim();
        if (value.length() > 0) {
            namespec.append(makeOneAVPair(value));
        }
        if (nestedField) {
            namespec.append(suffix);
        }
        return namespec;
    }
}
