package metadatagenerator;

import ins.namespace.*;
import java.io.*;
import java.util.*;

/**
 * BibtexMetaDataExtractor.java
 *
 * Takes a bibtex file as input and produces
 * name-specifiers for each entry found.
 * The parser is an extremely simple top-down parser, 
 * so only the mandatory attributes are parsed.
 * Beware: Some entries may not be parsed 
 * entirely correctly.
 * <br>
 * Created: Tue Jan 08 2002 <br>
 * Modified: $Id: BibtexMetaDataExtractor.java,v 1.2 2002/03/21 00:01:55 mbalazin Exp $ 
 * @author Magdalena Balazinska
 */
public class BibtexMetaDataExtractor {

    // Some words such as "of" "the" "a" are not used in name-specifiers
    Hashtable useless = new Hashtable();

    // For each bibtex entry, there's an object that knows how to parse it
    Hashtable entries = new Hashtable();

    // String definitions are stored in a hashtable
    Hashtable substitution = new Hashtable();

    /**
     * Fills the table of ignored words and registers one parser object
     * per supported bibtex entry keyword. Every parser is handed the
     * same ignored-word table and the same string-definition table.
     */
    public BibtexMetaDataExtractor() {

        // Each ignored word is stored as both key and value.
        String[] ignoredWords = {
            "#", "a", "an", "the", "of", "in", "and", "for", "on", "at", "-"
        };
        for (int i = 0; i < ignoredWords.length; i++) {
            useless.put(ignoredWords[i], ignoredWords[i]);
        }

        // Keys are lower case because the tokenizer built by
        // prepareSimpleTokenizer() runs in lower-case mode.
        entries.put("@string", new StringDef(useless, substitution));
        entries.put("@article", new Article(useless, substitution));
        entries.put("@inproceedings", new InProceedings(useless, substitution));
        entries.put("@mastersthesis", new MastersThesis(useless, substitution));
        entries.put("@misc", new Misc(useless, substitution));
        entries.put("@phdthesis", new PhdThesis(useless, substitution));
        entries.put("@techreport", new TechReport(useless, substitution));
        entries.put("@manual", new Manual(useless, substitution));
    }


    /**
     * Prepares tokenizer adapted to bibtex files.
     * <p>
     * The default syntax table is discarded. Every character starts out
     * as a word character; space, tab, carriage return, line feed and
     * both parentheses are then turned into whitespace, and
     * <code>: { } , " =</code> are turned into single-character tokens.
     * Word tokens are lower-cased and end of line is flagged as significant.
     *
     * @param fr reader positioned at the start of the bibtex data;
     *           it is not closed by this method
     * @return the configured tokenizer reading from <code>fr</code>
     */
    public StreamTokenizer prepareSimpleTokenizer(FileReader fr) {

        StreamTokenizer tokenizer = new StreamTokenizer(fr);
        tokenizer.resetSyntax();
        tokenizer.lowerCaseMode(true);
        tokenizer.wordChars(0, Character.MAX_VALUE);

        // Separators that carry no meaning for the entry parsers.
        tokenizer.whitespaceChars(' ', ' ');
        tokenizer.whitespaceChars('(', '(');
        tokenizer.whitespaceChars(')', ')');
        tokenizer.whitespaceChars('\t', '\t');
        tokenizer.whitespaceChars('\r', '\r');
        tokenizer.whitespaceChars('\n', '\n');

        // Punctuation returned as individual tokens.
        tokenizer.ordinaryChar(':');
        tokenizer.ordinaryChar('{');
        tokenizer.ordinaryChar('}');
        tokenizer.ordinaryChar(',');
        tokenizer.ordinaryChar('"');
        tokenizer.ordinaryChar('=');

        tokenizer.eolIsSignificant(true);
        return tokenizer;

    }

    /**
     * Reads one bibtex file and prints a name-specifier for each
     * recognized entry.
     * <p>
     * Every word token that matches a registered entry keyword is handed,
     * together with the tokenizer, to the corresponding parser. The string
     * form of the parser's result is written to standard output when it is
     * not empty. All other tokens are skipped.
     * <p>
     * A missing file or a read error is reported on standard error and
     * the method returns normally. The file is always closed before
     * returning.
     *
     * @param fileName path of the bibtex file to read
     */
    public void parseFile(String fileName) {

        FileReader fr = null;
        try {

            fr = new FileReader(fileName);
            StreamTokenizer tokenizer = prepareSimpleTokenizer(fr);

            while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
                // Only word tokens can be entry keywords; sval is null otherwise.
                if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                    continue;
                }
                IParser p = (IParser) entries.get(tokenizer.sval);
                if (p != null) {
                    String namespec = p.parse(tokenizer).toString();
                    if (namespec.length() > 0) {
                        System.out.println(namespec);
                    }
                }
            }
        } catch (FileNotFoundException e) {
            System.err.println("Index file " + fileName + " not found");
        } catch (IOException e) {
            System.err.println("IOException reading file " + fileName);
        } finally {
            // Release the file handle on every path, including parser failures.
            if (fr != null) {
                try {
                    fr.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only file fails.
                }
            }
        }

    }






    //-----------------------------------------------
    /**
     * Takes a list of .bib files as input and processes them in order
     * with a single extractor instance.
     *
     * @param args paths of the bibtex files to parse
     */
    public static void main(String[] args) {

        BibtexMetaDataExtractor bibgen = new BibtexMetaDataExtractor();
        for (int i = 0; i < args.length; i++) {
            bibgen.parseFile(args[i]);
        }
    }

}




