
import java.io.*;
import java.util.*;

/**
 * Reads words into a Trie from a compressed (prefix-encoded) file and
 * writes the words of a Trie back out in that same compressed format.
 *
 *
 * @author Owen Astrachan
 * @version $Id$
 */

public class FastJoggleReader
{
    /** Initial size, in bytes, of the buffer the dictionary is read into. */
    public final static int BUFSIZ = 82000;

    /** File read by {@link #ReadWords(Trie)} when no name is supplied. */
    private final static String DEFAULT_DICTIONARY = "bogdict";

    /** Initial capacity of the word being rebuilt; it grows on demand. */
    private final static int INITIAL_WORD_CAPACITY = 20;

    /**
     * Largest prefix length that may be written: the reader treats every
     * byte in 'a'..'z' as a letter, so a count must stay below 'a'.
     */
    private final static int MAX_PREFIX = 'a' - 1;

    public FastJoggleReader()
    {
        // nothing to create
    }

    /**
     * Read words from the default compressed file ("bogdict") into a trie.
     *
     * @param trie is the Trie to read words into
     */
    public void ReadWords(Trie trie)
    {
        ReadWords(trie, DEFAULT_DICTIONARY);
    }

    /**
     * Read words from a compressed file (non-standard compression, see
     * {@link #WriteWords(Trie, String)}) into a trie.  The whole file is
     * loaded into memory first, then decoded.  I/O errors are reported on
     * System.err and are not propagated to the caller.
     *
     * @param trie is the Trie to read words into
     * @param filename the name of the compressed file to read
     */
    public void ReadWords(Trie trie, String filename)
    {
        FileInputStream f = null;
        try
        {
            f = new FileInputStream(filename);
            byte buffer[] = new byte[BUFSIZ];
            int bytesRead = 0;

            // A single read() may return fewer bytes than requested, and
            // the file may be larger than BUFSIZ, so loop until end of
            // file and enlarge the buffer whenever it fills up.
            while (true)
            {
                if (bytesRead == buffer.length)
                {
                    byte bigger[] = new byte[buffer.length * 2];
                    System.arraycopy(buffer, 0, bigger, 0, bytesRead);
                    buffer = bigger;
                }
                int n = f.read(buffer, bytesRead, buffer.length - bytesRead);
                if (n < 0)
                {
                    break;                             // end of file
                }
                bytesRead += n;
            }

            System.out.println("read = " + bytesRead); // report it
            ProcessBuffer(buffer, bytesRead, trie);    // process it
        }
        catch (IOException e)
        {
            e.printStackTrace();
            System.err.println("error reading dictionary");
            System.err.println(e.getMessage());
        }
        finally
        {
            if (f != null)
            {
                try
                {
                    f.close();
                }
                catch (IOException e)
                {
                    // nothing useful can be done if close fails
                }
            }
        }
    }

    /**
     * Process the compressed words by storing them into a trie.
     *
     * Bytes in 'a'..'z' are appended to the current word.  Any other byte
     * ends the current word (which is then added to the trie) and is
     * interpreted as a number: the count of leading characters of the
     * current word that the next word re-uses.  A word is only added when
     * a non-letter byte follows it, so the data must end with one.
     *
     * @param buffer the compressed characters
     * @param size the number of valid bytes in buffer
     * @param trie the Trie to store words into
     */
    void ProcessBuffer(byte buffer[], int size, Trie trie)
    {
        char sb[] = new char[INITIAL_WORD_CAPACITY];
        int count = 0;        // number of characters currently in sb
        int numWords = 0;
        int limit = size < buffer.length ? size : buffer.length;

        for (int index = 0; index < limit; index++)
        {
            char ch = (char) buffer[index];
            if ('a' <= ch && ch <= 'z')    // legal char, store it
            {
                // Always keep one spare slot after the word so that the
                // array handed to addCString ends with '\0' (presumably
                // what addCString relies on -- confirm against Trie).
                if (count + 1 >= sb.length)
                {
                    sb = grow(sb);
                }
                sb[count++] = ch;
            }
            else                           // word ended, process
            {
                if (count > 0)
                {
                    numWords++;
                    trie.addCString(sb);   // add to trie
                    if (numWords % 1000 == 0)
                    {
                        System.out.println("processed " + numWords
                                           + " words ");
                    }
                }

                // decrease effective size of sb to ch characters
                // this treats ch (a control char) as a number
                // e.g., ^D (control-D) is 4
                int keep = (int) ch;
                if (keep > count)
                {
                    // Not a valid prefix length (malformed data or the
                    // terminating linefeed): start the next word afresh
                    // instead of letting count run below zero.
                    keep = 0;
                }
                while (count > keep)
                {
                    sb[count--] = '\0';
                }
            }
        }
    }

    // return a copy of old with twice the capacity

    private static char[] grow(char old[])
    {
        char bigger[] = new char[old.length * 2];
        System.arraycopy(old, 0, bigger, 0, old.length);
        return bigger;
    }

    // find length of common prefix of a and b

    private int PrefixLength(String a, String b)
    {
        int size = a.length() < b.length() ? a.length() : b.length();
        int k = 0;
        while (k < size && a.charAt(k) == b.charAt(k))
        {
            k++;
        }
        return k;
    }

    /**
     * Write words to a compressed file (e.g., for subsequent reading).
     *
     * Each word is written as one byte holding the number of leading
     * characters it shares with the previously written word, followed by
     * its remaining characters (low eight bits of each char).  A final
     * linefeed byte terminates the last word.  The reader only recognises
     * 'a'..'z' as letters, so words are expected to be lowercase a-z.
     * I/O errors are reported on standard output and not propagated.
     *
     * @param trie is the trie that stores the words
     * @param filename the name of the file storing compressed words
     */
    public void WriteWords(Trie trie, String filename)
    {
        HoldWords hw = new HoldWords();

        trie.apply(hw);       // fills hw.words via the Recorder callback

        FileOutputStream st = null;
        DataOutputStream f = null;

        try
        {
            st = new FileOutputStream(filename);
            f = new DataOutputStream(new BufferedOutputStream(st));
            String last = "";
            for (int k = 0; k < hw.words.size(); k++)
            {
                String s = (String) hw.words.elementAt(k);
                int prefix = PrefixLength(last, s);
                if (prefix > MAX_PREFIX)
                {
                    // a shorter prefix is still decoded correctly, it
                    // merely compresses a little less
                    prefix = MAX_PREFIX;
                }
                last = s;
                f.writeByte(prefix);
                f.writeBytes(s.substring(prefix));  // any suffix length

                if (k % 1000 == 0)
                {
                    System.out.println("wrote " + k + " words ");
                }
            }
            f.writeByte(10);   // write a linefeed
            f.flush();         // surface write errors here, not in close
        }
        catch (IOException e)
        {
            System.out.println("error opening " + filename);
            System.err.println(e.getMessage());
        }
        finally
        {
            // Either stream may still be null if opening the file failed.
            try
            {
                if (f != null)
                {
                    f.close();         // also closes st
                }
                else if (st != null)
                {
                    st.close();
                }
            }
            catch (IOException e)
            {
                // nothing useful can be done if close fails
            }
        }
    }
}

/**
 * a class to store all words in a trie in a vector
 *
 *@see Trie
 */
class HoldWords implements Recorder
{
    /** Every object handed to record(), in the order it was received. */
    public Vector words;

    /** Creates a holder whose word list starts out empty. */
    public HoldWords()
    {
        this.words = new Vector();
    }

    /**
     * Appends the given object to the end of the word list.
     *
     * @param o the object to remember
     */
    public void record(Object o)
    {
        words.add(o);
    }

    /** Does nothing; the collected words are read directly from the field. */
    public void report()
    {
        // intentionally empty
    }
}

