package de.jbible.tool.bibleimport;

import java.util.*;
import java.io.*;

/**
 * A pool of distinct strings with per-string usage counters.
 *
 * Every distinct string is stored once, wrapped in a StringPoolData
 * entry. The methods {@link #add} and {@link #normalize} return the
 * string held by the pool entry, so after normalizing, equal strings
 * are meant to be represented by the same reference.
 * NOTE(review): this relies on StringPoolData.getString() returning the
 * string the entry was created with - StringPoolData is not visible
 * here, confirm.
 *
 * A counter in each entry counts how often the string was added.
 *
 * @see #normalize
 * @see #add
 */

public class StringPool
{
    /** Maps each pooled string to its StringPoolData entry. */
    private final Map _stringMap = new HashMap();

    /**
     * Create a new, empty pool.
     */
    public StringPool()
    {
    }

    /**
     * Add a string to the pool and count the usage.
     *
     * If the string is not yet in the pool, a new entry is created.
     * In both cases the counter of the entry is incremented.
     *
     * @param newString The new string to add.
     * @return The normalized string (the one held by the pool entry).
     */
    public String add (String newString)
    {
        return norm_impl (newString, true);
    }

    /**
     * Normalize a string without counting it.
     *
     * If the string is not yet in the pool, a new entry is created.
     * The counter is not incremented by this method.
     *
     * @param newString The string to normalize.
     * @return The normalized string (the one held by the pool entry).
     */
    public String normalize (String newString)
    {
        return norm_impl (newString, false);
    }

    /**
     * Look up (or create) the pool entry for a string.
     *
     * This is the implementation of add and normalize.
     * The doIncrement flag switches the behaviour.
     *
     * @param newString The string to look up or add.
     * @param doIncrement true: work as add(), false: work as
     *      normalize()
     * @return The string held by the pool entry.
     *
     * @see #normalize
     * @see #add
     */
    private String norm_impl (String newString,
        boolean doIncrement)
    {
        StringPoolData data = (StringPoolData) _stringMap.get(newString);

        if (data == null)
        {
            // first occurrence: create the entry
            data = new StringPoolData (newString);
            _stringMap.put(newString, data);
        }
        if (doIncrement)
        {
            data.increment();
        }

        return data.getString();
    }

    /**
     * Copy the entries of the map into a list, sort it and store
     * the resulting position in each entry.
     *
     * The list is sorted with the natural ordering of StringPoolData.
     * NOTE(review): the intention is that the most used words come
     * first; that depends on StringPoolData.compareTo, which is not
     * visible here.
     *
     * As a side effect the _order field of every entry is set to its
     * index in the sorted list, starting with 0.
     *
     * @return A sorted list, a copy of the internal entries.
     *      The elements are of type StringPoolData.
     */
    public List sort ()
    {
        // create new list from string map
        List l = new ArrayList (_stringMap.values());

        // sort the list
        Collections.sort(l);

        // set the order variables.
        // NOTE(review): the original comment here said "0 is separator!",
        // although the first entry receives order 0 - confirm with the
        // code that reads the written data.
        int order = 0;
        for (Iterator iter = l.iterator(); iter.hasNext();)
        {
            ((StringPoolData) iter.next())._order = order++;
        }

        return l;
    }

    /**
     * Query the order of a string.
     *
     * This method assumes that the sort() method was called
     * after the last add (); otherwise the stored order may be stale.
     *
     * @param str The string to request the order from.
     * @return The order of this string.
     * @throws IllegalArgumentException if the string is not in the pool.
     */
    public int getOrder (String str)
    {
        StringPoolData data = (StringPoolData) _stringMap.get(str);

        if (data == null)
        {
            // Map.get returns null for unknown keys; fail with context
            // instead of a bare NullPointerException.
            throw new IllegalArgumentException(
                "String is not in the pool: " + str);
        }

        return data._order;
    }

    /**
     * Write the string pool to a stream.
     *
     * Written are, in this order: the length of the encoded string
     * data in bytes (int), the number of strings (int) and the
     * encoded string data itself.
     *
     * The pool is sorted by this call (see sort()), but its entries
     * are kept.
     *
     * @param out The output stream.
     * @throws IOException if writing to the stream fails.
     */
    public void write (DataOutputStream out)
        throws IOException
    {
        byte [] buffer = prepareWriting ();

        // write data count:
        out.writeInt(buffer.length);
        out.writeInt(_stringMap.size());
        out.write(buffer);
    }

    /**
     * Prepare this object for writing.
     *
     * This method sorts the pool and creates a byte array from
     * all strings in sorted order. Each string is followed by
     * a space (32).
     *
     * @return A byte array that contains the data of this object.
     * @throws IOException declared by the stream methods used.
     */
    private byte[] prepareWriting ()
        throws IOException
    {
        ByteArrayOutputStream bos = new ByteArrayOutputStream ();

        for (Iterator iter = sort ().iterator(); iter.hasNext();)
        {
            writeUTF (bos, ((StringPoolData) iter.next()).getString());
            // separator after every string, the last one included
            bos.write(' ');
        }
        bos.flush ();

        return bos.toByteArray();
    }

    /**
     * My own UTF implementation which does not
     * prepend the string size before each string.
     *
     * Every char is encoded on its own with one, two or three bytes.
     * That means a surrogate pair is written as two three-byte
     * sequences, and the char 0 is written as a single zero byte.
     *
     * @param out The output stream
     * @param s The string to write.
     * @throws IOException if writing to the stream fails.
     */
    private void writeUTF (OutputStream out, String s)
        throws IOException
    {
        char [] data = s.toCharArray();
        int d;

        for (int i = 0; i < data.length; i++)
        {
            d = data [i]; // convert only once!
            if (d < 0x80)
            {
                // one byte: 0xxxxxxx
                out.write(d);
            }
            else if (d < 0x800)
            {
                // two bytes: 110xxxxx 10xxxxxx
                out.write(((d & 0x07c0) >> 6) | 0xc0);
                out.write( (d & 0x003f)       | 0x80);
            }
            else
            {
                // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
                out.write(((d & 0xf000) >> 12) | 0xe0);
                out.write(((d & 0x0fc0) >> 6)  | 0x80);
                out.write( (d & 0x003f)        | 0x80);
            }
        }
    }

    /**
     * Debug purpose only.
     *
     * Returns a short statistic: the number of different words, the
     * total count of all words and how the total splits between the
     * first 128 entries of the sorted list and the remaining ones.
     * NOTE(review): the variable is named word127 but up to 128
     * entries are summed - confirm which limit is intended.
     *
     * This method calls sort() and therefore updates the order of
     * all entries.
     *
     * @return A string that describes this object.
     */
    public String toString ()
    {
        List l = sort();
        int size = l.size();

        // calc the count of the first entries (at most 128):
        int max = Math.min(size, 128);
        int word127 = 0;
        int i;

        for (i = 0; i < max; i++)
        {
            word127 += ((StringPoolData) l.get(i)).getCounter();
        }

        // compute the total count of words
        int total = word127;
        for (; i < size; i++)
        {
            total += ((StringPoolData) l.get(i)).getCounter();
        }

        // statistic (for an empty pool the division yields NaN,
        // which the int cast turns into 0):
        return  "Diffent words: "+size+
                "\nTotal count of words: "+total+
                "\n1Byte/2Byte coding: "+
                word127+"/"+(total-word127)+
                " ("+(int)( ((double) word127)/((double)total)*100 )+
                "%/"+(int)( ((double)(total-word127))/((double)total)*100 )+"%)";
    }

    /**
     * Returns the count of strings this object contains.
     *
     * @return The number of entries in the internal map.
     */
    public int size ()
    {
        return _stringMap.size();
    }
}


