blob: fb97d1eb3c36eb852aeb72b032470a94b16ac36d [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999,2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xml.dtm;
import org.apache.xml.utils.IntVector;
import java.util.Vector;
/** <p>DTMStringPool is an "interning" mechanism for strings. It will
* create a stable 1:1 mapping between a set of string values and a set of
* integer index values, so the integers can be used to reliably and
* uniquely identify (and when necessary retrieve) the strings.</p>
*
* <p>Design Priorities:
* <ul>
* <li>String-to-index lookup speed is critical.</li>
* <li>Index-to-String lookup speed is slightly less so.</li>
* <li>Threadsafety is not guaranteed at this level.
* Enforce that in the application if needed.</li>
* <li>Storage efficiency is an issue but not a huge one.
* It is expected that string pools won't exceed about 2000 entries.</li>
* </ul>
* </p>
*
* <p>Implementation detail: A standard Hashtable is relatively
* inefficient when looking up primitive int values, especially when
* we're already maintaining an int-to-string vector. So I'm
* maintaining a simple hash chain within this class.</p>
*
* <p>NOTE: There is nothing in the code that has a real dependency upon
* String. It would work with any object type that implements reliable
* .hashCode() and .equals() operations. The API enforces Strings because
* it's safer that way, but this could trivially be turned into a general
* ObjectPool if one was needed.</p>
*
* <p>Status: Passed basic test in main().</p>
* */
public class DTMStringPool
{
  /** Index-to-string table: element <code>i</code> is the string that was
   * assigned index <code>i</code>. */
  Vector m_intToString;

  /** Number of hash buckets. */
  static final int HASHPRIME=101;

  /** For each bucket, the index of the first string hashed into it,
   * or {@link #NULL} if the bucket is empty. */
  int[] m_hashStart=new int[HASHPRIME];

  /** Chain links, parallel to {@link #m_intToString}: element <code>i</code>
   * is the index of the next string in the same bucket as string
   * <code>i</code>, or {@link #NULL} if string <code>i</code> ends its chain. */
  IntVector m_hashChain;

  /** Index value meaning "no string". It is returned by
   * {@link #stringToIndex} for a null argument, maps back to null in
   * {@link #indexToString}, and doubles as the end-of-chain marker. */
  public static final int NULL=-1;

  /** Create a pool whose only entry is the empty string, at index 0. */
  public DTMStringPool()
  {
    m_intToString=new Vector();
    m_hashChain=new IntVector();

    // Marks every hash bucket empty (a fresh int[] holds 0, not NULL).
    removeAllElements();

    // -sb Add this to force empty strings to be index 0.
    stringToIndex("");
  }

  /** Discard every string in the pool and mark all hash buckets empty.
   *
   * <p>Unlike the constructor, this does not re-add the empty string, so
   * the next new string passed to {@link #stringToIndex} is assigned
   * index 0.</p>
   *
   * <p>NOTE(review): confirm whether callers expect "" to remain at
   * index 0 after a reset.</p>
   * */
  public void removeAllElements()
  {
    m_intToString.removeAllElements();
    for(int i=0;i<HASHPRIME;++i)
    {
      m_hashStart[i]=NULL;
    }
    m_hashChain.removeAllElements();
  }

  /** @param i index previously returned by {@link #stringToIndex},
   * or {@link #NULL}.
   * @return string whose value is uniquely identified by this integer index,
   * or null if the index is {@link #NULL}.
   * @throws java.lang.ArrayIndexOutOfBoundsException
   * if index doesn't map to a string.
   * */
  public String indexToString(int i)
    throws java.lang.ArrayIndexOutOfBoundsException
  {
    if(i==NULL)
    {
      return null;
    }
    return (String) m_intToString.elementAt(i);
  }

  /** Look the string up in the pool, adding it if it is not yet present.
   *
   * @param s string to intern; may be null.
   * @return integer index uniquely identifying the value of this string,
   * or {@link #NULL} if <code>s</code> is null.
   * */
  public int stringToIndex(String s)
  {
    if(s==null)
    {
      return NULL;
    }

    // The remainder keeps the sign of hashCode(), so it lies in
    // (-HASHPRIME, HASHPRIME); negating a negative value cannot overflow.
    int hashslot=s.hashCode()%HASHPRIME;
    if(hashslot<0)
    {
      hashslot=-hashslot;
    }

    // Is it one we already know? Walk this bucket's chain, remembering
    // the last entry visited so a new entry can be linked after it.
    int hashlast=NULL;
    for(int candidate=m_hashStart[hashslot];
        candidate!=NULL;
        candidate=m_hashChain.elementAt(candidate))
    {
      if(s.equals(m_intToString.elementAt(candidate)))
      {
        return candidate;
      }
      hashlast=candidate;
    }

    // New value. Add to tables.
    int newIndex=m_intToString.size();
    m_intToString.addElement(s);
    m_hashChain.addElement(NULL); // Initialize to no-following-same-hash

    if(hashlast==NULL)
    {
      // First for this hash
      m_hashStart[hashslot]=newIndex;
    }
    else
    {
      // Link from previous with same hash
      m_hashChain.setElementAt(newIndex,hashlast);
    }
    return newIndex;
  }

  /** Command-line unit test driver. This test relies on the fact that
   * this version of the pool assigns indices consecutively as new unique
   * strings are encountered, continuing from the number of entries the
   * pool already holds: one (the empty string added by the constructor)
   * on the first pass, zero after removeAllElements() on the second.
   *
   * @param args ignored.
   */
  public static void main(String[] args)
  {
    String[] word={
      "Zero","One","Two","Three","Four","Five",
      "Six","Seven","Eight","Nine","Ten",
      "Eleven","Twelve","Thirteen","Fourteen","Fifteen",
      "Sixteen","Seventeen","Eighteen","Nineteen","Twenty",
      "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four",
      "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight",
      "Twenty-Nine","Thirty","Thirty-One","Thirty-Two",
      "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six",
      "Thirty-Seven","Thirty-Eight","Thirty-Nine"};

    DTMStringPool pool=new DTMStringPool();

    System.out.println("If no complaints are printed below, we passed initial test.");

    for(int pass=0;pass<=1;++pass)
    {
      // Entries present before this pass adds anything; word[i] must
      // therefore land at index base+i.
      int base=pool.m_intToString.size();
      int i;

      for(i=0;i<word.length;++i)
      {
        int expected=base+i;
        int j=pool.stringToIndex(word[i]);
        if(j!=expected)
        {
          System.out.println("\tMismatch populating pool: assigned "+
                             j+" for create "+expected);
        }
      }

      for(i=0;i<word.length;++i)
      {
        int expected=base+i;
        int j=pool.stringToIndex(word[i]);
        if(j!=expected)
        {
          System.out.println("\tMismatch in stringToIndex: returned "+
                             j+" for lookup "+expected);
        }
      }

      for(i=0;i<word.length;++i)
      {
        int index=base+i;
        String w=pool.indexToString(index);
        if(!word[i].equals(w))
        {
          System.out.println("\tMismatch in indexToString: returned "+
                             w+" for lookup "+index);
        }
      }

      pool.removeAllElements();

      System.out.println("\nPass "+pass+" complete\n");
    } // end pass loop
  }
}