blob: ad77e298575f884cfe7cd0e78460ded3bc2e957d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jasper.xmlparser;
/**
* This class is a symbol table implementation that guarantees that
* strings used as identifiers are unique references. Multiple calls
* to <code>addSymbol</code> will always return the same string
* reference.
* <p>
* The symbol table performs the same task as <code>String.intern()</code>
* with the following differences:
* <ul>
* <li>
* A new string object does not need to be created in order to
* retrieve a unique reference. Symbols can be added by using
* a series of characters in a character array.
* </li>
* <li>
* Users of the symbol table can provide their own symbol hashing
* implementation. For example, a simple string hashing algorithm
* may fail to produce a balanced set of hashcodes for symbols
* that are <em>mostly</em> unique. Strings with similar leading
* characters are especially prone to this poor hashing behavior.
* </li>
* </ul>
*
* @author Andy Clark
* @version $Id$
*/
public class SymbolTable {
//
// Constants
//
/** Default table size. */
protected static final int TABLE_SIZE = 101;
//
// Data
//
/** Buckets. */
protected Entry[] fBuckets = null;
// actual table size
protected int fTableSize;
//
// Constructors
//
/** Constructs a symbol table with a default number of buckets. */
public SymbolTable() {
this(TABLE_SIZE);
}
/** Constructs a symbol table with a specified number of buckets. */
public SymbolTable(int tableSize) {
fTableSize = tableSize;
fBuckets = new Entry[fTableSize];
}
//
// Public methods
//
/**
* Adds the specified symbol to the symbol table and returns a
* reference to the unique symbol. If the symbol already exists,
* the previous symbol reference is returned instead, in order
* guarantee that symbol references remain unique.
*
* @param symbol The new symbol.
*/
public String addSymbol(String symbol) {
// search for identical symbol
int bucket = hash(symbol) % fTableSize;
int length = symbol.length();
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
if (length == entry.characters.length) {
for (int i = 0; i < length; i++) {
if (symbol.charAt(i) != entry.characters[i]) {
continue OUTER;
}
}
return entry.symbol;
}
}
// create new entry
Entry entry = new Entry(symbol, fBuckets[bucket]);
fBuckets[bucket] = entry;
return entry.symbol;
} // addSymbol(String):String
/**
* Adds the specified symbol to the symbol table and returns a
* reference to the unique symbol. If the symbol already exists,
* the previous symbol reference is returned instead, in order
* guarantee that symbol references remain unique.
*
* @param buffer The buffer containing the new symbol.
* @param offset The offset into the buffer of the new symbol.
* @param length The length of the new symbol in the buffer.
*/
public String addSymbol(char[] buffer, int offset, int length) {
// search for identical symbol
int bucket = hash(buffer, offset, length) % fTableSize;
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
if (length == entry.characters.length) {
for (int i = 0; i < length; i++) {
if (buffer[offset + i] != entry.characters[i]) {
continue OUTER;
}
}
return entry.symbol;
}
}
// add new entry
Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
fBuckets[bucket] = entry;
return entry.symbol;
} // addSymbol(char[],int,int):String
/**
* Returns a hashcode value for the specified symbol. The value
* returned by this method must be identical to the value returned
* by the <code>hash(char[],int,int)</code> method when called
* with the character array that comprises the symbol string.
*
* @param symbol The symbol to hash.
*/
public int hash(String symbol) {
int code = 0;
int length = symbol.length();
for (int i = 0; i < length; i++) {
code = code * 37 + symbol.charAt(i);
}
return code & 0x7FFFFFF;
} // hash(String):int
/**
* Returns a hashcode value for the specified symbol information.
* The value returned by this method must be identical to the value
* returned by the <code>hash(String)</code> method when called
* with the string object created from the symbol information.
*
* @param buffer The character buffer containing the symbol.
* @param offset The offset into the character buffer of the start
* of the symbol.
* @param length The length of the symbol.
*/
public int hash(char[] buffer, int offset, int length) {
int code = 0;
for (int i = 0; i < length; i++) {
code = code * 37 + buffer[offset + i];
}
return code & 0x7FFFFFF;
} // hash(char[],int,int):int
/**
* Returns true if the symbol table already contains the specified
* symbol.
*
* @param symbol The symbol to look for.
*/
public boolean containsSymbol(String symbol) {
// search for identical symbol
int bucket = hash(symbol) % fTableSize;
int length = symbol.length();
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
if (length == entry.characters.length) {
for (int i = 0; i < length; i++) {
if (symbol.charAt(i) != entry.characters[i]) {
continue OUTER;
}
}
return true;
}
}
return false;
} // containsSymbol(String):boolean
/**
* Returns true if the symbol table already contains the specified
* symbol.
*
* @param buffer The buffer containing the symbol to look for.
* @param offset The offset into the buffer.
* @param length The length of the symbol in the buffer.
*/
public boolean containsSymbol(char[] buffer, int offset, int length) {
// search for identical symbol
int bucket = hash(buffer, offset, length) % fTableSize;
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
if (length == entry.characters.length) {
for (int i = 0; i < length; i++) {
if (buffer[offset + i] != entry.characters[i]) {
continue OUTER;
}
}
return true;
}
}
return false;
} // containsSymbol(char[],int,int):boolean
//
// Classes
//
/**
* This class is a symbol table entry. Each entry acts as a node
* in a linked list.
*/
protected static final class Entry {
//
// Data
//
/** Symbol. */
public String symbol;
/**
* Symbol characters. This information is duplicated here for
* comparison performance.
*/
public char[] characters;
/** The next entry. */
public Entry next;
//
// Constructors
//
/**
* Constructs a new entry from the specified symbol and next entry
* reference.
*/
public Entry(String symbol, Entry next) {
this.symbol = symbol.intern();
characters = new char[symbol.length()];
symbol.getChars(0, characters.length, characters, 0);
this.next = next;
}
/**
* Constructs a new entry from the specified symbol information and
* next entry reference.
*/
public Entry(char[] ch, int offset, int length, Entry next) {
characters = new char[length];
System.arraycopy(ch, offset, characters, 0, length);
symbol = new String(characters).intern();
this.next = next;
}
} // class Entry
} // class SymbolTable