/* | |
* Licensed to the Apache Software Foundation (ASF) under one or more | |
* contributor license agreements. See the NOTICE file distributed with | |
* this work for additional information regarding copyright ownership. | |
* The ASF licenses this file to You under the Apache License, Version 2.0 | |
* (the "License"); you may not use this file except in compliance with | |
* the License. You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
* ==================================================================== | |
* | |
* This software consists of voluntary contributions made by many | |
* individuals on behalf of the Apache Software Foundation and was | |
* originally based on software copyright (c) 1999, International | |
* Business Machines, Inc., http://www.apache.org. For more | |
* information on the Apache Software Foundation, please see | |
* <http://www.apache.org/>. | |
*/ | |
package org.apache.struts2.jasper.xmlparser; | |
/** | |
* This class is a symbol table implementation that guarantees that | |
* strings used as identifiers are unique references. Multiple calls | |
* to <code>addSymbol</code> will always return the same string | |
* reference. | |
* <p> | |
* The symbol table performs the same task as <code>String.intern()</code> | |
* with the following differences: | |
* <ul> | |
* <li> | |
* A new string object does not need to be created in order to | |
* retrieve a unique reference. Symbols can be added by using | |
* a series of characters in a character array. | |
* </li> | |
* <li> | |
* Users of the symbol table can provide their own symbol hashing | |
* implementation. For example, a simple string hashing algorithm | |
* may fail to produce a balanced set of hashcodes for symbols | |
* that are <em>mostly</em> unique. Strings with similar leading | |
* characters are especially prone to this poor hashing behavior. | |
* </li> | |
* </ul> | |
* | |
* @author Andy Clark | |
* @version $Id: SymbolTable.java 466606 2006-10-21 23:07:12Z markt $ | |
*/ | |
public class SymbolTable { | |
// | |
// Constants | |
// | |
/** Default table size. */ | |
protected static final int TABLE_SIZE = 101; | |
// | |
// Data | |
// | |
/** Buckets. */ | |
protected Entry[] fBuckets = null; | |
// actual table size | |
protected int fTableSize; | |
// | |
// Constructors | |
// | |
/** Constructs a symbol table with a default number of buckets. */ | |
public SymbolTable() { | |
this(TABLE_SIZE); | |
} | |
/** Constructs a symbol table with a specified number of buckets. */ | |
public SymbolTable(int tableSize) { | |
fTableSize = tableSize; | |
fBuckets = new Entry[fTableSize]; | |
} | |
// | |
// Public methods | |
// | |
/** | |
* Adds the specified symbol to the symbol table and returns a | |
* reference to the unique symbol. If the symbol already exists, | |
* the previous symbol reference is returned instead, in order | |
* guarantee that symbol references remain unique. | |
* | |
* @param symbol The new symbol. | |
*/ | |
public String addSymbol(String symbol) { | |
// search for identical symbol | |
int bucket = hash(symbol) % fTableSize; | |
int length = symbol.length(); | |
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { | |
if (length == entry.characters.length) { | |
for (int i = 0; i < length; i++) { | |
if (symbol.charAt(i) != entry.characters[i]) { | |
continue OUTER; | |
} | |
} | |
return entry.symbol; | |
} | |
} | |
// create new entry | |
Entry entry = new Entry(symbol, fBuckets[bucket]); | |
fBuckets[bucket] = entry; | |
return entry.symbol; | |
} // addSymbol(String):String | |
/** | |
* Adds the specified symbol to the symbol table and returns a | |
* reference to the unique symbol. If the symbol already exists, | |
* the previous symbol reference is returned instead, in order | |
* guarantee that symbol references remain unique. | |
* | |
* @param buffer The buffer containing the new symbol. | |
* @param offset The offset into the buffer of the new symbol. | |
* @param length The length of the new symbol in the buffer. | |
*/ | |
public String addSymbol(char[] buffer, int offset, int length) { | |
// search for identical symbol | |
int bucket = hash(buffer, offset, length) % fTableSize; | |
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { | |
if (length == entry.characters.length) { | |
for (int i = 0; i < length; i++) { | |
if (buffer[offset + i] != entry.characters[i]) { | |
continue OUTER; | |
} | |
} | |
return entry.symbol; | |
} | |
} | |
// add new entry | |
Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]); | |
fBuckets[bucket] = entry; | |
return entry.symbol; | |
} // addSymbol(char[],int,int):String | |
/** | |
* Returns a hashcode value for the specified symbol. The value | |
* returned by this method must be identical to the value returned | |
* by the <code>hash(char[],int,int)</code> method when called | |
* with the character array that comprises the symbol string. | |
* | |
* @param symbol The symbol to hash. | |
*/ | |
public int hash(String symbol) { | |
int code = 0; | |
int length = symbol.length(); | |
for (int i = 0; i < length; i++) { | |
code = code * 37 + symbol.charAt(i); | |
} | |
return code & 0x7FFFFFF; | |
} // hash(String):int | |
/** | |
* Returns a hashcode value for the specified symbol information. | |
* The value returned by this method must be identical to the value | |
* returned by the <code>hash(String)</code> method when called | |
* with the string object created from the symbol information. | |
* | |
* @param buffer The character buffer containing the symbol. | |
* @param offset The offset into the character buffer of the start | |
* of the symbol. | |
* @param length The length of the symbol. | |
*/ | |
public int hash(char[] buffer, int offset, int length) { | |
int code = 0; | |
for (int i = 0; i < length; i++) { | |
code = code * 37 + buffer[offset + i]; | |
} | |
return code & 0x7FFFFFF; | |
} // hash(char[],int,int):int | |
/** | |
* Returns true if the symbol table already contains the specified | |
* symbol. | |
* | |
* @param symbol The symbol to look for. | |
*/ | |
public boolean containsSymbol(String symbol) { | |
// search for identical symbol | |
int bucket = hash(symbol) % fTableSize; | |
int length = symbol.length(); | |
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { | |
if (length == entry.characters.length) { | |
for (int i = 0; i < length; i++) { | |
if (symbol.charAt(i) != entry.characters[i]) { | |
continue OUTER; | |
} | |
} | |
return true; | |
} | |
} | |
return false; | |
} // containsSymbol(String):boolean | |
/** | |
* Returns true if the symbol table already contains the specified | |
* symbol. | |
* | |
* @param buffer The buffer containing the symbol to look for. | |
* @param offset The offset into the buffer. | |
* @param length The length of the symbol in the buffer. | |
*/ | |
public boolean containsSymbol(char[] buffer, int offset, int length) { | |
// search for identical symbol | |
int bucket = hash(buffer, offset, length) % fTableSize; | |
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { | |
if (length == entry.characters.length) { | |
for (int i = 0; i < length; i++) { | |
if (buffer[offset + i] != entry.characters[i]) { | |
continue OUTER; | |
} | |
} | |
return true; | |
} | |
} | |
return false; | |
} // containsSymbol(char[],int,int):boolean | |
// | |
// Classes | |
// | |
/** | |
* This class is a symbol table entry. Each entry acts as a node | |
* in a linked list. | |
*/ | |
protected static final class Entry { | |
// | |
// Data | |
// | |
/** Symbol. */ | |
public String symbol; | |
/** | |
* Symbol characters. This information is duplicated here for | |
* comparison performance. | |
*/ | |
public char[] characters; | |
/** The next entry. */ | |
public Entry next; | |
// | |
// Constructors | |
// | |
/** | |
* Constructs a new entry from the specified symbol and next entry | |
* reference. | |
*/ | |
public Entry(String symbol, Entry next) { | |
this.symbol = symbol.intern(); | |
characters = new char[symbol.length()]; | |
symbol.getChars(0, characters.length, characters, 0); | |
this.next = next; | |
} | |
/** | |
* Constructs a new entry from the specified symbol information and | |
* next entry reference. | |
*/ | |
public Entry(char[] ch, int offset, int length, Entry next) { | |
characters = new char[length]; | |
System.arraycopy(ch, offset, characters, 0, length); | |
symbol = new String(characters).intern(); | |
this.next = next; | |
} | |
} // class Entry | |
} // class SymbolTable |