blob: 8d5e17e5a447c8147fecbc7790b13ae23cf94aa5 [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xerces.utils;
/**
*
* @version
*/
public final class SymbolCache {
//
// Symbol Cache
//
public static final int CHAR_OFFSET = 0;
public static final int INDEX_OFFSET = 1;
public static final int NEXT_OFFSET = 2;
public static final int CACHE_RECORD_SIZE = 3;
//
public static final int INITIAL_CACHE_RECORD_COUNT = 4; // start with 4 entries per level
//
public char[] fSymbolChars = new char[8192];
public int fSymbolCharsOffset = 0;
public int[][] fCacheLines = new int[8][];
public int fCacheLineCount = 0;
//
//
//
public SymbolCache() {
fCacheLines[fCacheLineCount++] = new int[1+(INITIAL_CACHE_RECORD_COUNT*CACHE_RECORD_SIZE)];
}
public void reset() {
fSymbolCharsOffset = 0;
fCacheLineCount = 0;
fCacheLines[fCacheLineCount++] = new int[1+(INITIAL_CACHE_RECORD_COUNT*CACHE_RECORD_SIZE)];
}
//
//
//
public char[] getSymbolChars() {
return fSymbolChars;
}
//
// Symbol interfaces
//
public String createSymbol(int symbolHandle, int startOffset, int entry, int[] entries, int offset) {
int slen = fSymbolCharsOffset - startOffset;
String str = new String(fSymbolChars, startOffset, slen);
try {
entries[offset + SymbolCache.INDEX_OFFSET] = symbolHandle;
} catch (ArrayIndexOutOfBoundsException ex) {
throw new RuntimeException("UTL001 untested");
}
return str;
}
public int addSymbolToCache(String str, int slen, int symbolHandle) {
int charsOffset = fSymbolCharsOffset;
if (slen == 0) // EMPTY_STRING cannot be a legal "symbol"
return charsOffset;
int strIndex = 0;
char ch = str.charAt(strIndex++);
try {
fSymbolChars[fSymbolCharsOffset] = ch;
} catch (ArrayIndexOutOfBoundsException ex) {
char[] newChars = new char[fSymbolChars.length * 2];
System.arraycopy(fSymbolChars, 0, newChars, 0, fSymbolChars.length);
fSymbolChars = newChars;
fSymbolChars[fSymbolCharsOffset] = ch;
}
fSymbolCharsOffset++;
int entry = 0;
int[] entries = fCacheLines[entry];
int count = entries[0];
int i = 0;
int offset = 1;
while (true) {
if (i == count)
break;
if (entries[offset + CHAR_OFFSET] != ch) {
i++;
offset += CACHE_RECORD_SIZE;
continue;
}
if (strIndex == slen) {
if (entries[offset + INDEX_OFFSET] != -1) {
// How did we miss this during lookup ?
throw new RuntimeException("addSymbolToCache");
}
entries[offset + INDEX_OFFSET] = symbolHandle;
return charsOffset;
}
ch = str.charAt(strIndex++);
try {
fSymbolChars[fSymbolCharsOffset] = ch;
} catch (ArrayIndexOutOfBoundsException ex) {
char[] newChars = new char[fSymbolChars.length * 2];
System.arraycopy(fSymbolChars, 0, newChars, 0, fSymbolChars.length);
fSymbolChars = newChars;
fSymbolChars[fSymbolCharsOffset] = ch;
}
fSymbolCharsOffset++;
entry = entries[offset + NEXT_OFFSET];
try {
entries = fCacheLines[entry];
} catch (ArrayIndexOutOfBoundsException ex) {
if (entry == -1) {
entry = fCacheLineCount++;
entries[offset + NEXT_OFFSET] = entry;
entries = new int[1+(INITIAL_CACHE_RECORD_COUNT*CACHE_RECORD_SIZE)];
try {
fCacheLines[entry] = entries;
} catch (ArrayIndexOutOfBoundsException ex2) {
int[][] newCache = new int[entry * 2][];
System.arraycopy(fCacheLines, 0, newCache, 0, entry);
fCacheLines = newCache;
fCacheLines[entry] = entries;
}
} else {
entries = fCacheLines[entry];
throw new RuntimeException("UTL001 untested");
}
}
count = entries[0];
i = 0;
offset = 1;
}
while (true) {
entries[0]++;
try {
entries[offset + CHAR_OFFSET] = ch;
} catch (ArrayIndexOutOfBoundsException ex) {
int newSize = 1 + ((offset - 1) * 2);
int[] newEntries = new int[newSize];
System.arraycopy(entries, 0, newEntries, 0, offset);
fCacheLines[entry] = entries = newEntries;
entries[offset + CHAR_OFFSET] = ch;
}
if (strIndex == slen) {
entries[offset + INDEX_OFFSET] = symbolHandle;
entries[offset + NEXT_OFFSET] = -1;
break;
}
entry = fCacheLineCount++;
entries[offset + INDEX_OFFSET] = -1;
entries[offset + NEXT_OFFSET] = entry;
entries = new int[1+(INITIAL_CACHE_RECORD_COUNT*CACHE_RECORD_SIZE)];
try {
fCacheLines[entry] = entries;
} catch (ArrayIndexOutOfBoundsException ex) {
int[][] newCache = new int[entry * 2][];
System.arraycopy(fCacheLines, 0, newCache, 0, entry);
fCacheLines = newCache;
fCacheLines[entry] = entries;
}
offset = 1;
ch = str.charAt(strIndex++);
try {
fSymbolChars[fSymbolCharsOffset] = ch;
} catch (ArrayIndexOutOfBoundsException ex) {
char[] newChars = new char[fSymbolChars.length * 2];
System.arraycopy(fSymbolChars, 0, newChars, 0, fSymbolChars.length);
fSymbolChars = newChars;
fSymbolChars[fSymbolCharsOffset] = ch;
}
fSymbolCharsOffset++;
}
return charsOffset;
}
public void updateCacheLine(int charsOffset, int totalMisses, int length) {
//System.err.println("found symbol " + toString(symbolIndex) + " after " + totalMisses + " total misses (" + (totalMisses/length) + " misses per character).");
int entry = 0;
int[] entries = fCacheLines[0];
int ch = fSymbolChars[charsOffset++];
int count = entries[0];
int offset = 1 + ((count - 1) * CACHE_RECORD_SIZE);
int misses = 0;
while (true) {
if (ch != entries[offset + CHAR_OFFSET]) {
offset -= CACHE_RECORD_SIZE;
misses++;
continue;
}
if (misses > 4) {
int symIndex = entries[offset + INDEX_OFFSET];
int nextIndex = entries[offset + NEXT_OFFSET];
System.arraycopy(entries, offset + CACHE_RECORD_SIZE, entries, offset, misses * CACHE_RECORD_SIZE);
offset = 1 + ((count - 1) * CACHE_RECORD_SIZE);
entries[offset + CHAR_OFFSET] = ch;
entries[offset + INDEX_OFFSET] = symIndex;
entries[offset + NEXT_OFFSET] = nextIndex;
}
if (--length == 0)
break;
entry = entries[offset + NEXT_OFFSET];
entries = fCacheLines[entry];
ch = fSymbolChars[charsOffset++];
count = entries[0];
offset = 1 + ((count - 1) * CACHE_RECORD_SIZE);
misses = 0;
}
}
}