// blob: 4a2095662249c3a884cec0b1fc73d1c770f3d4ae [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xerces.utils;
import org.apache.xerces.readers.XMLEntityHandler;
import java.util.Vector;
//
//
//
public class UTF8DataChunk implements StringPool.StringProducer {
//
// Chunk size constants
//
public static final int CHUNK_SHIFT = 14; // 2^14 = 16k
public static final int CHUNK_SIZE = (1 << CHUNK_SHIFT);
public static final int CHUNK_MASK = CHUNK_SIZE - 1;
//
// Public constructor (factory)
//
public static UTF8DataChunk createChunk(StringPool stringPool, UTF8DataChunk prev) {
synchronized (UTF8DataChunk.class) {
if (fgFreeChunks != null) {
UTF8DataChunk newChunk = fgFreeChunks;
fgFreeChunks = newChunk.fNextChunk;
newChunk.fNextChunk = null;
newChunk.init(stringPool, prev);
return newChunk;
}
}
UTF8DataChunk chunk = new UTF8DataChunk(stringPool, prev);
return chunk;
}
//
//
//
public final byte[] toByteArray() {
return fData;
}
//
//
//
public void setByteArray(byte[] data) {
fData = data;
}
//
//
//
public UTF8DataChunk nextChunk() {
return fNextChunk;
}
//
//
//
public boolean clearPreviousChunk() {
if (fPreviousChunk != null) {
fPreviousChunk.setNextChunk(null);
fPreviousChunk.removeRef();
//System.err.println("[" + fPreviousChunk.fChunk + "] " + fPreviousChunk.fRefCount + " refs after clearPreviousChunk");
//System.err.println("[" + fChunk + "] " + fRefCount + " refs after clearPreviousChunk");
fPreviousChunk = null;
return true;
}
return fChunk == 0;
}
//
//
//
public void releaseChunk() {
removeRef();
//System.err.println("[" + fChunk + "] " + fRefCount + " refs after releaseChunk");
}
//
//
//
public void releaseString(int offset, int length) {
removeRef();
}
//
//
//
public String toString(int offset, int length) {
synchronized (fgTempBufferLock) {
int outOffset = 0;
UTF8DataChunk dataChunk = this;
int endOffset = offset + length;
int index = offset & CHUNK_MASK;
byte[] data = fData;
boolean skiplf = false;
while (offset < endOffset) {
int b0 = data[index++] & 0xff;
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
if (b0 < 0x80) {
if (skiplf) {
skiplf = false;
if (b0 == 0x0A)
continue;
}
if (b0 == 0x0D) {
b0 = 0x0A;
skiplf = true;
}
try {
fgTempBuffer[outOffset] = (char)b0;
outOffset++;
} catch (NullPointerException ex) {
fgTempBuffer = new char[CHUNK_SIZE];
fgTempBuffer[outOffset++] = (char)b0;
} catch (ArrayIndexOutOfBoundsException ex) {
char[] newBuffer = new char[outOffset * 2];
System.arraycopy(fgTempBuffer, 0, newBuffer, 0, outOffset);
fgTempBuffer = newBuffer;
fgTempBuffer[outOffset++] = (char)b0;
}
continue;
}
int b1 = data[index++] & 0xff;
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
int ch = ((0x1f & b0)<<6) + (0x3f & b1); // yyy yyxx xxxx (0x80 to 0x7ff)
try {
fgTempBuffer[outOffset] = (char)ch;
outOffset++;
} catch (NullPointerException ex) {
fgTempBuffer = new char[CHUNK_SIZE];
fgTempBuffer[outOffset++] = (char)ch;
} catch (ArrayIndexOutOfBoundsException ex) {
char[] newBuffer = new char[outOffset * 2];
System.arraycopy(fgTempBuffer, 0, newBuffer, 0, outOffset);
fgTempBuffer = newBuffer;
fgTempBuffer[outOffset++] = (char)ch;
}
continue;
}
int b2 = data[index++] & 0xff;
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
int ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
try {
fgTempBuffer[outOffset] = (char)ch;
outOffset++;
} catch (NullPointerException ex) {
fgTempBuffer = new char[CHUNK_SIZE];
fgTempBuffer[outOffset++] = (char)ch;
} catch (ArrayIndexOutOfBoundsException ex) {
char[] newBuffer = new char[outOffset * 2];
System.arraycopy(fgTempBuffer, 0, newBuffer, 0, outOffset);
fgTempBuffer = newBuffer;
fgTempBuffer[outOffset++] = (char)ch;
}
continue;
}
int b3 = data[index++] & 0xff; // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
int ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3);
if (ch < 0x10000) {
try {
fgTempBuffer[outOffset] = (char)ch;
outOffset++;
} catch (NullPointerException ex) {
fgTempBuffer = new char[CHUNK_SIZE];
fgTempBuffer[outOffset++] = (char)ch;
} catch (ArrayIndexOutOfBoundsException ex) {
char[] newBuffer = new char[outOffset * 2];
System.arraycopy(fgTempBuffer, 0, newBuffer, 0, outOffset);
fgTempBuffer = newBuffer;
fgTempBuffer[outOffset++] = (char)ch;
}
} else {
char ch1 = (char)(((ch-0x00010000)>>10)+0xd800);
char ch2 = (char)(((ch-0x00010000)&0x3ff)+0xdc00);
try {
fgTempBuffer[outOffset] = (char)ch1;
outOffset++;
} catch (NullPointerException ex) {
fgTempBuffer = new char[CHUNK_SIZE];
fgTempBuffer[outOffset++] = (char)ch1;
} catch (ArrayIndexOutOfBoundsException ex) {
char[] newBuffer = new char[outOffset * 2];
System.arraycopy(fgTempBuffer, 0, newBuffer, 0, outOffset);
fgTempBuffer = newBuffer;
fgTempBuffer[outOffset++] = (char)ch1;
}
try {
fgTempBuffer[outOffset] = (char)ch2;
outOffset++;
} catch (NullPointerException ex) {
fgTempBuffer = new char[CHUNK_SIZE];
fgTempBuffer[outOffset++] = (char)ch2;
} catch (ArrayIndexOutOfBoundsException ex) {
char[] newBuffer = new char[outOffset * 2];
System.arraycopy(fgTempBuffer, 0, newBuffer, 0, outOffset);
fgTempBuffer = newBuffer;
fgTempBuffer[outOffset++] = (char)ch2;
}
}
}
return new String(fgTempBuffer, 0, outOffset);
}
}
//
//
//
public boolean equalsString(int offset, int length, char[] strChars, int strOffset, int strLength) {
UTF8DataChunk dataChunk = this;
int endOffset = offset + length;
int index = offset & CHUNK_MASK;
byte[] data = fData;
boolean skiplf = false;
while (offset < endOffset) {
if (strLength-- == 0)
return false;
int b0 = data[index++] & 0xff;
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
if (b0 < 0x80) {
if (skiplf) {
skiplf = false;
if (b0 == 0x0A)
continue;
}
if (b0 == 0x0D) {
b0 = 0x0A;
skiplf = true;
}
if (b0 != strChars[strOffset++])
return false;
continue;
}
int b1 = data[index++] & 0xff;
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
int ch = ((0x1f & b0)<<6) + (0x3f & b1);
if (ch != strChars[strOffset++])
return false;
continue;
}
int b2 = data[index++] & 0xff;
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
int ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
if (ch != strChars[strOffset++])
return false;
continue;
}
int b3 = data[index++] & 0xff; // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
int ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12)
+ ((0x3f & b2)<<6) + (0x3f & b3);
if (ch < 0x10000) {
if (ch != strChars[strOffset++])
return false;
} else {
if ((((ch-0x00010000)>>10)+0xd800) != strChars[strOffset++])
return false;
if (strLength-- == 0)
return false;
if ((((ch-0x00010000)&0x3ff)+0xdc00) != strChars[strOffset++])
return false;
}
}
return (strLength == 0);
}
//
//
//
public int addString(int offset, int length) {
if (length == 0)
return StringPool.EMPTY_STRING;
int chunk = offset >> CHUNK_SHIFT;
if (chunk != fChunk) {
if (fPreviousChunk == null)
throw new RuntimeException(new ImplementationMessages().createMessage(null, ImplementationMessages.INT_PCN, 0, null));
return fPreviousChunk.addString(offset, length);
}
int lastChunk = (offset + length - 1) >> CHUNK_SHIFT;
if (chunk == lastChunk) {
addRef();
return fStringPool.addString(this, offset & CHUNK_MASK, length);
}
String str = toString(offset & CHUNK_MASK, length);
return fStringPool.addString(str);
}
//
//
//
public int addSymbol(int offset, int length, int hashcode) {
if (length == 0)
return StringPool.EMPTY_STRING;
int chunk = offset >> CHUNK_SHIFT;
if (chunk != fChunk) {
if (fPreviousChunk == null)
throw new RuntimeException(new ImplementationMessages().createMessage(null, ImplementationMessages.INT_PCN, 0, null));
return fPreviousChunk.addSymbol(offset, length, hashcode);
}
int lastChunk = (offset + length - 1) >> CHUNK_SHIFT;
int index = offset & CHUNK_MASK;
if (chunk == lastChunk) {
if (hashcode == 0) {
hashcode = getHashcode(index, length);
}
int symbol = fStringPool.lookupSymbol(this, index, length, hashcode);
if (symbol == -1) {
String str = toString(index, length);
symbol = fStringPool.addNewSymbol(str, hashcode);
}
return symbol;
}
String str = toString(index, length);
return fStringPool.addSymbol(str);
}
//
//
//
public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) {
//
// Setup for the operation.
//
UTF8DataChunk dataChunk = chunkFor(offset);
int endOffset = offset + length;
int index = offset & CHUNK_MASK;
byte[] data = dataChunk.fData;
boolean skiplf = false;
while (offset < endOffset) {
int b0 = data[index++] & 0xff;
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
if (b0 < 0x80) {
if (skiplf) {
skiplf = false;
if (b0 == 0x0A)
continue;
}
if (b0 == 0x0D) {
b0 = 0x0A;
skiplf = true;
}
charBuffer.append((char)b0);
continue;
}
int b1 = data[index++] & 0xff;
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
int ch = ((0x1f & b0)<<6) + (0x3f & b1);
charBuffer.append((char)ch); // yyy yyxx xxxx (0x80 to 0x7ff)
continue;
}
int b2 = data[index++] & 0xff;
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
int ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
charBuffer.append((char)ch); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
continue;
}
int b3 = data[index++] & 0xff; // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
offset++;
if (index == CHUNK_SIZE && offset < endOffset) {
dataChunk = dataChunk.fNextChunk;
data = dataChunk.fData;
index = 0;
}
int ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12)
+ ((0x3f & b2)<<6) + (0x3f & b3);
if (ch < 0x10000)
charBuffer.append((char)ch);
else {
charBuffer.append((char)(((ch-0x00010000)>>10)+0xd800));
charBuffer.append((char)(((ch-0x00010000)&0x3ff)+0xdc00));
}
}
}
//
//
//
private int getHashcode(int index, int length) {
int endIndex = index + length;
int hashcode = 0;
byte[] data = fData;
while (index < endIndex) {
int b0 = data[index++] & 0xff;
if ((b0 & 0x80) == 0) {
hashcode = StringHasher.hashChar(hashcode, b0);
continue;
}
int b1 = data[index++] & 0xff;
if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
int ch = ((0x1f & b0)<<6) + (0x3f & b1); // yyy yyxx xxxx (0x80 to 0x7ff)
hashcode = StringHasher.hashChar(hashcode, ch);
continue;
}
int b2 = data[index++] & 0xff;
if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
int ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
hashcode = StringHasher.hashChar(hashcode, ch);
continue;
}
int b3 = data[index++] & 0xff; // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
int ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12)
+ ((0x3f & b2)<<6) + (0x3f & b3);
if (ch < 0x10000)
hashcode = StringHasher.hashChar(hashcode, ch);
else {
hashcode = StringHasher.hashChar(hashcode, (int)(((ch-0x00010000)>>10)+0xd800));
hashcode = StringHasher.hashChar(hashcode, (int)(((ch-0x00010000)&0x3ff)+0xdc00));
}
}
return StringHasher.finishHash(hashcode);
}
//
//
//
private void init(StringPool stringPool, UTF8DataChunk prev) {
fStringPool = stringPool;
fRefCount = 1;
fChunk = prev == null ? 0 : prev.fChunk + 1;
fNextChunk = null;
fPreviousChunk = prev;
if (prev != null) {
prev.addRef();
prev.setNextChunk(this);
prev.removeRef();
}
}
//
// Constructor for factory method.
//
private UTF8DataChunk(StringPool stringPool, UTF8DataChunk prev) {
init(stringPool, prev);
}
//
//
//
private final UTF8DataChunk chunkFor(int offset) {
if ((offset >> CHUNK_SHIFT) == fChunk)
return this;
return slowChunkFor(offset);
}
private UTF8DataChunk slowChunkFor(int offset) {
int firstChunk = offset >> CHUNK_SHIFT;
UTF8DataChunk dataChunk = this;
while (firstChunk != dataChunk.fChunk)
dataChunk = dataChunk.fPreviousChunk;
return dataChunk;
}
//
//
//
private final void addRef() {
fRefCount++;
//System.err.println(">>[" + fChunk + "] " + (fRefCount - 1) + " -> " + fRefCount);
}
//
//
//
private final void removeRef() {
fRefCount--;
//System.err.println("<<[" + fChunk + "] " + (fRefCount + 1) + " -> " + fRefCount);
if (fRefCount == 0) {
//System.err.println("[" + fChunk + "] recycled a " + fData.length + " character array");
fStringPool = null;
fChunk = -1;
// fData = null;
fPreviousChunk = null;
synchronized (UTF8DataChunk.class) {
/*** Only keep one free chunk at a time! ***
fNextChunk = fgFreeChunks;
/***/
fNextChunk = null;
fgFreeChunks = this;
}
}
}
//
//
//
private void setNextChunk(UTF8DataChunk nextChunk) {
if (nextChunk == null) {
if (fNextChunk != null)
fNextChunk.removeRef();
} else if (fNextChunk == null) {
nextChunk.addRef();
} else
throw new RuntimeException("UTF8DataChunk::setNextChunk");
fNextChunk = nextChunk;
}
//
//
//
private StringPool fStringPool;
private int fRefCount;
private int fChunk;
private byte[] fData = null;
private UTF8DataChunk fNextChunk;
private UTF8DataChunk fPreviousChunk;
private static UTF8DataChunk fgFreeChunks = null;
private static char[] fgTempBuffer = null;
private static Object fgTempBufferLock = new Object();
}