blob: e537660743b9d1d3f7a4e5fa5e4efe98636f9131 [file] [log] [blame]
/* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.xmlbeans.impl.util;
import java.io.*;
import static org.apache.xmlbeans.impl.util.LongUTFDataOutputStream.DATA_OUTPUT_CHUNKS;
import static org.apache.xmlbeans.impl.util.LongUTFDataOutputStream.LONG_UTF_MAGIC;
/**
 * A {@link DataInputStream} that can additionally read strings whose encoded form
 * exceeds the size limit (&lt; 64kb) of {@link DataInputStream#readUTF()}.
 * <p>
 * It is the reading counterpart of {@code LongUTFDataOutputStream} and needs to be used
 * together with it.
 */
public class LongUTFDataInputStream extends DataInputStream {

    /** Size of the buffer used to pull the encoded bytes of a long string from the stream. */
    private static final int CHUNK_SIZE = 4096;

    /**
     * Upper bound for the initial {@link StringBuilder} capacity. The declared length comes
     * from the stream, so a corrupt value must not trigger a huge up-front allocation;
     * the builder still grows as needed for genuinely long strings.
     */
    private static final int MAX_INITIAL_CAPACITY = 1 << 20;

    /**
     * Creates a stream reading from {@code in}. If {@code in} does not support
     * {@code mark}/{@code reset}, it is wrapped in a {@link BufferedInputStream}, because
     * {@link #readLongUTF()} relies on being able to rewind a few bytes.
     *
     * @param in the underlying input stream
     */
    public LongUTFDataInputStream(InputStream in) {
        super(wrap(in));
    }

    /**
     * Returns {@code is} itself when it supports mark/reset, otherwise a buffered wrapper
     * around it.
     */
    private static InputStream wrap(InputStream is) {
        return is.markSupported() ? is : new BufferedInputStream(is);
    }

    /**
     * Reads a string that was written either in the regular {@code readUTF()} format or in
     * the long format.
     * <p>
     * The long format is recognized by an unsigned 2-byte value that is not below
     * {@code DATA_OUTPUT_CHUNKS}, followed by a 4-byte value equal to {@code LONG_UTF_MAGIC}.
     * It is followed by a 4-byte length and that many encoded bytes, using 1-, 2- and 3-byte
     * sequences. In every other case the stream is rewound and the call is delegated to
     * {@link #readUTF()}.
     *
     * @return the decoded string
     * @throws UTFDataFormatException if the declared length is negative, the data ends in the
     *         middle of a character, or a byte sequence is not a valid 1-, 2- or 3-byte form
     * @throws EOFException if the stream ends before the declared number of bytes was read
     * @throws IOException if the underlying stream fails
     */
    public String readLongUTF() throws IOException {
        // The 2-byte length and the 4-byte magic must stay re-readable for the fallback.
        mark(6);
        final int shortLen = readUnsignedShort();
        if (shortLen < DATA_OUTPUT_CHUNKS) {
            reset();
            return readUTF();
        }
        if (readInt() != LONG_UTF_MAGIC) {
            reset();
            return readUTF();
        }

        final int utfLen = readInt();
        if (utfLen < 0) {
            // A negative length can only come from corrupt data; report it as a format error
            // instead of failing with NegativeArraySizeException or returning "".
            throw new UTFDataFormatException("malformed input: negative length " + utfLen);
        }

        final StringBuilder sb = new StringBuilder(Math.min(utfLen / 2, MAX_INITIAL_CAPACITY));
        final ByteFeed feed = new ByteFeed(this, utfLen);

        while (feed.hasMore()) {
            final int c = feed.next();
            switch (c >> 4) {
                case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                    /* 0xxxxxxx */
                    sb.append((char) c);
                    break;
                case 12: case 13: {
                    /* 110x xxxx 10xx xxxx */
                    final int char2 = feed.next();
                    if ((char2 & 0xC0) != 0x80) {
                        throw new UTFDataFormatException("malformed input around byte " + feed.consumed());
                    }
                    sb.append((char) (((c & 0x1F) << 6) | (char2 & 0x3F)));
                    break;
                }
                case 14: {
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    // Both continuation bytes are fetched before validation, so a truncated
                    // sequence is reported as a partial character.
                    final int char2 = feed.next();
                    final int char3 = feed.next();
                    if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
                        throw new UTFDataFormatException("malformed input around byte " + (feed.consumed() - 1));
                    }
                    sb.append((char) (((c & 0x0F) << 12)
                            | ((char2 & 0x3F) << 6)
                            | (char3 & 0x3F)));
                    break;
                }
                default:
                    /* 10xx xxxx, 1111 xxxx */
                    throw new UTFDataFormatException("malformed input around byte " + feed.consumed());
            }
        }
        return sb.toString();
    }

    /**
     * Hands out exactly {@code total} bytes of a stream one at a time, refilling an internal
     * buffer in chunks of at most {@link #CHUNK_SIZE} bytes and never reading past
     * {@code total}.
     */
    private static final class ByteFeed {
        private final DataInputStream in;
        private final int total;
        private final byte[] buf = new byte[CHUNK_SIZE];
        /** Index of the next unread byte in {@code buf}. */
        private int pos;
        /** Number of valid bytes currently held in {@code buf}. */
        private int limit;
        /** Number of bytes handed out so far. */
        private int consumed;

        ByteFeed(DataInputStream in, int total) {
            this.in = in;
            this.total = total;
        }

        /** Returns whether fewer than {@code total} bytes have been handed out. */
        boolean hasMore() {
            return consumed < total;
        }

        /** Returns the number of bytes handed out so far. */
        int consumed() {
            return consumed;
        }

        /**
         * Returns the next byte as a value in the range 0..255.
         *
         * @throws UTFDataFormatException if all {@code total} bytes were already handed out,
         *         i.e. a multi-byte character is cut off by the declared length
         * @throws IOException if refilling the buffer from the stream fails
         */
        int next() throws IOException {
            // Compared without "+ 1" so that a length of Integer.MAX_VALUE cannot overflow.
            if (consumed >= total) {
                throw new UTFDataFormatException("malformed input: partial character at end");
            }
            if (pos >= limit) {
                limit = Math.min(buf.length, total - consumed);
                in.readFully(buf, 0, limit);
                pos = 0;
            }
            consumed++;
            return buf[pos++] & 0xFF;
        }
    }
}