// *************************************************************************************************************************** | |
// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * | |
// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * | |
// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * | |
// * with the License. You may obtain a copy of the License at * | |
// * * | |
// * http://www.apache.org/licenses/LICENSE-2.0 * | |
// * * | |
// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * | |
// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * | |
// * specific language governing permissions and limitations under the License. * | |
// *************************************************************************************************************************** | |
package org.apache.juneau.uon; | |
import java.io.*; | |
import org.apache.juneau.parser.*; | |
/** | |
* Same functionality as {@link ParserReader} except automatically decoded <c>%xx</c> escape sequences. | |
* | |
* <p> | |
* Escape sequences are assumed to be encoded UTF-8. Extended Unicode (>\u10000) is supported. | |
* | |
* <p> | |
* If decoding is enabled, the following character replacements occur so that boundaries are not lost: | |
* <ul> | |
* <li><js>'&'</js> -> <js>'\u0001'</js> | |
* <li><js>'='</js> -> <js>'\u0002'</js> | |
* </ul> | |
*/ | |
public final class UonReader extends ParserReader { | |
private final boolean decodeChars; | |
private final char[] buff; | |
// Writable properties. | |
private int iCurrent, iEnd; | |
/** | |
* Constructor. | |
* | |
* @param pipe The parser input. | |
* @param decodeChars Whether the input is URL-encoded. | |
* @throws IOException Thrown by underlying stream. | |
*/ | |
public UonReader(ParserPipe pipe, boolean decodeChars) throws IOException { | |
super(pipe); | |
this.decodeChars = decodeChars; | |
if (pipe.isString()) { | |
String in = pipe.getInputAsString(); | |
this.buff = new char[in.length() < 1024 ? in.length() : 1024]; | |
} else { | |
this.buff = new char[1024]; | |
} | |
} | |
@Override /* Reader */ | |
public final int read(char[] cbuf, int off, int len) throws IOException { | |
if (! decodeChars) | |
return super.read(cbuf, off, len); | |
// Copy any remainder to the beginning of the buffer. | |
int remainder = iEnd - iCurrent; | |
if (remainder > 0) | |
System.arraycopy(buff, iCurrent, buff, 0, remainder); | |
iCurrent = 0; | |
int expected = buff.length - remainder; | |
int x = super.read(buff, remainder, expected); | |
if (x == -1 && remainder == 0) | |
return -1; | |
iEnd = remainder + (x == -1 ? 0 : x); | |
int i = 0; | |
while (i < len) { | |
if (iCurrent >= iEnd) | |
return i; | |
char c = buff[iCurrent++]; | |
if (c == '+') { | |
cbuf[off + i++] = ' '; | |
} else if (c == '&') { | |
cbuf[off + i++] = '\u0001'; | |
} else if (c == '=') { | |
cbuf[off + i++] = '\u0002'; | |
} else if (c != '%') { | |
cbuf[off + i++] = c; | |
} else { | |
int iMark = iCurrent-1; // Keep track of current position. | |
// Stop if there aren't at least two more characters following '%' in the buffer, | |
// or there aren't at least two more positions open in cbuf to handle double-char chars. | |
if (iMark+2 >= iEnd || i+2 > len) { | |
iCurrent--; | |
return i; | |
} | |
int b0 = readEncodedByte(); | |
int cx; | |
// 0xxxxxxx | |
if (b0 < 128) { | |
cx = b0; | |
// 10xxxxxx | |
} else if (b0 < 192) { | |
throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence: " + b0); | |
// 110xxxxx 10xxxxxx | |
// 11000000(192) - 11011111(223) | |
} else if (b0 < 224) { | |
cx = readUTF8(b0-192, 1); | |
if (cx == -1) { | |
iCurrent = iMark; | |
return i; | |
} | |
// 1110xxxx 10xxxxxx 10xxxxxx | |
// 11100000(224) - 11101111(239) | |
} else if (b0 < 240) { | |
cx = readUTF8(b0-224, 2); | |
if (cx == -1) { | |
iCurrent = iMark; | |
return i; | |
} | |
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |
// 11110000(240) - 11110111(247) | |
} else if (b0 < 248) { | |
cx = readUTF8(b0-240, 3); | |
if (cx == -1) { | |
iCurrent = iMark; | |
return i; | |
} | |
} else | |
throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence: " + b0); | |
if (cx < 0x10000) | |
cbuf[off + i++] = (char)cx; | |
else { | |
cx -= 0x10000; | |
cbuf[off + i++] = (char)(0xd800 + (cx >> 10)); | |
cbuf[off + i++] = (char)(0xdc00 + (cx & 0x3ff)); | |
} | |
} | |
} | |
return i; | |
} | |
private int readUTF8(int n, final int numBytes) throws IOException { | |
if (iCurrent + numBytes*3 > iEnd) | |
return -1; | |
for (int i = 0; i < numBytes; i++) { | |
n <<= 6; | |
n += readHex()-128; | |
} | |
return n; | |
} | |
private int readHex() throws IOException { | |
int c = buff[iCurrent++]; | |
if (c != '%') | |
throw new IOException("Did not find expected '%' character in UTF-8 sequence."); | |
return readEncodedByte(); | |
} | |
private int readEncodedByte() throws IOException { | |
if (iEnd <= iCurrent + 1) | |
throw new IOException("Incomplete trailing escape pattern"); | |
int h = buff[iCurrent++]; | |
int l = buff[iCurrent++]; | |
h = fromHexChar(h); | |
l = fromHexChar(l); | |
return (h << 4) + l; | |
} | |
private static int fromHexChar(int c) throws IOException { | |
if (c >= '0' && c <= '9') | |
return c - '0'; | |
if (c >= 'a' && c <= 'f') | |
return 10 + c - 'a'; | |
if (c >= 'A' && c <= 'F') | |
return 10 + c - 'A'; | |
throw new IOException("Invalid hex character '"+c+"' found in escape pattern."); | |
} | |
@Override /* ParserReader */ | |
public final UonReader unread() throws IOException { | |
super.unread(); | |
return this; | |
} | |
} |