blob: 7aa0ab7e461f64cf308eb858385cbb4826c5b83a [file] [log] [blame]
// ***************************************************************************************************************************
// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file *
// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file *
// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance *
// * with the License. You may obtain a copy of the License at *
// * *
// * http://www.apache.org/licenses/LICENSE-2.0 *
// * *
// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an *
// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the *
// * specific language governing permissions and limitations under the License. *
// ***************************************************************************************************************************
package org.apache.juneau.parser;
import java.io.*;
import org.apache.juneau.internal.*;
/**
* Similar to a {@link java.io.PushbackReader} with a pushback buffer of 1 character.
*
* <p>
* Code is optimized to work with a 1 character buffer.
*
* <p>
* Additionally keeps track of current line and column number, and provides the ability to set mark points and capture
* characters from the previous mark point.
*
* <p>
* <b>Warning:</b> Not thread safe.
*/
public class ParserReader extends Reader implements Positionable {

	/** Upper bound on the initial size of the internal character buffer. */
	private static final int DEFAULT_BUFFER_SIZE = 1024;

	/**
	 * Maximum number of already-consumed characters carried over to the front of the buffer when it is refilled
	 * while no mark is set, which keeps them available to {@link #unread()}.
	 */
	private static final int LOOKBACK = 10;

	/** Placeholder (ASCII DEL, 0x7F) written over deleted/replaced characters; stripped out by {@link #getMarked(int, int)}. */
	private static final char DEL = 127;

	/** Wrapped reader */
	protected final Reader r;

	private char[] buff;       // Internal character buffer
	private int line = 1;      // Current line number
	private int column;        // Current column number
	private int iCurrent = 0;  // Current pointer into character buffer
	private int iMark = -1;    // Mark position in buffer (-1 when no mark is set)
	private int iEnd = 0;      // One past the last good character position in the buffer
	private boolean endReached, holesExist;
	private final boolean unbuffered;

	/**
	 * Constructor.
	 *
	 * <p>
	 * If the pipe's reader is itself a {@link ParserReader}, its underlying reader is wrapped directly.
	 * This reader registers itself on the pipe as the source of position information.
	 *
	 * @param pipe The parser input.
	 * @throws IOException Thrown by underlying stream.
	 */
	public ParserReader(ParserPipe pipe) throws IOException {
		this.unbuffered = pipe.unbuffered;
		if (pipe.isString()) {
			String in = pipe.getInputAsString();
			this.r = new CharSequenceReader(in);
			// The whole input is already in memory, so never allocate more than it needs.
			this.buff = new char[Math.min(in.length(), DEFAULT_BUFFER_SIZE)];
		} else {
			Reader _r = pipe.getReader();
			if (_r instanceof ParserReader)
				this.r = ((ParserReader)_r).r;
			else
				this.r = _r;
			this.buff = new char[DEFAULT_BUFFER_SIZE];
		}
		pipe.setPositionable(this);
	}

	/**
	 * Reads a single character.
	 *
	 * <p>
	 * Note that this method does NOT process extended unicode characters (i.e. characters at or above 0x10000), but
	 * rather returns them as two <jk>char</jk>s.
	 * Use {@link #readCodePoint()} to ensure proper handling of extended unicode.
	 *
	 * <p>
	 * Also advances the tracked line/column position.
	 *
	 * @return The character read, or -1 if the end of the stream has been reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	@Override /* Reader */
	public final int read() throws IOException {
		int c = readFromBuff();
		if (c == -1)
			return -1;
		if (c == '\n') {
			line++;
			column = 0;
		} else {
			column++;
		}
		return c;
	}

	/**
	 * Same as {@link #read()} but skips over any whitespace characters.
	 *
	 * @return The first non-whitespace character, or -1 if the end of stream reached.
	 * @throws IOException Thrown by underlying stream.
	 */
	public final int readSkipWs() throws IOException {
		while (true) {
			int c = read();
			if (c == -1 || ! Character.isWhitespace(c))
				return c;
		}
	}

	/**
	 * Same as {@link #read()} but detects and combines extended unicode characters (characters at or above 0x10000).
	 *
	 * <p>
	 * If a high surrogate is not followed by a low surrogate, the high surrogate is returned on its own and the
	 * following character is left in the stream for the next read.
	 *
	 * @return The code point read, or -1 if the end of the stream has been reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	public final int readCodePoint() throws IOException {
		int c = read();
		// Characters that take up 2 chars.
		if (c >= 0xd800 && c <= 0xdbff) {
			int low = read();
			if (low >= 0xdc00 && low <= 0xdfff)
				c = 0x10000 + ((c - 0xd800) << 10) + (low - 0xdc00);
			else if (low != -1)
				unread();  // Not part of a surrogate pair; push it back rather than silently dropping it.
		}
		return c;
	}

	/**
	 * Returns the next character from the internal buffer, refilling the buffer from the underlying reader as needed.
	 *
	 * <p>
	 * Does not update line/column tracking.
	 *
	 * @return The next character, or -1 if the end of the stream has been reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	private int readFromBuff() throws IOException {
		while (iCurrent >= iEnd) {
			if (endReached)
				return -1;

			// If there's still space at the end of this buffer, fill it.
			// Make sure there's at least 2 character spaces free for extended unicode characters.
			if (iEnd+1 < buff.length) {
				// iCurrent never advances past iEnd, so inside this loop it equals iEnd.
				int x = read(buff, iCurrent, buff.length-iEnd);
				if (x == -1) {
					endReached = true;
					return -1;
				}
				iEnd += x;

			} else {
				// If we're currently marking, then we want to copy from the current mark point
				// to the beginning of the buffer and then fill in the remainder of buffer.
				if (iMark >= 0) {

					// If we're marking from the beginning of the array, we double the size of the
					// buffer. This isn't likely to occur often.
					if (iMark == 0) {
						char[] buff2 = new char[buff.length<<1];
						System.arraycopy(buff, 0, buff2, 0, buff.length);
						buff = buff2;

					// Otherwise, we copy what's currently marked to the beginning of the buffer.
					} else {
						int copyBuff = iMark;
						System.arraycopy(buff, copyBuff, buff, 0, buff.length - copyBuff);
						iCurrent -= copyBuff;
						iMark -= copyBuff;
					}
					int expected = buff.length - iCurrent;

					int x = read(buff, iCurrent, expected);
					if (x == -1) {
						endReached = true;
						iEnd = iCurrent;
						return -1;
					}
					iEnd = iCurrent + x;
				} else {
					// Copy the last few consumed chars in the buffer to the beginning of the buffer.
					int copyBuff = Math.min(iCurrent, LOOKBACK);
					System.arraycopy(buff, iCurrent-copyBuff, buff, 0, copyBuff);

					// Number of characters we expect to copy on the next read.
					int expected = buff.length - copyBuff;
					int x = read(buff, copyBuff, expected);
					iCurrent = copyBuff;
					if (x == -1) {
						endReached = true;
						iEnd = iCurrent;
						return -1;
					}
					iEnd = iCurrent + x;
				}
			}
		}
		return buff[iCurrent++];
	}

	/**
	 * Start buffering the calls to read() so that the text can be gathered from the mark point on calling
	 * {@link #getMarked()}.
	 */
	public final void mark() {
		iMark = iCurrent;
	}

	/**
	 * Peeks the next character in the stream.
	 *
	 * <p>
	 * This is equivalent to doing a {@code read()} followed by an {@code unread()}.
	 *
	 * @return The peeked character, or -1 if the end of the stream has been reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	public final int peek() throws IOException {
		int c = read();
		if (c != -1)
			unread();
		return c;
	}

	/**
	 * Same as {@link #peek()} but skips over any whitespace characters.
	 *
	 * <p>
	 * Leading whitespace is consumed; the first non-whitespace character is read and then pushed back with
	 * {@code unread()}.
	 *
	 * @return The peeked character, or -1 if the end of the stream has been reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	public final int peekSkipWs() throws IOException {
		while(true) {
			int c = read();
			boolean isWs = Character.isWhitespace(c);
			if (c != -1 && ! isWs)
				unread();
			if (! isWs)
				return c;
		}
	}

	/**
	 * Read the specified number of characters off the stream.
	 *
	 * <p>
	 * If the end of the stream is reached first, only the characters read so far are returned.
	 *
	 * @param num The number of characters to read.
	 * @return The characters packaged as a String.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	public final String read(int num) throws IOException {
		char[] c = new char[num];
		for (int i = 0; i < num; i++) {
			int c2 = read();
			if (c2 == -1)
				return new String(c, 0, i);
			c[i] = (char)c2;
		}
		return new String(c);
	}

	/**
	 * Pushes the last read character back into the stream.
	 *
	 * <p>
	 * The tracked position is stepped back as well: the column is decremented, or the line is decremented when the
	 * column is already 0.
	 *
	 * @return This object (for method chaining).
	 * @throws IOException If there is no previously read character in the buffer to push back.
	 */
	public ParserReader unread() throws IOException {
		if (iCurrent <= 0)
			throw new IOException("Buffer underflow.");
		iCurrent--;
		if (column == 0)
			line--;
		else
			column--;
		return this;
	}

	/**
	 * No-op.
	 *
	 * <p>
	 * Input readers are closed in the {@link ParserPipe} class.
	 *
	 * @throws IOException Declared by the overridden method; not thrown by this implementation.
	 */
	@Override /* Reader */
	public void close() throws IOException {
		// No-op
	}

	/**
	 * Returns the characters read since the last call to {@link #mark()} as a string, and clears the mark.
	 *
	 * @return The marked characters as a string.
	 */
	public final String getMarked() {
		return getMarked(0, 0);
	}

	/**
	 * Same as {@link #getMarked()} except allows you to specify offsets into the buffer.
	 *
	 * <p>
	 * For example, to return the marked string, but trim the first and last characters, call the following:
	 * <p class='bcode w800'>
	 * 	getMarked(1, -1);
	 * </p>
	 *
	 * @param offsetStart The offset of the start position.
	 * @param offsetEnd The offset of the end position.
	 * @return The marked characters as a string.
	 */
	public final String getMarked(int offsetStart, int offsetEnd) {
		int offset = 0;

		// Holes are 0x7F 'delete' characters that we need to get rid of now.
		// Remaining characters are shifted left over the holes; 'offset' counts the holes removed.
		if (holesExist) {
			for (int i = iMark; i < iCurrent; i++) {
				char c = buff[i];
				if (c == DEL)
					offset++;
				else
					buff[i-offset] = c;
			}
			holesExist = false;
		}
		int start = iMark + offsetStart, len = iCurrent - iMark + offsetEnd - offsetStart - offset;
		String s = new String(buff, start, len);
		iMark = -1;
		return s;
	}

	/**
	 * Trims off the last character in the marking buffer.
	 *
	 * <p>
	 * Useful for removing escape characters from sequences.
	 *
	 * @return This object (for method chaining).
	 */
	public final ParserReader delete() {
		return delete(1);
	}

	/**
	 * Trims off the specified number of last characters in the marking buffer.
	 * Useful for removing escape characters from sequences.
	 *
	 * <p>
	 * The characters are overwritten with placeholders that are stripped out by {@link #getMarked(int, int)}.
	 *
	 * @param count The number of characters to delete.
	 * @return This object (for method chaining).
	 */
	public final ParserReader delete(int count) {
		for (int i = 0; i < count; i++)
			buff[iCurrent-i-1] = DEL;
		holesExist = true;
		return this;
	}

	/**
	 * Replaces the last <c>offset</c> characters in the marking buffer with the specified character.
	 *
	 * <p>
	 * <c>offset</c> must be at least <c>1</c> for normal characters, and <c>2</c> for extended
	 * unicode characters in order for the replacement to fit into the buffer.
	 *
	 * @param c The new character (code point).
	 * @param offset The number of most recently read characters to replace.
	 * @return This object (for method chaining).
	 * @throws IOException If <c>offset</c> is too small to hold the replacement.
	 */
	public final ParserReader replace(int c, int offset) throws IOException {
		if (c < 0x10000) {
			if (offset < 1)
				throw new IOException("Buffer underflow.");
			buff[iCurrent-offset] = (char)c;
		} else {
			if (offset < 2)
				throw new IOException("Buffer underflow.");
			// Encode as a surrogate pair occupying the first two replaced positions.
			c -= 0x10000;
			buff[iCurrent-offset] = (char)(0xd800 + (c >> 10));
			buff[iCurrent-offset+1] = (char)(0xdc00 + (c & 0x3ff));
			offset--;
		}
		// Fill in the gap with DEL characters.
		for (int i = 1; i < offset; i++)
			buff[iCurrent-i] = DEL;
		holesExist |= (offset > 1);
		return this;
	}

	/**
	 * Replace the last read character in the buffer with the specified character.
	 *
	 * @param c The new character.
	 * @return This object (for method chaining).
	 * @throws IOException Thrown by underlying stream.
	 */
	public final ParserReader replace(char c) throws IOException {
		return replace(c, 1);
	}

	/**
	 * Subclasses can override this method to provide additional filtering.
	 *
	 * <p>
	 * Default implementation simply calls the same method on the underlying reader, except that in unbuffered mode
	 * only a single character is requested per call regardless of <c>len</c>.
	 *
	 * @param cbuf Destination buffer.
	 * @param off Offset at which to start storing characters.
	 * @param len Maximum number of characters to read.
	 * @return The value returned by the underlying reader (number of characters read, or -1 at end of stream).
	 * @throws IOException Thrown by underlying stream.
	 */
	@Override /* Reader */
	public int read(char[] cbuf, int off, int len) throws IOException {
		return unbuffered ? r.read(cbuf, off, 1) : r.read(cbuf, off, len);
	}

	/**
	 * Returns the current line and column as tracked by {@link #read()} and {@link #unread()}.
	 *
	 * @return A new position object for the current line and column.
	 */
	@Override /* Positionable */
	public Position getPosition() {
		return new Position(line, column);
	}
}