juneau-core/juneau-marshall/src/main/java/org/apache/juneau/parser/ParserReader.java - juneau - Git at Google

 // ***************************************************************************************************************************
 // * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
 // * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
 // * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
 // * with the License.  You may obtain a copy of the License at                                                              *
 // *                                                                                                                         *
 // *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
 // *                                                                                                                         *
 // * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
 // * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
 // * specific language governing permissions and limitations under the License.                                              *
 // ***************************************************************************************************************************
 package org.apache.juneau.parser;

 import java.io.*;

 import org.apache.juneau.internal.*;

 /**
  * Similar to a {@link java.io.PushbackReader} with a pushback buffer of 1 character.
  *
  * <p>
  * Code is optimized to work with a 1 character buffer.
  *
  * <p>
  * Additionally keeps track of current line and column number, and provides the ability to set mark points and capture
  * characters from the previous mark point.
  *
  * <p>
  * <b>Warning:</b>  Not thread safe.
  */
 public class ParserReader extends Reader implements Positionable {

 	/**
 	 * Placeholder written over deleted or replaced buffer positions (ASCII DEL, <c>U+007F</c>).
 	 * These "holes" are squeezed out again by {@link #getMarked(int, int)}.
 	 */
 	private static final char DEL = 127;

 	/** Initial size of the internal buffer (string input shorter than this gets an exact-size buffer). */
 	private static final int DEFAULT_BUFFER_SIZE = 1024;

 	/** Wrapped reader */
 	protected final Reader r;

 	private char[] buff;       // Internal character buffer
 	private int line = 1;      // Current line number
 	private int column;        // Current column number
 	private int iCurrent = 0;  // Current pointer into character buffer
 	private int iMark = -1;    // Mark position in buffer (-1 when not marking)
 	private int iEnd = 0;      // The last good character position in the buffer
 	private boolean endReached, holesExist;
 	private final boolean unbuffered;

 	/**
 	 * Constructor.
 	 *
 	 * <p>
 	 * Registers this reader on the pipe as its {@link Positionable}.
 	 *
 	 * @param pipe The parser input.
 	 * @throws IOException Thrown by underlying stream.
 	 */
 	public ParserReader(ParserPipe pipe) throws IOException {
 		this.unbuffered = pipe.unbuffered;
 		if (pipe.isString()) {
 			String in = pipe.getInputAsString();
 			this.r = new CharSequenceReader(in);
 			this.buff = new char[Math.min(in.length(), DEFAULT_BUFFER_SIZE)];
 		} else {
 			Reader _r = pipe.getReader();
 			// Reuse the underlying reader of an existing ParserReader instead of wrapping it a second time.
 			this.r = _r instanceof ParserReader ? ((ParserReader)_r).r : _r;
 			this.buff = new char[DEFAULT_BUFFER_SIZE];
 		}
 		pipe.setPositionable(this);
 	}

 	/**
 	 * Reads a single character.
 	 *
 	 * <p>
 	 * Note that this method does NOT process extended unicode characters (i.e. characters above 0x10000), but rather
 	 * returns them as two <jk>char</jk>s.
 	 * Use {@link #readCodePoint()} to ensure proper handling of extended unicode.
 	 *
 	 * <p>
 	 * Updates the tracked line/column position: a <js>'\n'</js> starts a new line at column 0, any other
 	 * character advances the column by one.
 	 *
 	 * @return The character read, or -1 if the end of the stream has been reached.
 	 * @throws IOException If a problem occurred trying to read from the reader.
 	 */
 	@Override /* Reader */
 	public final int read() throws IOException {
 		int c = readFromBuff();
 		if (c == -1)
 			return -1;
 		if (c == '\n') {
 			line++;
 			column = 0;
 		} else {
 			column++;
 		}
 		return c;
 	}

 	/**
 	 * Same as {@link #read()} but skips over any whitespace characters.
 	 *
 	 * @return The first non-whitespace character, or -1 if the end of stream reached.
 	 * @throws IOException Thrown by underlying stream.
 	 */
 	public final int readSkipWs() throws IOException {
 		while (true) {
 			int c = read();
 			if (c == -1 || ! Character.isWhitespace(c))
 				return c;
 		}
 	}

 	/**
 	 * Same as {@link #read()} but detects and combines extended unicode characters (characters above 0x10000).
 	 *
 	 * <p>
 	 * If a high surrogate is not followed by a low surrogate, the high surrogate is returned as-is and the
 	 * character that followed it is pushed back so that it is returned by the next read.
 	 *
 	 * @return The character read, or -1 if the end of the stream has been reached.
 	 * @throws IOException If a problem occurred trying to read from the reader.
 	 */
 	public final int readCodePoint() throws IOException {
 		int c = read();

 		// Characters that take up 2 chars.
 		if (c >= 0xd800 && c <= 0xdbff) {
 			int low = read();
 			if (low >= 0xdc00 && low <= 0xdfff)
 				c = 0x10000 + ((c - 0xd800) << 10) + (low - 0xdc00);
 			else if (low != -1)
 				unread();  // Unpaired high surrogate:  don't swallow the character that follows it.
 		}

 		return c;
 	}

 	/**
 	 * Returns the next character from the internal buffer, refilling the buffer from {@link #read(char[], int, int)}
 	 * when it has been exhausted.
 	 *
 	 * <p>
 	 * Does not touch the line/column counters; that is done by {@link #read()}.
 	 *
 	 * @return The next character, or -1 if the end of the input has been reached.
 	 * @throws IOException Thrown by underlying stream.
 	 */
 	private final int readFromBuff() throws IOException {
 		while (iCurrent >= iEnd) {
 			if (endReached)
 				return -1;

 			// If there's still space at the end of this buffer, fill it.
 			// Make sure there's at least 2 character spaces free for extended unicode characters.
 			if (iEnd+1 < buff.length) {
 				int x = read(buff, iCurrent, buff.length-iEnd);
 				if (x == -1) {
 					endReached = true;
 					return -1;
 				}
 				iEnd += x;

 			} else {
 				// If we're currently marking, then we want to copy from the current mark point
 				// to the beginning of the buffer and then fill in the remainder of buffer.
 				if (iMark >= 0) {

 					// If we're marking from the beginning of the array, we double the size of the
 					// buffer.  This isn't likely to occur often.
 					if (iMark == 0) {
 						char[] buff2 = new char[buff.length<<1];
 						System.arraycopy(buff, 0, buff2, 0, buff.length);
 						buff = buff2;

 					// Otherwise, we copy what's currently marked to the beginning of the buffer.
 					} else {
 						int copyBuff = iMark;
 						System.arraycopy(buff, copyBuff, buff, 0, buff.length - copyBuff);
 						iCurrent -= copyBuff;
 						iMark -= copyBuff;
 					}
 					int expected = buff.length - iCurrent;

 					int x = read(buff, iCurrent, expected);
 					if (x == -1) {
 						endReached = true;
 						iEnd = iCurrent;
 						return -1;
 					}
 					iEnd = iCurrent + x;
 				} else {
 					// Copy the last 10 chars in the buffer to the beginning of the buffer.
 					// Keeping them around is what allows unread() to step back after a refill.
 					int copyBuff = Math.min(iCurrent, 10);
 					System.arraycopy(buff, iCurrent-copyBuff, buff, 0, copyBuff);

 					// Number of characters we expect to copy on the next read.
 					int expected = buff.length - copyBuff;
 					int x = read(buff, copyBuff, expected);
 					iCurrent = copyBuff;
 					if (x == -1) {
 						endReached = true;
 						iEnd = iCurrent;
 						return -1;
 					}
 					iEnd = iCurrent + x;
 				}
 			}
 		}
 		return buff[iCurrent++];
 	}

 	/**
 	 * Start buffering the calls to read() so that the text can be gathered from the mark point on calling {@link #getMarked()}.
 	 */
 	public final void mark() {
 		iMark = iCurrent;
 	}

 	/**
 	 * Peeks the next character in the stream.
 	 *
 	 * <p>
 	 * This is equivalent to doing a {@code read()} followed by an {@code unread()}.
 	 *
 	 * @return The peeked character, or -1 if the end of the stream has been reached.
 	 * @throws IOException If a problem occurred trying to read from the reader.
 	 */
 	public final int peek() throws IOException {
 		int c = read();
 		if (c != -1)
 			unread();
 		return c;
 	}

 	/**
 	 * Same as {@link #peek()} but skips over any whitespace characters.
 	 *
 	 * <p>
 	 * This is equivalent to doing a {@code readSkipWs()} followed by an {@code unread()}.
 	 * The skipped whitespace is consumed; only the returned character is pushed back.
 	 *
 	 * @return The peeked character, or -1 if the end of the stream has been reached.
 	 * @throws IOException If a problem occurred trying to read from the reader.
 	 */
 	public final int peekSkipWs() throws IOException {
 		int c = readSkipWs();
 		if (c != -1)
 			unread();
 		return c;
 	}

 	/**
 	 * Read the specified number of characters off the stream.
 	 *
 	 * @param num The number of characters to read.
 	 * @return
 	 * 	The characters packaged as a String.
 	 * 	Shorter than <c>num</c> if the end of the stream is reached first.
 	 * @throws IOException If a problem occurred trying to read from the reader.
 	 */
 	public final String read(int num) throws IOException {
 		char[] c = new char[num];
 		for (int i = 0; i < num; i++) {
 			int c2 = read();
 			if (c2 == -1)
 				return new String(c, 0, i);
 			c[i] = (char)c2;
 		}
 		return new String(c);
 	}

 	/**
 	 * Pushes the last read character back into the stream.
 	 *
 	 * <p>
 	 * When the column is 0 the line number is decremented instead of the column; the column of the previous
 	 * line is not restored.
 	 *
 	 * @return This object (for method chaining).
 	 * @throws IOException If there is no previously read character in the buffer to push back.
 	 */
 	public ParserReader unread() throws IOException {
 		if (iCurrent <= 0)
 			throw new IOException("Buffer underflow.");
 		iCurrent--;
 		if (column == 0)
 			line--;
 		else
 			column--;
 		return this;
 	}

 	/**
 	 * No-op.
 	 *
 	 * <p>
 	 * Input readers are closed in the {@link ParserPipe} class.
 	 *
 	 * @throws IOException If a problem occurred trying to read from the reader.
 	 */
 	@Override /* Reader */
 	public void close() throws IOException {
 		// No-op
 	}

 	/**
 	 * Returns the characters read since the last call to {@link #mark()} as a string, and clears the mark.
 	 *
 	 * @return The characters read since the mark point.
 	 */
 	public final String getMarked() {
 		return getMarked(0, 0);
 	}

 	/**
 	 * Same as {@link #getMarked()} except allows you to specify offsets into the buffer.
 	 *
 	 * <p>
 	 * For example, to return the marked string, but trim the first and last characters, call the following:
 	 * <p class='bcode w800'>
 	 * 	getMarked(1, -1);
 	 * </p>
 	 *
 	 * <p>
 	 * Expected to be called only after {@link #mark()}.
 	 *
 	 * @param offsetStart The offset of the start position (relative to the mark point).
 	 * @param offsetEnd The offset of the end position (relative to the current read position).
 	 * @return The selected portion of the marked characters as a string.
 	 */
 	public final String getMarked(int offsetStart, int offsetEnd) {
 		int offset = 0;

 		// Holes are \u007F 'delete' characters that we need to get rid of now.
 		// Note that while holes exist, every DEL character in the marked range is removed,
 		// including any that came from the input itself.
 		if (holesExist) {
 			for (int i = iMark; i < iCurrent; i++) {
 				char c = buff[i];
 				if (c == DEL)
 					offset++;
 				else
 					buff[i-offset] = c;
 			}
 			holesExist = false;
 		}
 		int start = iMark + offsetStart, len = iCurrent - iMark + offsetEnd - offsetStart - offset;
 		String s = new String(buff, start, len);
 		iMark = -1;
 		return s;
 	}

 	/**
 	 * Trims off the last character in the marking buffer.
 	 *
 	 * <p>
 	 * Useful for removing escape characters from sequences.
 	 *
 	 * @return This object (for method chaining).
 	 */
 	public final ParserReader delete() {
 		return delete(1);
 	}

 	/**
 	 * Trims off the specified number of last characters in the marking buffer.
 	 * Useful for removing escape characters from sequences.
 	 *
 	 * <p>
 	 * The characters are overwritten with DEL placeholders which are removed by {@link #getMarked(int, int)}.
 	 *
 	 * @param count The number of characters to delete.
 	 * @return This object (for method chaining).
 	 */
 	public final ParserReader delete(int count) {
 		for (int i = 0; i < count; i++)
 			buff[iCurrent-i-1] = DEL;
 		holesExist = true;
 		return this;
 	}

 	/**
 	 * Replaces the last <c>offset</c> characters in the marking buffer with the specified character.
 	 *
 	 * <p>
 	 * <c>offset</c> must be at least <c>1</c> for normal characters, and <c>2</c> for extended
 	 * unicode characters in order for the replacement to fit into the buffer.
 	 * Any positions left over after the replacement are filled with DEL placeholders.
 	 *
 	 * @param c The new character (code point).
 	 * @param offset The number of characters back from the current position at which the replacement starts.
 	 * @return This object (for method chaining).
 	 * @throws IOException If <c>offset</c> is too small to hold the replacement.
 	 */
 	public final ParserReader replace(int c, int offset) throws IOException {
 		if (c < 0x10000) {
 			if (offset < 1)
 				throw new IOException("Buffer underflow.");
 			buff[iCurrent-offset] = (char)c;
 		} else {
 			if (offset < 2)
 				throw new IOException("Buffer underflow.");
 			// Encode as a surrogate pair occupying two buffer positions.
 			c -= 0x10000;
 			buff[iCurrent-offset] = (char)(0xd800 + (c >> 10));
 			buff[iCurrent-offset+1] = (char)(0xdc00 + (c & 0x3ff));
 			offset--;
 		}
 		// Fill in the gap with DEL characters.
 		for (int i = 1; i < offset; i++)
 			buff[iCurrent-i] = DEL;
 		holesExist |= (offset > 1);
 		return this;
 	}

 	/**
 	 * Replace the last read character in the buffer with the specified character.
 	 *
 	 * @param c The new character.
 	 * @return This object (for method chaining).
 	 * @throws IOException Thrown by underlying stream.
 	 */
 	public final ParserReader replace(char c) throws IOException {
 		return replace(c, 1);
 	}

 	/**
 	 * Subclasses can override this method to provide additional filtering.
 	 *
 	 * <p>
 	 * Default implementation simply calls the same method on the underlying reader, requesting a single
 	 * character at a time when the pipe is unbuffered.
 	 *
 	 * @param cbuf Destination buffer.
 	 * @param off Offset at which to start storing characters.
 	 * @param len Maximum number of characters to read.
 	 * @return The number of characters read, or -1 if the end of the stream has been reached.
 	 * @throws IOException Thrown by underlying stream.
 	 */
 	@Override /* Reader */
 	public int read(char[] cbuf, int off, int len) throws IOException {
 		return unbuffered ? r.read(cbuf, off, 1) : r.read(cbuf, off, len);
 	}

 	/**
 	 * Returns the line and column of the current read position.
 	 *
 	 * @return A new position object holding the current line and column.
 	 */
 	@Override /* Positionable */
 	public Position getPosition() {
 		return new Position(line, column);
 	}
 }
	// ***************************************************************************************************************************
	// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file *
	// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file *
	// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance *
	// * with the License. You may obtain a copy of the License at *
	// * *
	// * http://www.apache.org/licenses/LICENSE-2.0 *
	// * *
	// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an *
	// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the *
	// * specific language governing permissions and limitations under the License. *
	// ***************************************************************************************************************************
	package org.apache.juneau.parser;

	import java.io.*;

	import org.apache.juneau.internal.*;

	/**
	* Similar to a {@link java.io.PushbackReader} with a pushback buffer of 1 character.
	*
	* <p>
	* Code is optimized to work with a 1 character buffer.
	*
	* <p>
	* Additionally keeps track of current line and column number, and provides the ability to set mark points and capture
	* characters from the previous mark point.
	*
	* <p>
	* <b>Warning:</b> Not thread safe.
	*/
	public class ParserReader extends Reader implements Positionable {

	/** Wrapped reader */
	protected final Reader r;

	private char[] buff; // Internal character buffer
	private int line = 1; // Current line number
	private int column; // Current column number
	private int iCurrent = 0; // Current pointer into character buffer
	private int iMark = -1; // Mark position in buffer
	private int iEnd = 0; // The last good character position in the buffer
	private boolean endReached, holesExist;
	private final boolean unbuffered;

	/**
	* Constructor.
	*
	* @param pipe The parser input.
	* @throws IOException Thrown by underlying stream.
	*/
	public ParserReader(ParserPipe pipe) throws IOException {
	this.unbuffered = pipe.unbuffered;
	if (pipe.isString()) {
	String in = pipe.getInputAsString();
	this.r = new CharSequenceReader(in);
	this.buff = new char[in.length() < 1024 ? in.length() : 1024];
	} else {
	Reader _r = pipe.getReader();
	if (_r instanceof ParserReader)
	this.r = ((ParserReader)_r).r;
	else
	this.r = _r;
	this.buff = new char[1024];
	}
	pipe.setPositionable(this);
	}

	/**
	* Reads a single character.
	*
	* <p>
	* Note that this method does NOT process extended unicode characters (i.e. characters above 0x10000), but rather
	* returns them as two <jk>char</jk>s.
	* Use {@link #readCodePoint()} to ensure proper handling of extended unicode.
	*
	* @return The character read, or -1 if the end of the stream has been reached.
	* @throws IOException If a problem occurred trying to read from the reader.
	*/
	@Override /* Reader */
	public final int read() throws IOException {
	int c = readFromBuff();
	if (c == -1)
	return -1;
	if (c == '\n') {
	line++;
	column = 0;
	} else {
	column++;
	}
	return c;
	}

	/**
	* Same as {@link #read()} but skips over any whitespace characters.
	*
	* @return The first non-whitespace character, or -1 if the end of stream reached.
	* @throws IOException Thrown by underlying stream.
	*/
	public final int readSkipWs() throws IOException {
	while (true) {
	int c = read();
	if (c == -1 \|\| ! Character.isWhitespace(c))
	return c;
	}
	}

	/**
	* Same as {@link #read()} but detects and combines extended unicode characters (characters above 0x10000).
	*
	* @return The character read, or -1 if the end of the stream has been reached.
	* @throws IOException If a problem occurred trying to read from the reader.
	*/
	public final int readCodePoint() throws IOException {
	int c = read();

	// Characters that take up 2 chars.
	if (c >= 0xd800 && c <= 0xdbff) {
	int low = read();
	if (low >= 0xdc00 && low <= 0xdfff)
	c = 0x10000 + ((c - 0xd800) << 10) + (low - 0xdc00);
	}

	return c;
	}

	private final int readFromBuff() throws IOException {
	while (iCurrent >= iEnd) {
	if (endReached)
	return -1;

	// If there's still space at the end of this buffer, fill it.
	// Make sure there's at least 2 character spaces free for extended unicode characters.
	//if (false) {
	if (iEnd+1 < buff.length) {
	int x = read(buff, iCurrent, buff.length-iEnd);
	if (x == -1) {
	endReached = true;
	return -1;
	}
	iEnd += x;

	} else {
	// If we're currently marking, then we want to copy from the current mark point
	// to the beginning of the buffer and then fill in the remainder of buffer.
	if (iMark >= 0) {

	// If we're marking from the beginning of the array, we double the size of the
	// buffer. This isn't likely to occur often.
	if (iMark == 0) {
	char[] buff2 = new char[buff.length<<1];
	System.arraycopy(buff, 0, buff2, 0, buff.length);
	buff = buff2;

	// Otherwise, we copy what's currently marked to the beginning of the buffer.
	} else {
	int copyBuff = iMark;
	System.arraycopy(buff, copyBuff, buff, 0, buff.length - copyBuff);
	iCurrent -= copyBuff;
	iMark -= copyBuff;
	}
	int expected = buff.length - iCurrent;

	int x = read(buff, iCurrent, expected);
	if (x == -1) {
	endReached = true;
	iEnd = iCurrent;
	return -1;
	}
	iEnd = iCurrent + x;
	} else {
	// Copy the last 10 chars in the buffer to the beginning of the buffer.
	int copyBuff = Math.min(iCurrent, 10);
	System.arraycopy(buff, iCurrent-copyBuff, buff, 0, copyBuff);

	// Number of characters we expect to copy on the next read.
	int expected = buff.length - copyBuff;
	int x = read(buff, copyBuff, expected);
	iCurrent = copyBuff;
	if (x == -1) {
	endReached = true;
	iEnd = iCurrent;
	return -1;
	}
	iEnd = iCurrent + x;
	}
	}
	}
	return buff[iCurrent++];
	}

	/**
	* Start buffering the calls to read() so that the text can be gathered from the mark point on calling {@code getFromMarked()}.
	*/
	public final void mark() {
	iMark = iCurrent;
	}

	/**
	* Peeks the next character in the stream.
	*
	* <p>
	* This is equivalent to doing a {@code read()} followed by an {@code unread()}.
	*
	* @return The peeked character, or (char)-1 if the end of the stream has been reached.
	* @throws IOException If a problem occurred trying to read from the reader.
	*/
	public final int peek() throws IOException {
	int c = read();
	if (c != -1)
	unread();
	return c;
	}

	/**
	* Same as {@link #peek()} but skips over any whitespace characters.
	*
	* <p>
	* This is equivalent to doing a {@code read()} followed by an {@code unread()}.
	*
	* @return The peeked character, or (char)-1 if the end of the stream has been reached.
	* @throws IOException If a problem occurred trying to read from the reader.
	*/
	public final int peekSkipWs() throws IOException {
	while(true) {
	int c = read();
	boolean isWs = Character.isWhitespace(c);
	if (c != -1 && ! isWs)
	unread();
	if (! isWs)
	return c;
	}
	}

	/**
	* Read the specified number of characters off the stream.
	*
	* @param num The number of characters to read.
	* @return The characters packaged as a String.
	* @throws IOException If a problem occurred trying to read from the reader.
	*/
	public final String read(int num) throws IOException {
	char[] c = new char[num];
	for (int i = 0; i < num; i++) {
	int c2 = read();
	if (c2 == -1)
	return new String(c, 0, i);
	c[i] = (char)c2;
	}
	return new String(c);
	}

	/**
	* Pushes the last read character back into the stream.
	*
	* @return This object (for method chaining).
	* @throws IOException If a problem occurred trying to read from the reader.
	*/
	public ParserReader unread() throws IOException {
	if (iCurrent <= 0)
	throw new IOException("Buffer underflow.");
	iCurrent--;
	if (column == 0)
	line--;
	else
	column--;
	return this;
	}

	/**
	* No-op.
	*
	* <p>
	* Input readers are closed in the {@link ParserPipe} class.
	*
	* @throws IOException If a problem occurred trying to read from the reader.
	*/
	@Override /* Reader */
	public void close() throws IOException {
	// No-op
	}

	/**
	* Returns the contents of the reusable character buffer as a string, and resets the buffer for next usage.
	*
	* @return The contents of the reusable character buffer as a string.
	*/
	public final String getMarked() {
	return getMarked(0, 0);
	}

	/**
	* Same as {@link #getMarked()} except allows you to specify offsets into the buffer.
	*
	* <p>
	* For example, to return the marked string, but trim the first and last characters, call the following:
	* <p class='bcode w800'>
	* getFromMarked(1, -1);
	* </p>
	*
	* @param offsetStart The offset of the start position.
	* @param offsetEnd The offset of the end position.
	* @return The contents of the reusable character buffer as a string.
	*/
	public final String getMarked(int offsetStart, int offsetEnd) {
	int offset = 0;

	// Holes are \u00FF 'delete' characters that we need to get rid of now.
	if (holesExist) {
	for (int i = iMark; i < iCurrent; i++) {
	char c = buff[i];
	if (c == 127)
	offset++;
	else
	buff[i-offset] = c;
	}
	holesExist = false;
	}
	int start = iMark + offsetStart, len = iCurrent - iMark + offsetEnd - offsetStart - offset;
	String s = new String(buff, start, len);
	iMark = -1;
	return s;
	}

	/**
	* Trims off the last character in the marking buffer.
	*
	* <p>
	* Useful for removing escape characters from sequences.
	*
	* @return This object (for method chaining).
	*/
	public final ParserReader delete() {
	return delete(1);
	}

	/**
	* Trims off the specified number of last characters in the marking buffer.
	* Useful for removing escape characters from sequences.
	*
	* @param count The number of characters to delete.
	* @return This object (for method chaining).
	*/
	public final ParserReader delete(int count) {
	for (int i = 0; i < count; i++)
	buff[iCurrent-i-1] = 127;
	holesExist = true;
	return this;
	}

	/**
	* Replaces the last character in the marking buffer with the specified character.
	*
	* <p>
	* <c>offset</c> must be at least <c>1</c> for normal characters, and <c>2</c> for extended
	* unicode characters in order for the replacement to fit into the buffer.
	*
	* @param c The new character.
	* @param offset The offset.
	* @return This object (for method chaining).
	* @throws IOException Thrown by underlying stream.
	*/
	public final ParserReader replace(int c, int offset) throws IOException {
	if (c < 0x10000) {
	if (offset < 1)
	throw new IOException("Buffer underflow.");
	buff[iCurrent-offset] = (char)c;
	} else {
	if (offset < 2)
	throw new IOException("Buffer underflow.");
	c -= 0x10000;
	buff[iCurrent-offset] = (char)(0xd800 + (c >> 10));
	buff[iCurrent-offset+1] = (char)(0xdc00 + (c & 0x3ff));
	offset--;
	}
	// Fill in the gap with DEL characters.
	for (int i = 1; i < offset; i++)
	buff[iCurrent-i] = 127;
	holesExist \|= (offset > 1);
	return this;
	}

	/**
	* Replace the last read character in the buffer with the specified character.
	*
	* @param c The new character.
	* @return This object (for method chaining).
	* @throws IOException Thrown by underlying stream.
	*/
	public final ParserReader replace(char c) throws IOException {
	return replace(c, 1);
	}

	/**
	* Subclasses can override this method to provide additional filtering.
	*
	* <p>
	* Default implementation simply calls the same method on the underlying reader.
	*/
	@Override /* Reader */
	public int read(char[] cbuf, int off, int len) throws IOException {
	return unbuffered ? r.read(cbuf, off, 1) : r.read(cbuf, off, len);
	}

	@Override /* Positionable */
	public Position getPosition() {
	return new Position(line, column);
	}
	}