| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.codehaus.groovy.antlr; |
| |
| import java.io.IOException; |
| import java.io.Reader; |
| |
| import antlr.CharScanner; |
| import antlr.Token; |
| import antlr.TokenStreamException; |
| |
| /** |
| * Translates GLS-defined unicode escapes into characters. Throws an exception |
| * in the event of an invalid unicode escape being detected. |
| * <p> |
| * No attempt has been made to optimize this class for speed or space. |
| */ |
| public class UnicodeEscapingReader extends Reader { |
| |
| private final Reader reader; |
| private CharScanner lexer; |
| private boolean hasNextChar = false; |
| private int nextChar; |
| private final SourceBuffer sourceBuffer; |
| private int previousLine; |
| private int numUnicodeEscapesFound = 0; |
| private int numUnicodeEscapesFoundOnCurrentLine = 0; |
| |
| private static class DummyLexer extends CharScanner { |
| private final Token t = new Token(); |
| public Token nextToken() throws TokenStreamException { |
| return t; |
| } |
| @Override |
| public int getColumn() { |
| return 0; |
| } |
| @Override |
| public int getLine() { |
| return 0; |
| } |
| } |
| |
| /** |
| * Constructor. |
| * @param reader The reader that this reader will filter over. |
| */ |
| public UnicodeEscapingReader(Reader reader,SourceBuffer sourceBuffer) { |
| this.reader = reader; |
| this.sourceBuffer = sourceBuffer; |
| this.lexer = new DummyLexer(); |
| } |
| |
| /** |
| * Sets the lexer that is using this reader. Must be called before the |
| * lexer is used. |
| */ |
| public void setLexer(CharScanner lexer) { |
| this.lexer = lexer; |
| } |
| |
| /** |
| * Reads characters from the underlying reader. |
| * @see java.io.Reader#read(char[],int,int) |
| */ |
| public int read(char cbuf[], int off, int len) throws IOException { |
| int c = 0; |
| int count = 0; |
| while (count < len && (c = read())!= -1) { |
| cbuf[off + count] = (char) c; |
| count++; |
| } |
| return (count == 0 && c == -1) ? -1 : count; |
| } |
| |
| /** |
| * Gets the next character from the underlying reader, |
| * translating escapes as required. |
| * @see java.io.Reader#close() |
| */ |
| public int read() throws IOException { |
| if (hasNextChar) { |
| hasNextChar = false; |
| write(nextChar); |
| return nextChar; |
| } |
| |
| if (previousLine != lexer.getLine()) { |
| // new line, so reset unicode escapes |
| numUnicodeEscapesFoundOnCurrentLine = 0; |
| previousLine = lexer.getLine(); |
| } |
| |
| int c = reader.read(); |
| if (c != '\\') { |
| write(c); |
| return c; |
| } |
| |
| // Have one backslash, continue if next char is 'u' |
| c = reader.read(); |
| if (c != 'u') { |
| hasNextChar = true; |
| nextChar = c; |
| write('\\'); |
| return '\\'; |
| } |
| |
| // Swallow multiple 'u's |
| int numberOfUChars = 0; |
| do { |
| numberOfUChars++; |
| c = reader.read(); |
| } while (c == 'u'); |
| |
| // Get first hex digit |
| checkHexDigit(c); |
| StringBuilder charNum = new StringBuilder(); |
| charNum.append((char) c); |
| |
| // Must now be three more hex digits |
| for (int i = 0; i < 3; i++) { |
| c = reader.read(); |
| checkHexDigit(c); |
| charNum.append((char) c); |
| } |
| int rv = Integer.parseInt(charNum.toString(), 16); |
| write(rv); |
| |
| numUnicodeEscapesFound += 4 + numberOfUChars; |
| numUnicodeEscapesFoundOnCurrentLine += 4 + numberOfUChars; |
| |
| return rv; |
| } |
| private void write(int c) { |
| if (sourceBuffer != null) {sourceBuffer.write(c);} |
| } |
| /** |
| * Checks that the given character is indeed a hex digit. |
| */ |
| private void checkHexDigit(int c) throws IOException { |
| if (c >= '0' && c <= '9') { |
| return; |
| } |
| if (c >= 'a' && c <= 'f') { |
| return; |
| } |
| if (c >= 'A' && c <= 'F') { |
| return; |
| } |
| // Causes the invalid escape to be skipped |
| hasNextChar = true; |
| nextChar = c; |
| throw new IOException("Did not find four digit hex character code." |
| + " line: " + lexer.getLine() + " col:" + lexer.getColumn()); |
| } |
| |
| public int getUnescapedUnicodeColumnCount() { |
| return numUnicodeEscapesFoundOnCurrentLine; |
| } |
| |
| public int getUnescapedUnicodeOffsetCount() { |
| return numUnicodeEscapesFound; |
| } |
| |
| /** |
| * Closes this reader by calling close on the underlying reader. |
| * |
| * @see java.io.Reader#close() |
| */ |
| public void close() throws IOException { |
| reader.close(); |
| } |
| } |