| /* |
| * Copyright 2006- Yonik Seeley |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.noggit; |
| |
| import java.io.IOException; |
| import java.io.Reader; |
| |
| |
| public class JSONParser { |
| |
| /** |
| * Event indicating a JSON string value, including member names of objects |
| */ |
| public static final int STRING = 1; |
| /** |
| * Event indicating a JSON number value which fits into a signed 64 bit integer |
| */ |
| public static final int LONG = 2; |
| /** |
| * Event indicating a JSON number value which has a fractional part or an exponent |
| * and with string length <= 23 chars not including sign. This covers |
| * all representations of normal values for Double.toString(). |
| */ |
| public static final int NUMBER = 3; |
| /** |
| * Event indicating a JSON number value that was not produced by toString of any |
| * Java primitive numerics such as Double or Long. It is either |
| * an integer outside the range of a 64 bit signed integer, or a floating |
| * point value with a string representation of more than 23 chars. |
| */ |
| public static final int BIGNUMBER = 4; |
| /** |
| * Event indicating a JSON boolean |
| */ |
| public static final int BOOLEAN = 5; |
| /** |
| * Event indicating a JSON null |
| */ |
| public static final int NULL = 6; |
| /** |
| * Event indicating the start of a JSON object |
| */ |
| public static final int OBJECT_START = 7; |
| /** |
| * Event indicating the end of a JSON object |
| */ |
| public static final int OBJECT_END = 8; |
| /** |
| * Event indicating the start of a JSON array |
| */ |
| public static final int ARRAY_START = 9; |
| /** |
| * Event indicating the end of a JSON array |
| */ |
| public static final int ARRAY_END = 10; |
| /** |
| * Event indicating the end of input has been reached |
| */ |
| public static final int EOF = 11; |
| |
| |
| /** |
| * Flags to control parsing behavior |
| */ |
| public static final int ALLOW_COMMENTS = 1 << 0; |
| public static final int ALLOW_SINGLE_QUOTES = 1 << 1; |
| public static final int ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER = 1 << 2; |
| public static final int ALLOW_UNQUOTED_KEYS = 1 << 3; |
| public static final int ALLOW_UNQUOTED_STRING_VALUES = 1 << 4; |
| /** |
| * ALLOW_EXTRA_COMMAS causes any number of extra commas in arrays and objects to be ignored |
| * Note that a trailing comma in [] would be [,] (hence calling the feature "trailing" commas |
| * is either limiting or misleading. Since trailing commas is fundamentally incompatible with any future |
| * "fill-in-missing-values-with-null", it was decided to extend this feature to handle any |
| * number of extra commas. |
| */ |
| public static final int ALLOW_EXTRA_COMMAS = 1 << 5; |
| public static final int ALLOW_MISSING_COLON_COMMA_BEFORE_OBJECT = 1 << 6; |
| public static final int OPTIONAL_OUTER_BRACES = 1 << 7; |
| |
| public static final int FLAGS_STRICT = 0; |
| public static final int FLAGS_DEFAULT = ALLOW_COMMENTS | ALLOW_SINGLE_QUOTES | ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER | ALLOW_UNQUOTED_KEYS | ALLOW_UNQUOTED_STRING_VALUES | ALLOW_EXTRA_COMMAS; |
| |
| public static class ParseException extends RuntimeException { |
| public ParseException(String msg) { |
| super(msg); |
| } |
| } |
| |
| public static String getEventString(int e) { |
| switch (e) { |
| case STRING: |
| return "STRING"; |
| case LONG: |
| return "LONG"; |
| case NUMBER: |
| return "NUMBER"; |
| case BIGNUMBER: |
| return "BIGNUMBER"; |
| case BOOLEAN: |
| return "BOOLEAN"; |
| case NULL: |
| return "NULL"; |
| case OBJECT_START: |
| return "OBJECT_START"; |
| case OBJECT_END: |
| return "OBJECT_END"; |
| case ARRAY_START: |
| return "ARRAY_START"; |
| case ARRAY_END: |
| return "ARRAY_END"; |
| case EOF: |
| return "EOF"; |
| } |
| return "Unknown: " + e; |
| } |
| |
| private static final CharArr devNull = new CharArr.NullCharArr(); |
| |
| protected int flags = FLAGS_DEFAULT; |
| |
| protected final char[] buf; // input buffer with JSON text in it |
| protected int start; // current position in the buffer |
| protected int end; // end position in the buffer (one past last valid index) |
| protected final Reader in; // optional reader to obtain data from |
| protected boolean eof = false; // true if the end of the stream was reached. |
| protected long gpos; // global position = gpos + start |
| |
| protected int event; // last event read |
| |
| protected int stringTerm; // The terminator for the last string we read: single quote, double quote, or 0 for unterminated. |
| |
| protected boolean missingOpeningBrace = false; |
| |
| public JSONParser(Reader in) { |
| this(in, new char[8192]); |
| // 8192 matches the default buffer size of a BufferedReader so double |
| // buffering of the data is avoided. |
| } |
| |
| public JSONParser(Reader in, char[] buffer) { |
| this.in = in; |
| this.buf = buffer; |
| } |
| |
| // idea - if someone passes us a CharArrayReader, we could |
| // directly use that buffer as it's protected. |
| |
| public JSONParser(char[] data, int start, int end) { |
| this.in = null; |
| this.buf = data; |
| this.start = start; |
| this.end = end; |
| } |
| |
| public JSONParser(String data) { |
| this(data, 0, data.length()); |
| } |
| |
| public JSONParser(String data, int start, int end) { |
| this.in = null; |
| this.start = start; |
| this.end = end; |
| this.buf = new char[end - start]; |
| data.getChars(start, end, buf, 0); |
| } |
| |
| public int getFlags() { |
| return flags; |
| } |
| |
| public int setFlags(int flags) { |
| int oldFlags = flags; |
| this.flags = flags; |
| return oldFlags; |
| } |
| |
| // temporary output buffer |
| private final CharArr out = new CharArr(64); |
| |
| // We need to keep some state in order to (at a minimum) know if |
| // we should skip ',' or ':'. |
| private byte[] stack = new byte[16]; |
| private int ptr = 0; // pointer into the stack of parser states |
| private byte state = 0; // current parser state |
| |
| // parser states stored in the stack |
| private static final byte DID_OBJSTART = 1; // '{' just read |
| private static final byte DID_ARRSTART = 2; // '[' just read |
| private static final byte DID_ARRELEM = 3; // array element just read |
| private static final byte DID_MEMNAME = 4; // object member name (map key) just read |
| private static final byte DID_MEMVAL = 5; // object member value (map val) just read |
| |
| // info about value that was just read (or is in the middle of being read) |
| private int valstate; |
| |
| // push current parser state (use at start of new container) |
| private final void push() { |
| if (ptr >= stack.length) { |
| // doubling here is probably overkill, but anything that needs to double more than |
| // once (32 levels deep) is very atypical anyway. |
| byte[] newstack = new byte[stack.length << 1]; |
| System.arraycopy(stack, 0, newstack, 0, stack.length); |
| stack = newstack; |
| } |
| stack[ptr++] = state; |
| } |
| |
| // pop parser state (use at end of container) |
| private final void pop() { |
| if (--ptr < 0) { |
| throw err("Unbalanced container"); |
| } else { |
| state = stack[ptr]; |
| } |
| } |
| |
| protected void fill() throws IOException { |
| if (in != null) { |
| gpos += end; |
| start = 0; |
| int num = in.read(buf, 0, buf.length); |
| end = num >= 0 ? num : 0; |
| } |
| if (start >= end) eof = true; |
| } |
| |
| private void getMore() throws IOException { |
| fill(); |
| if (start >= end) { |
| throw err(null); |
| } |
| } |
| |
| protected int getChar() throws IOException { |
| if (start >= end) { |
| fill(); |
| if (start >= end) return -1; |
| } |
| return buf[start++]; |
| } |
| |
| /** |
| * Returns true if the given character is considered to be whitespace. |
| * One difference between Java's Character.isWhitespace() is that this method |
| * considers a hard space (non-breaking space, or nbsp) to be whitespace. |
| */ |
| protected static final boolean isWhitespace(int ch) { |
| return (Character.isWhitespace(ch) || ch == 0x00a0); |
| } |
| |
| private static final long WS_MASK = (1L << ' ') | (1L << '\t') | (1L << '\r') | (1L << '\n') | (1L << '#') | (1L << '/') | (0x01); // set 1 bit so 0xA0 will be flagged as whitespace |
| |
| protected int getCharNWS() throws IOException { |
| for (; ; ) { |
| int ch = getChar(); |
| // getCharNWS is normally called in the context of expecting certain JSON special characters |
| // such as ":}"]," |
| // all of these characters are below 64 (including comment chars '/' and '#', so we can make this the fast path |
| // even w/o checking the range first. We'll only get some false-positives while using bare strings (chars "IJMc") |
| if (((WS_MASK >> ch) & 0x01) == 0) { |
| return ch; |
| } else if (ch <= ' ') { // this will only be true if one of the whitespace bits was set |
| continue; |
| } else if (ch == '/') { |
| getSlashComment(); |
| } else if (ch == '#') { |
| getNewlineComment(); |
| } else if (!isWhitespace(ch)) { // we'll only reach here with certain bare strings, errors, or strange whitespace like 0xa0 |
| return ch; |
| } |
| |
| /*** |
| // getCharNWS is normally called in the context of expecting certain JSON special characters |
| // such as ":}"]," |
| // all of these characters are below 64 (including comment chars '/' and '#', so we can make this the fast path |
| if (ch < 64) { |
| if (((WS_MASK >> ch) & 0x01) == 0) return ch; |
| if (ch <= ' ') continue; // whitespace below a normal space |
| if (ch=='/') { |
| getSlashComment(); |
| } else if (ch=='#') { |
| getNewlineComment(); |
| } |
| } else if (!isWhitespace(ch)) { // check for higher whitespace like 0xA0 |
| return ch; |
| } |
| ***/ |
| |
| /** older code |
| switch (ch) { |
| case ' ' : |
| case '\t' : |
| case '\r' : |
| case '\n' : |
| continue outer; |
| case '#' : |
| getNewlineComment(); |
| continue outer; |
| case '/' : |
| getSlashComment(); |
| continue outer; |
| default: |
| return ch; |
| } |
| **/ |
| } |
| } |
| |
| protected int getCharNWS(int ch) throws IOException { |
| for (; ; ) { |
| // getCharNWS is normally called in the context of expecting certain JSON special characters |
| // such as ":}"]," |
| // all of these characters are below 64 (including comment chars '/' and '#', so we can make this the fast path |
| // even w/o checking the range first. We'll only get some false-positives while using bare strings (chars "IJMc") |
| if (((WS_MASK >> ch) & 0x01) == 0) { |
| return ch; |
| } else if (ch <= ' ') { // this will only be true if one of the whitespace bits was set |
| // whitespace... get new char at bottom of loop |
| } else if (ch == '/') { |
| getSlashComment(); |
| } else if (ch == '#') { |
| getNewlineComment(); |
| } else if (!isWhitespace(ch)) { // we'll only reach here with certain bare strings, errors, or strange whitespace like 0xa0 |
| return ch; |
| } |
| ch = getChar(); |
| } |
| } |
| |
| protected int getCharExpected(int expected) throws IOException { |
| for (; ; ) { |
| int ch = getChar(); |
| if (ch == expected) return expected; |
| if (ch == ' ') continue; |
| return getCharNWS(ch); |
| } |
| } |
| |
| protected void getNewlineComment() throws IOException { |
| // read a # or a //, so go until newline |
| for (; ; ) { |
| int ch = getChar(); |
| // don't worry about DOS /r/n... we'll stop on the \r and let the rest of the whitespace |
| // eater consume the \n |
| if (ch == '\n' || ch == '\r' || ch == -1) { |
| return; |
| } |
| } |
| } |
| |
| protected void getSlashComment() throws IOException { |
| int ch = getChar(); |
| if (ch == '/') { |
| getNewlineComment(); |
| return; |
| } |
| |
| if (ch != '*') { |
| throw err("Invalid comment: expected //, /*, or #"); |
| } |
| |
| ch = getChar(); |
| for (; ; ) { |
| if (ch == '*') { |
| ch = getChar(); |
| if (ch == '/') { |
| return; |
| } else if (ch == '*') { |
| // handle cases of *******/ |
| continue; |
| } |
| } |
| if (ch == -1) { |
| return; |
| } |
| ch = getChar(); |
| } |
| } |
| |
| |
| protected boolean matchBareWord(char[] arr) throws IOException { |
| for (int i = 1; i < arr.length; i++) { |
| int ch = getChar(); |
| if (ch != arr[i]) { |
| if ((flags & ALLOW_UNQUOTED_STRING_VALUES) == 0) { |
| throw err("Expected " + new String(arr)); |
| } else { |
| stringTerm = 0; |
| out.reset(); |
| out.write(arr, 0, i); |
| if (!eof) { |
| start--; |
| } |
| return false; |
| } |
| } |
| } |
| |
| // if we don't allow bare strings, we don't need to check that the string actually terminates... just |
| // let things fail as the parser tries to continue |
| if ((flags & ALLOW_UNQUOTED_STRING_VALUES) == 0) { |
| return true; |
| } |
| |
| // check that the string actually terminates... for example trueX should return false |
| int ch = getChar(); |
| if (eof) { |
| return true; |
| } else if (!isUnquotedStringChar(ch)) { |
| start--; |
| return true; |
| } |
| |
| // we encountered something like "trueX" when matching "true" |
| stringTerm = 0; |
| out.reset(); |
| out.unsafeWrite(arr, 0, arr.length); |
| out.unsafeWrite(ch); |
| return false; |
| } |
| |
| protected ParseException err(String msg) { |
| // We can't tell if EOF was hit by comparing start<=end |
| // because the illegal char could have been the last in the buffer |
| // or in the stream. To deal with this, the "eof" var was introduced |
| if (!eof && start > 0) start--; // backup one char |
| String chs = "char=" + ((start >= end) ? "(EOF)" : "" + buf[start]); |
| String pos = "position=" + (gpos + start); |
| String tot = chs + ',' + pos + getContext(); |
| if (msg == null) { |
| if (start >= end) msg = "Unexpected EOF"; |
| else msg = "JSON Parse Error"; |
| } |
| return new ParseException(msg + ": " + tot); |
| } |
| |
| private String getContext() { |
| String context = ""; |
| if (start >= 0) { |
| context += " AFTER='" + errEscape(Math.max(start - 60, 0), start + 1) + "'"; |
| } |
| if (start < end) { |
| context += " BEFORE='" + errEscape(start + 1, start + 40) + "'"; |
| } |
| return context; |
| } |
| |
| private String errEscape(int a, int b) { |
| b = Math.min(b, end); |
| if (a >= b) return ""; |
| return new String(buf, a, b - a).replaceAll("\\s+", " "); |
| } |
| |
| |
| private boolean bool; // boolean value read |
| private long lval; // long value read |
| private int nstate; // current state while reading a number |
| private static final int HAS_FRACTION = 0x01; // nstate flag, '.' already read |
| private static final int HAS_EXPONENT = 0x02; // nstate flag, '[eE][+-]?[0-9]' already read |
| |
| /** |
| * Returns the long read... only significant if valstate==LONG after |
| * this call. firstChar should be the first numeric digit read. |
| */ |
| private long readNumber(int firstChar, boolean isNeg) throws IOException { |
| out.unsafeWrite(firstChar); // unsafe OK since we know output is big enough |
| // We build up the number in the negative plane since it's larger (by one) than |
| // the positive plane. |
| long v = '0' - firstChar; |
| // can't overflow a long in 18 decimal digits (i.e. 17 additional after the first). |
| // we also need 22 additional to handle double so we'll handle in 2 separate loops. |
| int i; |
| for (i = 0; i < 17; i++) { |
| int ch = getChar(); |
| // TODO: is this switch faster as an if-then-else? |
| switch (ch) { |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| v = v * 10 - (ch - '0'); |
| out.unsafeWrite(ch); |
| continue; |
| case '.': |
| out.unsafeWrite('.'); |
| valstate = readFrac(out, 22 - i); |
| return 0; |
| case 'e': |
| case 'E': |
| out.unsafeWrite(ch); |
| nstate = 0; |
| valstate = readExp(out, 22 - i); |
| return 0; |
| default: |
| // return the number, relying on nextEvent() to return an error |
| // for invalid chars following the number. |
| if (ch != -1) --start; // push back last char if not EOF |
| |
| valstate = LONG; |
| return isNeg ? v : -v; |
| } |
| } |
| |
| // after this, we could overflow a long and need to do extra checking |
| boolean overflow = false; |
| long maxval = isNeg ? Long.MIN_VALUE : -Long.MAX_VALUE; |
| |
| for (; i < 22; i++) { |
| int ch = getChar(); |
| switch (ch) { |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| if (v < (0x8000000000000000L / 10)) overflow = true; // can't multiply by 10 w/o overflowing |
| v *= 10; |
| int digit = ch - '0'; |
| if (v < maxval + digit) overflow = true; // can't add digit w/o overflowing |
| v -= digit; |
| out.unsafeWrite(ch); |
| continue; |
| case '.': |
| out.unsafeWrite('.'); |
| valstate = readFrac(out, 22 - i); |
| return 0; |
| case 'e': |
| case 'E': |
| out.unsafeWrite(ch); |
| nstate = 0; |
| valstate = readExp(out, 22 - i); |
| return 0; |
| default: |
| // return the number, relying on nextEvent() to return an error |
| // for invalid chars following the number. |
| if (ch != -1) --start; // push back last char if not EOF |
| |
| valstate = overflow ? BIGNUMBER : LONG; |
| return isNeg ? v : -v; |
| } |
| } |
| |
| |
| nstate = 0; |
| valstate = BIGNUMBER; |
| return 0; |
| } |
| |
| |
| // read digits right of decimal point |
| private int readFrac(CharArr arr, int lim) throws IOException { |
| nstate = HAS_FRACTION; // deliberate set instead of '|' |
| while (--lim >= 0) { |
| int ch = getChar(); |
| if (ch >= '0' && ch <= '9') { |
| arr.write(ch); |
| } else if (ch == 'e' || ch == 'E') { |
| arr.write(ch); |
| return readExp(arr, lim); |
| } else { |
| if (ch != -1) start--; // back up |
| return NUMBER; |
| } |
| } |
| return BIGNUMBER; |
| } |
| |
| |
| // call after 'e' or 'E' has been seen to read the rest of the exponent |
| private int readExp(CharArr arr, int lim) throws IOException { |
| nstate |= HAS_EXPONENT; |
| int ch = getChar(); |
| lim--; |
| |
| if (ch == '+' || ch == '-') { |
| arr.write(ch); |
| ch = getChar(); |
| lim--; |
| } |
| |
| // make sure at least one digit is read. |
| if (ch < '0' || ch > '9') { |
| throw err("missing exponent number"); |
| } |
| arr.write(ch); |
| |
| return readExpDigits(arr, lim); |
| } |
| |
| // continuation of readExpStart |
| private int readExpDigits(CharArr arr, int lim) throws IOException { |
| while (--lim >= 0) { |
| int ch = getChar(); |
| if (ch >= '0' && ch <= '9') { |
| arr.write(ch); |
| } else { |
| if (ch != -1) start--; // back up |
| return NUMBER; |
| } |
| } |
| return BIGNUMBER; |
| } |
| |
| private void continueNumber(CharArr arr) throws IOException { |
| if (arr != out) arr.write(out); |
| |
| if ((nstate & HAS_EXPONENT) != 0) { |
| readExpDigits(arr, Integer.MAX_VALUE); |
| return; |
| } |
| if (nstate != 0) { |
| readFrac(arr, Integer.MAX_VALUE); |
| return; |
| } |
| |
| for (; ; ) { |
| int ch = getChar(); |
| if (ch >= '0' && ch <= '9') { |
| arr.write(ch); |
| } else if (ch == '.') { |
| arr.write(ch); |
| readFrac(arr, Integer.MAX_VALUE); |
| return; |
| } else if (ch == 'e' || ch == 'E') { |
| arr.write(ch); |
| readExp(arr, Integer.MAX_VALUE); |
| return; |
| } else { |
| if (ch != -1) start--; |
| return; |
| } |
| } |
| } |
| |
| |
| private int hexval(int hexdig) { |
| if (hexdig >= '0' && hexdig <= '9') { |
| return hexdig - '0'; |
| } else if (hexdig >= 'A' && hexdig <= 'F') { |
| return hexdig + (10 - 'A'); |
| } else if (hexdig >= 'a' && hexdig <= 'f') { |
| return hexdig + (10 - 'a'); |
| } |
| throw err("invalid hex digit"); |
| } |
| |
| // backslash has already been read when this is called |
| private char readEscapedChar() throws IOException { |
| int ch = getChar(); |
| switch (ch) { |
| case '"': |
| return '"'; |
| case '\'': |
| return '\''; |
| case '\\': |
| return '\\'; |
| case '/': |
| return '/'; |
| case 'n': |
| return '\n'; |
| case 'r': |
| return '\r'; |
| case 't': |
| return '\t'; |
| case 'f': |
| return '\f'; |
| case 'b': |
| return '\b'; |
| case 'u': |
| return (char) ( |
| (hexval(getChar()) << 12) |
| | (hexval(getChar()) << 8) |
| | (hexval(getChar()) << 4) |
| | (hexval(getChar()))); |
| } |
| if ((flags & ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER) != 0 && ch != EOF) { |
| return (char) ch; |
| } |
| throw err("Invalid character escape"); |
| } |
| |
| // a dummy buffer we can use to point at other buffers |
| private final CharArr tmp = new CharArr(null, 0, 0); |
| |
| private CharArr readStringChars() throws IOException { |
| if (stringTerm == 0) { |
| // "out" will already contain the first part of the bare string, so don't reset it |
| readStringBare(out); |
| return out; |
| } |
| |
| char terminator = (char) stringTerm; |
| int i; |
| for (i = start; i < end; i++) { |
| char c = buf[i]; |
| if (c == terminator) { |
| tmp.set(buf, start, i); // directly use input buffer |
| start = i + 1; // advance past last '"' |
| return tmp; |
| } else if (c == '\\') { |
| break; |
| } |
| } |
| out.reset(); |
| readStringChars2(out, i); |
| return out; |
| } |
| |
| |
| // middle is the pointer to the middle of a buffer to start scanning for a non-string |
| // character ('"' or "/"). start<=middle<end |
| // this should be faster for strings with fewer escapes, but probably slower for many escapes. |
| private void readStringChars2(CharArr arr, int middle) throws IOException { |
| if (stringTerm == 0) { |
| readStringBare(arr); |
| return; |
| } |
| |
| char terminator = (char) stringTerm; |
| |
| for (; ; ) { |
| if (middle >= end) { |
| arr.write(buf, start, middle - start); |
| start = middle; |
| getMore(); |
| middle = start; |
| } |
| int ch = buf[middle++]; |
| if (ch == terminator) { |
| int len = middle - start - 1; |
| if (len > 0) arr.write(buf, start, len); |
| start = middle; |
| return; |
| } else if (ch == '\\') { |
| int len = middle - start - 1; |
| if (len > 0) arr.write(buf, start, len); |
| start = middle; |
| arr.write(readEscapedChar()); |
| middle = start; |
| } |
| } |
| } |
| |
| private void readStringBare(CharArr arr) throws IOException { |
| if (arr != out) { |
| arr.append(out); |
| } |
| |
| for (; ; ) { |
| int ch = getChar(); |
| if (!isUnquotedStringChar(ch)) { |
| if (ch == -1) break; |
| if (ch == '\\') { |
| arr.write(readEscapedChar()); |
| continue; |
| } |
| start--; |
| break; |
| } |
| |
| if (ch == '\\') { |
| arr.write(readEscapedChar()); |
| continue; |
| } |
| |
| arr.write(ch); |
| } |
| } |
| |
| |
| // isName==true if this is a field name (as opposed to a value) |
| protected void handleNonDoubleQuoteString(int ch, boolean isName) throws IOException { |
| if (ch == '\'') { |
| stringTerm = ch; |
| if ((flags & ALLOW_SINGLE_QUOTES) == 0) { |
| throw err("Single quoted strings not allowed"); |
| } |
| } else { |
| if (isName && (flags & ALLOW_UNQUOTED_KEYS) == 0 |
| || !isName && (flags & ALLOW_UNQUOTED_STRING_VALUES) == 0 |
| || eof) { |
| if (isName) { |
| throw err("Expected quoted string"); |
| } else { |
| throw err(null); |
| } |
| } |
| |
| if (!isUnquotedStringStart(ch)) { |
| throw err(null); |
| } |
| |
| stringTerm = 0; // signal for unquoted string |
| out.reset(); |
| out.unsafeWrite(ch); |
| } |
| } |
| |
| private static boolean isUnquotedStringStart(int ch) { |
| return Character.isJavaIdentifierStart(ch); |
| } |
| |
| // What characters are allowed to continue an unquoted string |
| // once we know we are in one. |
| private static boolean isUnquotedStringChar(int ch) { |
| return Character.isJavaIdentifierPart(ch) |
| || ch == '.' |
| || ch == '-' |
| || ch == '/'; |
| |
| // would checking for a-z first speed up the common case? |
| |
| // possibly much more liberal unquoted string handling... |
| /*** |
| switch (ch) { |
| case -1: |
| case ' ': |
| case '\t': |
| case '\r': |
| case '\n': |
| case '}': |
| case ']': |
| case ',': |
| case ':': |
| case '=': // reserved for future use |
| case '\\': // check for backslash should come after this function call |
| return false; |
| } |
| return true; |
| ***/ |
| } |
| |
| |
| /*** alternate implementation |
| // middle is the pointer to the middle of a buffer to start scanning for a non-string |
| // character ('"' or "/"). start<=middle<end |
| private void readStringChars2a(CharArr arr, int middle) throws IOException { |
| int ch=0; |
| for(;;) { |
| // find the next non-string char |
| for (; middle<end; middle++) { |
| ch = buf[middle]; |
| if (ch=='"' || ch=='\\') break; |
| } |
| |
| arr.write(buf,start,middle-start); |
| if (middle>=end) { |
| getMore(); |
| middle=start; |
| } else { |
| start = middle+1; // set buffer pointer to correct spot |
| if (ch=='"') { |
| valstate=0; |
| return; |
| } else if (ch=='\\') { |
| arr.write(readEscapedChar()); |
| if (start>=end) getMore(); |
| middle=start; |
| } |
| } |
| } |
| } |
| ***/ |
| |
| |
| // return the next event when parser is in a neutral state (no |
| // map separators or array element separators to read |
| private int next(int ch) throws IOException { |
| // TODO: try my own form of indirect jump... look up char class and index directly into handling implementation? |
| for (; ; ) { |
| switch (ch) { |
| case ' ': // this is not the exclusive list of whitespace chars... the rest are handled in default: |
| case '\t': |
| case '\r': |
| case '\n': |
| ch = getCharNWS(); // calling getCharNWS here seems faster than letting the switch handle it |
| break; |
| case '"': |
| stringTerm = '"'; |
| valstate = STRING; |
| return STRING; |
| case '\'': |
| if ((flags & ALLOW_SINGLE_QUOTES) == 0) { |
| throw err("Single quoted strings not allowed"); |
| } |
| stringTerm = '\''; |
| valstate = STRING; |
| return STRING; |
| case '{': |
| push(); |
| state = DID_OBJSTART; |
| return OBJECT_START; |
| case '[': |
| push(); |
| state = DID_ARRSTART; |
| return ARRAY_START; |
| case '0': |
| out.reset(); |
| //special case '0'? If next char isn't '.' val=0 |
| ch = getChar(); |
| if (ch == '.') { |
| start--; |
| ch = '0'; |
| readNumber('0', false); |
| return valstate; |
| } else if (ch > '9' || ch < '0') { |
| out.unsafeWrite('0'); |
| if (ch != -1) start--; |
| lval = 0; |
| valstate = LONG; |
| return LONG; |
| } else { |
| throw err("Leading zeros not allowed"); |
| } |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| out.reset(); |
| lval = readNumber(ch, false); |
| return valstate; |
| case '-': |
| out.reset(); |
| out.unsafeWrite('-'); |
| ch = getChar(); |
| if (ch < '0' || ch > '9') throw err("expected digit after '-'"); |
| lval = readNumber(ch, true); |
| return valstate; |
| case 't': |
| // TODO: test performance of this non-branching inline version. |
| // if ((('r'-getChar())|('u'-getChar())|('e'-getChar())) != 0) throw err(""); |
| if (matchBareWord(JSONUtil.TRUE_CHARS)) { |
| bool = true; |
| valstate = BOOLEAN; |
| return valstate; |
| } else { |
| valstate = STRING; |
| return STRING; |
| } |
| case 'f': |
| if (matchBareWord(JSONUtil.FALSE_CHARS)) { |
| bool = false; |
| valstate = BOOLEAN; |
| return valstate; |
| } else { |
| valstate = STRING; |
| return STRING; |
| } |
| case 'n': |
| if (matchBareWord(JSONUtil.NULL_CHARS)) { |
| valstate = NULL; |
| return valstate; |
| } else { |
| valstate = STRING; |
| return STRING; |
| } |
| case '/': |
| getSlashComment(); |
| ch = getChar(); |
| break; |
| case '#': |
| getNewlineComment(); |
| ch = getChar(); |
| break; |
| case ']': // This only happens with a trailing comma (or an error) |
| if (state != DID_ARRELEM || (flags & ALLOW_EXTRA_COMMAS) == 0) { |
| throw err("Unexpected array closer ]"); |
| } |
| pop(); |
| return event = ARRAY_END; |
| case '}': // This only happens with a trailing comma (or an error) |
| if (state != DID_MEMVAL || (flags & ALLOW_EXTRA_COMMAS) == 0) { |
| throw err("Unexpected object closer }"); |
| } |
| pop(); |
| return event = ARRAY_END; |
| case ',': // This only happens with input like [1,] |
| if ((state != DID_ARRELEM && state != DID_MEMVAL) || (flags & ALLOW_EXTRA_COMMAS) == 0) { |
| throw err("Unexpected comma"); |
| } |
| ch = getChar(); |
| break; |
| case -1: |
| if (getLevel() > 0) throw err("Premature EOF"); |
| return EOF; |
| default: |
| // Handle unusual unicode whitespace like no-break space (0xA0) |
| if (isWhitespace(ch)) { |
| ch = getChar(); // getCharNWS() would also work |
| break; |
| } |
| handleNonDoubleQuoteString(ch, false); |
| valstate = STRING; |
| return STRING; |
| // throw err(null); |
| } |
| |
| } |
| } |
| |
| @Override |
| public String toString() { |
| return "start=" + start + ",end=" + end + ",state=" + state + "valstate=" + valstate; |
| } |
| |
| |
| /** |
| * Returns the next event encountered in the JSON stream, one of |
| * <ul> |
| * <li>{@link #STRING}</li> |
| * <li>{@link #LONG}</li> |
| * <li>{@link #NUMBER}</li> |
| * <li>{@link #BIGNUMBER}</li> |
| * <li>{@link #BOOLEAN}</li> |
| * <li>{@link #NULL}</li> |
| * <li>{@link #OBJECT_START}</li> |
| * <li>{@link #OBJECT_END}</li> |
| * <li>{@link #OBJECT_END}</li> |
| * <li>{@link #ARRAY_START}</li> |
| * <li>{@link #ARRAY_END}</li> |
| * <li>{@link #EOF}</li> |
| * </ul> |
| */ |
| public int nextEvent() throws IOException { |
| if (valstate != 0) { |
| if (valstate == STRING) { |
| readStringChars2(devNull, start); |
| } else if (valstate == BIGNUMBER) { |
| continueNumber(devNull); |
| } |
| valstate = 0; |
| } |
| |
| int ch; |
| outer: |
| for (; ; ) { |
| switch (state) { |
| case 0: |
| event = next(getChar()); |
| if (event == STRING && (flags & OPTIONAL_OUTER_BRACES) != 0) { |
| if (start > 0) start--; |
| missingOpeningBrace = true; |
| stringTerm = 0; |
| valstate = 0; |
| event = next('{'); |
| } |
| return event; |
| case DID_OBJSTART: |
| ch = getCharExpected('"'); |
| if (ch == '}') { |
| pop(); |
| return event = OBJECT_END; |
| } |
| if (ch == '"') { |
| stringTerm = ch; |
| } else if (ch == ',' && (flags & ALLOW_EXTRA_COMMAS) != 0) { |
| continue outer; |
| } else { |
| handleNonDoubleQuoteString(ch, true); |
| } |
| state = DID_MEMNAME; |
| valstate = STRING; |
| return event = STRING; |
| case DID_MEMNAME: |
| ch = getCharExpected(':'); |
| if (ch != ':') { |
| if ((ch == '{' || ch == '[') && (flags & ALLOW_MISSING_COLON_COMMA_BEFORE_OBJECT) != 0) { |
| start--; |
| } else { |
| throw err("Expected key,value separator ':'"); |
| } |
| } |
| state = DID_MEMVAL; // set state first because it might be pushed... |
| return event = next(getChar()); |
| case DID_MEMVAL: |
| ch = getCharExpected(','); |
| if (ch == '}') { |
| pop(); |
| return event = OBJECT_END; |
| } else if (ch != ',') { |
| if ((flags & ALLOW_EXTRA_COMMAS) != 0 && (ch == '\'' || ch == '"' || Character.isLetter(ch))) { |
| start--; |
| } else if (missingOpeningBrace && ch == -1 && (flags & OPTIONAL_OUTER_BRACES) != 0) { |
| missingOpeningBrace = false; |
| pop(); |
| return event = OBJECT_END; |
| } else throw err("Expected ',' or '}'"); |
| } |
| ch = getCharExpected('"'); |
| if (ch == '"') { |
| stringTerm = ch; |
| } else if ((ch == ',' || ch == '}') && (flags & ALLOW_EXTRA_COMMAS) != 0) { |
| if (ch == ',') continue outer; |
| pop(); |
| return event = OBJECT_END; |
| } else { |
| handleNonDoubleQuoteString(ch, true); |
| } |
| state = DID_MEMNAME; |
| valstate = STRING; |
| return event = STRING; |
| case DID_ARRSTART: |
| ch = getCharNWS(); |
| if (ch == ']') { |
| pop(); |
| return event = ARRAY_END; |
| } |
| state = DID_ARRELEM; // set state first, might be pushed... |
| return event = next(ch); |
| case DID_ARRELEM: |
| ch = getCharExpected(','); |
| if (ch == ',') { |
| // state = DID_ARRELEM; // redundant |
| return event = next(getChar()); |
| } else if (ch == ']') { |
| pop(); |
| return event = ARRAY_END; |
| } else { |
| if ((ch == '{' || ch == '[') && (flags & ALLOW_MISSING_COLON_COMMA_BEFORE_OBJECT) != 0) { |
| return event = next(ch); |
| } else { |
| throw err("Expected ',' or ']'"); |
| } |
| } |
| } |
| } // end for(;;) |
| } |
| |
| public int lastEvent() { |
| return event; |
| } |
| |
| public boolean wasKey() { |
| return state == DID_MEMNAME; |
| } |
| |
| |
| private void goTo(int what) throws IOException { |
| if (valstate == what) { |
| valstate = 0; |
| return; |
| } |
| if (valstate == 0) { |
| /*int ev = */ |
| nextEvent(); // TODO |
| if (valstate != what) { |
| throw err("type mismatch"); |
| } |
| valstate = 0; |
| } else { |
| throw err("type mismatch"); |
| } |
| } |
| |
| /** |
| * Returns the JSON string value, decoding any escaped characters. |
| */ |
| public String getString() throws IOException { |
| return getStringChars().toString(); |
| } |
| |
| /** |
| * Returns the characters of a JSON string value, decoding any escaped characters. |
| * The underlying buffer of the returned <code>CharArr</code> should *not* be |
| * modified as it may be shared with the input buffer. |
| * The returned <code>CharArr</code> will only be valid up until |
| * the next JSONParser method is called. Any required data should be |
| * read before that point. |
| */ |
| public CharArr getStringChars() throws IOException { |
| goTo(STRING); |
| return readStringChars(); |
| } |
| |
| /** |
| * Reads a JSON string into the output, decoding any escaped characters. |
| */ |
| public void getString(CharArr output) throws IOException { |
| goTo(STRING); |
| readStringChars2(output, start); |
| } |
| |
| /** |
| * Reads a number from the input stream and parses it as a long, only if |
| * the value will in fact fit into a signed 64 bit integer. |
| */ |
| public long getLong() throws IOException { |
| goTo(LONG); |
| return lval; |
| } |
| |
| /** |
| * Reads a number from the input stream and parses it as a double |
| */ |
| public double getDouble() throws IOException { |
| return Double.parseDouble(getNumberChars().toString()); |
| } |
| |
| /** |
| * Returns the characters of a JSON numeric value. |
| * <p>The underlying buffer of the returned <code>CharArr</code> should *not* be |
| * modified as it may be shared with the input buffer. |
| * <p>The returned <code>CharArr</code> will only be valid up until |
| * the next JSONParser method is called. Any required data should be |
| * read before that point. |
| */ |
| public CharArr getNumberChars() throws IOException { |
| int ev = 0; |
| if (valstate == 0) ev = nextEvent(); |
| |
| if (valstate == LONG || valstate == NUMBER) { |
| valstate = 0; |
| return out; |
| } else if (valstate == BIGNUMBER) { |
| continueNumber(out); |
| valstate = 0; |
| return out; |
| } else { |
| throw err("Unexpected " + ev); |
| } |
| } |
| |
| /** |
| * Reads a JSON numeric value into the output. |
| */ |
| public void getNumberChars(CharArr output) throws IOException { |
| int ev = 0; |
| if (valstate == 0) ev = nextEvent(); |
| if (valstate == LONG || valstate == NUMBER) output.write(this.out); |
| else if (valstate == BIGNUMBER) { |
| continueNumber(output); |
| } else { |
| throw err("Unexpected " + ev); |
| } |
| valstate = 0; |
| } |
| |
| /** |
| * Reads a boolean value |
| */ |
| public boolean getBoolean() throws IOException { |
| goTo(BOOLEAN); |
| return bool; |
| } |
| |
| /** |
| * Reads a null value |
| */ |
| public void getNull() throws IOException { |
| goTo(NULL); |
| } |
| |
| /** |
| * @return the current nesting level, the number of parent objects or arrays. |
| */ |
| public int getLevel() { |
| return ptr; |
| } |
| |
| public long getPosition() { |
| return gpos + start; |
| } |
| } |