| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.vxquery.jsonparser; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.DataOutput; |
| import java.io.IOException; |
| import java.io.Reader; |
| import java.nio.ByteBuffer; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.List; |
| |
| import org.apache.commons.lang3.ArrayUtils; |
| import org.apache.htrace.fasterxml.jackson.core.JsonFactory; |
| import org.apache.htrace.fasterxml.jackson.core.JsonParser; |
| import org.apache.htrace.fasterxml.jackson.core.JsonToken; |
| import org.apache.hyracks.api.comm.IFrameFieldAppender; |
| import org.apache.hyracks.api.comm.IFrameWriter; |
| import org.apache.hyracks.api.exceptions.HyracksDataException; |
| import org.apache.hyracks.data.std.primitive.BooleanPointable; |
| import org.apache.hyracks.data.std.primitive.UTF8StringPointable; |
| import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; |
| import org.apache.hyracks.dataflow.common.comm.util.FrameUtils; |
| import org.apache.vxquery.datamodel.accessors.TaggedValuePointable; |
| import org.apache.vxquery.datamodel.builders.atomic.StringValueBuilder; |
| import org.apache.vxquery.datamodel.builders.jsonitem.ArrayBuilder; |
| import org.apache.vxquery.datamodel.builders.jsonitem.ObjectBuilder; |
| import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder; |
| import org.apache.vxquery.datamodel.values.ValueTag; |
| import org.apache.vxquery.datamodel.values.XDMConstants; |
| import org.apache.vxquery.xmlparser.IParser; |
| |
| public class JSONParser implements IParser { |
| final JsonFactory factory; |
| final List<Byte[]> valueSeq; |
| protected final ArrayBackedValueStorage atomic; |
| private TaggedValuePointable tvp; |
| private BooleanPointable bp; |
| protected final List<ArrayBuilder> abStack; |
| protected final List<ObjectBuilder> obStack; |
| protected final List<ArrayBackedValueStorage> abvsStack; |
| protected final List<ArrayBackedValueStorage> keyStack; |
| protected final List<UTF8StringPointable> spStack; |
| protected final StringValueBuilder svb; |
| protected final SequenceBuilder sb; |
| protected final DataOutput out; |
| protected itemType checkItem; |
| protected int levelArray, levelObject; |
| protected final List<Byte[]> allKeys; |
| protected ByteArrayOutputStream outputStream, prefixStream, pathStream; |
| protected int objectMatchLevel; |
| protected int arrayMatchLevel; |
| protected boolean matched, literal; |
| protected ArrayBackedValueStorage tempABVS; |
| protected List<Integer> arrayCounters; |
| protected List<Boolean> keysOrMembers; |
| protected IFrameWriter writer; |
| protected IFrameFieldAppender appender; |
| |
| enum itemType { |
| ARRAY, |
| OBJECT |
| } |
| |
| protected final List<itemType> itemStack; |
| |
/**
 * Creates a parser without a requested path: every top-level JSON item is materialized.
 */
public JSONParser() {
    this(null);
}

/**
 * Creates a parser that optionally extracts only the elements found under a path.
 *
 * @param valueSeq
 *            the path steps as tagged values, or {@code null} to materialize whole documents.
 *            An integer step selects one array member, a boolean step selects all members of
 *            an array or all keys of an object, and any other step is treated as an object key.
 */
public JSONParser(List<Byte[]> valueSeq) {
    factory = new JsonFactory();
    this.valueSeq = valueSeq;
    atomic = new ArrayBackedValueStorage();
    tvp = new TaggedValuePointable();
    bp = new BooleanPointable();
    abStack = new ArrayList<ArrayBuilder>();
    obStack = new ArrayList<ObjectBuilder>();
    abvsStack = new ArrayList<ArrayBackedValueStorage>();
    keyStack = new ArrayList<ArrayBackedValueStorage>();
    spStack = new ArrayList<UTF8StringPointable>();
    itemStack = new ArrayList<itemType>();
    svb = new StringValueBuilder();
    sb = new SequenceBuilder();
    allKeys = new ArrayList<Byte[]>();
    // Slot 0 of the storage stack is reserved for atomic values; "out" writes into it.
    abvsStack.add(atomic);
    out = abvsStack.get(abvsStack.size() - 1).getDataOutput();
    tempABVS = new ArrayBackedValueStorage();
    this.objectMatchLevel = 1;
    this.arrayMatchLevel = 0;
    matched = false;
    literal = false;
    arrayCounters = new ArrayList<Integer>();
    outputStream = new ByteArrayOutputStream();
    prefixStream = new ByteArrayOutputStream();
    pathStream = new ByteArrayOutputStream();
    this.keysOrMembers = new ArrayList<Boolean>();
    if (valueSeq == null) {
        return;
    }
    // Serialize the requested path once so that it can be compared byte-wise while parsing.
    for (Byte[] boxedStep : valueSeq) {
        // Unbox a single time; the original converted every step twice.
        byte[] step = ArrayUtils.toPrimitive(boxedStep);
        tvp.set(step, 0, step.length);
        if (tvp.getTag() == ValueTag.XS_INTEGER_TAG) {
            // access one item of an array
            pathStream.write(tvp.getByteArray(), 0, tvp.getLength());
            this.arrayMatchLevel++;
            this.keysOrMembers.add(Boolean.TRUE);
        } else if (tvp.getTag() == ValueTag.XS_BOOLEAN_TAG) {
            // access all the items of an array or all the keys of an object
            pathStream.write(tvp.getByteArray(), 0, tvp.getLength());
            this.arrayMatchLevel++;
            this.keysOrMembers.add(Boolean.FALSE);
        } else {
            // access an object by key: the leading tag byte is not part of the stored path
            pathStream.write(tvp.getByteArray(), 1, tvp.getLength() - 1);
        }
    }
}
| |
| Byte[] toBytes(Integer v) { |
| Byte[] barr = ArrayUtils.toObject(ByteBuffer.allocate(9).putLong(1, v).array()); |
| barr[0] = ValueTag.XS_INTEGER_TAG; |
| return barr; |
| } |
| |
| public int parse(Reader input, ArrayBackedValueStorage result, IFrameWriter writer, IFrameFieldAppender appender) |
| throws HyracksDataException { |
| this.writer = writer; |
| this.appender = appender; |
| if (this.valueSeq != null) { |
| return parseElements(input, result); |
| } else { |
| return parse(input, result); |
| } |
| } |
| |
| public int parse(Reader input, ArrayBackedValueStorage result) throws HyracksDataException { |
| int items = 0; |
| try { |
| DataOutput outResult = result.getDataOutput(); |
| JsonParser parser = factory.createParser(input); |
| JsonToken token = parser.nextToken(); |
| checkItem = null; |
| levelArray = 0; |
| levelObject = 0; |
| sb.reset(result); |
| while (token != null) { |
| if (itemStack.size() > 1) { |
| checkItem = itemStack.get(itemStack.size() - 2); |
| } |
| switch (token) { |
| case START_ARRAY: |
| startArray(); |
| break; |
| case START_OBJECT: |
| startObject(); |
| break; |
| case FIELD_NAME: |
| startFieldName(parser); |
| break; |
| case VALUE_NUMBER_INT: |
| startAtomicValues(ValueTag.XS_INTEGER_TAG, parser); |
| break; |
| case VALUE_STRING: |
| startAtomicValues(ValueTag.XS_STRING_TAG, parser); |
| break; |
| case VALUE_NUMBER_FLOAT: |
| startAtomicValues(ValueTag.XS_DOUBLE_TAG, parser); |
| break; |
| case END_ARRAY: |
| abStack.get(levelArray - 1).finish(); |
| if (itemStack.size() > 1) { |
| if (checkItem == itemType.ARRAY) { |
| abStack.get(levelArray - 2).addItem(abvsStack.get(levelArray + levelObject)); |
| } else if (checkItem == itemType.OBJECT) { |
| obStack.get(levelObject - 1).addItem(spStack.get(levelObject - 1), |
| abvsStack.get(levelArray + levelObject)); |
| } |
| } |
| itemStack.remove(itemStack.size() - 1); |
| levelArray--; |
| if (levelArray + levelObject == 0) { |
| sb.addItem(abvsStack.get(1)); |
| items++; |
| } |
| break; |
| case END_OBJECT: |
| obStack.get(levelObject - 1).finish(); |
| if (itemStack.size() > 1) { |
| if (checkItem == itemType.OBJECT) { |
| obStack.get(levelObject - 2).addItem(spStack.get(levelObject - 2), |
| abvsStack.get(levelArray + levelObject)); |
| } else if (checkItem == itemType.ARRAY) { |
| abStack.get(levelArray - 1).addItem(abvsStack.get(levelArray + levelObject)); |
| } |
| } |
| itemStack.remove(itemStack.size() - 1); |
| levelObject--; |
| if (levelObject + levelArray == 0) { |
| sb.addItem(abvsStack.get(1)); |
| items++; |
| } |
| break; |
| default: |
| break; |
| } |
| token = parser.nextToken(); |
| } |
| sb.finish(); |
| outResult.write(result.getByteArray()); |
| } catch (Exception e) { |
| throw new HyracksDataException("Accessing or writing in out of bounds space", e); |
| } |
| return items; |
| } |
| |
| public int parseElements(Reader input, ArrayBackedValueStorage result) throws HyracksDataException { |
| int items = 0; |
| try { |
| JsonParser parser = factory.createParser(input); |
| JsonToken token = parser.nextToken(); |
| checkItem = null; |
| |
| this.objectMatchLevel = 0; |
| this.matched = false; |
| |
| levelArray = 0; |
| levelObject = 0; |
| sb.reset(result); |
| while (token != null) { |
| if (itemStack.size() > 1) { |
| checkItem = itemStack.get(itemStack.size() - 2); |
| } |
| switch (token) { |
| case START_ARRAY: |
| startArray(); |
| break; |
| case START_OBJECT: |
| startObject(); |
| break; |
| case FIELD_NAME: |
| startFieldName(parser); |
| break; |
| case VALUE_NUMBER_INT: |
| startAtomicValues(ValueTag.XS_INTEGER_TAG, parser); |
| break; |
| case VALUE_STRING: |
| startAtomicValues(ValueTag.XS_STRING_TAG, parser); |
| break; |
| case VALUE_NUMBER_FLOAT: |
| startAtomicValues(ValueTag.XS_DOUBLE_TAG, parser); |
| break; |
| case END_ARRAY: |
| //if the query doesn't ask for an atomic value |
| if (!this.literal && this.pathMatch()) { |
| //check if the path asked from the query includes the current path |
| abStack.get(levelArray - 1).finish(); |
| if (itemStack.size() > 1) { |
| if (checkItem == itemType.ARRAY) { |
| if (levelArray > this.arrayMatchLevel + 1) { |
| abStack.get(levelArray - 2).addItem(abvsStack.get(levelArray + levelObject)); |
| } else if (this.matched) { |
| this.matched = false; |
| items++; |
| writeElement(abvsStack.get(levelArray + levelObject)); |
| } |
| } else if (checkItem == itemType.OBJECT) { |
| if (levelArray > this.arrayMatchLevel && !this.matched) { |
| obStack.get(levelObject - 1).addItem(spStack.get(levelObject - 1), |
| abvsStack.get(levelArray + levelObject)); |
| } else if (this.matched) { |
| writeElement(abvsStack.get(levelArray + levelObject)); |
| this.matched = false; |
| items++; |
| } |
| } |
| } |
| } |
| if (allKeys.size() - 1 >= 0) { |
| allKeys.remove(allKeys.size() - 1); |
| } |
| this.arrayCounters.remove(levelArray - 1); |
| itemStack.remove(itemStack.size() - 1); |
| levelArray--; |
| break; |
| case END_OBJECT: |
| //if the query doesn't ask for an atomic value |
| if (!this.literal && this.pathMatch()) { |
| //check if the path asked from the query includes the current path |
| obStack.get(levelObject - 1).finish(); |
| if (itemStack.size() > 1) { |
| if (checkItem == itemType.OBJECT) { |
| if (levelObject > this.objectMatchLevel) { |
| obStack.get(levelObject - 2).addItem(spStack.get(levelObject - 2), |
| abvsStack.get(levelArray + levelObject)); |
| } else if (this.matched) { |
| this.matched = false; |
| items++; |
| writeElement(abvsStack.get(levelArray + levelObject)); |
| } |
| } else if (checkItem == itemType.ARRAY) { |
| abStack.get(levelArray - 1).addItem(abvsStack.get(levelArray + levelObject)); |
| if (this.matched) { |
| writeElement(abvsStack.get(levelArray + levelObject)); |
| this.matched = false; |
| } |
| } |
| } |
| } |
| if (allKeys.size() - 1 >= 0) { |
| allKeys.remove(allKeys.size() - 1); |
| } |
| itemStack.remove(itemStack.size() - 1); |
| levelObject--; |
| break; |
| default: |
| break; |
| } |
| token = parser.nextToken(); |
| } |
| sb.finish(); |
| } catch (Exception e) { |
| throw new HyracksDataException("Accessing or writing in out of bounds space", e); |
| } |
| return items; |
| } |
| |
| private boolean pathMatch() { |
| outputStream.reset(); |
| for (Byte[] bb : allKeys) { |
| outputStream.write(ArrayUtils.toPrimitive(bb), 0, ArrayUtils.toPrimitive(bb).length); |
| } |
| //the path of values created by parsing the file |
| boolean contains = false; |
| this.matched = false; |
| prefixStream.reset(); |
| if (pathStream.size() < outputStream.size()) { |
| prefixStream.write(outputStream.toByteArray(), 0, pathStream.size()); |
| contains = Arrays.equals(prefixStream.toByteArray(), pathStream.toByteArray()); |
| } else { |
| prefixStream.write(pathStream.toByteArray(), 0, outputStream.size()); |
| contains = Arrays.equals(prefixStream.toByteArray(), outputStream.toByteArray()); |
| } |
| if (pathStream.size() == outputStream.size() && contains) { |
| this.objectMatchLevel = this.levelObject; |
| this.matched = true; |
| this.literal = false; |
| } |
| return contains; |
| } |
| |
| public void itemsInArray() { |
| if (itemStack.get(itemStack.size() - 1) == itemType.ARRAY && !this.arrayCounters.isEmpty()) { |
| boolean addCounter = levelArray - 1 < this.keysOrMembers.size() ? this.keysOrMembers.get(levelArray - 1) |
| : true; |
| if (addCounter) { |
| this.arrayCounters.set(levelArray - 1, this.arrayCounters.get(levelArray - 1) + 1); |
| this.allKeys.add(this.toBytes(this.arrayCounters.get(levelArray - 1))); |
| } else { |
| Byte[] bool = { (byte) 0x2B, 0x01 }; |
| this.allKeys.add(bool); |
| } |
| } |
| } |
| |
| public void atomicValues(int tag, JsonParser parser, DataOutput out, StringValueBuilder svb, int levelArray, |
| int levelObject) throws IOException { |
| abvsStack.get(0).reset(); |
| out.write(tag); |
| if (tag == ValueTag.XS_DOUBLE_TAG) { |
| out.writeDouble(parser.getDoubleValue()); |
| } else if (tag == ValueTag.XS_STRING_TAG) { |
| svb.write(parser.getText(), out); |
| } else if (tag == ValueTag.XS_INTEGER_TAG) { |
| out.writeLong(parser.getLongValue()); |
| } |
| if (!itemStack.isEmpty()) { |
| if (itemStack.get(itemStack.size() - 1) == itemType.ARRAY) { |
| abStack.get(levelArray - 1).addItem(abvsStack.get(0)); |
| if (valueSeq != null && this.matched && levelArray == this.arrayMatchLevel) { |
| this.literal = true; |
| this.matched = false; |
| writeElement(abvsStack.get(0)); |
| } |
| } else if (itemStack.get(itemStack.size() - 1) == itemType.OBJECT) { |
| obStack.get(levelObject - 1).addItem(spStack.get(levelObject - 1), abvsStack.get(0)); |
| if (valueSeq != null && this.matched && levelObject == this.objectMatchLevel) { |
| this.literal = true; |
| this.matched = false; |
| writeElement(abvsStack.get(0)); |
| } |
| } |
| } |
| } |
| |
| public void writeElement(ArrayBackedValueStorage abvs) throws IOException { |
| tempABVS.reset(); |
| DataOutput out = tempABVS.getDataOutput(); |
| out.write(abvs.getByteArray(), abvs.getStartOffset(), abvs.getLength()); |
| FrameUtils.appendFieldToWriter(writer, appender, tempABVS.getByteArray(), tempABVS.getStartOffset(), |
| tempABVS.getLength()); |
| } |
| |
| public void startArrayOrObjects(int count) { |
| if (valueSeq != null && !this.arrayCounters.isEmpty()) { |
| boolean addCounter = levelArray - count < this.keysOrMembers.size() |
| ? this.keysOrMembers.get(levelArray - count) : true; |
| if (itemStack.get(itemStack.size() - 1) == itemType.ARRAY) { |
| if (addCounter) { |
| this.arrayCounters.set(levelArray - count, this.arrayCounters.get(levelArray - count) + 1); |
| this.allKeys.add(this.toBytes(this.arrayCounters.get(levelArray - count))); |
| } else { |
| XDMConstants.setTrue(bp); |
| this.allKeys.add(ArrayUtils.toObject(bp.getByteArray())); |
| } |
| } |
| |
| } |
| if (count == 2 && valueSeq != null) { |
| this.arrayCounters.add(Integer.valueOf(0)); |
| } |
| } |
| |
| public void startArray() throws HyracksDataException { |
| levelArray++; |
| if (levelArray > abStack.size()) { |
| abStack.add(new ArrayBuilder()); |
| } |
| if (levelArray + levelObject > abvsStack.size() - 1) { |
| abvsStack.add(new ArrayBackedValueStorage()); |
| } |
| startArrayOrObjects(2); |
| itemStack.add(itemType.ARRAY); |
| if (this.pathMatch() || this.valueSeq == null) { |
| abvsStack.get(levelArray + levelObject).reset(); |
| try { |
| abStack.get(levelArray - 1).reset(abvsStack.get(levelArray + levelObject)); |
| } catch (Exception e) { |
| throw new HyracksDataException("Accessing index out of bounds", e); |
| } |
| } |
| } |
| |
| public void startObject() throws HyracksDataException { |
| levelObject++; |
| if (levelObject > obStack.size()) { |
| obStack.add(new ObjectBuilder()); |
| } |
| if (levelArray + levelObject > abvsStack.size() - 1) { |
| abvsStack.add(new ArrayBackedValueStorage()); |
| } |
| startArrayOrObjects(1); |
| itemStack.add(itemType.OBJECT); |
| if (this.pathMatch() || this.valueSeq == null) { |
| abvsStack.get(levelArray + levelObject).reset(); |
| try { |
| obStack.get(levelObject - 1).reset(abvsStack.get(levelArray + levelObject)); |
| } catch (Exception e) { |
| throw new HyracksDataException("Accessing index out of bounds", e); |
| } |
| } |
| } |
| |
| public void startFieldName(JsonParser parser) throws HyracksDataException { |
| if (levelObject > spStack.size()) { |
| keyStack.add(new ArrayBackedValueStorage()); |
| spStack.add(new UTF8StringPointable()); |
| } |
| keyStack.get(levelObject - 1).reset(); |
| DataOutput outk = keyStack.get(levelObject - 1).getDataOutput(); |
| try { |
| svb.write(parser.getText(), outk); |
| spStack.get(levelObject - 1).set(keyStack.get(levelObject - 1)); |
| if (this.valueSeq != null) { |
| int length = 0; |
| byte[] barr = spStack.get(levelObject - 1).getByteArray(); |
| outputStream.reset(); |
| outputStream.write(barr, 0, spStack.get(levelObject - 1).getLength()); |
| allKeys.add(ArrayUtils.toObject(outputStream.toByteArray())); |
| for (int i = 0; i < allKeys.size() - 1; i++) { |
| tvp.set(ArrayUtils.toPrimitive(allKeys.get(i)), 0, ArrayUtils.toPrimitive(allKeys.get(i)).length); |
| length += ArrayUtils.toPrimitive(allKeys.get(i)).length; |
| } |
| //if the next two bytes represent a boolean (boolean has only two bytes), |
| //it means that query asks for all the keys of the object |
| if (length <= pathStream.size() && (length + 2) <= pathStream.size()) { |
| tvp.set(pathStream.toByteArray(), length, length + 2); |
| if (tvp.getTag() == ValueTag.XS_BOOLEAN_TAG) { |
| abvsStack.get(0).reset(); |
| out.write(ValueTag.XS_STRING_TAG); |
| svb.write(parser.getText(), out); |
| writeElement(abvsStack.get(0)); |
| } |
| } |
| } |
| } catch (Exception e) { |
| throw new HyracksDataException("Writing in out of bounds space", e); |
| } |
| } |
| |
| public void startAtomicValues(int tag, JsonParser parser) throws HyracksDataException { |
| itemsInArray(); |
| if (this.pathMatch() || this.valueSeq == null) { |
| try { |
| atomicValues(tag, parser, out, svb, levelArray, levelObject); |
| } catch (Exception e) { |
| throw new HyracksDataException(e); |
| } |
| } |
| if (allKeys.size() - 1 >= 0) { |
| allKeys.remove(allKeys.size() - 1); |
| } |
| } |
| } |