blob: edf8dbc685d4c508dbf59d598a162cca626208f8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.vxquery.jsonparser;
import java.io.ByteArrayOutputStream;
import java.io.DataOutput;
import java.io.IOException;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.htrace.fasterxml.jackson.core.JsonFactory;
import org.apache.htrace.fasterxml.jackson.core.JsonParser;
import org.apache.htrace.fasterxml.jackson.core.JsonToken;
import org.apache.hyracks.api.comm.IFrameFieldAppender;
import org.apache.hyracks.api.comm.IFrameWriter;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.primitive.BooleanPointable;
import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
import org.apache.vxquery.datamodel.builders.atomic.StringValueBuilder;
import org.apache.vxquery.datamodel.builders.jsonitem.ArrayBuilder;
import org.apache.vxquery.datamodel.builders.jsonitem.ObjectBuilder;
import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder;
import org.apache.vxquery.datamodel.values.ValueTag;
import org.apache.vxquery.datamodel.values.XDMConstants;
import org.apache.vxquery.xmlparser.IParser;
public class JSONParser implements IParser {
// Factory used to create a fresh JsonParser for every parse call.
final JsonFactory factory;
// Steps of the requested path, one tagged value per step; null means "parse the whole document".
final List<Byte[]> valueSeq;
// Storage for the most recently read atomic value; it is also slot 0 of abvsStack.
protected final ArrayBackedValueStorage atomic;
// Scratch pointable used to look at the tag byte of a path step.
private TaggedValuePointable tvp;
// Scratch pointable that receives the boolean "true" constant (see startArrayOrObjects).
private BooleanPointable bp;
// One builder per array nesting level, addressed with levelArray - 1.
protected final List<ArrayBuilder> abStack;
// One builder per object nesting level, addressed with levelObject - 1.
protected final List<ObjectBuilder> obStack;
// Slot 0 is `atomic`; slot (levelArray + levelObject) backs the container open at that depth.
protected final List<ArrayBackedValueStorage> abvsStack;
// Per object level: storage holding the bytes of the field name read last.
protected final List<ArrayBackedValueStorage> keyStack;
// Per object level: pointable set over the matching keyStack entry.
protected final List<UTF8StringPointable> spStack;
// Serializes Java strings into the storages above.
protected final StringValueBuilder svb;
// Sequence builder that is reset onto the caller's result storage by the parse methods.
protected final SequenceBuilder sb;
// DataOutput of `atomic`.
protected final DataOutput out;
// Type of the container that encloses the innermost open one (second entry from the top of itemStack).
protected itemType checkItem;
// Number of arrays / objects that are currently open.
protected int levelArray, levelObject;
// Path components (field names, array counters, boolean markers) leading to the item being parsed.
protected final List<Byte[]> allKeys;
// outputStream: scratch for the current path / current key bytes; prefixStream: scratch for the
// prefix comparison in pathMatch; pathStream: bytes of the requested path, filled by the constructor.
protected ByteArrayOutputStream outputStream, prefixStream, pathStream;
// Value of levelObject recorded by pathMatch at the last exact match.
protected int objectMatchLevel;
// Number of integer and boolean steps found in valueSeq.
protected int arrayMatchLevel;
// matched: the current path equals the requested path; literal: a matched atomic value was emitted.
protected boolean matched, literal;
// Scratch copy of an item handed to the frame writer by writeElement.
protected ArrayBackedValueStorage tempABVS;
// Per open array: number of members counted so far (only maintained when valueSeq != null).
protected List<Integer> arrayCounters;
// Per integer/boolean path step: true for an integer step, false for a boolean step.
protected List<Boolean> keysOrMembers;
// Frame writer and appender that receive the matched items (set by the four-argument parse).
protected IFrameWriter writer;
protected IFrameFieldAppender appender;
// Kind of JSON container tracked on itemStack.
enum itemType {
ARRAY,
OBJECT
}
// Stack of the containers that are currently open, innermost last.
protected final List<itemType> itemStack;
/**
 * Creates a parser without a path filter: the two-argument {@code parse}
 * is used and whole documents are materialized.
 */
public JSONParser() {
    this((List<Byte[]>) null);
}
public JSONParser(List<Byte[]> valueSeq) {
factory = new JsonFactory();
this.valueSeq = valueSeq;
atomic = new ArrayBackedValueStorage();
tvp = new TaggedValuePointable();
abStack = new ArrayList<ArrayBuilder>();
obStack = new ArrayList<ObjectBuilder>();
abvsStack = new ArrayList<ArrayBackedValueStorage>();
keyStack = new ArrayList<ArrayBackedValueStorage>();
spStack = new ArrayList<UTF8StringPointable>();
itemStack = new ArrayList<itemType>();
svb = new StringValueBuilder();
sb = new SequenceBuilder();
bp = new BooleanPointable();
allKeys = new ArrayList<Byte[]>();
abvsStack.add(atomic);
out = abvsStack.get(abvsStack.size() - 1).getDataOutput();
tempABVS = new ArrayBackedValueStorage();
this.objectMatchLevel = 1;
this.arrayMatchLevel = 0;
matched = false;
literal = false;
arrayCounters = new ArrayList<Integer>();
outputStream = new ByteArrayOutputStream();
prefixStream = new ByteArrayOutputStream();
pathStream = new ByteArrayOutputStream();
this.keysOrMembers = new ArrayList<Boolean>();
outputStream.reset();
pathStream.reset();
if (valueSeq != null) {
for (int i = 0; i < this.valueSeq.size(); i++) {
tvp.set(ArrayUtils.toPrimitive(valueSeq.get(i)), 0, ArrayUtils.toPrimitive(valueSeq.get(i)).length);
//access an item of an array
if (tvp.getTag() == ValueTag.XS_INTEGER_TAG) {
pathStream.write(tvp.getByteArray(), 0, tvp.getLength());
this.arrayMatchLevel++;
this.keysOrMembers.add(Boolean.valueOf(true));
//access all the items of an array or
//all the keys of an object
} else if (tvp.getTag() == ValueTag.XS_BOOLEAN_TAG) {
pathStream.write(tvp.getByteArray(), 0, tvp.getLength());
this.arrayMatchLevel++;
this.keysOrMembers.add(Boolean.valueOf(false));
//access an object
} else {
pathStream.write(tvp.getByteArray(), 1, tvp.getLength() - 1);
}
}
}
}
/**
 * Encodes an array counter the way an integer path step is encoded:
 * one tag byte followed by the value as an 8-byte big-endian long.
 *
 * @param v
 *            the counter value
 * @return the nine boxed bytes
 */
Byte[] toBytes(Integer v) {
    final byte[] raw = new byte[9];
    // Bytes 1..8 carry the value; byte 0 is reserved for the tag.
    ByteBuffer.wrap(raw).putLong(1, v.longValue());
    final Byte[] boxed = ArrayUtils.toObject(raw);
    boxed[0] = ValueTag.XS_INTEGER_TAG;
    return boxed;
}
/**
 * Parses {@code input}, remembering the frame writer and appender for
 * {@link #writeElement(ArrayBackedValueStorage)}.
 *
 * @param input
 *            the JSON text
 * @param result
 *            storage the sequence builder is reset onto
 * @param writer
 *            frame writer receiving matched items
 * @param appender
 *            frame appender used together with {@code writer}
 * @return the item count reported by the delegate parse method
 * @throws HyracksDataException
 *             if the delegate parse method fails
 */
public int parse(Reader input, ArrayBackedValueStorage result, IFrameWriter writer, IFrameFieldAppender appender)
        throws HyracksDataException {
    this.writer = writer;
    this.appender = appender;
    // Without a path the whole document is materialized; with one, only matching items are emitted.
    final boolean wholeDocument = this.valueSeq == null;
    return wholeDocument ? parse(input, result) : parseElements(input, result);
}
/**
 * Parses the whole input and adds every top-level array or object to the
 * sequence that is built on {@code result}.
 *
 * Only START/END_ARRAY, START/END_OBJECT, FIELD_NAME and the int, float and
 * string value tokens are handled; every other token (for example
 * true/false/null) falls into the default branch and is ignored.
 *
 * NOTE(review): the JsonParser created here is never closed.
 *
 * @param input
 *            the JSON text
 * @param result
 *            storage the sequence is written to
 * @return the number of top-level arrays and objects that were added
 * @throws HyracksDataException
 *             wrapping any exception raised while parsing or building
 */
public int parse(Reader input, ArrayBackedValueStorage result) throws HyracksDataException {
int items = 0;
try {
DataOutput outResult = result.getDataOutput();
JsonParser parser = factory.createParser(input);
JsonToken token = parser.nextToken();
checkItem = null;
levelArray = 0;
levelObject = 0;
sb.reset(result);
while (token != null) {
// checkItem is the container that encloses the innermost open one.
if (itemStack.size() > 1) {
checkItem = itemStack.get(itemStack.size() - 2);
}
switch (token) {
case START_ARRAY:
startArray();
break;
case START_OBJECT:
startObject();
break;
case FIELD_NAME:
startFieldName(parser);
break;
case VALUE_NUMBER_INT:
startAtomicValues(ValueTag.XS_INTEGER_TAG, parser);
break;
case VALUE_STRING:
startAtomicValues(ValueTag.XS_STRING_TAG, parser);
break;
case VALUE_NUMBER_FLOAT:
startAtomicValues(ValueTag.XS_DOUBLE_TAG, parser);
break;
case END_ARRAY:
abStack.get(levelArray - 1).finish();
// A nested array is attached to its parent: as a member when the parent is an
// array, or under the current field name when the parent is an object.
if (itemStack.size() > 1) {
if (checkItem == itemType.ARRAY) {
abStack.get(levelArray - 2).addItem(abvsStack.get(levelArray + levelObject));
} else if (checkItem == itemType.OBJECT) {
obStack.get(levelObject - 1).addItem(spStack.get(levelObject - 1),
abvsStack.get(levelArray + levelObject));
}
}
itemStack.remove(itemStack.size() - 1);
levelArray--;
// Depth dropped to zero: the array just closed was top-level and lives in slot 1.
if (levelArray + levelObject == 0) {
sb.addItem(abvsStack.get(1));
items++;
}
break;
case END_OBJECT:
obStack.get(levelObject - 1).finish();
// Same attachment rule as for END_ARRAY, with the roles of the builders swapped.
if (itemStack.size() > 1) {
if (checkItem == itemType.OBJECT) {
obStack.get(levelObject - 2).addItem(spStack.get(levelObject - 2),
abvsStack.get(levelArray + levelObject));
} else if (checkItem == itemType.ARRAY) {
abStack.get(levelArray - 1).addItem(abvsStack.get(levelArray + levelObject));
}
}
itemStack.remove(itemStack.size() - 1);
levelObject--;
// Depth dropped to zero: the object just closed was top-level and lives in slot 1.
if (levelObject + levelArray == 0) {
sb.addItem(abvsStack.get(1));
items++;
}
break;
default:
break;
}
token = parser.nextToken();
}
sb.finish();
// NOTE(review): this appends the backing array of `result` to `result` itself
// (outResult is result's own DataOutput) - confirm that this is intended.
outResult.write(result.getByteArray());
} catch (Exception e) {
// Every failure, not only index errors, is reported with this message.
throw new HyracksDataException("Accessing or writing in out of bounds space", e);
}
return items;
}
/**
 * Parses the input and emits, through {@link #writeElement(ArrayBackedValueStorage)},
 * the items whose path equals the path given to the constructor.
 *
 * The sequence builder is reset onto {@code result} and finished, but this
 * method itself never adds an item to it.
 *
 * NOTE(review): allKeys, arrayCounters, itemStack and literal are not reset
 * here, so they carry over from a previous call - confirm callers rely on a
 * clean parser. The JsonParser created here is never closed.
 *
 * @param input
 *            the JSON text
 * @param result
 *            storage the sequence builder is reset onto
 * @return the number of matched arrays/objects counted in the END_ARRAY and
 *         END_OBJECT branches below
 * @throws HyracksDataException
 *             wrapping any exception raised while parsing or writing
 */
public int parseElements(Reader input, ArrayBackedValueStorage result) throws HyracksDataException {
int items = 0;
try {
JsonParser parser = factory.createParser(input);
JsonToken token = parser.nextToken();
checkItem = null;
this.objectMatchLevel = 0;
this.matched = false;
levelArray = 0;
levelObject = 0;
sb.reset(result);
while (token != null) {
// checkItem is the container that encloses the innermost open one.
if (itemStack.size() > 1) {
checkItem = itemStack.get(itemStack.size() - 2);
}
switch (token) {
case START_ARRAY:
startArray();
break;
case START_OBJECT:
startObject();
break;
case FIELD_NAME:
startFieldName(parser);
break;
case VALUE_NUMBER_INT:
startAtomicValues(ValueTag.XS_INTEGER_TAG, parser);
break;
case VALUE_STRING:
startAtomicValues(ValueTag.XS_STRING_TAG, parser);
break;
case VALUE_NUMBER_FLOAT:
startAtomicValues(ValueTag.XS_DOUBLE_TAG, parser);
break;
case END_ARRAY:
// Only build/emit when no atomic value was already emitted for the query and the
// current path and the requested path are prefixes of one another.
// pathMatch() also refreshes `matched`.
if (!this.literal && this.pathMatch()) {
abStack.get(levelArray - 1).finish();
if (itemStack.size() > 1) {
if (checkItem == itemType.ARRAY) {
// Deeper than the requested path: keep nesting into the parent array;
// otherwise emit the array when it is the exact match.
if (levelArray > this.arrayMatchLevel + 1) {
abStack.get(levelArray - 2).addItem(abvsStack.get(levelArray + levelObject));
} else if (this.matched) {
this.matched = false;
items++;
writeElement(abvsStack.get(levelArray + levelObject));
}
} else if (checkItem == itemType.OBJECT) {
if (levelArray > this.arrayMatchLevel && !this.matched) {
obStack.get(levelObject - 1).addItem(spStack.get(levelObject - 1),
abvsStack.get(levelArray + levelObject));
} else if (this.matched) {
writeElement(abvsStack.get(levelArray + levelObject));
this.matched = false;
items++;
}
}
}
}
// Leave the array: drop its path component and its member counter.
if (allKeys.size() - 1 >= 0) {
allKeys.remove(allKeys.size() - 1);
}
this.arrayCounters.remove(levelArray - 1);
itemStack.remove(itemStack.size() - 1);
levelArray--;
break;
case END_OBJECT:
// Same gate as for END_ARRAY.
if (!this.literal && this.pathMatch()) {
obStack.get(levelObject - 1).finish();
if (itemStack.size() > 1) {
if (checkItem == itemType.OBJECT) {
// Deeper than the matched object level: nest into the parent object;
// otherwise emit the object when it is the exact match.
if (levelObject > this.objectMatchLevel) {
obStack.get(levelObject - 2).addItem(spStack.get(levelObject - 2),
abvsStack.get(levelArray + levelObject));
} else if (this.matched) {
this.matched = false;
items++;
writeElement(abvsStack.get(levelArray + levelObject));
}
} else if (checkItem == itemType.ARRAY) {
abStack.get(levelArray - 1).addItem(abvsStack.get(levelArray + levelObject));
// NOTE(review): unlike the other emitting branches, `items` is not
// incremented here - confirm whether that is intended.
if (this.matched) {
writeElement(abvsStack.get(levelArray + levelObject));
this.matched = false;
}
}
}
}
// Leave the object: drop its path component.
if (allKeys.size() - 1 >= 0) {
allKeys.remove(allKeys.size() - 1);
}
itemStack.remove(itemStack.size() - 1);
levelObject--;
break;
default:
break;
}
token = parser.nextToken();
}
sb.finish();
} catch (Exception e) {
// Every failure, not only index errors, is reported with this message.
throw new HyracksDataException("Accessing or writing in out of bounds space", e);
}
return items;
}
/**
 * Compares the path of the item being parsed (the concatenation of
 * {@code allKeys}) with the requested path in {@code pathStream}.
 *
 * Side effects: {@code outputStream} is left holding the current path,
 * {@code prefixStream} the compared prefix, and {@code matched} is cleared
 * and set again only on an exact match, in which case
 * {@code objectMatchLevel} is updated and {@code literal} is cleared.
 *
 * @return {@code true} when the shorter of the two paths is a prefix of the
 *         longer one (including when they are equal)
 */
private boolean pathMatch() {
    // Rebuild the path of values created by parsing the file.
    outputStream.reset();
    for (Byte[] boxedKey : allKeys) {
        final byte[] key = ArrayUtils.toPrimitive(boxedKey);
        outputStream.write(key, 0, key.length);
    }
    this.matched = false;
    prefixStream.reset();

    final byte[] currentPath = outputStream.toByteArray();
    final byte[] requestedPath = pathStream.toByteArray();
    final boolean requestedIsShorter = requestedPath.length < currentPath.length;
    final byte[] longer = requestedIsShorter ? currentPath : requestedPath;
    final byte[] shorter = requestedIsShorter ? requestedPath : currentPath;

    // Cut the longer path down to the length of the shorter one and compare.
    prefixStream.write(longer, 0, shorter.length);
    final boolean contains = Arrays.equals(prefixStream.toByteArray(), shorter);

    if (contains && requestedPath.length == currentPath.length) {
        this.objectMatchLevel = this.levelObject;
        this.matched = true;
        this.literal = false;
    }
    return contains;
}
/**
 * Appends to {@code allKeys} the path component for the next member of the
 * innermost open array, when the innermost container is an array and array
 * counters are being tracked.
 *
 * For an integer path step (or when the path has no step for this array
 * level) the member counter is incremented and its encoded value is
 * appended; for a boolean path step a two-byte marker is appended instead.
 *
 * Does nothing when no container is open. A top-level atomic value reaches
 * this method with an empty {@code itemStack}; previously that caused an
 * {@link IndexOutOfBoundsException}, although {@code atomicValues}
 * explicitly tolerates the empty stack.
 */
public void itemsInArray() {
    if (itemStack.isEmpty() || this.arrayCounters.isEmpty()) {
        return;
    }
    if (itemStack.get(itemStack.size() - 1) != itemType.ARRAY) {
        return;
    }
    final int slot = levelArray - 1;
    // true: count members individually; false: the query asks for all members.
    final boolean addCounter = slot < this.keysOrMembers.size() ? this.keysOrMembers.get(slot) : true;
    if (addCounter) {
        this.arrayCounters.set(slot, this.arrayCounters.get(slot) + 1);
        this.allKeys.add(this.toBytes(this.arrayCounters.get(slot)));
    } else {
        // presumably the encoded boolean "true" (tag byte + value) - verify against ValueTag
        Byte[] bool = { (byte) 0x2B, 0x01 };
        this.allKeys.add(bool);
    }
}
/**
 * Serializes the current atomic token into slot 0 of {@code abvsStack},
 * adds it to the innermost open container and, when a path is set and the
 * value is the exact match at the expected level, emits it.
 *
 * @param tag
 *            value tag to write; double, string and integer tags get a payload
 * @param parser
 *            parser positioned on the value token
 * @param out
 *            output the tag and payload are written to
 * @param svb
 *            builder used to write string payloads
 * @param levelArray
 *            current array nesting level
 * @param levelObject
 *            current object nesting level
 * @throws IOException
 *             if reading the token or writing the value fails
 */
public void atomicValues(int tag, JsonParser parser, DataOutput out, StringValueBuilder svb, int levelArray,
        int levelObject) throws IOException {
    final ArrayBackedValueStorage value = abvsStack.get(0);
    value.reset();
    out.write(tag);
    if (tag == ValueTag.XS_DOUBLE_TAG) {
        out.writeDouble(parser.getDoubleValue());
    } else if (tag == ValueTag.XS_STRING_TAG) {
        svb.write(parser.getText(), out);
    } else if (tag == ValueTag.XS_INTEGER_TAG) {
        out.writeLong(parser.getLongValue());
    }

    // A value outside of any container is only serialized, not attached.
    if (itemStack.isEmpty()) {
        return;
    }
    final itemType enclosing = itemStack.get(itemStack.size() - 1);
    final boolean atMatchLevel;
    if (enclosing == itemType.ARRAY) {
        abStack.get(levelArray - 1).addItem(value);
        atMatchLevel = levelArray == this.arrayMatchLevel;
    } else if (enclosing == itemType.OBJECT) {
        obStack.get(levelObject - 1).addItem(spStack.get(levelObject - 1), value);
        atMatchLevel = levelObject == this.objectMatchLevel;
    } else {
        return;
    }

    // The query asked for exactly this atomic value: emit it and remember that.
    if (valueSeq != null && this.matched && atMatchLevel) {
        this.literal = true;
        this.matched = false;
        writeElement(value);
    }
}
/**
 * Copies the given item into {@code tempABVS} and appends that copy as a
 * field to the frame writer.
 *
 * @param abvs
 *            storage holding the item to emit
 * @throws IOException
 *             if copying or appending fails
 */
public void writeElement(ArrayBackedValueStorage abvs) throws IOException {
    tempABVS.reset();
    tempABVS.getDataOutput().write(abvs.getByteArray(), abvs.getStartOffset(), abvs.getLength());
    final byte[] bytes = tempABVS.getByteArray();
    final int start = tempABVS.getStartOffset();
    final int length = tempABVS.getLength();
    FrameUtils.appendFieldToWriter(writer, appender, bytes, start, length);
}
/**
 * Bookkeeping done when a container opens while a path is set: if the
 * enclosing container is an array, the new container is a member of it, so
 * a path component is appended for it.
 *
 * @param count
 *            offset subtracted from {@code levelArray} to address the
 *            enclosing array; {@code startArray} passes 2 and
 *            {@code startObject} passes 1. With 2, a counter for the newly
 *            opened array is added as well.
 */
public void startArrayOrObjects(int count) {
    final boolean tracking = valueSeq != null;
    if (tracking && !this.arrayCounters.isEmpty()) {
        final int slot = levelArray - count;
        // true: count members individually; false: the query asks for all members.
        boolean addCounter = true;
        if (slot < this.keysOrMembers.size()) {
            addCounter = this.keysOrMembers.get(slot);
        }
        if (itemStack.get(itemStack.size() - 1) == itemType.ARRAY) {
            if (!addCounter) {
                XDMConstants.setTrue(bp);
                this.allKeys.add(ArrayUtils.toObject(bp.getByteArray()));
            } else {
                final Integer next = this.arrayCounters.get(slot) + 1;
                this.arrayCounters.set(slot, next);
                this.allKeys.add(this.toBytes(next));
            }
        }
    }
    if (tracking && count == 2) {
        // A new array starts with no members counted yet.
        this.arrayCounters.add(Integer.valueOf(0));
    }
}
/**
 * Opens a new array level: grows the builder and storage stacks on demand,
 * updates the path bookkeeping and, when no path is set or the current path
 * is compatible with the requested one, resets the builder for this level.
 *
 * @throws HyracksDataException
 *             if resetting the array builder fails
 */
public void startArray() throws HyracksDataException {
    levelArray++;
    if (abStack.size() < levelArray) {
        abStack.add(new ArrayBuilder());
    }
    if (abvsStack.size() - 1 < levelArray + levelObject) {
        abvsStack.add(new ArrayBackedValueStorage());
    }
    startArrayOrObjects(2);
    itemStack.add(itemType.ARRAY);

    // pathMatch() always runs: it also refreshes the match state.
    final boolean onPath = this.pathMatch();
    if (!onPath && this.valueSeq != null) {
        return;
    }
    final ArrayBackedValueStorage storage = abvsStack.get(levelArray + levelObject);
    storage.reset();
    try {
        abStack.get(levelArray - 1).reset(storage);
    } catch (Exception e) {
        throw new HyracksDataException("Accessing index out of bounds", e);
    }
}
/**
 * Opens a new object level: grows the builder and storage stacks on demand,
 * updates the path bookkeeping and, when no path is set or the current path
 * is compatible with the requested one, resets the builder for this level.
 *
 * @throws HyracksDataException
 *             if resetting the object builder fails
 */
public void startObject() throws HyracksDataException {
    levelObject++;
    if (obStack.size() < levelObject) {
        obStack.add(new ObjectBuilder());
    }
    if (abvsStack.size() - 1 < levelArray + levelObject) {
        abvsStack.add(new ArrayBackedValueStorage());
    }
    startArrayOrObjects(1);
    itemStack.add(itemType.OBJECT);

    // pathMatch() always runs: it also refreshes the match state.
    final boolean onPath = this.pathMatch();
    if (!onPath && this.valueSeq != null) {
        return;
    }
    final ArrayBackedValueStorage storage = abvsStack.get(levelArray + levelObject);
    storage.reset();
    try {
        obStack.get(levelObject - 1).reset(storage);
    } catch (Exception e) {
        throw new HyracksDataException("Accessing index out of bounds", e);
    }
}
/**
 * Handles a field name: stores it as the current key of the innermost open
 * object and, when a path is set, appends it to {@code allKeys}. If the
 * requested path has a boolean step right after the path of the enclosing
 * items, the query asks for all keys of this object, so the key itself is
 * emitted as a string item.
 *
 * @param parser
 *            parser positioned on the field name token
 * @throws HyracksDataException
 *             wrapping any exception raised while writing the key
 */
public void startFieldName(JsonParser parser) throws HyracksDataException {
    if (levelObject > spStack.size()) {
        keyStack.add(new ArrayBackedValueStorage());
        spStack.add(new UTF8StringPointable());
    }
    final ArrayBackedValueStorage keyStorage = keyStack.get(levelObject - 1);
    keyStorage.reset();
    final DataOutput outk = keyStorage.getDataOutput();
    try {
        svb.write(parser.getText(), outk);
        final UTF8StringPointable keyPointable = spStack.get(levelObject - 1);
        keyPointable.set(keyStorage);
        if (this.valueSeq == null) {
            return;
        }
        outputStream.reset();
        outputStream.write(keyPointable.getByteArray(), 0, keyPointable.getLength());
        allKeys.add(ArrayUtils.toObject(outputStream.toByteArray()));

        // Byte length of the path leading to this key (every component but the key itself).
        int length = 0;
        for (int i = 0; i < allKeys.size() - 1; i++) {
            length += allKeys.get(i).length;
        }
        // If the next two bytes of the requested path represent a boolean (a boolean
        // has only two bytes), the query asks for all the keys of the object.
        final byte[] requestedPath = pathStream.toByteArray();
        if (length + 2 <= requestedPath.length) {
            // The third argument is a length, not an end offset; only the tag byte is read.
            tvp.set(requestedPath, length, 2);
            if (tvp.getTag() == ValueTag.XS_BOOLEAN_TAG) {
                abvsStack.get(0).reset();
                out.write(ValueTag.XS_STRING_TAG);
                svb.write(parser.getText(), out);
                writeElement(abvsStack.get(0));
            }
        }
    } catch (Exception e) {
        throw new HyracksDataException("Writing in out of bounds space", e);
    }
}
/**
 * Handles an atomic value token: registers its array position, builds the
 * value when no path is set or the current path is compatible with the
 * requested one, and finally drops the path component that addressed it.
 *
 * @param tag
 *            value tag for the token
 * @param parser
 *            parser positioned on the value token
 * @throws HyracksDataException
 *             wrapping any exception raised while building the value
 */
public void startAtomicValues(int tag, JsonParser parser) throws HyracksDataException {
    itemsInArray();
    // pathMatch() always runs: it also refreshes the match state.
    final boolean onPath = this.pathMatch();
    if (onPath || this.valueSeq == null) {
        try {
            atomicValues(tag, parser, out, svb, levelArray, levelObject);
        } catch (Exception e) {
            throw new HyracksDataException(e);
        }
    }
    // The key or array counter that addressed this value is no longer part of the path.
    if (!allKeys.isEmpty()) {
        allKeys.remove(allKeys.size() - 1);
    }
}
}