blob: eb81d3fd96c6e3375a8e29f314ef1615f799acbe [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.external.parser;
import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.BitSet;
import java.util.List;
import org.apache.asterix.builders.AbvsBuilderFactory;
import org.apache.asterix.builders.IARecordBuilder;
import org.apache.asterix.builders.IAsterixListBuilder;
import org.apache.asterix.builders.ListBuilderFactory;
import org.apache.asterix.builders.OrderedListBuilder;
import org.apache.asterix.builders.RecordBuilderFactory;
import org.apache.asterix.builders.UnorderedListBuilder;
import org.apache.asterix.dataflow.data.nontagged.serde.APolygonSerializerDeserializer;
import org.apache.asterix.external.api.IRawRecord;
import org.apache.asterix.external.api.IRecordDataParser;
import org.apache.asterix.external.api.IStreamDataParser;
import org.apache.asterix.om.base.ABoolean;
import org.apache.asterix.om.base.AMutableInterval;
import org.apache.asterix.om.base.ANull;
import org.apache.asterix.om.base.temporal.GregorianCalendarSystem;
import org.apache.asterix.om.types.AOrderedListType;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.om.types.AUnionType;
import org.apache.asterix.om.types.AUnorderedListType;
import org.apache.asterix.om.types.IAType;
import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
import org.apache.asterix.om.types.hierachy.ITypeConvertComputer;
import org.apache.asterix.om.util.container.IObjectPool;
import org.apache.asterix.om.util.container.ListObjectPool;
import org.apache.asterix.runtime.operators.file.adm.AdmLexer;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IMutableValueStorage;
import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
/**
* Parser for ADM formatted data.
*/
public class ADMDataParser extends AbstractDataParser implements IStreamDataParser, IRecordDataParser<char[]> {
// Lexer producing the ADM tokens; created in the constructor for record parsing (isStream == false)
// or by setInputStream(InputStream) otherwise.
private AdmLexer admLexer;
// Declared type of the top-level instance; handed to parseAdmInstance by both parse methods.
private final ARecordType recordType;
// Copy of the constructor's isStream flag; when false the lexer is created eagerly in the constructor.
private boolean isStreamParser = true;
// Result of the most recent checkOptionalConstraints call made by parseRecord:
// index of a mandatory field that was not supplied, or -1 when there is none.
private int nullableFieldId = 0;
// Scratch buffer that receives a parsed numeric/constructor value when it still has to be
// converted (promoted or demoted) to the target type before being written to the real output.
private final ArrayBackedValueStorage castBuffer = new ArrayBackedValueStorage();
// Pools of reusable record builders, list builders and temporary buffers; all three are
// reset by resetPools() at the start of every parse call.
private final IObjectPool<IARecordBuilder, ATypeTag> recordBuilderPool = new ListObjectPool<IARecordBuilder, ATypeTag>(
new RecordBuilderFactory());
private final IObjectPool<IAsterixListBuilder, ATypeTag> listBuilderPool = new ListObjectPool<IAsterixListBuilder, ATypeTag>(
new ListBuilderFactory());
private final IObjectPool<IMutableValueStorage, ATypeTag> abvsBuilderPool = new ListObjectPool<IMutableValueStorage, ATypeTag>(
new AbvsBuilderFactory());
// Reusable interval object that parseInterval fills in before serializing it.
protected final AMutableInterval aInterval = new AMutableInterval(0L, 0L, (byte) 0);
// Fragments used to build the type-mismatch error messages:
// <mismatchErrorMessage><expected type name><mismatchErrorMessage2><actual type tag>.
private final String mismatchErrorMessage = "Mismatch Type, expecting a value of type ";
private final String mismatchErrorMessage2 = " got a value of type ";
/**
 * Exception raised for malformed or ill-typed ADM input. It can optionally carry the
 * file name, line and column of the failure, which are folded into {@link #getMessage()}.
 */
static class ParseException extends HyracksDataException {
    private static final long serialVersionUID = 1L;

    /** Name of the file being parsed, or {@code null} when unknown. */
    private String filename;
    /** Line of the failure; negative when unknown. */
    private int line = -1;
    /** Column of the failure; negative when unknown. */
    private int column = -1;

    public ParseException(String message) {
        super(message);
    }

    public ParseException(Throwable cause) {
        super(cause);
    }

    public ParseException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Wraps {@code cause} and records where in the input it happened.
     */
    public ParseException(Throwable cause, String filename, int line, int column) {
        super(cause);
        setLocation(filename, line, column);
    }

    /**
     * Records the position in the input that this exception refers to.
     *
     * @param filename file name, may be {@code null}
     * @param line line number; a negative value means "unknown"
     * @param column column number; a negative value means "unknown"
     */
    public void setLocation(String filename, int line, int column) {
        this.filename = filename;
        this.line = line;
        this.column = column;
    }

    /**
     * @return "Parse error", followed by the file name and position when they are known,
     *         followed by ": " and the message of the superclass
     */
    @Override
    public String getMessage() {
        final StringBuilder text = new StringBuilder("Parse error");
        if (filename != null) {
            text.append(" in file ").append(filename);
        }
        if (line >= 0) {
            if (column < 0) {
                text.append(" in line ").append(line);
            } else {
                text.append(" at (").append(line).append(", ").append(column).append(")");
            }
        }
        text.append(": ").append(super.getMessage());
        return text.toString();
    }
}
/**
 * Creates a parser that has no file name to report in error locations.
 *
 * @param recordType declared type of the top-level instances
 * @param isStream {@code true} when the input is supplied later through
 *            {@link #setInputStream(InputStream)}; {@code false} for record-at-a-time parsing
 */
public ADMDataParser(ARecordType recordType, boolean isStream) {
    this(null, recordType, isStream);
}

/**
 * Creates a parser.
 *
 * @param filename file name attached to parse errors, may be {@code null}
 * @param recordType declared type of the top-level instances
 * @param isStream {@code true} when the input is supplied later through
 *            {@link #setInputStream(InputStream)}; {@code false} for record-at-a-time parsing
 */
public ADMDataParser(String filename, ARecordType recordType, boolean isStream) {
    this.recordType = recordType;
    this.filename = filename;
    this.isStreamParser = isStream;
    if (isStream) {
        // The lexer of a stream parser is created once the input stream is known.
        return;
    }
    // Record parsing feeds buffers to the lexer, so it can be created right away.
    this.admLexer = new AdmLexer();
}
/**
 * Parses the next top-level ADM instance from the lexer and writes it to {@code out}.
 *
 * @param out destination of the serialized instance
 * @return {@code false} when the lexer is at the end of the input, {@code true} otherwise
 * @throws IOException always a {@link ParseException} carrying the current lexer position
 */
@Override
public boolean parse(DataOutput out) throws IOException {
    resetPools();
    try {
        final boolean instanceRead = parseAdmInstance(recordType, out);
        return instanceRead;
    } catch (ParseException parseFailure) {
        // Already the right type: just stamp it with the current position.
        parseFailure.setLocation(filename, admLexer.getLine(), admLexer.getColumn());
        throw parseFailure;
    } catch (IOException ioFailure) {
        final int line = admLexer.getLine();
        final int column = admLexer.getColumn();
        throw new ParseException(ioFailure, filename, line, column);
    }
}
/**
 * Parses the ADM instance held by {@code record} and writes it to {@code out}.
 *
 * @param record raw record whose characters are handed to the lexer
 * @param out destination of the serialized instance
 * @throws IOException always a {@link ParseException} carrying the current lexer position
 */
@Override
public void parse(IRawRecord<? extends char[]> record, DataOutput out) throws IOException {
    resetPools();
    try {
        admLexer.setBuffer(record.get());
        parseAdmInstance(recordType, out);
    } catch (ParseException parseFailure) {
        // Already the right type: just stamp it with the current position.
        parseFailure.setLocation(filename, admLexer.getLine(), admLexer.getColumn());
        throw parseFailure;
    } catch (IOException ioFailure) {
        final int line = admLexer.getLine();
        final int column = admLexer.getColumn();
        throw new ParseException(ioFailure, filename, line, column);
    }
}
/**
 * Replaces the lexer with a new one that reads from {@code in}.
 *
 * @param in stream containing the ADM text
 */
@Override
public void setInputStream(InputStream in) throws IOException {
    final InputStreamReader reader = new InputStreamReader(in);
    admLexer = new AdmLexer(reader);
}
/**
 * Reads one token and, unless it is the end of the input, parses the value starting there.
 *
 * @param objectType declared type of the value, or {@code null} when it is untyped
 * @param out destination of the serialized value
 * @return {@code false} when the first token is EOF (nothing is written), {@code true} otherwise
 */
protected boolean parseAdmInstance(IAType objectType, DataOutput out) throws IOException {
    final int firstToken = admLexer.next();
    if (firstToken != AdmLexer.TOKEN_EOF) {
        admFromLexerStream(firstToken, objectType, out);
        return true;
    }
    return false;
}
/**
 * Serializes into {@code out} the ADM value whose first token is {@code token}, pulling any
 * further tokens it needs from the lexer.
 * <p>
 * Literals are checked against {@code objectType} and written directly; constructor tokens are
 * delegated to {@link #parseConstructor}; lists, records and intervals are delegated to their
 * dedicated parse methods. An EOF token writes nothing.
 *
 * @param token kind of the token that starts the value
 * @param objectType declared type of the value, or {@code null} for an untyped (open) value,
 *            in which case every type check succeeds
 * @param out destination of the serialized value
 * @throws IOException a {@link ParseException} when the token is unexpected, the value does not
 *             fit {@code objectType}, or a constructor is malformed
 */
private void admFromLexerStream(int token, IAType objectType, DataOutput out) throws IOException {
    switch (token) {
        case AdmLexer.TOKEN_NULL_LITERAL: {
            if (checkType(ATypeTag.NULL, objectType)) {
                nullSerde.serialize(ANull.NULL, out);
            } else {
                throw new ParseException("This field can not be null");
            }
            break;
        }
        case AdmLexer.TOKEN_TRUE_LITERAL: {
            if (checkType(ATypeTag.BOOLEAN, objectType)) {
                booleanSerde.serialize(ABoolean.TRUE, out);
            } else {
                throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
            }
            break;
        }
        case AdmLexer.TOKEN_BOOLEAN_CONS: {
            parseConstructor(ATypeTag.BOOLEAN, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_FALSE_LITERAL: {
            if (checkType(ATypeTag.BOOLEAN, objectType)) {
                booleanSerde.serialize(ABoolean.FALSE, out);
            } else {
                throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
            }
            break;
        }
        case AdmLexer.TOKEN_DOUBLE_LITERAL: {
            parseToNumericTarget(ATypeTag.DOUBLE, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_DOUBLE_CONS: {
            parseConstructor(ATypeTag.DOUBLE, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_FLOAT_LITERAL: {
            parseToNumericTarget(ATypeTag.FLOAT, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_FLOAT_CONS: {
            parseConstructor(ATypeTag.FLOAT, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_INT8_LITERAL: {
            parseAndCastNumeric(ATypeTag.INT8, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_INT8_CONS: {
            parseConstructor(ATypeTag.INT8, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_INT16_LITERAL: {
            parseAndCastNumeric(ATypeTag.INT16, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_INT16_CONS: {
            parseConstructor(ATypeTag.INT16, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_INT_LITERAL: {
            // For an INT value without any suffix, we return it as INT64 type value since it is
            // the default integer type.
            parseAndCastNumeric(ATypeTag.INT64, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_INT32_LITERAL: {
            parseAndCastNumeric(ATypeTag.INT32, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_INT32_CONS: {
            parseConstructor(ATypeTag.INT32, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_INT64_LITERAL: {
            parseAndCastNumeric(ATypeTag.INT64, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_INT64_CONS: {
            parseConstructor(ATypeTag.INT64, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_STRING_LITERAL: {
            if (checkType(ATypeTag.STRING, objectType)) {
                // Drop the surrounding quotes before resolving escapes.
                String tokenImage = admLexer.getLastTokenImage().substring(1,
                        admLexer.getLastTokenImage().length() - 1);
                aString.setValue(admLexer.containsEscapes() ? replaceEscapes(tokenImage) : tokenImage);
                stringSerde.serialize(aString, out);
            } else if (checkType(ATypeTag.UUID, objectType)) {
                // Dealing with UUID type that is represented by a string
                String tokenImage = admLexer.getLastTokenImage().substring(1,
                        admLexer.getLastTokenImage().length() - 1);
                aUUID.parseUUIDString(tokenImage);
                uuidSerde.serialize(aUUID, out);
            } else {
                throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
            }
            break;
        }
        case AdmLexer.TOKEN_STRING_CONS: {
            parseConstructor(ATypeTag.STRING, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_HEX_CONS:
        case AdmLexer.TOKEN_BASE64_CONS: {
            if (checkType(ATypeTag.BINARY, objectType)) {
                if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
                    if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
                        parseToBinaryTarget(token, admLexer.getLastTokenImage(), out);
                        if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
                            break;
                        }
                    }
                }
            }
            // Reached on a type mismatch or a malformed constructor. For an untyped value
            // (objectType == null) the type check always passes, so only the malformed case
            // can get here without a declared type; report BINARY instead of dereferencing null.
            String expectedTypeName = (objectType == null) ? ATypeTag.BINARY.toString()
                    : objectType.getTypeName();
            throw new ParseException(mismatchErrorMessage + expectedTypeName);
        }
        case AdmLexer.TOKEN_DATE_CONS: {
            parseConstructor(ATypeTag.DATE, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_TIME_CONS: {
            parseConstructor(ATypeTag.TIME, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_DATETIME_CONS: {
            parseConstructor(ATypeTag.DATETIME, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_INTERVAL_CONS: {
            if (checkType(ATypeTag.INTERVAL, objectType)) {
                objectType = getComplexType(objectType, ATypeTag.INTERVAL);
                parseInterval(ATypeTag.INTERVAL, objectType, out);
            } else {
                throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
            }
            break;
        }
        case AdmLexer.TOKEN_DURATION_CONS: {
            parseConstructor(ATypeTag.DURATION, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_YEAR_MONTH_DURATION_CONS: {
            parseConstructor(ATypeTag.YEARMONTHDURATION, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_DAY_TIME_DURATION_CONS: {
            parseConstructor(ATypeTag.DAYTIMEDURATION, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_POINT_CONS: {
            parseConstructor(ATypeTag.POINT, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_POINT3D_CONS: {
            parseConstructor(ATypeTag.POINT3D, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_CIRCLE_CONS: {
            parseConstructor(ATypeTag.CIRCLE, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_RECTANGLE_CONS: {
            parseConstructor(ATypeTag.RECTANGLE, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_LINE_CONS: {
            parseConstructor(ATypeTag.LINE, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_POLYGON_CONS: {
            parseConstructor(ATypeTag.POLYGON, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_START_UNORDERED_LIST: {
            if (checkType(ATypeTag.UNORDEREDLIST, objectType)) {
                objectType = getComplexType(objectType, ATypeTag.UNORDEREDLIST);
                parseUnorderedList((AUnorderedListType) objectType, out);
            } else {
                throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
            }
            break;
        }
        case AdmLexer.TOKEN_START_ORDERED_LIST: {
            if (checkType(ATypeTag.ORDEREDLIST, objectType)) {
                objectType = getComplexType(objectType, ATypeTag.ORDEREDLIST);
                parseOrderedList((AOrderedListType) objectType, out);
            } else {
                throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
            }
            break;
        }
        case AdmLexer.TOKEN_START_RECORD: {
            if (checkType(ATypeTag.RECORD, objectType)) {
                objectType = getComplexType(objectType, ATypeTag.RECORD);
                parseRecord((ARecordType) objectType, out);
            } else {
                throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
            }
            break;
        }
        case AdmLexer.TOKEN_UUID_CONS: {
            parseConstructor(ATypeTag.UUID, objectType, out);
            break;
        }
        case AdmLexer.TOKEN_EOF: {
            // Nothing to write at the end of the input.
            break;
        }
        default: {
            throw new ParseException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token) + ".");
        }
    }
}
/**
 * Resolves the backslash escape sequences in the content of a string token (quotes already
 * removed by the caller).
 * <p>
 * The work is done in place on a char copy of the input: {@code readpos} scans the source,
 * {@code writepos} is where the next output char goes, and {@code movemarker} marks the start
 * of the pending run of ordinary chars that still has to be shifted left over the space freed
 * by earlier escapes.
 *
 * @param tokenImage string content possibly containing escapes
 * @return the content with every escape replaced by the character it denotes
 * @throws ParseException on an unknown escape, a backslash at the very end of the string, or a
 *             {@code \}{@code u} escape that is not followed by four hexadecimal digits
 */
private String replaceEscapes(String tokenImage) throws ParseException {
    char[] chars = tokenImage.toCharArray();
    int len = chars.length;
    int readpos = 0;
    int writepos = 0;
    int movemarker = 0;
    while (readpos < len) {
        if (chars[readpos] == '\\') {
            if (readpos + 1 >= len) {
                // A lone trailing backslash would otherwise read past the end of the array.
                throw new ParseException("Illegal escape '\\' at the end of the string");
            }
            // Close the gap left by previous escapes for the ordinary chars seen since then.
            moveChars(chars, movemarker, readpos, readpos - writepos);
            switch (chars[readpos + 1]) {
                case '\\':
                case '\"':
                case '/':
                    chars[writepos] = chars[readpos + 1];
                    break;
                case 'b':
                    chars[writepos] = '\b';
                    break;
                case 'f':
                    chars[writepos] = '\f';
                    break;
                case 'n':
                    chars[writepos] = '\n';
                    break;
                case 'r':
                    chars[writepos] = '\r';
                    break;
                case 't':
                    chars[writepos] = '\t';
                    break;
                case 'u':
                    chars[writepos] = parseUnicodeEscapeDigits(chars, readpos + 2);
                    // Skip the four hex digits; the escape letter is skipped below.
                    readpos += 4;
                    break;
                default:
                    throw new ParseException("Illegal escape '\\" + chars[readpos + 1] + "'");
            }
            ++readpos;
            movemarker = readpos + 1;
        }
        ++writepos;
        ++readpos;
    }
    // Shift the trailing run of ordinary chars; the result is shorter by the total gap.
    moveChars(chars, movemarker, len, readpos - writepos);
    return new String(chars, 0, len - (readpos - writepos));
}

/**
 * Decodes the four hexadecimal digits of a unicode escape.
 *
 * @param chars buffer holding the escape
 * @param start index of the first of the four digits
 * @return the character with the decoded code unit
 * @throws ParseException when fewer than four chars remain or one of them is not a hex digit
 */
private static char parseUnicodeEscapeDigits(char[] chars, int start) throws ParseException {
    if (start + 4 > chars.length) {
        throw new ParseException("Illegal escape '\\u': expecting 4 hexadecimal digits");
    }
    int codeUnit = 0;
    for (int i = start; i < start + 4; ++i) {
        int digit = Character.digit(chars[i], 16);
        if (digit < 0) {
            throw new ParseException("Illegal escape '\\u" + new String(chars, start, 4) + "'");
        }
        codeUnit = (codeUnit << 4) | digit;
    }
    return (char) codeUnit;
}
/**
 * Copies {@code chars[start..end)} to the positions {@code offset} places lower, one char at a
 * time in ascending index order. Does nothing when {@code offset} is zero.
 *
 * @param chars buffer modified in place
 * @param start index of the first char to move (inclusive)
 * @param end index after the last char to move (exclusive)
 * @param offset distance subtracted from each source index to get its destination index
 */
private static void moveChars(char[] chars, int start, int end, int offset) {
    if (offset != 0) {
        int source = start;
        while (source < end) {
            chars[source - offset] = chars[source];
            source++;
        }
    }
}
/**
 * Finds the type with tag {@code tag} inside {@code aObjectType}.
 *
 * @param aObjectType declared type, may be {@code null}
 * @param tag tag of the wanted type
 * @return {@code aObjectType} itself when it has the tag; the actual type of a union when that
 *         has the tag; {@code null} otherwise (including when {@code aObjectType} is null)
 */
private IAType getComplexType(IAType aObjectType, ATypeTag tag) {
    if (aObjectType == null) {
        return null;
    }
    final ATypeTag declaredTag = aObjectType.getTypeTag();
    if (declaredTag == tag) {
        return aObjectType;
    }
    if (declaredTag != ATypeTag.UNION) {
        // Callers check the type first, so this is not expected to happen.
        return null;
    }
    final IAType actual = ((AUnionType) aObjectType).getActualType();
    return (actual.getTypeTag() == tag) ? actual : null;
}
/**
 * Determines the type tag a value of tag {@code expectedTypeTag} has to be written as in order
 * to fit {@code aObjectType}.
 *
 * @param expectedTypeTag tag of the value found in the input
 * @param aObjectType declared type, or {@code null} when the value is untyped
 * @return {@code expectedTypeTag} for an untyped value; otherwise the tag of the declared type
 *         (or of the first union member) that the value can be promoted or demoted to, or
 *         {@code null} when there is none
 */
private ATypeTag getTargetTypeTag(ATypeTag expectedTypeTag, IAType aObjectType) throws HyracksDataException {
    if (aObjectType == null) {
        return expectedTypeTag;
    }
    if (aObjectType.getTypeTag() == ATypeTag.UNION) {
        final List<IAType> members = ((AUnionType) aObjectType).getUnionList();
        for (IAType member : members) {
            final ATypeTag memberTag = member.getTypeTag();
            if (ATypeHierarchy.canPromote(expectedTypeTag, memberTag)
                    || ATypeHierarchy.canDemote(expectedTypeTag, memberTag)) {
                return memberTag;
            }
        }
        return null;
    }
    final ATypeTag declaredTag = aObjectType.getTypeTag();
    final boolean convertible = ATypeHierarchy.canPromote(expectedTypeTag, declaredTag)
            || ATypeHierarchy.canDemote(expectedTypeTag, declaredTag);
    return convertible ? declaredTag : null;
}
/**
 * Tells whether a value of tag {@code expectedTypeTag} is acceptable for {@code aObjectType},
 * i.e. whether {@link #getTargetTypeTag} finds a target tag for it.
 */
private boolean checkType(ATypeTag expectedTypeTag, IAType aObjectType) throws IOException {
    final ATypeTag target = getTargetTypeTag(expectedTypeTag, aObjectType);
    return target != null;
}
/**
 * Parses the fields of a record up to and including the END_RECORD token (the START_RECORD
 * token has already been consumed) and writes the built record to {@code out}.
 * <p>
 * Fields declared in {@code recType} are added by field id and parsed against their declared
 * type; all other fields are added by name as untyped values, which is only allowed when there
 * is no record type or the record type is open.
 *
 * @param recType declared record type, or {@code null} for an untyped record
 * @param out destination of the serialized record
 * @throws IOException a {@link ParseException} on a syntax error, an extra field in a closed
 *             record, or a declared field that is missing and not allowed to be
 */
private void parseRecord(ARecordType recType, DataOutput out) throws IOException {
    ArrayBackedValueStorage fieldValueBuffer = getTempBuffer();
    ArrayBackedValueStorage fieldNameBuffer = getTempBuffer();
    IARecordBuilder recBuilder = getRecordBuilder();
    // One bit per declared field, set once the field has been seen in the input.
    BitSet nulls = null;
    if (recType != null) {
        // TODO: use BitSet Pool
        nulls = new BitSet(recType.getFieldNames().length);
        recBuilder.reset(recType);
    } else {
        recBuilder.reset(null);
    }
    recBuilder.init();
    int token;
    boolean inRecord = true;
    boolean expectingRecordField = false;
    boolean first = true;
    boolean openRecordField = false;
    int fieldId = 0;
    IAType fieldType = null;
    do {
        token = admLexer.next();
        switch (token) {
            case AdmLexer.TOKEN_END_RECORD: {
                if (expectingRecordField) {
                    throw new ParseException("Found END_RECORD while expecting a record field.");
                }
                inRecord = false;
                break;
            }
            case AdmLexer.TOKEN_STRING_LITERAL: {
                // we've read the name of the field
                // now read the content
                fieldNameBuffer.reset();
                fieldValueBuffer.reset();
                expectingRecordField = false;
                // Field name without its surrounding quotes.
                String image = admLexer.getLastTokenImage();
                String fldName = image.substring(1, image.length() - 1);
                boolean declaredField = false;
                if (recType != null) {
                    fieldId = recBuilder.getFieldId(fldName);
                    declaredField = fieldId >= 0;
                    if (!declaredField && !recType.isOpen()) {
                        throw new ParseException(
                                "This record is closed, you can not add extra fields! new field name: " + fldName);
                    }
                }
                if (declaredField) {
                    // a closed field
                    nulls.set(fieldId);
                    fieldType = recType.getFieldTypes()[fieldId];
                    openRecordField = false;
                } else {
                    // an open field: it is stored under its serialized name and has no declared type
                    aStringFieldName.setValue(fldName);
                    stringSerde.serialize(aStringFieldName, fieldNameBuffer.getDataOutput());
                    openRecordField = true;
                    fieldType = null;
                }
                token = admLexer.next();
                if (token != AdmLexer.TOKEN_COLON) {
                    throw new ParseException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token)
                            + " while expecting \":\".");
                }
                token = admLexer.next();
                this.admFromLexerStream(token, fieldType, fieldValueBuffer.getDataOutput());
                if (openRecordField) {
                    recBuilder.addField(fieldNameBuffer, fieldValueBuffer);
                } else {
                    recBuilder.addField(fieldId, fieldValueBuffer);
                }
                break;
            }
            case AdmLexer.TOKEN_COMMA: {
                if (first) {
                    throw new ParseException("Found COMMA before any record field.");
                }
                if (expectingRecordField) {
                    throw new ParseException("Found COMMA while expecting a record field.");
                }
                expectingRecordField = true;
                break;
            }
            default: {
                throw new ParseException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token)
                        + " while parsing record fields.");
            }
        }
        first = false;
    } while (inRecord);
    if (recType != null) {
        // Every declared field that was not supplied must be allowed to be missing.
        nullableFieldId = checkOptionalConstraints(recType, nulls);
        if (nullableFieldId != -1) {
            throw new ParseException("Field: " + recType.getFieldNames()[nullableFieldId] + " can not be null");
        }
    }
    recBuilder.write(out, true);
}
/**
 * Looks for a declared field that was not supplied although its type requires a value.
 * <p>
 * A field whose bit is clear in {@code nulls} is acceptable only when its type is NULL or a
 * union for which {@code isUnknownableType()} holds.
 *
 * @param recType record type whose fields are checked
 * @param nulls bit set with one bit per declared field, set for the fields that were supplied
 * @return index of the first offending field, or {@code -1} when there is none
 */
private int checkOptionalConstraints(ARecordType recType, BitSet nulls) {
    final IAType[] fieldTypes = recType.getFieldTypes();
    for (int idx = 0; idx < fieldTypes.length; idx++) {
        if (nulls.get(idx)) {
            continue;
        }
        final IAType fieldType = fieldTypes[idx];
        final ATypeTag fieldTag = fieldType.getTypeTag();
        if (fieldTag == ATypeTag.NULL) {
            continue;
        }
        if (fieldTag != ATypeTag.UNION || !((AUnionType) fieldType).isUnknownableType()) {
            return idx;
        }
    }
    return -1;
}
/**
 * Parses the argument list of an interval constructor, i.e. "(" start "," end ")" where start
 * and end are both date, both time or both datetime constructors, and serializes the interval.
 * The interval constructor token itself has already been consumed.
 *
 * @param typeTag not used by this method
 * @param objectType not used by this method
 * @param out destination of the serialized interval
 * @throws IOException a {@link ParseException} when the opening or closing parenthesis is
 *             missing, the end points are malformed or of an unsupported kind, or the interval
 *             value is rejected by {@code aInterval}
 */
private void parseInterval(ATypeTag typeTag, IAType objectType, DataOutput out) throws IOException {
    int token = admLexer.next();
    if (token != AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
        // Without this check a missing "(" would fall through and serialize an interval
        // with start 0, end 0 and type tag 0.
        throw new ParseException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token)
                + " while expecting \"(\" after the interval constructor.");
    }
    // The kind of the first end point decides the kind of the interval.
    token = admLexer.next();
    ATypeTag intervalType;
    switch (token) {
        case AdmLexer.TOKEN_DATE_CONS:
            intervalType = ATypeTag.DATE;
            break;
        case AdmLexer.TOKEN_TIME_CONS:
            intervalType = ATypeTag.TIME;
            break;
        case AdmLexer.TOKEN_DATETIME_CONS:
            intervalType = ATypeTag.DATETIME;
            break;
        default:
            throw new ParseException("Unsupported interval type: " + AdmLexer.tokenKindToString(token) + ".");
    }
    // Interval
    long start = parseIntervalArgument(intervalType);
    long end = parseIntervalSecondArgument(token, intervalType);
    byte tag = intervalType.serialize();
    // Closing interval.
    token = admLexer.next();
    if (token != AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
        throw new ParseException("Interval was not closed.");
    }
    try {
        aInterval.setValue(start, end, tag);
    } catch (HyracksDataException e) {
        throw new ParseException(e);
    }
    intervalSerde.serialize(aInterval, out);
}
/**
 * Parses the "," and the end point that follow the start point of an interval.
 *
 * @param startToken token kind of the start point constructor; the end point must use the same
 * @param parseType kind of value both end points hold
 * @return the end point as returned by {@link #parseIntervalArgument}
 * @throws IOException a {@link ParseException} when the comma is missing or the end point uses
 *             a different constructor than the start point
 */
private long parseIntervalSecondArgument(int startToken, ATypeTag parseType) throws IOException {
    final int separator = admLexer.next();
    if (separator != AdmLexer.TOKEN_COMMA) {
        throw new ParseException("Missing COMMA before interval end point.");
    }
    final int endToken = admLexer.next();
    if (endToken != startToken) {
        throw new ParseException("The interval start and end point types do not match: "
                + AdmLexer.tokenKindToString(startToken) + " != " + AdmLexer.tokenKindToString(endToken));
    }
    return parseIntervalArgument(parseType);
}
/**
 * Parses one interval end point of the form "(" string ")"; the constructor token in front of
 * it has already been consumed.
 *
 * @param tag kind of the end point: DATE, TIME or DATETIME
 * @return for DATE the parsed date part divided by {@code CHRONON_OF_DAY}; for TIME the parsed
 *         time part; for DATETIME the sum of the date part before the 'T' and the time part
 *         after it
 * @throws IOException a {@link ParseException} when the parentheses or the string are missing,
 *             a datetime has no 'T', or {@code tag} is of another kind
 */
private long parseIntervalArgument(ATypeTag tag) throws IOException {
    if (admLexer.next() != AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
        throw new ParseException("Interval argument not properly constructed.");
    }
    if (admLexer.next() != AdmLexer.TOKEN_STRING_LITERAL) {
        throw new ParseException("Interval argument not properly constructed.");
    }
    final String image = admLexer.getLastTokenImage();
    final int lastIndex = image.length() - 1;
    long chronon = 0;
    switch (tag) {
        case DATE:
            chronon += (parseDatePart(image, 0, lastIndex) / GregorianCalendarSystem.CHRONON_OF_DAY);
            break;
        case TIME:
            chronon += parseTimePart(image, 0, lastIndex);
            break;
        case DATETIME:
            final int separatorIndex = image.indexOf('T');
            if (separatorIndex < 0) {
                throw new ParseException(
                        "This can not be an instance of interval: missing T for a datetime value.");
            }
            chronon += parseDatePart(image, 0, separatorIndex - 1);
            chronon += parseTimePart(image, separatorIndex + 1, lastIndex);
            break;
        default:
            throw new ParseException("Unsupported interval type: " + tag.name() + ".");
    }
    if (admLexer.next() != AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
        throw new ParseException("Interval argument not properly constructed.");
    }
    return chronon;
}
/**
 * Parses the items of an ordered list up to and including its end token (the start token has
 * already been consumed) and writes the built list to {@code out}.
 *
 * @param oltype declared list type, or {@code null} for an untyped list
 * @param out destination of the serialized list
 * @throws IOException a {@link ParseException} on a misplaced comma or end token, or when an
 *             item fails to parse
 */
private void parseOrderedList(AOrderedListType oltype, DataOutput out) throws IOException {
    final ArrayBackedValueStorage itemBuffer = getTempBuffer();
    final OrderedListBuilder builder = (OrderedListBuilder) getOrderedListBuilder();
    final IAType itemType = (oltype != null) ? oltype.getItemType() : null;
    builder.reset(oltype);

    boolean finished = false;
    boolean awaitingItem = false;
    boolean atStart = true;
    while (!finished) {
        final int token = admLexer.next();
        switch (token) {
            case AdmLexer.TOKEN_END_ORDERED_LIST:
                if (awaitingItem) {
                    throw new ParseException("Found END_COLLECTION while expecting a list item.");
                }
                finished = true;
                break;
            case AdmLexer.TOKEN_COMMA:
                if (atStart) {
                    throw new ParseException("Found COMMA before any list item.");
                }
                if (awaitingItem) {
                    throw new ParseException("Found COMMA while expecting a list item.");
                }
                awaitingItem = true;
                break;
            default:
                // Anything else starts a list item.
                awaitingItem = false;
                itemBuffer.reset();
                admFromLexerStream(token, itemType, itemBuffer.getDataOutput());
                builder.addItem(itemBuffer);
                break;
        }
        atStart = false;
    }
    builder.write(out, true);
}
/**
 * Parses the items of an unordered list and writes the built list to {@code out}. The start
 * token has already been consumed; the list ends with two consecutive END_RECORD tokens.
 *
 * @param uoltype declared list type, or {@code null} for an untyped list
 * @param out destination of the serialized list
 * @throws IOException a {@link ParseException} on a misplaced comma, a single END_RECORD, an
 *             end of list right after a comma, or when an item fails to parse
 */
private void parseUnorderedList(AUnorderedListType uoltype, DataOutput out) throws IOException {
    final ArrayBackedValueStorage itemBuffer = getTempBuffer();
    final UnorderedListBuilder builder = (UnorderedListBuilder) getUnorderedListBuilder();
    final IAType itemType = (uoltype != null) ? uoltype.getItemType() : null;
    builder.reset(uoltype);

    boolean finished = false;
    boolean awaitingItem = false;
    boolean atStart = true;
    while (!finished) {
        final int token = admLexer.next();
        switch (token) {
            case AdmLexer.TOKEN_END_RECORD:
                // The list terminator is a pair of END_RECORD tokens.
                if (admLexer.next() != AdmLexer.TOKEN_END_RECORD) {
                    throw new ParseException("Found END_RECORD while expecting a list item.");
                }
                if (awaitingItem) {
                    throw new ParseException("Found END_COLLECTION while expecting a list item.");
                }
                finished = true;
                break;
            case AdmLexer.TOKEN_COMMA:
                if (atStart) {
                    throw new ParseException("Found COMMA before any list item.");
                }
                if (awaitingItem) {
                    throw new ParseException("Found COMMA while expecting a list item.");
                }
                awaitingItem = true;
                break;
            default:
                // Anything else starts a list item.
                awaitingItem = false;
                itemBuffer.reset();
                admFromLexerStream(token, itemType, itemBuffer.getDataOutput());
                builder.addItem(itemBuffer);
                break;
        }
        atStart = false;
    }
    builder.write(out, true);
}
/** Takes a record builder from the record builder pool. */
private IARecordBuilder getRecordBuilder() {
    final IARecordBuilder pooled = recordBuilderPool.allocate(ATypeTag.RECORD);
    return pooled;
}

/** Takes a list builder for ordered lists from the list builder pool. */
private IAsterixListBuilder getOrderedListBuilder() {
    final IAsterixListBuilder pooled = listBuilderPool.allocate(ATypeTag.ORDEREDLIST);
    return pooled;
}

/** Takes a list builder for unordered lists from the list builder pool. */
private IAsterixListBuilder getUnorderedListBuilder() {
    final IAsterixListBuilder pooled = listBuilderPool.allocate(ATypeTag.UNORDEREDLIST);
    return pooled;
}

/** Takes a temporary buffer from the buffer pool. */
private ArrayBackedValueStorage getTempBuffer() {
    final IMutableValueStorage pooled = abvsBuilderPool.allocate(ATypeTag.BINARY);
    return (ArrayBackedValueStorage) pooled;
}
/**
 * Decodes the quoted argument of a hex or base64 constructor and writes the binary value.
 * Any other token kind is ignored.
 *
 * @param lexerToken kind of the constructor token (hex or base64)
 * @param tokenImage string literal including its surrounding quotes
 * @param out destination of the serialized value
 */
private void parseToBinaryTarget(int lexerToken, String tokenImage, DataOutput out)
        throws ParseException, HyracksDataException {
    // Offset 1 and length - 2 skip the opening and closing quote of the literal.
    if (lexerToken == AdmLexer.TOKEN_HEX_CONS) {
        parseHexBinaryString(tokenImage.toCharArray(), 1, tokenImage.length() - 2, out);
    } else if (lexerToken == AdmLexer.TOKEN_BASE64_CONS) {
        parseBase64BinaryString(tokenImage.toCharArray(), 1, tokenImage.length() - 2, out);
    }
}
/**
 * Parses the last token image directly as the target type that fits {@code objectType}.
 *
 * @param typeTag tag of the literal found in the input
 * @param objectType declared type, or {@code null} when the value is untyped
 * @param out destination of the serialized value
 * @throws IOException a {@link ParseException} when no target type exists or the target type
 *             has no parser in {@link #parseValue}
 */
private void parseToNumericTarget(ATypeTag typeTag, IAType objectType, DataOutput out) throws IOException {
    final ATypeTag target = getTargetTypeTag(typeTag, objectType);
    final boolean written = (target != null) && parseValue(admLexer.getLastTokenImage(), target, out);
    if (!written) {
        throw new ParseException(mismatchErrorMessage + objectType.getTypeName() + mismatchErrorMessage2 + typeTag);
    }
}
/**
 * Parses the last token image as a value of {@code typeTag} and, when the declared type calls
 * for a different tag, promotes or demotes the serialized value to it.
 *
 * @param typeTag tag of the literal found in the input
 * @param objectType declared type, or {@code null} when the value is untyped
 * @param out destination of the serialized value
 * @throws IOException a {@link ParseException} when the literal does not fit the declared type
 *             or no conversion computer is available
 */
private void parseAndCastNumeric(ATypeTag typeTag, IAType objectType, DataOutput out) throws IOException {
    final ATypeTag targetTypeTag = getTargetTypeTag(typeTag, objectType);
    final boolean needsCast = targetTypeTag != typeTag;
    // A value that must be converted is first parsed into the scratch buffer.
    final DataOutput sink;
    if (needsCast) {
        castBuffer.reset();
        sink = castBuffer.getDataOutput();
    } else {
        sink = out;
    }
    if ((targetTypeTag == null) || !parseValue(admLexer.getLastTokenImage(), typeTag, sink)) {
        throw new ParseException(mismatchErrorMessage + objectType.getTypeName() + mismatchErrorMessage2 + typeTag);
    }
    if (!needsCast) {
        return;
    }
    // The tags differ: pick the promotion or the demotion computer for the conversion.
    final ITypeConvertComputer converter;
    if (ATypeHierarchy.canPromote(typeTag, targetTypeTag)) {
        converter = ATypeHierarchy.getTypePromoteComputer(typeTag, targetTypeTag);
    } else if (ATypeHierarchy.canDemote(typeTag, targetTypeTag)) {
        converter = ATypeHierarchy.getTypeDemoteComputer(typeTag, targetTypeTag);
    } else {
        return;
    }
    if (converter == null) {
        throw new ParseException("Can't cast the " + typeTag + " type to the " + targetTypeTag + " type.");
    }
    // do the conversion; note that the type tag field should be skipped
    converter.convertType(castBuffer.getByteArray(), castBuffer.getStartOffset() + 1, castBuffer.getLength() - 1,
            out);
}
/**
 * Parses a constructor call of the form "(" string ")" (the constructor token has already been
 * consumed) as a value of {@code typeTag}, converting it when the declared type calls for a
 * different tag.
 *
 * @param typeTag tag of the type named by the constructor
 * @param objectType declared type, or {@code null} when the value is untyped
 * @param out destination of the serialized value
 * @throws IOException a {@link ParseException} when the constructed type does not fit the
 *             declared type, the constructor syntax is malformed, the type has no parser in
 *             {@link #parseValue}, or no conversion computer is available
 */
private void parseConstructor(ATypeTag typeTag, IAType objectType, DataOutput out) throws IOException {
    ATypeTag targetTypeTag = getTargetTypeTag(typeTag, objectType);
    if (targetTypeTag != null) {
        // A value that must be converted is first parsed into the scratch buffer.
        DataOutput dataOutput = out;
        if (targetTypeTag != typeTag) {
            castBuffer.reset();
            dataOutput = castBuffer.getDataOutput();
        }
        int token = admLexer.next();
        if (token == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
            token = admLexer.next();
            if (token == AdmLexer.TOKEN_STRING_LITERAL) {
                String unquoted = admLexer.getLastTokenImage().substring(1,
                        admLexer.getLastTokenImage().length() - 1);
                if (!parseValue(unquoted, typeTag, dataOutput)) {
                    throw new ParseException("Missing deserializer method for constructor: "
                            + AdmLexer.tokenKindToString(token) + ".");
                }
                token = admLexer.next();
                if (token == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
                    if (targetTypeTag != typeTag) {
                        // The target tag may have been accepted as a promotion or as a
                        // demotion, so both kinds of computer have to be considered.
                        ITypeConvertComputer converter = null;
                        if (ATypeHierarchy.canPromote(typeTag, targetTypeTag)) {
                            converter = ATypeHierarchy.getTypePromoteComputer(typeTag, targetTypeTag);
                        } else if (ATypeHierarchy.canDemote(typeTag, targetTypeTag)) {
                            converter = ATypeHierarchy.getTypeDemoteComputer(typeTag, targetTypeTag);
                        }
                        if (converter == null) {
                            throw new ParseException(
                                    "Can't cast the " + typeTag + " type to the " + targetTypeTag + " type.");
                        }
                        // do the conversion; note that the type tag field should be skipped
                        converter.convertType(castBuffer.getByteArray(), castBuffer.getStartOffset() + 1,
                                castBuffer.getLength() - 1, out);
                    }
                    return;
                }
            }
        }
    }
    if (objectType == null) {
        // Untyped values always pass the type check, so only a malformed constructor gets here.
        throw new ParseException("Malformed " + typeTag + " constructor.");
    }
    throw new ParseException(mismatchErrorMessage + objectType.getTypeName() + ". Got " + typeTag + " instead.");
}
/**
 * Parses {@code unquoted} as a value of {@code typeTag} and serializes it to {@code out}.
 *
 * @param unquoted textual form of the value, without surrounding quotes
 * @param typeTag type to parse the text as
 * @param out destination of the serialized value
 * @return {@code true} when the value was written, {@code false} when {@code typeTag} is not
 *         one of the types handled here (nothing is written in that case)
 */
private boolean parseValue(String unquoted, ATypeTag typeTag, DataOutput out) throws HyracksDataException {
    switch (typeTag) {
        case BOOLEAN:
            parseBoolean(unquoted, out);
            break;
        case INT8:
            parseInt8(unquoted, out);
            break;
        case INT16:
            parseInt16(unquoted, out);
            break;
        case INT32:
            parseInt32(unquoted, out);
            break;
        case INT64:
            parseInt64(unquoted, out);
            break;
        case FLOAT:
            aFloat.setValue(Float.parseFloat(unquoted));
            floatSerde.serialize(aFloat, out);
            break;
        case DOUBLE:
            aDouble.setValue(Double.parseDouble(unquoted));
            doubleSerde.serialize(aDouble, out);
            break;
        case STRING:
            aString.setValue(unquoted);
            stringSerde.serialize(aString, out);
            break;
        case TIME:
            parseTime(unquoted, out);
            break;
        case DATE:
            parseDate(unquoted, out);
            break;
        case DATETIME:
            parseDateTime(unquoted, out);
            break;
        case DURATION:
            parseDuration(unquoted, out);
            break;
        case DAYTIMEDURATION:
            parseDateTimeDuration(unquoted, out);
            break;
        case YEARMONTHDURATION:
            parseYearMonthDuration(unquoted, out);
            break;
        case POINT:
            parsePoint(unquoted, out);
            break;
        case POINT3D:
            parse3DPoint(unquoted, out);
            break;
        case CIRCLE:
            parseCircle(unquoted, out);
            break;
        case RECTANGLE:
            parseRectangle(unquoted, out);
            break;
        case LINE:
            parseLine(unquoted, out);
            break;
        case POLYGON:
            APolygonSerializerDeserializer.parse(unquoted, out);
            break;
        case UUID:
            aUUID.parseUUIDString(unquoted);
            uuidSerde.serialize(aUUID, out);
            break;
        default:
            // No parser for this type.
            return false;
    }
    return true;
}
/**
 * Serializes the boolean spelled by {@code bool}.
 *
 * @param bool exactly "true" or "false"
 * @param out destination of the serialized value
 * @throws HyracksDataException a {@link ParseException} for any other text
 */
private void parseBoolean(String bool, DataOutput out) throws HyracksDataException {
    final ABoolean parsed;
    if (bool.equals("true")) {
        parsed = ABoolean.TRUE;
    } else if (bool.equals("false")) {
        parsed = ABoolean.FALSE;
    } else {
        throw new ParseException("This can not be an instance of boolean");
    }
    booleanSerde.serialize(parsed, out);
}
/**
 * Parses {@code int8} as an 8-bit integer (optional sign, decimal digits, optional "i8"
 * suffix) and serializes it.
 *
 * @throws HyracksDataException a {@link ParseException} when the text is malformed or the value
 *             is outside the int8 range
 */
private void parseInt8(String int8, DataOutput out) throws HyracksDataException {
    long value = parseIntegerText(int8, "i8", Byte.MIN_VALUE, Byte.MAX_VALUE,
            "This can not be an instance of int8");
    aInt8.setValue((byte) value);
    int8Serde.serialize(aInt8, out);
}

/**
 * Parses {@code int16} as a 16-bit integer (optional sign, decimal digits, optional "i16"
 * suffix) and serializes it.
 *
 * @throws HyracksDataException a {@link ParseException} when the text is malformed or the value
 *             is outside the int16 range
 */
private void parseInt16(String int16, DataOutput out) throws HyracksDataException {
    long value = parseIntegerText(int16, "i16", Short.MIN_VALUE, Short.MAX_VALUE,
            "This can not be an instance of int16");
    aInt16.setValue((short) value);
    int16Serde.serialize(aInt16, out);
}

/**
 * Parses {@code int32} as a 32-bit integer (optional sign, decimal digits, optional "i32"
 * suffix) and serializes it.
 *
 * @throws HyracksDataException a {@link ParseException} when the text is malformed or the value
 *             is outside the int32 range
 */
private void parseInt32(String int32, DataOutput out) throws HyracksDataException {
    long value = parseIntegerText(int32, "i32", Integer.MIN_VALUE, Integer.MAX_VALUE,
            "This can not be an instance of int32");
    aInt32.setValue((int) value);
    int32Serde.serialize(aInt32, out);
}

/**
 * Parses {@code int64} as a 64-bit integer (optional sign, decimal digits, optional "i64"
 * suffix) and serializes it.
 *
 * @throws HyracksDataException a {@link ParseException} when the text is malformed or the value
 *             is outside the int64 range
 */
private void parseInt64(String int64, DataOutput out) throws HyracksDataException {
    long value = parseIntegerText(int64, "i64", Long.MIN_VALUE, Long.MAX_VALUE,
            "This can not be an instance of int64");
    aInt64.setValue(value);
    int64Serde.serialize(aInt64, out);
}

/**
 * Parses a decimal integer of the form [+|-] digit+ [suffix] and checks it against a range.
 * <p>
 * The value is accumulated as a negative number, so that the minimum of the range (whose
 * magnitude exceeds the maximum by one) is representable, and every step is checked before it
 * is taken, so that an out-of-range value is detected instead of wrapping around.
 *
 * @param text text to parse
 * @param suffix type suffix that may follow the digits, e.g. "i8"
 * @param minValue smallest accepted value
 * @param maxValue largest accepted value
 * @param errorMessage message of the exception thrown on failure
 * @return the parsed value, within [minValue, maxValue]
 * @throws ParseException when the text is empty, has no digits, contains anything other than
 *             the sign, digits and the trailing suffix, or the value is out of range
 */
private static long parseIntegerText(String text, String suffix, long minValue, long maxValue,
        String errorMessage) throws ParseException {
    final int length = text.length();
    int offset = 0;
    boolean negative = false;
    if (length > 0) {
        char sign = text.charAt(0);
        if (sign == '+') {
            offset++;
        } else if (sign == '-') {
            offset++;
            negative = true;
        }
    }
    // Most negative accumulator value allowed for this sign, and the point beyond which
    // multiplying by ten would already pass it.
    final long limit = negative ? minValue : -maxValue;
    final long multiplyLimit = limit / 10;
    long accumulated = 0;
    int digitCount = 0;
    for (; offset < length; offset++) {
        char c = text.charAt(offset);
        if ((c >= '0') && (c <= '9')) {
            int digit = c - '0';
            if (accumulated < multiplyLimit) {
                throw new ParseException(errorMessage);
            }
            accumulated *= 10;
            if (accumulated < limit + digit) {
                throw new ParseException(errorMessage);
            }
            accumulated -= digit;
            digitCount++;
        } else if (text.startsWith(suffix, offset) && ((offset + suffix.length()) == length)) {
            // The type suffix is only allowed as the very last thing in the text.
            break;
        } else {
            throw new ParseException(errorMessage);
        }
    }
    if (digitCount == 0) {
        throw new ParseException(errorMessage);
    }
    return negative ? accumulated : -accumulated;
}
/**
 * Resets the record builder, list builder and temporary buffer pools.
 * Called at the start of every parse of a top-level instance so that the pooled
 * elements can be handed out again.
 */
private void resetPools() {
    abvsBuilderPool.reset();
    recordBuilderPool.reset();
    listBuilderPool.reset();
}
/**
 * Re-initializes the existing lexer so that it reads from {@code in}.
 *
 * @param in stream containing the ADM text
 * @return always {@code true}
 */
@Override
public boolean reset(InputStream in) throws IOException {
    final InputStreamReader reader = new InputStreamReader(in);
    admLexer.reInit(reader);
    return true;
}
}