| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.asterix.external.library; |
| |
| import java.io.DataOutput; |
| import java.io.File; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.util.BitSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| |
| import org.apache.asterix.builders.AbvsBuilderFactory; |
| import org.apache.asterix.builders.IARecordBuilder; |
| import org.apache.asterix.builders.IAsterixListBuilder; |
| import org.apache.asterix.builders.ListBuilderFactory; |
| import org.apache.asterix.builders.OrderedListBuilder; |
| import org.apache.asterix.builders.RecordBuilderFactory; |
| import org.apache.asterix.builders.UnorderedListBuilder; |
| import org.apache.asterix.common.exceptions.AsterixException; |
| import org.apache.asterix.external.api.IRawRecord; |
| import org.apache.asterix.external.api.IRecordDataParser; |
| import org.apache.asterix.external.classad.AMutableCharArrayString; |
| import org.apache.asterix.external.classad.AMutableNumberFactor; |
| import org.apache.asterix.external.classad.AttributeReference; |
| import org.apache.asterix.external.classad.CaseInsensitiveString; |
| import org.apache.asterix.external.classad.CharArrayLexerSource; |
| import org.apache.asterix.external.classad.ClassAd; |
| import org.apache.asterix.external.classad.ExprList; |
| import org.apache.asterix.external.classad.ExprTree; |
| import org.apache.asterix.external.classad.ExprTree.NodeKind; |
| import org.apache.asterix.external.classad.ExprTreeHolder; |
| import org.apache.asterix.external.classad.FileLexerSource; |
| import org.apache.asterix.external.classad.FunctionCall; |
| import org.apache.asterix.external.classad.InputStreamLexerSource; |
| import org.apache.asterix.external.classad.Lexer; |
| import org.apache.asterix.external.classad.Lexer.TokenType; |
| import org.apache.asterix.external.classad.LexerSource; |
| import org.apache.asterix.external.classad.Literal; |
| import org.apache.asterix.external.classad.Operation; |
| import org.apache.asterix.external.classad.StringLexerSource; |
| import org.apache.asterix.external.classad.TokenValue; |
| import org.apache.asterix.external.classad.Value; |
| import org.apache.asterix.external.classad.Value.NumberFactor; |
| import org.apache.asterix.external.classad.object.pool.ClassAdObjectPool; |
| import org.apache.asterix.external.parser.AbstractDataParser; |
| import org.apache.asterix.om.base.ABoolean; |
| import org.apache.asterix.om.base.AMutableInt32; |
| import org.apache.asterix.om.types.AOrderedListType; |
| import org.apache.asterix.om.types.ARecordType; |
| import org.apache.asterix.om.types.ATypeTag; |
| import org.apache.asterix.om.types.AUnionType; |
| import org.apache.asterix.om.types.AUnorderedListType; |
| import org.apache.asterix.om.types.IAType; |
| import org.apache.asterix.om.types.hierachy.ATypeHierarchy; |
| import org.apache.asterix.om.util.NonTaggedFormatUtil; |
| import org.apache.asterix.om.util.container.IObjectPool; |
| import org.apache.asterix.om.util.container.ListObjectPool; |
| import org.apache.commons.lang3.mutable.MutableBoolean; |
| import org.apache.hyracks.api.exceptions.HyracksDataException; |
| import org.apache.hyracks.data.std.api.IMutableValueStorage; |
| import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; |
| |
/**
 * Reads the textual form of a ClassAd from various sources and converts it into a {@link ClassAd}.
 * Input can come from Strings, Files, InputStreams, or any other {@link LexerSource}.
 */
| public class ClassAdParser extends AbstractDataParser implements IRecordDataParser<char[]> { |
| |
| // reusable components |
| private Lexer lexer = new Lexer(); |
| private LexerSource currentSource = null; |
| private boolean isExpr = false; |
| private final ClassAdObjectPool objectPool; |
| // asterix objects |
| private ARecordType recordType; |
| private IObjectPool<IARecordBuilder, ATypeTag> recordBuilderPool = new ListObjectPool<IARecordBuilder, ATypeTag>( |
| new RecordBuilderFactory()); |
| private IObjectPool<IAsterixListBuilder, ATypeTag> listBuilderPool = |
| new ListObjectPool<IAsterixListBuilder, ATypeTag>( |
| new ListBuilderFactory()); |
| private IObjectPool<IMutableValueStorage, ATypeTag> abvsBuilderPool = |
| new ListObjectPool<IMutableValueStorage, ATypeTag>( |
| new AbvsBuilderFactory()); |
| private final ClassAd rootAd; |
| private String exprPrefix = "expr="; |
| private String exprSuffix = ""; |
| private boolean evaluateExpr = true; |
| private String exprFieldNameSuffix = "Expr"; |
| private boolean keepBoth = true; |
| private boolean oldFormat = true; |
| private StringLexerSource stringLexerSource = new StringLexerSource(""); |
| |
/**
 * Creates a fully configured parser.
 *
 * @param recordType
 *            the record type associated with this parser
 * @param oldFormat
 *            when true, {@code createParser()} is invoked on the root ClassAd
 * @param evaluateExpr
 *            when false, an expression attribute is stored as its source text under the
 *            attribute's own name instead of as an expression tree
 * @param keepBoth
 *            when true, the source text of an expression attribute is stored in addition to the
 *            expression tree, under the attribute name followed by {@code exprFieldNameSuffix}
 * @param exprPrefix
 *            text prepended to stored expression source text; {@code null} is treated as empty
 * @param exprSuffix
 *            text appended to stored expression source text; {@code null} is treated as empty
 * @param exprFieldNameSuffix
 *            suffix appended to the attribute name when both forms are kept
 * @param objectPool
 *            pool from which all reusable ClassAd objects are taken
 */
public ClassAdParser(ARecordType recordType, boolean oldFormat, boolean evaluateExpr, boolean keepBoth,
        String exprPrefix, String exprSuffix, String exprFieldNameSuffix, ClassAdObjectPool objectPool) {
    this.objectPool = objectPool;
    this.rootAd = new ClassAd(objectPool);
    this.recordType = recordType;
    this.currentSource = new CharArrayLexerSource();
    this.oldFormat = oldFormat;
    if (oldFormat) {
        rootAd.createParser();
    }
    this.keepBoth = keepBoth;
    this.evaluateExpr = evaluateExpr;
    this.exprPrefix = exprPrefix;
    this.exprSuffix = exprSuffix;
    this.exprFieldNameSuffix = exprFieldNameSuffix;
}
| |
/**
 * Creates a parser that uses the default expression-handling options and has no root ClassAd.
 *
 * @param objectPool
 *            pool from which all reusable ClassAd objects are taken
 */
public ClassAdParser(ClassAdObjectPool objectPool) {
    this.objectPool = objectPool;
    this.currentSource = new CharArrayLexerSource();
    rootAd = null;
}
| |
| /*********************************** |
| * AsterixDB Specific begin |
| * |
| * @throws AsterixException |
| ***********************************/ |
| public void asterixParse(ClassAd classad, DataOutput out) throws IOException, AsterixException { |
| // we assume the lexer source used here is a char array |
| parseClassAd(currentSource, classad, false); |
| parseRecord(null, classad, out); |
| } |
| |
| public void handleErrorParsing() throws IOException { |
| } |
| |
| private boolean asterixParseClassAd(ClassAd ad) throws IOException { |
| TokenType tt; |
| ad.clear(); |
| lexer.initialize(currentSource); |
| if ((tt = lexer.consumeToken()) != TokenType.LEX_OPEN_BOX) { |
| handleErrorParsing(); |
| return false; |
| } |
| tt = lexer.peekToken(); |
| TokenValue tv = objectPool.tokenValuePool.get(); |
| ExprTreeHolder tree = objectPool.mutableExprPool.get(); |
| while (tt != TokenType.LEX_CLOSE_BOX) { |
| // Get the name of the expression |
| tv.reset(); |
| tree.reset(); |
| tt = lexer.consumeToken(tv); |
| if (tt == TokenType.LEX_SEMICOLON) { |
| // We allow empty expressions, so if someone give a double |
| // semicolon, it doesn't |
| // hurt. Technically it's not right, but we shouldn't make users |
| // pay the price for |
| // a meaningless mistake. See condor-support #1881 for a user |
| // that was bitten by this. |
| continue; |
| } |
| if (tt != TokenType.LEX_IDENTIFIER) { |
| throw new HyracksDataException( |
| "while parsing classad: expected LEX_IDENTIFIER " + " but got " + Lexer.strLexToken(tt)); |
| } |
| |
| // consume the intermediate '=' |
| if ((tt = lexer.consumeToken()) != TokenType.LEX_BOUND_TO) { |
| throw new HyracksDataException( |
| "while parsing classad: expected LEX_BOUND_TO " + " but got " + Lexer.strLexToken(tt)); |
| } |
| |
| int positionBefore = lexer.getLexSource().getPosition(); |
| isExpr = false; |
| // parse the expression |
| parseExpression(tree); |
| if (tree.getInnerTree() == null) { |
| handleErrorParsing(); |
| throw new HyracksDataException("parse expression returned empty tree"); |
| } |
| |
| if ((!evaluateExpr || keepBoth) && isExpr && positionBefore >= 0) { |
| // we will store a string representation of the expression |
| int len = lexer.getLexSource().getPosition() - positionBefore - 2; |
| // add it as it is to the classAd |
| Literal lit = objectPool.literalPool.get(); |
| Value exprVal = objectPool.valuePool.get(); |
| |
| exprVal.setStringValue((exprPrefix == null ? "" : exprPrefix) |
| + String.valueOf(lexer.getLexSource().getBuffer(), positionBefore, len) |
| + (exprSuffix == null ? "" : exprSuffix)); |
| Literal.createLiteral(lit, exprVal, NumberFactor.NO_FACTOR); |
| if (!evaluateExpr) { |
| ad.insert(tv.getStrValue().toString(), lit); |
| } else { |
| ad.insert(tv.getStrValue().toString() + exprFieldNameSuffix, lit); |
| } |
| } |
| if (!isExpr || (evaluateExpr)) { |
| // insert the attribute into the classad |
| if (!ad.insert(tv.getStrValue().toString(), tree)) { |
| handleErrorParsing(); |
| throw new HyracksDataException("Couldn't insert value to classad"); |
| } |
| } |
| // the next token must be a ';' or a ']' |
| tt = lexer.peekToken(); |
| if (tt != TokenType.LEX_SEMICOLON && tt != TokenType.LEX_CLOSE_BOX) { |
| handleErrorParsing(); |
| throw new HyracksDataException("while parsing classad: expected LEX_SEMICOLON or " |
| + "LEX_CLOSE_BOX but got " + Lexer.strLexToken(tt)); |
| } |
| |
| // Slurp up any extra semicolons. This does not duplicate the work |
| // at the top of the loop |
| // because it accounts for the case where the last expression has |
| // extra semicolons, |
| // while the first case accounts for optional beginning semicolons. |
| while (tt == TokenType.LEX_SEMICOLON) { |
| lexer.consumeToken(); |
| tt = lexer.peekToken(); |
| } |
| } |
| return true; |
| } |
| |
| public static String readLine(char[] buffer, AMutableInt32 offset, int maxOffset) { |
| int position = offset.getIntegerValue(); |
| while (buffer[position] != '\n' && position < maxOffset) { |
| position++; |
| } |
| if (offset.getIntegerValue() == position) { |
| return null; |
| } |
| String line = String.valueOf(buffer, offset.getIntegerValue(), position - offset.getIntegerValue()); |
| position++; |
| offset.setValue(position); |
| return line; |
| } |
| |
| private AMutableInt32 aInt32 = new AMutableInt32(0); |
| |
| /** |
| * Resets the pools before parsing a top-level record. In this way the |
| * elements in those pools can be re-used. |
| */ |
| private void resetPools() { |
| listBuilderPool.reset(); |
| recordBuilderPool.reset(); |
| abvsBuilderPool.reset(); |
| objectPool.reset(); |
| } |
| |
| private ATypeTag getTargetTypeTag(ATypeTag expectedTypeTag, IAType aObjectType) throws IOException { |
| if (aObjectType == null) { |
| return expectedTypeTag; |
| } |
| if (aObjectType.getTypeTag() != ATypeTag.UNION) { |
| final ATypeTag typeTag = aObjectType.getTypeTag(); |
| if (ATypeHierarchy.canPromote(expectedTypeTag, typeTag) |
| || ATypeHierarchy.canDemote(expectedTypeTag, typeTag)) { |
| return typeTag; |
| } else { |
| return null; |
| } |
| } else { // union |
| List<IAType> unionList = ((AUnionType) aObjectType).getUnionList(); |
| for (IAType t : unionList) { |
| final ATypeTag typeTag = t.getTypeTag(); |
| if (ATypeHierarchy.canPromote(expectedTypeTag, typeTag) |
| || ATypeHierarchy.canDemote(expectedTypeTag, typeTag)) { |
| return typeTag; |
| } |
| } |
| } |
| return null; |
| } |
| |
| private void parseRecord(ARecordType recType, ClassAd pAd, DataOutput out) throws IOException, AsterixException { |
| ArrayBackedValueStorage fieldValueBuffer = getTempBuffer(); |
| ArrayBackedValueStorage fieldNameBuffer = getTempBuffer(); |
| IARecordBuilder recBuilder = getRecordBuilder(); |
| BitSet nulls = null; |
| if (recType != null) { |
| nulls = getBitSet(); |
| recBuilder.reset(recType); |
| } else { |
| recBuilder.reset(null); |
| } |
| recBuilder.init(); |
| Boolean openRecordField = false; |
| int fieldId = 0; |
| IAType fieldType = null; |
| |
| // new stuff |
| Map<CaseInsensitiveString, ExprTree> attrs = pAd.getAttrList(); |
| for (Entry<CaseInsensitiveString, ExprTree> entry : attrs.entrySet()) { |
| // reset buffers |
| fieldNameBuffer.reset(); |
| fieldValueBuffer.reset(); |
| // take care of field name |
| String fldName = entry.getKey().get(); |
| if (recType != null) { |
| fieldId = recBuilder.getFieldId(fldName); |
| if (fieldId < 0 && !recType.isOpen()) { |
| throw new HyracksDataException("This record is closed, you can not add extra fields !!"); |
| } else if (fieldId < 0 && recType.isOpen()) { |
| aStringFieldName.setValue(fldName); |
| stringSerde.serialize(aStringFieldName, fieldNameBuffer.getDataOutput()); |
| openRecordField = true; |
| fieldType = null; |
| } else { |
| // a closed field |
| nulls.set(fieldId); |
| fieldType = recType.getFieldTypes()[fieldId]; |
| openRecordField = false; |
| } |
| } else { |
| aStringFieldName.setValue(fldName); |
| stringSerde.serialize(aStringFieldName, fieldNameBuffer.getDataOutput()); |
| openRecordField = true; |
| fieldType = null; |
| } |
| |
| // add field value to value buffer |
| writeFieldValueToBuffer(fieldType, fieldValueBuffer.getDataOutput(), fldName, entry.getValue(), pAd); |
| if (openRecordField) { |
| if (fieldValueBuffer.getByteArray()[0] != ATypeTag.MISSING.serialize()) { |
| recBuilder.addField(fieldNameBuffer, fieldValueBuffer); |
| } |
| } else if (NonTaggedFormatUtil.isOptional(fieldType)) { |
| if (fieldValueBuffer.getByteArray()[0] != ATypeTag.MISSING.serialize()) { |
| recBuilder.addField(fieldId, fieldValueBuffer); |
| } |
| } else { |
| recBuilder.addField(fieldId, fieldValueBuffer); |
| } |
| } |
| |
| if (recType != null) { |
| int optionalFieldId = checkOptionalConstraints(recType, nulls); |
| if (optionalFieldId != -1) { |
| throw new HyracksDataException( |
| "Field: " + recType.getFieldNames()[optionalFieldId] + " can not be optional"); |
| } |
| } |
| recBuilder.write(out, true); |
| } |
| |
| private void writeFieldValueToBuffer(IAType fieldType, DataOutput out, String name, ExprTree tree, ClassAd pAd) |
| throws IOException, AsterixException { |
| Value val; |
| switch (tree.getKind()) { |
| case ATTRREF_NODE: |
| case CLASSAD_NODE: |
| case EXPR_ENVELOPE: |
| case EXPR_LIST_NODE: |
| case FN_CALL_NODE: |
| case OP_NODE: |
| val = objectPool.valuePool.get(); |
| if (pAd.evaluateAttr(name, val)) { |
| } else { |
| // just write the expr |
| val = ((Literal) pAd.getAttrList().get(name + "Expr")).getValue(); |
| } |
| break; |
| case LITERAL_NODE: |
| val = ((Literal) tree.getTree()).getValue(); |
| break; |
| default: |
| throw new HyracksDataException("Unknown Expression type detected: " + tree.getKind()); |
| } |
| |
| if (fieldType != null) { |
| if (NonTaggedFormatUtil.isOptional(fieldType)) { |
| fieldType = ((AUnionType) fieldType).getActualType(); |
| } |
| } |
| switch (val.getValueType()) { |
| case ABSOLUTE_TIME_VALUE: |
| if (checkType(ATypeTag.DATETIME, fieldType)) { |
| parseDateTime(val, out); |
| } else { |
| throw new HyracksDataException(mismatchErrorMessage + fieldType.getTypeTag()); |
| } |
| break; |
| case BOOLEAN_VALUE: |
| if (checkType(ATypeTag.BOOLEAN, fieldType)) { |
| booleanSerde.serialize(val.getBoolVal() ? ABoolean.TRUE : ABoolean.FALSE, out); |
| } else { |
| throw new HyracksDataException(mismatchErrorMessage + fieldType.getTypeTag()); |
| } |
| break; |
| case CLASSAD_VALUE: |
| if (checkType(ATypeTag.RECORD, fieldType)) { |
| IAType objectType = getComplexType(fieldType, ATypeTag.RECORD); |
| ClassAd classad = val.getClassadVal(); |
| parseRecord((ARecordType) objectType, classad, out); |
| } else { |
| throw new HyracksDataException(mismatchErrorMessage + fieldType.getTypeTag()); |
| } |
| break; |
| case ERROR_VALUE: |
| case STRING_VALUE: |
| case UNDEFINED_VALUE: |
| if (checkType(ATypeTag.STRING, fieldType)) { |
| parseString(val, out); |
| } else { |
| throw new HyracksDataException(mismatchErrorMessage + fieldType.getTypeTag()); |
| } |
| break; |
| case INTEGER_VALUE: |
| if (checkType(ATypeTag.INT64, fieldType)) { |
| if (fieldType == null || fieldType.getTypeTag() == ATypeTag.INT64) { |
| aInt64.setValue(val.getLongVal()); |
| int64Serde.serialize(aInt64, out); |
| } else if (fieldType.getTypeTag() == ATypeTag.INT32) { |
| aInt32.setValue((int) val.getLongVal()); |
| int32Serde.serialize(aInt32, out); |
| } else if (fieldType.getTypeTag() == ATypeTag.DOUBLE) { |
| aDouble.setValue(val.getLongVal()); |
| doubleSerde.serialize(aDouble, out); |
| } else if (fieldType.getTypeTag() == ATypeTag.INT16) { |
| aInt16.setValue((short) val.getLongVal()); |
| int16Serde.serialize(aInt16, out); |
| } else if (fieldType.getTypeTag() == ATypeTag.INT8) { |
| aInt8.setValue((byte) val.getLongVal()); |
| int8Serde.serialize(aInt8, out); |
| } else if (fieldType.getTypeTag() == ATypeTag.FLOAT) { |
| aFloat.setValue(val.getLongVal()); |
| floatSerde.serialize(aFloat, out); |
| } |
| } else if (checkType(ATypeTag.DATETIME, fieldType)) { |
| // Classad uses Linux Timestamps (s instead of ms) |
| aDateTime.setValue(val.getLongVal() * 1000); |
| datetimeSerde.serialize(aDateTime, out); |
| } else if (checkType(ATypeTag.DURATION, fieldType)) { |
| // Classad uses Linux Timestamps (s instead of ms) |
| aDuration.setValue(0, val.getLongVal() * 1000); |
| durationSerde.serialize(aDuration, out); |
| } else if (checkType(ATypeTag.INT32, fieldType)) { |
| aInt32.setValue((int) val.getLongVal()); |
| int32Serde.serialize(aInt32, out); |
| } else if (checkType(ATypeTag.DOUBLE, fieldType)) { |
| aDouble.setValue(val.getLongVal()); |
| doubleSerde.serialize(aDouble, out); |
| } else { |
| throw new HyracksDataException(mismatchErrorMessage + fieldType.getTypeTag()); |
| } |
| break; |
| case LIST_VALUE: |
| case SLIST_VALUE: |
| IAType objectType; |
| if (checkType(ATypeTag.UNORDEREDLIST, fieldType)) { |
| objectType = getComplexType(fieldType, ATypeTag.UNORDEREDLIST); |
| parseUnorderedList((AUnorderedListType) objectType, val, out); |
| } else if (checkType(ATypeTag.ORDEREDLIST, fieldType)) { |
| objectType = getComplexType(fieldType, ATypeTag.ORDEREDLIST); |
| parseOrderedList((AOrderedListType) objectType, val, out); |
| } else { |
| throw new HyracksDataException(mismatchErrorMessage + fieldType.getTypeTag()); |
| } |
| break; |
| case REAL_VALUE: |
| if (checkType(ATypeTag.DOUBLE, fieldType)) { |
| if (fieldType == null || fieldType.getTypeTag() == ATypeTag.DOUBLE) { |
| aDouble.setValue(val.getDoubleVal()); |
| doubleSerde.serialize(aDouble, out); |
| } else if (fieldType.getTypeTag() == ATypeTag.INT32) { |
| aInt32.setValue((int) val.getDoubleVal()); |
| int32Serde.serialize(aInt32, out); |
| } else if (fieldType.getTypeTag() == ATypeTag.INT64) { |
| aInt64.setValue((long) val.getDoubleVal()); |
| int64Serde.serialize(aInt64, out); |
| } else if (fieldType.getTypeTag() == ATypeTag.INT16) { |
| aInt16.setValue((short) val.getDoubleVal()); |
| int16Serde.serialize(aInt16, out); |
| } else if (fieldType.getTypeTag() == ATypeTag.INT8) { |
| aInt8.setValue((byte) val.getDoubleVal()); |
| int8Serde.serialize(aInt8, out); |
| } else if (fieldType.getTypeTag() == ATypeTag.FLOAT) { |
| aFloat.setValue((float) val.getDoubleVal()); |
| floatSerde.serialize(aFloat, out); |
| } |
| } else if (checkType(ATypeTag.INT32, fieldType)) { |
| aInt32.setValue((int) val.getDoubleVal()); |
| int32Serde.serialize(aInt32, out); |
| } else if (checkType(ATypeTag.INT64, fieldType)) { |
| aInt64.setValue((long) val.getDoubleVal()); |
| int64Serde.serialize(aInt64, out); |
| } else if (checkType(ATypeTag.DATETIME, fieldType)) { |
| // Classad uses Linux Timestamps (s instead of ms) |
| aDateTime.setValue(val.getLongVal() * 1000); |
| datetimeSerde.serialize(aDateTime, out); |
| } else if (checkType(ATypeTag.DURATION, fieldType)) { |
| // Classad uses Linux Timestamps (s instead of ms) |
| aDuration.setValue(0, (long) (val.getDoubleVal() * 1000.0)); |
| durationSerde.serialize(aDuration, out); |
| } else { |
| throw new HyracksDataException(mismatchErrorMessage + fieldType.getTypeTag()); |
| } |
| break; |
| case RELATIVE_TIME_VALUE: |
| if (checkType(ATypeTag.DURATION, fieldType)) { |
| parseDuration(val, out); |
| } else { |
| throw new HyracksDataException(mismatchErrorMessage + fieldType.getTypeTag()); |
| } |
| break; |
| default: |
| throw new HyracksDataException("unknown data type " + val.getValueType()); |
| } |
| } |
| |
| private void parseOrderedList(AOrderedListType oltype, Value listVal, DataOutput out) |
| throws IOException, AsterixException { |
| ArrayBackedValueStorage itemBuffer = getTempBuffer(); |
| OrderedListBuilder orderedListBuilder = (OrderedListBuilder) getOrderedListBuilder(); |
| IAType itemType = null; |
| if (oltype != null) { |
| itemType = oltype.getItemType(); |
| } |
| orderedListBuilder.reset(oltype); |
| for (ExprTree tree : listVal.getListVal().getExprList()) { |
| itemBuffer.reset(); |
| writeFieldValueToBuffer(itemType, itemBuffer.getDataOutput(), null, tree, null); |
| orderedListBuilder.addItem(itemBuffer); |
| } |
| orderedListBuilder.write(out, true); |
| } |
| |
| private void parseUnorderedList(AUnorderedListType uoltype, Value listVal, DataOutput out) |
| throws IOException, AsterixException { |
| ArrayBackedValueStorage itemBuffer = getTempBuffer(); |
| UnorderedListBuilder unorderedListBuilder = (UnorderedListBuilder) getUnorderedListBuilder(); |
| IAType itemType = null; |
| if (uoltype != null) { |
| itemType = uoltype.getItemType(); |
| } |
| unorderedListBuilder.reset(uoltype); |
| for (ExprTree tree : listVal.getListVal().getExprList()) { |
| itemBuffer.reset(); |
| writeFieldValueToBuffer(itemType, itemBuffer.getDataOutput(), null, tree, null); |
| unorderedListBuilder.addItem(itemBuffer); |
| } |
| unorderedListBuilder.write(out, true); |
| } |
| |
| private void parseString(Value val, DataOutput out) throws HyracksDataException { |
| switch (val.getValueType()) { |
| case ERROR_VALUE: |
| aString.setValue("error"); |
| break; |
| case STRING_VALUE: |
| aString.setValue(val.getStringVal()); |
| break; |
| case UNDEFINED_VALUE: |
| aString.setValue("undefined"); |
| break; |
| default: |
| throw new HyracksDataException("Unknown String type " + val.getValueType()); |
| } |
| stringSerde.serialize(aString, out); |
| } |
| |
| protected void parseDuration(Value duration, DataOutput out) throws HyracksDataException { |
| try { |
| aDuration.setValue(0, duration.getTimeVal().getRelativeTime()); |
| durationSerde.serialize(aDuration, out); |
| } catch (Exception e) { |
| throw new HyracksDataException(e); |
| } |
| } |
| |
| protected void parseDateTime(Value datetime, DataOutput out) throws HyracksDataException { |
| aDateTime.setValue(datetime.getTimeVal().getTimeInMillis()); |
| datetimeSerde.serialize(aDateTime, out); |
| } |
| |
| public static IAType getComplexType(IAType aObjectType, ATypeTag tag) { |
| if (aObjectType == null) { |
| return null; |
| } |
| |
| if (aObjectType.getTypeTag() == tag) { |
| return aObjectType; |
| } |
| |
| if (aObjectType.getTypeTag() == ATypeTag.UNION) { |
| List<IAType> unionList = ((AUnionType) aObjectType).getUnionList(); |
| for (int i = 0; i < unionList.size(); i++) { |
| if (unionList.get(i).getTypeTag() == tag) { |
| return unionList.get(i); |
| } |
| } |
| } |
| return null; // wont get here |
| } |
| |
| private String mismatchErrorMessage = "Mismatch Type, expecting a value of type "; |
| |
| private boolean checkType(ATypeTag expectedTypeTag, IAType aObjectType) throws IOException { |
| return getTargetTypeTag(expectedTypeTag, aObjectType) != null; |
| } |
| |
| private BitSet getBitSet() { |
| return objectPool.bitSetPool.get(); |
| } |
| |
| public static int checkOptionalConstraints(ARecordType recType, BitSet nulls) { |
| for (int i = 0; i < recType.getFieldTypes().length; i++) { |
| if (nulls.get(i) == false) { |
| IAType type = recType.getFieldTypes()[i]; |
| if (type.getTypeTag() != ATypeTag.MISSING && type.getTypeTag() != ATypeTag.UNION) { |
| return i; |
| } |
| |
| if (type.getTypeTag() == ATypeTag.UNION) { // union |
| AUnionType unionType = (AUnionType) type; |
| if (!unionType.isUnknownableType()) { |
| return i; |
| } |
| } |
| } |
| } |
| return -1; |
| } |
| |
| private IARecordBuilder getRecordBuilder() { |
| return recordBuilderPool.allocate(ATypeTag.RECORD); |
| } |
| |
| private IAsterixListBuilder getOrderedListBuilder() { |
| return listBuilderPool.allocate(ATypeTag.ORDEREDLIST); |
| } |
| |
| private IAsterixListBuilder getUnorderedListBuilder() { |
| return listBuilderPool.allocate(ATypeTag.UNORDEREDLIST); |
| } |
| |
| private ArrayBackedValueStorage getTempBuffer() { |
| return (ArrayBackedValueStorage) abvsBuilderPool.allocate(ATypeTag.BINARY); |
| } |
| |
| public static ATypeTag getMatchingType(Literal lit) throws HyracksDataException { |
| return getMatchingType(lit.getValue()); |
| } |
| |
| public static ATypeTag getMatchingType(Value val) throws HyracksDataException { |
| switch (val.getValueType()) { |
| case ABSOLUTE_TIME_VALUE: |
| return ATypeTag.DATETIME; |
| case BOOLEAN_VALUE: |
| return ATypeTag.BOOLEAN; |
| case CLASSAD_VALUE: |
| return ATypeTag.RECORD; |
| case ERROR_VALUE: |
| case STRING_VALUE: |
| case UNDEFINED_VALUE: |
| return ATypeTag.STRING; |
| case INTEGER_VALUE: |
| return ATypeTag.INT64; |
| case LIST_VALUE: |
| case SLIST_VALUE: |
| return ATypeTag.UNORDEREDLIST; |
| case NULL_VALUE: |
| return ATypeTag.MISSING; |
| case REAL_VALUE: |
| return ATypeTag.DOUBLE; |
| case RELATIVE_TIME_VALUE: |
| return ATypeTag.DURATION; |
| default: |
| throw new HyracksDataException("Unknown data type"); |
| } |
| } |
| |
| /******************************** |
| * End of AsterixDB specifics |
| ********************************/ |
| |
| /** |
| * Parse a ClassAd |
| * |
| * @param buffer |
| * Buffer containing the string representation of the classad. |
| * @param full |
| * If this parameter is true, the parse is considered to succeed |
| * only if the ClassAd was parsed successfully and no other |
| * tokens follow the ClassAd. |
| * @return pointer to the ClassAd object if successful, or null otherwise |
| * @throws IOException |
| */ |
| public ClassAd parseClassAd(String buffer, boolean full) throws IOException { |
| currentSource = new StringLexerSource(buffer); |
| return parseClassAd(currentSource, full); |
| } |
| |
| public ClassAd parseClassAd(String buffer, AMutableInt32 offset) throws IOException { |
| currentSource = new StringLexerSource(buffer); |
| ClassAd ad = parseClassAd((StringLexerSource) currentSource); |
| offset.setValue(((StringLexerSource) currentSource).getCurrentLocation()); |
| return ad; |
| } |
| |
| public ClassAd parseClassAd(StringLexerSource lexer_source) throws IOException { |
| return parseClassAd(lexer_source, false); |
| } |
| |
| public ClassAd parseClassAd(File file, boolean full) throws IOException { |
| FileLexerSource fileLexerSource = new FileLexerSource(file); |
| return parseClassAd(fileLexerSource, full); |
| } |
| |
| public ClassAd parseClassAd(InputStream in, boolean full) throws IOException { |
| InputStreamLexerSource lexer_source = new InputStreamLexerSource(in); |
| return parseClassAd(lexer_source, full); |
| } |
| |
| // preferred method since the parser doesn't need to create an object |
| public void parseClassAd(ClassAd ad, LexerSource lexer_source, boolean full) throws IOException { |
| ad.reset(); |
| if (lexer.initialize(lexer_source)) { |
| if (!parseClassAd(ad, full)) { |
| return; |
| } else if (lexer_source.readPreviousCharacter() != '\0') { |
| // The lexer swallows one extra character, so if we have |
| // two classads back to back we need to make sure to unread |
| // one of the characters. |
| lexer_source.unreadCharacter(); |
| } |
| } |
| } |
| |
| public ClassAd parseClassAd(LexerSource lexer_source, boolean full) throws IOException { |
| System.out.println("Don't use this call. instead, pass a mutable classad instance"); |
| ClassAd ad = objectPool.classAdPool.get(); |
| if (lexer.initialize(lexer_source)) { |
| if (!parseClassAd(ad, full)) { |
| return null; |
| } else if (lexer_source.readPreviousCharacter() != '\0') { |
| // The lexer swallows one extra character, so if we have |
| // two classads back to back we need to make sure to unread |
| // one of the characters. |
| lexer_source.unreadCharacter(); |
| } |
| } |
| return ad; |
| } |
| |
| /** |
| * Parse a ClassAd |
| * |
| * @param buffer |
| * Buffer containing the string representation of the classad. |
| * @param ad |
| * The classad to be populated |
| * @param full |
| * If this parameter is true, the parse is considered to succeed |
| * only if the ClassAd was parsed successfully and no other |
| * tokens follow the ClassAd. |
| * @return true on success, false on failure |
| * @throws IOException |
| */ |
| public boolean parseClassAd(String buffer, ClassAd classad, boolean full) throws IOException { |
| StringLexerSource stringLexerSource = new StringLexerSource(buffer); |
| return parseClassAd(stringLexerSource, classad, full); |
| } |
| |
| public boolean parseClassAd(String buffer, ClassAd classad, AMutableInt32 offset) throws IOException { |
| boolean success = false; |
| StringLexerSource stringLexerSource = new StringLexerSource(buffer, offset.getIntegerValue().intValue()); |
| success = parseClassAd(stringLexerSource, classad); |
| offset.setValue(stringLexerSource.getCurrentLocation()); |
| return success; |
| } |
| |
| public boolean parseNext(ClassAd classad) throws IOException { |
| resetPools(); |
| return parseClassAd(currentSource, classad, false); |
| } |
| |
| public boolean parseNext(ClassAd classad, boolean full) throws IOException { |
| return parseClassAd(currentSource, classad, full); |
| } |
| |
| private boolean parseClassAd(StringLexerSource lexer_source, ClassAd classad) throws IOException { |
| return parseClassAd(lexer_source, classad, false); |
| } |
| |
| public boolean parseClassAd(File file, ClassAd classad, boolean full) throws IOException { |
| FileLexerSource fileLexerSource = new FileLexerSource(file); |
| return parseClassAd(fileLexerSource, classad, full); |
| } |
| |
| public boolean parseClassAd(InputStream stream, ClassAd classad, boolean full) throws IOException { |
| InputStreamLexerSource inputStreamLexerSource = new InputStreamLexerSource(stream); |
| return parseClassAd(inputStreamLexerSource, classad, full); |
| } |
| |
| public boolean parseClassAd(LexerSource lexer_source, ClassAd classad, boolean full) throws IOException { |
| boolean success = false; |
| if (lexer.initialize(lexer_source)) { |
| success = parseClassAd(classad, full); |
| } |
| if (success) { |
| // The lexer swallows one extra character, so if we have |
| // two classads back to back we need to make sure to unread |
| // one of the characters. |
| if (lexer_source.readPreviousCharacter() != Lexer.EOF) { |
| lexer_source.unreadCharacter(); |
| } |
| } else { |
| classad.clear(); |
| } |
| return success; |
| } |
| |
| /** |
| * Parse an expression |
| * |
| * @param buffer |
| * Buffer containing the string representation of the expression. |
| * @param full |
| * If this parameter is true, the parse is considered to succeed |
| * only if the expression was parsed successfully and no other |
| * tokens are left. |
| * @return pointer to the expression object if successful, or null otherwise |
| */ |
| public ExprTree parseExpression(String buffer, boolean full) throws IOException { |
| stringLexerSource.setNewSource(buffer); |
| ExprTreeHolder mutableExpr = objectPool.mutableExprPool.get(); |
| if (lexer.initialize(stringLexerSource)) { |
| parseExpression(mutableExpr, full); |
| } |
| return mutableExpr.getInnerTree(); |
| } |
| |
| public ExprTree ParseExpression(String buffer) throws IOException { |
| return parseExpression(buffer, false); |
| } |
| |
| public ExprTree parseExpression(LexerSource lexer_source, boolean full) throws IOException { |
| ExprTreeHolder mutableExpr = objectPool.mutableExprPool.get(); |
| if (lexer.initialize(lexer_source)) { |
| parseExpression(mutableExpr, full); |
| } |
| return mutableExpr.getInnerTree(); |
| } |
| |
| public ExprTree parseNextExpression() throws IOException { |
| if (!lexer.wasInitialized()) { |
| return null; |
| } else { |
| ExprTreeHolder expr = objectPool.mutableExprPool.get(); |
| parseExpression(expr, false); |
| ExprTree innerTree = expr.getInnerTree(); |
| return innerTree; |
| } |
| } |
| |
| /*-------------------------------------------------------------------- |
| * |
| * Private Functions |
| * |
| *-------------------------------------------------------------------*/ |
| |
| // Expression .= LogicalORExpression |
| // | LogicalORExpression '?' Expression ':' Expression |
| |
| private boolean parseExpression(ExprTreeHolder tree) throws IOException { |
| return parseExpression(tree, false); |
| } |
| |
| private boolean parseExpression(ExprTreeHolder tree, boolean full) throws IOException { |
| TokenType tt; |
| if (!parseLogicalORExpression(tree)) { |
| return false; |
| } |
| if ((tt = lexer.peekToken()) == TokenType.LEX_QMARK) { |
| lexer.consumeToken(); |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeM = objectPool.mutableExprPool.get(); |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| parseExpression(treeM); |
| if ((tt = lexer.consumeToken()) != TokenType.LEX_COLON) { |
| throw new HyracksDataException("expected LEX_COLON, but got " + Lexer.strLexToken(tt)); |
| } |
| parseExpression(treeR); |
| if (treeL.getInnerTree() != null && treeM.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(Operation.OpKind_TERNARY_OP, treeL, treeM, treeR, newTree); |
| tree.setInnerTree(newTree); |
| return (true); |
| } |
| tree.setInnerTree(null); |
| return false; |
| } |
| // if a full parse was requested, ensure that input is exhausted |
| if (full && (lexer.consumeToken() != TokenType.LEX_END_OF_INPUT)) { |
| throw new HyracksDataException( |
| "expected LEX_END_OF_INPUT on full parse, but got " + String.valueOf(Lexer.strLexToken(tt))); |
| } |
| return true; |
| } |
| |
| // LogicalORExpression .= LogicalANDExpression |
| // | LogicalORExpression '||' LogicalANDExpression |
| |
| private boolean parseLogicalORExpression(ExprTreeHolder tree) throws IOException { |
| if (!parseLogicalANDExpression(tree)) { |
| return false; |
| } |
| while ((lexer.peekToken()) == TokenType.LEX_LOGICAL_OR) { |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| lexer.consumeToken(); |
| parseLogicalANDExpression(treeR); |
| if (treeL.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(Operation.OpKind_LOGICAL_OR_OP, treeL, treeR, null, newTree); |
| tree.setInnerTree(newTree); |
| } else { |
| tree.setInnerTree(null); |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| // LogicalANDExpression .= InclusiveORExpression |
| // | LogicalANDExpression '&&' InclusiveORExpression |
| private boolean parseLogicalANDExpression(ExprTreeHolder tree) throws IOException { |
| if (!parseInclusiveORExpression(tree)) { |
| return false; |
| } |
| while ((lexer.peekToken()) == TokenType.LEX_LOGICAL_AND) { |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| lexer.consumeToken(); |
| parseInclusiveORExpression(treeR); |
| if (treeL.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(Operation.OpKind_LOGICAL_AND_OP, treeL, treeR, null, newTree); |
| tree.setInnerTree(newTree); |
| } else { |
| tree.setInnerTree(null); |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| // InclusiveORExpression .= ExclusiveORExpression |
| // | InclusiveORExpression '|' ExclusiveORExpression |
| public boolean parseInclusiveORExpression(ExprTreeHolder tree) throws IOException { |
| if (!parseExclusiveORExpression(tree)) { |
| return false; |
| } |
| while ((lexer.peekToken()) == TokenType.LEX_BITWISE_OR) { |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| lexer.consumeToken(); |
| parseExclusiveORExpression(treeR); |
| if (treeL.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(Operation.OpKind_BITWISE_OR_OP, treeL, treeR, null, newTree); |
| tree.setInnerTree(newTree); |
| } else { |
| tree.setInnerTree(null); |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| // ExclusiveORExpression .= ANDExpression |
| // | ExclusiveORExpression '^' ANDExpression |
| private boolean parseExclusiveORExpression(ExprTreeHolder tree) throws IOException { |
| if (!parseANDExpression(tree)) { |
| return false; |
| } |
| while ((lexer.peekToken()) == TokenType.LEX_BITWISE_XOR) { |
| lexer.consumeToken(); |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| parseANDExpression(treeR); |
| if (treeL.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(Operation.OpKind_BITWISE_XOR_OP, treeL, treeR, null, newTree); |
| tree.setInnerTree(newTree); |
| } else { |
| tree.setInnerTree(null); |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| // ANDExpression .= EqualityExpression |
| // | ANDExpression '&' EqualityExpression |
| private boolean parseANDExpression(ExprTreeHolder tree) throws IOException { |
| if (!parseEqualityExpression(tree)) { |
| return false; |
| } |
| while ((lexer.peekToken()) == TokenType.LEX_BITWISE_AND) { |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| lexer.consumeToken(); |
| parseEqualityExpression(treeR); |
| if (treeL.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(Operation.OpKind_BITWISE_AND_OP, treeL, treeR, null, newTree); |
| tree.setInnerTree(newTree); |
| } else { |
| tree.setInnerTree(null); |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| // EqualityExpression .= RelationalExpression |
| // | EqualityExpression '==' RelationalExpression |
| // | EqualityExpression '!=' RelationalExpression |
| // | EqualityExpression '=?=' RelationalExpression |
| // | EqualityExpression '=!=' RelationalExpression |
| private boolean parseEqualityExpression(ExprTreeHolder tree) throws IOException { |
| TokenType tt; |
| int op = Operation.OpKind_NO_OP; |
| if (!parseRelationalExpression(tree)) { |
| return false; |
| } |
| tt = lexer.peekToken(); |
| while (tt == TokenType.LEX_EQUAL || tt == TokenType.LEX_NOT_EQUAL || tt == TokenType.LEX_META_EQUAL |
| || tt == TokenType.LEX_META_NOT_EQUAL) { |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| lexer.consumeToken(); |
| parseRelationalExpression(treeR); |
| switch (tt) { |
| case LEX_EQUAL: |
| op = Operation.OpKind_EQUAL_OP; |
| break; |
| case LEX_NOT_EQUAL: |
| op = Operation.OpKind_NOT_EQUAL_OP; |
| break; |
| case LEX_META_EQUAL: |
| op = Operation.OpKind_META_EQUAL_OP; |
| break; |
| case LEX_META_NOT_EQUAL: |
| op = Operation.OpKind_META_NOT_EQUAL_OP; |
| break; |
| default: |
| throw new HyracksDataException("ClassAd: Should not reach here"); |
| } |
| if (treeL.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(op, treeL, treeR, null, newTree); |
| tree.setInnerTree(newTree); |
| } else { |
| tree.setInnerTree(null); |
| return false; |
| } |
| tt = lexer.peekToken(); |
| } |
| return true; |
| } |
| |
| // RelationalExpression .= ShiftExpression |
| // | RelationalExpression '<' ShiftExpression |
| // | RelationalExpression '>' ShiftExpression |
| // | RelationalExpression '<=' ShiftExpression |
| // | RelationalExpression '>=' ShiftExpression |
| private boolean parseRelationalExpression(ExprTreeHolder tree) throws IOException { |
| TokenType tt; |
| if (!parseShiftExpression(tree)) { |
| return false; |
| } |
| tt = lexer.peekToken(); |
| while (tt == TokenType.LEX_LESS_THAN || tt == TokenType.LEX_GREATER_THAN || tt == TokenType.LEX_LESS_OR_EQUAL |
| || tt == TokenType.LEX_GREATER_OR_EQUAL) { |
| int op = Operation.OpKind_NO_OP; |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| lexer.consumeToken(); |
| parseShiftExpression(treeR); |
| switch (tt) { |
| case LEX_LESS_THAN: |
| op = Operation.OpKind_LESS_THAN_OP; |
| break; |
| case LEX_LESS_OR_EQUAL: |
| op = Operation.OpKind_LESS_OR_EQUAL_OP; |
| break; |
| case LEX_GREATER_THAN: |
| op = Operation.OpKind_GREATER_THAN_OP; |
| break; |
| case LEX_GREATER_OR_EQUAL: |
| op = Operation.OpKind_GREATER_OR_EQUAL_OP; |
| break; |
| default: |
| throw new HyracksDataException("ClassAd: Should not reach here"); |
| } |
| if (treeL.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(op, treeL, treeR, null, newTree); |
| tree.setInnerTree(newTree); |
| } else { |
| tree.setInnerTree(null); |
| return false; |
| } |
| tt = lexer.peekToken(); |
| } |
| return true; |
| } |
| |
| // ShiftExpression .= AdditiveExpression |
| // | ShiftExpression '<<' AdditiveExpression |
| // | ShiftExpression '>>' AdditiveExpression |
| // | ShiftExpression '>>>' AdditiveExpression |
| private boolean parseShiftExpression(ExprTreeHolder tree) throws IOException { |
| if (!parseAdditiveExpression(tree)) { |
| return false; |
| } |
| |
| TokenType tt = lexer.peekToken(); |
| while (tt == TokenType.LEX_LEFT_SHIFT || tt == TokenType.LEX_RIGHT_SHIFT || tt == TokenType.LEX_URIGHT_SHIFT) { |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| int op; |
| lexer.consumeToken(); |
| parseAdditiveExpression(treeR); |
| switch (tt) { |
| case LEX_LEFT_SHIFT: |
| op = Operation.OpKind_LEFT_SHIFT_OP; |
| break; |
| case LEX_RIGHT_SHIFT: |
| op = Operation.OpKind_RIGHT_SHIFT_OP; |
| break; |
| case LEX_URIGHT_SHIFT: |
| op = Operation.OpKind_URIGHT_SHIFT_OP; |
| break; |
| default: |
| op = Operation.OpKind_NO_OP; // Make gcc's -wuninitalized happy |
| throw new HyracksDataException("ClassAd: Should not reach here"); |
| } |
| |
| if (treeL.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(op, treeL, treeR, null, newTree); |
| tree.setInnerTree(newTree); |
| } else { |
| tree.setInnerTree(null); |
| return false; |
| } |
| tt = lexer.peekToken(); |
| } |
| return true; |
| } |
| |
| // AdditiveExpression .= MultiplicativeExpression |
| // | AdditiveExpression '+' MultiplicativeExpression |
| // | AdditiveExpression '-' MultiplicativeExpression |
| private boolean parseAdditiveExpression(ExprTreeHolder tree) throws IOException { |
| if (!parseMultiplicativeExpression(tree)) { |
| return false; |
| } |
| |
| TokenType tt = lexer.peekToken(); |
| while (tt == TokenType.LEX_PLUS || tt == TokenType.LEX_MINUS) { |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| lexer.consumeToken(); |
| parseMultiplicativeExpression(treeR); |
| if (treeL.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation( |
| (tt == TokenType.LEX_PLUS) ? Operation.OpKind_ADDITION_OP : Operation.OpKind_SUBTRACTION_OP, |
| treeL, treeR, null, newTree); |
| tree.setInnerTree(newTree); |
| } else { |
| tree.setInnerTree(null); |
| return false; |
| } |
| tt = lexer.peekToken(); |
| } |
| return true; |
| } |
| |
| // MultiplicativeExpression .= UnaryExpression |
| // | MultiplicativeExpression '*' UnaryExpression |
| // | MultiplicativeExpression '/' UnaryExpression |
| // | MultiplicativeExpression '%' UnaryExpression |
| private boolean parseMultiplicativeExpression(ExprTreeHolder tree) throws IOException { |
| if (!parseUnaryExpression(tree)) { |
| return false; |
| } |
| |
| TokenType tt = lexer.peekToken(); |
| while (tt == TokenType.LEX_MULTIPLY || tt == TokenType.LEX_DIVIDE || tt == TokenType.LEX_MODULUS) { |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| int op; |
| lexer.consumeToken(); |
| parseUnaryExpression(treeR); |
| switch (tt) { |
| case LEX_MULTIPLY: |
| op = Operation.OpKind_MULTIPLICATION_OP; |
| break; |
| case LEX_DIVIDE: |
| op = Operation.OpKind_DIVISION_OP; |
| break; |
| case LEX_MODULUS: |
| op = Operation.OpKind_MODULUS_OP; |
| break; |
| default: |
| op = Operation.OpKind_NO_OP; // Make gcc's -wuninitalized happy |
| throw new HyracksDataException("ClassAd: Should not reach here"); |
| } |
| if (treeL.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(op, treeL, treeR, null, newTree); |
| tree.setInnerTree(newTree); |
| } else { |
| tree.setInnerTree(null); |
| return false; |
| } |
| tt = lexer.peekToken(); |
| } |
| return true; |
| } |
| |
| // UnaryExpression .= PostfixExpression |
| // | UnaryOperator UnaryExpression |
| // ( where UnaryOperator is one of { -, +, ~, ! } ) |
| private boolean parseUnaryExpression(ExprTreeHolder tree) throws IOException { |
| TokenType tt = lexer.peekToken(); |
| if (tt == TokenType.LEX_MINUS || tt == TokenType.LEX_PLUS || tt == TokenType.LEX_BITWISE_NOT |
| || tt == TokenType.LEX_LOGICAL_NOT) { |
| lexer.consumeToken(); |
| ExprTreeHolder treeM = objectPool.mutableExprPool.get(); |
| int op = Operation.OpKind_NO_OP; |
| parseUnaryExpression(treeM); |
| switch (tt) { |
| case LEX_MINUS: |
| op = Operation.OpKind_UNARY_MINUS_OP; |
| break; |
| case LEX_PLUS: |
| op = Operation.OpKind_UNARY_PLUS_OP; |
| break; |
| case LEX_BITWISE_NOT: |
| op = Operation.OpKind_BITWISE_NOT_OP; |
| break; |
| case LEX_LOGICAL_NOT: |
| op = Operation.OpKind_LOGICAL_NOT_OP; |
| break; |
| default: |
| throw new HyracksDataException("ClassAd: Shouldn't Get here"); |
| } |
| if (treeM.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(op, treeM, null, null, newTree); |
| tree.setInnerTree(newTree); |
| } else { |
| tree.setInnerTree(null); |
| return (false); |
| } |
| return true; |
| } else { |
| return parsePostfixExpression(tree); |
| } |
| } |
| |
| // PostfixExpression .= PrimaryExpression |
| // | PostfixExpression '.' Identifier |
| // | PostfixExpression '[' Expression ']' |
| private boolean parsePostfixExpression(ExprTreeHolder tree) throws IOException { |
| TokenType tt; |
| if (!parsePrimaryExpression(tree)) { |
| return false; |
| } |
| while ((tt = lexer.peekToken()) == TokenType.LEX_OPEN_BOX || tt == TokenType.LEX_SELECTION) { |
| ExprTreeHolder treeL = tree; |
| ExprTreeHolder treeR = objectPool.mutableExprPool.get(); |
| TokenValue tv = objectPool.tokenValuePool.get(); |
| lexer.consumeToken(); |
| if (tt == TokenType.LEX_OPEN_BOX) { |
| // subscript operation |
| parseExpression(treeR); |
| if (treeL.getInnerTree() != null && treeR.getInnerTree() != null) { |
| Operation newTree = objectPool.operationPool.get(); |
| Operation.createOperation(Operation.OpKind_SUBSCRIPT_OP, treeL, treeR, null, newTree); |
| if (lexer.consumeToken() == TokenType.LEX_CLOSE_BOX) { |
| tree.setInnerTree(newTree); |
| continue; |
| } |
| } |
| tree.setInnerTree(null); |
| return false; |
| } else if (tt == TokenType.LEX_SELECTION) { |
| // field selection operation |
| if ((tt = lexer.consumeToken(tv)) != TokenType.LEX_IDENTIFIER) { |
| throw new HyracksDataException("second argument of selector must be an " + "identifier (got" |
| + String.valueOf(Lexer.strLexToken(tt)) + ")"); |
| } |
| AttributeReference newTree = objectPool.attrRefPool.get(); |
| AttributeReference.createAttributeReference(treeL, tv.getStrValue(), false, newTree); |
| tree.setInnerTree(newTree); |
| } |
| } |
| return true; |
| } |
| |
| // PrimaryExpression .= Identifier |
| // | FunctionCall |
| // | '.' Identifier |
| // | '(' Expression ')' |
| // | Literal |
| // FunctionCall .= Identifier ArgumentList |
| // ( Constant may be |
| // boolean,undefined,error,string,integer,real,classad,list ) |
| // ( ArgumentList non-terminal includes parentheses ) |
| private boolean parsePrimaryExpression(ExprTreeHolder tree) throws IOException { |
| ExprTreeHolder treeL; |
| TokenValue tv = objectPool.tokenValuePool.get(); |
| TokenType tt; |
| tree.setInnerTree(null); |
| switch ((tt = lexer.peekToken(tv))) { |
| // identifiers |
| case LEX_IDENTIFIER: |
| isExpr = true; |
| lexer.consumeToken(); |
| // check for funcion call |
| if ((tt = lexer.peekToken()) == TokenType.LEX_OPEN_PAREN) { |
| ExprList argList = objectPool.exprListPool.get(); |
| if (!parseArgumentList(argList)) { |
| tree.setInnerTree(null); |
| return false; |
| }; |
| // special case function-calls should be converted |
| // into a literal expression if the argument is a |
| // string literal |
| if (shouldEvaluateAtParseTime(tv.getStrValue().toString(), argList)) { |
| tree.setInnerTree(evaluateFunction(tv.getStrValue().toString(), argList)); |
| } else { |
| tree.setInnerTree( |
| FunctionCall.createFunctionCall(tv.getStrValue().toString(), argList, objectPool)); |
| } |
| } else { |
| // I don't think this is ever hit |
| tree.setInnerTree( |
| AttributeReference.createAttributeReference(null, tv.getStrValue(), false, objectPool)); |
| } |
| return (tree.getInnerTree() != null); |
| case LEX_SELECTION: |
| isExpr = true; |
| lexer.consumeToken(); |
| if ((tt = lexer.consumeToken(tv)) == TokenType.LEX_IDENTIFIER) { |
| // the boolean final arg signifies that reference is absolute |
| tree.setInnerTree( |
| AttributeReference.createAttributeReference(null, tv.getStrValue(), true, objectPool)); |
| return (tree.size() != 0); |
| } |
| // not an identifier following the '.' |
| throw new HyracksDataException( |
| "need identifier in selection expression (got" + Lexer.strLexToken(tt) + ")"); |
| // parenthesized expression |
| case LEX_OPEN_PAREN: { |
| isExpr = true; |
| lexer.consumeToken(); |
| treeL = objectPool.mutableExprPool.get(); |
| parseExpression(treeL); |
| if (treeL.getInnerTree() == null) { |
| tree.resetExprTree(null); |
| return false; |
| } |
| |
| if ((tt = lexer.consumeToken()) != TokenType.LEX_CLOSE_PAREN) { |
| throw new HyracksDataException("exptected LEX_CLOSE_PAREN, but got " + Lexer.strLexToken(tt)); |
| // tree.resetExprTree(null); |
| // return false; |
| } |
| // assume make operation will return a new tree |
| tree.setInnerTree(Operation.createOperation(Operation.OpKind_PARENTHESES_OP, treeL, objectPool)); |
| return (tree.size() != 0); |
| } |
| // constants |
| case LEX_OPEN_BOX: { |
| isExpr = true; |
| ClassAd newAd = objectPool.classAdPool.get(); |
| if (!parseClassAd(newAd)) { |
| tree.resetExprTree(null); |
| return false; |
| } |
| tree.setInnerTree(newAd); |
| } |
| return true; |
| |
| case LEX_OPEN_BRACE: { |
| isExpr = true; |
| ExprList newList = objectPool.exprListPool.get(); |
| if (!parseExprList(newList)) { |
| tree.setInnerTree(null); |
| return false; |
| } |
| tree.setInnerTree(newList); |
| } |
| return true; |
| |
| case LEX_UNDEFINED_VALUE: { |
| Value val = objectPool.valuePool.get(); |
| lexer.consumeToken(); |
| val.setUndefinedValue(); |
| tree.setInnerTree(Literal.createLiteral(val, objectPool)); |
| return (tree.getInnerTree() != null); |
| } |
| case LEX_ERROR_VALUE: { |
| Value val = objectPool.valuePool.get(); |
| lexer.consumeToken(); |
| val.setErrorValue(); |
| tree.setInnerTree(Literal.createLiteral(val, objectPool)); |
| return (tree.getInnerTree() != null); |
| } |
| case LEX_BOOLEAN_VALUE: { |
| Value val = objectPool.valuePool.get(); |
| MutableBoolean b = new MutableBoolean(); |
| tv.getBoolValue(b); |
| lexer.consumeToken(); |
| val.setBooleanValue(b); |
| tree.setInnerTree(Literal.createLiteral(val, objectPool)); |
| return (tree.getInnerTree() != null); |
| } |
| |
| case LEX_INTEGER_VALUE: { |
| Value val = objectPool.valuePool.get(); |
| lexer.consumeToken(); |
| val.setIntegerValue(tv.getIntValue()); |
| tree.setInnerTree(Literal.createLiteral(val, tv.getFactor(), objectPool)); |
| return (tree.getInnerTree() != null); |
| } |
| |
| case LEX_REAL_VALUE: { |
| Value val = objectPool.valuePool.get(); |
| lexer.consumeToken(); |
| val.setRealValue(tv.getRealValue()); |
| tree.setInnerTree(Literal.createLiteral(val, tv.getFactor(), objectPool)); |
| return (tree.getInnerTree() != null); |
| } |
| |
| case LEX_STRING_VALUE: { |
| Value val = objectPool.valuePool.get(); |
| lexer.consumeToken(); |
| val.setStringValue(tv.getStrValue()); |
| tree.setInnerTree(Literal.createLiteral(val, objectPool)); |
| return (tree.getInnerTree() != null); |
| } |
| |
| case LEX_ABSOLUTE_TIME_VALUE: { |
| Value val = objectPool.valuePool.get(); |
| lexer.consumeToken(); |
| val.setAbsoluteTimeValue(tv.getTimeValue()); |
| tree.setInnerTree(Literal.createLiteral(val, objectPool)); |
| return (tree.getInnerTree() != null); |
| } |
| |
| case LEX_RELATIVE_TIME_VALUE: { |
| Value val = objectPool.valuePool.get(); |
| lexer.consumeToken(); |
| val.setRelativeTimeValue(tv.getTimeValue().getRelativeTime()); |
| tree.setInnerTree(Literal.createLiteral(val, objectPool)); |
| return (tree.getInnerTree() != null); |
| } |
| |
| default: |
| tree.setInnerTree(null); |
| return false; |
| } |
| } |
| |
| // ArgumentList .= '(' ListOfArguments ')' |
| // ListOfArguments .= (epsilon) |
| // | ListOfArguments ',' Expression |
| public boolean parseArgumentList(ExprList argList) throws IOException { |
| TokenType tt; |
| argList.clear(); |
| if ((tt = lexer.consumeToken()) != TokenType.LEX_OPEN_PAREN) { |
| throw new HyracksDataException("expected LEX_OPEN_PAREN but got " + String.valueOf(Lexer.strLexToken(tt))); |
| // return false; |
| } |
| tt = lexer.peekToken(); |
| ExprTreeHolder tree = objectPool.mutableExprPool.get(); |
| while (tt != TokenType.LEX_CLOSE_PAREN) { |
| // parse the expression |
| tree.reset(); |
| parseExpression(tree); |
| if (tree.getInnerTree() == null) { |
| argList.clear(); |
| return false; |
| } |
| // insert the expression into the argument list |
| argList.add(tree.getInnerTree()); |
| // the next token must be a ',' or a ')' |
| // or it can be a ';' if using old ClassAd semantics |
| tt = lexer.peekToken(); |
| if (tt == TokenType.LEX_COMMA || (tt == TokenType.LEX_SEMICOLON && false)) { |
| lexer.consumeToken(); |
| } else if (tt != TokenType.LEX_CLOSE_PAREN) { |
| argList.clear(); |
| throw new HyracksDataException( |
| "expected LEX_COMMA or LEX_CLOSE_PAREN but got " + String.valueOf(Lexer.strLexToken(tt))); |
| // return false; |
| } |
| } |
| lexer.consumeToken(); |
| return true; |
| } |
| |
| // ClassAd .= '[' AttributeList ']' |
| // AttributeList .= (epsilon) |
| // | Attribute ';' AttributeList |
| // Attribute .= Identifier '=' Expression |
| public boolean parseClassAd(ClassAd ad) throws IOException { |
| return parseClassAd(ad, false); |
| } |
| |
| public boolean parseClassAdOld(ClassAd ad, boolean full) throws IOException { |
| return false; |
| } |
| |
| public boolean parseClassAd(ClassAd ad, boolean full) throws IOException { |
| TokenType tt; |
| ad.clear(); |
| if ((tt = lexer.consumeToken()) != TokenType.LEX_OPEN_BOX) { |
| return false; |
| } |
| tt = lexer.peekToken(); |
| TokenValue tv = objectPool.tokenValuePool.get(); |
| ExprTreeHolder tree = objectPool.mutableExprPool.get(); |
| while (tt != TokenType.LEX_CLOSE_BOX) { |
| // Get the name of the expression |
| tv.reset(); |
| tree.reset(); |
| tt = lexer.consumeToken(tv); |
| if (tt == TokenType.LEX_SEMICOLON) { |
| // We allow empty expressions, so if someone give a double |
| // semicolon, it doesn't |
| // hurt. Technically it's not right, but we shouldn't make users |
| // pay the price for |
| // a meaningless mistake. See condor-support #1881 for a user |
| // that was bitten by this. |
| continue; |
| } |
| if (tt != TokenType.LEX_IDENTIFIER) { |
| throw new HyracksDataException( |
| "while parsing classad: expected LEX_IDENTIFIER " + " but got " + Lexer.strLexToken(tt)); |
| } |
| |
| // consume the intermediate '=' |
| if ((tt = lexer.consumeToken()) != TokenType.LEX_BOUND_TO) { |
| throw new HyracksDataException( |
| "while parsing classad: expected LEX_BOUND_TO " + " but got " + Lexer.strLexToken(tt)); |
| } |
| |
| isExpr = false; |
| parseExpression(tree); |
| if (tree.getInnerTree() == null) { |
| throw new HyracksDataException("parse expression returned empty tree"); |
| } |
| |
| // insert the attribute into the classad |
| if (!ad.insert(tv.getStrValue().toString(), tree)) { |
| throw new HyracksDataException("Couldn't insert value to classad"); |
| } |
| |
| // the next token must be a ';' or a ']' |
| tt = lexer.peekToken(); |
| if (tt != TokenType.LEX_SEMICOLON && tt != TokenType.LEX_CLOSE_BOX) { |
| throw new HyracksDataException("while parsing classad: expected LEX_SEMICOLON or " |
| + "LEX_CLOSE_BOX but got " + Lexer.strLexToken(tt)); |
| } |
| |
| // Slurp up any extra semicolons. This does not duplicate the work |
| // at the top of the loop |
| // because it accounts for the case where the last expression has |
| // extra semicolons, |
| // while the first case accounts for optional beginning semicolons. |
| while (tt == TokenType.LEX_SEMICOLON) { |
| lexer.consumeToken(); |
| tt = lexer.peekToken(); |
| } |
| } |
| lexer.consumeToken(); |
| // if a full parse was requested, ensure that input is exhausted |
| if (full && (lexer.consumeToken() != TokenType.LEX_END_OF_INPUT)) { |
| throw new HyracksDataException("while parsing classad: expected LEX_END_OF_INPUT for " |
| + "full parse but got " + Lexer.strLexToken(tt)); |
| } |
| return true; |
| } |
| |
| // ExprList .= '{' ListOfExpressions '}' |
| // ListOfExpressions .= (epsilon) |
| // | Expression ',' ListOfExpressions |
| public boolean parseExprList(ExprList list) throws IOException { |
| return parseExprList(list, false); |
| } |
| |
| public boolean parseExprList(ExprList list, boolean full) throws IOException { |
| TokenType tt; |
| ExprTreeHolder tree = objectPool.mutableExprPool.get(); |
| ExprList loe = objectPool.exprListPool.get(); |
| |
| if ((tt = lexer.consumeToken()) != TokenType.LEX_OPEN_BRACE) { |
| throw new HyracksDataException( |
| "while parsing expression list: expected LEX_OPEN_BRACE" + " but got " + Lexer.strLexToken(tt)); |
| // return false; |
| } |
| tt = lexer.peekToken(); |
| while (tt != TokenType.LEX_CLOSE_BRACE) { |
| // parse the expression |
| parseExpression(tree); |
| if (tree.getInnerTree() == null) { |
| throw new HyracksDataException("while parsing expression list: expected " |
| + "LEX_CLOSE_BRACE or LEX_COMMA but got " + Lexer.strLexToken(tt)); |
| } |
| |
| // insert the expression into the list |
| loe.add(tree); |
| |
| // the next token must be a ',' or a '}' |
| tt = lexer.peekToken(); |
| if (tt == TokenType.LEX_COMMA) { |
| lexer.consumeToken(); |
| } else if (tt != TokenType.LEX_CLOSE_BRACE) { |
| throw new HyracksDataException("while parsing expression list: expected " |
| + "LEX_CLOSE_BRACE or LEX_COMMA but got " + Lexer.strLexToken(tt)); |
| } |
| } |
| |
| lexer.consumeToken(); |
| list.setValue(ExprList.createExprList(loe, objectPool)); |
| |
| // if a full parse was requested, ensure that input is exhausted |
| if (full && (lexer.consumeToken() != TokenType.LEX_END_OF_INPUT)) { |
| list.clear(); |
| throw new HyracksDataException("while parsing expression list: expected " |
| + "LEX_END_OF_INPUT for full parse but got " + Lexer.strLexToken(tt)); |
| } |
| return true; |
| } |
| |
| public boolean shouldEvaluateAtParseTime(String functionName, ExprList argList) throws HyracksDataException { |
| boolean should_eval = false; |
| if (functionName.equalsIgnoreCase("absTime") || functionName.equalsIgnoreCase("relTime")) { |
| if (argList.size() == 1 && argList.get(0).getKind() == NodeKind.LITERAL_NODE) { |
| Value val = objectPool.valuePool.get(); |
| AMutableNumberFactor factor = objectPool.numFactorPool.get(); |
| ((Literal) argList.get(0)).getComponents(val, factor); |
| if (val.isStringValue()) { |
| should_eval = true; |
| } |
| } |
| } |
| return should_eval; |
| } |
| |
| public ExprTree evaluateFunction(String functionName, ExprList argList) throws HyracksDataException { |
| Value val = objectPool.valuePool.get(); |
| AMutableNumberFactor factor = objectPool.numFactorPool.get(); |
| ExprTreeHolder tree = objectPool.mutableExprPool.get(); |
| ((Literal) argList.get(0)).getComponents(val, factor); |
| |
| AMutableCharArrayString string_value = objectPool.strPool.get(); |
| if (val.isStringValue(string_value)) { |
| if (functionName.equalsIgnoreCase("absTime")) { |
| tree.setInnerTree(Literal.createAbsTime(string_value, objectPool)); |
| } else if (functionName.equalsIgnoreCase("relTime")) { |
| tree.setInnerTree(Literal.createRelTime(string_value, objectPool)); |
| } else { |
| tree.setInnerTree(FunctionCall.createFunctionCall(functionName, argList, objectPool)); |
| } |
| } else { |
| tree.setInnerTree(FunctionCall.createFunctionCall(functionName, argList, objectPool)); |
| } |
| return tree; |
| } |
| |
| public TokenType peekToken() throws IOException { |
| if (lexer.wasInitialized()) { |
| return lexer.peekToken(); |
| } else { |
| return TokenType.LEX_TOKEN_ERROR; |
| } |
| } |
| |
| public TokenType consumeToken() throws IOException { |
| if (lexer.wasInitialized()) { |
| return lexer.consumeToken(); |
| } else { |
| return TokenType.LEX_TOKEN_ERROR; |
| } |
| } |
| |
| public boolean parseExpression(String buf, ExprTreeHolder tree) throws IOException { |
| return parseExpression(buf, tree, false); |
| } |
| |
| public boolean parseExpression(String buf, ExprTreeHolder tree, boolean full) throws IOException { |
| boolean success; |
| StringLexerSource lexer_source = new StringLexerSource(buf); |
| |
| success = false; |
| if (lexer.initialize(lexer_source)) { |
| success = parseExpression(tree, full); |
| } |
| return success; |
| } |
| |
| public ClassAd parseClassAd(String input_basic) throws IOException { |
| return parseClassAd(input_basic, false); |
| } |
| |
| public LexerSource getLexerSource() { |
| return currentSource; |
| } |
| |
| public void setLexerSource(LexerSource lexerSource) { |
| this.currentSource = lexerSource; |
| } |
| |
| @Override |
| public void parse(IRawRecord<? extends char[]> record, DataOutput out) throws IOException { |
| try { |
| resetPools(); |
| if (oldFormat) { |
| int maxOffset = record.size(); |
| rootAd.clear(); |
| char[] buffer = record.get(); |
| aInt32.setValue(0); |
| String line = readLine(buffer, aInt32, maxOffset); |
| while (line != null) { |
| if (line.trim().length() == 0) { |
| if (rootAd.size() == 0) { |
| line = readLine(buffer, aInt32, maxOffset); |
| continue; |
| } |
| break; |
| } else if (!rootAd.insert(line)) { |
| throw new HyracksDataException("Couldn't parse expression in line: " + line); |
| } |
| line = readLine(buffer, aInt32, maxOffset); |
| } |
| } else { |
| currentSource.setNewSource(record.get()); |
| rootAd.reset(); |
| asterixParseClassAd(rootAd); |
| } |
| parseRecord(recordType, rootAd, out); |
| } catch (Exception e) { |
| throw new HyracksDataException(e); |
| } |
| } |
| } |