| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| grammar Dml; |
| |
| @header |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| } |
| |
| // Entry rule: a DML program is any interleaving of statements and function |
| // definitions, read up to EOF. |
| // Function definitions are only reachable from this rule, i.e. they must be |
| // global (not nested or inside a while block). |
| programroot |
|     : ( blocks+=statement |
|       | functionBlocks+=functionStatement |
|       )* |
|       EOF |
|     ; |
| |
| // A single DML statement: import / setwd, function-call assignment, plain, |
| // ifdef and accumulator assignment, and the if / for / parfor / while constructs. |
| // The simple statements accept any number of trailing ';'; inside the bodies |
| // of if / for / parfor / while each nested statement may be followed by ';' too. |
| // Every alternative carries a '# Label', so each one has its own label name. |
| statement returns [ org.apache.sysds.parser.dml.StatementInfo info ] |
| @init { |
| // This action runs on rule entry, regardless of which alternative is matched |
| $info = new org.apache.sysds.parser.dml.StatementInfo(); |
| } : |
| // ------------------------------------------ |
| // ImportStatement & PathStatement |
| 'source' '(' filePath = STRING ')' 'as' namespace=ID ';'* # ImportStatement |
| | 'setwd' '(' pathValue = STRING ')' ';'* # PathStatement |
| // ------------------------------------------ |
| // Treat a function call as an AssignmentStatement or MultiAssignmentStatement. |
| // For backward compatibility and also since the behavior of foo() * A + foo() ... where foo returns A |
| // Convert FunctionCallIdentifier(paramExprs, ..) -> source |
| | // TODO: Throw an informative error if the user does not provide the optional assignment |
| ( targetList=dataIdentifier ('='|'<-') )? name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # FunctionCallAssignmentStatement |
| | '[' targetList+=dataIdentifier (',' targetList+=dataIdentifier)* ']' ('='|'<-') name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # FunctionCallMultiAssignmentStatement |
| // {notifyErrorListeners("Too many parentheses");} |
| // ------------------------------------------ |
| // AssignmentStatement (the 'ifdef' form is listed first, ahead of the generic assignment) |
| | targetList=dataIdentifier op=('<-'|'=') 'ifdef' '(' commandLineParam=dataIdentifier ',' source=expression ')' ';'* # IfdefAssignmentStatement |
| | targetList=dataIdentifier op=('<-'|'=') source=expression ';'* # AssignmentStatement |
| | targetList=dataIdentifier op='+=' source=expression ';'* # AccumulatorAssignmentStatement |
| // ------------------------------------------ |
| // We don't support block statement |
| // | '{' body+=expression ';'* ( body+=expression ';'* )* '}' # BlockStatement |
| // ------------------------------------------ |
| // IfStatement: each branch is either one statement or a braced list of statements |
| | 'if' '(' predicate=expression ')' (ifBody+=statement ';'* | '{' (ifBody+=statement ';'*)* '}') ('else' (elseBody+=statement ';'* | '{' (elseBody+=statement ';'*)* '}'))? # IfStatement |
| // ------------------------------------------ |
| // ForStatement & ParForStatement: both accept optional ', name=value' parameters after the range |
| | 'for' '(' iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ')' (body+=statement ';'* | '{' (body+=statement ';'* )* '}') # ForStatement |
| // Convert strictParameterizedExpression to HashMap<String, String> for parForParams |
| | 'parfor' '(' iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ')' (body+=statement ';'* | '{' (body+=statement ';'*)* '}') # ParForStatement |
| // WhileStatement |
| | 'while' '(' predicate=expression ')' (body+=statement ';'* | '{' (body+=statement ';'*)* '}') # WhileStatement |
| // ------------------------------------------ |
| ; |
| |
| // Range of a for/parfor loop: either "from:to" or a call-shaped form |
| // ID(from, to [, increment]). The grammar accepts any ID as the callee name. |
| iterablePredicate returns [ org.apache.sysds.parser.dml.ExpressionInfo info ] |
| @init { |
|     // Executed on rule entry, whichever alternative is taken. |
|     $info = new org.apache.sysds.parser.dml.ExpressionInfo(); |
| } |
|     : from=expression ':' to=expression              #IterablePredicateColonExpression |
|     | ID '(' |
|           from=expression ',' to=expression |
|           ( ',' increment=expression )? |
|       ')'                                            #IterablePredicateSeqExpression |
|     ; |
| |
| // Function definitions: internal ('function', with a statement body) and |
| // external ('externalFunction', with an "implemented in (key=\"value\", ...)" clause). |
| // Internal functions take typedArgAssign inputs; external functions and all |
| // output lists take typedArgNoAssign. |
| functionStatement returns [ org.apache.sysds.parser.dml.StatementInfo info ] |
| @init { |
|     // Executed on rule entry, whichever alternative is taken. |
|     $info = new org.apache.sysds.parser.dml.StatementInfo(); |
| } |
|     // small change: only allow typed arguments here ... instead of data identifier |
|     : name=ID ('<-'|'=') 'function' |
|         '(' ( inputParams+=typedArgAssign (',' inputParams+=typedArgAssign)* )? ')' |
|         ( 'return' '(' ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? ')' )? |
|         '{' ( body+=statement ';'* )* '}' |
|         ';'*                                   # InternalFunctionDefExpression |
|     | name=ID ('<-'|'=') 'externalFunction' |
|         '(' ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? ')' |
|         ( 'return' '(' ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? ')' )? |
|         'implemented' 'in' |
|         '(' ( otherParams+=strictParameterizedKeyValueString (',' otherParams+=strictParameterizedKeyValueString)* )? ')' |
|         ';'*                                   # ExternalFunctionDefExpression |
|     ; |
| |
| |
| // Other data identifiers are typedArgNoAssign, parameterizedExpression and strictParameterizedExpression |
| // dataIdentifier itself covers an optionally indexed variable name and the |
| // command-line parameters ($name and $<position>). |
| dataIdentifier returns [ org.apache.sysds.parser.dml.ExpressionInfo dataInfo ] |
| @init { |
|     // Executed on rule entry, whichever alternative is taken. |
|     $dataInfo = new org.apache.sysds.parser.dml.ExpressionInfo(); |
|     // $dataInfo.expr = new org.apache.sysds.parser.DataIdentifier(); |
| } |
|     // Indexed identifier: name[rows, cols]. Every bound is optional and each |
|     // dimension is either a single expression or a lower:upper range. |
|     : name=ID |
|       '[' |
|           ( rowLower=expression ( ':' rowUpper=expression )? )? |
|           ( ',' ( colLower=expression ( ':' colUpper=expression )? )? )? |
|       ']'                          # IndexedExpression |
|     | ID                           # SimpleDataIdentifierExpression |
|     | COMMANDLINE_NAMED_ID         # CommandlineParamExpression |
|     | COMMANDLINE_POSITION_ID      # CommandlinePositionExpression |
|     ; |
| // Expressions. The operator alternatives are directly left-recursive, and |
| // ANTLR 4 gives earlier alternatives higher precedence, so the order below is |
| // the precedence ladder from tightest to loosest binding: |
| //   ^ (right-assoc), unary -/+, %*%, %/% and %%, * and /, + and -, |
| //   relational, !, & / &&, | / ||. |
| // The remaining alternatives are the primaries: builtin function call, |
| // parenthesized expression, bracketed list, constants and data identifiers. |
| expression returns [ org.apache.sysds.parser.dml.ExpressionInfo info ] |
| @init { |
| // This action runs on rule entry, regardless of which alternative is matched |
| $info = new org.apache.sysds.parser.dml.ExpressionInfo(); |
| // $info.expr = new org.apache.sysds.parser.BinaryExpression(org.apache.sysds.parser.Expression.BinaryOp.INVALID); |
| } : |
| // ------------------------------------------ |
| // BinaryExpression |
| // power |
| <assoc=right> left=expression op='^' right=expression # PowerExpression |
| // unary plus and minus (listed after power, so -a^b groups as -(a^b)) |
| | op=('-'|'+') left=expression # UnaryExpression |
| // sequence - disabled, since ':' ranges are only used in for loops (see iterablePredicate) |
| //| left=expression op=':' right=expression # SequenceExpression |
| // matrix multiply |
| | left=expression op='%*%' right=expression # MatrixMulExpression |
| // modulus and integer division |
| | left=expression op=('%/%' | '%%' ) right=expression # ModIntDivExpression |
| // arithmetic multiply and divide |
| | left=expression op=('*'|'/') right=expression # MultDivExpression |
| // arithmetic addition and subtraction |
| | left=expression op=('+'|'-') right=expression # AddSubExpression |
| // ------------------------------------------ |
| // RelationalExpression |
| | left=expression op=('>'|'>='|'<'|'<='|'=='|'!=') right=expression # RelationalExpression |
| // ------------------------------------------ |
| // BooleanExpression |
| // boolean not (listed after the relational operators, so !a == b groups as !(a == b)) |
| | op='!' left=expression # BooleanNotExpression |
| // boolean and |
| | left=expression op=('&'|'&&') right=expression # BooleanAndExpression |
| // boolean or |
| | left=expression op=('|'|'||') right=expression # BooleanOrExpression |
| |
| // --------------------------------- |
| // only applicable for builtin function expressions |
| // NOTE(review): the trailing ';'* lets semicolons be consumed inside an |
| // expression; presumably unintended - confirm before changing. |
| | name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # BuiltinFunctionExpression |
| |
| // Atomic (parenthesized) expression |
| | '(' left=expression ')' # AtomicExpression |
| |
| // Should you allow indexed expression here ? |
| | '[' targetList+=expression (',' targetList+=expression)* ']' # MultiIdExpression |
| |
| // | BOOLEAN # ConstBooleanIdExpression |
| | 'TRUE' # ConstTrueExpression |
| | 'FALSE' # ConstFalseExpression |
| | INT # ConstIntIdExpression |
| | DOUBLE # ConstDoubleIdExpression |
| | STRING # ConstStringIdExpression |
| | dataIdentifier # DataIdExpression |
| // Special |
| // | 'NULL' | 'NA' | 'Inf' | 'NaN' |
| ; |
| |
| // Typed formal argument without a default value: <type> <name> |
| typedArgNoAssign |
|     : paramType=ml_type paramName=ID |
|     ; |
| // Typed formal argument of an internal function: <type> <name>, or <type> |
| // followed by an expression that may be prefixed with "<name> =". |
| typedArgAssign |
|     : paramType=ml_type |
|       ( paramName=ID |
|       | ( paramName=ID '=' )? paramVal=expression |
|       ) |
|     ; |
| |
| // Call argument: an expression, optionally named ("name = expr") |
| parameterizedExpression |
|     : ( paramName=ID '=' )? paramVal=expression |
|     ; |
| // Call argument whose name is mandatory ("name = expr") |
| strictParameterizedExpression |
|     : paramName=ID '=' paramVal=expression |
|     ; |
| // Mandatory-name argument whose value must be a string literal ("name = 'text'") |
| strictParameterizedKeyValueString |
|     : paramName=ID '=' paramVal=STRING |
|     ; |
| // Identifier: a letter followed by letters, digits or '_', optionally |
| // prefixed with "<namespace>::" where the namespace has the same shape. |
| // A fixed set of dotted names is additionally accepted as identifiers. |
| ID |
|     : ( ALPHABET ( ALPHABET | DIGIT | '_' )* '::' )? |
|       ALPHABET ( ALPHABET | DIGIT | '_' )* |
|     // Special ID cases: |
|     // | 'matrix' // --> This is a special case which causes lot of headache |
|     | 'as.scalar' |
|     | 'as.matrix' |
|     | 'as.frame' |
|     | 'as.double' |
|     | 'as.integer' |
|     | 'as.logical' |
|     | 'index.return' |
|     | 'empty.return' |
|     | 'lower.tail' |
|     | 'lower.tri' |
|     | 'upper.tri' |
|     | 'is.na' |
|     | 'is.nan' |
|     | 'is.infinite' |
|     ; |
| // The datatype name 'matrix' clashes with the builtin function of the same |
| // name, so as a workaround the datatype position is parsed as a plain ID |
| // (see dataType) instead of a keyword. |
| // A type is either a bare value type or <dataType>[<valueType>]. |
| ml_type |
|     : valueType |
|     | dataType '[' valueType ']' |
|     ; |
| // To keep the number of keywords small, value types are case-sensitive: only |
| // the all-lowercase and the capitalized spellings are accepted. |
| // Full case-insensitivity would need e.g. ('i' | 'I') ('n' | 'N') ('t' | 'T') for 'int'. |
| valueType |
|     : 'int' |
|     | 'integer' |
|     | 'string' |
|     | 'boolean' |
|     | 'double' |
|     | 'unknown' |
|     | 'Int' |
|     | 'Integer' |
|     | 'String' |
|     | 'Boolean' |
|     | 'Double' |
|     | 'Unknown' |
|     ; |
| // Data type of a typed argument. Any ID is accepted by the grammar; the |
| // in-grammar check for "matrix" is disabled (kept below for reference). |
| dataType |
|     // 'scalar' # ScalarDataTypeDummyCheck |
|     // | |
|     : ID        # MatrixDataTypeCheck |
|     // disabled action: { if($ID.text.compareTo("matrix") != 0) { notifyErrorListeners("incorrect datatype"); } } |
|     //| 'matrix' //---> See ID, this causes lot of headache |
|     ; |
| // Integer literal: one or more digits with an optional 'L' / 'l' suffix |
| INT |
|     : DIGIT+ ( 'L' | 'l' )? |
|     ; |
| // BOOLEAN : 'TRUE' | 'FALSE'; |
| // Floating-point literal with optional exponent and optional 'L' / 'l' suffix. |
| // Three shapes: digits '.' [digits], digits alone, and '.' digits. |
| // A plain digit run fits the second shape as well as INT. |
| DOUBLE |
|     : DIGIT+ '.' DIGIT* EXP? ( 'L' | 'l' )? |
|     | DIGIT+ EXP? ( 'L' | 'l' )? |
|     | '.' DIGIT+ EXP? ( 'L' | 'l' )? |
|     ; |
| // Character classes used to build ID, INT, DOUBLE and the command-line ids. |
| // NOTE(review): these are regular token rules, not fragments; presumably they |
| // never surface as tokens because INT and ID are declared earlier - confirm. |
| DIGIT |
|     : [0-9] |
|     ; |
| ALPHABET |
|     : 'a'..'z' |
|     | 'A'..'Z' |
|     ; |
| // Exponent part of a DOUBLE literal: 'E' or 'e', an optional sign, then digits. |
| // The digits are spelled DIGIT+ rather than INT because INT also admits a |
| // trailing 'L' / 'l'; together with DOUBLE's own optional suffix that allowed |
| // a malformed literal such as 1e5LL to lex as a single DOUBLE token. |
| fragment EXP : ('E' | 'e') ('+' | '-')? DIGIT+ ; |
| // Command-line parameters: '$' + name (named) and '$' + digits (positional) |
| COMMANDLINE_NAMED_ID |
|     : '$' ALPHABET ( ALPHABET | DIGIT | '_' )* |
|     ; |
| COMMANDLINE_POSITION_ID |
|     : '$' DIGIT+ |
|     ; |
| |
| // String literal in double or single quotes. Inside the quotes a backslash |
| // must start one of the ESC sequences; the matching quote ends the literal. |
| STRING |
|     : '"'  ( ESC | ~[\\"] )*? '"' |
|     | '\'' ( ESC | ~[\\'] )*? '\'' |
|     ; |
| // Escape sequences accepted inside STRING: backslash followed by b, t, n, f, r, ", ' or backslash |
| fragment ESC |
|     : '\\' [btnfr"'\\] |
|     ; |
| // Comments, whitespaces and new line |
| // A line comment runs from '#' up to, but not including, the line terminator; |
| // the terminator itself is discarded by WHITESPACE. Not requiring a trailing |
| // '\n' means a comment on the last line of a script without a final newline |
| // is skipped too, instead of failing to match and being lexed as code. |
| LINE_COMMENT : '#' ~[\r\n]* -> skip ; |
| // Block comment: everything from '/*' to the first following '*/' is discarded |
| MULTILINE_BLOCK_COMMENT |
|     : '/*' .*? '*/' -> skip |
|     ; |
| // Runs of blanks, tabs, carriage returns and newlines are discarded |
| WHITESPACE |
|     : [ \t\r\n]+ -> skip |
|     ; |