blob: e98df385adb030cd646694d43cacc039cc8d531e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.parser.pydml;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.Map;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.DefaultErrorStrategy;
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.misc.ParseCancellationException;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.parser.AParserWrapper;
import org.apache.sysml.parser.DMLProgram;
import org.apache.sysml.parser.FunctionStatementBlock;
import org.apache.sysml.parser.ImportStatement;
import org.apache.sysml.parser.LanguageException;
import org.apache.sysml.parser.ParseException;
import org.apache.sysml.parser.Statement;
import org.apache.sysml.parser.common.CustomErrorListener;
import org.apache.sysml.parser.dml.DMLParserWrapper;
import org.apache.sysml.parser.pydml.PydmlParser.FunctionStatementContext;
import org.apache.sysml.parser.pydml.PydmlParser.ProgramrootContext;
import org.apache.sysml.parser.pydml.PydmlParser.StatementContext;
/**
* Logic of this wrapper is similar to DMLParserWrapper.
*
* Note: ExpressionInfo and StatementInfo are simply wrapper objects and are reused in both DML and PyDML parsers.
*
*/
public class PyDMLParserWrapper extends AParserWrapper
{
    // Log category is deliberately left as in the original (DMLScript's name).
    private static final Log LOG = LogFactory.getLog(DMLScript.class.getName());

    /**
     * Parses a PyDML script. Either pass both the file name and the script text
     * (local file), only the script text, or only the file name (in which case
     * the script is read via {@code DMLParserWrapper.readDMLScript}).
     *
     * @param fileName either full path or null; used for reading the script when
     *                 {@code dmlScript} is null and for error reporting
     * @param dmlScript script contents, or null to read them from {@code fileName}
     * @param argVals script arguments, handed to the syntactic validator
     * @return the result of {@link #doParse(String, String, String, Map)} with no source namespace
     * @throws ParseException if the script cannot be read or parsed, or if
     *                        validation reports at least one error
     */
    @Override
    public DMLProgram parse(String fileName, String dmlScript, Map<String,String> argVals) throws ParseException {
        return doParse(fileName, dmlScript, null, argVals);
    }

    /**
     * Parses and validates a PyDML script and converts it into a {@link DMLProgram}.
     * Besides {@link #parse(String, String, Map)}, this function is supposed to be
     * called directly only from PydmlSyntacticValidator when it encounters 'import'.
     *
     * @param fileName script file name
     * @param dmlScript script file contents; read from {@code fileName} when null
     * @param sourceNamespace namespace from source statement
     * @param argVals script arguments
     * @return the assembled program, or null if the program could not be
     *         assembled from the parse tree (the cause is logged)
     * @throws ParseException if the script cannot be read, the parser fails, or
     *                        the error listener recorded at least one error
     */
    public DMLProgram doParse(String fileName, String dmlScript, String sourceNamespace, Map<String,String> argVals) throws ParseException {
        ANTLRInputStream in;
        try {
            if (dmlScript == null) {
                dmlScript = DMLParserWrapper.readDMLScript(fileName, LOG);
            }
            // Feed the characters to ANTLR directly. Encoding the script to bytes
            // and decoding it again with the platform default charset silently
            // replaces characters that charset cannot represent.
            in = new ANTLRInputStream(new StringReader(dmlScript));
        }
        catch (FileNotFoundException e) {
            throw new ParseException("Cannot find file: " + fileName, e);
        }
        catch (IOException e) {
            throw new ParseException("Cannot open file: " + fileName, e);
        }
        catch (LanguageException e) {
            throw new ParseException(e.getMessage(), e);
        }

        ProgramrootContext ast = null;
        CustomErrorListener errorListener = new CustomErrorListener();

        try {
            PydmlLexer lexer = new PydmlLexer(in);
            CommonTokenStream tokens = new CommonTokenStream(lexer);
            PydmlParser antlr4Parser = new PydmlParser(tokens);

            boolean tryOptimizedParsing = false; // For now no optimization, since it is not able to parse integer value.

            if (tryOptimizedParsing) {
                // Try faster and simpler SLL
                antlr4Parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
                antlr4Parser.removeErrorListeners();
                antlr4Parser.setErrorHandler(new BailErrorStrategy());
                try {
                    ast = antlr4Parser.programroot();
                    // If successful, no need to try out full LL(*) ... SLL was enough
                }
                catch (ParseCancellationException ex) {
                    // Error occurred, so now try full LL(*) for better error messages
                    tokens.reset();
                    antlr4Parser.reset();
                    if (fileName != null) {
                        errorListener.setCurrentFileName(fileName);
                    }
                    else {
                        errorListener.setCurrentFileName("MAIN_SCRIPT");
                    }
                    // Set our custom error listener
                    antlr4Parser.addErrorListener(errorListener);
                    antlr4Parser.setErrorHandler(new DefaultErrorStrategy());
                    antlr4Parser.getInterpreter().setPredictionMode(PredictionMode.LL);
                    ast = antlr4Parser.programroot();
                }
            }
            else {
                // Set our custom error listener
                antlr4Parser.removeErrorListeners();
                antlr4Parser.addErrorListener(errorListener);
                // NOTE(review): fileName may be null here, whereas the disabled branch
                // above substitutes "MAIN_SCRIPT" -- confirm the listener accepts null.
                errorListener.setCurrentFileName(fileName);
                // Now do the parsing
                ast = antlr4Parser.programroot();
            }
        }
        catch (Exception e) {
            throw new ParseException("ERROR: Cannot parse the program:" + fileName, e);
        }

        // Walk the parse tree twice: first collect the function definitions, then
        // run the syntactic validation, which receives those definitions.
        ParseTreeWalker walker = new ParseTreeWalker();
        // Get list of function definitions which take precedence over built-in functions if same name
        PydmlPreprocessor prep = new PydmlPreprocessor(errorListener);
        walker.walk(prep, ast);
        // Syntactic validation
        PydmlSyntacticValidator validator = new PydmlSyntacticValidator(errorListener, argVals, sourceNamespace, prep.getFunctionDefs());
        walker.walk(validator, ast);
        errorListener.unsetCurrentFileName();

        // Publish the collected issues on this wrapper before deciding how to proceed.
        this.parseIssues = errorListener.getParseIssues();
        this.atLeastOneWarning = errorListener.isAtLeastOneWarning();
        this.atLeastOneError = errorListener.isAtLeastOneError();
        if (atLeastOneError) {
            throw new ParseException(parseIssues, dmlScript);
        }
        if (atLeastOneWarning) {
            LOG.warn(CustomErrorListener.generateParseIssuesMessage(dmlScript, parseIssues));
        }

        // Now convert the validated parse tree into a DMLProgram
        return createDMLProgram(ast, sourceNamespace);
    }

    /**
     * Builds a {@link DMLProgram} from a validated parse tree: registers the
     * program under its namespace, adds all function blocks, then adds every
     * statement wrapped in its own statement block and merges the blocks.
     *
     * @param ast root of the validated parse tree
     * @param sourceNamespace namespace to register the program under; the default
     *                        namespace is used when this is null or empty
     * @return the assembled program, or null if a function, statement or import
     *         statement could not be processed (the reason is logged)
     */
    private DMLProgram createDMLProgram(ProgramrootContext ast, String sourceNamespace) {
        DMLProgram dmlPgm = new DMLProgram();
        String namespace = (sourceNamespace != null && sourceNamespace.length() > 0) ? sourceNamespace : DMLProgram.DEFAULT_NAMESPACE;
        dmlPgm.getNamespaces().put(namespace, dmlPgm);

        // First add all the functions
        for (FunctionStatementContext fn : ast.functionBlocks) {
            FunctionStatementBlock functionStmtBlk = new FunctionStatementBlock();
            functionStmtBlk.addStatement(fn.info.stmt);
            try {
                dmlPgm.addFunctionStatementBlock(namespace, fn.info.functionName, functionStmtBlk);
            } catch (LanguageException e) {
                LOG.error("line: " + fn.start.getLine() + ":" + fn.start.getCharPositionInLine() + " cannot process the function " + fn.info.functionName);
                return null;
            }
        }

        // Then add all the statements
        for (StatementContext stmtCtx : ast.blocks) {
            Statement current = stmtCtx.info.stmt;
            if (current == null) {
                LOG.error("line: " + stmtCtx.start.getLine() + ":" + stmtCtx.start.getCharPositionInLine() + " cannot process the statement");
                return null;
            }

            // Ignore Newline logic
            if (current.isEmptyNewLineStatement()) {
                continue;
            }

            if (current instanceof ImportStatement) {
                // Handle import statements separately
                if (stmtCtx.info.namespaces == null) {
                    LOG.error("line: " + stmtCtx.start.getLine() + ":" + stmtCtx.start.getCharPositionInLine() + " cannot process the import statement");
                    return null;
                }
                mergeImportedNamespaces(dmlPgm, stmtCtx.info.namespaces);
            }

            // Now wrap statement into individual statement block
            // merge statement will take care of merging these blocks
            dmlPgm.addStatementBlock(getStatementBlock(current));
        }

        dmlPgm.mergeStatementBlocks();
        return dmlPgm;
    }

    /**
     * Adds the programs brought in by one import statement, plus the programs
     * those imports depend on, to the namespaces of the target program.
     *
     * @param target program whose namespace map is extended
     * @param imported namespace-to-program entries recorded for the import statement
     */
    private void mergeImportedNamespaces(DMLProgram target, Map<String, DMLProgram> imported) {
        for (Map.Entry<String, DMLProgram> entry : imported.entrySet()) {
            DMLProgram prog = entry.getValue();
            if (prog == null) {
                // Nothing to register; previously the dependency loop below
                // dereferenced the null program and threw a NullPointerException.
                continue;
            }

            // TODO handle namespace key already exists for different program value instead of overwriting
            if (prog.getNamespaces().size() > 0) {
                target.getNamespaces().put(entry.getKey(), prog);
            }

            // Add dependent programs (handle imported script that also imports scripts),
            // without replacing namespaces that are already registered.
            for (Map.Entry<String, DMLProgram> dependency : prog.getNamespaces().entrySet()) {
                String depNamespace = dependency.getKey();
                if (target.getNamespaces().get(depNamespace) == null) {
                    target.getNamespaces().put(depNamespace, dependency.getValue());
                }
            }
        }
    }
}