// blob: 1f84af48c5346690b461a1f1a65f576f43bf6cbd [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.parser;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.Tree;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.impl.PigContext;
import org.apache.pig.parser.PigParserNode.InvocationPoint;
import org.apache.pig.tools.parameters.ParameterSubstitutionPreprocessor;
import org.apache.pig.tools.parameters.PreprocessorContext;
class PigMacro {
// Logger for this class.
private static final Log LOG = LogFactory.getLog(PigMacro.class);
// File name supplied at construction; used to build the error header when
// parsing the expanded macro body fails.
private String fileName;
// Macro name; used for lookup in macroInline and in every error message.
private String name;
// Text of the macro body; input to parameter substitution and validate().
private String body;
// Formal parameter names. Never null: the constructor substitutes an empty list.
private List<String> params;
// Return alias names. Never null: the constructor substitutes an empty list.
private List<String> rets;
// Map of macros supplied at construction; its values are the candidate
// definitions when nested invocations inside this macro are expanded.
private Map<String, PigMacro> seen;
// Names of the macros on the current expansion path. Replaced by macroInline
// before each expansion and used there to reject circular definitions.
private Set<String> macroStack;
// Source of script-level parameter values for substitution; set by macroInline.
private PigContext pigContext;
// Counter handed to AliasMasker.setParams and incremented after each use.
private long idx = 0;
// The start line number of this macro in the script
private int startLine = 0;
/**
 * Creates a macro definition.
 *
 * @param name    macro name
 * @param file    file name recorded for this macro
 * @param params  formal parameter names; {@code null} is replaced by an empty list
 * @param returns return alias names; {@code null} is replaced by an empty list
 * @param body    text of the macro body
 * @param seen    map of macros whose values are consulted when nested
 *                invocations are expanded
 */
PigMacro(String name, String file, List<String> params,
        List<String> returns, String body, Map<String, PigMacro> seen) {
    this.name = name;

    // Normalise missing lists so the rest of the class never sees null.
    if (params != null) {
        this.params = params;
    } else {
        this.params = new ArrayList<String>();
    }
    if (returns != null) {
        this.rets = returns;
    } else {
        this.rets = new ArrayList<String>();
    }

    this.fileName = file;
    this.body = body;
    this.seen = seen;
    this.macroStack = new HashSet<String>();

    LOG.debug("Macro '" + name + "' is defined");
}
/** Returns the name this macro was defined with. */
String getName() {
    return this.name;
}

/** Replaces the set of macro names recorded as this macro's expansion stack. */
void setStack(Set<String> stack) {
    this.macroStack = stack;
}

/** Returns the set of macro names currently recorded as the expansion stack. */
Set<String> getStack() {
    return this.macroStack;
}

/** Records the line of the script at which this macro starts. */
void setStartLine(int start) {
    startLine = start;
}

/** Returns the recorded start line of this macro (0 until set). */
int getStartLine() {
    return this.startLine;
}

/** Sets the context whose parameter values are used during substitution. */
void setPigContext(PigContext pigContext) {
    this.pigContext = pigContext;
}
/**
 * Expands one invocation of this macro: substitutes the actual arguments
 * into the body, then parses the result and masks its aliases.
 *
 * @param inputs  actual arguments of the invocation; may be {@code null}
 * @param outputs aliases receiving the macro's return values; may be
 *                {@code null}, which is treated as "no outputs"
 * @param t       the invoking tree node; its line is used in error messages
 * @param file    file name used in error messages and for the parsed body
 * @return the tree produced by {@code maskAlias} for the expanded body
 * @throws ParserException if substitution, parsing or masking fails
 */
private CommonTree inline(String[] inputs, String[] outputs, CommonTree t,
        String file) throws ParserException {
    String in = substituteParams(inputs, outputs, t.getLine(), file);

    // Collected after substituteParams on purpose: that call rewrites
    // '$'-leading entries of inputs in place, and the masks have always
    // been built from the rewritten values.
    Set<String> masks = new HashSet<String>();
    if (inputs != null) {
        masks.addAll(Arrays.asList(inputs));
    }
    // substituteParams accepts null outputs for a macro without return
    // aliases, so this method must tolerate null as well.
    if (outputs != null) {
        masks.addAll(Arrays.asList(outputs));
    }
    return maskAlias(in, masks, t, file);
}
/**
 * Checks the arity of an invocation and substitutes the actual arguments
 * and return aliases into the macro body.
 *
 * <p>Each formal parameter and each return alias becomes a
 * {@code name=value} binding that is loaded into a fresh
 * {@link PreprocessorContext}. Parameters from the {@code PigContext}, when
 * one is set, are added afterwards without overwriting, so macro bindings
 * win when keys overlap.
 *
 * <p>Side effect: every entry of {@code inputs} that starts with
 * {@code '$'} is prefixed <em>in place</em> with two backslash characters
 * (presumably so the preprocessor leaves it alone; confirm against
 * ParameterSubstitutionPreprocessor). The caller reads {@code inputs}
 * again afterwards.
 *
 * @param inputs  actual arguments; {@code null} counts as zero arguments
 * @param outputs receiving aliases; {@code null} counts as zero aliases
 * @param line    line number used in error messages
 * @param file    file name used in error messages; may be {@code null}
 * @return the macro body after substitution
 * @throws ParserException if the argument or return counts do not match the
 *         definition, or if preprocessing fails
 */
private String substituteParams(String[] inputs, String[] outputs,
        int line, String file) throws ParserException {
    int actualInputs = (inputs == null) ? 0 : inputs.length;
    if (actualInputs != params.size()) {
        String msg = getErrorMessage(file, line,
                "Failed to expand macro '" + name + "'",
                "Expected number of parameters: " + params.size()
                        + " actual number of inputs: " + actualInputs);
        throw new ParserException(msg);
    }

    int actualOutputs = (outputs == null) ? 0 : outputs.length;
    if (actualOutputs != rets.size()) {
        // A void macro invoked with outputs has always used a different
        // header from a plain count mismatch; keep both wordings.
        String header = (rets.isEmpty() ? "Cannot expand macro '"
                : "Failed to expand macro '") + name + "'";
        String msg = getErrorMessage(file, line, header,
                "Expected number of return aliases: " + rets.size()
                        + " actual number of return values: "
                        + actualOutputs);
        throw new ParserException(msg);
    }

    // name=value bindings: parameters first, then return aliases.
    List<String> args = new ArrayList<String>(params.size() + rets.size());
    for (int i = 0; i < params.size(); i++) {
        if (inputs[i].startsWith("$")) {
            // In-place rewrite; see the method comment.
            inputs[i] = "\\\\" + inputs[i];
        }
        args.add(params.get(i) + "=" + inputs[i]);
    }
    for (int i = 0; i < rets.size(); i++) {
        args.add(rets.get(i) + "=" + outputs[i]);
    }

    StringWriter writer = new StringWriter();
    BufferedReader in = new BufferedReader(new StringReader(body));
    try {
        PreprocessorContext pc = new PreprocessorContext(50);
        pc.loadParamVal(args, null);

        // The context (or its parameter map) may be absent; the macro's own
        // bindings are still enough to expand the body in that case.
        Map<String, String> scriptParams =
                (pigContext == null) ? null : pigContext.getParamVal();
        if (scriptParams != null) {
            for (Map.Entry<String, String> e : scriptParams.entrySet()) {
                // overwrite=false since macro parameters should have precedence
                // over commandline parameters (if keys overlap)
                pc.processOrdLine(e.getKey(), e.getValue(), false);
            }
        }

        ParameterSubstitutionPreprocessor psp = new ParameterSubstitutionPreprocessor(pc);
        psp.genSubstitutedFile(in, writer);
    } catch (Exception e) {
        // catch both ParserException and RuntimeException
        String msg = getErrorMessage(file, line,
                "Macro inline failed for macro '" + name + "'",
                e.getMessage() + "\n Macro content: " + body);
        // Only the message is carried into the ParserException, so log the
        // original exception to keep its stack trace available.
        LOG.debug(msg, e);
        throw new ParserException(msg);
    }

    String substituted = writer.toString();
    LOG.debug("--- after substition:\n" + substituted);
    return substituted;
}
/**
 * Parses the substituted macro body, expands any nested macro invocations
 * it contains, and runs the AliasMasker tree walker over the result.
 *
 * Steps, in order:
 *   1. lex and parse 'in' into an AST (parser lines offset by startLine - 1);
 *   2. reject the body if it contains a MACRO_DEF node;
 *   3. stamp every node with the invocation stack of 'tree' plus this call;
 *   4. replace each MACRO_INLINE node with its own expansion;
 *   5. walk the tree with AliasMasker, passing 'masks', the macro name and
 *      the current value of idx (which is then incremented).
 *
 * @param in    macro body after parameter substitution
 * @param masks alias names handed to AliasMasker.setParams (presumably the
 *              aliases that must be left unmasked - confirm in AliasMasker)
 * @param tree  the MACRO_INLINE node being expanded; must be a PigParserNode
 * @param file  file name used for the parsed stream and in error messages
 * @return the tree produced by the AliasMasker walker
 * @throws ParserException if reading, parsing or masking the body fails, or
 *         if the body contains a macro definition
 */
private CommonTree maskAlias(String in, Set<String> masks, CommonTree tree,
String file) throws ParserException {
// this is the MACRO_INLINE node. the real line number is in the
// macro name node
int line = tree.getChild(0).getLine();
CharStream input = null;
try {
// parse macro body into ast
input = new QueryParserStringStream(in, file);
} catch (IOException e) {
String msg = getErrorMessage(file, line, "Failed to inline macro '"
+ name + "'", e.getMessage() + "\nmacro content: " + in);
throw new ParserException(msg);
}
QueryLexer lex = new QueryLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lex);
QueryParser.query_return result = null;
// the parser is created with the macro's line offset within the script
QueryParser parser = QueryParserUtils.createParser(tokens, startLine-1);
try {
result = parser.query();
} catch (RecognitionException e) {
// shift the reported line by the macro's start line before formatting
e.line += startLine -1;
String msg = (fileName == null) ? parser.getErrorHeader(e)
: QueryParserUtils.generateErrorHeader(e, fileName);
msg += " " + parser.getErrorMessage(e, parser.getTokenNames());
String msg2 = getErrorMessage(file, line, "Failed to parse macro '"
+ name + "'", msg + "\nmacro content: " + in);
throw new ParserException(msg2);
}
CommonTree ast = (CommonTree)result.getTree();
LOG.debug("AST for macro '" + name + "':\n" + ast.toStringTree());
// a macro body must not itself define a macro
List<CommonTree> macroDefNodes = new ArrayList<CommonTree>();
traverseMacro(ast, macroDefNodes, "MACRO_DEF");
if (!macroDefNodes.isEmpty()) {
String fname = ((PigParserNode)ast).getFileName();
String msg = getErrorMessage(fname, ast.getLine(),
"Invalid macro definition", "macro '" + name
+ "' contains macro definition.\nmacro content: "
+ body);
throw new ParserException(msg);
}
// add macro invocation points to the expanded macro tree
PigParserNode pnode = (PigParserNode)tree;
List<InvocationPoint> invStack = pnode.getInvocationStack();
// copy the caller's stack so the invoking node's own list is not modified
List<InvocationPoint> newInvStack = (invStack == null) ? new ArrayList<InvocationPoint>()
: new ArrayList<InvocationPoint>(invStack);
InvocationPoint pt = new InvocationPoint(line, file, name);
newInvStack.add(pt);
setInvocationStack(ast, newInvStack);
// recursively expand the inline macros
List<CommonTree> inlineNodes = new ArrayList<CommonTree>();
traverseMacro(ast, inlineNodes, "MACRO_INLINE");
for (CommonTree t : inlineNodes) {
// candidates are the values of 'seen'; macroStack carries the names
// already on the expansion path so circular use is rejected
CommonTree newTree = macroInline(t,
new ArrayList<PigMacro>(seen.values()), macroStack, pigContext);
QueryParserUtils.replaceNodeWithNodeList(t, newTree, null);
}
// mask the aliases in the inlined macro
CommonTreeNodeStream nodes = new CommonTreeNodeStream(ast);
AliasMasker walker = new AliasMasker(nodes);
// idx is post-incremented, so each masking pass gets a different index
walker.setParams(masks, name, idx++);
AliasMasker.query_return result2 = null;
CommonTree commonTree = null;
try {
result2 = walker.query();
} catch (RecognitionException e) {
e.line += startLine - 1;
String msg = walker.getErrorHeader(e) + " "
+ walker.getErrorMessage(e, walker.getTokenNames());
String msg2 = getErrorMessage(file, line, "Failed to mask macro '"
+ name + "'", msg + "\nmacro content: " + in);
throw new ParserException(msg2);
}
commonTree = result2.tree;
LOG.debug("AST for masked macro '" + name + "':\n"
+ commonTree.toStringTree());
return commonTree;
}
/**
 * Assigns the given invocation stack to a node and, recursively, to every
 * node below it. Every node in the tree must be a {@code PigParserNode}.
 *
 * @param ast   root of the subtree to update
 * @param stack invocation stack to attach; the same list instance is shared
 *              by all nodes
 */
private static void setInvocationStack(Tree ast, List<InvocationPoint> stack) {
    ((PigParserNode) ast).setInvocationStack(stack);
    for (int childIdx = 0, childCount = ast.getChildCount();
            childIdx < childCount; childIdx++) {
        setInvocationStack(ast.getChild(childIdx), stack);
    }
}
/*
 * Validates that every return alias of this macro is produced somewhere in
 * the macro body.
 *
 * The body is scanned token by token, and a "$name" token is counted as
 * produced when it appears in one of these positions:
 *   - directly after the word "define" (case-insensitive);
 *   - directly before the word "if" or "otherwise" (case-insensitive);
 *   - directly before a single '=' (an "==" does not count);
 *   - inside a comma-separated list that ends in a single '='.
 * Slash-star comments are skipped by the tokenizer itself; "--" comments
 * are skipped to the end of the line.
 *
 * Does nothing when the macro has no return aliases.
 *
 * Throws IOException if a return alias is not found (or if reading fails).
 */
void validate() throws IOException {
if (rets.isEmpty()) {
return;
}
// names (without the leading '$') that the body was seen to produce
HashSet<String> testSet = new HashSet<String>();
StreamTokenizer st = new StreamTokenizer(new StringReader(body));
// make '.', digits, '_' and '$' part of words so "$alias" is one token
st.wordChars('.', '.');
st.wordChars('0', '9');
st.wordChars('_', '_');
st.wordChars('$', '$');
st.lowerCaseMode(false);
// '/' becomes an ordinary character, then slash-star comments are enabled
st.ordinaryChar('/');
st.slashStarComments(true);
while (st.nextToken() != StreamTokenizer.TT_EOF) {
// the match* helpers called with 'true' consume the next token and push
// it back on a mismatch, so the order of the calls below matters
if (matchWord(st, "define", false) && matchDollarAlias(st, true)) {
testSet.add(st.sval.substring(1));
} else if (matchDollarAlias(st, false)) {
String prevWord = st.sval;
if (matchWord(st, "if", true) || matchWord(st, "otherwise", true)) {
testSet.add(prevWord.substring(1));
} else if (matchChar(st, '=', true) && !matchChar(st, '=', true)) {
testSet.add(prevWord.substring(1));
} else if (matchChar(st, ',', true)) {
// possible multi-alias inlining of a macro
ArrayList<String> mlist = new ArrayList<String>();
mlist.add(prevWord);
if (isMultiValueReturn(st, mlist, true)) {
for (String s : mlist) {
testSet.add(s.substring(1));
}
}
}
} else if (matchChar(st, '-', false) && matchChar(st, '-', true)) {
// "--" starts a single-line comment
skipSingleLineComment(st);
}
}
for (String s : rets) {
if (!testSet.contains(s)) {
throw new IOException("Macro '" + name
+ "' missing return alias: " + s);
}
}
}
/**
 * Checks for the multi-value return pattern {@code alias, alias, ..., alias =}.
 *
 * <p>Tokens are consumed alternately as "word" and "comma". Each word that
 * is a {@code $}-alias is appended to {@code mlist}; other words are
 * accepted but not recorded. The pattern matches when, at a position where
 * a comma would be expected, a single {@code '='} (not {@code "=="}) is
 * found instead.
 *
 * @param st    tokenizer positioned just before the next token to examine
 * @param mlist receives the {@code $}-aliases found, in order
 * @param comma {@code true} if a comma was just consumed, i.e. a word is
 *              expected next
 * @return {@code true} if the list ends in a single {@code '='}
 * @throws IOException if the tokenizer fails to read
 */
private static boolean isMultiValueReturn(StreamTokenizer st,
        List<String> mlist, boolean comma) throws IOException {
    boolean expectWord = comma;
    while (true) {
        int lookahead = st.nextToken();
        boolean listContinues = expectWord
                ? lookahead == StreamTokenizer.TT_WORD
                : matchChar(st, ',', false);
        if (!listContinues) {
            // Only a single '=' where a comma was expected completes the pattern.
            return !expectWord && lookahead == '='
                    && !matchChar(st, '=', true);
        }
        if (matchDollarAlias(st, false)) {
            mlist.add(st.sval);
        }
        expectWord = !expectWord;
    }
}
/**
 * Tests whether a token is a word of the form {@code $x...} (a {@code '$'}
 * followed by at least one more character).
 *
 * @param st   tokenizer to examine
 * @param next if {@code true}, the next token is read and tested, and it is
 *             pushed back when it does not match; if {@code false}, the
 *             current token is tested and the tokenizer is left untouched
 * @return {@code true} if the tested token is a {@code $}-alias
 * @throws IOException if reading the next token fails
 */
private static boolean matchDollarAlias(StreamTokenizer st, boolean next)
        throws IOException {
    final int tokenType;
    if (next) {
        tokenType = st.nextToken();
    } else {
        tokenType = st.ttype;
    }

    boolean isAlias = tokenType == StreamTokenizer.TT_WORD
            && st.sval.charAt(0) == '$'
            && st.sval.length() > 1;
    if (!isAlias && next) {
        st.pushBack();
    }
    return isAlias;
}
/**
 * Tests whether a token is the given word, ignoring case.
 *
 * @param st   tokenizer to examine
 * @param word word to compare against
 * @param next if {@code true}, the next token is read and tested, and it is
 *             pushed back when it does not match; if {@code false}, the
 *             current token is tested and the tokenizer is left untouched
 * @return {@code true} if the tested token is a word equal to {@code word}
 * @throws IOException if reading the next token fails
 */
private static boolean matchWord(StreamTokenizer st, String word,
        boolean next) throws IOException {
    final int tokenType;
    if (next) {
        tokenType = st.nextToken();
    } else {
        tokenType = st.ttype;
    }

    boolean matched = tokenType == StreamTokenizer.TT_WORD
            && st.sval.equalsIgnoreCase(word);
    if (!matched && next) {
        st.pushBack();
    }
    return matched;
}
/**
 * Tests whether a token is the given single character.
 *
 * @param st   tokenizer to examine
 * @param c    token type (character code) to compare against
 * @param next if {@code true}, the next token is read and tested, and it is
 *             pushed back when it does not match; if {@code false}, the
 *             current token is tested and the tokenizer is left untouched
 * @return {@code true} if the tested token's type equals {@code c}
 * @throws IOException if reading the next token fails
 */
private static boolean matchChar(StreamTokenizer st, int c, boolean next)
        throws IOException {
    if (!next) {
        // Inspect only; never disturbs the tokenizer.
        return st.ttype == c;
    }
    if (st.nextToken() == c) {
        return true;
    }
    st.pushBack();
    return false;
}
/**
 * Consumes the rest of the current line.
 *
 * <p>Tokens are read until end of input, an end-of-line token, or the first
 * token the tokenizer reports on a later line. The last token read is
 * pushed back, so the caller's next read returns it.
 *
 * @param st tokenizer positioned inside the comment
 * @throws IOException if reading a token fails
 */
private static void skipSingleLineComment(StreamTokenizer st)
        throws IOException {
    final int commentLine = st.lineno();
    int token = st.nextToken();
    while (token != StreamTokenizer.TT_EOF
            && token != '\n'
            && st.lineno() <= commentLine) {
        token = st.nextToken();
    }
    st.pushBack();
}
/**
 * Collects, in pre-order, every node of a tree whose text equals the given
 * node type.
 *
 * @param t        root of the subtree to search
 * @param nodes    receives the matching nodes (each is cast to
 *                 {@code CommonTree})
 * @param nodeType text to match, e.g. {@code "MACRO_DEF"}; a {@code null}
 *                 value matches nothing
 */
private static void traverseMacro(Tree t, List<CommonTree> nodes,
        String nodeType) {
    // ANTLR returns null text for a node without a token (a nil list
    // root), so the comparison must not dereference t.getText().
    if (nodeType != null && nodeType.equals(t.getText())) {
        nodes.add((CommonTree) t);
    }
    int childCount = t.getChildCount();
    for (int i = 0; i < childCount; i++) {
        traverseMacro(t.getChild(i), nodes, nodeType);
    }
}
/**
 * Expands a single macro invocation node.
 *
 * <p>Macro inline nodes have the following form:
 *
 * <pre>
 * (MACRO_INLINE &lt;name&gt; (RETURN_VAL &lt;values&gt;) (PARAMS &lt;values&gt;))
 * </pre>
 *
 * Child nodes:
 * <ul>
 * <li>0: macro name</li>
 * <li>1: list of return values</li>
 * <li>2: list of parameters</li>
 * </ul>
 *
 * <p>The matching definition is updated before expansion: its stack is
 * replaced by {@code macroStack} plus its own name, and its PigContext is
 * set so it can substitute parameters from the main pig script.
 *
 * @param t          the MACRO_INLINE node; must be a {@code PigParserNode}
 * @param macroDefs  known macro definitions; the first one whose name
 *                   matches is used
 * @param macroStack names of the macros already being expanded
 * @param pigContext context handed to the macro for parameter substitution
 * @return the expanded tree for this invocation
 * @throws ParserException if the macro is not defined, is used circularly,
 *         or fails to expand
 */
static CommonTree macroInline(CommonTree t,
        List<PigMacro> macroDefs, Set<String> macroStack,
        PigContext pigContext)
        throws ParserException {
    final String invokedName = t.getChild(0).getText();

    // Look up the definition by name; first match wins.
    PigMacro target = null;
    for (PigMacro candidate : macroDefs) {
        if (candidate.getName().equals(invokedName)) {
            target = candidate;
            break;
        }
    }

    final String file = ((PigParserNode) t).getFileName();
    if (target == null) {
        throw new ParserException(getErrorMessage(file, t.getLine(),
                "Cannot expand macro '" + invokedName + "'",
                "Macro must be defined before expansion."));
    }
    if (macroStack.contains(target.name)) {
        throw new ParserException(getErrorMessage(file, t.getLine(),
                "Cannot expand macro '" + invokedName + "'",
                "Macro can't be defined circularly."));
    }

    // Extend a copy of the call stack so the caller's set stays untouched.
    Set<String> nestedStack = new HashSet<String>(macroStack);
    nestedStack.add(target.name);
    target.setStack(nestedStack);

    // The macro needs the context to see parameters of the main script.
    target.setPigContext(pigContext);

    String[] returnAliases = childTexts(t.getChild(1));
    String[] arguments = childTexts(t.getChild(2));
    return target.inline(arguments, returnAliases, t, file);
}

/** Returns the text of each direct child of the given node, in order. */
private static String[] childTexts(Tree parent) {
    int count = parent.getChildCount();
    String[] texts = new String[count];
    for (int i = 0; i < count; i++) {
        texts[i] = parent.getChild(i).getText();
    }
    return texts;
}
/**
 * Formats an error message as
 * {@code <file F, line N> header. Reason: reason}.
 *
 * @param file   file name; the {@code "file F, "} part is omitted when
 *               {@code null}
 * @param line   line number to report
 * @param header short description of what failed
 * @param reason detail text; the {@code ". Reason: ..."} part is omitted
 *               when {@code null}
 * @return the formatted message
 */
private static String getErrorMessage(String file, int line, String header,
        String reason) {
    String location = (file == null) ? "" : "file " + file + ", ";
    String message = "<" + location + "line " + line + "> " + header;
    if (reason == null) {
        return message;
    }
    return message + ". Reason: " + reason;
}
}