blob: b8fe18a75856336195d81f43e3919521821c9aa7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Grammar for PigScript
// JavaCC generation options for this grammar.
options {
// Generate non-static functions
// (the generated parser and token manager keep their state in instance members)
STATIC = false;
// Case is ignored in keywords
// (the option applies to every string literal matched by the token manager in this file)
IGNORE_CASE = true;
// Uncomment to make the generated parser trace its actions while debugging the grammar.
// DEBUG_PARSER = true;
// Have the generated character stream process Java-style unicode escapes in the input.
JAVA_UNICODE_ESCAPE = true;
}
PARSER_BEGIN(PigScriptParser)
package org.apache.pig.tools.pigscript.parser;
import java.io.IOException;
import java.io.InputStream;
import java.util.Stack;
import java.util.List;
import java.util.ArrayList;
import org.apache.pig.impl.util.StringUtils;
import jline.console.ConsoleReader;
/**
 * Hand-written part of the generated Grunt command parser.
 *
 * The grammar productions below recognise one command at a time and hand
 * it to one of the abstract callbacks declared here; a concrete subclass
 * supplies the behaviour of each command.
 */
public abstract class PigScriptParser
{
    /** True when commands are read from an interactive session. */
    protected boolean mInteractive;
    /** Console used for prompting; also handed to the token manager. */
    protected ConsoleReader mConsoleReader;

    /**
     * Records the interactive flag here and in the token manager, which
     * uses it to decide whether to show the secondary prompt.
     */
    public void setInteractive(boolean interactive)
    {
        mInteractive = interactive;
        token_source.interactive = interactive;
    }

    /** Returns the line on which the token currently being read begins. */
    public int getLineNumber()
    {
        return jj_input_stream.getBeginLine();
    }

    /** Stores the console reader here and in the token manager. */
    public void setConsoleReader(ConsoleReader c)
    {
        mConsoleReader = c;
        token_source.consoleReader = c;
    }

    /** Called by parse() after an end-of-line token has been consumed. */
    abstract public void prompt();
    /** Called for "quit", "\q", "scriptDone" and at end of input. */
    abstract protected void quit();
    /** Called for the "aliases" command. */
    abstract protected void printAliases() throws IOException;
    /** Called for the "clear" command. */
    abstract protected void printClear();
    /** Called for "fs"; cmdTokens holds the raw argument tokens in order. */
    abstract protected void processFsCommand(String[] cmdTokens) throws IOException;
    /** Called for "sh"; cmdTokens holds the argument tokens in order. */
    abstract protected void processShCommand(String[] cmdTokens) throws IOException;
    /** Called with the full text of an SQL token ("sql" through the closing ";"). */
    abstract protected void processSQLCommand(String cmdTokens) throws IOException;
    /** Called for "describe" / "\de"; alias is null when none was given. */
    abstract protected void processDescribe(String alias) throws IOException;
    /**
     * Called for "explain" / "\e". alias, script and target are null when
     * not given; format is "text", "dot" or "xml"; isVerbose is false when
     * -brief was given; params and files collect -param / -param_file values.
     */
    abstract protected void processExplain(String alias, String script, boolean isVerbose, String format, String target, List<String> params, List<String> files) throws IOException, ParseException;
    /** Single-argument register; not invoked by the productions in this grammar file. */
    abstract protected void processRegister(String jar) throws IOException;
    /** Called for "register"; scriptingEngine / namespace are null unless "using" / "as" were given. */
    abstract protected void processRegister(String path, String scriptingEngine, String namespace) throws IOException, ParseException;
    /** Called for "set key [value]"; value is null when the key is followed by end of line or ";". */
    abstract protected void processSet(String key, String value) throws IOException, ParseException;
    /** Called for "set" without a key. */
    abstract protected void processSet() throws IOException, ParseException;
    /** Called for "history"; withNumbers is false when -n was given. */
    abstract protected void processHistory(boolean withNumbers);
    /** Called once per path given to "cat". */
    abstract protected void processCat(String path) throws IOException;
    /** Called for "cd"; path is null when none was given. */
    abstract protected void processCD(String path) throws IOException;
    /** Called for "dump" / "\d"; alias is null when none was given. */
    abstract protected void processDump(String alias) throws IOException;
    /** Called for "kill" with the identifier that follows it. */
    abstract protected void processKill(String jobid) throws IOException;
    /** Called for "ls"; path is null when none was given. */
    abstract protected void processLS(String path) throws IOException;
    /** Called for "pwd". */
    abstract protected void processPWD() throws IOException;
    /** Called for "help". */
    abstract protected void printHelp();
    /** Called for "mv src dst". */
    abstract protected void processMove(String src, String dst) throws IOException;
    /** Called for "cp src dst". */
    abstract protected void processCopy(String src, String dst) throws IOException;
    /** Called for "copyToLocal src dst". */
    abstract protected void processCopyToLocal(String src, String dst) throws IOException;
    /** Called for "copyFromLocal src dst". */
    abstract protected void processCopyFromLocal(String src, String dst) throws IOException;
    /** Called for "mkdir dir". */
    abstract protected void processMkdir(String dir) throws IOException;
    /** Called with the full text of a PIG token (one Pig Latin statement or block). */
    abstract protected void processPig(String cmd) throws IOException;
    /** Called once per path for "rm" (opt is null) and "rmf" (opt is "force"). */
    abstract protected void processRemove(String path, String opt) throws IOException;
    /** Called for "illustrate" / "\i"; alias, script and target are null when not given. */
    abstract protected void processIllustrate(String alias, String script, String target, List<String> params, List<String> files) throws IOException, ParseException;
    /** Called for "run" (batch is false) and "exec" (batch is true); script is null when not given. */
    abstract protected void processScript(String script, boolean batch, List<String> params, List<String> files) throws IOException, ParseException;
    /** Called for "%default key value". */
    abstract protected void processDefault(String key, String value) throws IOException;
    /** Called for "%declare key value". */
    abstract protected void processDeclare(String key, String value) throws IOException;

    /**
     * Strips one pair of enclosing single quotes.
     *
     * @param s text that may be wrapped in single quotes
     * @return s without its first and last character when both are single
     *         quotes; otherwise s unchanged
     */
    static String unquote(String s)
    {
        // A quoted string needs room for two distinct quote characters.
        // Without the length check "" fails in charAt(0) and a lone "'"
        // fails in substring(1, 0).
        if (s.length() >= 2 && s.charAt(0) == '\'' && s.charAt(s.length()-1) == '\'')
            return s.substring(1, s.length()-1);
        return s;
    }

    /** Tells whether the given token kind is EOL or SEMICOLON. */
    static boolean eolOrSemicolon(int kind) {
        return kind == EOL || kind == SEMICOLON;
    }
}
PARSER_END(PigScriptParser)
// Skip all tabs and spaces
// (this rule has no state prefix, so it applies in the DEFAULT lexical state only)
SKIP : { " " | "\t" }
// Skip comments(single line and multiline)
// Like the rule above these apply in the DEFAULT state only; comments that occur
// inside a Pig statement are handled by the SINGLE_LINE_COMMENT and
// MULTI_LINE_COMMENT states further down.
SKIP : {
// "--" up to, but not including, the end of the line
<"--"(~["\r","\n"])*>
// "#!" up to, but not including, the end of the line
| <"#!" (~["\r","\n"])*>
// "/*" ... "*/" block comment written as a single regular expression
| <"/*" (~["*"])* "*" ("*" | (~["*","/"] (~["*"])* "*"))* "/">
}
// tokens
// commands
// One token kind per Grunt keyword. IGNORE_CASE is set in the options block, so
// the literals below are matched regardless of letter case.
TOKEN: {<CAT: "cat">}
TOKEN: {<CLEAR: "clear">}
TOKEN: {<FS: "fs">}
TOKEN: {<SH:"sh">}
TOKEN: {<CD: "cd">}
TOKEN: {<COPY: "cp">}
TOKEN: {<COPYFROMLOCAL: "copyFromLocal">}
TOKEN: {<COPYTOLOCAL: "copyToLocal">}
TOKEN: {<DUMP: "dump">}
// The *_SHORT tokens are a backslash followed by one or two letters ("\\d" is \d).
TOKEN: {<DUMP_SHORT: "\\d">}
TOKEN: {<DESCRIBE: "describe">}
TOKEN: {<DESCRIBE_SHORT: "\\de">}
TOKEN: {<ALIASES: "aliases">}
TOKEN: {<EXPLAIN: "explain">}
TOKEN: {<EXPLAIN_SHORT: "\\e">}
TOKEN: {<HELP: "help">}
TOKEN: {<HISTORY: "history">}
TOKEN: {<KILL: "kill">}
TOKEN: {<LS: "ls">}
TOKEN: {<MOVE: "mv">}
TOKEN: {<MKDIR: "mkdir">}
TOKEN: {<PWD: "pwd">}
TOKEN: {<QUIT: "quit">}
TOKEN: {<QUIT_SHORT: "\\q">}
TOKEN: {<REGISTER: "register">}
// "using" and "as" are the optional clauses of the register command (see parse()).
TOKEN: {<USING: "using">}
TOKEN: {<AS: "as"> }
TOKEN: {<REMOVE: "rm">}
TOKEN: {<REMOVEFORCE: "rmf">}
TOKEN: {<SET: "set">}
TOKEN: {<ILLUSTRATE: "illustrate">}
TOKEN: {<ILLUSTRATE_SHORT: "\\i">}
TOKEN: {<RUN: "run">}
TOKEN: {<EXEC: "exec">}
// Options accepted by explain / illustrate / run / exec / history.
TOKEN: {<PARAM: "-param">}
TOKEN: {<PARAM_FILE: "-param_file">}
TOKEN: {<SCRIPT: "-script">}
TOKEN: {<DOT: "-dot">}
TOKEN: {<XML: "-xml">}
TOKEN: {<OUT: "-out">}
TOKEN: {<BRIEF: "-brief">}
TOKEN: {<N: "-n">}
// Parameter-substitution directives.
TOKEN: {<PIGDEFAULT: "%default" >}
TOKEN: {<DECLARE: "%declare" >}
// internal use commands
// parse() treats scriptDone like quit.
TOKEN: {<SCRIPT_DONE: "scriptDone">}
// Define pig command as
// (1) Starting with "split"/"define"/"store"/"assert"/"import", an assignment (A=), a multi-assignment (A, B, ...=),
//     "=>" or an identifier followed by "(", followed by
// (2) Single statement followed by ; and newline or
// (3) Block of statements enclosed in "{" and "}"
// Fields and helpers added to the generated token manager. They are used by the
// lexical actions of the Pig-statement states below.
TOKEN_MGR_DECLS : {
    // Depth of "{" ... "}" nesting while in the IN_BLOCK state.
    int pigBlockLevel = 0;
    // NOTE(review): not referenced by any lexical action in this file.
    int funcBlockLevel = 0;
    // Depth of "(" ... ")" nesting while in the SCHEMA_DEFINITION state.
    int tupleSchemaLevel = 0;
    // Depth of "{" ... "}" nesting while in the SCHEMA_DEFINITION state.
    int bagSchemaLevel = 0;
    // Depth of "{" ... "}" nesting while in the BAG_CONSTANT state.
    int bagConstantLevel = 0;
    // State that a nested state (string, comment, schema, ...) returns to.
    int prevState = DEFAULT;
    // Mirrors PigScriptParser.setInteractive().
    boolean interactive = false;
    // Mirrors PigScriptParser.setConsoleReader().
    ConsoleReader consoleReader = null;
    // States saved by the GENERATE state so it can find the state it was entered from.
    Stack<Integer> stack = new Stack<Integer>();

    // Switches the console to the continuation prompt when a statement spans
    // several lines. Does nothing for non-interactive input.
    public void secondary_prompt()
    {
        // interactive and consoleReader are set through independent setters, so
        // guard against an interactive session without a console reader instead
        // of failing with a NullPointerException in the middle of a token.
        if (interactive && consoleReader != null)
        {
            consoleReader.setPrompt(">> ");
        }
    }

    // Returns (and removes) the most recently saved state, or the given state
    // when nothing has been saved.
    public int getState(int state) {
        if (!stack.empty()) return stack.pop();
        return state;
    }

    // Saves a state for a later getState() call.
    public void saveState(int state) {
        stack.push(state);
    }
}
// SQL command: everything from "sql" through the next ";" becomes one SQL token.
// MORE rules keep appending the matched text to the token image.
<DEFAULT> MORE :
{
<"sql"> : SQL_START
}
<SQL_START> MORE :
{
// The first ";" ends the command
<";"> : SQL_END
// Command continues on the next line: show the continuation prompt
| <("\n" | "\r" | "\r\n")> {secondary_prompt();}
// Any other character is simply accumulated
| <(~[])>
}
<SQL_END> TOKEN : {
// The empty match produces the token right after ";" was read; its image is the
// text accumulated by the MORE rules above. The lexer then returns to DEFAULT.
<SQL: ""> { matchedToken.image = image.toString();
}: DEFAULT
}
// Prefixes that mark the start of a Pig Latin statement. Matching one of them
// switches to PIG_START, where the rest of the statement is accumulated.
<DEFAULT> MORE :
{
<"split"> : PIG_START
| <"define"> : PIG_START
| <"store"> : PIG_START
| <"assert"> : PIG_START
| <"import"> : PIG_START
// Assignment: a name (letters first, then letters, digits or "_"), optional blanks, "="
| <(["a"-"z", "A"-"Z"])+(["a"-"z", "A"-"Z"] | ["0"-"9"] | "_")*(" " | "\t")*"="> : PIG_START
// Statement that begins with "=>" followed by optional blanks
| <"=>" (" " | "\t")*> : PIG_START
// Multi-assignment: comma separated identifiers followed by "="
| < <IDENTIFIER> (" " | "\t")* ("," (" " | "\t")* <IDENTIFIER> )* (" " | "\t")* "="> : PIG_START
// Identifier directly followed by "(" (presumably a macro invocation; confirm)
| < <IDENTIFIER> (" " | "\t")* "(" > : PIG_START
}
// Body of a Pig statement. Text is accumulated until a top-level ";" or until a
// "{" ... "}" block has been closed. Nested constructs get their own states and
// come back here through prevState.
<PIG_START> MORE :
{
<"'"> {prevState = PIG_START;} : IN_STRING
| <"`"> {prevState = PIG_START;} : IN_COMMAND
// " as " surrounded by blanks starts a schema definition
| <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+ > {prevState = PIG_START;} : SCHEMA_DEFINITION
// " generate" followed by white space starts a generate clause
| <(" " | "\t")+["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"](" " | "\t" | "\r" | "\n")+ > {prevState = PIG_START;} : GENERATE
// Start of a statement block; the nesting counter starts at one
| <"{"> {pigBlockLevel = 1;} : IN_BLOCK
// A closing brace cannot occur here. "if (true)" keeps the generated Java code
// after the throw reachable for the compiler.
| <"}"> {if (true) throw new TokenMgrError("Unmatched '}'", TokenMgrError.LEXICAL_ERROR);}
// End of the statement
| <";"> : PIG_END
| <"--"> {prevState = PIG_START;} : SINGLE_LINE_COMMENT
| <"/*"> {prevState = PIG_START;} : MULTI_LINE_COMMENT
// Statement continues on the next line: show the continuation prompt
| <("\n" | "\r" | "\r\n")> {secondary_prompt();}
// Any other character is simply accumulated
| <(~[])>
}
// "--" comment inside a Pig statement. The comment text stays part of the token image.
<SINGLE_LINE_COMMENT> MORE :
{
// End of line ends the comment: go back to the state the comment started in and,
// unless that state is DEFAULT, show the continuation prompt.
<("\n" | "\r" | "\r\n")> {SwitchTo(prevState); if(prevState != DEFAULT) secondary_prompt();}
| <(~[])>
}
// "/* ... */" comment inside a Pig statement. The comment text stays part of the token image.
<MULTI_LINE_COMMENT> MORE :
{
// End of the comment: return to the state the comment started in
<"*/"> {SwitchTo(prevState);}
| <("\n" | "\r" | "\r\n")> {secondary_prompt();}
| <(~[])>
}
// Single-quoted string inside a Pig statement.
<IN_STRING> MORE :
{
// An escaped backslash and an escaped quote are consumed as pairs, so that the
// quote in \' does not end the string and the second backslash of \\ does not
// escape a following quote.
<"\\\\">
| <"\\'">
// Closing quote: return to the state the string started in
| <"'"> { SwitchTo(prevState);}
| <("\n" | "\r" | "\r\n")> {secondary_prompt();}
| <(~[])>
}
// Backtick-quoted text inside a Pig statement.
<IN_COMMAND> MORE :
{
// An escaped backtick does not end the quoted text
<"\\`">
// Closing backtick: return to the state the quoted text started in
| <"`"> { SwitchTo(prevState);}
| <("\n" | "\r" | "\r\n")> {secondary_prompt();}
| <(~[])>
}
// Inside a "generate" clause (entered from PIG_START or IN_BLOCK).
//
// Every rule that enters a nested state runs the same three steps:
//   prevState = getState(prevState);  take the saved state if there is one,
//                                     otherwise keep the current prevState
//   saveState(prevState);             put that state (back) on the stack
//   prevState = GENERATE;             make the nested state return to GENERATE
// The effect is that the stack keeps the state GENERATE was entered from while
// prevState is reused for the nested state.
<GENERATE> MORE :
{
// Start of a bag constant
<"{">
{
bagConstantLevel++;
prevState = getState(prevState);
saveState(prevState);
prevState = GENERATE;
} : BAG_CONSTANT
// " as " surrounded by blanks starts a schema definition
| <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+>
{
prevState = getState(prevState);
saveState(prevState);
prevState = GENERATE;
} : SCHEMA_DEFINITION
| <"--">
{
prevState = getState(prevState);
saveState(prevState);
prevState = GENERATE;
} : SINGLE_LINE_COMMENT
| <"/*">
{
prevState = getState(prevState);
saveState(prevState);
prevState = GENERATE;
} : MULTI_LINE_COMMENT
| <"'">
{
prevState = getState(prevState);
saveState(prevState);
prevState = GENERATE;
} : IN_STRING
// ";" ends the generate clause: go back to the state it was entered from.
| <";">
{
prevState = getState(prevState);
// When that state is PIG_START the ";" is pushed back (and removed from the
// image) so that PIG_START reads it again and ends the statement. For any
// other state the ";" stays consumed.
if(prevState == PIG_START) {
input_stream.backup(1);
image.deleteCharAt(image.length()-1);
}
SwitchTo(prevState);
}
| <("\n" | "\r" | "\r\n")> {secondary_prompt();}
| <(~[])>
}
// Schema definition following " as ". The state is left, back to prevState,
// when the outermost "(" / "{" has been closed or when a "," or ";" (or a ")"
// that does not belong to the schema) is seen outside of any nesting.
<SCHEMA_DEFINITION> MORE :
{
    <"("> {tupleSchemaLevel++;}
|   <")">
    {
        if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) {
            // This means parenthesis is not from this schema_def.
            // Putting back ")" although others do not check parenthesis at this time.
            // This is a bandaid workaround for the issue with inline-op which
            // also uses parenthesis.
            // Real fix would be to move out of using this javacc parser. (PIG-2597)
            input_stream.backup(1);
            image.deleteCharAt(image.length()-1);
            SwitchTo(prevState);
        } else {
            // Only a ")" that belongs to the schema changes the nesting level.
            // Decrementing for the pushed-back ")" above as well would leave the
            // counter at -1 for every later schema definition, so that a later
            // un-nested "," or ";" would no longer be recognised as its end.
            tupleSchemaLevel--;
            if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) SwitchTo(prevState);
        }
    }
|   <"{"> {bagSchemaLevel++;}
|   <"}"> {bagSchemaLevel--; if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) SwitchTo(prevState); }
|   <("," | ";" )>
    {
        // Outside of any nesting the separator ends the schema: push it back (and
        // remove it from the image) so that the previous state handles it.
        if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) {
            input_stream.backup(1);
            image.deleteCharAt(image.length()-1);
            SwitchTo(prevState);
        }
    }
|   <("\n" | "\r" | "\r\n")> {secondary_prompt();}
|   <(~[])>
}
// Bag constant "{ ... }" inside a generate clause. bagConstantLevel was already
// incremented for the opening brace by the GENERATE rule that entered this state.
<BAG_CONSTANT> MORE :
{
<"{"> {bagConstantLevel++;}
// The brace matching the outermost "{" returns to prevState
| <"}"> {bagConstantLevel--; if (bagConstantLevel == 0) SwitchTo(prevState);}
| <("\n" | "\r" | "\r\n")> {secondary_prompt();}
| <(~[])>
}
// Inside a "{" ... "}" statement block. pigBlockLevel was set to one by the
// PIG_START rule that entered this state.
<IN_BLOCK> MORE :
{
<"\""> {prevState = IN_BLOCK;} : IN_DOUBLE_QUOTED_STRING
| <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+ > {prevState = IN_BLOCK;} : SCHEMA_DEFINITION
| <(" " | "\t")+["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"](" " | "\t" | "\r" | "\n")+> {prevState = IN_BLOCK;} : GENERATE
| <"{"> {pigBlockLevel++;}
// "}" with an optional ";" directly after it. Closing the outermost block ends
// the statement.
| <"}"(";")?> {pigBlockLevel--; if (pigBlockLevel == 0) SwitchTo(PIG_END);}
| <"'"> {prevState = IN_BLOCK;} : IN_STRING
| <"`"> {prevState = IN_BLOCK;} : IN_COMMAND
| <"--"> {prevState = IN_BLOCK;} : SINGLE_LINE_COMMENT
| <"/*"> {prevState = IN_BLOCK;} : MULTI_LINE_COMMENT
| <("\n" | "\r" | "\r\n")> {secondary_prompt();}
| <(~[])>
}
// Double-quoted string; in this file it is entered from IN_BLOCK only.
<IN_DOUBLE_QUOTED_STRING> MORE :
{
// An escaped double quote does not end the string
<"\\\"">
// Closing quote: return to the state the string started in
| <"\""> { SwitchTo(prevState);}
| <("\n" | "\r" | "\r\n")> {secondary_prompt();}
| <(~[])>
}
// End of a Pig statement. The empty match produces the PIG token as soon as this
// state is entered; its image is the whole text accumulated by the MORE rules
// since the statement prefix. The lexer then returns to DEFAULT.
<PIG_END> TOKEN :
{
<PIG: ""> { matchedToken.image = image.toString();
//System.out.println("image = " + matchedToken.image);
}: DEFAULT
}
// other
// Tokens of the DEFAULT state that are not command keywords.
TOKEN: {<EOL: "\r" | "\n" | "\r\n">}
TOKEN: {<QUOTE: "'">}
TOKEN: {<SEMICOLON: ";">}
// Private ("#") regular expressions: they can be referenced from other token
// definitions but never become tokens themselves.
// NOTE(review): FSSPECIALCHAR, FLOAT and NUMBER are not referenced by any token
// or production visible in this file.
TOKEN:
{
<#LETTER : ["a"-"z", "A"-"Z"] >
| <#DIGIT : ["0"-"9"] >
| <#SPECIALCHAR : ["_"] >
| <#FSSPECIALCHAR: ["/"]>
| <#FLOAT: <INTEGER> ( "." <INTEGER> )? | "." <INTEGER> >
| <#INTEGER: ( <DIGIT> )+ >
| <#NUMBER: <INTEGER> | <FLOAT> | <FLOAT> ( ["e","E"] ([ "-","+"])? <FLOAT> )?>
}
// "@" is accepted wherever dump / describe / explain / illustrate take an alias.
TOKEN: {<PREVREL: "@">}
// Starts with a letter; may continue with letters, digits, "_" and "::".
TOKEN: {<IDENTIFIER: (<LETTER>)+(<DIGIT> | <LETTER> | <SPECIALCHAR> | "::")*>}
// Any run of characters other than parentheses, ";" and white space.
TOKEN: {<PATH: (~["(", ")", ";", "\r", " ", "\t", "\n"])+>}
// Single-quoted string on one line. A backslash must be followed by one of
// n t b r f \ ' or by "u" and four hexadecimal digits.
TOKEN : { <QUOTEDSTRING : "'"
( (~["'","\\","\n","\r"])
| ("\\"
( ["n","t","b","r","f","\\","'"] )
)
| ("\\u"
["0"-"9","A"-"F","a"-"f"]
["0"-"9","A"-"F","a"-"f"]
["0"-"9","A"-"F","a"-"f"]
["0"-"9","A"-"F","a"-"f"]
)
)*
"'"> }
// Text between two backticks; accepted as a %default / %declare value.
TOKEN: {<SHELLCMD: "`" (~["`"])* "`" >}
// Entry point of the parser: recognises ONE command (or an empty line, a lone
// ";" or end of input) and calls the matching callback of PigScriptParser.
// The command is selected by its first token.
void parse() throws IOException:
{
Token t1, t2, t3;
// Optional parts of the register command; stay null when not given
String engine = null, namespace = null;
// Arguments collected for the fs and sh commands
List<String> cmdTokens = new ArrayList<String>();
}
{
(
// Empty line
<EOL>
{prompt();}
|
// fs <arg> ...
<FS>
(
t1 = GetPath()
{
cmdTokens.add(t1.image);
// The grammar matched the first argument; the remaining ones are read by
// calling GetPath() until it fails, which marks the end of the arguments.
while(true){
try{
t1=GetPath();
cmdTokens.add(t1.image);
}catch(ParseException e){
break;
}
}
processFsCommand(cmdTokens.toArray(new String[cmdTokens.size()]));
}
)+
|
// sh <arg> ...
<SH>
(
t1 = GetValue()
{
// NOTE(review): the first argument is passed on as written, while the
// following ones are unquoted and unescaped; confirm this is intended.
cmdTokens.add(t1.image);
while(true){
try{
t1=GetValue();
cmdTokens.add(StringUtils.unescapeInputString(unquote(t1.image)));
}catch(ParseException e){
break;
}
}
processShCommand(cmdTokens.toArray(new String[cmdTokens.size()]));
}
)+
|
// cat <path> ...   (one callback per path)
<CAT>
(
t1 = GetPath()
{processCat(t1.image);}
)+
|
Clear()
|
// cd [path]
<CD>
(
t1 = GetPath()
{processCD(t1.image);}
|
{processCD(null);}
)
|
// cp <src> <dst>
<COPY>
t1 = GetPath()
t2 = GetPath()
{processCopy(t1.image, t2.image);}
|
// copyFromLocal <src> <dst>
<COPYFROMLOCAL>
t1 = GetPath()
t2 = GetPath()
{processCopyFromLocal(t1.image, t2.image);}
|
// copyToLocal <src> <dst>
<COPYTOLOCAL>
t1 = GetPath()
t2 = GetPath()
{processCopyToLocal(t1.image, t2.image);}
|
// dump [alias | @]
(<DUMP> | <DUMP_SHORT>)
(
(t1 = <IDENTIFIER>
| t1 = <PREVREL>)
{processDump(t1.image);}
|
{processDump(null);}
)
|
Illustrate()
|
// describe [alias | @]
(<DESCRIBE> | <DESCRIBE_SHORT>)
(
(t1 = <IDENTIFIER>
| t1 = <PREVREL>)
{processDescribe(t1.image);}
|
{processDescribe(null);}
)
|
<ALIASES>
{printAliases();}
|
Explain()
|
<HELP>
{printHelp();}
|
History()
|
// kill <identifier>
<KILL>
t1 = <IDENTIFIER>
{processKill(t1.image);}
|
// ls [path]
<LS>
(
t1 = GetPath()
{processLS(t1.image);}
|
{processLS(null);}
)
|
// mv <src> <dst>
<MOVE>
t1 = GetPath()
t2 = GetPath()
{processMove(t1.image, t2.image);}
|
// mkdir <dir>
<MKDIR>
t1 = GetPath()
{processMkdir(t1.image);}
|
// Complete SQL command as assembled by the SQL_START / SQL_END lexical states
t1 = <SQL>
{processSQLCommand(t1.image);}
|
// Complete Pig statement as assembled by the PIG_START ... PIG_END lexical states
t1 = <PIG>
{processPig(t1.image);}
|
<PWD>
{processPWD();}
|
(<QUIT> | <QUIT_SHORT>)
{quit();}
|
// register <path> [using <engine> [as <namespace>]]
<REGISTER>
t1 = GetPath()
[
<USING> t2 = GetPath()
{ engine = t2.image; }
[
<AS> t3 = <IDENTIFIER>
{namespace = t3.image; }
]
]
// Only the path has enclosing single quotes removed
{processRegister(unquote(t1.image), engine, namespace); }
|
Script()
|
// rm <path> ...   (one callback per path)
<REMOVE>
(
t1 = GetPath()
{processRemove(t1.image, null);}
)+
|
// rmf <path> ...   (one callback per path)
<REMOVEFORCE>
(
t1 = GetPath()
{processRemove(t1.image, "force");}
)+
|
<SCRIPT_DONE>
{quit();}
|
// set [key [value]]
<SET>
(
t1 = GetKey()
t2 = GetValueOrNull()
// A key directly followed by end of line or ";" has no value
{processSet(t1.image, eolOrSemicolon(t2.kind)?null:unquote(t2.image));}
|
{processSet();}
)
|
// %default <key> <value>
<PIGDEFAULT>
t1 = GetKey()
t2 = GetDefaultValue()
{processDefault(t1.image, t2.image);}
|
// %declare <key> <value>
<DECLARE>
t1 = GetKey()
t2 = GetDefaultValue()
{processDeclare(t1.image, t2.image);}
|
<EOF>
{quit();}
|
// A lone ";" is accepted and ignored
<SEMICOLON>
{}
|
// handle invalid token
// handle_invalid_command() always throws, so the prompt() call after it is not reached.
handle_invalid_command(EOL)
{prompt();}
)
}
// clear
// Reports the "clear" command through printClear().
void Clear() :
{}
{
    <CLEAR> { printClear(); }
}
// illustrate | \i   { -out <path> | -script <path> | -param <value> | -param_file <path> }   [alias | @]
// Options may appear in any order and any number of times; for -out and -script
// the last occurrence wins, -param and -param_file values are collected.
void Illustrate() throws IOException:
{
    Token tok;
    String alias = null;
    String script = null;
    String target = null;
    ArrayList<String> params = new ArrayList<String>();
    ArrayList<String> files = new ArrayList<String>();
}
{
    ( <ILLUSTRATE> | <ILLUSTRATE_SHORT> )
    (
        <OUT> tok = GetPath()        { target = tok.image; }
    |
        <SCRIPT> tok = GetPath()     { script = tok.image; }
    |
        <PARAM> tok = GetPath()      { params.add(tok.image); }
    |
        <PARAM_FILE> tok = GetPath() { files.add(tok.image); }
    )*
    [
        ( tok = <IDENTIFIER> | tok = <PREVREL> )
        { alias = tok.image; }
    ]
    { processIllustrate(alias, script, target, params, files); }
}
// history [-n]
// Line numbers are requested unless -n is given.
void History() :
{
    boolean withNumbers = true;
}
{
    <HISTORY>
    [ <N> { withNumbers = false; } ]
    { processHistory(withNumbers); }
}
// explain | \e   { -brief | -dot | -xml | -out <path> | -script <path> | -param <value> | -param_file <path> }   [alias | @]
// Defaults: verbose output in "text" format, no target, no script, no alias.
void Explain() throws IOException:
{
    Token tok;
    String alias = null;
    String script = null;
    String format = "text";
    String target = null;
    boolean isVerbose = true;
    ArrayList<String> params = new ArrayList<String>();
    ArrayList<String> files = new ArrayList<String>();
}
{
    ( <EXPLAIN> | <EXPLAIN_SHORT> )
    (
        <BRIEF>                      { isVerbose = false; }
    |
        <DOT>                        { format = "dot"; }
    |
        <XML>                        { format = "xml"; }
    |
        <OUT> tok = GetPath()        { target = tok.image; }
    |
        <SCRIPT> tok = GetPath()     { script = tok.image; }
    |
        <PARAM> tok = GetPath()      { params.add(tok.image); }
    |
        <PARAM_FILE> tok = GetPath() { files.add(tok.image); }
    )*
    [
        ( tok = <IDENTIFIER> | tok = <PREVREL> )
        { alias = tok.image; }
    ]
    { processExplain(alias, script, isVerbose, format, target, params, files); }
}
// run | exec   { -param <value> | -param_file <path> }   [script]
// "run" reports batch = false, "exec" reports batch = true.
void Script() throws IOException:
{
    Token tok;
    String script = null;
    boolean batch = false;
    ArrayList<String> params = new ArrayList<String>();
    ArrayList<String> files = new ArrayList<String>();
}
{
    (
        <RUN>  { batch = false; }
    |
        <EXEC> { batch = true; }
    )
    (
        <PARAM> tok = GetPath()      { params.add(tok.image); }
    |
        <PARAM_FILE> tok = GetPath() { files.add(tok.image); }
    )*
    [
        tok = GetPath()
        { script = tok.image; }
    ]
    { processScript(script, batch, params, files); }
}
// A path argument: an identifier, a PATH token, or one of the command keywords
// accepted by GetReserved() used as a plain word.
Token GetPath() :
{
    Token pathTok;
}
{
    (
        pathTok = <IDENTIFIER>
    |   pathTok = <PATH>
    |   pathTok = GetReserved()
    )
    { return pathTok; }
}
// A key (for set, %default and %declare): anything GetPath() accepts.
Token GetKey() :
{
    Token keyTok;
}
{
    keyTok = GetPath()
    { return keyTok; }
}
// The value of a set command, or the EOL / SEMICOLON token that follows the key
// when no value is given. The caller tells the cases apart by the token kind.
Token GetValueOrNull() :
{
    Token valTok;
}
{
    (
        valTok = GetValue()
    |   valTok = <EOL>
    |   valTok = <SEMICOLON>
    )
    { return valTok; }
}
// A value: anything GetPath() accepts, or a single-quoted string.
Token GetValue() :
{
    Token valTok;
}
{
    (
        valTok = GetPath()
    |   valTok = <QUOTEDSTRING>
    )
    { return valTok; }
}
// A %default / %declare value: anything GetPath() accepts, a single-quoted
// string, or a backtick-quoted shell command.
Token GetDefaultValue() :
{
    Token valTok;
}
{
    (
        valTok = GetPath()
    |   valTok = <QUOTEDSTRING>
    |   valTok = <SHELLCMD>
    )
    { return valTok; }
}
// Command keywords that may also be used as a plain word where a path is expected.
// NOTE(review): FS, SH, REMOVEFORCE and the option / directive tokens are not in
// this list; confirm whether that is intended.
Token GetReserved () :
{
    Token word;
}
{
    (
        word = <CAT>
    |   word = <CD>
    |   word = <CLEAR>
    |   word = <COPY>
    |   word = <COPYFROMLOCAL>
    |   word = <COPYTOLOCAL>
    |   word = <DUMP>
    |   word = <DUMP_SHORT>
    |   word = <DESCRIBE>
    |   word = <DESCRIBE_SHORT>
    |   word = <ALIASES>
    |   word = <EXPLAIN>
    |   word = <EXPLAIN_SHORT>
    |   word = <ILLUSTRATE>
    |   word = <ILLUSTRATE_SHORT>
    |   word = <HELP>
    |   word = <HISTORY>
    |   word = <KILL>
    |   word = <LS>
    |   word = <MOVE>
    |   word = <MKDIR>
    |   word = <PWD>
    |   word = <QUIT>
    |   word = <QUIT_SHORT>
    |   word = <REGISTER>
    |   word = <USING>
    |   word = <AS>
    |   word = <REMOVE>
    |   word = <SET>
    |   word = <SCRIPT_DONE>
    |   word = <RUN>
    |   word = <EXEC>
    )
    { return word; }
}
JAVACODE
// Fallback alternative of parse(): reports the input as a syntax error.
// The kind argument is not used.
void handle_invalid_command(int kind)
{
    // Build the exception describing the unexpected token, then raise it.
    ParseException invalidCommand = generateParseException();
    throw invalidCommand;
}