blob: beb322a51707ca599869fb2f5916b4ef683dd9c4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This grammar is used to process the %declare and %default commands
* within a Pig script, and to substitute parameter values.
*/
// JavaCC generator options for this grammar.
options {
// Generate non-static functions
STATIC = false;
// Match token literals without regard to letter case
IGNORE_CASE = true;
// Have the generated character stream process Java Unicode escape sequences
JAVA_UNICODE_ESCAPE = true;
//DEBUG_PARSER = true;
// Default number of tokens examined at each choice point of the parser
LOOKAHEAD = 2;
}
PARSER_BEGIN(PigFileParser)
package org.apache.pig.tools.parameters;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Hashtable;
import java.util.Stack;
import java.io.Writer;
import java.lang.StringBuilder;
// Warnings in the code generated by JavaCC cannot be fixed here,
// so all warnings are suppressed for this class. Note that this does not
// suppress warnings in the other classes generated from this .jj file.
@SuppressWarnings("all")
public class PigFileParser {

    /** Context used by the grammar actions to record, validate and substitute parameters. */
    private PreprocessorContext pc;

    /** Destination to which the grammar actions append the processed script text. */
    private Writer out;

    /**
     * Sets the preprocessor context used by the grammar actions.
     *
     * @param pc the context to use
     */
    public void setContext(PreprocessorContext pc) {
        this.pc = pc;
    }

    /**
     * Sets the writer that receives the processed script.
     *
     * @param out the writer to append to
     */
    public void setOutputWriter(Writer out) {
        this.out = out;
    }

    /**
     * Removes one pair of matching surrounding quotes (single or double) from a string.
     *
     * @param s the possibly quoted text
     * @return {@code s} without its first and last character when both are the same
     *         quote character; otherwise {@code s} unchanged
     */
    private static String unquote(String s) {
        final int len = s.length();
        // An empty string or a lone quote character cannot be a quoted pair;
        // without this guard charAt(0) / substring(1, 0) would throw.
        if (len < 2) {
            return s;
        }
        final char first = s.charAt(0);
        final char last = s.charAt(len - 1);
        final boolean quoted = (first == '\'' || first == '"') && first == last;
        return quoted ? s.substring(1, len - 1) : s;
    }
}
PARSER_END(PigFileParser)
// Fields and helpers added to the generated token manager; the lexical
// actions further down read and update them.
TOKEN_MGR_DECLS : {
    // Nesting counters maintained by the lexical actions.
    int pigBlockLevel = 0;
    int funcBlockLevel = 0;
    int tupleSchemaLevel = 0;
    int bagSchemaLevel = 0;
    int bagConstantLevel = 0;

    // Lexical state a nested state switches back to when it finishes.
    int prevState = DEFAULT;

    // Lexical states remembered through saveState().
    Stack<Integer> stack = new Stack<Integer>();

    // Pops and returns the most recently saved state; when nothing has been
    // saved, hands back the state supplied by the caller.
    public int getState(int state) {
        if (stack.empty()) {
            return state;
        }
        return stack.pop();
    }

    // Remembers a state so that a later getState() call can return it.
    public void saveState(int state) {
        stack.push(state);
    }
}
// DEFAULT state: text of the form "define", whitespace, an identifier, optional
// whitespace and "(" (presumably the start of a Pig macro definition) is kept
// with MORE and the lexer moves to PIG_START to collect the rest of it. The
// collected text is eventually returned as a single PIG token.
<DEFAULT> MORE :
{
<"define" (<WS>)+ <IDENTIFIER> (<WS>)* "(" > : PIG_START
}
// PIG_START state: collects the text that follows a matched "define ... (" up to
// its terminating ";" or to the "{" that opens a block. Every rule is MORE, so
// matched text keeps accumulating in the image. Rules that enter a nested
// state first store PIG_START in prevState so that state can switch back here.
<PIG_START> MORE :
{
// start of a single-quoted string
<"'"> {prevState = PIG_START;} : IN_STRING
// start of a backtick-delimited command
| <"`"> {prevState = PIG_START;} : IN_COMMAND
// the keyword "as" surrounded by blanks/tabs starts a schema definition
| <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+ > {prevState = PIG_START;} : SCHEMA_DEFINITION
// the keyword "generate" surrounded by blanks/tabs
| <(" " | "\t")+["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"](" " | "\t")+ > {prevState = PIG_START;} : GENERATE
// opening brace: start counting block nesting at 1
| <"{"> {pigBlockLevel = 1;} : IN_BLOCK
// a closing brace with no open block is an error; the "if (true)" wrapper
// presumably keeps the generated code after this action reachable for javac
| <"}"> {if (true) throw new TokenMgrError("Unmatched '}'", TokenMgrError.LEXICAL_ERROR);}
// ";" ends the collected text
| <";"> : PIG_END
| <"--"> {prevState = PIG_START;} : SINGLE_LINE_COMMENT
| <"/*"> {prevState = PIG_START;} : MULTI_LINE_COMMENT
// line breaks and any other character are simply accumulated
| <("\n" | "\r" | "\r\n")>
| <(~[])>
}
// SINGLE_LINE_COMMENT state: accumulates the text of a "--" comment. A line
// break ends the comment and returns to the state recorded in prevState.
<SINGLE_LINE_COMMENT> MORE :
{
<("\n" | "\r" | "\r\n")> {SwitchTo(prevState);}
| <(~[])>
}
// MULTI_LINE_COMMENT state: accumulates the text of a block comment, line
// breaks included, until the closing delimiter is seen; then returns to the
// state recorded in prevState.
<MULTI_LINE_COMMENT> MORE :
{
<"*/"> {SwitchTo(prevState);}
| <("\n" | "\r" | "\r\n")>
| <(~[])>
}
// IN_STRING state: accumulates the contents of a single-quoted string. A
// backslash-escaped quote is consumed as one unit so it does not end the
// string; an unescaped quote returns to the state recorded in prevState.
<IN_STRING> MORE :
{
<"\\'">
| <"'"> { SwitchTo(prevState);}
| <("\n" | "\r" | "\r\n")>
| <(~[])>
}
// IN_COMMAND state: accumulates the contents of a backtick-delimited command.
// A backslash-escaped backtick is consumed as one unit so it does not end the
// command; an unescaped backtick returns to the state recorded in prevState.
<IN_COMMAND> MORE :
{
<"\\`">
| <"`"> { SwitchTo(prevState);}
| <("\n" | "\r" | "\r\n")>
| <(~[])>
}
// GENERATE state: entered from PIG_START or IN_BLOCK after the keyword
// "generate". Text is accumulated until ";" ends the statement. Because
// prevState is reused to make the nested BAG_CONSTANT / SCHEMA_DEFINITION
// states return here, the enclosing state is kept on the state stack.
<GENERATE> MORE :
{
<"{">
{
bagConstantLevel++;
// Obtain the enclosing state (popped from the stack if it was already
// saved, otherwise the current prevState) and put it (back) on the stack.
prevState = getState(prevState);
saveState(prevState);
// BAG_CONSTANT switches back to prevState when its braces balance.
prevState = GENERATE;
} : BAG_CONSTANT
| <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+>
{
// Same bookkeeping as above: remember the enclosing state on the stack
// and make SCHEMA_DEFINITION return to GENERATE.
prevState = getState(prevState);
saveState(prevState);
prevState = GENERATE;
} : SCHEMA_DEFINITION
| <";">
{
// Recover the enclosing state.
prevState = getState(prevState);
if(prevState == PIG_START) {
// Un-read the ";" and drop it from the image so that the ";" rule of
// PIG_START matches it and moves on to PIG_END.
input_stream.backup(1);
image.deleteCharAt(image.length()-1);
}
SwitchTo(prevState);
}
// line breaks and any other character are simply accumulated
| <("\n" | "\r" | "\r\n")>
| <(~[])>
}
// SCHEMA_DEFINITION state: accumulates the text after the keyword "as".
// Parenthesis and brace nesting is tracked in tupleSchemaLevel and
// bagSchemaLevel; the state ends, returning to prevState, once both
// counters are zero.
<SCHEMA_DEFINITION> MORE :
{
<"("> {tupleSchemaLevel++;}
// a closing parenthesis that brings both counters to zero ends the schema
| <")"> {tupleSchemaLevel--; if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) SwitchTo(prevState); }
| <"{"> {bagSchemaLevel++;}
// a closing brace that brings both counters to zero ends the schema
| <"}"> {bagSchemaLevel--; if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) SwitchTo(prevState); }
| <("," | ";" )>
{
// Outside any parentheses/braces a "," or ";" is not part of the schema:
// un-read it, drop it from the image and let the previous state handle it.
// Inside nesting the character is simply accumulated.
if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) {
input_stream.backup(1);
image.deleteCharAt(image.length()-1);
SwitchTo(prevState);
}
}
| <("\n" | "\r" | "\r\n")>
| <(~[])>
}
// BAG_CONSTANT state: accumulates a brace-delimited constant. bagConstantLevel
// counts open braces (the first one is counted by the rule that enters this
// state); when it drops back to zero the lexer returns to prevState.
<BAG_CONSTANT> MORE :
{
<"{"> {bagConstantLevel++;}
| <"}"> {bagConstantLevel--; if (bagConstantLevel == 0) SwitchTo(prevState);}
| <("\n" | "\r" | "\r\n")>
| <(~[])>
}
// IN_BLOCK state: accumulates the body of a "{ ... }" block. pigBlockLevel
// counts brace nesting; when the closing brace of the outermost block is seen
// the lexer moves to PIG_END. Rules that enter a nested state first store
// IN_BLOCK in prevState so that state can switch back here.
<IN_BLOCK> MORE :
{
<"\""> {prevState = IN_BLOCK;} : IN_DOUBLE_QUOTED_STRING
// the keyword "as" surrounded by blanks/tabs starts a schema definition
| <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+ > {prevState = IN_BLOCK;} : SCHEMA_DEFINITION
// the keyword "generate" surrounded by blanks/tabs
| <(" " | "\t")+["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"](" " | "\t")+> {prevState = IN_BLOCK;} : GENERATE
| <"{"> {pigBlockLevel++;}
// closing brace, together with an optional ";" directly after it
| <"}"(";")?> {pigBlockLevel--; if (pigBlockLevel == 0) SwitchTo(PIG_END);}
| <"'"> {prevState = IN_BLOCK;} : IN_STRING
| <"`"> {prevState = IN_BLOCK;} : IN_COMMAND
| <"--"> {prevState = IN_BLOCK;} : SINGLE_LINE_COMMENT
| <"/*"> {prevState = IN_BLOCK;} : MULTI_LINE_COMMENT
// line breaks and any other character are simply accumulated
| <("\n" | "\r" | "\r\n")>
| <(~[])>
}
// IN_DOUBLE_QUOTED_STRING state: accumulates the contents of a double-quoted
// string. A backslash-escaped quote is consumed as one unit so it does not end
// the string; an unescaped quote returns to the state recorded in prevState.
<IN_DOUBLE_QUOTED_STRING> MORE :
{
<"\\\"">
| <"\""> { SwitchTo(prevState);}
| <("\n" | "\r" | "\r\n")>
| <(~[])>
}
// PIG_END state: the PIG token matches the empty string, so it is produced as
// soon as this state is entered. Its image is set to the complete text
// accumulated by the preceding MORE rules, and the lexer returns to DEFAULT.
<PIG_END> TOKEN :
{
<PIG: ""> {
matchedToken.image = image.toString();
}: DEFAULT
}
// Whitespace tokens: a single line-break character, a single blank or tab,
// and WS which covers all four characters. WS is referenced by the "define"
// rule of the DEFAULT state.
// NOTE(review): NEWLINE and SPACE are declared before WS, so WS is presumably
// never returned as a token of its own - confirm.
TOKEN :
{
<NEWLINE: "\n" | "\r">
|
<SPACE: " " | "\t">
|
<WS: "\n" | "\r" | " " | "\t">
}
// Comments (single-line and multi-line). Three forms are recognised:
// "--" up to the end of the line, "#!" up to the end of the line, and a
// block comment running from its opening to its closing delimiter.
TOKEN :
{
<COMMENT: "--"(~["\r","\n"])*
| "#!" (~["\r","\n"])*
| "/*" (~["*"])* "*" ("*" | (~["*","/"] (~["*"])* "*"))* "/"
>
}
// Private character classes (the leading '#' marks them as usable only
// inside other regular expressions; they are never returned as tokens).
TOKEN:
{
<#LETTER : ["a"-"z", "A"-"Z"] >
|
<#DIGIT : ["0"-"9"] >
|
<#SPECIALCHAR : ["_"] >
|
<#DOLLAR : ["$"]>
}
// Keywords that introduce the two preprocessor commands handled by input().
TOKEN :
{
<DECLARE: "%declare" >
|
<PIGDEFAULT: "%default" >
}
// Remaining tokens of the DEFAULT state.
TOKEN :
{
// "register" switches to IN_REGISTER so that what follows is read as a PATH
<REGISTER: "register"> : IN_REGISTER
|
// optional leading underscores, one letter, then letters, digits or underscores
<IDENTIFIER: (<SPECIALCHAR>)*<LETTER>(<DIGIT> | <LETTER> | <SPECIALCHAR>)*>
|
// double- or single-quoted string; backslash-escaped quotes may appear inside
<LITERAL: ("\"" ((~["\""])*("\\\"")?)* "\"")|("'" ((~["'"])*("\\\'")?)* "'") >
|
// backtick-delimited shell command containing no backtick
<SHELLCMD: "`" (~["`"])* "`" >
|
// See the others() rule for the use of OTHER and NOT_OTHER_CHAR.
// others() is supposed to match 'everything else'. To ensure that others()
// doesn't swallow the other tokens (all the ones above), it uses two tokens,
// OTHER and NOT_OTHER_CHAR. NOT_OTHER_CHAR consists of the first characters of
// the other tokens, and OTHER consists of one or more chars that don't belong
// to NOT_OTHER_CHAR. Since the tokeniser matches the longest match, the other
// tokens will get matched instead of NOT_OTHER_CHAR.
<OTHER: (~["\"" , "'" , "`" , "a"-"z" , "A"-"Z" , "_" , "#" , "=" , " " , "\n" , "\t" , "\r", "%", "/", "-", "$"])+ >
|
<NOT_OTHER_CHAR: ["\"" , "'" , "`" , "a"-"z" , "A"-"Z" , "_" , "#" , "=" , " " , "\n" , "\t" , "\r", "%", "/", "-", "$"] >
}
// IN_REGISTER state (entered after the REGISTER token): whitespace is kept
// with MORE, so it becomes part of the image of the PATH token that follows.
<IN_REGISTER> MORE : { " " | "\t" | "\r" | "\n"}
// PATH is a run of characters other than parentheses, ";" and whitespace. Its
// image is set to the accumulated text (including any whitespace kept by the
// MORE rule above) and the lexer returns to DEFAULT.
<IN_REGISTER> TOKEN: {
<PATH: (~["(", ")", ";", "\r", " ", "\t", "\n"])+> {
matchedToken.image = image.toString();
}: DEFAULT
}
// Entry point of the grammar: consumes input() items until end of file.
void Parse() throws IOException :
{}
{
    ( input() )*
    <EOF>
}
// Handles one top-level item of the script and writes the resulting text,
// if any, to the output writer.
void input() throws IOException :
{
    String text;
    Token head = null;
    Token path = null;
}
{
    // Text collected by the lexer as a single PIG token: copied through as is.
    head = <PIG>
    {
        out.append(head.image);
    }
|
    // %declare command (overwrite = true)
    head = <DECLARE> param_value(true)
    {
        pc.validate(head.toString());
    }
|
    // %default command (overwrite = false)
    head = <PIGDEFAULT> param_value(false)
    {
        pc.validate(head.toString());
    }
|
    // "register" is special-cased because its argument may use "/*" globbing,
    // which conflicts with the general multi-line comment "/* */". Since the
    // tokenizer prefers the longest match (see the comment on OTHER), the
    // string next to "register" is read as a PATH token, so "/*" in it is not
    // taken as the start of a comment.
    head = <REGISTER> path = <PATH>
    {
        out.append(head.image);
        out.append(pc.substitute(path.image));
    }
|
    // Ordinary pig text: perform parameter substitution before writing it.
    text = paramString()
    {
        out.append(pc.substitute(text));
    }
|
    // A bare identifier is written unchanged.
    head = <IDENTIFIER>
    {
        out.append(head.image);
    }
|
    // Comments, blanks and line breaks are written unchanged.
    write_ignore_toks()
}
// Parses the remainder of a %declare / %default command: the parameter name
// followed by its value, and hands both to the preprocessor context.
// The value may be unquoted text, an identifier optionally followed by more
// text, a backtick-delimited shell command, or a quoted literal (whose
// surrounding quotes are removed before it is stored).
// 'overwrite' is passed through unchanged to the context.
void param_value(boolean overwrite) throws IOException:
{
    Token id, val;
    String s;
    String other;
}
{
    // comments and blanks (but no line breaks) may surround the name
    ( ignore_toks_nonewline() )*
    id = <IDENTIFIER>
    ( ignore_toks_nonewline() )*
    (
        // unquoted value that does not start with an identifier; it must be
        // followed by a line break, which is written to the output
        s = others() ( ignore_toks_nonewline() )* write_newline()
        { pc.processOrdLine(id.image, s, overwrite); }
    |
        val = <IDENTIFIER> // this construct is for cases like a.123
        { s = val.image; }
        ( LOOKAHEAD( 2 )
          other = others() ( ignore_toks_nonewline() )* write_newline()
          { s += other; }
        )?
        { pc.processOrdLine(id.image, s, overwrite); }
    |
        // value produced by a shell command
        val = <SHELLCMD>
        { pc.processShellCmd(id.image, val.image, overwrite); }
    |
        // quoted value: strip the surrounding quotes
        val = <LITERAL>
        { s = unquote(val.image); pc.processOrdLine(id.image, s, overwrite); }
    )
}
// Matches 'everything else' (see the description of OTHER and NOT_OTHER_CHAR
// in the token section) and returns the concatenated text.
// The match must begin with an OTHER or NOT_OTHER_CHAR token; after that any
// sequence of OTHER, NOT_OTHER_CHAR, IDENTIFIER, LITERAL and SHELLCMD tokens
// is accepted (an IDENTIFIER occurs, for example, inside an unquoted file
// name such as /d1/abc/).
String others() throws IOException :
{
    Token piece;
    StringBuilder text = new StringBuilder();
}
{
    ( piece = <OTHER> | piece = <NOT_OTHER_CHAR> )
    {
        text.append(piece.image);
    }
    (
        (
            piece = <OTHER>
        |
            piece = <NOT_OTHER_CHAR>
        |
            piece = <IDENTIFIER>
        |
            piece = <LITERAL>
        |
            piece = <SHELLCMD>
        )
        {
            text.append(piece.image);
        }
    )*
    {
        return text.toString();
    }
}
// Matches a piece of text that may contain parameters: either 'other' text,
// a quoted literal or a backtick-delimited shell command. The matched text is
// returned unchanged (quotes and backticks included).
String paramString() throws IOException :
{
    Token quoted;
    String plain;
}
{
    plain = others()
    {
        return plain;
    }
|
    ( quoted = <LITERAL> | quoted = <SHELLCMD> )
    {
        return quoted.image;
    }
}
// Copies one comment, blank or line break to the output so that the
// formatting of the script is preserved.
void write_ignore_toks() throws IOException :
{
    String filler;
}
{
    filler = ignore_toks()
    {
        out.append(filler);
    }
}
// Matches a single comment or a single blank/tab (line breaks are NOT
// matched here) and returns its text.
String ignore_toks_nonewline() throws IOException :
{
    Token filler;
}
{
    ( filler = <COMMENT> | filler = <SPACE> )
    {
        return filler.image;
    }
}
// Matches a single comment, blank/tab or line break and returns its text.
String ignore_toks() throws IOException :
{
    Token comment;
    String blank;
}
{
    comment = <COMMENT>
    {
        return comment.image;
    }
|
    blank = space_or_newline()
    {
        return blank;
    }
}
// Matches a single blank/tab or a single line-break character and returns it.
String space_or_newline() :
{
    Token ws;
}
{
    ( ws = <SPACE> | ws = <NEWLINE> )
    {
        return ws.image;
    }
}
// Matches one line-break character and copies it to the output.
void write_newline() throws IOException :
{
    Token eol;
}
{
    eol = <NEWLINE>
    {
        out.append(eol.image);
    }
}