| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /** |
| * This grammar is used to process the %declare and %default commands |
| * within a Pig script and to substitute parameter values. |
| */ |
| |
| // JavaCC generation options for this grammar. |
| options { |
| // Generate non-static functions |
| STATIC = false; |
| // Tokens are matched case-insensitively |
| IGNORE_CASE = true; |
| // The generated character stream processes Java-style \uXXXX escapes before tokenizing |
| JAVA_UNICODE_ESCAPE = true; |
| //DEBUG_PARSER = true; |
| // Default number of lookahead tokens used at the choice points of the productions below |
| LOOKAHEAD = 2; |
| |
| } |
| PARSER_BEGIN(PigFileParser) |
| |
| package org.apache.pig.tools.parameters; |
| |
| import java.io.IOException; |
| import java.io.PrintWriter; |
| import java.util.Hashtable; |
| import java.util.Stack; |
| import java.io.Writer; |
| import java.lang.StringBuilder; |
| |
| // Warnings in the code generated by JavaCC cannot be fixed here, |
| // so all warnings are suppressed for this class. Note that this does not |
| // suppress warnings in the other classes generated from this .jj file. |
| /** |
|  * Parser half of the Pig parameter preprocessor. The hand-written members below are |
|  * used by the grammar actions: the context receives parameter definitions and performs |
|  * substitution, and the writer receives the preprocessed script text. |
|  */ |
| @SuppressWarnings("all") |
| |
| public class PigFileParser { |
|     // Context that the grammar actions call to define, validate and substitute parameters. |
|     private PreprocessorContext pc; |
|     // Destination of the preprocessed script text. |
|     private Writer out; |
| |
|     /** |
|      * Sets the preprocessor context used by the grammar actions. |
|      * |
|      * @param pc the context to use |
|      */ |
|     public void setContext(PreprocessorContext pc) { |
|         this.pc = pc; |
|     } |
| |
|     /** |
|      * Sets the writer that receives the preprocessed output. |
|      * |
|      * @param out the writer to append to |
|      */ |
|     public void setOutputWriter(Writer out) { |
|         this.out = out; |
|     } |
| |
|     /** |
|      * Strips one pair of matching enclosing quotes (single or double) from a string. |
|      * |
|      * @param s the text to unquote; must not be null |
|      * @return s without its first and last character when both are the same quote |
|      *         character; otherwise s unchanged |
|      */ |
|     private static String unquote(String s) |
|     { |
|         final int last = s.length() - 1; |
|         // A quoted value needs both delimiters. Shorter input (the empty string or a lone |
|         // quote character) is returned as is instead of raising an index exception. |
|         if (last < 1) { |
|             return s; |
|         } |
|         final char first = s.charAt(0); |
|         if ((first == '\'' || first == '"') && s.charAt(last) == first) { |
|             return s.substring(1, last); |
|         } |
|         return s; |
|     } |
| } |
| |
| PARSER_END(PigFileParser) |
| |
| |
| // State shared by the lexical actions of the token manager. |
| TOKEN_MGR_DECLS : { |
|     // Depth of nested '{' ... '}' while in IN_BLOCK. |
|     int pigBlockLevel = 0; |
|     // Not referenced by any rule in this grammar. |
|     int funcBlockLevel = 0; |
|     // Depth of nested '(' / '{' while in SCHEMA_DEFINITION. |
|     int tupleSchemaLevel = 0; |
|     int bagSchemaLevel = 0; |
|     // Depth of nested '{' while in BAG_CONSTANT. |
|     int bagConstantLevel = 0; |
|     // Lexical state that a nested state switches back to when it ends. |
|     int prevState = DEFAULT; |
| |
|     // Lexical states parked while prevState is temporarily overwritten. |
|     Stack<Integer> stack = new Stack<Integer>(); |
| |
|     // Pops and returns the most recently saved state; if nothing is saved, |
|     // returns the given state unchanged. |
|     public int getState(int state) { |
|         return stack.empty() ? state : stack.pop(); |
|     } |
| |
|     // Parks the given state on the stack. |
|     public void saveState(int state) { |
|         stack.push(Integer.valueOf(state)); |
|     } |
| |
| } |
| |
| // Entry into "define" handling: matches the text "define <identifier> (" (with whitespace |
| // around the identifier) and switches to PIG_START. Since this is a MORE rule, the matched |
| // text stays in the image buffer and becomes the start of the PIG token emitted in PIG_END. |
| <DEFAULT> MORE : |
| { |
| <"define" (<WS>)+ <IDENTIFIER> (<WS>)* "(" > : PIG_START |
| } |
| |
| // Body of a "define" statement before any '{' block has been opened. Every rule is MORE, so |
| // the text keeps accumulating. Before entering a nested state (string, command, comment, |
| // schema, GENERATE) the action stores PIG_START in prevState so that state can switch back. |
| <PIG_START> MORE : |
| { |
| <"'"> {prevState = PIG_START;} : IN_STRING |
| | <"`"> {prevState = PIG_START;} : IN_COMMAND |
| // "as" keyword (either case) surrounded by blanks/tabs: a schema follows |
| | <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+ > {prevState = PIG_START;} : SCHEMA_DEFINITION |
| // "generate" keyword (either case) surrounded by blanks/tabs |
| | <(" " | "\t")+["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"](" " | "\t")+ > {prevState = PIG_START;} : GENERATE |
| // the first '{' opens a block; deeper nesting is counted in IN_BLOCK |
| | <"{"> {pigBlockLevel = 1;} : IN_BLOCK |
| // a '}' with no open block is a lexical error; the "if (true)" wrapper presumably keeps the |
| // generated code that follows the throw from being rejected as unreachable -- TODO confirm |
| | <"}"> {if (true) throw new TokenMgrError("Unmatched '}'", TokenMgrError.LEXICAL_ERROR);} |
| // ';' ends the statement: PIG_END emits everything accumulated so far as one PIG token |
| | <";"> : PIG_END |
| | <"--"> {prevState = PIG_START;} : SINGLE_LINE_COMMENT |
| | <"/*"> {prevState = PIG_START;} : MULTI_LINE_COMMENT |
| | <("\n" | "\r" | "\r\n")> |
| // any other single character is simply accumulated |
| | <(~[])> |
| } |
| |
| // Inside a "--" comment of a define statement: characters are accumulated until a line |
| // terminator is matched, then the token manager switches back to the state stored in prevState. |
| <SINGLE_LINE_COMMENT> MORE : |
| { |
| <("\n" | "\r" | "\r\n")> {SwitchTo(prevState);} |
| | <(~[])> |
| } |
| |
| // Inside a "/* ... */" comment of a define statement: everything, including line |
| // terminators, is accumulated until the closing "*/", which switches back to prevState. |
| <MULTI_LINE_COMMENT> MORE : |
| { |
| <"*/"> {SwitchTo(prevState);} |
| | <("\n" | "\r" | "\r\n")> |
| | <(~[])> |
| } |
| |
| // Inside a single-quoted string of a define statement. |
| <IN_STRING> MORE : |
| { |
| // backslash-escaped quote: matched as a unit so that it does not end the string |
| <"\\'"> |
| // unescaped quote ends the string and returns to the state stored in prevState |
| | <"'"> { SwitchTo(prevState);} |
| | <("\n" | "\r" | "\r\n")> |
| | <(~[])> |
| } |
| |
| // Inside a backtick-quoted command of a define statement. |
| <IN_COMMAND> MORE : |
| { |
| // backslash-escaped backtick: matched as a unit so that it does not end the command |
| <"\\`"> |
| // unescaped backtick ends the command and returns to the state stored in prevState |
| | <"`"> { SwitchTo(prevState);} |
| | <("\n" | "\r" | "\r\n")> |
| | <(~[])> |
| } |
| |
| // Inside a GENERATE clause of a define statement. On entry prevState holds the state that |
| // GENERATE was entered from (PIG_START or IN_BLOCK). Before a nested state is entered, |
| // prevState is overwritten with GENERATE so the nested state returns here; the state that |
| // must eventually be resumed is kept on the stack through getState()/saveState(). |
| <GENERATE> MORE : |
| { |
| // '{' starts a bag constant |
| <"{"> |
| { |
| bagConstantLevel++; |
| // take the parked state if there is one (otherwise the current prevState) and park it again |
| prevState = getState(prevState); |
| saveState(prevState); |
| prevState = GENERATE; |
| } : BAG_CONSTANT |
| // "as" keyword (either case) surrounded by blanks/tabs starts a schema |
| | <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+> |
| { |
| prevState = getState(prevState); |
| saveState(prevState); |
| prevState = GENERATE; |
| } : SCHEMA_DEFINITION |
| // ';' ends the GENERATE clause: recover the state to resume and switch to it |
| | <";"> |
| { |
| prevState = getState(prevState); |
| if(prevState == PIG_START) { |
| // hand the ';' back to the input and drop it from the image, so that PIG_START |
| // matches it itself and moves on to PIG_END |
| input_stream.backup(1); |
| image.deleteCharAt(image.length()-1); |
| } |
| SwitchTo(prevState); |
| } |
| | <("\n" | "\r" | "\r\n")> |
| | <(~[])> |
| } |
| |
| // Inside a schema that follows an "as" keyword. Parenthesis and brace nesting is counted; |
| // the schema ends, and the token manager switches back to prevState, when a closing |
| // bracket brings both counters to zero or a ',' / ';' is seen while both are zero. |
| <SCHEMA_DEFINITION> MORE : |
| { |
| <"("> {tupleSchemaLevel++;} |
| | <")"> {tupleSchemaLevel--; if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) SwitchTo(prevState); } |
| | <"{"> {bagSchemaLevel++;} |
| | <"}"> {bagSchemaLevel--; if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) SwitchTo(prevState); } |
| | <("," | ";" )> |
| { |
| if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) { |
| // outside any bracket the delimiter is not part of the schema: hand it back to the |
| // input and drop it from the image so that the resumed state processes it |
| input_stream.backup(1); |
| image.deleteCharAt(image.length()-1); |
| SwitchTo(prevState); |
| } |
| } |
| | <("\n" | "\r" | "\r\n")> |
| | <(~[])> |
| } |
| |
| // Inside a '{' ... '}' bag constant opened in GENERATE (which already counted the first |
| // brace). Nested braces are counted; the brace that brings the count back to zero |
| // switches back to the state stored in prevState. |
| <BAG_CONSTANT> MORE : |
| { |
| <"{"> {bagConstantLevel++;} |
| | <"}"> {bagConstantLevel--; if (bagConstantLevel == 0) SwitchTo(prevState);} |
| | <("\n" | "\r" | "\r\n")> |
| | <(~[])> |
| } |
| |
| // Inside the '{' ... '}' block of a define statement. Works like PIG_START, but records |
| // IN_BLOCK in prevState before entering nested states, additionally recognizes |
| // double-quoted strings, and counts brace nesting to find the end of the block. |
| <IN_BLOCK> MORE : |
| { |
| <"\""> {prevState = IN_BLOCK;} : IN_DOUBLE_QUOTED_STRING |
| | <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+ > {prevState = IN_BLOCK;} : SCHEMA_DEFINITION |
| | <(" " | "\t")+["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"](" " | "\t")+> {prevState = IN_BLOCK;} : GENERATE |
| | <"{"> {pigBlockLevel++;} |
| // closing brace, optionally followed directly by ';'; the brace that closes the |
| // outermost block completes the statement (PIG_END emits the PIG token) |
| | <"}"(";")?> {pigBlockLevel--; if (pigBlockLevel == 0) SwitchTo(PIG_END);} |
| | <"'"> {prevState = IN_BLOCK;} : IN_STRING |
| | <"`"> {prevState = IN_BLOCK;} : IN_COMMAND |
| | <"--"> {prevState = IN_BLOCK;} : SINGLE_LINE_COMMENT |
| | <"/*"> {prevState = IN_BLOCK;} : MULTI_LINE_COMMENT |
| | <("\n" | "\r" | "\r\n")> |
| // any other single character is simply accumulated |
| | <(~[])> |
| } |
| |
| // Inside a double-quoted string (entered only from IN_BLOCK in this grammar). |
| <IN_DOUBLE_QUOTED_STRING> MORE : |
| { |
| // backslash-escaped double quote: matched as a unit so that it does not end the string |
| <"\\\""> |
| // unescaped double quote ends the string and returns to the state stored in prevState |
| | <"\""> { SwitchTo(prevState);} |
| | <("\n" | "\r" | "\r\n")> |
| | <(~[])> |
| } |
| |
| // Reached when a define statement is complete. The empty pattern consumes no input; the |
| // action sets the PIG token's image to the text accumulated in the image buffer by the |
| // preceding MORE rules, and the token manager returns to DEFAULT. |
| <PIG_END> TOKEN : |
| { |
| <PIG: ""> { |
| matchedToken.image = image.toString(); |
| }: DEFAULT |
| } |
| |
| // Single-character whitespace tokens. |
| // NOTE(review): every WS alternative is also matched by NEWLINE or SPACE, which are declared |
| // first, so WS looks useful only as a fragment of the "define" pattern in the DEFAULT |
| // MORE rule -- confirm. |
| TOKEN : |
| { |
| <NEWLINE: "\n" | "\r"> |
| | |
| <SPACE: " " | "\t"> |
| | |
| <WS: "\n" | "\r" | " " | "\t"> |
| } |
| |
| // Comments: single-line comments starting with "--" or "#!" (up to, but not including, |
| // the line terminator) and multi-line comments of the form "/* ... */". |
| TOKEN : |
| { |
| <COMMENT: "--"(~["\r","\n"])* |
| | "#!" (~["\r","\n"])* |
| | "/*" (~["*"])* "*" ("*" | (~["*","/"] (~["*"])* "*"))* "/" |
| > |
| } |
| |
| // Private (#) regular expressions: building blocks for other token definitions that are |
| // never produced as tokens themselves. DOLLAR is not referenced by any rule in this grammar. |
| TOKEN: |
| { |
| <#LETTER : ["a"-"z", "A"-"Z"] > |
| | |
| <#DIGIT : ["0"-"9"] > |
| | |
| <#SPECIALCHAR : ["_"] > |
| | |
| <#DOLLAR : ["$"]> |
| } |
| |
| // Preprocessor commands that introduce a parameter definition; input() handles both by |
| // calling param_value(), with overwrite=true for %declare and overwrite=false for %default. |
| TOKEN : |
| { |
| <DECLARE: "%declare" > |
| | |
| <PIGDEFAULT: "%default" > |
| } |
| |
| |
| // Tokens of ordinary script text. |
| TOKEN : |
| { |
| // "register" switches to IN_REGISTER so that what follows is lexed as a single PATH token |
| <REGISTER: "register"> : IN_REGISTER |
| | |
| // optional leading underscores, then a letter, then letters, digits or underscores |
| <IDENTIFIER: (<SPECIALCHAR>)*<LETTER>(<DIGIT> | <LETTER> | <SPECIALCHAR>)*> |
| | |
| // double- or single-quoted string; a backslash-escaped quote may appear inside it |
| <LITERAL: ("\"" ((~["\""])*("\\\"")?)* "\"")|("'" ((~["'"])*("\\\'")?)* "'") > |
| | |
| // backtick-delimited shell command containing no backtick |
| <SHELLCMD: "`" (~["`"])* "`" > |
| | |
| // See the others() rule for the use of OTHER and NOT_OTHER_CHAR. |
| // others() is supposed to match 'everything else'. To ensure that others() does not |
| // swallow the other tokens (all the ones above), it uses two tokens, OTHER and NOT_OTHER_CHAR. |
| // NOT_OTHER_CHAR consists of the first characters of the other tokens, and OTHER consists of |
| // one or more chars that don't belong to NOT_OTHER_CHAR. Since the tokenizer picks the longest |
| // match, the other tokens will get matched instead of NOT_OTHER_CHAR. |
| <OTHER: (~["\"" , "'" , "`" , "a"-"z" , "A"-"Z" , "_" , "#" , "=" , " " , "\n" , "\t" , "\r", "%", "/", "-", "$"])+ > |
| | |
| <NOT_OTHER_CHAR: ["\"" , "'" , "`" , "a"-"z" , "A"-"Z" , "_" , "#" , "=" , " " , "\n" , "\t" , "\r", "%", "/", "-", "$"] > |
| } |
| |
| // After "register": whitespace is accumulated (MORE), so it ends up at the front of the |
| // PATH image set below. |
| <IN_REGISTER> MORE : { " " | "\t" | "\r" | "\n"} |
| |
| // The path is a run of characters other than parentheses, ';' and whitespace. Lexing it as |
| // one token keeps a glob such as "/*" from being treated as the start of a comment. |
| <IN_REGISTER> TOKEN: { |
| <PATH: (~["(", ")", ";", "\r", " ", "\t", "\n"])+> { |
| matchedToken.image = image.toString(); |
| }: DEFAULT |
| } |
| |
| // Entry point: consumes the whole script, one input() construct at a time, up to end of file. |
| void Parse() throws IOException : |
| {} |
| { |
|     ( input() )* |
|     <EOF> |
| } |
| |
| // Handles one top-level construct of the script and appends its preprocessed form to the |
| // output writer. |
| void input() throws IOException : |
| { |
|     String line; |
|     Token first = null; |
|     Token path = null; |
| } |
| { |
|     // a complete "define" statement collected by the token manager: copied through as is |
|     first = <PIG> |
|     { out.append(first.image); } |
| | |
|     // %declare <name> <value>: param_value() is told to overwrite |
|     first = <DECLARE> |
|     ( param_value(true) { pc.validate(first.toString()); } ) |
| | |
|     // %default <name> <value>: param_value() is told not to overwrite |
|     first = <PIGDEFAULT> |
|     ( param_value(false) { pc.validate(first.toString()); } ) |
| | |
|     first = <REGISTER> |
|     path = <PATH> |
|     { |
|         // "register" is special-cased because its argument may contain a "/*" glob, which |
|         // would otherwise be lexed as the start of a multi-line comment (the tokenizer |
|         // prefers the longest match). The text after "register" is therefore lexed as one |
|         // PATH token, and parameters are substituted inside it. |
|         out.append(first.image); |
|         out.append(pc.substitute(path.image)); |
|     } |
| | |
|     // ordinary script text: perform parameter substitution |
|     line = paramString() |
|     { out.append(pc.substitute(line)); } |
| | |
|     // identifiers are written without substitution |
|     first = <IDENTIFIER> |
|     { out.append(first.image); } |
| | |
|     // blanks, newlines and comments are written unchanged |
|     write_ignore_toks() |
| } |
| |
| // Parses the rest of a %declare / %default line -- the parameter name followed by its |
| // value -- and hands the definition to the preprocessor context. |
| // overwrite is passed through to the context unchanged. |
| void param_value(boolean overwrite) throws IOException: |
| { |
|     Token id, val; |
|     String s; |
|     String other; |
| } |
| { |
|     // blanks and comments around the name are skipped and not written to the output |
|     (ignore_toks_nonewline())* |
|     id=<IDENTIFIER> |
|     (ignore_toks_nonewline())* |
|     ( |
|         // unquoted value that does not start with an identifier; the newline that ends the |
|         // line is consumed here and written to the output |
|         s=others() (ignore_toks_nonewline())* write_newline() |
|         { pc.processOrdLine(id.image , s, overwrite); } |
|     | |
|         // value that starts with an identifier, optionally continued by other text |
|         val=<IDENTIFIER> // this construct is for cases like a.123 |
|         { s = val.image; } |
|         ( LOOKAHEAD( 2 ) |
|             other = others() (ignore_toks_nonewline())* write_newline() |
|             { s += other; } |
|         )? |
|         { pc.processOrdLine(id.image , s, overwrite); } |
|     | |
|         // backtick-quoted shell command |
|         val=<SHELLCMD> |
|         { pc.processShellCmd(id.image , val.image, overwrite); } |
|     | |
|         // quoted literal: the enclosing quotes are removed before the value is stored |
|         val=<LITERAL> |
|         { s = unquote(val.image); pc.processOrdLine(id.image, s, overwrite); } |
|     ) |
| } |
| |
| // Matches "everything else" (see the comments on the OTHER and NOT_OTHER_CHAR tokens): |
| // the text must start with an OTHER or NOT_OTHER_CHAR token and may continue with any |
| // sequence of OTHER, NOT_OTHER_CHAR, IDENTIFIER, LITERAL and SHELLCMD tokens. |
| // Returns the concatenated images of all matched tokens. |
| String others() throws IOException : |
| { |
|     Token piece = null; |
|     StringBuilder text = new StringBuilder(); |
| } |
| { |
|     ( |
|         piece = <OTHER> |
|     | |
|         piece = <NOT_OTHER_CHAR> |
|     ) |
|     { text.append(piece.image); } |
|     ( |
|         ( |
|             piece = <OTHER> |
|         | |
|             piece = <NOT_OTHER_CHAR> |
|         | |
|             // an identifier occurs here e.g. inside an unquoted filename - /d1/abc/ |
|             piece = <IDENTIFIER> |
|         | |
|             piece = <LITERAL> |
|         | |
|             piece = <SHELLCMD> |
|         ) |
|         { text.append(piece.image); } |
|     )* |
|     { return text.toString(); } |
| } |
| |
| |
| // A piece of script text that may contain parameters: a run matched by others(), a quoted |
| // literal, or a backtick-quoted shell command. Returns the matched text unchanged. |
| String paramString() throws IOException : |
| { |
|     Token tok = null; |
|     String run; |
| } |
| { |
|     run = others() |
|     { return run; } |
| | |
|     tok = <LITERAL> |
|     { return tok.image; } |
| | |
|     tok = <SHELLCMD> |
|     { return tok.image; } |
| } |
| |
| // Copies a blank, newline or comment to the output so that the formatting of the |
| // script is preserved. |
| void write_ignore_toks() throws IOException : |
| { |
|     String skipped; |
| } |
| { |
|     skipped = ignore_toks() |
|     { out.append(skipped); } |
| } |
| |
| |
| // Matches a comment or a blank (space/tab), but not a newline, and returns its text. |
| String ignore_toks_nonewline() throws IOException : |
| { |
|     Token tok = null; |
| } |
| { |
|     ( |
|         tok = <COMMENT> |
|     | |
|         tok = <SPACE> |
|     ) |
|     { return tok.image; } |
| } |
| |
| |
| // Matches a comment, a blank (space/tab) or a newline and returns its text. |
| String ignore_toks() throws IOException : |
| { |
|     Token comment = null; |
|     String blank; |
| } |
| { |
|     comment = <COMMENT> |
|     { return comment.image; } |
| | |
|     blank = space_or_newline() |
|     { return blank; } |
| } |
| |
| // Matches a single blank (space/tab) or newline token and returns its text. |
| String space_or_newline() : |
| { |
|     Token tok = null; |
| } |
| { |
|     ( |
|         tok = <SPACE> |
|     | |
|         tok = <NEWLINE> |
|     ) |
|     { return tok.image; } |
| } |
| |
| // Matches one newline token and copies it to the output. |
| void write_newline() throws IOException: |
| { |
|     Token eol = null; |
| } |
| { |
|     eol = <NEWLINE> |
|     { out.append(eol.image); } |
| } |
| |