| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /** |
| * Grammar file for Pig tree parser (visitor for default data type insertion). |
| * |
| * NOTE: THIS FILE IS BASED ON QueryParser.g, SO IF YOU CHANGE THAT FILE, YOU WILL |
| * PROBABLY NEED TO MAKE CORRESPONDING CHANGES TO THIS FILE AS WELL. |
| */ |
| |
| tree grammar AliasMasker; |
| |
| options { |
| tokenVocab=QueryParser; |
| ASTLabelType=CommonTree; |
| output=AST; |
| backtrack=true; |
| } |
| |
| @header { |
| package org.apache.pig.parser; |
| |
| import java.util.HashSet; |
| import java.util.Set; |
| } |
| |
| @members { |
| |
| @Override |
| public String getErrorMessage(RecognitionException e, String[] tokenNames) { |
| if (e instanceof ParserValidationException) { |
| return e.toString(); |
| } |
| return super.getErrorMessage(e, tokenNames); |
| } |
| |
| public void setParams(Set ps, String macro, long idx) { |
| params = ps; |
| macroName = macro; |
| index = idx; |
| } |
| |
| private String getMask(String alias) { |
| return params.contains( alias ) |
| ? alias |
| : "macro_" + macroName + "_" + alias + "_" + index; |
| } |
| |
| private Set<String> params = new HashSet<String>(); |
| |
| private Set<String> aliasSeen = new HashSet<String>(); |
| |
| private String macroName = ""; |
| |
| private long index = 0; |
| |
| private boolean inAsOrGenClause = false; |
| |
| } // End of @members |
| |
| @rulecatch { |
| catch(RecognitionException re) { |
| throw re; |
| } |
| } |
| |
| query : ^( QUERY statement* ) |
| ; |
| |
| statement : general_statement |
| | split_statement |
| | realias_statement |
| | assert_statement |
| ; |
| |
| split_statement : split_clause |
| ; |
| |
| assert_statement: assert_clause |
| ; |
| |
| realias_statement : realias_clause |
| ; |
| |
| // For foreach statement that with complex inner plan. |
| general_statement |
| : ^( STATEMENT ( alias )? |
| op_clause parallel_clause? ) |
| ; |
| |
| realias_clause : ^(REALIAS alias IDENTIFIER) |
| ; |
| |
| parallel_clause |
| : ^( PARALLEL INTEGER ) |
| ; |
| |
| alias |
| : IDENTIFIER |
| { |
| aliasSeen.add($IDENTIFIER.text); |
| $IDENTIFIER.getToken().setText(getMask($IDENTIFIER.text)); |
| } |
| ; |
| |
| op_clause : define_clause |
| | load_clause |
| | group_clause |
| | store_clause |
| | filter_clause |
| | distinct_clause |
| | limit_clause |
| | sample_clause |
| | order_clause |
| | rank_clause |
| | cross_clause |
| | join_clause |
| | union_clause |
| | stream_clause |
| | mr_clause |
| | split_clause |
| | foreach_clause |
| | cube_clause |
| | assert_clause |
| ; |
| |
| define_clause |
| : ^( DEFINE IDENTIFIER ( cmd | func_clause ) ) |
| ; |
| |
| cmd |
| : ^( EXECCOMMAND |
| ( ship_clause | cache_clause | input_clause | output_clause | error_clause )* ) |
| ; |
| |
| ship_clause |
| : ^( SHIP path_list? ) |
| ; |
| |
| path_list |
| : QUOTEDSTRING+ |
| ; |
| |
| cache_clause |
| : ^( CACHE path_list ) |
| ; |
| |
| input_clause |
| : ^( INPUT stream_cmd+ ) |
| ; |
| |
| stream_cmd |
| : ^( STDIN func_clause? ) |
| | ^( STDOUT func_clause? ) |
| | ^( QUOTEDSTRING func_clause? ) |
| ; |
| |
| output_clause |
| : ^( OUTPUT stream_cmd+ ) |
| ; |
| |
| error_clause |
| : ^( STDERROR ( QUOTEDSTRING INTEGER? )? ) |
| ; |
| |
| load_clause |
| : ^( LOAD filename func_clause? as_clause? ) |
| ; |
| |
| filename |
| : QUOTEDSTRING |
| ; |
| |
| as_clause |
| @init { |
| inAsOrGenClause = true; |
| } |
| @after { |
| inAsOrGenClause = false; |
| } |
| : ^( AS field_def_list ) |
| ; |
| |
| field_def |
| : ^( FIELD_DEF IDENTIFIER type? ) { |
| if (inAsOrGenClause) { |
| if (aliasSeen.contains($IDENTIFIER.text)) { |
| throw new ParserValidationException(input, new SourceLocation((PigParserNode)$field_def.start), |
| "Macro doesn't support user defined schema that contains name that conflicts with alias name: " + $IDENTIFIER.text); |
| } |
| } |
| } |
| | ^( FIELD_DEF_WITHOUT_IDENTIFIER type ) |
| ; |
| |
| field_def_list |
| : field_def+ |
| ; |
| |
| type : simple_type | tuple_type | bag_type | map_type |
| ; |
| |
| simple_type |
| : BOOLEAN | INT | LONG | FLOAT | DOUBLE | DATETIME | BIGINTEGER | BIGDECIMAL | CHARARRAY | BYTEARRAY |
| ; |
| |
| tuple_type |
| : ^( TUPLE_TYPE field_def_list? ) |
| ; |
| |
| bag_type |
| : ^( BAG_TYPE IDENTIFIER? tuple_type? ) |
| ; |
| |
| map_type : ^( MAP_TYPE IDENTIFIER? type? ) |
| ; |
| |
| func_clause |
| : ^( FUNC_REF func_name ) |
| | ^( FUNC func_name func_args? ) |
| ; |
| |
| func_name |
| : eid ( ( PERIOD | DOLLAR ) eid )* |
| ; |
| |
| func_args |
| : QUOTEDSTRING+ |
| ; |
| |
| cube_clause |
| : ^( CUBE cube_item ) |
| ; |
| |
| cube_item |
| : rel ( cube_by_clause ) |
| ; |
| |
| cube_by_clause |
| : ^( BY cube_or_rollup ) |
| ; |
| |
| cube_or_rollup |
| : cube_rollup_list+ |
| ; |
| |
| cube_rollup_list |
| : ^( ( CUBE | ROLLUP ) cube_by_expr_list ) |
| ; |
| |
| cube_by_expr_list |
| : cube_by_expr+ |
| ; |
| |
| cube_by_expr |
| : col_range | expr | STAR |
| ; |
| |
| group_clause |
| : ^( ( GROUP | COGROUP ) group_item+ group_type? partition_clause? ) |
| ; |
| |
| group_type : QUOTEDSTRING |
| ; |
| |
| group_item |
| : rel ( join_group_by_clause | ALL | ANY ) ( INNER | OUTER )? |
| ; |
| |
| rel |
| : alias | ( op_clause parallel_clause? ) |
| ; |
| |
| flatten_generated_item |
| @init { |
| inAsOrGenClause = true; |
| } |
| @after { |
| inAsOrGenClause = false; |
| } |
| : ( flatten_clause | col_range | expr | STAR ) field_def_list? |
| ; |
| |
| flatten_clause |
| : ^( FLATTEN expr ) |
| ; |
| |
| store_clause |
| : ^( STORE alias filename func_clause? ) |
| ; |
| |
| assert_clause |
| : ^( ASSERT alias cond comment? ) |
| ; |
| |
| comment : QUOTEDSTRING |
| ; |
| |
| filter_clause |
| : ^( FILTER rel cond ) |
| ; |
| |
| cond |
| : ^( OR cond cond ) |
| | ^( AND cond cond ) |
| | ^( NOT cond ) |
| | ^( NULL expr NOT? ) |
| | ^( rel_op expr expr ) |
| | in_eval |
| | func_eval |
| | ^( BOOL_COND expr ) |
| ; |
| |
| in_eval |
| : ^( IN ( ^( IN_LHS expr ) ^( IN_RHS expr ) )+ ) |
| ; |
| |
| func_eval |
| : ^( FUNC_EVAL func_name real_arg* ) |
| ; |
| |
| real_arg |
| : expr | STAR |
| ; |
| |
| expr |
| : ^( PLUS expr expr ) |
| | ^( MINUS expr expr ) |
| | ^( STAR expr expr ) |
| | ^( DIV expr expr ) |
| | ^( PERCENT expr expr ) |
| | ^( CAST_EXPR type expr ) |
| | const_expr |
| | var_expr |
| | ^( NEG expr ) |
| | ^( CAST_EXPR type_cast expr ) |
| | ^( EXPR_IN_PAREN expr ) |
| ; |
| |
| type_cast |
| : simple_type | map_type | tuple_type_cast | bag_type_cast |
| ; |
| |
| tuple_type_cast |
| : ^( TUPLE_TYPE_CAST type_cast* ) |
| ; |
| |
| bag_type_cast |
| : ^( BAG_TYPE_CAST tuple_type_cast? ) |
| ; |
| |
| var_expr |
| : projectable_expr ( dot_proj | pound_proj )* |
| ; |
| |
| projectable_expr |
| : func_eval | col_ref | bin_expr | case_expr | case_cond |
| ; |
| |
| dot_proj |
| : ^( PERIOD col_alias_or_index+ ) |
| ; |
| |
| col_alias_or_index : col_alias | col_index |
| ; |
| |
| col_alias |
| : GROUP |
| | CUBE |
| | IDENTIFIER |
| ; |
| |
| col_index |
| : DOLLARVAR |
| ; |
| |
| col_range : ^(COL_RANGE col_ref? DOUBLE_PERIOD col_ref?) |
| ; |
| |
| pound_proj |
| : ^( POUND ( QUOTEDSTRING | NULL ) ) |
| ; |
| |
| bin_expr |
| : ^( BIN_EXPR cond expr expr ) |
| ; |
| |
| case_expr |
| : ^( CASE_EXPR ( ^( CASE_EXPR_LHS expr ) ( ^( CASE_EXPR_RHS expr) )+ )+ ) |
| ; |
| |
| case_cond |
| : ^( CASE_COND ^( WHEN cond+ ) ^( THEN expr+ ) ) |
| ; |
| |
| limit_clause |
| : ^( LIMIT rel ( INTEGER | LONGINTEGER | expr ) ) |
| ; |
| |
| sample_clause |
| : ^( SAMPLE rel ( DOUBLENUMBER | expr ) ) |
| ; |
| |
| rank_clause |
| : ^( RANK rel ( rank_by_statement )? ) |
| ; |
| |
| rank_by_statement |
| : ^( BY rank_by_clause ( DENSE )? ) |
| ; |
| |
| rank_by_clause |
| : STAR ( ASC | DESC )? |
| | rank_col+ |
| ; |
| |
| rank_col |
| : ( col_range | col_ref ) ( ASC | DESC )? |
| ; |
| |
| order_clause |
| : ^( ORDER rel order_by_clause func_clause? ) |
| ; |
| |
| order_by_clause |
| : STAR ( ASC | DESC )? |
| | order_col+ |
| ; |
| |
| order_col |
| : (col_range | col_ref) ( ASC | DESC )? |
| ; |
| |
| distinct_clause |
| : ^( DISTINCT rel partition_clause? ) |
| ; |
| |
| partition_clause |
| : ^( PARTITION func_name ) |
| ; |
| |
| cross_clause |
| : ^( CROSS rel_list partition_clause? ) |
| ; |
| |
| rel_list |
| : rel+ |
| ; |
| |
| join_clause |
| : ^( JOIN join_sub_clause join_type? partition_clause? ) |
| ; |
| |
| join_type : QUOTEDSTRING |
| ; |
| |
| join_sub_clause |
| : join_item ( LEFT |
| | RIGHT |
| | FULL |
| ) OUTER? join_item |
| | join_item+ |
| ; |
| |
| join_item |
| : ^( JOIN_ITEM rel join_group_by_clause ) |
| ; |
| |
| join_group_by_clause |
| : ^( BY join_group_by_expr+ ) |
| ; |
| |
| join_group_by_expr |
| : col_range | expr | STAR |
| ; |
| |
| union_clause |
| : ^( UNION ONSCHEMA? rel_list ) |
| ; |
| |
| foreach_clause |
| : ^( FOREACH rel foreach_plan ) |
| ; |
| |
| foreach_plan |
| : ^( FOREACH_PLAN_SIMPLE generate_clause ) |
| | ^( FOREACH_PLAN_COMPLEX nested_blk ) |
| ; |
| |
| nested_blk |
| : nested_command* generate_clause |
| ; |
| |
| generate_clause |
| : ^( GENERATE flatten_generated_item+ ) |
| ; |
| |
| nested_command |
| : ^( NESTED_CMD IDENTIFIER nested_op ) |
| | ^( NESTED_CMD_ASSI IDENTIFIER expr ) |
| ; |
| |
| nested_op : nested_proj |
| | nested_filter |
| | nested_sort |
| | nested_distinct |
| | nested_limit |
| | nested_cross |
| | nested_foreach |
| ; |
| |
| nested_proj |
| : ^( NESTED_PROJ col_ref col_ref+ ) |
| ; |
| |
| nested_filter |
| : ^( FILTER nested_op_input cond ) |
| ; |
| |
| nested_sort |
| : ^( ORDER nested_op_input order_by_clause func_clause? ) |
| ; |
| |
| nested_distinct |
| : ^( DISTINCT nested_op_input ) |
| ; |
| |
| nested_limit |
| : ^( LIMIT nested_op_input ( INTEGER | expr ) ) |
| ; |
| |
| nested_cross : ^( CROSS nested_op_input_list ) |
| ; |
| |
| nested_foreach : ^( FOREACH nested_op_input generate_clause ) |
| ; |
| |
| nested_op_input_list : nested_op_input+ |
| ; |
| |
| nested_op_input : col_ref | nested_proj |
| ; |
| |
| stream_clause |
| : ^( STREAM rel ( EXECCOMMAND | IDENTIFIER ) as_clause? ) |
| ; |
| |
| mr_clause |
| : ^( MAPREDUCE QUOTEDSTRING path_list? store_clause load_clause EXECCOMMAND? ) |
| ; |
| |
| split_clause |
| : ^( SPLIT rel split_branch+ split_otherwise? ) |
| ; |
| |
| split_branch |
| : ^( SPLIT_BRANCH alias cond ) |
| ; |
| |
| split_otherwise |
| : ^( OTHERWISE alias ) |
| ; |
| |
| col_ref : alias_col_ref | dollar_col_ref |
| ; |
| |
| alias_col_ref |
| : GROUP |
| | CUBE |
| | IDENTIFIER |
| { |
| String alias = $IDENTIFIER.text; |
| String[] names = alias.split( "::" ); |
| StringBuilder sb = new StringBuilder(); |
| for( int i = 0; i < names.length; i++ ) { |
| String name = names[i]; |
| sb.append( aliasSeen.contains( name ) ? getMask( name ) : name ); |
| if( i < names.length - 1 ) |
| sb.append( "::" ); |
| } |
| $IDENTIFIER.token.setText( sb.toString() ); |
| } |
| ; |
| |
| dollar_col_ref |
| : DOLLARVAR |
| ; |
| |
| const_expr : literal |
| ; |
| |
| literal : scalar | map | bag | tuple |
| ; |
| |
| scalar : num_scalar | QUOTEDSTRING | NULL | TRUE | FALSE |
| ; |
| |
| num_scalar : MINUS? ( INTEGER | LONGINTEGER | FLOATNUMBER | DOUBLENUMBER | BIGINTEGERNUMBER | BIGDECIMALNUMBER ) |
| ; |
| |
| map |
| : ^( MAP_VAL keyvalue* ) |
| ; |
| |
| keyvalue |
| : ^( KEY_VAL_PAIR map_key const_expr ) |
| ; |
| |
| map_key : QUOTEDSTRING |
| ; |
| |
| bag |
| : ^( BAG_VAL tuple* ) |
| ; |
| |
| tuple |
| : ^( TUPLE_VAL literal* ) |
| ; |
| |
| // extended identifier, handling the keyword and identifier conflicts. Ugly but there is no other choice. |
| eid : rel_str_op |
| | IMPORT |
| | RETURNS |
| | DEFINE |
| | LOAD |
| | FILTER |
| | FOREACH |
| | CUBE |
| | ROLLUP |
| | MATCHES |
| | ORDER |
| | RANK |
| | DISTINCT |
| | COGROUP |
| | JOIN |
| | CROSS |
| | UNION |
| | SPLIT |
| | INTO |
| | IF |
| | ALL |
| | AS |
| | BY |
| | USING |
| | INNER |
| | OUTER |
| | PARALLEL |
| | PARTITION |
| | GROUP |
| | AND |
| | OR |
| | NOT |
| | GENERATE |
| | FLATTEN |
| | EVAL |
| | ASC |
| | DESC |
| | BOOLEAN |
| | INT |
| | LONG |
| | FLOAT |
| | DOUBLE |
| | DATETIME |
| | CHARARRAY |
| | BIGINTEGER |
| | BIGDECIMAL |
| | BYTEARRAY |
| | BAG |
| | TUPLE |
| | MAP |
| | IS |
| | NULL |
| | TRUE |
| | FALSE |
| | STREAM |
| | THROUGH |
| | STORE |
| | MAPREDUCE |
| | SHIP |
| | CACHE |
| | INPUT |
| | OUTPUT |
| | STDERROR |
| | STDIN |
| | STDOUT |
| | LIMIT |
| | SAMPLE |
| | LEFT |
| | RIGHT |
| | FULL |
| | IDENTIFIER |
| | TOBAG |
| | TOMAP |
| | TOTUPLE |
| | ASSERT |
| ; |
| |
| // relational operator |
| rel_op |
| : rel_op_eq |
| | rel_op_ne |
| | rel_op_gt |
| | rel_op_gte |
| | rel_op_lt |
| | rel_op_lte |
| | STR_OP_MATCHES |
| ; |
| |
| rel_op_eq |
| : STR_OP_EQ |
| | NUM_OP_EQ |
| ; |
| |
| rel_op_ne |
| : STR_OP_NE |
| | NUM_OP_NE |
| ; |
| |
| rel_op_gt |
| : STR_OP_GT |
| | NUM_OP_GT |
| ; |
| |
| rel_op_gte |
| : STR_OP_GTE |
| | NUM_OP_GTE |
| ; |
| |
| rel_op_lt |
| : STR_OP_LT |
| | NUM_OP_LT |
| ; |
| |
| rel_op_lte |
| : STR_OP_LTE |
| | NUM_OP_LTE |
| ; |
| |
| rel_str_op |
| : STR_OP_EQ |
| | STR_OP_NE |
| | STR_OP_GT |
| | STR_OP_LT |
| | STR_OP_GTE |
| | STR_OP_LTE |
| | STR_OP_MATCHES |
| ; |
| |