src/org/apache/pig/parser/AstValidator.g - pig - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /**
  * Grammar file for Pig tree parser (visitor for default data type insertion).
  *
  * NOTE: THIS FILE IS BASED ON QueryParser.g, SO IF YOU CHANGE THAT FILE, YOU WILL
  *       PROBABLY NEED TO MAKE CORRESPONDING CHANGES TO THIS FILE AS WELL.
  */

 tree grammar AstValidator;

 options {
     tokenVocab=QueryParser;
     ASTLabelType=CommonTree;
     output=AST;
     backtrack=true;
 }

 @header {
 package org.apache.pig.parser;

 import org.apache.pig.data.DataType;
 import org.apache.pig.impl.util.NumValCarrier;

 import java.util.HashSet;
 import java.util.Set;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 }

 @members {

 private static Log log = LogFactory.getLog( AstValidator.class );

 @Override
 protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow)
 throws RecognitionException {
     throw new MismatchedTokenException( ttype, input );
 }

 @Override
 public Object recoverFromMismatchedSet(IntStream input, RecognitionException e, BitSet follow)
 throws RecognitionException {
     throw e;
 }

 private void validateSchemaAliasName(Set<String> fieldNames, CommonTree node, String name)
 throws DuplicatedSchemaAliasException {
     if( fieldNames.contains( name ) ) {
         throw new DuplicatedSchemaAliasException( input,
             new SourceLocation( (PigParserNode)node ), name );
     } else {
         fieldNames.add( name );
     }
 }

 private void validateAliasRef(Set<String> aliases, CommonTree node, String alias)
 throws UndefinedAliasException {
     if( !aliases.contains( alias ) ) {
         throw new UndefinedAliasException( input, new SourceLocation( (PigParserNode)node ), alias );
     }
 }

 private void checkDuplication(int count, CommonTree node) throws ParserValidationException {
     if( count > 1 ) {
         throw new ParserValidationException( input, new SourceLocation( (PigParserNode)node ),
             "Duplicated command option" );
     }
 }

 private String lastRel = null;

 private String getLastRel(CommonTree node) throws UndefinedAliasException {
     if (lastRel != null) {
         return lastRel;
     }
     throw new UndefinedAliasException( input, new SourceLocation((PigParserNode)node), "@");
 }

 private Set<String> aliases = new HashSet<String>() {
     @Override
     public boolean add(String e) {
         lastRel = e;
         return super.add(e);
     }
 };

 } // End of @members

 @rulecatch {
 catch(RecognitionException re) {
     throw re;
 }
 }

 query : ^( QUERY statement* )
 ;

 statement : general_statement
           | split_statement
           | realias_statement
           | register_statement
           | assert_statement
 ;

 split_statement : split_clause
 ;

 realias_statement : realias_clause
 ;

 register_statement : ^( REGISTER QUOTEDSTRING (USING IDENTIFIER AS IDENTIFIER)? )
 ;

 assert_statement : assert_clause
 ;

 general_statement : ^( STATEMENT ( alias { aliases.add( $alias.name ); } )? op_clause parallel_clause? )
 ;

 realias_clause : ^(REALIAS alias IDENTIFIER)
    {
        aliases.add( $alias.name );
    }
 ;

 parallel_clause : ^( PARALLEL INTEGER )
 ;

 alias returns[String name, CommonTree node]
  : IDENTIFIER
    {
        $name = $IDENTIFIER.text;
        $node = $IDENTIFIER;
    }
 ;

 previous_rel returns[String name, CommonTree node]
  : ARROBA
    {
        $name = getLastRel($ARROBA);
        $node = $ARROBA;
    }
 ;

 op_clause : define_clause
           | load_clause
           | group_clause
           | store_clause
           | filter_clause
           | distinct_clause
           | limit_clause
           | sample_clause
           | order_clause
           | rank_clause
           | cross_clause
           | join_clause
           | union_clause
           | stream_clause
           | mr_clause
           | split_clause
           | foreach_clause
           | cube_clause
           | assert_clause
 ;

 define_clause : ^( DEFINE alias ( cmd | func_clause ) )
 ;

 cmd
 @init {
     int ship = 0;
     int cache = 0;
     int in = 0;
     int out = 0;
     int error = 0;
 }
  : ^( EXECCOMMAND ( ship_clause { checkDuplication( ++ship, $ship_clause.start ); }
                   | cache_clause { checkDuplication( ++cache, $cache_clause.start ); }
                   | input_clause { checkDuplication( ++in, $input_clause.start ); }
                   | output_clause { checkDuplication( ++out, $output_clause.start ); }
                   | error_clause { checkDuplication( ++error, $error_clause.start ); }
                   )*
    )
 ;

 ship_clause : ^( SHIP path_list? )
 ;

 path_list : QUOTEDSTRING+
 ;

 cache_clause : ^( CACHE path_list )
 ;

 input_clause : ^( INPUT stream_cmd+ )
 ;

 stream_cmd : ^( STDIN func_clause? )
            | ^( STDOUT func_clause? )
            | ^( QUOTEDSTRING func_clause? )
 ;

 output_clause : ^( OUTPUT stream_cmd+ )
 ;

 error_clause : ^( STDERROR  ( QUOTEDSTRING INTEGER? )? )
 ;

 load_clause : ^( LOAD filename func_clause? as_clause? )
 ;

 filename : QUOTEDSTRING
 ;

 as_clause: ^( AS field_def_list )
 ;

 field_def[Set<String> fieldNames, NumValCarrier nvc] throws DuplicatedSchemaAliasException
  : ^( FIELD_DEF IDENTIFIER { validateSchemaAliasName( fieldNames, $IDENTIFIER, $IDENTIFIER.text ); } type? )
  | ^( FIELD_DEF_WITHOUT_IDENTIFIER type { validateSchemaAliasName ( fieldNames, $FIELD_DEF_WITHOUT_IDENTIFIER, $nvc.makeNameFromDataType ( $type.typev ) ); } )
 ;

 field_def_list throws DuplicatedSchemaAliasException
 scope{
     Set<String> fieldNames;
     NumValCarrier nvc;
 }
 @init {
     $field_def_list::fieldNames = new HashSet<String>();
     $field_def_list::nvc = new NumValCarrier();
 }
  : ( field_def[$field_def_list::fieldNames, $field_def_list::nvc] )+
 ;

 type returns [byte typev]
   : simple_type { $typev = $simple_type.typev; }
   | tuple_type { $typev = DataType.TUPLE; }
   | bag_type { $typev = DataType.BAG; }
   | map_type { $typev = DataType.MAP; }
 ;

 simple_type returns [byte typev]
   : BOOLEAN { $typev = DataType.BOOLEAN; }
   | INT { $typev = DataType.INTEGER; }
   | LONG { $typev = DataType.LONG; }
   | FLOAT { $typev = DataType.FLOAT; }
   | DOUBLE { $typev = DataType.DOUBLE; }
   | BIGINTEGER { $typev = DataType.BIGINTEGER; }
   | BIGDECIMAL { $typev = DataType.BIGDECIMAL; }
   | DATETIME { $typev = DataType.DATETIME; }
   | CHARARRAY { $typev = DataType.CHARARRAY; }
   | BYTEARRAY { $typev = DataType.BYTEARRAY; }
 ;

 tuple_type : ^( TUPLE_TYPE field_def_list? )
 ;

 bag_type : ^( BAG_TYPE IDENTIFIER? tuple_type? )
 ;

 map_type : ^( MAP_TYPE IDENTIFIER? type? )
 ;

 func_clause : ^( FUNC_REF func_name )
             | ^( FUNC func_name func_args? )
 ;

 func_name : eid ( ( PERIOD | DOLLAR ) eid )*
 ;

 func_args_string : QUOTEDSTRING | MULTILINE_QUOTEDSTRING
 ;

 func_args : func_args_string+
 ;

 cube_clause
  : ^( CUBE cube_item )
 ;

 cube_item
  : rel ( cube_by_clause )
 ;

 cube_by_clause
  : ^( BY cube_or_rollup )
 ;

 cube_or_rollup
  : cube_rollup_list+
 ;

 cube_rollup_list
  : ^( ( CUBE | ROLLUP ) cube_by_expr_list )
 ;

 cube_by_expr_list
  : cube_by_expr+
 ;

 cube_by_expr
  : col_range | expr | STAR
 ;

 group_clause
 scope {
     int arity;
 }
 @init {
     $group_clause::arity = 0;
 }
  : ^( ( GROUP | COGROUP ) group_item+ group_type? partition_clause? )
 ;

 group_type : QUOTEDSTRING
 ;

 group_item
  : rel ( join_group_by_clause | ALL | ANY ) ( INNER | OUTER )?
    {
        if( $group_clause::arity == 0 ) {
            // For the first input
            $group_clause::arity = $join_group_by_clause.exprCount;
        } else if( $join_group_by_clause.exprCount != $group_clause::arity ) {
            throw new ParserValidationException( input, new SourceLocation( (PigParserNode)$group_item.start ),
                "The arity of the group by columns do not match." );
        }
    }
 ;

 rel : alias {  validateAliasRef( aliases, $alias.node, $alias.name ); }
     | previous_rel { validateAliasRef( aliases, $previous_rel.node, $previous_rel.name ); }
     | op_clause parallel_clause?
 ;

 flatten_generated_item : ( flatten_clause | col_range | expr | STAR ) field_def_list?
 ;

 flatten_clause : ^( FLATTEN expr )
 ;

 store_clause : ^( STORE rel filename func_clause? )
 ;

 assert_clause : ^( ASSERT rel cond comment? )
 ;

 comment : QUOTEDSTRING
 ;

 filter_clause : ^( FILTER rel cond )
 ;

 cond : ^( OR cond cond )
      | ^( AND cond cond )
      | ^( NOT cond )
      | ^( NULL expr NOT? )
      | ^( rel_op expr expr )
      | in_eval
      | func_eval
      | ^( BOOL_COND expr )
 ;

 in_eval: ^( IN ( ^( IN_LHS expr ) ^( IN_RHS expr ) )+ )
 ;

 func_eval: ^( FUNC_EVAL func_name real_arg* ) | ^( INVOKER_FUNC_EVAL func_name IDENTIFIER real_arg* )
 ;

 real_arg : expr | STAR | col_range
 ;

 expr : ^( PLUS expr expr )
      | ^( MINUS expr expr )
      | ^( STAR expr expr )
      | ^( DIV expr expr )
      | ^( PERCENT expr expr )
      | ^( CAST_EXPR type expr )
      | const_expr
      | var_expr
      | ^( NEG expr )
      | ^( CAST_EXPR type_cast expr )
      | ^( EXPR_IN_PAREN expr )
 ;

 type_cast : simple_type | map_type | tuple_type_cast | bag_type_cast
 ;

 tuple_type_cast : ^( TUPLE_TYPE_CAST type_cast* )
 ;

 bag_type_cast : ^( BAG_TYPE_CAST tuple_type_cast? )
 ;

 var_expr : projectable_expr ( dot_proj | pound_proj )*
 ;

 projectable_expr: func_eval | col_ref | bin_expr | case_expr | case_cond
 ;

 dot_proj : ^( PERIOD col_alias_or_index+ )
 ;

 col_alias_or_index : col_alias | col_index
 ;

 col_alias : GROUP | CUBE | IDENTIFIER
 ;

 col_index : DOLLARVAR
 ;

 col_range :  ^(COL_RANGE col_ref? DOUBLE_PERIOD col_ref?)
 ;


 pound_proj : ^( POUND ( QUOTEDSTRING | NULL ) )
 ;

 bin_expr : ^( BIN_EXPR cond expr expr )
 ;

 case_expr: ^( CASE_EXPR ( ^( CASE_EXPR_LHS expr ) ( ^( CASE_EXPR_RHS expr) )+ )+ )
 ;

 case_cond: ^( CASE_COND ^( WHEN cond+ ) ^( THEN expr+ ) )
 ;

 limit_clause : ^( LIMIT rel ( INTEGER | LONGINTEGER | expr ) )
 ;

 sample_clause : ^( SAMPLE rel ( DOUBLENUMBER | expr ) )
 ;

 rank_clause : ^( RANK rel ( rank_by_statement )? )
 ;

 rank_by_statement : ^( BY rank_by_clause ( DENSE )? )
 ;

 rank_by_clause : STAR ( ASC | DESC )?
                | rank_col+
 ;

 rank_col : col_range (ASC | DESC)?
          | col_ref ( ASC | DESC )?
 ;

 order_clause : ^( ORDER rel order_by_clause func_clause? )
 ;

 order_by_clause : STAR ( ASC | DESC )?
                 | order_col+
 ;

 order_col : col_range (ASC | DESC)?
           | col_ref ( ASC | DESC )?
 ;

 distinct_clause : ^( DISTINCT rel partition_clause? )
 ;

 partition_clause : ^( PARTITION func_name )
 ;

 cross_clause : ^( CROSS rel_list partition_clause? )
 ;

 rel_list : rel+
 ;

 join_clause
 scope {
     int arity;
 }
 @init {
     $join_clause::arity = 0;
 }
  : ^( JOIN join_sub_clause join_type? partition_clause? )
 ;

 join_type : QUOTEDSTRING
 ;

 join_sub_clause
  : join_item ( LEFT | RIGHT | FULL ) OUTER? join_item
  | join_item+
 ;

 join_item
  : ^( JOIN_ITEM rel join_group_by_clause )
    {
        if( $join_clause::arity == 0 ) {
            // For the first input
            $join_clause::arity = $join_group_by_clause.exprCount;
        } else if( $join_group_by_clause.exprCount != $join_clause::arity ) {
            throw new ParserValidationException( input, new SourceLocation( (PigParserNode)$join_item.start ),
                "The arity of the join columns do not match." );
        }
    }
 ;

 join_group_by_clause returns[int exprCount]
 @init {
     $exprCount = 0;
 }
  : ^( BY ( join_group_by_expr { $exprCount++; } )+ )
 ;

 join_group_by_expr : col_range  | expr | STAR
 ;

 union_clause : ^( UNION ONSCHEMA? rel_list )
 ;

 foreach_clause : ^( FOREACH rel foreach_plan )
 ;

 foreach_plan : ^( FOREACH_PLAN_SIMPLE generate_clause )
              | ^( FOREACH_PLAN_COMPLEX nested_blk )
 ;

 nested_blk
 scope { Set<String> ids; }
 @init{ $nested_blk::ids = new HashSet<String>(); }
  : nested_command* generate_clause
 ;

 generate_clause : ^( GENERATE flatten_generated_item+ )
 ;

 nested_command
  : ^( NESTED_CMD IDENTIFIER nested_op )
    {
        $nested_blk::ids.add( $IDENTIFIER.text );
    }
  | ^( NESTED_CMD_ASSI IDENTIFIER expr )
    {
        $nested_blk::ids.add( $IDENTIFIER.text );
    }
 ;

 nested_op : nested_proj
           | nested_filter
           | nested_sort
           | nested_distinct
           | nested_limit
           | nested_cross
           | nested_foreach
 ;

 nested_proj : ^( NESTED_PROJ col_ref col_ref+ )
 ;

 nested_filter
  : ^( FILTER nested_op_input cond )
 ;

 nested_sort : ^( ORDER nested_op_input  order_by_clause func_clause? )
 ;

 nested_distinct : ^( DISTINCT nested_op_input )
 ;

 nested_limit : ^( LIMIT nested_op_input ( INTEGER | expr ) )
 ;

 nested_cross : ^( CROSS nested_op_input_list )
 ;

 nested_foreach : ^( FOREACH nested_op_input generate_clause )
 ;

 nested_op_input : col_ref | nested_proj
 ;

 nested_op_input_list : nested_op_input+
 ;

 stream_clause : ^( STREAM rel ( EXECCOMMAND | IDENTIFIER ) as_clause? )
 ;

 mr_clause : ^( MAPREDUCE QUOTEDSTRING path_list? store_clause load_clause EXECCOMMAND? )
 ;

 split_clause : ^( SPLIT rel split_branch+ split_otherwise? )
 ;

 split_branch
  : ^( SPLIT_BRANCH alias cond )
    {
        aliases.add( $alias.name );
    }
 ;

 split_otherwise : ^( OTHERWISE alias ALL? )
    {
        aliases.add( $alias.name );
    }
 ;

 col_ref : alias_col_ref | dollar_col_ref
 ;

 alias_col_ref : GROUP | CUBE | IDENTIFIER
 ;

 dollar_col_ref : DOLLARVAR
 ;

 const_expr : literal
 ;

 literal : scalar | map | bag | tuple
 ;

 scalar : num_scalar | QUOTEDSTRING | NULL | TRUE | FALSE
 ;

 num_scalar : MINUS? ( INTEGER | LONGINTEGER | FLOATNUMBER | DOUBLENUMBER | BIGINTEGERNUMBER | BIGDECIMALNUMBER )
 ;

 map : ^( MAP_VAL keyvalue* )
 ;

 keyvalue : ^( KEY_VAL_PAIR map_key const_expr )
 ;

 map_key : QUOTEDSTRING
 ;

 bag : ^( BAG_VAL tuple* )
 ;

 tuple : ^( TUPLE_VAL literal* )
 ;

 // extended identifier, handling the keyword and identifier conflicts. Ugly but there is no other choice.
 eid : rel_str_op
     | IMPORT
     | RETURNS
     | DEFINE
     | LOAD
     | FILTER
     | FOREACH
     | CUBE
     | ROLLUP
     | MATCHES
     | ORDER
     | RANK
     | DISTINCT
     | COGROUP
     | JOIN
     | CROSS
     | UNION
     | SPLIT
     | INTO
     | IF
     | ALL
     | AS
     | BY
     | USING
     | INNER
     | OUTER
     | PARALLEL
     | PARTITION
     | GROUP
     | AND
     | OR
     | NOT
     | GENERATE
     | FLATTEN
     | EVAL
     | ASC
     | DESC
     | BOOLEAN
     | INT
     | LONG
     | FLOAT
     | DOUBLE
     | BIGINTEGER
     | BIGDECIMAL
     | DATETIME
     | CHARARRAY
     | BYTEARRAY
     | BAG
     | TUPLE
     | MAP
     | IS
     | NULL
     | TRUE
     | FALSE
     | STREAM
     | THROUGH
     | STORE
     | MAPREDUCE
     | SHIP
     | CACHE
     | INPUT
     | OUTPUT
     | STDERROR
     | STDIN
     | STDOUT
     | LIMIT
     | SAMPLE
     | LEFT
     | RIGHT
     | FULL
     | IDENTIFIER
     | TOBAG
     | TOMAP
     | TOTUPLE
     | ASSERT
 ;

 // relational operator
 rel_op : rel_op_eq
        | rel_op_ne
        | rel_op_gt
        | rel_op_gte
        | rel_op_lt
        | rel_op_lte
        | STR_OP_MATCHES
 ;

 rel_op_eq : STR_OP_EQ | NUM_OP_EQ
 ;

 rel_op_ne : STR_OP_NE | NUM_OP_NE
 ;

 rel_op_gt : STR_OP_GT | NUM_OP_GT
 ;

 rel_op_gte : STR_OP_GTE | NUM_OP_GTE
 ;

 rel_op_lt : STR_OP_LT | NUM_OP_LT
 ;

 rel_op_lte : STR_OP_LTE | NUM_OP_LTE
 ;

 rel_str_op : STR_OP_EQ
            | STR_OP_NE
            | STR_OP_GT
            | STR_OP_LT
            | STR_OP_GTE
            | STR_OP_LTE
            | STR_OP_MATCHES
 ;
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/**
	* Grammar file for Pig tree parser (visitor for default data type insertion).
	*
	* NOTE: THIS FILE IS BASED ON QueryParser.g, SO IF YOU CHANGE THAT FILE, YOU WILL
	* PROBABLY NEED TO MAKE CORRESPONDING CHANGES TO THIS FILE AS WELL.
	*/

	tree grammar AstValidator;

	options {
	tokenVocab=QueryParser;
	ASTLabelType=CommonTree;
	output=AST;
	backtrack=true;
	}

	@header {
	package org.apache.pig.parser;

	import org.apache.pig.data.DataType;
	import org.apache.pig.impl.util.NumValCarrier;

	import java.util.HashSet;
	import java.util.Set;

	import org.apache.commons.logging.Log;
	import org.apache.commons.logging.LogFactory;
	}

	@members {

	private static Log log = LogFactory.getLog( AstValidator.class );

	@Override
	protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow)
	throws RecognitionException {
	throw new MismatchedTokenException( ttype, input );
	}

	@Override
	public Object recoverFromMismatchedSet(IntStream input, RecognitionException e, BitSet follow)
	throws RecognitionException {
	throw e;
	}

	private void validateSchemaAliasName(Set<String> fieldNames, CommonTree node, String name)
	throws DuplicatedSchemaAliasException {
	if( fieldNames.contains( name ) ) {
	throw new DuplicatedSchemaAliasException( input,
	new SourceLocation( (PigParserNode)node ), name );
	} else {
	fieldNames.add( name );
	}
	}

	private void validateAliasRef(Set<String> aliases, CommonTree node, String alias)
	throws UndefinedAliasException {
	if( !aliases.contains( alias ) ) {
	throw new UndefinedAliasException( input, new SourceLocation( (PigParserNode)node ), alias );
	}
	}

	private void checkDuplication(int count, CommonTree node) throws ParserValidationException {
	if( count > 1 ) {
	throw new ParserValidationException( input, new SourceLocation( (PigParserNode)node ),
	"Duplicated command option" );
	}
	}

	private String lastRel = null;

	private String getLastRel(CommonTree node) throws UndefinedAliasException {
	if (lastRel != null) {
	return lastRel;
	}
	throw new UndefinedAliasException( input, new SourceLocation((PigParserNode)node), "@");
	}

	private Set<String> aliases = new HashSet<String>() {
	@Override
	public boolean add(String e) {
	lastRel = e;
	return super.add(e);
	}
	};

	} // End of @members

	@rulecatch {
	catch(RecognitionException re) {
	throw re;
	}
	}

	query : ^( QUERY statement* )
	;

	statement : general_statement
	\| split_statement
	\| realias_statement
	\| register_statement
	\| assert_statement
	;

	split_statement : split_clause
	;

	realias_statement : realias_clause
	;

	register_statement : ^( REGISTER QUOTEDSTRING (USING IDENTIFIER AS IDENTIFIER)? )
	;

	assert_statement : assert_clause
	;

	general_statement : ^( STATEMENT ( alias { aliases.add( $alias.name ); } )? op_clause parallel_clause? )
	;

	realias_clause : ^(REALIAS alias IDENTIFIER)
	{
	aliases.add( $alias.name );
	}
	;

	parallel_clause : ^( PARALLEL INTEGER )
	;

	alias returns[String name, CommonTree node]
	: IDENTIFIER
	{
	$name = $IDENTIFIER.text;
	$node = $IDENTIFIER;
	}
	;

	previous_rel returns[String name, CommonTree node]
	: ARROBA
	{
	$name = getLastRel($ARROBA);
	$node = $ARROBA;
	}
	;

	op_clause : define_clause
	\| load_clause
	\| group_clause
	\| store_clause
	\| filter_clause
	\| distinct_clause
	\| limit_clause
	\| sample_clause
	\| order_clause
	\| rank_clause
	\| cross_clause
	\| join_clause
	\| union_clause
	\| stream_clause
	\| mr_clause
	\| split_clause
	\| foreach_clause
	\| cube_clause
	\| assert_clause
	;

	define_clause : ^( DEFINE alias ( cmd \| func_clause ) )
	;

	cmd
	@init {
	int ship = 0;
	int cache = 0;
	int in = 0;
	int out = 0;
	int error = 0;
	}
	: ^( EXECCOMMAND ( ship_clause { checkDuplication( ++ship, $ship_clause.start ); }
	\| cache_clause { checkDuplication( ++cache, $cache_clause.start ); }
	\| input_clause { checkDuplication( ++in, $input_clause.start ); }
	\| output_clause { checkDuplication( ++out, $output_clause.start ); }
	\| error_clause { checkDuplication( ++error, $error_clause.start ); }
	)*
	)
	;

	ship_clause : ^( SHIP path_list? )
	;

	path_list : QUOTEDSTRING+
	;

	cache_clause : ^( CACHE path_list )
	;

	input_clause : ^( INPUT stream_cmd+ )
	;

	stream_cmd : ^( STDIN func_clause? )
	\| ^( STDOUT func_clause? )
	\| ^( QUOTEDSTRING func_clause? )
	;

	output_clause : ^( OUTPUT stream_cmd+ )
	;

	error_clause : ^( STDERROR ( QUOTEDSTRING INTEGER? )? )
	;

	load_clause : ^( LOAD filename func_clause? as_clause? )
	;

	filename : QUOTEDSTRING
	;

	as_clause: ^( AS field_def_list )
	;

	field_def[Set<String> fieldNames, NumValCarrier nvc] throws DuplicatedSchemaAliasException
	: ^( FIELD_DEF IDENTIFIER { validateSchemaAliasName( fieldNames, $IDENTIFIER, $IDENTIFIER.text ); } type? )
	\| ^( FIELD_DEF_WITHOUT_IDENTIFIER type { validateSchemaAliasName ( fieldNames, $FIELD_DEF_WITHOUT_IDENTIFIER, $nvc.makeNameFromDataType ( $type.typev ) ); } )
	;

	field_def_list throws DuplicatedSchemaAliasException
	scope{
	Set<String> fieldNames;
	NumValCarrier nvc;
	}
	@init {
	$field_def_list::fieldNames = new HashSet<String>();
	$field_def_list::nvc = new NumValCarrier();
	}
	: ( field_def[$field_def_list::fieldNames, $field_def_list::nvc] )+
	;

	type returns [byte typev]
	: simple_type { $typev = $simple_type.typev; }
	\| tuple_type { $typev = DataType.TUPLE; }
	\| bag_type { $typev = DataType.BAG; }
	\| map_type { $typev = DataType.MAP; }
	;

	simple_type returns [byte typev]
	: BOOLEAN { $typev = DataType.BOOLEAN; }
	\| INT { $typev = DataType.INTEGER; }
	\| LONG { $typev = DataType.LONG; }
	\| FLOAT { $typev = DataType.FLOAT; }
	\| DOUBLE { $typev = DataType.DOUBLE; }
	\| BIGINTEGER { $typev = DataType.BIGINTEGER; }
	\| BIGDECIMAL { $typev = DataType.BIGDECIMAL; }
	\| DATETIME { $typev = DataType.DATETIME; }
	\| CHARARRAY { $typev = DataType.CHARARRAY; }
	\| BYTEARRAY { $typev = DataType.BYTEARRAY; }
	;

	tuple_type : ^( TUPLE_TYPE field_def_list? )
	;

	bag_type : ^( BAG_TYPE IDENTIFIER? tuple_type? )
	;

	map_type : ^( MAP_TYPE IDENTIFIER? type? )
	;

	func_clause : ^( FUNC_REF func_name )
	\| ^( FUNC func_name func_args? )
	;

	func_name : eid ( ( PERIOD \| DOLLAR ) eid )*
	;

	func_args_string : QUOTEDSTRING \| MULTILINE_QUOTEDSTRING
	;

	func_args : func_args_string+
	;

	cube_clause
	: ^( CUBE cube_item )
	;

	cube_item
	: rel ( cube_by_clause )
	;

	cube_by_clause
	: ^( BY cube_or_rollup )
	;

	cube_or_rollup
	: cube_rollup_list+
	;

	cube_rollup_list
	: ^( ( CUBE \| ROLLUP ) cube_by_expr_list )
	;

	cube_by_expr_list
	: cube_by_expr+
	;

	cube_by_expr
	: col_range \| expr \| STAR
	;

	group_clause
	scope {
	int arity;
	}
	@init {
	$group_clause::arity = 0;
	}
	: ^( ( GROUP \| COGROUP ) group_item+ group_type? partition_clause? )
	;

	group_type : QUOTEDSTRING
	;

	group_item
	: rel ( join_group_by_clause \| ALL \| ANY ) ( INNER \| OUTER )?
	{
	if( $group_clause::arity == 0 ) {
	// For the first input
	$group_clause::arity = $join_group_by_clause.exprCount;
	} else if( $join_group_by_clause.exprCount != $group_clause::arity ) {
	throw new ParserValidationException( input, new SourceLocation( (PigParserNode)$group_item.start ),
	"The arity of the group by columns do not match." );
	}
	}
	;

	rel : alias { validateAliasRef( aliases, $alias.node, $alias.name ); }
	\| previous_rel { validateAliasRef( aliases, $previous_rel.node, $previous_rel.name ); }
	\| op_clause parallel_clause?
	;

	flatten_generated_item : ( flatten_clause \| col_range \| expr \| STAR ) field_def_list?
	;

	flatten_clause : ^( FLATTEN expr )
	;

	store_clause : ^( STORE rel filename func_clause? )
	;

	assert_clause : ^( ASSERT rel cond comment? )
	;

	comment : QUOTEDSTRING
	;

	filter_clause : ^( FILTER rel cond )
	;

	cond : ^( OR cond cond )
	\| ^( AND cond cond )
	\| ^( NOT cond )
	\| ^( NULL expr NOT? )
	\| ^( rel_op expr expr )
	\| in_eval
	\| func_eval
	\| ^( BOOL_COND expr )
	;

	in_eval: ^( IN ( ^( IN_LHS expr ) ^( IN_RHS expr ) )+ )
	;

	func_eval: ^( FUNC_EVAL func_name real_arg* ) \| ^( INVOKER_FUNC_EVAL func_name IDENTIFIER real_arg* )
	;

	real_arg : expr \| STAR \| col_range
	;

	expr : ^( PLUS expr expr )
	\| ^( MINUS expr expr )
	\| ^( STAR expr expr )
	\| ^( DIV expr expr )
	\| ^( PERCENT expr expr )
	\| ^( CAST_EXPR type expr )
	\| const_expr
	\| var_expr
	\| ^( NEG expr )
	\| ^( CAST_EXPR type_cast expr )
	\| ^( EXPR_IN_PAREN expr )
	;

	type_cast : simple_type \| map_type \| tuple_type_cast \| bag_type_cast
	;

	tuple_type_cast : ^( TUPLE_TYPE_CAST type_cast* )
	;

	bag_type_cast : ^( BAG_TYPE_CAST tuple_type_cast? )
	;

	var_expr : projectable_expr ( dot_proj \| pound_proj )*
	;

	projectable_expr: func_eval \| col_ref \| bin_expr \| case_expr \| case_cond
	;

	dot_proj : ^( PERIOD col_alias_or_index+ )
	;

	col_alias_or_index : col_alias \| col_index
	;

	col_alias : GROUP \| CUBE \| IDENTIFIER
	;

	col_index : DOLLARVAR
	;

	col_range : ^(COL_RANGE col_ref? DOUBLE_PERIOD col_ref?)
	;


	pound_proj : ^( POUND ( QUOTEDSTRING \| NULL ) )
	;

	bin_expr : ^( BIN_EXPR cond expr expr )
	;

	case_expr: ^( CASE_EXPR ( ^( CASE_EXPR_LHS expr ) ( ^( CASE_EXPR_RHS expr) )+ )+ )
	;

	case_cond: ^( CASE_COND ^( WHEN cond+ ) ^( THEN expr+ ) )
	;

	limit_clause : ^( LIMIT rel ( INTEGER \| LONGINTEGER \| expr ) )
	;

	sample_clause : ^( SAMPLE rel ( DOUBLENUMBER \| expr ) )
	;

	rank_clause : ^( RANK rel ( rank_by_statement )? )
	;

	rank_by_statement : ^( BY rank_by_clause ( DENSE )? )
	;

	rank_by_clause : STAR ( ASC \| DESC )?
	\| rank_col+
	;

	rank_col : col_range (ASC \| DESC)?
	\| col_ref ( ASC \| DESC )?
	;

	order_clause : ^( ORDER rel order_by_clause func_clause? )
	;

	order_by_clause : STAR ( ASC \| DESC )?
	\| order_col+
	;

	order_col : col_range (ASC \| DESC)?
	\| col_ref ( ASC \| DESC )?
	;

	distinct_clause : ^( DISTINCT rel partition_clause? )
	;

	partition_clause : ^( PARTITION func_name )
	;

	cross_clause : ^( CROSS rel_list partition_clause? )
	;

	rel_list : rel+
	;

	join_clause
	scope {
	int arity;
	}
	@init {
	$join_clause::arity = 0;
	}
	: ^( JOIN join_sub_clause join_type? partition_clause? )
	;

	join_type : QUOTEDSTRING
	;

	join_sub_clause
	: join_item ( LEFT \| RIGHT \| FULL ) OUTER? join_item
	\| join_item+
	;

	join_item
	: ^( JOIN_ITEM rel join_group_by_clause )
	{
	if( $join_clause::arity == 0 ) {
	// For the first input
	$join_clause::arity = $join_group_by_clause.exprCount;
	} else if( $join_group_by_clause.exprCount != $join_clause::arity ) {
	throw new ParserValidationException( input, new SourceLocation( (PigParserNode)$join_item.start ),
	"The arity of the join columns do not match." );
	}
	}
	;

	join_group_by_clause returns[int exprCount]
	@init {
	$exprCount = 0;
	}
	: ^( BY ( join_group_by_expr { $exprCount++; } )+ )
	;

	join_group_by_expr : col_range \| expr \| STAR
	;

	union_clause : ^( UNION ONSCHEMA? rel_list )
	;

	foreach_clause : ^( FOREACH rel foreach_plan )
	;

	foreach_plan : ^( FOREACH_PLAN_SIMPLE generate_clause )
	\| ^( FOREACH_PLAN_COMPLEX nested_blk )
	;

	nested_blk
	scope { Set<String> ids; }
	@init{ $nested_blk::ids = new HashSet<String>(); }
	: nested_command* generate_clause
	;

	generate_clause : ^( GENERATE flatten_generated_item+ )
	;

	nested_command
	: ^( NESTED_CMD IDENTIFIER nested_op )
	{
	$nested_blk::ids.add( $IDENTIFIER.text );
	}
	\| ^( NESTED_CMD_ASSI IDENTIFIER expr )
	{
	$nested_blk::ids.add( $IDENTIFIER.text );
	}
	;

	nested_op : nested_proj
	\| nested_filter
	\| nested_sort
	\| nested_distinct
	\| nested_limit
	\| nested_cross
	\| nested_foreach
	;

	nested_proj : ^( NESTED_PROJ col_ref col_ref+ )
	;

	nested_filter
	: ^( FILTER nested_op_input cond )
	;

	nested_sort : ^( ORDER nested_op_input order_by_clause func_clause? )
	;

	nested_distinct : ^( DISTINCT nested_op_input )
	;

	nested_limit : ^( LIMIT nested_op_input ( INTEGER \| expr ) )
	;

	nested_cross : ^( CROSS nested_op_input_list )
	;

	nested_foreach : ^( FOREACH nested_op_input generate_clause )
	;

	nested_op_input : col_ref \| nested_proj
	;

	nested_op_input_list : nested_op_input+
	;

	stream_clause : ^( STREAM rel ( EXECCOMMAND \| IDENTIFIER ) as_clause? )
	;

	mr_clause : ^( MAPREDUCE QUOTEDSTRING path_list? store_clause load_clause EXECCOMMAND? )
	;

	split_clause : ^( SPLIT rel split_branch+ split_otherwise? )
	;

	split_branch
	: ^( SPLIT_BRANCH alias cond )
	{
	aliases.add( $alias.name );
	}
	;

	split_otherwise : ^( OTHERWISE alias ALL? )
	{
	aliases.add( $alias.name );
	}
	;

	col_ref : alias_col_ref \| dollar_col_ref
	;

	alias_col_ref : GROUP \| CUBE \| IDENTIFIER
	;

	dollar_col_ref : DOLLARVAR
	;

	const_expr : literal
	;

	literal : scalar \| map \| bag \| tuple
	;

	scalar : num_scalar \| QUOTEDSTRING \| NULL \| TRUE \| FALSE
	;

	num_scalar : MINUS? ( INTEGER \| LONGINTEGER \| FLOATNUMBER \| DOUBLENUMBER \| BIGINTEGERNUMBER \| BIGDECIMALNUMBER )
	;

	map : ^( MAP_VAL keyvalue* )
	;

	keyvalue : ^( KEY_VAL_PAIR map_key const_expr )
	;

	map_key : QUOTEDSTRING
	;

	bag : ^( BAG_VAL tuple* )
	;

	tuple : ^( TUPLE_VAL literal* )
	;

	// extended identifier, handling the keyword and identifier conflicts. Ugly but there is no other choice.
	eid : rel_str_op
	\| IMPORT
	\| RETURNS
	\| DEFINE
	\| LOAD
	\| FILTER
	\| FOREACH
	\| CUBE
	\| ROLLUP
	\| MATCHES
	\| ORDER
	\| RANK
	\| DISTINCT
	\| COGROUP
	\| JOIN
	\| CROSS
	\| UNION
	\| SPLIT
	\| INTO
	\| IF
	\| ALL
	\| AS
	\| BY
	\| USING
	\| INNER
	\| OUTER
	\| PARALLEL
	\| PARTITION
	\| GROUP
	\| AND
	\| OR
	\| NOT
	\| GENERATE
	\| FLATTEN
	\| EVAL
	\| ASC
	\| DESC
	\| BOOLEAN
	\| INT
	\| LONG
	\| FLOAT
	\| DOUBLE
	\| BIGINTEGER
	\| BIGDECIMAL
	\| DATETIME
	\| CHARARRAY
	\| BYTEARRAY
	\| BAG
	\| TUPLE
	\| MAP
	\| IS
	\| NULL
	\| TRUE
	\| FALSE
	\| STREAM
	\| THROUGH
	\| STORE
	\| MAPREDUCE
	\| SHIP
	\| CACHE
	\| INPUT
	\| OUTPUT
	\| STDERROR
	\| STDIN
	\| STDOUT
	\| LIMIT
	\| SAMPLE
	\| LEFT
	\| RIGHT
	\| FULL
	\| IDENTIFIER
	\| TOBAG
	\| TOMAP
	\| TOTUPLE
	\| ASSERT
	;

	// relational operator
	rel_op : rel_op_eq
	\| rel_op_ne
	\| rel_op_gt
	\| rel_op_gte
	\| rel_op_lt
	\| rel_op_lte
	\| STR_OP_MATCHES
	;

	rel_op_eq : STR_OP_EQ \| NUM_OP_EQ
	;

	rel_op_ne : STR_OP_NE \| NUM_OP_NE
	;

	rel_op_gt : STR_OP_GT \| NUM_OP_GT
	;

	rel_op_gte : STR_OP_GTE \| NUM_OP_GTE
	;

	rel_op_lt : STR_OP_LT \| NUM_OP_LT
	;

	rel_op_lte : STR_OP_LTE \| NUM_OP_LTE
	;

	rel_str_op : STR_OP_EQ
	\| STR_OP_NE
	\| STR_OP_GT
	\| STR_OP_LT
	\| STR_OP_GTE
	\| STR_OP_LTE
	\| STR_OP_MATCHES
	;