ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java - hive - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.hadoop.hive.ql.parse;

 import java.util.ArrayList;
 import org.antlr.runtime.ANTLRStringStream;
 import org.antlr.runtime.CharStream;
 import org.antlr.runtime.NoViableAltException;
 import org.antlr.runtime.RecognitionException;
 import org.antlr.runtime.Token;
 import org.antlr.runtime.TokenRewriteStream;
 import org.antlr.runtime.TokenStream;
 import org.antlr.runtime.tree.CommonTree;
 import org.antlr.runtime.tree.CommonTreeAdaptor;
 import org.antlr.runtime.tree.TreeAdaptor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import org.apache.hadoop.hive.ql.Context;

 /**
  * ParseDriver.
  *
  */
 public class ParseDriver {

   private static final Logger LOG = LoggerFactory.getLogger("hive.ql.parse.ParseDriver");

   /**
    * ANTLRNoCaseStringStream.
    *
    */
   //This class provides an implementation of a case-insensitive token checker
   //for the lexical analysis part of antlr. By converting the token stream into
   //upper case at the time when lexical rules are checked, this class ensures that the
   //lexical rules need to just match the token with upper case letters as opposed to
   //a combination of upper case and lower case characters. This is purely used for matching
   //lexical rules. The actual token text is stored in the same way as the user input without
   //actually converting it into upper case. The token values are generated by the consume()
   //function of the super class ANTLRStringStream. The LA() function is the lookahead function
   //and is purely used for matching lexical rules. This also means that the grammar will only
   //accept capitalized tokens in case it is run from other tools like antlrworks which
   //do not have the ANTLRNoCaseStringStream implementation.
   public class ANTLRNoCaseStringStream extends ANTLRStringStream {

     public ANTLRNoCaseStringStream(String input) {
       super(input);
     }

     /**
      * Returns the lookahead character reported by the superclass, upper-cased.
      * The values {@code CharStream.EOF} and {@code 0} are returned unchanged.
      *
      * @param i
      *          lookahead offset, passed straight to {@code super.LA(i)}
      * @return the upper-cased lookahead character, or the untouched sentinel
      */
     @Override
     public int LA(int i) {
       final int lookahead = super.LA(i);
       // Sentinel values are not characters and must not be case-converted.
       final boolean isSentinel = lookahead == CharStream.EOF || lookahead == 0;
       return isSentinel ? lookahead : Character.toUpperCase((char) lookahead);
     }
   }

   /**
    * HiveLexerX.
    *
    */
   public class HiveLexerX extends HiveLexer {

     /** Errors recorded by {@link #displayRecognitionError}, in the order they were added. */
     private final ArrayList<ParseError> errors;

     public HiveLexerX() {
       super();
       errors = new ArrayList<ParseError>();
     }

     public HiveLexerX(CharStream input) {
       super(input);
       errors = new ArrayList<ParseError>();
     }

     /**
      * Records the recognition error in this lexer's error list; nothing else
      * is done with it here.
      *
      * @param tokenNames
      *          token names forwarded to the created {@code ParseError}
      * @param e
      *          the recognition error to record
      */
     @Override
     public void displayRecognitionError(String[] tokenNames,
         RecognitionException e) {
       errors.add(new ParseError(this, e, tokenNames));
     }

     /**
      * Builds the message for a recognition error. A
      * {@code NoViableAltException} gets a short "character ... not supported
      * here" message; every other error uses the superclass message.
      *
      * @param e
      *          the recognition error
      * @param tokenNames
      *          token names, forwarded to the superclass when it is consulted
      * @return the error message text
      */
     @Override
     public String getErrorMessage(RecognitionException e, String[] tokenNames) {
       if (e instanceof NoViableAltException) {
         // For development, cast e to NoViableAltException and append
         // "decision=<<" + grammarDecisionDescription + ">>",
         // "(decision=" + decisionNumber + ")" and "state " + stateNumber.
         return "character " + getCharErrorDisplay(e.c) + " not supported here";
       }
       return super.getErrorMessage(e, tokenNames);
     }

     /**
      * @return the live list of errors recorded so far (not a copy)
      */
     public ArrayList<ParseError> getErrors() {
       return errors;
     }

   }

   /**
    * Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes
    * so that the graph walking algorithms and the rules framework defined in
    * ql.lib can be used with the AST Nodes.
    */
   public static final TreeAdaptor adaptor = new CommonTreeAdaptor() {
     /**
      * Creates an ASTNode for the given token. The ASTNode is a wrapper around
      * antlr's CommonTree class that implements the Node interface.
      *
      * @param payload
      *          The token.
      * @return Object (which is actually an ASTNode) for the token.
      */
     @Override
     public Object create(Token payload) {
       return new ASTNode(payload);
     }

     /**
      * Duplicates a single node by creating a new node for the same token.
      *
      * @param t
      *          the node to duplicate; expected to be a CommonTree, may be null
      * @return the new node, or null when {@code t} is null
      */
     @Override
     public Object dupNode(Object t) {
       if (t == null) {
         // Same null contract as the base adaptor instead of a NullPointerException.
         return null;
       }
       return create(((CommonTree) t).token);
     }

     /**
      * Duplicates a tree and copies the token start / stop index onto the copy.
      *
      * @param t
      *          the tree to duplicate; expected to be an ASTNode, may be null
      * @param parent
      *          forwarded unchanged to the base implementation
      * @return the duplicated tree, or null when the base implementation
      *         returns null
      */
     @Override
     public Object dupTree(Object t, Object parent) {
       // Overridden to copy start index / end index, that is needed through optimization,
       // e.g., for masking/filtering
       ASTNode astNode = (ASTNode) t;
       ASTNode astNodeCopy = (ASTNode) super.dupTree(t, parent);
       if (astNode == null || astNodeCopy == null) {
         // Nothing to copy the indexes from or to (the base returns null for a null tree).
         return astNodeCopy;
       }
       astNodeCopy.setTokenStartIndex(astNode.getTokenStartIndex());
       astNodeCopy.setTokenStopIndex(astNode.getTokenStopIndex());
       return astNodeCopy;
     }

     /**
      * Creates the node that stands in for a span of input that failed to parse.
      *
      * @return a new ASTErrorNode built from the given arguments
      */
     @Override
     public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) {
       return new ASTErrorNode(input, start, stop, e);
     }
   };

   /**
    * Parses a command by delegating to {@link #parse(String, Context)} with a
    * null context.
    *
    * @param command
    *          command to parse
    * @return parsed AST
    * @throws ParseException
    *           propagated from the delegate
    */
   public ASTNode parse(String command) throws ParseException {
     final Context noContext = null;
     return parse(command, noContext);
   }

   /**
    * Parses a command by delegating to
    * {@link #parse(String, Context, String)} with a null view name.
    *
    * @param command
    *          command to parse
    * @param ctx
    *          context forwarded to the delegate; may be null
    * @return parsed AST
    * @throws ParseException
    *           propagated from the delegate
    */
   public ASTNode parse(String command, Context ctx)
       throws ParseException {
     final String noViewName = null;
     return parse(command, ctx, noViewName);
   }

   /**
    * Parses a command, optionally assigning the parser's token stream to the
    * given context.
    *
    * @param command
    *          command to parse
    *
    * @param ctx
    *          context with which to associate this parser's token stream, or
    *          null if either no context is available or the context already has
    *          an existing stream. When non-null, its configuration is also
    *          handed to the lexer and the parser.
    *
    * @param viewFullyQualifiedName
    *          when null, the token stream is set as the context's top-level
    *          stream; otherwise it is added to the context under this view
    *          name. Ignored when {@code ctx} is null.
    *
    * @return parsed AST
    *
    * @throws ParseException
    *           if the parser throws a RecognitionException, or if the lexer
    *           or the parser recorded any error (lexer errors take precedence)
    */
   public ASTNode parse(String command, Context ctx, String viewFullyQualifiedName)
       throws ParseException {
     LOG.debug("Parsing command: {}", command);

     HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
     TokenRewriteStream tokens = new TokenRewriteStream(lexer);
     if (ctx != null) {
       if (viewFullyQualifiedName == null) {
         // Top level query
         ctx.setTokenRewriteStream(tokens);
       } else {
         // It is a view
         ctx.addViewTokenRewriteStream(viewFullyQualifiedName, tokens);
       }
       lexer.setHiveConf(ctx.getConf());
     }
     HiveParser parser = new HiveParser(tokens);
     if (ctx != null) {
       parser.setHiveConf(ctx.getConf());
     }
     parser.setTreeAdaptor(adaptor);
     HiveParser.statement_return r = null;
     try {
       r = parser.statement();
     } catch (RecognitionException e) {
       // Route the stack trace through the logger rather than raw stderr.
       LOG.error("Recognition error while parsing command", e);
       throw new ParseException(parser.errors);
     }

     // Lexer errors are reported in preference to parser errors.
     if (lexer.getErrors().size() != 0) {
       throw new ParseException(lexer.getErrors());
     }
     if (parser.errors.size() != 0) {
       throw new ParseException(parser.errors);
     }
     LOG.debug("Parse Completed");

     ASTNode tree = (ASTNode) r.getTree();
     tree.setUnknownTokenBoundaries();
     return tree;
   }

   /**
    * Parses a string as a query hint, using the {@code hint} rule of
    * HintParser.
    *
    * @param command
    *          hint text to parse
    * @return parsed AST
    * @throws ParseException
    *           if the parser throws a RecognitionException, or if the lexer
    *           or the parser recorded any error (lexer errors take precedence)
    */
   public ASTNode parseHint(String command) throws ParseException {
     LOG.info("Parsing hint: {}", command);

     HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
     TokenRewriteStream tokens = new TokenRewriteStream(lexer);
     HintParser parser = new HintParser(tokens);
     parser.setTreeAdaptor(adaptor);
     HintParser.hint_return r = null;
     try {
       r = parser.hint();
     } catch (RecognitionException e) {
       // Route the stack trace through the logger rather than raw stderr.
       LOG.error("Recognition error while parsing hint", e);
       throw new ParseException(parser.errors);
     }

     // Lexer errors are reported in preference to parser errors.
     if (lexer.getErrors().size() != 0) {
       throw new ParseException(lexer.getErrors());
     }
     if (parser.errors.size() != 0) {
       throw new ParseException(parser.errors);
     }
     LOG.info("Parse Completed");

     return (ASTNode) r.getTree();
   }

   /**
    * Parses a String as a Select List. This allows table functions to be
    * passed expression Strings that are translated in the context they define
    * at invocation time. Currently used by NPath to allow users to specify
    * what output they want.
    * NPath allows expressions on 'tpath', a column that represents the matched
    * set of rows. This column doesn't exist in the input schema and hence the
    * Result Expression cannot be analyzed by the regular Hive translation
    * process.
    *
    * @param command
    *          select list text to parse
    * @param ctx
    *          when non-null, receives this parser's token stream
    * @return parsed AST
    * @throws ParseException
    *           if the parser throws a RecognitionException, or if the lexer
    *           or the parser recorded any error (lexer errors take precedence)
    */
   public ASTNode parseSelect(String command, Context ctx) throws ParseException {
     LOG.debug("Parsing command: {}", command);

     HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
     TokenRewriteStream tokens = new TokenRewriteStream(lexer);
     if (ctx != null) {
       ctx.setTokenRewriteStream(tokens);
     }
     HiveParser parser = new HiveParser(tokens);
     parser.setTreeAdaptor(adaptor);
     HiveParser_SelectClauseParser.selectClause_return r = null;
     try {
       r = parser.selectClause();
     } catch (RecognitionException e) {
       // Route the stack trace through the logger rather than raw stderr.
       LOG.error("Recognition error while parsing select clause", e);
       throw new ParseException(parser.errors);
     }

     // Lexer errors are reported in preference to parser errors.
     if (lexer.getErrors().size() != 0) {
       throw new ParseException(lexer.getErrors());
     }
     if (parser.errors.size() != 0) {
       throw new ParseException(parser.errors);
     }
     LOG.debug("Parse Completed");

     return r.getTree();
   }
   /**
    * Parses a string using the {@code expression} rule of HiveParser.
    *
    * @param command
    *          expression text to parse
    * @return parsed AST
    * @throws ParseException
    *           if the parser throws a RecognitionException, or if the lexer
    *           or the parser recorded any error (lexer errors take precedence)
    */
   public ASTNode parseExpression(String command) throws ParseException {
     LOG.info("Parsing expression: {}", command);

     HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
     TokenRewriteStream tokens = new TokenRewriteStream(lexer);
     HiveParser parser = new HiveParser(tokens);
     parser.setTreeAdaptor(adaptor);
     HiveParser_IdentifiersParser.expression_return r = null;
     try {
       r = parser.expression();
     } catch (RecognitionException e) {
       // Route the stack trace through the logger rather than raw stderr.
       LOG.error("Recognition error while parsing expression", e);
       throw new ParseException(parser.errors);
     }

     // Lexer errors are reported in preference to parser errors.
     if (lexer.getErrors().size() != 0) {
       throw new ParseException(lexer.getErrors());
     }
     if (parser.errors.size() != 0) {
       throw new ParseException(parser.errors);
     }
     LOG.info("Parse Completed");

     return (ASTNode) r.getTree();
   }

   /**
    * Parses a string using the {@code triggerExpressionStandalone} rule of the
    * resource plan parser.
    *
    * @param command
    *          trigger expression text to parse
    * @return parsed AST
    * @throws ParseException
    *           if the parser throws a RecognitionException, or if the lexer
    *           or the parser recorded any error (lexer errors take precedence)
    */
   public ASTNode parseTriggerExpression(String command) throws ParseException {
     HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
     TokenRewriteStream tokens = new TokenRewriteStream(lexer);
     HiveParser parser = new HiveParser(tokens);
     parser.setTreeAdaptor(adaptor);
     HiveParser_ResourcePlanParser.triggerExpressionStandalone_return r = null;
     try {
       r = parser.gResourcePlanParser.triggerExpressionStandalone();
     } catch (RecognitionException e) {
       // Route the stack trace through the logger rather than raw stderr.
       LOG.error("Recognition error while parsing trigger expression", e);
       throw new ParseException(parser.errors);
     }
     if (lexer.getErrors().size() != 0) {
       throw new ParseException(lexer.getErrors());
     } else if (parser.errors.size() != 0) {
       throw new ParseException(parser.errors);
     }

     return r.getTree();
   }

   /**
    * Parses a string using the {@code triggerActionExpressionStandalone} rule
    * of the resource plan parser.
    *
    * @param command
    *          trigger action expression text to parse
    * @return parsed AST
    * @throws ParseException
    *           if the parser throws a RecognitionException, or if the lexer
    *           or the parser recorded any error (lexer errors take precedence)
    */
   public ASTNode parseTriggerActionExpression(String command) throws ParseException {
     HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
     TokenRewriteStream tokens = new TokenRewriteStream(lexer);
     HiveParser parser = new HiveParser(tokens);
     parser.setTreeAdaptor(adaptor);
     HiveParser_ResourcePlanParser.triggerActionExpressionStandalone_return r = null;
     try {
       r = parser.gResourcePlanParser.triggerActionExpressionStandalone();
     } catch (RecognitionException e) {
       // Route the stack trace through the logger rather than raw stderr.
       LOG.error("Recognition error while parsing trigger action expression", e);
       throw new ParseException(parser.errors);
     }
     if (lexer.getErrors().size() != 0) {
       throw new ParseException(lexer.getErrors());
     } else if (parser.errors.size() != 0) {
       throw new ParseException(parser.errors);
     }

     return r.getTree();
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.hadoop.hive.ql.parse;

	import java.util.ArrayList;
	import org.antlr.runtime.ANTLRStringStream;
	import org.antlr.runtime.CharStream;
	import org.antlr.runtime.NoViableAltException;
	import org.antlr.runtime.RecognitionException;
	import org.antlr.runtime.Token;
	import org.antlr.runtime.TokenRewriteStream;
	import org.antlr.runtime.TokenStream;
	import org.antlr.runtime.tree.CommonTree;
	import org.antlr.runtime.tree.CommonTreeAdaptor;
	import org.antlr.runtime.tree.TreeAdaptor;
	import org.slf4j.Logger;
	import org.slf4j.LoggerFactory;

	import org.apache.hadoop.hive.ql.Context;

	/**
	* ParseDriver.
	*
	*/
	public class ParseDriver {

	private static final Logger LOG = LoggerFactory.getLogger("hive.ql.parse.ParseDriver");

	/**
	* ANTLRNoCaseStringStream.
	*
	*/
	//This class provides an implementation of a case-insensitive token checker
	//for the lexical analysis part of antlr. By converting the token stream into
	//upper case at the time when lexical rules are checked, this class ensures that the
	//lexical rules need to just match the token with upper case letters as opposed to
	//a combination of upper case and lower case characters. This is purely used for matching
	//lexical rules. The actual token text is stored in the same way as the user input without
	//actually converting it into upper case. The token values are generated by the consume()
	//function of the super class ANTLRStringStream. The LA() function is the lookahead function
	//and is purely used for matching lexical rules. This also means that the grammar will only
	//accept capitalized tokens in case it is run from other tools like antlrworks which
	//do not have the ANTLRNoCaseStringStream implementation.
	public class ANTLRNoCaseStringStream extends ANTLRStringStream {

	  public ANTLRNoCaseStringStream(String input) {
	    super(input);
	  }

	  /**
	   * Returns the lookahead character reported by the superclass, upper-cased.
	   * The values {@code CharStream.EOF} and {@code 0} are returned unchanged.
	   *
	   * @param i
	   *          lookahead offset, passed straight to {@code super.LA(i)}
	   * @return the upper-cased lookahead character, or the untouched sentinel
	   */
	  @Override
	  public int LA(int i) {
	    final int lookahead = super.LA(i);
	    // Sentinel values are not characters and must not be case-converted.
	    final boolean isSentinel = lookahead == CharStream.EOF || lookahead == 0;
	    return isSentinel ? lookahead : Character.toUpperCase((char) lookahead);
	  }
	}

	/**
	* HiveLexerX.
	*
	*/
	public class HiveLexerX extends HiveLexer {

	  /** Errors recorded by {@link #displayRecognitionError}, in the order they were added. */
	  private final ArrayList<ParseError> errors;

	  public HiveLexerX() {
	    super();
	    errors = new ArrayList<ParseError>();
	  }

	  public HiveLexerX(CharStream input) {
	    super(input);
	    errors = new ArrayList<ParseError>();
	  }

	  /**
	   * Records the recognition error in this lexer's error list; nothing else
	   * is done with it here.
	   *
	   * @param tokenNames
	   *          token names forwarded to the created {@code ParseError}
	   * @param e
	   *          the recognition error to record
	   */
	  @Override
	  public void displayRecognitionError(String[] tokenNames,
	      RecognitionException e) {
	    errors.add(new ParseError(this, e, tokenNames));
	  }

	  /**
	   * Builds the message for a recognition error. A
	   * {@code NoViableAltException} gets a short "character ... not supported
	   * here" message; every other error uses the superclass message.
	   *
	   * @param e
	   *          the recognition error
	   * @param tokenNames
	   *          token names, forwarded to the superclass when it is consulted
	   * @return the error message text
	   */
	  @Override
	  public String getErrorMessage(RecognitionException e, String[] tokenNames) {
	    if (e instanceof NoViableAltException) {
	      // For development, cast e to NoViableAltException and append
	      // "decision=<<" + grammarDecisionDescription + ">>",
	      // "(decision=" + decisionNumber + ")" and "state " + stateNumber.
	      return "character " + getCharErrorDisplay(e.c) + " not supported here";
	    }
	    return super.getErrorMessage(e, tokenNames);
	  }

	  /**
	   * @return the live list of errors recorded so far (not a copy)
	   */
	  public ArrayList<ParseError> getErrors() {
	    return errors;
	  }

	}

	/**
	* Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes
	* so that the graph walking algorithms and the rules framework defined in
	* ql.lib can be used with the AST Nodes.
	*/
	public static final TreeAdaptor adaptor = new CommonTreeAdaptor() {
	  /**
	   * Creates an ASTNode for the given token. The ASTNode is a wrapper around
	   * antlr's CommonTree class that implements the Node interface.
	   *
	   * @param payload
	   *          The token.
	   * @return Object (which is actually an ASTNode) for the token.
	   */
	  @Override
	  public Object create(Token payload) {
	    return new ASTNode(payload);
	  }

	  /**
	   * Duplicates a single node by creating a new node for the same token.
	   *
	   * @param t
	   *          the node to duplicate; expected to be a CommonTree, may be null
	   * @return the new node, or null when {@code t} is null
	   */
	  @Override
	  public Object dupNode(Object t) {
	    if (t == null) {
	      // Same null contract as the base adaptor instead of a NullPointerException.
	      return null;
	    }
	    return create(((CommonTree) t).token);
	  }

	  /**
	   * Duplicates a tree and copies the token start / stop index onto the copy.
	   *
	   * @param t
	   *          the tree to duplicate; expected to be an ASTNode, may be null
	   * @param parent
	   *          forwarded unchanged to the base implementation
	   * @return the duplicated tree, or null when the base implementation
	   *         returns null
	   */
	  @Override
	  public Object dupTree(Object t, Object parent) {
	    // Overridden to copy start index / end index, that is needed through optimization,
	    // e.g., for masking/filtering
	    ASTNode astNode = (ASTNode) t;
	    ASTNode astNodeCopy = (ASTNode) super.dupTree(t, parent);
	    if (astNode == null || astNodeCopy == null) {
	      // Nothing to copy the indexes from or to (the base returns null for a null tree).
	      return astNodeCopy;
	    }
	    astNodeCopy.setTokenStartIndex(astNode.getTokenStartIndex());
	    astNodeCopy.setTokenStopIndex(astNode.getTokenStopIndex());
	    return astNodeCopy;
	  }

	  /**
	   * Creates the node that stands in for a span of input that failed to parse.
	   *
	   * @return a new ASTErrorNode built from the given arguments
	   */
	  @Override
	  public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) {
	    return new ASTErrorNode(input, start, stop, e);
	  }
	};

	/**
	 * Parses a command by delegating to {@link #parse(String, Context)} with a
	 * null context.
	 *
	 * @param command
	 *          command to parse
	 * @return parsed AST
	 * @throws ParseException
	 *           propagated from the delegate
	 */
	public ASTNode parse(String command) throws ParseException {
	  final Context noContext = null;
	  return parse(command, noContext);
	}

	/**
	 * Parses a command by delegating to
	 * {@link #parse(String, Context, String)} with a null view name.
	 *
	 * @param command
	 *          command to parse
	 * @param ctx
	 *          context forwarded to the delegate; may be null
	 * @return parsed AST
	 * @throws ParseException
	 *           propagated from the delegate
	 */
	public ASTNode parse(String command, Context ctx)
	    throws ParseException {
	  final String noViewName = null;
	  return parse(command, ctx, noViewName);
	}

	/**
	 * Parses a command, optionally assigning the parser's token stream to the
	 * given context.
	 *
	 * @param command
	 *          command to parse
	 *
	 * @param ctx
	 *          context with which to associate this parser's token stream, or
	 *          null if either no context is available or the context already has
	 *          an existing stream. When non-null, its configuration is also
	 *          handed to the lexer and the parser.
	 *
	 * @param viewFullyQualifiedName
	 *          when null, the token stream is set as the context's top-level
	 *          stream; otherwise it is added to the context under this view
	 *          name. Ignored when {@code ctx} is null.
	 *
	 * @return parsed AST
	 *
	 * @throws ParseException
	 *           if the parser throws a RecognitionException, or if the lexer
	 *           or the parser recorded any error (lexer errors take precedence)
	 */
	public ASTNode parse(String command, Context ctx, String viewFullyQualifiedName)
	    throws ParseException {
	  LOG.debug("Parsing command: {}", command);

	  HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
	  TokenRewriteStream tokens = new TokenRewriteStream(lexer);
	  if (ctx != null) {
	    if (viewFullyQualifiedName == null) {
	      // Top level query
	      ctx.setTokenRewriteStream(tokens);
	    } else {
	      // It is a view
	      ctx.addViewTokenRewriteStream(viewFullyQualifiedName, tokens);
	    }
	    lexer.setHiveConf(ctx.getConf());
	  }
	  HiveParser parser = new HiveParser(tokens);
	  if (ctx != null) {
	    parser.setHiveConf(ctx.getConf());
	  }
	  parser.setTreeAdaptor(adaptor);
	  HiveParser.statement_return r = null;
	  try {
	    r = parser.statement();
	  } catch (RecognitionException e) {
	    // Route the stack trace through the logger rather than raw stderr.
	    LOG.error("Recognition error while parsing command", e);
	    throw new ParseException(parser.errors);
	  }

	  // Lexer errors are reported in preference to parser errors.
	  if (lexer.getErrors().size() != 0) {
	    throw new ParseException(lexer.getErrors());
	  }
	  if (parser.errors.size() != 0) {
	    throw new ParseException(parser.errors);
	  }
	  LOG.debug("Parse Completed");

	  ASTNode tree = (ASTNode) r.getTree();
	  tree.setUnknownTokenBoundaries();
	  return tree;
	}

	/**
	 * Parses a string as a query hint, using the {@code hint} rule of
	 * HintParser.
	 *
	 * @param command
	 *          hint text to parse
	 * @return parsed AST
	 * @throws ParseException
	 *           if the parser throws a RecognitionException, or if the lexer
	 *           or the parser recorded any error (lexer errors take precedence)
	 */
	public ASTNode parseHint(String command) throws ParseException {
	  LOG.info("Parsing hint: {}", command);

	  HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
	  TokenRewriteStream tokens = new TokenRewriteStream(lexer);
	  HintParser parser = new HintParser(tokens);
	  parser.setTreeAdaptor(adaptor);
	  HintParser.hint_return r = null;
	  try {
	    r = parser.hint();
	  } catch (RecognitionException e) {
	    // Route the stack trace through the logger rather than raw stderr.
	    LOG.error("Recognition error while parsing hint", e);
	    throw new ParseException(parser.errors);
	  }

	  // Lexer errors are reported in preference to parser errors.
	  if (lexer.getErrors().size() != 0) {
	    throw new ParseException(lexer.getErrors());
	  }
	  if (parser.errors.size() != 0) {
	    throw new ParseException(parser.errors);
	  }
	  LOG.info("Parse Completed");

	  return (ASTNode) r.getTree();
	}

	/**
	 * Parses a String as a Select List. This allows table functions to be
	 * passed expression Strings that are translated in the context they define
	 * at invocation time. Currently used by NPath to allow users to specify
	 * what output they want.
	 * NPath allows expressions on 'tpath', a column that represents the matched
	 * set of rows. This column doesn't exist in the input schema and hence the
	 * Result Expression cannot be analyzed by the regular Hive translation
	 * process.
	 *
	 * @param command
	 *          select list text to parse
	 * @param ctx
	 *          when non-null, receives this parser's token stream
	 * @return parsed AST
	 * @throws ParseException
	 *           if the parser throws a RecognitionException, or if the lexer
	 *           or the parser recorded any error (lexer errors take precedence)
	 */
	public ASTNode parseSelect(String command, Context ctx) throws ParseException {
	  LOG.debug("Parsing command: {}", command);

	  HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
	  TokenRewriteStream tokens = new TokenRewriteStream(lexer);
	  if (ctx != null) {
	    ctx.setTokenRewriteStream(tokens);
	  }
	  HiveParser parser = new HiveParser(tokens);
	  parser.setTreeAdaptor(adaptor);
	  HiveParser_SelectClauseParser.selectClause_return r = null;
	  try {
	    r = parser.selectClause();
	  } catch (RecognitionException e) {
	    // Route the stack trace through the logger rather than raw stderr.
	    LOG.error("Recognition error while parsing select clause", e);
	    throw new ParseException(parser.errors);
	  }

	  // Lexer errors are reported in preference to parser errors.
	  if (lexer.getErrors().size() != 0) {
	    throw new ParseException(lexer.getErrors());
	  }
	  if (parser.errors.size() != 0) {
	    throw new ParseException(parser.errors);
	  }
	  LOG.debug("Parse Completed");

	  return r.getTree();
	}
	/**
	 * Parses a string using the {@code expression} rule of HiveParser.
	 *
	 * @param command
	 *          expression text to parse
	 * @return parsed AST
	 * @throws ParseException
	 *           if the parser throws a RecognitionException, or if the lexer
	 *           or the parser recorded any error (lexer errors take precedence)
	 */
	public ASTNode parseExpression(String command) throws ParseException {
	  LOG.info("Parsing expression: {}", command);

	  HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
	  TokenRewriteStream tokens = new TokenRewriteStream(lexer);
	  HiveParser parser = new HiveParser(tokens);
	  parser.setTreeAdaptor(adaptor);
	  HiveParser_IdentifiersParser.expression_return r = null;
	  try {
	    r = parser.expression();
	  } catch (RecognitionException e) {
	    // Route the stack trace through the logger rather than raw stderr.
	    LOG.error("Recognition error while parsing expression", e);
	    throw new ParseException(parser.errors);
	  }

	  // Lexer errors are reported in preference to parser errors.
	  if (lexer.getErrors().size() != 0) {
	    throw new ParseException(lexer.getErrors());
	  }
	  if (parser.errors.size() != 0) {
	    throw new ParseException(parser.errors);
	  }
	  LOG.info("Parse Completed");

	  return (ASTNode) r.getTree();
	}

	/**
	 * Parses a string using the {@code triggerExpressionStandalone} rule of the
	 * resource plan parser.
	 *
	 * @param command
	 *          trigger expression text to parse
	 * @return parsed AST
	 * @throws ParseException
	 *           if the parser throws a RecognitionException, or if the lexer
	 *           or the parser recorded any error (lexer errors take precedence)
	 */
	public ASTNode parseTriggerExpression(String command) throws ParseException {
	  HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
	  TokenRewriteStream tokens = new TokenRewriteStream(lexer);
	  HiveParser parser = new HiveParser(tokens);
	  parser.setTreeAdaptor(adaptor);
	  HiveParser_ResourcePlanParser.triggerExpressionStandalone_return r = null;
	  try {
	    r = parser.gResourcePlanParser.triggerExpressionStandalone();
	  } catch (RecognitionException e) {
	    // Route the stack trace through the logger rather than raw stderr.
	    LOG.error("Recognition error while parsing trigger expression", e);
	    throw new ParseException(parser.errors);
	  }
	  if (lexer.getErrors().size() != 0) {
	    throw new ParseException(lexer.getErrors());
	  } else if (parser.errors.size() != 0) {
	    throw new ParseException(parser.errors);
	  }

	  return r.getTree();
	}

	/**
	 * Parses a string using the {@code triggerActionExpressionStandalone} rule
	 * of the resource plan parser.
	 *
	 * @param command
	 *          trigger action expression text to parse
	 * @return parsed AST
	 * @throws ParseException
	 *           if the parser throws a RecognitionException, or if the lexer
	 *           or the parser recorded any error (lexer errors take precedence)
	 */
	public ASTNode parseTriggerActionExpression(String command) throws ParseException {
	  HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
	  TokenRewriteStream tokens = new TokenRewriteStream(lexer);
	  HiveParser parser = new HiveParser(tokens);
	  parser.setTreeAdaptor(adaptor);
	  HiveParser_ResourcePlanParser.triggerActionExpressionStandalone_return r = null;
	  try {
	    r = parser.gResourcePlanParser.triggerActionExpressionStandalone();
	  } catch (RecognitionException e) {
	    // Route the stack trace through the logger rather than raw stderr.
	    LOG.error("Recognition error while parsing trigger action expression", e);
	    throw new ParseException(parser.errors);
	  }
	  if (lexer.getErrors().size() != 0) {
	    throw new ParseException(lexer.getErrors());
	  } else if (parser.errors.size() != 0) {
	    throw new ParseException(parser.errors);
	  }

	  return r.getTree();
	}
	}