| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hadoop.hive.ql.parse; |
| |
| import java.util.ArrayList; |
| import org.antlr.runtime.ANTLRStringStream; |
| import org.antlr.runtime.CharStream; |
| import org.antlr.runtime.NoViableAltException; |
| import org.antlr.runtime.RecognitionException; |
| import org.antlr.runtime.Token; |
| import org.antlr.runtime.TokenRewriteStream; |
| import org.antlr.runtime.TokenStream; |
| import org.antlr.runtime.tree.CommonTree; |
| import org.antlr.runtime.tree.CommonTreeAdaptor; |
| import org.antlr.runtime.tree.TreeAdaptor; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import org.apache.hadoop.hive.ql.Context; |
| |
| /** |
| * ParseDriver. |
| * |
| */ |
| public class ParseDriver { |
| |
| private static final Logger LOG = LoggerFactory.getLogger("hive.ql.parse.ParseDriver"); |
| |
| /** |
| * ANTLRNoCaseStringStream. |
| * |
| */ |
| //This class provides and implementation for a case insensitive token checker |
| //for the lexical analysis part of antlr. By converting the token stream into |
| //upper case at the time when lexical rules are checked, this class ensures that the |
| //lexical rules need to just match the token with upper case letters as opposed to |
| //combination of upper case and lower case characteres. This is purely used for matching lexical |
| //rules. The actual token text is stored in the same way as the user input without |
| //actually converting it into an upper case. The token values are generated by the consume() |
| //function of the super class ANTLRStringStream. The LA() function is the lookahead funtion |
| //and is purely used for matching lexical rules. This also means that the grammar will only |
| //accept capitalized tokens in case it is run from other tools like antlrworks which |
| //do not have the ANTLRNoCaseStringStream implementation. |
| public class ANTLRNoCaseStringStream extends ANTLRStringStream { |
| |
| public ANTLRNoCaseStringStream(String input) { |
| super(input); |
| } |
| |
| @Override |
| public int LA(int i) { |
| |
| int returnChar = super.LA(i); |
| if (returnChar == CharStream.EOF) { |
| return returnChar; |
| } else if (returnChar == 0) { |
| return returnChar; |
| } |
| |
| return Character.toUpperCase((char) returnChar); |
| } |
| } |
| |
| /** |
| * HiveLexerX. |
| * |
| */ |
| public class HiveLexerX extends HiveLexer { |
| |
| private final ArrayList<ParseError> errors; |
| |
| public HiveLexerX() { |
| super(); |
| errors = new ArrayList<ParseError>(); |
| } |
| |
| public HiveLexerX(CharStream input) { |
| super(input); |
| errors = new ArrayList<ParseError>(); |
| } |
| |
| @Override |
| public void displayRecognitionError(String[] tokenNames, |
| RecognitionException e) { |
| |
| errors.add(new ParseError(this, e, tokenNames)); |
| } |
| |
| @Override |
| public String getErrorMessage(RecognitionException e, String[] tokenNames) { |
| String msg = null; |
| |
| if (e instanceof NoViableAltException) { |
| @SuppressWarnings("unused") |
| NoViableAltException nvae = (NoViableAltException) e; |
| // for development, can add |
| // "decision=<<"+nvae.grammarDecisionDescription+">>" |
| // and "(decision="+nvae.decisionNumber+") and |
| // "state "+nvae.stateNumber |
| msg = "character " + getCharErrorDisplay(e.c) + " not supported here"; |
| } else { |
| msg = super.getErrorMessage(e, tokenNames); |
| } |
| |
| return msg; |
| } |
| |
| public ArrayList<ParseError> getErrors() { |
| return errors; |
| } |
| |
| } |
| |
| /** |
| * Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes |
| * so that the graph walking algorithms and the rules framework defined in |
| * ql.lib can be used with the AST Nodes. |
| */ |
| public static final TreeAdaptor adaptor = new CommonTreeAdaptor() { |
| /** |
| * Creates an ASTNode for the given token. The ASTNode is a wrapper around |
| * antlr's CommonTree class that implements the Node interface. |
| * |
| * @param payload |
| * The token. |
| * @return Object (which is actually an ASTNode) for the token. |
| */ |
| @Override |
| public Object create(Token payload) { |
| return new ASTNode(payload); |
| } |
| |
| @Override |
| public Object dupNode(Object t) { |
| |
| return create(((CommonTree)t).token); |
| }; |
| |
| @Override |
| public Object dupTree(Object t, Object parent) { |
| // Overriden to copy start index / end index, that is needed through optimization, |
| // e.g., for masking/filtering |
| ASTNode astNode = (ASTNode) t; |
| ASTNode astNodeCopy = (ASTNode) super.dupTree(t, parent); |
| astNodeCopy.setTokenStartIndex(astNode.getTokenStartIndex()); |
| astNodeCopy.setTokenStopIndex(astNode.getTokenStopIndex()); |
| return astNodeCopy; |
| } |
| |
| @Override |
| public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) { |
| return new ASTErrorNode(input, start, stop, e); |
| }; |
| }; |
| |
| public ASTNode parse(String command) throws ParseException { |
| return parse(command, null); |
| } |
| |
| public ASTNode parse(String command, Context ctx) |
| throws ParseException { |
| return parse(command, ctx, null); |
| } |
| |
| /** |
| * Parses a command, optionally assigning the parser's token stream to the |
| * given context. |
| * |
| * @param command |
| * command to parse |
| * |
| * @param ctx |
| * context with which to associate this parser's token stream, or |
| * null if either no context is available or the context already has |
| * an existing stream |
| * |
| * @return parsed AST |
| */ |
| public ASTNode parse(String command, Context ctx, String viewFullyQualifiedName) |
| throws ParseException { |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("Parsing command: " + command); |
| } |
| |
| HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command)); |
| TokenRewriteStream tokens = new TokenRewriteStream(lexer); |
| if (ctx != null) { |
| if (viewFullyQualifiedName == null) { |
| // Top level query |
| ctx.setTokenRewriteStream(tokens); |
| } else { |
| // It is a view |
| ctx.addViewTokenRewriteStream(viewFullyQualifiedName, tokens); |
| } |
| lexer.setHiveConf(ctx.getConf()); |
| } |
| HiveParser parser = new HiveParser(tokens); |
| if (ctx != null) { |
| parser.setHiveConf(ctx.getConf()); |
| } |
| parser.setTreeAdaptor(adaptor); |
| HiveParser.statement_return r = null; |
| try { |
| r = parser.statement(); |
| } catch (RecognitionException e) { |
| e.printStackTrace(); |
| throw new ParseException(parser.errors); |
| } |
| |
| if (lexer.getErrors().size() == 0 && parser.errors.size() == 0) { |
| LOG.debug("Parse Completed"); |
| } else if (lexer.getErrors().size() != 0) { |
| throw new ParseException(lexer.getErrors()); |
| } else { |
| throw new ParseException(parser.errors); |
| } |
| |
| ASTNode tree = (ASTNode) r.getTree(); |
| tree.setUnknownTokenBoundaries(); |
| return tree; |
| } |
| |
| /* |
| * Parse a string as a query hint. |
| */ |
| public ASTNode parseHint(String command) throws ParseException { |
| LOG.info("Parsing hint: " + command); |
| |
| HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command)); |
| TokenRewriteStream tokens = new TokenRewriteStream(lexer); |
| HintParser parser = new HintParser(tokens); |
| parser.setTreeAdaptor(adaptor); |
| HintParser.hint_return r = null; |
| try { |
| r = parser.hint(); |
| } catch (RecognitionException e) { |
| e.printStackTrace(); |
| throw new ParseException(parser.errors); |
| } |
| |
| if (lexer.getErrors().size() == 0 && parser.errors.size() == 0) { |
| LOG.info("Parse Completed"); |
| } else if (lexer.getErrors().size() != 0) { |
| throw new ParseException(lexer.getErrors()); |
| } else { |
| throw new ParseException(parser.errors); |
| } |
| |
| return (ASTNode) r.getTree(); |
| } |
| |
| /* |
| * parse a String as a Select List. This allows table functions to be passed expression Strings |
| * that are translated in |
| * the context they define at invocation time. Currently used by NPath to allow users to specify |
| * what output they want. |
| * NPath allows expressions n 'tpath' a column that represents the matched set of rows. This |
| * column doesn't exist in |
| * the input schema and hence the Result Expression cannot be analyzed by the regular Hive |
| * translation process. |
| */ |
| public ASTNode parseSelect(String command, Context ctx) throws ParseException { |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("Parsing command: " + command); |
| } |
| |
| HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command)); |
| TokenRewriteStream tokens = new TokenRewriteStream(lexer); |
| if (ctx != null) { |
| ctx.setTokenRewriteStream(tokens); |
| } |
| HiveParser parser = new HiveParser(tokens); |
| parser.setTreeAdaptor(adaptor); |
| HiveParser_SelectClauseParser.selectClause_return r = null; |
| try { |
| r = parser.selectClause(); |
| } catch (RecognitionException e) { |
| e.printStackTrace(); |
| throw new ParseException(parser.errors); |
| } |
| |
| if (lexer.getErrors().size() == 0 && parser.errors.size() == 0) { |
| LOG.debug("Parse Completed"); |
| } else if (lexer.getErrors().size() != 0) { |
| throw new ParseException(lexer.getErrors()); |
| } else { |
| throw new ParseException(parser.errors); |
| } |
| |
| return r.getTree(); |
| } |
| public ASTNode parseExpression(String command) throws ParseException { |
| LOG.info("Parsing expression: " + command); |
| |
| HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command)); |
| TokenRewriteStream tokens = new TokenRewriteStream(lexer); |
| HiveParser parser = new HiveParser(tokens); |
| parser.setTreeAdaptor(adaptor); |
| HiveParser_IdentifiersParser.expression_return r = null; |
| try { |
| r = parser.expression(); |
| } catch (RecognitionException e) { |
| e.printStackTrace(); |
| throw new ParseException(parser.errors); |
| } |
| |
| if (lexer.getErrors().size() == 0 && parser.errors.size() == 0) { |
| LOG.info("Parse Completed"); |
| } else if (lexer.getErrors().size() != 0) { |
| throw new ParseException(lexer.getErrors()); |
| } else { |
| throw new ParseException(parser.errors); |
| } |
| |
| return (ASTNode) r.getTree(); |
| } |
| |
| public ASTNode parseTriggerExpression(String command) throws ParseException { |
| HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command)); |
| TokenRewriteStream tokens = new TokenRewriteStream(lexer); |
| HiveParser parser = new HiveParser(tokens); |
| parser.setTreeAdaptor(adaptor); |
| HiveParser_ResourcePlanParser.triggerExpressionStandalone_return r = null; |
| try { |
| r = parser.gResourcePlanParser.triggerExpressionStandalone(); |
| } catch (RecognitionException e) { |
| e.printStackTrace(); |
| throw new ParseException(parser.errors); |
| } |
| if (lexer.getErrors().size() != 0) { |
| throw new ParseException(lexer.getErrors()); |
| } else if (parser.errors.size() != 0) { |
| throw new ParseException(parser.errors); |
| } |
| |
| return r.getTree(); |
| } |
| |
| public ASTNode parseTriggerActionExpression(String command) throws ParseException { |
| HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command)); |
| TokenRewriteStream tokens = new TokenRewriteStream(lexer); |
| HiveParser parser = new HiveParser(tokens); |
| parser.setTreeAdaptor(adaptor); |
| HiveParser_ResourcePlanParser.triggerActionExpressionStandalone_return r = null; |
| try { |
| r = parser.gResourcePlanParser.triggerActionExpressionStandalone(); |
| } catch (RecognitionException e) { |
| e.printStackTrace(); |
| throw new ParseException(parser.errors); |
| } |
| if (lexer.getErrors().size() != 0) { |
| throw new ParseException(lexer.getErrors()); |
| } else if (parser.errors.size() != 0) { |
| throw new ParseException(parser.errors); |
| } |
| |
| return r.getTree(); |
| } |
| } |