| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hadoop.hive.ql.parse; |
| |
| import java.sql.Date; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.LinkedHashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.Stack; |
| |
| import org.apache.commons.lang.StringUtils; |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.hive.ql.ErrorMsg; |
| import org.apache.hadoop.hive.ql.exec.ColumnInfo; |
| import org.apache.hadoop.hive.ql.exec.FunctionInfo; |
| import org.apache.hadoop.hive.ql.exec.FunctionRegistry; |
| import org.apache.hadoop.hive.ql.exec.UDFArgumentException; |
| import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; |
| import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; |
| import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; |
| import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; |
| import org.apache.hadoop.hive.ql.lib.Dispatcher; |
| import org.apache.hadoop.hive.ql.lib.GraphWalker; |
| import org.apache.hadoop.hive.ql.lib.Node; |
| import org.apache.hadoop.hive.ql.lib.NodeProcessor; |
| import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; |
| import org.apache.hadoop.hive.ql.lib.Rule; |
| import org.apache.hadoop.hive.ql.lib.RuleRegExp; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; |
| import org.apache.hadoop.hive.ql.udf.SettableUDF; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; |
| import org.apache.hadoop.hive.serde.serdeConstants; |
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; |
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; |
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
| |
| /** |
 * The factory for creating typecheck processors. The typecheck processors are
 * used to process the syntax trees of expressions and convert them into
 * expression node descriptor (ExprNodeDesc) trees. They also insert the
 * correct conversion functions to do proper implicit type conversion.
| */ |
| public final class TypeCheckProcFactory { |
| |
| protected static final Log LOG = LogFactory.getLog(TypeCheckProcFactory.class |
| .getName()); |
| |
| private TypeCheckProcFactory() { |
| // prevent instantiation |
| } |
| |
| /** |
| * Function to do groupby subexpression elimination. This is called by all the |
| * processors initially. As an example, consider the query select a+b, |
| * count(1) from T group by a+b; Then a+b is already precomputed in the group |
| * by operators key, so we substitute a+b in the select list with the internal |
| * column name of the a+b expression that appears in the in input row |
| * resolver. |
| * |
| * @param nd |
| * The node that is being inspected. |
| * @param procCtx |
| * The processor context. |
| * |
| * @return exprNodeColumnDesc. |
| */ |
| public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) |
| throws SemanticException { |
    // We recursively create the exprNodeDesc. Base cases: when we encounter
    // a column ref, we convert that into an exprNodeColumnDesc; when we
    // encounter a constant, we convert that into an exprNodeConstantDesc.
    // For others we just build the exprNodeFuncDesc with recursively built
    // children.
| ASTNode expr = (ASTNode) nd; |
| TypeCheckCtx ctx = (TypeCheckCtx) procCtx; |
| |
    if (ctx == null) {
      return null;
    }

    RowResolver input = ctx.getInputRR();
    if (input == null) {
      return null;
    }

    ExprNodeDesc desc = null;
| |
| // If the current subExpression is pre-calculated, as in Group-By etc. |
| ColumnInfo colInfo = input.getExpression(expr); |
| if (colInfo != null) { |
| desc = new ExprNodeColumnDesc(colInfo.getType(), colInfo |
| .getInternalName(), colInfo.getTabAlias(), colInfo |
| .getIsVirtualCol()); |
| ASTNode source = input.getExpressionSource(expr); |
| if (source != null) { |
| ctx.getUnparseTranslator().addCopyTranslation(expr, source); |
| } |
| return desc; |
| } |
    return null;
| } |
| |
| public static Map<ASTNode, ExprNodeDesc> genExprNode(ASTNode expr, |
| TypeCheckCtx tcCtx) throws SemanticException { |
    // Create the walker, the rules dispatcher and the context.
    // The walker traverses the expression tree in a DFS manner while
    // maintaining a stack of ancestor nodes; as each node is visited, the
    // dispatcher converts it into an ExprNodeDesc.
| Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); |
| |
| opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"), |
| getNullExprProcessor()); |
| opRules.put(new RuleRegExp("R2", HiveParser.Number + "%|" + |
| HiveParser.TinyintLiteral + "%|" + |
| HiveParser.SmallintLiteral + "%|" + |
| HiveParser.BigintLiteral + "%|" + |
| HiveParser.DecimalLiteral + "%"), |
| getNumExprProcessor()); |
    opRules.put(new RuleRegExp("R3", HiveParser.Identifier + "%|"
        + HiveParser.StringLiteral + "%|" + HiveParser.TOK_CHARSETLITERAL + "%|"
        + HiveParser.TOK_STRINGLITERALSEQUENCE + "%|"
        + HiveParser.KW_IF + "%|" + HiveParser.KW_CASE + "%|"
        + HiveParser.KW_WHEN + "%|" + HiveParser.KW_IN + "%|"
        + HiveParser.KW_ARRAY + "%|" + HiveParser.KW_MAP + "%|"
        + HiveParser.KW_STRUCT + "%"),
        getStrExprProcessor());
| opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|" |
| + HiveParser.KW_FALSE + "%"), getBoolExprProcessor()); |
| opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%"), getDateExprProcessor()); |
| opRules.put(new RuleRegExp("R6", HiveParser.TOK_TABLE_OR_COL + "%"), |
| getColumnExprProcessor()); |
| |
| // The dispatcher fires the processor corresponding to the closest matching |
| // rule and passes the context along |
| Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), |
| opRules, tcCtx); |
| GraphWalker ogw = new DefaultGraphWalker(disp); |
| |
    // Create a list of top-level nodes to start walking from
| ArrayList<Node> topNodes = new ArrayList<Node>(); |
| topNodes.add(expr); |
| HashMap<Node, Object> nodeOutputs = new LinkedHashMap<Node, Object>(); |
| ogw.startWalking(topNodes, nodeOutputs); |
| |
| return convert(nodeOutputs); |
| } |
| |
| // temporary type-safe casting |
| private static Map<ASTNode, ExprNodeDesc> convert(Map<Node, Object> outputs) { |
| Map<ASTNode, ExprNodeDesc> converted = new LinkedHashMap<ASTNode, ExprNodeDesc>(); |
| for (Map.Entry<Node, Object> entry : outputs.entrySet()) { |
| if (entry.getKey() instanceof ASTNode && |
| (entry.getValue() == null || entry.getValue() instanceof ExprNodeDesc)) { |
| converted.put((ASTNode)entry.getKey(), (ExprNodeDesc)entry.getValue()); |
| } else { |
| LOG.warn("Invalid type entry " + entry); |
| } |
| } |
| return converted; |
| } |
| |
| /** |
| * Processor for processing NULL expression. |
| */ |
| public static class NullExprProcessor implements NodeProcessor { |
| |
| @Override |
| public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, |
| Object... nodeOutputs) throws SemanticException { |
| |
| TypeCheckCtx ctx = (TypeCheckCtx) procCtx; |
| if (ctx.getError() != null) { |
| return null; |
| } |
| |
| ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); |
| if (desc != null) { |
| return desc; |
| } |
| |
| return new ExprNodeNullDesc(); |
| } |
| |
| } |
| |
| /** |
| * Factory method to get NullExprProcessor. |
| * |
| * @return NullExprProcessor. |
| */ |
| public static NullExprProcessor getNullExprProcessor() { |
| return new NullExprProcessor(); |
| } |
| |
| /** |
| * Processor for processing numeric constants. |
| */ |
| public static class NumExprProcessor implements NodeProcessor { |
| |
| @Override |
| public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, |
| Object... nodeOutputs) throws SemanticException { |
| |
| TypeCheckCtx ctx = (TypeCheckCtx) procCtx; |
| if (ctx.getError() != null) { |
| return null; |
| } |
| |
| ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); |
| if (desc != null) { |
| return desc; |
| } |
| |
| Number v = null; |
| ASTNode expr = (ASTNode) nd; |
      // The expression can be any one of Double, Long and Integer. The parses
      // below deliberately run from the most general type to the most
      // specific one: each successful parse overwrites v, and the first
      // NumberFormatException aborts the chain, so v ends up holding the most
      // specific type that can represent the literal.
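      // For example (illustrative): "100Y" parses as a Byte (tinyint), "100S"
      // as a Short (smallint), "100L" as a Long (bigint), "1.5BD" as a decimal
      // constant, "1.5" as a Double, and a plain "100" as an Integer.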
| try { |
| if (expr.getText().endsWith("L")) { |
| // Literal bigint. |
| v = Long.valueOf(expr.getText().substring( |
| 0, expr.getText().length() - 1)); |
| } else if (expr.getText().endsWith("S")) { |
| // Literal smallint. |
| v = Short.valueOf(expr.getText().substring( |
| 0, expr.getText().length() - 1)); |
| } else if (expr.getText().endsWith("Y")) { |
| // Literal tinyint. |
| v = Byte.valueOf(expr.getText().substring( |
| 0, expr.getText().length() - 1)); |
| } else if (expr.getText().endsWith("BD")) { |
| // Literal decimal |
| return new ExprNodeConstantDesc(TypeInfoFactory.decimalTypeInfo, |
| expr.getText().substring(0, expr.getText().length() - 2)); |
| } else { |
| v = Double.valueOf(expr.getText()); |
| v = Long.valueOf(expr.getText()); |
| v = Integer.valueOf(expr.getText()); |
| } |
| } catch (NumberFormatException e) { |
| // do nothing here, we will throw an exception in the following block |
| } |
| if (v == null) { |
| throw new SemanticException(ErrorMsg.INVALID_NUMERICAL_CONSTANT |
| .getMsg(expr)); |
| } |
| return new ExprNodeConstantDesc(v); |
| } |
| |
| } |
| |
| /** |
| * Factory method to get NumExprProcessor. |
| * |
| * @return NumExprProcessor. |
| */ |
| public static NumExprProcessor getNumExprProcessor() { |
| return new NumExprProcessor(); |
| } |
| |
| /** |
| * Processor for processing string constants. |
| */ |
| public static class StrExprProcessor implements NodeProcessor { |
| |
| @Override |
| public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, |
| Object... nodeOutputs) throws SemanticException { |
| |
| TypeCheckCtx ctx = (TypeCheckCtx) procCtx; |
| if (ctx.getError() != null) { |
| return null; |
| } |
| |
| ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); |
| if (desc != null) { |
| return desc; |
| } |
| |
| ASTNode expr = (ASTNode) nd; |
| String str = null; |
| |
| switch (expr.getToken().getType()) { |
| case HiveParser.StringLiteral: |
| str = BaseSemanticAnalyzer.unescapeSQLString(expr.getText()); |
| break; |
| case HiveParser.TOK_STRINGLITERALSEQUENCE: |
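        // e.g. the adjacent literals 'ab' 'cd' are concatenated into the
        // single string constant "abcd".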
| StringBuilder sb = new StringBuilder(); |
| for (Node n : expr.getChildren()) { |
| sb.append( |
| BaseSemanticAnalyzer.unescapeSQLString(((ASTNode)n).getText())); |
| } |
| str = sb.toString(); |
| break; |
| case HiveParser.TOK_CHARSETLITERAL: |
| str = BaseSemanticAnalyzer.charSetString(expr.getChild(0).getText(), |
| expr.getChild(1).getText()); |
| break; |
      default:
        // HiveParser.Identifier | HiveParser.KW_IF | HiveParser.KW_LEFT |
        // HiveParser.KW_RIGHT
        str = BaseSemanticAnalyzer.unescapeIdentifier(expr.getText());
        break;
| } |
| return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str); |
| } |
| |
| } |
| |
| /** |
| * Factory method to get StrExprProcessor. |
| * |
| * @return StrExprProcessor. |
| */ |
| public static StrExprProcessor getStrExprProcessor() { |
| return new StrExprProcessor(); |
| } |
| |
| /** |
| * Processor for boolean constants. |
| */ |
| public static class BoolExprProcessor implements NodeProcessor { |
| |
| @Override |
| public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, |
| Object... nodeOutputs) throws SemanticException { |
| |
| TypeCheckCtx ctx = (TypeCheckCtx) procCtx; |
| if (ctx.getError() != null) { |
| return null; |
| } |
| |
| ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); |
| if (desc != null) { |
| return desc; |
| } |
| |
| ASTNode expr = (ASTNode) nd; |
| Boolean bool = null; |
| |
| switch (expr.getToken().getType()) { |
| case HiveParser.KW_TRUE: |
| bool = Boolean.TRUE; |
| break; |
| case HiveParser.KW_FALSE: |
| bool = Boolean.FALSE; |
| break; |
| default: |
| assert false; |
| } |
| return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, bool); |
| } |
| |
| } |
| |
| /** |
| * Factory method to get BoolExprProcessor. |
| * |
| * @return BoolExprProcessor. |
| */ |
| public static BoolExprProcessor getBoolExprProcessor() { |
| return new BoolExprProcessor(); |
| } |
| |
| /** |
| * Processor for date constants. |
| */ |
| public static class DateExprProcessor implements NodeProcessor { |
| |
| @Override |
| public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, |
| Object... nodeOutputs) throws SemanticException { |
| |
| TypeCheckCtx ctx = (TypeCheckCtx) procCtx; |
| if (ctx.getError() != null) { |
| return null; |
| } |
| |
| ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); |
| if (desc != null) { |
| return desc; |
| } |
| |
| ASTNode expr = (ASTNode) nd; |
| |
| // Get the string value and convert to a Date value. |
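      // e.g. the literal DATE '2013-01-01' (example value) has its quotes
      // stripped and is parsed by java.sql.Date.valueOf into a Date constant.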
| try { |
| String dateString = BaseSemanticAnalyzer.stripQuotes(expr.getText()); |
| Date date = Date.valueOf(dateString); |
| return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, date); |
| } catch (IllegalArgumentException err) { |
| throw new SemanticException("Unable to convert date literal string to date value.", err); |
| } |
| } |
| } |
| |
| /** |
| * Factory method to get DateExprProcessor. |
| * |
| * @return DateExprProcessor. |
| */ |
| public static DateExprProcessor getDateExprProcessor() { |
| return new DateExprProcessor(); |
| } |
| |
| /** |
| * Processor for table columns. |
| */ |
| public static class ColumnExprProcessor implements NodeProcessor { |
| |
| @Override |
| public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, |
| Object... nodeOutputs) throws SemanticException { |
| |
| TypeCheckCtx ctx = (TypeCheckCtx) procCtx; |
| if (ctx.getError() != null) { |
| return null; |
| } |
| |
| ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); |
| if (desc != null) { |
| return desc; |
| } |
| |
| ASTNode expr = (ASTNode) nd; |
| ASTNode parent = stack.size() > 1 ? (ASTNode) stack.get(stack.size() - 2) : null; |
| RowResolver input = ctx.getInputRR(); |
| |
| if (expr.getType() != HiveParser.TOK_TABLE_OR_COL) { |
| ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr), expr); |
| return null; |
| } |
| |
| assert (expr.getChildCount() == 1); |
| String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr |
| .getChild(0).getText()); |
| |
| boolean isTableAlias = input.hasTableAlias(tableOrCol); |
| ColumnInfo colInfo = input.get(null, tableOrCol); |
| |
| if (isTableAlias) { |
| if (colInfo != null) { |
| if (parent != null && parent.getType() == HiveParser.DOT) { |
| // It's a table alias. |
| return null; |
| } |
| // It's a column. |
| return new ExprNodeColumnDesc(colInfo.getType(), colInfo |
| .getInternalName(), colInfo.getTabAlias(), colInfo |
| .getIsVirtualCol()); |
| } else { |
| // It's a table alias. |
| // We will process that later in DOT. |
| return null; |
| } |
| } else { |
| if (colInfo == null) { |
| // It's not a column or a table alias. |
| if (input.getIsExprResolver()) { |
| ASTNode exprNode = expr; |
| if (!stack.empty()) { |
| ASTNode tmp = (ASTNode) stack.pop(); |
| if (!stack.empty()) { |
| exprNode = (ASTNode) stack.peek(); |
| } |
| stack.push(tmp); |
| } |
| ctx.setError(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(exprNode), expr); |
| return null; |
| } else { |
| List<String> possibleColumnNames = input.getReferenceableColumnAliases(tableOrCol, -1); |
| String reason = String.format("(possible column names are: %s)", |
| StringUtils.join(possibleColumnNames, ", ")); |
| ctx.setError(ErrorMsg.INVALID_TABLE_OR_COLUMN.getMsg(expr.getChild(0), reason), |
| expr); |
| LOG.debug(ErrorMsg.INVALID_TABLE_OR_COLUMN.toString() + ":" |
| + input.toString()); |
| return null; |
| } |
| } else { |
| // It's a column. |
| ExprNodeColumnDesc exprNodColDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo |
| .getInternalName(), colInfo.getTabAlias(), colInfo |
| .getIsVirtualCol()); |
| exprNodColDesc.setSkewedCol(colInfo.isSkewedCol()); |
| return exprNodColDesc; |
| } |
| } |
| |
| } |
| |
| } |
| |
| /** |
| * Factory method to get ColumnExprProcessor. |
| * |
| * @return ColumnExprProcessor. |
| */ |
| public static ColumnExprProcessor getColumnExprProcessor() { |
| return new ColumnExprProcessor(); |
| } |
| |
| /** |
| * The default processor for typechecking. |
| */ |
| public static class DefaultExprProcessor implements NodeProcessor { |
| |
| static HashMap<Integer, String> specialUnaryOperatorTextHashMap; |
| static HashMap<Integer, String> specialFunctionTextHashMap; |
| static HashMap<Integer, String> conversionFunctionTextHashMap; |
| static HashSet<Integer> windowingTokens; |
| static { |
| specialUnaryOperatorTextHashMap = new HashMap<Integer, String>(); |
| specialUnaryOperatorTextHashMap.put(HiveParser.PLUS, "positive"); |
| specialUnaryOperatorTextHashMap.put(HiveParser.MINUS, "negative"); |
| specialFunctionTextHashMap = new HashMap<Integer, String>(); |
| specialFunctionTextHashMap.put(HiveParser.TOK_ISNULL, "isnull"); |
| specialFunctionTextHashMap.put(HiveParser.TOK_ISNOTNULL, "isnotnull"); |
| conversionFunctionTextHashMap = new HashMap<Integer, String>(); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_BOOLEAN, |
| serdeConstants.BOOLEAN_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_TINYINT, |
| serdeConstants.TINYINT_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_SMALLINT, |
| serdeConstants.SMALLINT_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_INT, |
| serdeConstants.INT_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_BIGINT, |
| serdeConstants.BIGINT_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_FLOAT, |
| serdeConstants.FLOAT_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_DOUBLE, |
| serdeConstants.DOUBLE_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_STRING, |
| serdeConstants.STRING_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_VARCHAR, |
| serdeConstants.VARCHAR_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_BINARY, |
| serdeConstants.BINARY_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_DATE, |
| serdeConstants.DATE_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_TIMESTAMP, |
| serdeConstants.TIMESTAMP_TYPE_NAME); |
| conversionFunctionTextHashMap.put(HiveParser.TOK_DECIMAL, |
| serdeConstants.DECIMAL_TYPE_NAME); |
| |
| windowingTokens = new HashSet<Integer>(); |
| windowingTokens.add(HiveParser.KW_OVER); |
| windowingTokens.add(HiveParser.TOK_PARTITIONINGSPEC); |
| windowingTokens.add(HiveParser.TOK_DISTRIBUTEBY); |
| windowingTokens.add(HiveParser.TOK_SORTBY); |
| windowingTokens.add(HiveParser.TOK_CLUSTERBY); |
| windowingTokens.add(HiveParser.TOK_WINDOWSPEC); |
| windowingTokens.add(HiveParser.TOK_WINDOWRANGE); |
| windowingTokens.add(HiveParser.TOK_WINDOWVALUES); |
| windowingTokens.add(HiveParser.KW_UNBOUNDED); |
| windowingTokens.add(HiveParser.KW_PRECEDING); |
| windowingTokens.add(HiveParser.KW_FOLLOWING); |
| windowingTokens.add(HiveParser.KW_CURRENT); |
| windowingTokens.add(HiveParser.TOK_TABSORTCOLNAMEASC); |
| windowingTokens.add(HiveParser.TOK_TABSORTCOLNAMEDESC); |
| } |
| |
| private static boolean isRedundantConversionFunction(ASTNode expr, |
| boolean isFunction, ArrayList<ExprNodeDesc> children) { |
| if (!isFunction) { |
| return false; |
| } |
| // conversion functions take a single parameter |
| if (children.size() != 1) { |
| return false; |
| } |
| String funcText = conversionFunctionTextHashMap.get(((ASTNode) expr |
| .getChild(0)).getType()); |
| // not a conversion function |
| if (funcText == null) { |
| return false; |
| } |
      // return true when the child type and the conversion target type are
      // the same
| return ((PrimitiveTypeInfo) children.get(0).getTypeInfo()).getTypeName() |
| .equalsIgnoreCase(funcText); |
| } |
| |
| public static String getFunctionText(ASTNode expr, boolean isFunction) { |
| String funcText = null; |
| if (!isFunction) { |
| // For operator, the function name is the operator text, unless it's in |
| // our special dictionary |
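        // e.g. unary "-" maps to "negative" and unary "+" maps to "positive",
        // while binary operators keep their own token text (e.g. "+").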
| if (expr.getChildCount() == 1) { |
| funcText = specialUnaryOperatorTextHashMap.get(expr.getType()); |
| } |
| if (funcText == null) { |
| funcText = expr.getText(); |
| } |
| } else { |
| // For TOK_FUNCTION, the function name is stored in the first child, |
| // unless it's in our |
| // special dictionary. |
| assert (expr.getChildCount() >= 1); |
| int funcType = ((ASTNode) expr.getChild(0)).getType(); |
| funcText = specialFunctionTextHashMap.get(funcType); |
| if (funcText == null) { |
| funcText = conversionFunctionTextHashMap.get(funcType); |
| } |
| if (funcText == null) { |
| funcText = ((ASTNode) expr.getChild(0)).getText(); |
| } |
| } |
| return BaseSemanticAnalyzer.unescapeIdentifier(funcText); |
| } |
| |
| /** |
| * This function create an ExprNodeDesc for a UDF function given the |
| * children (arguments). It will insert implicit type conversion functions |
| * if necessary. |
| * |
| * @throws UDFArgumentException |
| */ |
| static ExprNodeDesc getFuncExprNodeDescWithUdfData(String udfName, Object udfData, |
| ExprNodeDesc... children) throws UDFArgumentException { |
| |
| FunctionInfo fi = FunctionRegistry.getFunctionInfo(udfName); |
| if (fi == null) { |
| throw new UDFArgumentException(udfName + " not found."); |
| } |
| |
| GenericUDF genericUDF = fi.getGenericUDF(); |
| if (genericUDF == null) { |
| throw new UDFArgumentException(udfName |
| + " is an aggregation function or a table function."); |
| } |
| |
      // Attach udfData to the UDF if the UDF accepts settable parameters
      if (udfData != null && genericUDF instanceof SettableUDF) {
        ((SettableUDF) genericUDF).setParams(udfData);
      }
| |
| List<ExprNodeDesc> childrenList = new ArrayList<ExprNodeDesc>(children.length); |
| childrenList.addAll(Arrays.asList(children)); |
| return ExprNodeGenericFuncDesc.newInstance(genericUDF, childrenList); |
| } |
| |
| public static ExprNodeDesc getFuncExprNodeDesc(String udfName, |
| ExprNodeDesc... children) throws UDFArgumentException { |
| return getFuncExprNodeDescWithUdfData(udfName, null, children); |
| } |
| |
| static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, |
| boolean isFunction, ArrayList<ExprNodeDesc> children, TypeCheckCtx ctx) |
| throws SemanticException, UDFArgumentException { |
| // return the child directly if the conversion is redundant. |
| if (isRedundantConversionFunction(expr, isFunction, children)) { |
| assert (children.size() == 1); |
| assert (children.get(0) != null); |
| return children.get(0); |
| } |
| String funcText = getFunctionText(expr, isFunction); |
| ExprNodeDesc desc; |
| if (funcText.equals(".")) { |
| // "." : FIELD Expression |
| assert (children.size() == 2); |
| // Only allow constant field name for now |
| assert (children.get(1) instanceof ExprNodeConstantDesc); |
| ExprNodeDesc object = children.get(0); |
| ExprNodeConstantDesc fieldName = (ExprNodeConstantDesc) children.get(1); |
| assert (fieldName.getValue() instanceof String); |
| |
| // Calculate result TypeInfo |
| String fieldNameString = (String) fieldName.getValue(); |
| TypeInfo objectTypeInfo = object.getTypeInfo(); |
| |
| // Allow accessing a field of list element structs directly from a list |
| boolean isList = (object.getTypeInfo().getCategory() == ObjectInspector.Category.LIST); |
| if (isList) { |
| objectTypeInfo = ((ListTypeInfo) objectTypeInfo) |
| .getListElementTypeInfo(); |
| } |
| if (objectTypeInfo.getCategory() != Category.STRUCT) { |
| throw new SemanticException(ErrorMsg.INVALID_DOT.getMsg(expr)); |
| } |
| TypeInfo t = ((StructTypeInfo) objectTypeInfo) |
| .getStructFieldTypeInfo(fieldNameString); |
| if (isList) { |
| t = TypeInfoFactory.getListTypeInfo(t); |
| } |
| |
| desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString, |
| isList); |
| |
| } else if (funcText.equals("[")) { |
| // "[]" : LSQUARE/INDEX Expression |
| assert (children.size() == 2); |
| |
| // Check whether this is a list or a map |
| TypeInfo myt = children.get(0).getTypeInfo(); |
| |
| if (myt.getCategory() == Category.LIST) { |
| // Only allow integer index for now |
| if (!(children.get(1) instanceof ExprNodeConstantDesc) |
| || !(((ExprNodeConstantDesc) children.get(1)).getTypeInfo() |
| .equals(TypeInfoFactory.intTypeInfo))) { |
| throw new SemanticException(SemanticAnalyzer.generateErrorMessage( |
| expr, |
| ErrorMsg.INVALID_ARRAYINDEX_CONSTANT.getMsg())); |
| } |
| |
| // Calculate TypeInfo |
| TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo(); |
| desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry |
| .getGenericUDFForIndex(), children); |
| } else if (myt.getCategory() == Category.MAP) { |
| // Only allow constant map key for now |
| if (!(children.get(1) instanceof ExprNodeConstantDesc)) { |
| throw new SemanticException(SemanticAnalyzer.generateErrorMessage( |
| expr, |
| ErrorMsg.INVALID_MAPINDEX_CONSTANT.getMsg())); |
| } |
| if (!(((ExprNodeConstantDesc) children.get(1)).getTypeInfo() |
| .equals(((MapTypeInfo) myt).getMapKeyTypeInfo()))) { |
| throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_TYPE |
| .getMsg(expr)); |
| } |
| // Calculate TypeInfo |
| TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo(); |
| desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry |
| .getGenericUDFForIndex(), children); |
| } else { |
| throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr, |
| myt.getTypeName())); |
| } |
| } else { |
| // other operators or functions |
| FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcText); |
| |
| if (fi == null) { |
| if (isFunction) { |
| throw new SemanticException(ErrorMsg.INVALID_FUNCTION |
| .getMsg((ASTNode) expr.getChild(0))); |
| } else { |
| throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr)); |
| } |
| } |
| |
| // getGenericUDF() actually clones the UDF. Just call it once and reuse. |
| GenericUDF genericUDF = fi.getGenericUDF(); |
| |
| if (!fi.isNative()) { |
| ctx.getUnparseTranslator().addIdentifierTranslation( |
| (ASTNode) expr.getChild(0)); |
| } |
| |
| // Handle type casts that may contain type parameters |
| if (isFunction) { |
| ASTNode funcNameNode = (ASTNode)expr.getChild(0); |
| switch (funcNameNode.getType()) { |
| case HiveParser.TOK_VARCHAR: |
| // Add type params |
            VarcharTypeParams varcharTypeParams = new VarcharTypeParams();
            varcharTypeParams.length = Integer.valueOf(funcNameNode.getChild(0).getText());
            if (genericUDF instanceof SettableUDF) {
              ((SettableUDF) genericUDF).setParams(varcharTypeParams);
            }
| break; |
| default: |
| // Do nothing |
| break; |
| } |
| } |
| |
        // Detect UDTFs in nested SELECT, GROUP BY, etc., as they aren't
        // supported
| if (fi.getGenericUDTF() != null) { |
| throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg()); |
| } |
        // UDAFs are invalid in a filter condition, group-by clause, function
        // parameter, etc.
| if (fi.getGenericUDAFResolver() != null) { |
| if (isFunction) { |
| throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION. |
| getMsg((ASTNode) expr.getChild(0))); |
| } else { |
| throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr)); |
| } |
| } |
| if (!ctx.getAllowStatefulFunctions() && (genericUDF != null)) { |
| if (FunctionRegistry.isStateful(genericUDF)) { |
| throw new SemanticException( |
| ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg()); |
| } |
| } |
| |
        // Try to infer the type of the constant only if there are exactly two
        // children, one a column reference and the other a constant
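        // For example (illustrative): in "WHERE tinyint_col = 100" the int
        // literal 100 is re-created below as a Byte constant so no cast is
        // forced onto the column side of the comparison.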
| if (genericUDF instanceof GenericUDFBaseCompare |
| && children.size() == 2 |
| && ((children.get(0) instanceof ExprNodeConstantDesc |
| && children.get(1) instanceof ExprNodeColumnDesc) |
| || (children.get(0) instanceof ExprNodeColumnDesc |
| && children.get(1) instanceof ExprNodeConstantDesc))) { |
| int constIdx = |
| children.get(0) instanceof ExprNodeConstantDesc ? 0 : 1; |
| |
| Set<String> inferTypes = new HashSet<String>(Arrays.asList( |
| serdeConstants.TINYINT_TYPE_NAME.toLowerCase(), |
| serdeConstants.SMALLINT_TYPE_NAME.toLowerCase(), |
| serdeConstants.INT_TYPE_NAME.toLowerCase(), |
| serdeConstants.BIGINT_TYPE_NAME.toLowerCase(), |
| serdeConstants.FLOAT_TYPE_NAME.toLowerCase(), |
| serdeConstants.DOUBLE_TYPE_NAME.toLowerCase(), |
| serdeConstants.STRING_TYPE_NAME.toLowerCase() |
| )); |
| |
| String constType = children.get(constIdx).getTypeString().toLowerCase(); |
| String columnType = children.get(1 - constIdx).getTypeString().toLowerCase(); |
| |
| if (inferTypes.contains(constType) && inferTypes.contains(columnType) |
| && !columnType.equalsIgnoreCase(constType)) { |
| Object originalValue = ((ExprNodeConstantDesc) children.get(constIdx)).getValue(); |
| String constValue = originalValue.toString(); |
| boolean triedDouble = false; |
| Number value = null; |
| try { |
              if (columnType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)) {
                value = Byte.valueOf(constValue);
              } else if (columnType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)) {
                value = Short.valueOf(constValue);
              } else if (columnType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) {
                value = Integer.valueOf(constValue);
              } else if (columnType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
                value = Long.valueOf(constValue);
              } else if (columnType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
                value = Float.valueOf(constValue);
              } else if (columnType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
                triedDouble = true;
                value = Double.valueOf(constValue);
              } else if (columnType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) {
                // Don't scramble the const type information if comparing to a
                // string column: it's not useful to do so. As of now, there is
                // also a hack in SemanticAnalyzer#genTablePlan that causes
                // every column to look like a string down here, so number type
                // information would always be lost otherwise.
                boolean isNumber = (originalValue instanceof Number);
                triedDouble = !isNumber;
                value = isNumber ? (Number) originalValue : Double.valueOf(constValue);
              }
| } catch (NumberFormatException nfe) { |
            // The precise type inference did not succeed, so we'll try again
            // to convert the constant to a double. However, if we already
            // tried double, or the operator is EQUAL and the column is a
            // non-string (numeric) type, the comparison can never hold, so
            // fold it to the constant false.
| if (triedDouble || |
| (genericUDF instanceof GenericUDFOPEqual |
| && !columnType.equals(serdeConstants.STRING_TYPE_NAME))) { |
| return new ExprNodeConstantDesc(false); |
| } |
| |
| try { |
              value = Double.valueOf(constValue);
| } catch (NumberFormatException ex) { |
| return new ExprNodeConstantDesc(false); |
| } |
| } |
| |
| if (value != null) { |
| children.set(constIdx, new ExprNodeConstantDesc(value)); |
| } |
| } |
| } |
| |
| desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, children); |
| } |
      // UDFOPPositive is a no-op. However, we still create it and then remove
      // it here, to make sure we only allow unary "+" for numeric types.
| if (FunctionRegistry.isOpPositive(desc)) { |
| assert (desc.getChildren().size() == 1); |
| desc = desc.getChildren().get(0); |
| } |
| assert (desc != null); |
| return desc; |
| } |
| |
| /** |
| * Returns true if des is a descendant of ans (ancestor) |
| */ |
| private boolean isDescendant(Node ans, Node des) { |
| if (ans.getChildren() == null) { |
| return false; |
| } |
| for (Node c : ans.getChildren()) { |
| if (c == des) { |
| return true; |
| } |
| if (isDescendant(c, des)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| @Override |
| public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, |
| Object... nodeOutputs) throws SemanticException { |
| |
| TypeCheckCtx ctx = (TypeCheckCtx) procCtx; |
| |
| ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); |
| if (desc != null) { |
        // Here we know nd represents a group by expression.

        // During the DFS traversal of the AST, a descendant of nd likely set an
        // error because a sub-tree of nd is unlikely to also be a group by
        // expression. For example, in a query such as
        // SELECT concat(key) FROM src GROUP BY concat(key), 'key' will be
        // processed before 'concat(key)' and since 'key' is not a group by
        // expression, an error will be set in ctx by ColumnExprProcessor.

        // We can clear the global error when we see that it was set in a
        // descendant node of a group by expression because
        // processGByExpr() returns an ExprNodeDesc that effectively ignores
        // its children. Although the error can be set multiple times by
        // descendant nodes, DFS traversal ensures that the error only needs to
        // be cleared once. Also, for a case like
        // SELECT concat(value, concat(value))... the logic still works as the
        // error is only set with the first 'value'; all node processors quit
        // early if the global error is set.
| |
| if (isDescendant(nd, ctx.getErrorSrcNode())) { |
| ctx.setError(null, null); |
| } |
| return desc; |
| } |
| |
| if (ctx.getError() != null) { |
| return null; |
| } |
| |
| ASTNode expr = (ASTNode) nd; |
| |
| /* |
| * A Windowing specification get added as a child to a UDAF invocation to distinguish it |
| * from similar UDAFs but on different windows. |
| * The UDAF is translated to a WindowFunction invocation in the PTFTranslator. |
| * So here we just return null for tokens that appear in a Window Specification. |
| * When the traversal reaches up to the UDAF invocation its ExprNodeDesc is build using the |
| * ColumnInfo in the InputRR. This is similar to how UDAFs are handled in Select lists. |
| * The difference is that there is translation for Window related tokens, so we just |
| * return null; |
| */ |
      if (windowingTokens.contains(expr.getType())) {
| return null; |
| } |
| |
| if (expr.getType() == HiveParser.TOK_TABNAME) { |
| return null; |
| } |
| |
| if (expr.getType() == HiveParser.TOK_ALLCOLREF) { |
| RowResolver input = ctx.getInputRR(); |
| ExprNodeColumnListDesc columnList = new ExprNodeColumnListDesc(); |
| assert expr.getChildCount() <= 1; |
| if (expr.getChildCount() == 1) { |
| // table aliased (select a.*, for example) |
| ASTNode child = (ASTNode) expr.getChild(0); |
| assert child.getType() == HiveParser.TOK_TABNAME; |
| assert child.getChildCount() == 1; |
| String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText()); |
| HashMap<String, ColumnInfo> columns = input.getFieldMap(tableAlias); |
| if (columns == null) { |
| throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(child)); |
| } |
| for (Map.Entry<String, ColumnInfo> colMap : columns.entrySet()) { |
| ColumnInfo colInfo = colMap.getValue(); |
| if (!colInfo.getIsVirtualCol()) { |
| columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(), |
| colInfo.getInternalName(), colInfo.getTabAlias(), false)); |
| } |
| } |
| } else { |
| // all columns (select *, for example) |
| for (ColumnInfo colInfo : input.getColumnInfos()) { |
| if (!colInfo.getIsVirtualCol()) { |
| columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(), |
| colInfo.getInternalName(), colInfo.getTabAlias(), false)); |
| } |
| } |
| } |
| return columnList; |
| } |
| |
| // If the first child is a TOK_TABLE_OR_COL, and nodeOutput[0] is NULL, |
| // and the operator is a DOT, then it's a table column reference. |
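      // e.g. for "t.key" where 't' is a resolved table alias: the
      // ColumnExprProcessor returned null for the TOK_TABLE_OR_COL child, and
      // nodeOutputs[1] holds the constant field name "key".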
| if (expr.getType() == HiveParser.DOT |
| && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL |
| && nodeOutputs[0] == null) { |
| |
| RowResolver input = ctx.getInputRR(); |
| String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr |
| .getChild(0).getChild(0).getText()); |
| // NOTE: tableAlias must be a valid non-ambiguous table alias, |
| // because we've checked that in TOK_TABLE_OR_COL's process method. |
| ColumnInfo colInfo = input.get(tableAlias, |
| ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString()); |
| |
| if (colInfo == null) { |
| ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); |
| return null; |
| } |
| return new ExprNodeColumnDesc(colInfo.getType(), colInfo |
| .getInternalName(), colInfo.getTabAlias(), colInfo |
| .getIsVirtualCol()); |
| } |
| |
      // Return null for conversion operators
| if (conversionFunctionTextHashMap.keySet().contains(expr.getType()) |
| || specialFunctionTextHashMap.keySet().contains(expr.getType()) |
| || expr.getToken().getType() == HiveParser.CharSetName |
| || expr.getToken().getType() == HiveParser.CharSetLiteral) { |
| return null; |
| } |
| |
| boolean isFunction = (expr.getType() == HiveParser.TOK_FUNCTION || |
| expr.getType() == HiveParser.TOK_FUNCTIONSTAR || |
| expr.getType() == HiveParser.TOK_FUNCTIONDI); |
| |
| // Create all children |
| int childrenBegin = (isFunction ? 1 : 0); |
| ArrayList<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(expr |
| .getChildCount() |
| - childrenBegin); |
| for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) { |
| if (nodeOutputs[ci] instanceof ExprNodeColumnListDesc) { |
| children.addAll(((ExprNodeColumnListDesc)nodeOutputs[ci]).getChildren()); |
| } else { |
| children.add((ExprNodeDesc) nodeOutputs[ci]); |
| } |
| } |
| |
| if (expr.getType() == HiveParser.TOK_FUNCTIONSTAR) { |
| RowResolver input = ctx.getInputRR(); |
| for (ColumnInfo colInfo : input.getColumnInfos()) { |
| if (!colInfo.getIsVirtualCol()) { |
| children.add(new ExprNodeColumnDesc(colInfo.getType(), |
| colInfo.getInternalName(), colInfo.getTabAlias(), false)); |
| } |
| } |
| } |
| |
      // If any of the children is null, then return null.
      // This is a hack for now to handle the group by case.
| if (children.contains(null)) { |
| RowResolver input = ctx.getInputRR(); |
| List<String> possibleColumnNames = input.getReferenceableColumnAliases(null, -1); |
| String reason = String.format("(possible column names are: %s)", |
| StringUtils.join(possibleColumnNames, ", ")); |
| ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(0), reason), |
| expr); |
| return null; |
| } |
| |
| // Create function desc |
| try { |
| return getXpathOrFuncExprNodeDesc(expr, isFunction, children, ctx); |
| } catch (UDFArgumentTypeException e) { |
| throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_TYPE.getMsg(expr |
| .getChild(childrenBegin + e.getArgumentId()), e.getMessage())); |
| } catch (UDFArgumentLengthException e) { |
| throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_LENGTH.getMsg( |
| expr, e.getMessage())); |
| } catch (UDFArgumentException e) { |
| throw new SemanticException(ErrorMsg.INVALID_ARGUMENT.getMsg(expr, e |
| .getMessage())); |
| } |
| } |
| |
| } |
| |
| /** |
| * Factory method to get DefaultExprProcessor. |
| * |
| * @return DefaultExprProcessor. |
| */ |
| public static DefaultExprProcessor getDefaultExprProcessor() { |
| return new DefaultExprProcessor(); |
| } |
| } |