tajo-plan/src/main/java/org/apache/tajo/plan/expr/EvalTreeUtil.java - tajo - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.tajo.plan.expr;

 import com.google.common.base.Function;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import org.apache.tajo.algebra.ColumnReferenceExpr;
 import org.apache.tajo.algebra.NamedExpr;
 import org.apache.tajo.algebra.OpType;
 import org.apache.tajo.annotation.Nullable;
 import org.apache.tajo.catalog.*;
 import org.apache.tajo.common.TajoDataTypes.DataType;
 import org.apache.tajo.datum.Datum;
 import org.apache.tajo.exception.TajoInternalError;
 import org.apache.tajo.plan.LogicalPlan;
 import org.apache.tajo.plan.Target;
 import org.apache.tajo.plan.util.ExprFinder;

 import java.util.*;

 public class EvalTreeUtil {

   public static void changeColumnRef(EvalNode node, String oldName, String newName) {
     node.postOrder(new ChangeColumnRefVisitor(oldName, newName));
   }

   public static int replace(EvalNode expr, EvalNode targetExpr, EvalNode tobeReplaced) {
     EvalReplaceVisitor replacer = new EvalReplaceVisitor(targetExpr, tobeReplaced);
     ReplaceContext context = new ReplaceContext();
     replacer.visit(context, expr, new Stack<>());
     return context.countOfReplaces;
   }

   private static class ReplaceContext {
     int countOfReplaces = 0;
   }

   public static class EvalReplaceVisitor extends BasicEvalNodeVisitor<ReplaceContext, EvalNode> {
     private EvalNode target;
     private EvalNode tobeReplaced;

     public EvalReplaceVisitor(EvalNode target, EvalNode tobeReplaced) {
       this.target = target;
       this.tobeReplaced = tobeReplaced;
     }

     @Override
     public EvalNode visit(ReplaceContext context, EvalNode evalNode, Stack<EvalNode> stack) {
       super.visit(context, evalNode, stack);

       if (evalNode.equals(target)) {
         context.countOfReplaces++;

         EvalNode parent = stack.peek();

         if (parent instanceof BetweenPredicateEval) {
           BetweenPredicateEval between = (BetweenPredicateEval) parent;
           if (between.getPredicand().equals(evalNode)) {
             between.setPredicand(tobeReplaced);
           }
           if (between.getBegin().equals(evalNode)) {
             between.setBegin(tobeReplaced);
           }
           if (between.getEnd().equals(evalNode)) {
             between.setEnd(tobeReplaced);
           }

         } else if (parent instanceof CaseWhenEval) {
           CaseWhenEval caseWhen = (CaseWhenEval) parent;

           // Here, we need to only consider only 'Else'
           // because IfElseEval is handled in the below condition.
           if (caseWhen.hasElse() && caseWhen.getElse().equals(evalNode)) {
             caseWhen.setElseResult(tobeReplaced);
           }
         } else if (parent instanceof CaseWhenEval.IfThenEval) {
           CaseWhenEval.IfThenEval ifThen = (CaseWhenEval.IfThenEval) parent;
           if (ifThen.getCondition().equals(evalNode)) {
             ifThen.setCondition(tobeReplaced);
           }
           if (ifThen.getResult().equals(evalNode)) {
             ifThen.setResult(tobeReplaced);
           }
        } else if (parent instanceof FunctionEval) {
           FunctionEval functionEval = (FunctionEval) parent;
           EvalNode [] arguments = functionEval.getArgs();
           for (int i = 0; i < arguments.length; i++) {
             if (arguments[i].equals(evalNode)) {
               arguments[i] = tobeReplaced;
             }
           }
           functionEval.setArgs(arguments);

         } else if (parent instanceof UnaryEval) {
           if (((UnaryEval)parent).getChild().equals(evalNode)) {
             ((UnaryEval)parent).setChild(tobeReplaced);
           }
         } else if (parent instanceof BinaryEval) {
           BinaryEval binary = (BinaryEval) parent;
           if (binary.getLeftExpr() != null && binary.getLeftExpr().equals(evalNode)) {
             binary.setLeftExpr(tobeReplaced);
           }
           if (binary.getRightExpr() != null && binary.getRightExpr().equals(evalNode)) {
             binary.setRightExpr(tobeReplaced);
           }
         }
       }

       return evalNode;
     }
   }

   /**
    * It finds unique columns from a EvalNode.
    */
   public static LinkedHashSet<Column> findUniqueColumns(EvalNode node) {
     UniqueColumnFinder finder = new UniqueColumnFinder();
     node.postOrder(finder);
     return finder.getColumnRefs();
   }

   public static List<Column> findAllColumnRefs(EvalNode node) {
     AllColumnRefFinder finder = new AllColumnRefFinder();
     node.postOrder(finder);
     return finder.getColumnRefs();
   }

   public static Schema getSchemaByTargets(Schema inputSchema, List<Target> targets) {
     return SchemaBuilder.builder().addAll(targets, new Function<Target, Column>() {
       @Override
       public Column apply(@javax.annotation.Nullable Target target) {
         return new Column(target.hasAlias() ? target.getAlias() : target.getEvalTree().getName(),
             getDomainByExpr(inputSchema, target.getEvalTree()));
       }
     }).build();
   }

   public static String columnsToStr(Collection<Column> columns) {
     StringBuilder sb = new StringBuilder();
     String prefix = "";
     for (Column column: columns) {
       sb.append(prefix).append(column.getQualifiedName());
       prefix = ",";
     }

     return sb.toString();
   }

   public static DataType getDomainByExpr(Schema inputSchema, EvalNode expr) {
     switch (expr.getType()) {
     case AND:
     case OR:
     case EQUAL:
     case NOT_EQUAL:
     case LTH:
     case LEQ:
     case GTH:
     case GEQ:
     case PLUS:
     case MINUS:
     case MULTIPLY:
     case DIVIDE:
     case CONST:
     case FUNCTION:
         return TypeConverter.convert(expr.getValueType());

     case FIELD:
       FieldEval fieldEval = (FieldEval) expr;
       return inputSchema.getColumn(fieldEval.getName()).getDataType();


     default:
       throw new TajoInternalError("Unknown expr type: " + expr.getType().toString());
     }
   }

   /**
    * Return all exprs to refer columns corresponding to the target.
    *
    * @param expr
    * @param target to be found
    * @return a list of exprs
    */
   public static Collection<EvalNode> getContainExpr(EvalNode expr, Column target) {
     Set<EvalNode> exprSet = Sets.newHashSet();
     getContainExpr(expr, target, exprSet);
     return exprSet;
   }

   /**
    * Return the counter to count the number of expression types individually.
    *
    * @param expr
    * @return
    */
   public static Map<EvalType, Integer> getExprCounters(EvalNode expr) {
     VariableCounter counter = new VariableCounter();
     expr.postOrder(counter);
     return counter.getCounter();
   }

   private static void getContainExpr(EvalNode expr, Column target, Set<EvalNode> exprSet) {
     switch (expr.getType()) {
     case EQUAL:
     case LTH:
     case LEQ:
     case GTH:
     case GEQ:
     case NOT_EQUAL:
       if (containColumnRef(expr, target)) {
         exprSet.add(expr);
       }
       break;
     default:
       break;
     }
   }

   /**
    * Examine if the expr contains the column reference corresponding
    * to the target column
    */
   public static boolean containColumnRef(EvalNode expr, Column target) {
     Set<Column> exprSet = findUniqueColumns(expr);
     return exprSet.contains(target);
   }

   /**
    * It separates a singular CNF-formed join condition into a join condition, a left join filter, and
    * right join filter.
    *
    * @param joinQual the original join condition
    * @param leftSchema Left table schema
    * @param rightSchema Left table schema
    * @return Three element EvalNodes, 0 - join condition, 1 - left join filter, 2 - right join filter.
    */
   public static EvalNode[] extractJoinConditions(EvalNode joinQual, Schema leftSchema, Schema rightSchema) {
     List<EvalNode> joinQuals = Lists.newArrayList();
     List<EvalNode> leftFilters = Lists.newArrayList();
     List<EvalNode> rightFilters = Lists.newArrayList();
     for (EvalNode eachQual : AlgebraicUtil.toConjunctiveNormalFormArray(joinQual)) {
       if (!(eachQual instanceof BinaryEval)) {
         continue; // todo 'between', etc.
       }
       BinaryEval binaryEval = (BinaryEval)eachQual;
       LinkedHashSet<Column> leftColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getLeftExpr());
       LinkedHashSet<Column> rightColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getRightExpr());
       boolean leftInLeft = leftSchema.containsAny(leftColumns);
       boolean rightInLeft = leftSchema.containsAny(rightColumns);
       boolean leftInRight = rightSchema.containsAny(leftColumns);
       boolean rightInRight = rightSchema.containsAny(rightColumns);

       boolean columnsFromLeft = leftInLeft || rightInLeft;
       boolean columnsFromRight = leftInRight || rightInRight;
       if (!columnsFromLeft && !columnsFromRight) {
         continue; // todo constant expression : this should be done in logical phase
       }
       if (columnsFromLeft ^ columnsFromRight) {
         if (columnsFromLeft) {
           leftFilters.add(eachQual);
         } else {
           rightFilters.add(eachQual);
         }
         continue;
       }
       if ((leftInLeft && rightInLeft) || (leftInRight && rightInRight)) {
         continue; // todo not allowed yet : this should be checked in logical phase
       }
       joinQuals.add(eachQual);
     }
     return new EvalNode[] {
         joinQuals.isEmpty() ? null : AlgebraicUtil.createSingletonExprFromCNF(joinQuals),
         leftFilters.isEmpty() ? null : AlgebraicUtil.createSingletonExprFromCNF(leftFilters),
         rightFilters.isEmpty() ? null : AlgebraicUtil.createSingletonExprFromCNF(rightFilters)
     };
   }

   /**
    * If a given expression is join condition, it returns TRUE. Otherwise, it returns FALSE.
    *
    * If three conditions are satisfied, we can recognize the expression as a equi join condition.
    * <ol>
    *   <li>An expression is an equal comparison expression.</li>
    *   <li>Both terms in an expression are column references.</li>
    *   <li>Both column references point come from different tables</li>
    * </ol>
    *
    * For theta join condition, we will use "an expression is a predicate including column references which come
    * from different two tables" instead of the first rule.
    *
    * @param expr EvalNode to be evaluated
    * @param includeThetaJoin If true, it will return equi as well as non-equi join conditions.
    *                         Otherwise, it only returns equi-join conditions.
    * @return True if it is join condition.
    */
   public static boolean isJoinQual(EvalNode expr, boolean includeThetaJoin) {
     return isJoinQual(null, null, null, expr, includeThetaJoin);
   }

   /**
    * If a given expression is join condition, it returns TRUE. Otherwise, it returns FALSE.
    *
    * If three conditions are satisfied, we can recognize the expression as a equi join condition.
    * <ol>
    *   <li>An expression is an equal comparison expression.</li>
    *   <li>Both terms in an expression are column references.</li>
    *   <li>Both column references point come from different tables</li>
    * </ol>
    *
    * For theta join condition, we will use "an expression is a predicate including column references which come
    * from different two tables" instead of the first rule.
    *
    * @param block if block is not null, it tracks the lineage of aliased name derived from complex expressions.
    * @param leftSchema Schema to be used to check if columns belong to different relations
    * @param rightSchema Schema to be used to check if columns belong to different relations
    * @param expr EvalNode to be evaluated
    * @param includeThetaJoin If true, it will return equi as well as non-equi join conditions.
    *                         Otherwise, it only returns equi-join conditions.
    * @return True if it is join condition.
    */
   public static boolean isJoinQual(@Nullable LogicalPlan.QueryBlock block,
                                    @Nullable Schema leftSchema, @Nullable Schema rightSchema,
                                    EvalNode expr, boolean includeThetaJoin) {

     if (expr instanceof BinaryEval) {
       boolean joinComparator;
       if (includeThetaJoin) {
         joinComparator = EvalType.isComparisonOperator(expr.getType());
       } else {
         joinComparator = expr.getType() == EvalType.EQUAL;
       }

       BinaryEval binaryEval = (BinaryEval) expr;
       boolean isBothTermFields = isSingleColumn(binaryEval.getLeftExpr()) && isSingleColumn(binaryEval.getRightExpr());

       Set<Column> leftColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getLeftExpr());
       Set<Column> rightColumns = EvalTreeUtil.findUniqueColumns(binaryEval.getRightExpr());

       boolean ensureColumnsOfDifferentTables = false;

       if (leftColumns.size() == 1 && rightColumns.size() == 1) { // ensure there is only one column of each table
         Column leftColumn = leftColumns.iterator().next();
         Column rightColumn = rightColumns.iterator().next();

         // ensure if both column belong to different tables
         if (block != null) {
           ensureColumnsOfDifferentTables = isJoinQualWithOnlyColumns(block, leftColumn, rightColumn);
         } else if (leftSchema != null && rightSchema != null) {
           ensureColumnsOfDifferentTables = isJoinQualwithSchemas(leftSchema, rightSchema, leftColumn, rightColumn);
         } else {
           ensureColumnsOfDifferentTables = isJoinQualWithOnlyColumns(null, leftColumn, rightColumn);
         }
       }

       return joinComparator && isBothTermFields && ensureColumnsOfDifferentTables;
     } else {
       return false;
     }
   }

   private static boolean isJoinQualwithSchemas(Schema leftSchema, Schema rightSchema, Column left, Column right) {

     boolean duplicated = leftSchema.contains(left) && rightSchema.contains(left);
     duplicated |= leftSchema.contains(right) && rightSchema.contains(right);

     if (duplicated) {
       return false;
     }

     boolean isJoinQual = leftSchema.contains(left) && rightSchema.contains(right);
     isJoinQual |= leftSchema.contains(right) && rightSchema.contains(left);

     return isJoinQual;
   }

   private static boolean isJoinQualWithOnlyColumns(@Nullable LogicalPlan.QueryBlock block,
                                                    Column left, Column right) {
     String leftQualifier = CatalogUtil.extractQualifier(left.getQualifiedName());
     String rightQualifier = CatalogUtil.extractQualifier(right.getQualifiedName());

     // if block is given, it will track an original expression of each term in order to decide whether
     // this expression is a join condition, or not.
     if (block != null) {
       boolean leftQualified = CatalogUtil.isFQColumnName(left.getQualifiedName());
       boolean rightQualified = CatalogUtil.isFQColumnName(right.getQualifiedName());

       if (!leftQualified) { // if left one is aliased name

         // getting original expression of left term
         NamedExpr rawExpr = block.getNamedExprsManager().getNamedExpr(left.getQualifiedName());
         Set<ColumnReferenceExpr> foundColumns = ExprFinder.finds(rawExpr.getExpr(), OpType.Column);

         // ensure there is only one column of an original expression
         if (foundColumns.size() == 1) {
           leftQualifier = CatalogUtil.extractQualifier(foundColumns.iterator().next().getCanonicalName());
         }
       }
       if (!rightQualified) { // if right one is aliased name

         // getting original expression of right term
         NamedExpr rawExpr = block.getNamedExprsManager().getNamedExpr(right.getQualifiedName());
         Set<ColumnReferenceExpr> foundColumns = ExprFinder.finds(rawExpr.getExpr(), OpType.Column);

         // ensure there is only one column of an original expression
         if (foundColumns.size() == 1) {
           rightQualifier = CatalogUtil.extractQualifier(foundColumns.iterator().next().getCanonicalName());
         }
       }
     }

     // if columns of both term is different to each other, it will be true.
     return !leftQualifier.equals(rightQualifier);
   }

   public static boolean isSingleColumn(EvalNode evalNode) {
     return EvalTreeUtil.findUniqueColumns(evalNode).size() == 1;
   }

   public static class ChangeColumnRefVisitor implements EvalNodeVisitor {
     private final String findColumn;
     private final String toBeChanged;

     public ChangeColumnRefVisitor(String oldName, String newName) {
       this.findColumn = oldName;
       this.toBeChanged = newName;
     }

     @Override
     public void visit(EvalNode node) {
       if (node.type == EvalType.FIELD) {
         FieldEval field = (FieldEval) node;
         if (field.getColumnName().equals(findColumn)
             || field.getName().equals(findColumn)) {
           field.replaceColumnRef(toBeChanged);
         }
       }
     }
   }

   public static class AllColumnRefFinder implements EvalNodeVisitor {
     private List<Column> colList = new ArrayList<>();
     private FieldEval field = null;

     @Override
     public void visit(EvalNode node) {
       if (node.getType() == EvalType.FIELD) {
         field = (FieldEval) node;
         colList.add(field.getColumnRef());
       }
     }

     public List<Column> getColumnRefs() {
       return this.colList;
     }
   }

   public static class UniqueColumnFinder implements EvalNodeVisitor {
     private LinkedHashSet<Column> columnSet = Sets.newLinkedHashSet();
     private FieldEval field = null;

     @Override
     public void visit(EvalNode node) {
       if (node.getType() == EvalType.FIELD) {
         field = (FieldEval) node;
         columnSet.add(field.getColumnRef());
       }
     }

     public LinkedHashSet<Column> getColumnRefs() {
       return this.columnSet;
     }
   }

   public static class VariableCounter implements EvalNodeVisitor {
     private final Map<EvalType, Integer> counter;

     public VariableCounter() {
       counter = Maps.newHashMap();
       counter.put(EvalType.FUNCTION, 0);
       counter.put(EvalType.FIELD, 0);
     }

     @Override
     public void visit(EvalNode node) {
       if (counter.containsKey(node.getType())) {
         int val = counter.get(node.getType());
         val++;
         counter.put(node.getType(), val);
       }
     }

     public Map<EvalType, Integer> getCounter() {
       return counter;
     }
   }

   public static Set<AggregationFunctionCallEval> findDistinctAggFunction(EvalNode expr) {
     AllAggFunctionFinder finder = new AllAggFunctionFinder();
     expr.postOrder(finder);
     return finder.getAggregationFunction();
   }

   public static class AllAggFunctionFinder implements EvalNodeVisitor {
     private Set<AggregationFunctionCallEval> aggFucntions = Sets.newHashSet();
     private AggregationFunctionCallEval field = null;

     @Override
     public void visit(EvalNode node) {
       if (node.getType() == EvalType.AGG_FUNCTION) {
         field = (AggregationFunctionCallEval) node;
         aggFucntions.add(field);
       }
     }

     public Set<AggregationFunctionCallEval> getAggregationFunction() {
       return this.aggFucntions;
     }
   }

   public static Set<WindowFunctionEval> findWindowFunction(EvalNode expr) {
     AllWindowFunctionFinder finder = new AllWindowFunctionFinder();
     expr.postOrder(finder);
     return finder.getWindowFunctionSet();
   }

   public static class AllWindowFunctionFinder implements EvalNodeVisitor {
     private Set<WindowFunctionEval> windowFunctions = Sets.newHashSet();

     @Override
     public void visit(EvalNode node) {
       if (node.getType() == EvalType.WINDOW_FUNCTION) {
         WindowFunctionEval field = (WindowFunctionEval) node;
         windowFunctions.add(field);
       }
     }

     public Set<WindowFunctionEval> getWindowFunctionSet() {
       return windowFunctions;
     }
   }

   public static <T extends EvalNode> Collection<T> findEvalsByType(EvalNode evalNode, EvalType type) {
     EvalFinder finder = new EvalFinder(type);
     finder.visit(null, evalNode, new Stack<>());
     return (Collection<T>) finder.evalNodes;
   }

   public static <T extends EvalNode> Collection<T> findOuterJoinSensitiveEvals(EvalNode evalNode) {
     OuterJoinSensitiveEvalFinder finder = new OuterJoinSensitiveEvalFinder();
     finder.visit(null, evalNode, new Stack<>());
     return (Collection<T>) finder.evalNodes;
   }

   public static class EvalFinder extends BasicEvalNodeVisitor<Object, Object> {
     private EvalType targetType;
     List<EvalNode> evalNodes = new ArrayList<>();

     public EvalFinder(EvalType targetType) {
       this.targetType = targetType;
     }

     @Override
     public Object visit(Object context, EvalNode evalNode, Stack<EvalNode> stack) {
       super.visit(context, evalNode, stack);

       if (evalNode.type == targetType) {
         evalNodes.add(evalNode);
       }

       return evalNode;
     }

     public List<EvalNode> getEvalNodes() {
       return evalNodes;
     }
   }

   public static class OuterJoinSensitiveEvalFinder extends BasicEvalNodeVisitor<Object, Object> {
     private List<EvalNode> evalNodes = new ArrayList<>();

     @Override
     public Object visit(Object context, EvalNode evalNode, Stack<EvalNode> stack) {
       super.visit(context, evalNode, stack);

       if (evalNode.type == EvalType.CASE) {
         evalNodes.add(evalNode);
       } else if (evalNode.type == EvalType.FUNCTION) {
         FunctionEval functionEval = (FunctionEval)evalNode;
         if ("coalesce".equals(functionEval.getName())) {
           evalNodes.add(evalNode);
         }
       } else if (evalNode.type == EvalType.IS_NULL) {
         evalNodes.add(evalNode);
       }

       return evalNode;
     }
   }

   public static boolean checkIfCanBeConstant(EvalNode evalNode) {
     return findUniqueColumns(evalNode).size() == 0 && findDistinctAggFunction(evalNode).size() == 0;
   }

   public static Datum evaluateImmediately(EvalContext evalContext, EvalNode evalNode) {
     evalNode.bind(evalContext, null);
     return evalNode.eval(null);
   }

   /**
    * Checks whether EvalNode consists of only partition columns and const values.
    * The partition based simple query can be defined as 'select * from tb_name where col_name1="X" and col_name2="Y" [LIMIT Z]',
    * whose WHERE clause consists of only partition-columns with constant values.
    * Partition columns must be able to form a prefix of HDFS path like '/tb_name1/col_name1=X/col_name2=Y'.
    *
    * @param node The qualification node of a SELECTION node
    * @param partSchema Partition expression schema
    * @return True if the query is partition-column based simple query.
    */
   public static boolean checkIfPartitionSelection(EvalNode node, Schema partSchema) {
     if (node != null && node instanceof BinaryEval) {
       BinaryEval eval = (BinaryEval)node;
       EvalNode left = eval.getLeftExpr();
       EvalNode right = eval.getRightExpr();
       EvalType type = eval.getType();

       if (type == EvalType.EQUAL) {
         if (left instanceof FieldEval && right instanceof ConstEval && partSchema.contains(((FieldEval) left).getColumnName())) {
           return true;
         } else if (left instanceof ConstEval && right instanceof FieldEval && partSchema.contains(((FieldEval) right).getColumnName())) {
           return true;
         }
       } else if ((type == EvalType.AND || type == EvalType.OR)
           && left instanceof BinaryEval && right instanceof BinaryEval) {
         return checkIfPartitionSelection(left, partSchema) && checkIfPartitionSelection(right, partSchema);
       }
     }
     return false;
   }
 }