/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.antlr.runtime.TokenRewriteStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.TransactionalValidationListener;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.session.SessionState;
/**
* A subclass of the {@link org.apache.hadoop.hive.ql.parse.SemanticAnalyzer} that just handles
 * update, delete and merge statements. It works by rewriting updates and deletes into insert
 * statements (since they are actually inserts) and then patching up the result so that it
 * behaves as an update or delete.
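 * <p>
 * For example (roughly), "DELETE FROM T WHERE a = 1" is re-generated as
 * "INSERT INTO TABLE T SELECT ROW__ID FROM T SORT BY ROW__ID"; the original WHERE clause is then
 * re-attached to the new AST and the insert destination is marked as a delete.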
*/
public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
private boolean useSuper = false;
UpdateDeleteSemanticAnalyzer(QueryState queryState) throws SemanticException {
super(queryState);
}
@Override
public void analyzeInternal(ASTNode tree) throws SemanticException {
if (useSuper) {
super.analyzeInternal(tree);
} else {
if (!SessionState.get().getTxnMgr().supportsAcid()) {
throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TXNMGR.getMsg());
}
switch (tree.getToken().getType()) {
case HiveParser.TOK_DELETE_FROM:
analyzeDelete(tree);
break;
case HiveParser.TOK_UPDATE_TABLE:
analyzeUpdate(tree);
break;
case HiveParser.TOK_MERGE:
analyzeMerge(tree);
break;
default:
throw new RuntimeException("Asked to parse token " + tree.getName() + " in " +
"UpdateDeleteSemanticAnalyzer");
}
cleanUpMetaColumnAccessControl();
}
}
private boolean updating() {
return currentOperation == Context.Operation.UPDATE;
}
private boolean deleting() {
return currentOperation == Context.Operation.DELETE;
}
private void analyzeUpdate(ASTNode tree) throws SemanticException {
currentOperation = Context.Operation.UPDATE;
reparseAndSuperAnalyze(tree);
}
private void analyzeDelete(ASTNode tree) throws SemanticException {
currentOperation = Context.Operation.DELETE;
reparseAndSuperAnalyze(tree);
}
/**
* Append list of partition columns to Insert statement, i.e. the 1st set of partCol1,partCol2
* INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2...
*/
private void addPartitionColsToInsert(List<FieldSchema> partCols, StringBuilder rewrittenQueryStr) {
// If the table is partitioned we have to put the partition() clause in
if (partCols != null && partCols.size() > 0) {
rewrittenQueryStr.append(" partition (");
boolean first = true;
for (FieldSchema fschema : partCols) {
if (first)
first = false;
else
rewrittenQueryStr.append(", ");
//would be nice if there was a way to determine if quotes are needed
rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf));
}
rewrittenQueryStr.append(")");
}
}
/**
* Append list of partition columns to Insert statement, i.e. the 2nd set of partCol1,partCol2
* INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2...
* @param target target table
*/
private void addPartitionColsToSelect(List<FieldSchema> partCols, StringBuilder rewrittenQueryStr,
ASTNode target) throws SemanticException {
String targetName = target != null ? getSimpleTableName(target) : null;
// If the table is partitioned, we need to select the partition columns as well.
if (partCols != null) {
for (FieldSchema fschema : partCols) {
rewrittenQueryStr.append(", ");
//would be nice if there was a way to determine if quotes are needed
if(targetName != null) {
rewrittenQueryStr.append(targetName).append('.');
}
rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf));
}
}
}
/**
* Assert that we are not asked to update a bucketing column or partition column
* @param colName it's the A in "SET A = B"
*/
private void checkValidSetClauseTarget(ASTNode colName, Table targetTable) throws SemanticException {
String columnName = normalizeColName(colName.getText());
// Make sure this isn't one of the partitioning columns, that's not supported.
for (FieldSchema fschema : targetTable.getPartCols()) {
if (fschema.getName().equalsIgnoreCase(columnName)) {
throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_PART_VALUE.getMsg());
}
}
//updating bucket column should move row from one file to another - not supported
if(targetTable.getBucketCols() != null && targetTable.getBucketCols().contains(columnName)) {
throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_BUCKET_VALUE,columnName);
}
boolean foundColumnInTargetTable = false;
for(FieldSchema col : targetTable.getCols()) {
if(columnName.equalsIgnoreCase(col.getName())) {
foundColumnInTargetTable = true;
break;
}
}
if(!foundColumnInTargetTable) {
throw new SemanticException(ErrorMsg.INVALID_TARGET_COLUMN_IN_SET_CLAUSE, colName.getText(),
getDotName(new String[] {targetTable.getDbName(), targetTable.getTableName()}));
}
}
private ASTNode findLHSofAssignment(ASTNode assignment) {
assert assignment.getToken().getType() == HiveParser.EQUAL :
"Expected set assignments to use equals operator but found " + assignment.getName();
ASTNode tableOrColTok = (ASTNode)assignment.getChildren().get(0);
assert tableOrColTok.getToken().getType() == HiveParser.TOK_TABLE_OR_COL :
"Expected left side of assignment to be table or column";
ASTNode colName = (ASTNode)tableOrColTok.getChildren().get(0);
assert colName.getToken().getType() == HiveParser.Identifier :
"Expected column name";
return colName;
}
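  /**
   * Collects the target columns and expressions from the SET clause of an update.
   * For example (roughly), "SET a = 5, b = c + 1" yields the map {a -> AST(5), b -> AST(c + 1)},
   * after validating that each target column exists and is neither a partition nor a bucketing
   * column.
   */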
private Map<String, ASTNode> collectSetColumnsAndExpressions(ASTNode setClause,
Set<String> setRCols, Table targetTable) throws SemanticException {
// An update needs to select all of the columns, as we rewrite the entire row. Also,
// we need to figure out which columns we are going to replace.
assert setClause.getToken().getType() == HiveParser.TOK_SET_COLUMNS_CLAUSE :
"Expected second child of update token to be set token";
// Get the children of the set clause, each of which should be a column assignment
List<? extends Node> assignments = setClause.getChildren();
// Must be deterministic order map for consistent q-test output across Java versions
Map<String, ASTNode> setCols = new LinkedHashMap<String, ASTNode>(assignments.size());
for (Node a : assignments) {
ASTNode assignment = (ASTNode)a;
ASTNode colName = findLHSofAssignment(assignment);
if(setRCols != null) {
addSetRCols((ASTNode) assignment.getChildren().get(1), setRCols);
}
checkValidSetClauseTarget(colName, targetTable);
String columnName = normalizeColName(colName.getText());
// This means that in UPDATE T SET x = _something_
// _something_ can be whatever is supported in SELECT _something_
setCols.put(columnName, (ASTNode)assignment.getChildren().get(1));
}
return setCols;
}
/**
* @return the Metastore representation of the target table
*/
private Table getTargetTable(ASTNode tabRef) throws SemanticException {
String[] tableName;
Table mTable;
switch (tabRef.getType()) {
case HiveParser.TOK_TABREF:
tableName = getQualifiedTableName((ASTNode) tabRef.getChild(0));
break;
case HiveParser.TOK_TABNAME:
tableName = getQualifiedTableName(tabRef);
break;
default:
throw raiseWrongType("TOK_TABREF|TOK_TABNAME", tabRef);
}
try {
mTable = db.getTable(tableName[0], tableName[1]);
} catch (InvalidTableException e) {
LOG.error("Failed to find table " + getDotName(tableName) + " got exception "
+ e.getMessage());
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(getDotName(tableName)), e);
} catch (HiveException e) {
LOG.error("Failed to find table " + getDotName(tableName) + " got exception "
+ e.getMessage());
throw new SemanticException(e.getMessage(), e);
}
return mTable;
}
// Walk through all our inputs and set them to note that this read is part of an update or a
// delete.
private void markReadEntityForUpdate() {
for (ReadEntity input : inputs) {
if(isWritten(input)) {
//todo: this is actually not adding anything since LockComponent uses a Trie to "promote" a lock
//except by accident - when we have a partitioned target table we have a ReadEntity and WriteEntity
//for the table, so we mark ReadEntity and then delete WriteEntity (replace with Partition entries)
//so DbTxnManager skips Read lock on the ReadEntity....
input.setUpdateOrDelete(true);//input.noLockNeeded()?
}
}
}
/**
* For updates, we need to set the column access info so that it contains information on
* the columns we are updating.
   * (But not all the columns of the target table, even though the rewritten query writes
   * all columns of the target table, since that is an implementation detail.)
*/
private void setUpAccessControlInfoForUpdate(Table mTable, Map<String, ASTNode> setCols) {
ColumnAccessInfo cai = new ColumnAccessInfo();
for (String colName : setCols.keySet()) {
cai.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), colName);
}
setUpdateColumnAccessInfo(cai);
}
/**
* We need to weed ROW__ID out of the input column info, as it doesn't make any sense to
* require the user to have authorization on that column.
*/
private void cleanUpMetaColumnAccessControl() {
//we do this for Update/Delete (incl Merge) because we introduce this column into the query
//as part of rewrite
if (columnAccessInfo != null) {
columnAccessInfo.stripVirtualColumn(VirtualColumn.ROWID);
}
}
/**
* Parse the newly generated SQL statement to get a new AST
*/
private ReparseResult parseRewrittenQuery(StringBuilder rewrittenQueryStr, String originalQuery) throws SemanticException {
// Parse the rewritten query string
Context rewrittenCtx;
try {
// Set dynamic partitioning to nonstrict so that queries do not need any partition
// references.
// todo: this may be a perf issue as it prevents the optimizer.. or not
HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
rewrittenCtx = new Context(conf);
rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
rewrittenCtx.setIsUpdateDeleteMerge(true);
} catch (IOException e) {
throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg());
}
rewrittenCtx.setCmd(rewrittenQueryStr.toString());
ASTNode rewrittenTree;
try {
LOG.info("Going to reparse <" + originalQuery + "> as \n<" + rewrittenQueryStr.toString() + ">");
rewrittenTree = ParseUtils.parse(rewrittenQueryStr.toString(), rewrittenCtx);
} catch (ParseException e) {
throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg(), e);
}
return new ReparseResult(rewrittenTree, rewrittenCtx);
}
/**
   * Assert that the target is not a view or materialized view, i.e. that it can support an Acid write
*/
private void validateTargetTable(Table mTable) throws SemanticException {
if (mTable.getTableType() == TableType.VIRTUAL_VIEW ||
mTable.getTableType() == TableType.MATERIALIZED_VIEW) {
LOG.error("Table " + getDotName(new String[] {mTable.getDbName(), mTable.getTableName()}) + " is a view or materialized view");
throw new SemanticException(ErrorMsg.UPDATE_DELETE_VIEW.getMsg());
}
}
/**
* This supports update and delete statements
*/
private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException {
List<? extends Node> children = tree.getChildren();
    // The first child should be the table we are updating / deleting from
ASTNode tabName = (ASTNode)children.get(0);
assert tabName.getToken().getType() == HiveParser.TOK_TABNAME :
"Expected tablename as first child of " + operation() + " but found " + tabName.getName();
    // Rewrite the delete or update into an insert. Crazy, but it works since deletes and updates
    // actually are inserts into the delta file in Hive. A delete
// DELETE FROM _tablename_ [WHERE ...]
// will be rewritten as
// INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT ROW__ID[,
// _partcols_] from _tablename_ SORT BY ROW__ID
// An update
// UPDATE _tablename_ SET x = _expr_ [WHERE...]
// will be rewritten as
// INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT _all_,
    // _partcols_ from _tablename_ SORT BY ROW__ID
// where _all_ is all the non-partition columns. The expressions from the set clause will be
// re-attached later.
// The where clause will also be re-attached later.
// The sort by clause is put in there so that records come out in the right order to enable
// merge on read.
StringBuilder rewrittenQueryStr = new StringBuilder();
Table mTable = getTargetTable(tabName);
validateTargetTable(mTable);
rewrittenQueryStr.append("insert into table ");
rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
addPartitionColsToInsert(mTable.getPartCols(), rewrittenQueryStr);
rewrittenQueryStr.append(" select ROW__ID");
Map<Integer, ASTNode> setColExprs = null;
Map<String, ASTNode> setCols = null;
// Must be deterministic order set for consistent q-test output across Java versions
Set<String> setRCols = new LinkedHashSet<String>();
if (updating()) {
// We won't write the set
// expressions in the rewritten query. We'll patch that up later.
// The set list from update should be the second child (index 1)
assert children.size() >= 2 : "Expected update token to have at least two children";
ASTNode setClause = (ASTNode)children.get(1);
setCols = collectSetColumnsAndExpressions(setClause, setRCols, mTable);
setColExprs = new HashMap<>(setClause.getChildCount());
List<FieldSchema> nonPartCols = mTable.getCols();
for (int i = 0; i < nonPartCols.size(); i++) {
rewrittenQueryStr.append(',');
String name = nonPartCols.get(i).getName();
ASTNode setCol = setCols.get(name);
rewrittenQueryStr.append(HiveUtils.unparseIdentifier(name, this.conf));
if (setCol != null) {
          // This is one of the columns we're setting, record its position so we can come back
// later and patch it up.
// Add one to the index because the select has the ROW__ID as the first column.
setColExprs.put(i + 1, setCol);
}
}
}
addPartitionColsToSelect(mTable.getPartCols(), rewrittenQueryStr, null);
rewrittenQueryStr.append(" from ");
rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
ASTNode where = null;
int whereIndex = deleting() ? 1 : 2;
if (children.size() > whereIndex) {
where = (ASTNode)children.get(whereIndex);
assert where.getToken().getType() == HiveParser.TOK_WHERE :
"Expected where clause, but found " + where.getName();
}
// Add a sort by clause so that the row ids come out in the correct order
rewrittenQueryStr.append(" sort by ROW__ID ");
ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
Context rewrittenCtx = rr.rewrittenCtx;
ASTNode rewrittenTree = rr.rewrittenTree;
ASTNode rewrittenInsert = (ASTNode)rewrittenTree.getChildren().get(1);
assert rewrittenInsert.getToken().getType() == HiveParser.TOK_INSERT :
"Expected TOK_INSERT as second child of TOK_QUERY but found " + rewrittenInsert.getName();
if(updating()) {
rewrittenCtx.setOperation(Context.Operation.UPDATE);
rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.UPDATE);
}
else if(deleting()) {
rewrittenCtx.setOperation(Context.Operation.DELETE);
rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.DELETE);
}
if (where != null) {
// The structure of the AST for the rewritten insert statement is:
// TOK_QUERY -> TOK_FROM
// \-> TOK_INSERT -> TOK_INSERT_INTO
// \-> TOK_SELECT
// \-> TOK_SORTBY
// The following adds the TOK_WHERE and its subtree from the original query as a child of
// TOK_INSERT, which is where it would have landed if it had been there originally in the
      // string. We do it this way because it's easier than turning the original AST back into a
      // string and reparsing it. We have to move the SORT_BY over by one, so grab it, append it
      // to the end, and put the WHERE in its old slot.
ASTNode sortBy = (ASTNode)rewrittenInsert.getChildren().get(2);
assert sortBy.getToken().getType() == HiveParser.TOK_SORTBY :
"Expected TOK_SORTBY to be first child of TOK_SELECT, but found " + sortBy.getName();
rewrittenInsert.addChild(sortBy);
rewrittenInsert.setChild(2, where);
}
// Patch up the projection list for updates, putting back the original set expressions.
if (updating() && setColExprs != null) {
// Walk through the projection list and replace the column names with the
// expressions from the original update. Under the TOK_SELECT (see above) the structure
// looks like:
// TOK_SELECT -> TOK_SELEXPR -> expr
// \-> TOK_SELEXPR -> expr ...
ASTNode rewrittenSelect = (ASTNode)rewrittenInsert.getChildren().get(1);
assert rewrittenSelect.getToken().getType() == HiveParser.TOK_SELECT :
"Expected TOK_SELECT as second child of TOK_INSERT but found " +
rewrittenSelect.getName();
for (Map.Entry<Integer, ASTNode> entry : setColExprs.entrySet()) {
ASTNode selExpr = (ASTNode)rewrittenSelect.getChildren().get(entry.getKey());
assert selExpr.getToken().getType() == HiveParser.TOK_SELEXPR :
"Expected child of TOK_SELECT to be TOK_SELEXPR but was " + selExpr.getName();
        // Now, change its child
selExpr.setChild(0, entry.getValue());
}
}
try {
useSuper = true;
super.analyze(rewrittenTree, rewrittenCtx);
} finally {
useSuper = false;
}
updateOutputs(mTable);
if (updating()) {
setUpAccessControlInfoForUpdate(mTable, setCols);
// Add the setRCols to the input list
for (String colName : setRCols) {
if(columnAccessInfo != null) {//assuming this means we are not doing Auth
columnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()),
colName);
}
}
}
}
/**
   * Check whether {@code readEntity} is also being written to.
*/
private boolean isWritten(Entity readEntity) {
for(Entity writeEntity : outputs) {
//make sure to compare them as Entity, i.e. that it's the same table or partition, etc
if(writeEntity.toString().equalsIgnoreCase(readEntity.toString())) {
return true;
}
}
return false;
}
private String operation() {
if (currentOperation == Context.Operation.OTHER) {
throw new IllegalStateException("UpdateDeleteSemanticAnalyzer neither updating nor " +
"deleting, operation not known.");
}
return currentOperation.toString();
}
// This method finds any columns on the right side of a set statement (thus rcols) and puts them
// in a set so we can add them to the list of input cols to check.
private void addSetRCols(ASTNode node, Set<String> setRCols) {
// See if this node is a TOK_TABLE_OR_COL. If so, find the value and put it in the list. If
// not, recurse on any children
if (node.getToken().getType() == HiveParser.TOK_TABLE_OR_COL) {
ASTNode colName = (ASTNode)node.getChildren().get(0);
assert colName.getToken().getType() == HiveParser.Identifier :
"Expected column name";
setRCols.add(normalizeColName(colName.getText()));
} else if (node.getChildren() != null) {
for (Node n : node.getChildren()) {
addSetRCols((ASTNode)n, setRCols);
}
}
}
/**
* Column names are stored in metastore in lower case, regardless of the CREATE TABLE statement.
* Unfortunately there is no single place that normalizes the input query.
* @param colName not null
*/
private static String normalizeColName(String colName) {
return colName.toLowerCase();
}
private Context.Operation currentOperation = Context.Operation.OTHER;
private static final String Indent = " ";
private IdentifierQuoter quotedIdenfierHelper;
/**
* This allows us to take an arbitrary ASTNode and turn it back into SQL that produced it.
* Since HiveLexer.g is written such that it strips away any ` (back ticks) around
* quoted identifiers we need to add those back to generated SQL.
* Additionally, the parser only produces tokens of type Identifier and never
* QuotedIdentifier (HIVE-6013). So here we just quote all identifiers.
* (') around String literals are retained w/o issues
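   * For example (roughly), an identifier written as `my col` in the original SQL reaches us as the
   * bare Identifier token, so we re-insert backticks around every Identifier when regenerating SQL.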
*/
private static class IdentifierQuoter {
private final TokenRewriteStream trs;
private final IdentityHashMap<ASTNode, ASTNode> visitedNodes = new IdentityHashMap<>();
IdentifierQuoter(TokenRewriteStream trs) {
this.trs = trs;
if(trs == null) {
throw new IllegalArgumentException("Must have a TokenRewriteStream");
}
}
private void visit(ASTNode n) {
if(n.getType() == HiveParser.Identifier) {
if(visitedNodes.containsKey(n)) {
/**
* Since we are modifying the stream, it's not idempotent. Ideally, the caller would take
* care to only quote Identifiers in each subtree once, but this makes it safe
*/
return;
}
visitedNodes.put(n, n);
trs.insertBefore(n.getToken(), "`");
trs.insertAfter(n.getToken(), "`");
}
if(n.getChildCount() <= 0) {return;}
for(Node c : n.getChildren()) {
visit((ASTNode)c);
}
}
}
/**
* This allows us to take an arbitrary ASTNode and turn it back into SQL that produced it without
* needing to understand what it is (except for QuotedIdentifiers)
*
*/
private String getMatchedText(ASTNode n) {
quotedIdenfierHelper.visit(n);
return ctx.getTokenRewriteStream().toString(n.getTokenStartIndex(),
n.getTokenStopIndex() + 1).trim();
}
/**
* Here we take a Merge statement AST and generate a semantically equivalent multi-insert
   * statement to execute. Each Insert leg represents a single WHEN clause. As much as possible,
   * the new SQL statement is made to look like the input SQL statement so that it's easier to map
   * Query Compiler errors from the generated SQL back to the original one.
* The generated SQL is a complete representation of the original input for the same reason.
* In many places SemanticAnalyzer throws exceptions that contain (line, position) coordinates.
* If generated SQL doesn't have everything and is patched up later, these coordinates point to
* the wrong place.
*
* @throws SemanticException
*/
private void analyzeMerge(ASTNode tree) throws SemanticException {
currentOperation = Context.Operation.MERGE;
quotedIdenfierHelper = new IdentifierQuoter(ctx.getTokenRewriteStream());
/*
* See org.apache.hadoop.hive.ql.parse.TestMergeStatement for some examples of the merge AST
For example, given:
merge into acidTbl using nonAcidPart2 source ON acidTbl.a = source.a2
WHEN MATCHED THEN UPDATE set b = source.b2
WHEN NOT MATCHED THEN INSERT VALUES(source.a2, source.b2)
We get AST like this:
"(tok_merge " +
"(tok_tabname acidtbl) (tok_tabref (tok_tabname nonacidpart2) source) " +
"(= (. (tok_table_or_col acidtbl) a) (. (tok_table_or_col source) a2)) " +
"(tok_matched " +
"(tok_update " +
"(tok_set_columns_clause (= (tok_table_or_col b) (. (tok_table_or_col source) b2))))) " +
"(tok_not_matched " +
"tok_insert " +
"(tok_value_row (. (tok_table_or_col source) a2) (. (tok_table_or_col source) b2))))");
And need to produce a multi-insert like this to execute:
FROM acidTbl right outer join nonAcidPart2 ON acidTbl.a = source.a2
Insert into table acidTbl select nonAcidPart2.a2, nonAcidPart2.b2 where acidTbl.a is null
INSERT INTO TABLE acidTbl select target.ROW__ID, nonAcidPart2.a2, nonAcidPart2.b2 where nonAcidPart2.a2=acidTbl.a sort by acidTbl.ROW__ID
*/
/*todo: we need some sort of validation phase over original AST to make things user friendly; for example, if
original command refers to a column that doesn't exist, this will be caught when processing the rewritten query but
the errors will point at locations that the user can't map to anything
- VALUES clause must have the same number of values as target table (including partition cols). Part cols go last in Select clause of Insert as Select
todo: do we care to preserve comments in original SQL?
    todo: check if identifiers are properly escaped/quoted in the generated SQL - it's currently inconsistent
Look at UnparseTranslator.addIdentifierTranslation() - it does unescape + unparse...
todo: consider "WHEN NOT MATCHED BY SOURCE THEN UPDATE SET TargetTable.Col1 = SourceTable.Col1 "; what happens when source is empty? This should be a runtime error - maybe not
the outer side of ROJ is empty => the join produces 0 rows. If supporting WHEN NOT MATCHED BY SOURCE, then this should be a runtime error
*/
ASTNode target = (ASTNode)tree.getChild(0);
ASTNode source = (ASTNode)tree.getChild(1);
String targetName = getSimpleTableName(target);
String sourceName = getSimpleTableName(source);
ASTNode onClause = (ASTNode) tree.getChild(2);
String onClauseAsText = getMatchedText(onClause);
Table targetTable = getTargetTable(target);
validateTargetTable(targetTable);
List<ASTNode> whenClauses = findWhenClauses(tree);
StringBuilder rewrittenQueryStr = new StringBuilder("FROM\n");
rewrittenQueryStr.append(Indent).append(getFullTableNameForSQL(target));
if(isAliased(target)) {
rewrittenQueryStr.append(" ").append(targetName);
}
rewrittenQueryStr.append('\n');
rewrittenQueryStr.append(Indent).append(chooseJoinType(whenClauses)).append("\n");
if(source.getType() == HiveParser.TOK_SUBQUERY) {
//this includes the mandatory alias
rewrittenQueryStr.append(Indent).append(getMatchedText(source));
}
else {
rewrittenQueryStr.append(Indent).append(getFullTableNameForSQL(source));
if(isAliased(source)) {
rewrittenQueryStr.append(" ").append(sourceName);
}
}
rewrittenQueryStr.append('\n');
rewrittenQueryStr.append(Indent).append("ON ").append(onClauseAsText).append('\n');
/**
     * We allow at most 2 WHEN MATCHED clauses, in which case one must be an Update and the other a Delete.
     * If we have both update and delete, the 1st one (in SQL text order) must have "AND <extra predicate>"
     * so that the 2nd one can avoid processing the same rows.
* Update and Delete may be in any order. (Insert is always last)
*/
String extraPredicate = null;
int numWhenMatchedUpdateClauses = 0, numWhenMatchedDeleteClauses = 0;
int numInsertClauses = 0;
for(ASTNode whenClause : whenClauses) {
switch (getWhenClauseOperation(whenClause).getType()) {
case HiveParser.TOK_INSERT:
numInsertClauses++;
handleInsert(whenClause, rewrittenQueryStr, target, onClause, targetTable, targetName, onClauseAsText);
break;
case HiveParser.TOK_UPDATE:
numWhenMatchedUpdateClauses++;
String s = handleUpdate(whenClause, rewrittenQueryStr, target, onClauseAsText, targetTable, extraPredicate);
if(numWhenMatchedUpdateClauses + numWhenMatchedDeleteClauses == 1) {
extraPredicate = s;//i.e. it's the 1st WHEN MATCHED
}
break;
case HiveParser.TOK_DELETE:
numWhenMatchedDeleteClauses++;
String s1 = handleDelete(whenClause, rewrittenQueryStr, target, onClauseAsText, targetTable, extraPredicate);
if(numWhenMatchedUpdateClauses + numWhenMatchedDeleteClauses == 1) {
extraPredicate = s1;//i.e. it's the 1st WHEN MATCHED
}
break;
default:
throw new IllegalStateException("Unexpected WHEN clause type: " + whenClause.getType() +
addParseInfo(whenClause));
}
if(numWhenMatchedDeleteClauses > 1) {
throw new SemanticException(ErrorMsg.MERGE_TOO_MANY_DELETE, ctx.getCmd());
}
if(numWhenMatchedUpdateClauses > 1) {
throw new SemanticException(ErrorMsg.MERGE_TOO_MANY_UPDATE, ctx.getCmd());
}
assert numInsertClauses < 2;
if(numInsertClauses == 1 && numWhenMatchedUpdateClauses == 1) {
if(AcidUtils.getAcidOperationalProperties(targetTable).isSplitUpdate()) {
throw new IllegalStateException("Tables with " +
hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES + "=" +
TransactionalValidationListener.DEFAULT_TRANSACTIONAL_PROPERTY + " currently do not " +
"support MERGE with both Insert and Update clauses.");
}
}
}
if(numWhenMatchedDeleteClauses + numWhenMatchedUpdateClauses == 2 && extraPredicate == null) {
throw new SemanticException(ErrorMsg.MERGE_PREDIACTE_REQUIRED, ctx.getCmd());
}
boolean validating = handleCardinalityViolation(rewrittenQueryStr, target, onClauseAsText,
targetTable, numWhenMatchedDeleteClauses == 0 && numWhenMatchedUpdateClauses == 0);
ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
Context rewrittenCtx = rr.rewrittenCtx;
ASTNode rewrittenTree = rr.rewrittenTree;
rewrittenCtx.setOperation(Context.Operation.MERGE);
    //set dest name mapping on the new context; the 1st child is TOK_FROM
for(int insClauseIdx = 1, whenClauseIdx = 0;
insClauseIdx < rewrittenTree.getChildCount() - (validating ? 1 : 0/*skip cardinality violation clause*/);
insClauseIdx++, whenClauseIdx++) {
      //we've added Insert clauses in the order of the WHEN items in whenClauses
ASTNode insertClause = (ASTNode) rewrittenTree.getChild(insClauseIdx);
switch (getWhenClauseOperation(whenClauses.get(whenClauseIdx)).getType()) {
case HiveParser.TOK_INSERT:
rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.INSERT);
break;
case HiveParser.TOK_UPDATE:
rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.UPDATE);
break;
case HiveParser.TOK_DELETE:
rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.DELETE);
break;
default:
assert false;
}
}
if(validating) {
//here means the last branch of the multi-insert is Cardinality Validation
rewrittenCtx.addDestNamePrefix(rewrittenTree.getChildCount() - 1, Context.DestClausePrefix.INSERT);
}
try {
useSuper = true;
super.analyze(rewrittenTree, rewrittenCtx);
} finally {
useSuper = false;
}
updateOutputs(targetTable);
}
/**
* SemanticAnalyzer will generate a WriteEntity for the target table since it doesn't know/check
* if the read and write are of the same table in "insert ... select ....". Since DbTxnManager
* uses Read/WriteEntity objects to decide which locks to acquire, we get more concurrency if we
   * change the table-level WriteEntity to a set of partition WriteEntity objects based on
* ReadEntity objects computed for this table.
*/
private void updateOutputs(Table targetTable) {
markReadEntityForUpdate();
if(targetTable.isPartitioned()) {
List<ReadEntity> partitionsRead = getRestrictedPartitionSet(targetTable);
if(!partitionsRead.isEmpty()) {
//if there is WriteEntity with WriteType=UPDATE/DELETE for target table, replace it with
//WriteEntity for each partition
List<WriteEntity> toRemove = new ArrayList<>();
for(WriteEntity we : outputs) {
WriteEntity.WriteType wt = we.getWriteType();
if(isTargetTable(we, targetTable) &&
(wt == WriteEntity.WriteType.UPDATE || wt == WriteEntity.WriteType.DELETE)) {
/**
            * The assumption here is that SemanticAnalyzer will generate a ReadEntity for each
* partition that exists and is matched by the WHERE clause (which may be all of them).
* Since we don't allow updating the value of a partition column, we know that we always
* write the same (or fewer) partitions than we read. Still, the write is a Dynamic
* Partition write - see HIVE-15032.
*/
toRemove.add(we);
}
}
outputs.removeAll(toRemove);
for(ReadEntity re : partitionsRead) {
for(WriteEntity original : toRemove) {
//since we may have both Update and Delete branches, Auth needs to know
WriteEntity we = new WriteEntity(re.getPartition(), original.getWriteType());
we.setDynamicPartitionWrite(original.isDynamicPartitionWrite());
outputs.add(we);
}
}
}
}
}
/**
* If the optimizer has determined that it only has to read some of the partitions of the
* target table to satisfy the query, then we know that the write side of update/delete
* (and update/delete parts of merge)
* can only write (at most) that set of partitions (since we currently don't allow updating
* partition (or bucket) columns). So we want to replace the table level
* WriteEntity in the outputs with WriteEntity for each of these partitions
* ToDo: see if this should be moved to SemanticAnalyzer itself since it applies to any
* insert which does a select against the same table. Then SemanticAnalyzer would also
* be able to not use DP for the Insert...
*
* Note that the Insert of Merge may be creating new partitions and writing to partitions
* which were not read (WHEN NOT MATCHED...). WriteEntity for that should be created
* in MoveTask (or some other task after the query is complete)
*/
private List<ReadEntity> getRestrictedPartitionSet(Table targetTable) {
List<ReadEntity> partitionsRead = new ArrayList<>();
for(ReadEntity re : inputs) {
if(re.isFromTopLevelQuery && re.getType() == Entity.Type.PARTITION && isTargetTable(re, targetTable)) {
partitionsRead.add(re);
}
}
return partitionsRead;
}
/**
* if there is no WHEN NOT MATCHED THEN INSERT, we don't outer join
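   * (a RIGHT OUTER JOIN keeps source rows with no matching target row, which is what feeds
   * the WHEN NOT MATCHED Insert leg)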
*/
private String chooseJoinType(List<ASTNode> whenClauses) {
for(ASTNode whenClause : whenClauses) {
if(getWhenClauseOperation(whenClause).getType() == HiveParser.TOK_INSERT) {
return "RIGHT OUTER JOIN";
}
}
return "INNER JOIN";
}
/**
* does this Entity belong to target table (partition)
*/
private boolean isTargetTable(Entity entity, Table targetTable) {
//todo: https://issues.apache.org/jira/browse/HIVE-15048
/**
* is this the right way to compare? Should it just compare paths?
* equals() impl looks heavy weight
*/
return targetTable.equals(entity.getTable());
}
/**
* Per SQL Spec ISO/IEC 9075-2:2011(E) Section 14.2 under "General Rules" Item 6/Subitem a/Subitem 2/Subitem B,
* an error should be raised if > 1 row of "source" matches the same row in "target".
* This should not affect the runtime of the query as it's running in parallel with other
* branches of the multi-insert. It won't actually write any data to merge_tmp_table since the
   * cardinality_violation() UDF throws an error whenever it's called, killing the query.
* @return true if another Insert clause was added
*/
private boolean handleCardinalityViolation(StringBuilder rewrittenQueryStr, ASTNode target,
String onClauseAsString, Table targetTable,
boolean onlyHaveWhenNotMatchedClause)
throws SemanticException {
if(!conf.getBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK)) {
LOG.info("Merge statement cardinality violation check is disabled: " +
HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK.varname);
return false;
}
if(onlyHaveWhenNotMatchedClause) {
      //if there is no update or delete in the Merge, there is no need to do the cardinality check
return false;
}
    //this is a tmp table and thus Session scoped; ACID requires SQL statements in a given session
    // to be serial, i.e. the name can be fixed across all invocations
String tableName = "merge_tmp_table";
rewrittenQueryStr.append("\nINSERT INTO ").append(tableName)
.append("\n SELECT cardinality_violation(")
.append(getSimpleTableName(target)).append(".ROW__ID");
addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target);
rewrittenQueryStr.append(")\n WHERE ").append(onClauseAsString)
.append(" GROUP BY ").append(getSimpleTableName(target)).append(".ROW__ID");
addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target);
rewrittenQueryStr.append(" HAVING count(*) > 1");
    //say table T has partition col p; we are generating
    //select cardinality_violation(ROW__ID, p) WHERE ... GROUP BY ROW__ID, p
//the Group By args are passed to cardinality_violation to add the violating value to the error msg
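    //Roughly, for target alias t with a single partition col p, the appended leg looks like:
    //  INSERT INTO merge_tmp_table
    //   SELECT cardinality_violation(t.ROW__ID, t.p)
    //   WHERE <on clause> GROUP BY t.ROW__ID, t.p HAVING count(*) > 1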
try {
if (null == db.getTable(tableName, false)) {
StorageFormat format = new StorageFormat(conf);
format.processStorageFormat("TextFile");
Table table = db.newTable(tableName);
table.setSerializationLib(format.getSerde());
List<FieldSchema> fields = new ArrayList<FieldSchema>();
fields.add(new FieldSchema("val", "int", null));
table.setFields(fields);
table.setDataLocation(Warehouse.getDnsPath(new Path(SessionState.get().getTempTableSpace(),
tableName), conf));
table.getTTable().setTemporary(true);
table.setStoredAsSubDirectories(false);
table.setInputFormatClass(format.getInputFormat());
table.setOutputFormatClass(format.getOutputFormat());
db.createTable(table, true);
}
}
catch(HiveException|MetaException e) {
throw new SemanticException(e.getMessage(), e);
}
return true;
}
/**
   * @param onClauseAsString - because there is no clone() and we need to use it in multiple places
* @param deleteExtraPredicate - see notes at caller
*/
private String handleUpdate(ASTNode whenMatchedUpdateClause, StringBuilder rewrittenQueryStr,
ASTNode target, String onClauseAsString, Table targetTable,
String deleteExtraPredicate) throws SemanticException {
assert whenMatchedUpdateClause.getType() == HiveParser.TOK_MATCHED;
assert getWhenClauseOperation(whenMatchedUpdateClause).getType() == HiveParser.TOK_UPDATE;
String targetName = getSimpleTableName(target);
rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
addPartitionColsToInsert(targetTable.getPartCols(), rewrittenQueryStr);
rewrittenQueryStr.append(" -- update clause\n select ").append(targetName).append(".ROW__ID");
ASTNode setClause = (ASTNode)getWhenClauseOperation(whenMatchedUpdateClause).getChild(0);
    //columns being updated -> update expressions; "setRCols" (2nd param) is null because we use the actual expressions
//before reparsing, i.e. they are known to SemanticAnalyzer logic
Map<String, ASTNode> setColsExprs = collectSetColumnsAndExpressions(setClause, null, targetTable);
//if target table has cols c1,c2,c3 and p1 partition col and we had "SET c2 = 5, c1 = current_date()" we want to end up with
//insert into target (p1) select current_date(), 5, c3, p1 where ....
//since we take the RHS of set exactly as it was in Input, we don't need to deal with quoting/escaping column/table names
List<FieldSchema> nonPartCols = targetTable.getCols();
for(FieldSchema fs : nonPartCols) {
rewrittenQueryStr.append(", ");
String name = fs.getName();
if (setColsExprs.containsKey(name)) {
String rhsExp = getMatchedText(setColsExprs.get(name));
//"set a=5, b=8" - rhsExp picks up the next char (e.g. ',') from the token stream
switch (rhsExp.charAt(rhsExp.length() - 1)) {
case ',':
case '\n':
rhsExp = rhsExp.substring(0, rhsExp.length() - 1);
}
rewrittenQueryStr.append(rhsExp);
}
else {
rewrittenQueryStr.append(getSimpleTableName(target)).append(".").append(HiveUtils.unparseIdentifier(name, this.conf));
}
}
addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target);
rewrittenQueryStr.append("\n WHERE ").append(onClauseAsString);
String extraPredicate = getWhenClausePredicate(whenMatchedUpdateClause);
if(extraPredicate != null) {
//we have WHEN MATCHED AND <boolean expr> THEN DELETE
rewrittenQueryStr.append(" AND ").append(extraPredicate);
}
if(deleteExtraPredicate != null) {
rewrittenQueryStr.append(" AND NOT(").append(deleteExtraPredicate).append(")");
}
rewrittenQueryStr.append("\n sort by ");
rewrittenQueryStr.append(targetName).append(".ROW__ID \n");
setUpAccessControlInfoForUpdate(targetTable, setColsExprs);
//we don't deal with columns on RHS of SET expression since the whole expr is part of the
    //rewritten SQL statement and is thus handled by SemanticAnalyzer. Nor do we have to
//figure which cols on RHS are from source and which from target
return extraPredicate;
}
/**
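   * Generates the Insert leg of the multi-insert for a WHEN MATCHED [AND <predicate>] THEN DELETE clause.
   * Roughly, for target alias t it appends:
   *   INSERT INTO <target> [PARTITION(...)] select t.ROW__ID [, partcols]
   *   WHERE <on clause> [AND <when-clause predicate>] sort by t.ROW__ID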
   * @param onClauseAsString - because there is no clone() and we need to use it in multiple places
* @param updateExtraPredicate - see notes at caller
*/
private String handleDelete(ASTNode whenMatchedDeleteClause, StringBuilder rewrittenQueryStr, ASTNode target,
String onClauseAsString, Table targetTable, String updateExtraPredicate) throws SemanticException {
assert whenMatchedDeleteClause.getType() == HiveParser.TOK_MATCHED;
assert getWhenClauseOperation(whenMatchedDeleteClause).getType() == HiveParser.TOK_DELETE;
List<FieldSchema> partCols = targetTable.getPartCols();
String targetName = getSimpleTableName(target);
rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
addPartitionColsToInsert(partCols, rewrittenQueryStr);
rewrittenQueryStr.append(" -- delete clause\n select ").append(targetName).append(".ROW__ID ");
addPartitionColsToSelect(partCols, rewrittenQueryStr, target);
rewrittenQueryStr.append("\n WHERE ").append(onClauseAsString);
String extraPredicate = getWhenClausePredicate(whenMatchedDeleteClause);
if(extraPredicate != null) {
//we have WHEN MATCHED AND <boolean expr> THEN DELETE
rewrittenQueryStr.append(" AND ").append(extraPredicate);
}
if(updateExtraPredicate != null) {
rewrittenQueryStr.append(" AND NOT(").append(updateExtraPredicate).append(")");
}
rewrittenQueryStr.append("\n sort by ");
rewrittenQueryStr.append(targetName).append(".ROW__ID \n");
return extraPredicate;
}
private static String addParseInfo(ASTNode n) {
return " at " + ErrorMsg.renderPosition(n);
}
/**
* Returns the table name to use in the generated query preserving original quotes/escapes if any
* @see #getFullTableNameForSQL(ASTNode)
*/
private String getSimpleTableName(ASTNode n) throws SemanticException {
return HiveUtils.unparseIdentifier(getSimpleTableNameBase(n), this.conf);
}
private String getSimpleTableNameBase(ASTNode n) throws SemanticException {
switch (n.getType()) {
case HiveParser.TOK_TABREF:
int aliasIndex = findTabRefIdxs(n)[0];
if (aliasIndex != 0) {
return n.getChild(aliasIndex).getText();//the alias
}
return getSimpleTableNameBase((ASTNode) n.getChild(0));
case HiveParser.TOK_TABNAME:
if(n.getChildCount() == 2) {
//db.table -> return table
return n.getChild(1).getText();
}
return n.getChild(0).getText();
case HiveParser.TOK_SUBQUERY:
return n.getChild(1).getText();//the alias
default:
throw raiseWrongType("TOK_TABREF|TOK_TABNAME|TOK_SUBQUERY", n);
}
}
/**
* @return table name in db.table form with proper quoting/escaping to be used in a SQL statement
*/
private String getFullTableNameForSQL(ASTNode n) throws SemanticException {
switch (n.getType()) {
case HiveParser.TOK_TABNAME:
String[] tableName = getQualifiedTableName(n);
return getDotName(new String[] {
HiveUtils.unparseIdentifier(tableName[0], this.conf),
HiveUtils.unparseIdentifier(tableName[1], this.conf) });
case HiveParser.TOK_TABREF:
return getFullTableNameForSQL((ASTNode) n.getChild(0));
default:
throw raiseWrongType("TOK_TABNAME", n);
}
  }
  private static final class ReparseResult {
private final ASTNode rewrittenTree;
private final Context rewrittenCtx;
ReparseResult(ASTNode n, Context c) {
rewrittenTree = n;
rewrittenCtx = c;
}
}
private static IllegalArgumentException raiseWrongType(String expectedTokName, ASTNode n) {
return new IllegalArgumentException("Expected " + expectedTokName + "; got " + n.getType());
}
private boolean isAliased(ASTNode n) {
switch (n.getType()) {
case HiveParser.TOK_TABREF:
return findTabRefIdxs(n)[0] != 0;
case HiveParser.TOK_TABNAME:
return false;
case HiveParser.TOK_SUBQUERY:
assert n.getChildCount() > 1 : "Expected Derived Table to be aliased";
return true;
default:
throw raiseWrongType("TOK_TABREF|TOK_TABNAME", n);
}
}
/**
* Collect WHEN clauses from Merge statement AST
*/
private List<ASTNode> findWhenClauses(ASTNode tree) throws SemanticException {
assert tree.getType() == HiveParser.TOK_MERGE;
List<ASTNode> whenClauses = new ArrayList<>();
for(int idx = 3; idx < tree.getChildCount(); idx++) {
ASTNode whenClause = (ASTNode)tree.getChild(idx);
assert whenClause.getType() == HiveParser.TOK_MATCHED ||
whenClause.getType() == HiveParser.TOK_NOT_MATCHED :
"Unexpected node type found: " + whenClause.getType() + addParseInfo(whenClause);
whenClauses.add(whenClause);
}
if(whenClauses.size() <= 0) {
//Futureproofing: the parser will actually not allow this
throw new SemanticException("Must have at least 1 WHEN clause in MERGE statement");
}
return whenClauses;
}
private ASTNode getWhenClauseOperation(ASTNode whenClause) {
if(!(whenClause.getType() == HiveParser.TOK_MATCHED || whenClause.getType() == HiveParser.TOK_NOT_MATCHED)) {
throw raiseWrongType("Expected TOK_MATCHED|TOK_NOT_MATCHED", whenClause);
}
return (ASTNode) whenClause.getChild(0);
}
/**
* returns the <boolean predicate> as in WHEN MATCHED AND <boolean predicate> THEN...
* @return may be null
*/
private String getWhenClausePredicate(ASTNode whenClause) {
if(!(whenClause.getType() == HiveParser.TOK_MATCHED || whenClause.getType() == HiveParser.TOK_NOT_MATCHED)) {
throw raiseWrongType("Expected TOK_MATCHED|TOK_NOT_MATCHED", whenClause);
}
if(whenClause.getChildCount() == 2) {
return getMatchedText((ASTNode)whenClause.getChild(1));
}
return null;
}
/**
   * Generates the Insert leg of the multi-insert SQL that represents the WHEN NOT MATCHED THEN INSERT clause.
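   * Roughly, for "WHEN NOT MATCHED THEN INSERT VALUES(s.a2, s.b2)" it appends:
   *   INSERT INTO <target> [PARTITION(...)] select s.a2, s.b2
   *   WHERE <target cols referenced in the ON clause> IS NULL [AND <when-clause predicate>]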
* @param targetTableNameInSourceQuery - simple name/alias
* @throws SemanticException
*/
private void handleInsert(ASTNode whenNotMatchedClause, StringBuilder rewrittenQueryStr, ASTNode target,
ASTNode onClause, Table targetTable,
String targetTableNameInSourceQuery, String onClauseAsString) throws SemanticException {
assert whenNotMatchedClause.getType() == HiveParser.TOK_NOT_MATCHED;
assert getWhenClauseOperation(whenNotMatchedClause).getType() == HiveParser.TOK_INSERT;
List<FieldSchema> partCols = targetTable.getPartCols();
String valuesClause = getMatchedText((ASTNode)getWhenClauseOperation(whenNotMatchedClause).getChild(0));
valuesClause = valuesClause.substring(1, valuesClause.length() - 1);//strip '(' and ')'
rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
addPartitionColsToInsert(partCols, rewrittenQueryStr);
OnClauseAnalyzer oca = new OnClauseAnalyzer(onClause, targetTable, targetTableNameInSourceQuery,
conf, onClauseAsString);
oca.analyze();
rewrittenQueryStr.append(" -- insert clause\n select ")
.append(valuesClause).append("\n WHERE ").append(oca.getPredicate());
String extraPredicate = getWhenClausePredicate(whenNotMatchedClause);
if(extraPredicate != null) {
//we have WHEN NOT MATCHED AND <boolean expr> THEN INSERT
rewrittenQueryStr.append(" AND ")
.append(getMatchedText(((ASTNode)whenNotMatchedClause.getChild(1)))).append('\n');
}
}
/**
   * Suppose the input Merge statement has ON target.a = source.b AND c = d. Assume that 'c' is from
   * the target table and 'd' is from the source expression. In order to properly
* generate the Insert for WHEN NOT MATCHED THEN INSERT, we need to make sure that the Where
* clause of this Insert contains "target.a is null and target.c is null" This ensures that this
* Insert leg does not receive any rows that are processed by Insert corresponding to
* WHEN MATCHED THEN ... clauses. (Implicit in this is a mini resolver that figures out if an
* unqualified column is part of the target table. We can get away with this simple logic because
   * we know that target is always a table, as opposed to some derived table.)
* The job of this class is to generate this predicate.
*
   * Note that this predicate cannot simply be NOT(on-clause-expr). If on-clause-expr evaluates
* to Unknown, it will be treated as False in the WHEN MATCHED Inserts but NOT(Unknown) = Unknown,
* and so it will be False for WHEN NOT MATCHED Insert...
*/
private static final class OnClauseAnalyzer {
private final ASTNode onClause;
private final Map<String, List<String>> table2column = new HashMap<>();
private final List<String> unresolvedColumns = new ArrayList<>();
private final List<FieldSchema> allTargetTableColumns = new ArrayList<>();
private final Set<String> tableNamesFound = new HashSet<>();
private final String targetTableNameInSourceQuery;
private final HiveConf conf;
private final String onClauseAsString;
/**
* @param targetTableNameInSourceQuery alias or simple name
*/
OnClauseAnalyzer(ASTNode onClause, Table targetTable, String targetTableNameInSourceQuery,
HiveConf conf, String onClauseAsString) {
this.onClause = onClause;
allTargetTableColumns.addAll(targetTable.getCols());
allTargetTableColumns.addAll(targetTable.getPartCols());
this.targetTableNameInSourceQuery = unescapeIdentifier(targetTableNameInSourceQuery);
this.conf = conf;
this.onClauseAsString = onClauseAsString;
}
/**
* finds all columns and groups by table ref (if there is one)
*/
private void visit(ASTNode n) {
if(n.getType() == HiveParser.TOK_TABLE_OR_COL) {
ASTNode parent = (ASTNode) n.getParent();
if(parent != null && parent.getType() == HiveParser.DOT) {
//the ref must be a table, so look for column name as right child of DOT
if(parent.getParent() != null && parent.getParent().getType() == HiveParser.DOT) {
//I don't think this can happen... but just in case
throw new IllegalArgumentException("Found unexpected db.table.col reference in " + onClauseAsString);
}
addColumn2Table(n.getChild(0).getText(), parent.getChild(1).getText());
}
else {
//must be just a column name
unresolvedColumns.add(n.getChild(0).getText());
}
}
if(n.getChildCount() == 0) {
return;
}
for(Node child : n.getChildren()) {
visit((ASTNode)child);
}
}
private void analyze() {
visit(onClause);
if(tableNamesFound.size() > 2) {
throw new IllegalArgumentException("Found > 2 table refs in ON clause. Found " +
tableNamesFound + " in " + onClauseAsString);
}
handleUnresolvedColumns();
if(tableNamesFound.size() > 2) {
throw new IllegalArgumentException("Found > 2 table refs in ON clause (incl unresolved). " +
"Found " + tableNamesFound + " in " + onClauseAsString);
}
}
/**
* Find those that belong to target table
*/
private void handleUnresolvedColumns() {
if(unresolvedColumns.isEmpty()) { return; }
for(String c : unresolvedColumns) {
for(FieldSchema fs : allTargetTableColumns) {
if(c.equalsIgnoreCase(fs.getName())) {
            //c belongs to target table; strictly speaking there may be an ambiguous ref but
//this will be caught later when multi-insert is parsed
addColumn2Table(targetTableNameInSourceQuery.toLowerCase(), c);
break;
}
}
}
}
private void addColumn2Table(String tableName, String columnName) {
tableName = tableName.toLowerCase();//normalize name for mapping
tableNamesFound.add(tableName);
List<String> cols = table2column.get(tableName);
if(cols == null) {
cols = new ArrayList<>();
table2column.put(tableName, cols);
}
//we want to preserve 'columnName' as it was in original input query so that rewrite
//looks as much as possible like original query
cols.add(columnName);
}
/**
* Now generate the predicate for Where clause
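     * Roughly, for ON target.a = source.b AND c = d (where 'c' resolves to the target table),
     * the result is "target.a IS NULL AND target.c IS NULL".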
*/
private String getPredicate() {
      //normalize table name for mapping
List<String> targetCols = table2column.get(targetTableNameInSourceQuery.toLowerCase());
if(targetCols == null) {
/*e.g. ON source.t=1
       * this is not strictly speaking invalid but it does ensure that all columns from the target
       * table are NULL for every row. This would make any WHEN MATCHED clause invalid since
* we don't have a ROW__ID. The WHEN NOT MATCHED could be meaningful but it's just data from
* source satisfying source.t=1... not worth the effort to support this*/
throw new IllegalArgumentException(ErrorMsg.INVALID_TABLE_IN_ON_CLAUSE_OF_MERGE
.format(targetTableNameInSourceQuery, onClauseAsString));
}
StringBuilder sb = new StringBuilder();
for(String col : targetCols) {
if(sb.length() > 0) {
sb.append(" AND ");
}
//but preserve table name in SQL
sb.append(HiveUtils.unparseIdentifier(targetTableNameInSourceQuery, conf)).append(".").append(HiveUtils.unparseIdentifier(col, conf)).append(" IS NULL");
}
return sb.toString();
}
}
}