HIVE-20102: Add a couple of additional tests for query parsing (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
index bda3c21..895c2f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
@@ -153,6 +153,17 @@
     };
 
     @Override
+    public Object dupTree(Object t, Object parent) {
+    // Overridden to copy start index / end index, which is needed during optimization,
+      // e.g., for masking/filtering
+      ASTNode astNode = (ASTNode) t;
+      ASTNode astNodeCopy = (ASTNode) super.dupTree(t, parent);
+      astNodeCopy.setTokenStartIndex(astNode.getTokenStartIndex());
+      astNodeCopy.setTokenStopIndex(astNode.getTokenStopIndex());
+      return astNodeCopy;
+    }
+
+    @Override
     public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) {
       return new ASTErrorNode(input, start, stop, e);
     };
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
index 64b3541..a2f6fbb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
@@ -423,6 +423,10 @@
     return viewDesc != null && !viewDesc.isMaterialized();
   }
 
+  public boolean isMultiDestQuery() {
+    return qbp != null && qbp.getClauseNamesForDest() != null && qbp.getClauseNamesForDest().size() > 1;
+  }
+
   public HashMap<String, Table> getViewToTabSchema() {
     return viewAliasToViewSchema;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 37e1a73..5044480 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -12117,8 +12117,9 @@
   }
 
   void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticException {
-    // 1. Generate Resolved Parse tree from syntax tree
     LOG.info("Starting Semantic Analysis");
+    // 1. Generate Resolved Parse tree from syntax tree
+    boolean needsTransform = needsTransform();
     //change the location of position alias process here
     processPositionAlias(ast);
     PlannerContext plannerCtx = pcf.create();
@@ -12138,7 +12139,6 @@
     // Otherwise we have to wait until after the masking/filtering step.
     boolean isCacheEnabled = isResultsCacheEnabled();
     QueryResultsCache.LookupInfo lookupInfo = null;
-    boolean needsTransform = needsTransform();
     if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) {
       lookupInfo = createLookupInfoForQuery(ast);
       if (checkResultsCache(lookupInfo)) {
@@ -12146,32 +12146,45 @@
       }
     }
 
+    ASTNode astForMasking;
+    if (isCBOExecuted() && needsTransform &&
+        (qb.isCTAS() || qb.isView() || qb.isMaterializedView() || qb.isMultiDestQuery())) {
+      // If we use CBO and we may apply masking/filtering policies, we create a copy of the ast.
+      // The reason is that the generation of the operator tree may modify the initial ast,
+      // but if we need to parse for a second time, we would like to parse the unmodified ast.
+      astForMasking = (ASTNode) ParseDriver.adaptor.dupTree(ast);
+    } else {
+      astForMasking = ast;
+    }
+
     // 2. Gen OP Tree from resolved Parse Tree
     Operator sinkOp = genOPTree(ast, plannerCtx);
 
+    boolean usesMasking = false;
     if (!unparseTranslator.isEnabled() &&
         (tableMask.isEnabled() && analyzeRewrite == null)) {
       // Here we rewrite the * and also the masking table
-      ASTNode tree = rewriteASTWithMaskAndFilter(tableMask, ast, ctx.getTokenRewriteStream(),
+      ASTNode rewrittenAST = rewriteASTWithMaskAndFilter(tableMask, astForMasking, ctx.getTokenRewriteStream(),
           ctx, db, tabNameToTabObject, ignoredTokens);
-      if (tree != ast) {
+      if (astForMasking != rewrittenAST) {
+        usesMasking = true;
         plannerCtx = pcf.create();
         ctx.setSkipTableMasking(true);
         init(true);
         //change the location of position alias process here
-        processPositionAlias(tree);
-        genResolvedParseTree(tree, plannerCtx);
+        processPositionAlias(rewrittenAST);
+        genResolvedParseTree(rewrittenAST, plannerCtx);
         if (this instanceof CalcitePlanner) {
           ((CalcitePlanner) this).resetCalciteConfiguration();
         }
-        sinkOp = genOPTree(tree, plannerCtx);
+        sinkOp = genOPTree(rewrittenAST, plannerCtx);
       }
     }
 
     // Check query results cache
-    // In the case that row or column masking/filtering was required, the cache must be checked
-    // here, after applying the masking/filtering rewrite rules to the AST.
-    if (isCacheEnabled && needsTransform && queryTypeCanUseCache()) {
+    // In the case that row or column masking/filtering was required, we do not support caching.
+    // TODO: Enable caching for queries with masking/filtering
+    if (isCacheEnabled && needsTransform && !usesMasking && queryTypeCanUseCache()) {
       lookupInfo = createLookupInfoForQuery(ast);
       if (checkResultsCache(lookupInfo)) {
         return;
diff --git a/ql/src/test/queries/clientpositive/masking_13.q b/ql/src/test/queries/clientpositive/masking_13.q
new file mode 100644
index 0000000..bb050b5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_13.q
@@ -0,0 +1,28 @@
+--! qt:dataset:srcpart
+--! qt:dataset:src
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table masking_test as select cast(key as int) as key, value from src;
+
+explain select * from masking_test;
+select * from masking_test;
+
+create table new_masking_test_nx as
+select * from masking_test;
+select * from new_masking_test_nx;
+
+create view `masking_test_view` as select key from `masking_test`;
+
+explain
+select key from `masking_test_view`;
+select key from `masking_test_view`;
+
+create table `my_table_masked` (key int);
+insert into `my_table_masked` select key from `masking_test_view`;
+select * from `my_table_masked`;
+
+create table new_masking_test_nx_2 as
+select * from masking_test_view;
+
+select * from new_masking_test_nx_2;
diff --git a/ql/src/test/queries/clientpositive/results_cache_with_masking.q b/ql/src/test/queries/clientpositive/results_cache_with_masking.q
index 1bb9c9a..688ea24 100644
--- a/ql/src/test/queries/clientpositive/results_cache_with_masking.q
+++ b/ql/src/test/queries/clientpositive/results_cache_with_masking.q
@@ -11,7 +11,8 @@
 select key, count(*) from masking_test_n7 group by key;
 select key, count(*) from masking_test_n7 group by key;
 
--- This time we should use the cache
+-- It will not use the cache because masking/filtering is applied to the query
+-- TODO: We should use the cache
 explain
 select key, count(*) from masking_test_n7 group by key;
 select key, count(*) from masking_test_n7 group by key;
diff --git a/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out b/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out
index 20a2e8d..e0e48b0 100644
--- a/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out
+++ b/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out
@@ -92,15 +92,62 @@
 select key, count(*) from masking_test_n7 group by key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: masking_test_n7
+                  filterExpr: (((key % 2) = 0) and (key < 10)) (type: boolean)
+                  Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+                    Statistics: Num rows: 250 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: key (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 250 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 250 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 125 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 125 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
         ListSink
-      Cached Query Result: true
 
 PREHOOK: query: select key, count(*) from masking_test_n7 group by key
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/masking_13.q.out b/ql/src/test/results/clientpositive/masking_13.q.out
new file mode 100644
index 0000000..8fa3a41
--- /dev/null
+++ b/ql/src/test/results/clientpositive/masking_13.q.out
@@ -0,0 +1,206 @@
+PREHOOK: query: create table masking_test as select cast(key as int) as key, value from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test
+POSTHOOK: query: create table masking_test as select cast(key as int) as key, value from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test
+POSTHOOK: Lineage: masking_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: masking_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain select * from masking_test
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from masking_test
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: masking_test
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: int), reverse(value) (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from masking_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from masking_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+0	0_lav
+4	4_lav
+8	8_lav
+0	0_lav
+0	0_lav
+2	2_lav
+PREHOOK: query: create table new_masking_test_nx as
+select * from masking_test
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@masking_test
+PREHOOK: Output: database:default
+PREHOOK: Output: default@new_masking_test_nx
+POSTHOOK: query: create table new_masking_test_nx as
+select * from masking_test
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@new_masking_test_nx
+POSTHOOK: Lineage: new_masking_test_nx.key SIMPLE [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: new_masking_test_nx.value EXPRESSION [(masking_test)masking_test.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from new_masking_test_nx
+PREHOOK: type: QUERY
+PREHOOK: Input: default@new_masking_test_nx
+#### A masked pattern was here ####
+POSTHOOK: query: select * from new_masking_test_nx
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@new_masking_test_nx
+#### A masked pattern was here ####
+0	0_lav
+4	4_lav
+8	8_lav
+0	0_lav
+0	0_lav
+2	2_lav
+PREHOOK: query: create view `masking_test_view` as select key from `masking_test`
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@masking_test
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test_view
+POSTHOOK: query: create view `masking_test_view` as select key from `masking_test`
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test_view
+POSTHOOK: Lineage: masking_test_view.key SIMPLE [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: explain
+select key from `masking_test_view`
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key from `masking_test_view`
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: masking_test
+            properties:
+              insideView TRUE
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((key % 2) = 0) and (key < 10) and (key > 6)) (type: boolean)
+              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger((UDFToDouble(key) / 2.0D)) (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key from `masking_test_view`
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@masking_test_view
+#### A masked pattern was here ####
+POSTHOOK: query: select key from `masking_test_view`
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@masking_test_view
+#### A masked pattern was here ####
+4
+PREHOOK: query: create table `my_table_masked` (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@my_table_masked
+POSTHOOK: query: create table `my_table_masked` (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@my_table_masked
+PREHOOK: query: insert into `my_table_masked` select key from `masking_test_view`
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@masking_test_view
+PREHOOK: Output: default@my_table_masked
+POSTHOOK: query: insert into `my_table_masked` select key from `masking_test_view`
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@masking_test_view
+POSTHOOK: Output: default@my_table_masked
+POSTHOOK: Lineage: my_table_masked.key EXPRESSION [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from `my_table_masked`
+PREHOOK: type: QUERY
+PREHOOK: Input: default@my_table_masked
+#### A masked pattern was here ####
+POSTHOOK: query: select * from `my_table_masked`
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@my_table_masked
+#### A masked pattern was here ####
+4
+PREHOOK: query: create table new_masking_test_nx_2 as
+select * from masking_test_view
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@masking_test_view
+PREHOOK: Output: database:default
+PREHOOK: Output: default@new_masking_test_nx_2
+POSTHOOK: query: create table new_masking_test_nx_2 as
+select * from masking_test_view
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@masking_test_view
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@new_masking_test_nx_2
+POSTHOOK: Lineage: new_masking_test_nx_2.key EXPRESSION [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from new_masking_test_nx_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@new_masking_test_nx_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from new_masking_test_nx_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@new_masking_test_nx_2
+#### A masked pattern was here ####
+4