IMPALA-10482, IMPALA-10493: Fix bugs in full ACID collection query rewrites

IMPALA-10482: SELECT * query on unrelative collection column of
transactional ORC table will hit IllegalStateException.

The AcidRewriter will rewrite queries like
"select item from my_complex_orc.int_array" to
"select item from my_complex_orc t, t.int_array"

This causes trouble in star expansion. The original query
"select * from my_complex_orc.int_array" is analyzed as
"select item from my_complex_orc.int_array"

But the rewritten query "select * from my_complex_orc t, t.int_array" is
analyzed as "select id, item from my_complex_orc t, t.int_array".

The automatically added (hidden) table refs can also cause issues during
regular column resolution, e.g. when the table has top-level
'pos'/'item'/'key'/'value' columns.

The workaround is to keep track of the automatically added table refs
during query rewrite. So when we analyze the rewritten query we can
ignore these auxiliary table refs.

IMPALA-10493: Using JOIN ON syntax to join two full ACID collections
produces wrong results.

When AcidRewriter.splitCollectionRef() creates a new collection ref
it doesn't copy all the information needed to correctly execute the
query. E.g. it dropped the ON clause, turning INNER joins into CROSS
joins.

Testing:
 * added e2e tests

Change-Id: I8fc758d3c1e75c7066936d590aec8bff8d2b00b0
Reviewed-on: http://gerrit.cloudera.org:8080/17038
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 916de22..0f6cc658 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -1106,6 +1106,7 @@
     if (pathType == PathType.SLOT_REF || pathType == PathType.STAR) {
       // Paths rooted at all of the unique registered tuple descriptors.
       for (TableRef tblRef: tableRefMap_.values()) {
+        if (tblRef.isHidden()) continue;
         candidates.add(new Path(tblRef.getDesc(), rawPath));
       }
     } else {
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 8acdcb6..ef175eb 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -575,6 +575,7 @@
       }
       // expand in From clause order
       for (TableRef tableRef: fromClause_) {
+        if (tableRef.isHidden()) continue;
         if (analyzer_.isSemiJoined(tableRef.getId())) continue;
         Path resolvedPath = new Path(tableRef.getDesc(),
             Collections.<String>emptyList());
diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
index b8099ad..3bc0f39 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
@@ -1827,10 +1827,20 @@
       analyzer.removeAlias(collTblRef.getUniqueAlias());
       TableRef newCollTblRef =
           TableRef.newTableRef(analyzer, newCollPath, collTblRef.getUniqueAlias());
+      // Set JOIN attributes. Please note that we cannot use TableRef.setJoinAttrs()
+      // because we only want to copy the ON clause and the plan hints. The col names
+      // in USING have already been converted to an ON clause. We let the analyzer/
+      // planner figure out the other attributes.
+      newCollTblRef.setOnClause(collTblRef.getOnClause());
+      newCollTblRef.setJoinHints(collTblRef.getJoinHints());
+      newCollTblRef.setTableHints(collTblRef.getTableHints());
       // Substitute the old collection ref to 'newCollTblRef'.
       stmt.fromClause_.set(tableRefIdx, newCollTblRef);
       // Insert the base table ref in front of the collection ref.
       stmt.fromClause_.add(tableRefIdx, baseTblRef);
+      // The newly added table ref should be hidden, e.g. it shouldn't affect star
+      // expansion or column resolution.
+      baseTblRef.setHidden(true);
     }
 
     private List<String> generatePathFrom(TableName tblName) {
diff --git a/fe/src/main/java/org/apache/impala/analysis/TableRef.java b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
index 3a9b358..d6b312c 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
@@ -150,6 +150,10 @@
   // END: Members that need to be reset()
   /////////////////////////////////////////
 
+  // true if this table ref is hidden, because e.g. it was generated during statement
+  // rewrite.
+  private boolean isHidden_ = false;
+
   /**
    * Returns a new, resolved, and analyzed table ref.
    */
@@ -228,6 +232,7 @@
     desc_ = other.desc_;
     exposeNestedColumnsByTableMaskView_ = other.exposeNestedColumnsByTableMaskView_;
     scalarColumns_ = new LinkedHashMap<>(other.scalarColumns_);
+    isHidden_ = other.isHidden_;
   }
 
   @Override
@@ -343,6 +348,9 @@
     return exposeNestedColumnsByTableMaskView_;
   }
 
+  public void setHidden(boolean isHidden) { isHidden_ = isHidden; }
+  public boolean isHidden() { return isHidden_; }
+
   public void setJoinHints(List<PlanHint> hints) {
     Preconditions.checkNotNull(hints);
     joinHints_ = hints;
@@ -736,4 +744,4 @@
     joinHints_ = new ArrayList<>();
     tableHints_ = new ArrayList<>();
   }
-}
\ No newline at end of file
+}
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index 7d6c8d1..59c0028 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -4550,7 +4550,7 @@
     testNumberOfMembers(ValuesStmt.class, 0);
 
     // Also check TableRefs.
-    testNumberOfMembers(TableRef.class, 25);
+    testNumberOfMembers(TableRef.class, 26);
     testNumberOfMembers(BaseTableRef.class, 0);
     testNumberOfMembers(InlineViewRef.class, 9);
   }
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 6fafd3f..e209cee 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -800,6 +800,20 @@
 ---- DATASET
 functional
 ---- BASE_TABLE_NAME
+pos_item_key_value_complextypestbl
+---- COLUMNS
+pos bigint
+item int
+key string
+value int
+int_array array<int>
+int_map map<string, int>
+---- DEPENDENT_LOAD_HIVE
+INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT id, id, CAST(id AS STRING), CAST(id AS STRING), int_array, int_map FROM {db_name}{db_suffix}.complextypestbl;
+====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
 complextypestbl_non_transactional
 ---- COLUMNS
 id bigint
diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv
index 66854c8..ad52f4f 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -92,6 +92,8 @@
 table_name:complextypestbl_medium, constraint:restrict_to, table_format:parquet/none/none
 table_name:complextypestbl_medium, constraint:restrict_to, table_format:orc/def/block
 table_name:complextypestbl_non_transactional, constraint:restrict_to, table_format:orc/def/block
+table_name:pos_item_key_value_complextypestbl, constraint:restrict_to, table_format:orc/def/block
+table_name:pos_item_key_value_complextypestbl, constraint:restrict_to, table_format:parquet/none/none
 
 table_name:alltypeserror, constraint:exclude, table_format:parquet/none/none
 table_name:alltypeserrornonulls, constraint:exclude, table_format:parquet/none/none
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-scanner-basic.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-scanner-basic.test
index 3caca5b..266d904 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-types-scanner-basic.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-scanner-basic.test
@@ -234,3 +234,198 @@
 ---- TYPES
 bigint
 ====
+---- QUERY
+# IMPALA-10482: Select-star query on unrelative collection column of transactional
+# table hits IllegalStateException
+select * from complextypestbl.int_array;
+---- RESULTS
+1
+2
+3
+NULL
+1
+2
+NULL
+3
+NULL
+-1
+---- TYPES
+INT
+====
+---- QUERY
+# IMPALA-10482: Select-star query on unrelative collection column of transactional
+# table hits IllegalStateException
+select * from complextypestbl.int_array_array.item
+---- RESULTS
+-1
+-2
+1
+2
+3
+4
+NULL
+1
+2
+NULL
+3
+NULL
+4
+5
+6
+---- TYPES
+INT
+====
+---- QUERY
+# IMPALA-10482: Select-star query on unrelative collection column of transactional
+# table hits IllegalStateException
+select * from complextypestbl.nested_struct.c.d.item;
+---- RESULTS
+-1,'nonnullable'
+10,'aaa'
+-10,'bbb'
+11,'c'
+NULL,'NULL'
+10,'aaa'
+NULL,'NULL'
+-10,'bbb'
+NULL,'NULL'
+11,'c'
+NULL,'NULL'
+NULL,'NULL'
+---- TYPES
+INT,STRING
+====
+---- QUERY
+# IMPALA-10482: Select-star query on unrelative collection column of transactional
+# table hits IllegalStateException
+select * from complextypestbl.int_array a1, complextypestbl.int_array a2
+order by 1,2
+limit 10;
+---- RESULTS
+-1,-1
+-1,1
+-1,1
+-1,2
+-1,2
+-1,3
+-1,3
+-1,NULL
+-1,NULL
+-1,NULL
+---- TYPES
+INT,INT
+====
+---- QUERY
+select *
+from complextypestbl.int_array a1, complextypestbl.int_array a2
+where a1.item=a2.item and a1.item < 2;
+---- RESULTS
+1,1
+1,1
+1,1
+1,1
+-1,-1
+---- TYPES
+INT,INT
+====
+---- QUERY
+# IMPALA-10493: Use ON clause to join full ACID collections
+select *
+from complextypestbl.int_array a1 join complextypestbl.int_array a2
+on a1.item=a2.item
+where a1.item < 2;
+---- RESULTS
+1,1
+1,1
+1,1
+1,1
+-1,-1
+---- TYPES
+INT,INT
+====
+---- QUERY
+# IMPALA-10493: Use USING clause to join full ACID collections
+select a1.item, a2.item
+from complextypestbl.int_array a1 join complextypestbl.int_array a2
+using (item) where a1.item != 1;
+---- RESULTS
+-1,-1
+2,2
+2,2
+3,3
+3,3
+2,2
+2,2
+3,3
+3,3
+---- TYPES
+INT,INT
+====
+---- QUERY
+select count(*) from complextypestbl.int_array a1, complextypestbl.int_array a2;
+---- RESULTS
+100
+====
+---- QUERY
+select count(*) from complextypestbl.int_array a1, complextypestbl.int_array a2
+where a1.item=a2.item and a1.item > 1;
+---- RESULTS
+8
+====
+---- QUERY
+select pos, item from pos_item_key_value_complextypestbl;
+---- RESULTS
+1,1
+2,2
+3,3
+4,4
+5,5
+6,6
+7,7
+8,8
+---- TYPES
+BIGINT,INT
+====
+---- QUERY
+select pos, item from pos_item_key_value_complextypestbl.int_array;
+---- RESULTS
+0,-1
+0,1
+1,2
+2,3
+0,NULL
+1,1
+2,2
+3,NULL
+4,3
+5,NULL
+---- TYPES
+BIGINT,INT
+====
+---- QUERY
+select key, value from pos_item_key_value_complextypestbl;
+---- RESULTS
+'1',1
+'2',2
+'3',3
+'4',4
+'5',5
+'6',6
+'7',7
+'8',8
+---- TYPES
+STRING,INT
+====
+---- QUERY
+select key, value from pos_item_key_value_complextypestbl.int_map;
+---- RESULTS
+'k1',-1
+'k1',1
+'k2',100
+'k1',2
+'k2',NULL
+'k1',NULL
+'k3',NULL
+---- TYPES
+STRING,INT
+====