HIVE-29084: Wrong results for LATERAL VIEW queries due to incorrect WHERE filter removal (#6014)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 4819138..be5aabe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -599,15 +599,7 @@ private QueryBlockInfo convertSource(RelNode r) throws CalciteSemanticException
}
} else if (isLateralView(r)) {
TableFunctionScan tfs = ((TableFunctionScan) r);
-
- // retrieve the base table source.
- QueryBlockInfo tableFunctionSource = convertSource(tfs.getInput(0));
- String sqAlias = tableFunctionSource.schema.get(0).table;
- // the schema will contain the base table source fields
- s = new Schema(tfs, sqAlias);
-
- ast = createASTLateralView(tfs, s, tableFunctionSource, sqAlias);
-
+ return createASTLateralView(tfs, convertSource(tfs.getInput(0)), nextAlias());
} else if (r instanceof TableSpool) {
TableSpool spool = (TableSpool) r;
ASTConverter cteConverter =
@@ -661,8 +653,8 @@ private ASTNode pkFkHint(int fkTableIndex, boolean nonFkSideIsFiltered) {
}
}
- private static ASTNode createASTLateralView(TableFunctionScan tfs, Schema s,
- QueryBlockInfo tableFunctionSource, String sqAlias) {
+ private static QueryBlockInfo createASTLateralView(TableFunctionScan tfs, QueryBlockInfo tableFunctionSource,
+ String alias) {
// The structure of the AST LATERAL VIEW will be:
//
// TOK_LATERAL_VIEW
@@ -683,7 +675,7 @@ private static ASTNode createASTLateralView(TableFunctionScan tfs, Schema s,
RexCall lateralCall = (RexCall) tfs.getCall();
RexCall call = (RexCall) lateralCall.getOperands().get(0);
for (RexNode rn : call.getOperands()) {
- ASTNode expr = rn.accept(new RexVisitor(s, rn instanceof RexLiteral,
+ ASTNode expr = rn.accept(new RexVisitor(tableFunctionSource.schema, rn instanceof RexLiteral,
tfs.getCluster().getRexBuilder()));
children.add(expr);
}
@@ -695,16 +687,15 @@ private static ASTNode createASTLateralView(TableFunctionScan tfs, Schema s,
// Add only the table generated size columns to the select expr for the function,
// skipping over the base table columns from the input side of the join.
- int i = 0;
- for (ColumnInfo c : s) {
- if (i++ < tableFunctionSource.schema.size()) {
- continue;
- }
- selexpr.add(HiveParser.Identifier, c.column);
+ List<RelDataTypeField> lvFields = tfs.getRowType().getFieldList()
+ .subList(tableFunctionSource.schema.size(), tfs.getRowType().getFieldCount());
+ for (RelDataTypeField field : lvFields) {
+ selexpr.add(HiveParser.Identifier, field.getName());
}
+
// add the table alias for the lateral view.
ASTBuilder tabAlias = ASTBuilder.construct(HiveParser.TOK_TABALIAS, "TOK_TABALIAS");
- tabAlias.add(HiveParser.Identifier, sqAlias);
+ tabAlias.add(HiveParser.Identifier, alias);
// add the table alias to the SEL_EXPR
selexpr.add(tabAlias.node());
@@ -720,7 +711,8 @@ private static ASTNode createASTLateralView(TableFunctionScan tfs, Schema s,
// finally, add the LATERAL VIEW clause under the left side source which is the base table.
lateralview.add(tableFunctionSource.ast);
- return lateralview.node();
+ Schema outputSchema = new Schema(tableFunctionSource.schema, new Schema(alias, lvFields));
+ return new QueryBlockInfo(outputSchema, lateralview.node());
}
private boolean isLateralView(RelNode relNode) {
diff --git a/ql/src/test/queries/clientpositive/lateral_view_cbo_ppd_filter_loss.q b/ql/src/test/queries/clientpositive/lateral_view_cbo_ppd_filter_loss.q
new file mode 100644
index 0000000..51814c9
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/lateral_view_cbo_ppd_filter_loss.q
@@ -0,0 +1,28 @@
+-- SORT_QUERY_RESULTS
+
+-- Verifies PPD doesn't eliminate an OR filter on the values of the same LV column
+SELECT t.key, lv.col
+FROM (SELECT stack(2, '0', '1') AS key) t
+LATERAL VIEW explode(array('2', '3')) lv AS col
+WHERE t.key = '0' OR lv.col = '3';
+
+-- Verifies PPD doesn't eliminate inequality filter between base table and LV columns
+SELECT t.key, lv.col
+FROM (SELECT '1' AS key) t
+LATERAL VIEW explode(array('1', '2')) lv AS col
+WHERE t.key != lv.col;
+
+-- Verifies PPD doesn't eliminate OR filter between different LV columns
+SELECT t.*, lv1.col1, lv2.col2
+FROM (SELECT 1) t
+LATERAL VIEW explode(array('a', 'b')) lv1 AS col1
+LATERAL VIEW explode(array('b', 'c')) lv2 AS col2
+WHERE lv1.col1 = 'a' OR lv2.col2 = 'c';
+
+-- Verifies PPD doesn't eliminate complex filter with three LV columns
+SELECT t.*, lv1.col1, lv2.col2, lv3.col3
+FROM (SELECT 1) t
+LATERAL VIEW explode(array('x', 'y')) lv1 AS col1
+LATERAL VIEW explode(array('x', 'y')) lv2 AS col2
+LATERAL VIEW explode(array('x', 'y')) lv3 AS col3
+WHERE lv1.col1 != lv2.col2 AND lv2.col2 != lv3.col3;
diff --git a/ql/src/test/results/clientpositive/llap/lateral_view_cbo_ppd_filter_loss.q.out b/ql/src/test/results/clientpositive/llap/lateral_view_cbo_ppd_filter_loss.q.out
new file mode 100644
index 0000000..3583b24
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/lateral_view_cbo_ppd_filter_loss.q.out
@@ -0,0 +1,71 @@
+PREHOOK: query: SELECT t.key, lv.col
+FROM (SELECT stack(2, '0', '1') AS key) t
+LATERAL VIEW explode(array('2', '3')) lv AS col
+WHERE t.key = '0' OR lv.col = '3'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t.key, lv.col
+FROM (SELECT stack(2, '0', '1') AS key) t
+LATERAL VIEW explode(array('2', '3')) lv AS col
+WHERE t.key = '0' OR lv.col = '3'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+0 2
+0 3
+1 3
+PREHOOK: query: SELECT t.key, lv.col
+FROM (SELECT '1' AS key) t
+LATERAL VIEW explode(array('1', '2')) lv AS col
+WHERE t.key != lv.col
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t.key, lv.col
+FROM (SELECT '1' AS key) t
+LATERAL VIEW explode(array('1', '2')) lv AS col
+WHERE t.key != lv.col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1 2
+PREHOOK: query: SELECT t.*, lv1.col1, lv2.col2
+FROM (SELECT 1) t
+LATERAL VIEW explode(array('a', 'b')) lv1 AS col1
+LATERAL VIEW explode(array('b', 'c')) lv2 AS col2
+WHERE lv1.col1 = 'a' OR lv2.col2 = 'c'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t.*, lv1.col1, lv2.col2
+FROM (SELECT 1) t
+LATERAL VIEW explode(array('a', 'b')) lv1 AS col1
+LATERAL VIEW explode(array('b', 'c')) lv2 AS col2
+WHERE lv1.col1 = 'a' OR lv2.col2 = 'c'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1 a b
+1 a c
+1 b c
+PREHOOK: query: SELECT t.*, lv1.col1, lv2.col2, lv3.col3
+FROM (SELECT 1) t
+LATERAL VIEW explode(array('x', 'y')) lv1 AS col1
+LATERAL VIEW explode(array('x', 'y')) lv2 AS col2
+LATERAL VIEW explode(array('x', 'y')) lv3 AS col3
+WHERE lv1.col1 != lv2.col2 AND lv2.col2 != lv3.col3
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t.*, lv1.col1, lv2.col2, lv3.col3
+FROM (SELECT 1) t
+LATERAL VIEW explode(array('x', 'y')) lv1 AS col1
+LATERAL VIEW explode(array('x', 'y')) lv2 AS col2
+LATERAL VIEW explode(array('x', 'y')) lv3 AS col3
+WHERE lv1.col1 != lv2.col2 AND lv2.col2 != lv3.col3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1 x y x
+1 y x y
diff --git a/ql/src/test/results/clientpositive/llap/lineage2.q.out b/ql/src/test/results/clientpositive/llap/lineage2.q.out
index 4419187..f5d8840 100644
--- a/ql/src/test/results/clientpositive/llap/lineage2.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage2.q.out
@@ -724,7 +724,7 @@
PREHOOK: type: QUERY
PREHOOK: Input: default@relations
#### A masked pattern was here ####
-{"version":"1.0","engine":"tez","database":"default","hash":"436a649a0d9540e8f093f8353d86813a","queryText":"select identity, ep1_id from relations\n lateral view explode(ep1_ids) nav_rel as ep1_id","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"relations._col12","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"identity"},{"id":1,"vertexType":"COLUMN","vertexId":"ep1_id"},{"id":2,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":3,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"}]}
+{"version":"1.0","engine":"tez","database":"default","hash":"436a649a0d9540e8f093f8353d86813a","queryText":"select identity, ep1_id from relations\n lateral view explode(ep1_ids) nav_rel as ep1_id","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"nav_rel.ep1_id","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"identity"},{"id":1,"vertexType":"COLUMN","vertexId":"ep1_id"},{"id":2,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":3,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"}]}
PREHOOK: query: insert into rels_exploded select identity, type,
ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id
from relations lateral view explode(ep1_ids) rel1 as ep1_id