HIVE-22666: Introduce TopNKey operator for PTF Reduce Sink (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
index b7c1250..0ccaeea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
@@ -28,6 +28,9 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import java.io.Serializable;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import static org.apache.hadoop.hive.ql.plan.api.OperatorType.TOPNKEY;
@@ -38,10 +41,13 @@
private static final long serialVersionUID = 1L;
- private transient TopNKeyFilter<KeyWrapper> topNKeyFilter;
+ private transient Map<KeyWrapper, TopNKeyFilter<KeyWrapper>> topNKeyFilters;
+ private transient KeyWrapper partitionKeyWrapper;
private transient KeyWrapper keyWrapper;
+ private transient KeyWrapperComparator keyWrapperComparator;
+
/** Kryo ctor. */
public TopNKeyOperator() {
super();
@@ -55,38 +61,62 @@
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
- String columnSortOrder = conf.getColumnSortOrder();
- String nullSortOrder = conf.getNullOrder();
-
ObjectInspector rowInspector = inputObjInspectors[0];
outputObjInspector = rowInspector;
- // init keyFields
- int numKeys = conf.getKeyColumns().size();
- ExprNodeEvaluator[] keyFields = new ExprNodeEvaluator[numKeys];
- ObjectInspector[] keyObjectInspectors = new ObjectInspector[numKeys];
- ObjectInspector[] currentKeyObjectInspectors = new ObjectInspector[numKeys];
+ int numPartitionKeys = conf.getPartitionKeyColumns().size();
+ List<ExprNodeDesc> keyColumns = conf.getKeyColumns().subList(numPartitionKeys, conf.getKeyColumns().size());
+ String columnSortOrder = conf.getColumnSortOrder().substring(numPartitionKeys);
+ String nullSortOrder = conf.getNullOrder().substring(numPartitionKeys);
- for (int i = 0; i < numKeys; i++) {
- ExprNodeDesc key = conf.getKeyColumns().get(i);
+ // init keyFields
+ ObjectInspector[] keyObjectInspectors = new ObjectInspector[keyColumns.size()];
+ ObjectInspector[] currentKeyObjectInspectors = new ObjectInspector[keyColumns.size()];
+ keyWrapper = initObjectInspectors(hconf, keyColumns, rowInspector, keyObjectInspectors, currentKeyObjectInspectors);
+ ObjectInspector[] partitionKeyObjectInspectors = new ObjectInspector[numPartitionKeys];
+ ObjectInspector[] partitionCurrentKeyObjectInspectors = new ObjectInspector[numPartitionKeys];
+ partitionKeyWrapper = initObjectInspectors(hconf, conf.getPartitionKeyColumns(), rowInspector,
+ partitionKeyObjectInspectors, partitionCurrentKeyObjectInspectors);
+
+ keyWrapperComparator = new KeyWrapperComparator(
+ keyObjectInspectors, currentKeyObjectInspectors, columnSortOrder.toString(), nullSortOrder.toString());
+
+ this.topNKeyFilters = new HashMap<>();
+ }
+
+ private KeyWrapper initObjectInspectors(Configuration hconf,
+ List<ExprNodeDesc> keyColumns,
+ ObjectInspector rowInspector,
+ ObjectInspector[] keyObjectInspectors,
+ ObjectInspector[] currentKeyObjectInspectors) throws HiveException {
+ ExprNodeEvaluator[] keyFields = new ExprNodeEvaluator[keyColumns.size()];
+ for (int i = 0; i < keyColumns.size(); i++) {
+ ExprNodeDesc key = keyColumns.get(i);
keyFields[i] = ExprNodeEvaluatorFactory.get(key, hconf);
keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(keyObjectInspectors[i],
- ObjectInspectorUtils.ObjectInspectorCopyOption.WRITABLE);
+ ObjectInspectorUtils.ObjectInspectorCopyOption.WRITABLE);
}
- this.topNKeyFilter = new TopNKeyFilter<>(conf.getTopN(), new KeyWrapperComparator(
- keyObjectInspectors, currentKeyObjectInspectors, columnSortOrder, nullSortOrder));
-
KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors,
currentKeyObjectInspectors);
- keyWrapper = keyWrapperFactory.getKeyWrapper();
+ return keyWrapperFactory.getKeyWrapper();
}
@Override
public void process(Object row, int tag) throws HiveException {
+ partitionKeyWrapper.getNewKey(row, inputObjInspectors[tag]);
+ partitionKeyWrapper.setHashKey();
+
+ TopNKeyFilter<KeyWrapper> topNKeyFilter = topNKeyFilters.get(partitionKeyWrapper);
+ if (topNKeyFilter == null) {
+ topNKeyFilter = new TopNKeyFilter<>(conf.getTopN(), keyWrapperComparator);
+ topNKeyFilters.put(partitionKeyWrapper.copyKey(), topNKeyFilter);
+ }
+
keyWrapper.getNewKey(row, inputObjInspectors[tag]);
keyWrapper.setHashKey();
+
if (topNKeyFilter.canForward(keyWrapper)) {
forward(row, outputObjInspector);
}
@@ -94,7 +124,7 @@
@Override
protected final void closeOp(boolean abort) throws HiveException {
- topNKeyFilter.clear();
+ topNKeyFilters.clear();
super.closeOp(abort);
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java
index ce6efa4..a9ff6b4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java
@@ -26,12 +26,14 @@
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.Collections;
import java.util.List;
import java.util.Stack;
@@ -58,20 +60,25 @@
return null;
}
- // Currently, per partitioning top n key is not supported
- // in TopNKey operator
- if (reduceSinkDesc.isPTFReduceSink()) {
- return null;
- }
-
// Check whether there already is a top n key operator
Operator<? extends OperatorDesc> parentOperator = reduceSinkOperator.getParentOperators().get(0);
if (parentOperator instanceof TopNKeyOperator) {
return null;
}
+ List<ExprNodeDesc> partitionCols = Collections.emptyList();
+ if (reduceSinkDesc.isPTFReduceSink()) {
+ // All keys are partition keys or no keys at all
+ // Note: partition cols are prefix of key cols
+ if (reduceSinkDesc.getPartitionCols().size() >= reduceSinkDesc.getKeyCols().size()) {
+ return null;
+ }
+
+ partitionCols = reduceSinkDesc.getPartitionCols();
+ }
+
TopNKeyDesc topNKeyDesc = new TopNKeyDesc(reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(),
- reduceSinkDesc.getNullOrder(), reduceSinkDesc.getKeyCols());
+ reduceSinkDesc.getNullOrder(), reduceSinkDesc.getKeyCols(), partitionCols);
copyDown(reduceSinkOperator, topNKeyDesc);
return null;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java
index 348fbb5..d2ae723 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java
@@ -154,13 +154,12 @@
final TopNKeyDesc topNKeyDesc = topNKey.getConf();
CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc, groupByDesc);
- if (commonKeyPrefix.isEmpty()) {
+ if (commonKeyPrefix.isEmpty() || commonKeyPrefix.size() == topNKeyDesc.getPartitionKeyColumns().size()) {
return;
}
LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), groupBy.getName());
- final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), commonKeyPrefix.getMappedOrder(),
- commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns());
+ final TopNKeyDesc newTopNKeyDesc = topNKeyDesc.combine(commonKeyPrefix);
pushdown(copyDown(groupBy, newTopNKeyDesc));
if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
@@ -184,13 +183,12 @@
final TopNKeyDesc topNKeyDesc = topNKey.getConf();
CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc, reduceSinkDesc);
- if (commonKeyPrefix.isEmpty()) {
+ if (commonKeyPrefix.isEmpty() || commonKeyPrefix.size() == topNKeyDesc.getPartitionKeyColumns().size()) {
return;
}
LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), reduceSink.getName());
- final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(),
- commonKeyPrefix.getMappedOrder(), commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns());
+ final TopNKeyDesc newTopNKeyDesc = topNKeyDesc.combine(commonKeyPrefix);
pushdown(copyDown(reduceSink, newTopNKeyDesc));
if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
@@ -242,14 +240,13 @@
reduceSinkDesc.getColumnExprMap(),
reduceSinkDesc.getOrder(),
reduceSinkDesc.getNullOrder());
- if (commonKeyPrefix.isEmpty()) {
+ if (commonKeyPrefix.isEmpty() || commonKeyPrefix.size() == topNKeyDesc.getPartitionKeyColumns().size()) {
return;
}
LOG.debug("Pushing a copy of {} through {} and {}",
topNKey.getName(), join.getName(), reduceSinkOperator.getName());
- final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(),
- commonKeyPrefix.getMappedOrder(), commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns());
+ final TopNKeyDesc newTopNKeyDesc = topNKeyDesc.combine(commonKeyPrefix);
pushdown(copyDown(reduceSinkOperator, newTopNKeyDesc));
if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java
index f752967..19910a3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.ql.plan;
+import org.apache.hadoop.hive.ql.optimizer.topnkey.CommonKeyPrefix;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import java.util.ArrayList;
@@ -29,12 +30,14 @@
*/
@Explain(displayName = "Top N Key Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
public class TopNKeyDesc extends AbstractOperatorDesc {
+
private static final long serialVersionUID = 1L;
private int topN;
private String columnSortOrder;
private String nullOrder;
private List<ExprNodeDesc> keyColumns;
+ private List<ExprNodeDesc> partitionKeyColumns;
public TopNKeyDesc() {
}
@@ -43,13 +46,14 @@
final int topN,
final String columnSortOrder,
final String nullOrder,
- final List<ExprNodeDesc> keyColumns) {
+ final List<ExprNodeDesc> keyColumns,
+ final List<ExprNodeDesc> partitionKeyColumns) {
this.topN = topN;
-
this.keyColumns = new ArrayList<>(keyColumns.size());
StringBuilder sortOrder = new StringBuilder(columnSortOrder.length());
StringBuilder nullSortOrder = new StringBuilder(nullOrder.length());
+ this.partitionKeyColumns = new ArrayList<>(partitionKeyColumns.size());
for (int i = 0; i < keyColumns.size(); ++i) {
ExprNodeDesc keyExpression = keyColumns.get(i);
@@ -63,6 +67,13 @@
this.columnSortOrder = sortOrder.toString();
this.nullOrder = nullSortOrder.toString();
+
+ for (ExprNodeDesc keyExpression : partitionKeyColumns) {
+ if (keyExpression instanceof ExprNodeConstantDesc) {
+ continue;
+ }
+ this.partitionKeyColumns.add(keyExpression);
+ }
}
@Explain(displayName = "top n", explainLevels = { Level.DEFAULT, Level.EXTENDED, Level.USER })
@@ -118,6 +129,25 @@
return ret;
}
+ public List<ExprNodeDesc> getPartitionKeyColumns() {
+ return partitionKeyColumns;
+ }
+
+ public void setPartitionKeyColumns(List<ExprNodeDesc> partitionKeyColumns) {
+ this.partitionKeyColumns = partitionKeyColumns;
+ }
+
+ @Explain(displayName = "Map-reduce partition columns")
+ public String getPartitionKeyString() {
+ return PlanUtils.getExprListString(partitionKeyColumns);
+ }
+
+ @Explain(displayName = "PartitionCols", explainLevels = { Level.USER })
+ public String getUserLevelExplainPartitionKeyString() {
+ return PlanUtils.getExprListString(partitionKeyColumns, true);
+ }
+
+
@Override
public boolean isSame(OperatorDesc other) {
if (getClass().getName().equals(other.getClass().getName())) {
@@ -164,4 +194,11 @@
}
return new TopNKeyDescExplainVectorization(this, vectorTopNKeyDesc);
}
+
+ public TopNKeyDesc combine(CommonKeyPrefix commonKeyPrefix) {
+ return new TopNKeyDesc(topN, commonKeyPrefix.getMappedOrder(),
+ commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns(),
+ commonKeyPrefix.getMappedColumns().subList(0, partitionKeyColumns.size()));
+ }
+
}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java
new file mode 100644
index 0000000..fce850f
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec;
+
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+
+import java.util.Comparator;
+import java.util.Objects;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.junit.Test;
+
+/**
+ * Unit test of TopNKeyFilter.
+ */
+public class TestTopNKeyFilter {
+
+ public static final Comparator<TestKeyWrapper> TEST_KEY_WRAPPER_COMPARATOR = Comparator.comparingInt(o -> o.keyValue);
+
+ @Test
+ public void testNothingCanBeForwardedIfTopNIs0() {
+ TopNKeyFilter<TestKeyWrapper> topNKeyFilter = new TopNKeyFilter<>(0, TEST_KEY_WRAPPER_COMPARATOR);
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(false));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(-1)), is(false));
+ }
+
+ @Test
+ public void testFirstTopNKeysCanBeForwarded() {
+ TopNKeyFilter<TestKeyWrapper> topNKeyFilter = new TopNKeyFilter<>(3, TEST_KEY_WRAPPER_COMPARATOR);
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(false));
+ }
+
+ @Test
+ public void testKeyCanNotBeForwardedIfItIsDroppedOutFromTopNKeys() {
+ TopNKeyFilter<TestKeyWrapper> topNKeyFilter = new TopNKeyFilter<>(2, TEST_KEY_WRAPPER_COMPARATOR);
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(true));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(2)), is(true));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(false));
+ }
+
+ @Test
+ public void testMembersOfTopNKeysStillCanBeForwardedAfterNonTopNKeysTried() {
+ TopNKeyFilter<TestKeyWrapper> topNKeyFilter = new TopNKeyFilter<>(2, TEST_KEY_WRAPPER_COMPARATOR);
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(true));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(false));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(3)), is(true));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true));
+ }
+
+ /**
+ * Test implementation of KeyWrapper.
+ */
+ private static class TestKeyWrapper extends KeyWrapper {
+
+ private final int keyValue;
+
+ TestKeyWrapper(int keyValue) {
+ this.keyValue = keyValue;
+ }
+
+ @Override
+ public void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException {
+
+ }
+
+ @Override
+ public void setHashKey() {
+
+ }
+
+ @Override
+ public KeyWrapper copyKey() {
+ return new TestKeyWrapper(this.keyValue);
+ }
+
+ @Override
+ public void copyKey(KeyWrapper oldWrapper) {
+
+ }
+
+ @Override
+ public Object[] getKeyArray() {
+ return new Object[0];
+ }
+
+ @Override
+ public boolean isCopy() {
+ return false;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ TestKeyWrapper that = (TestKeyWrapper) o;
+ return keyValue == that.keyValue;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(keyValue);
+ }
+ }
+}
diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q
index a5b3ce7..96ed1ba 100644
--- a/ql/src/test/queries/clientpositive/subquery_in.q
+++ b/ql/src/test/queries/clientpositive/subquery_in.q
@@ -3,6 +3,7 @@
--! qt:dataset:lineitem
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
+set hive.optimize.topnkey=false;
-- SORT_QUERY_RESULTS
diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q b/ql/src/test/queries/clientpositive/subquery_notin.q
index f863645..f25168a 100644
--- a/ql/src/test/queries/clientpositive/subquery_notin.q
+++ b/ql/src/test/queries/clientpositive/subquery_notin.q
@@ -2,6 +2,7 @@
--! qt:dataset:part
--! qt:dataset:lineitem
set hive.mapred.mode=nonstrict;
+set hive.optimize.topnkey=false;
-- SORT_QUERY_RESULTS
diff --git a/ql/src/test/queries/clientpositive/topnkey_windowing.q b/ql/src/test/queries/clientpositive/topnkey_windowing.q
new file mode 100644
index 0000000..a5352d2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/topnkey_windowing.q
@@ -0,0 +1,110 @@
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1431655765;
+SET hive.vectorized.execution.enabled=false;
+
+
+CREATE TABLE topnkey_windowing (tw_code string, tw_value double);
+INSERT INTO topnkey_windowing VALUES
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109),
+ ('A', 104),
+ ('A', 109),
+ ('A', 109),
+ ('A', 103),
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109),
+ ('A', 101),
+ ('A', 101),
+ ('A', 114),
+ ('A', 120),
+ ('B', 105),
+ ('B', 106),
+ ('B', 106),
+ ('B', NULL),
+ ('B', 106),
+ ('A', 107),
+ ('B', 108),
+ ('A', 102),
+ ('B', 110),
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109);
+
+SET hive.optimize.topnkey=true;
+EXPLAIN
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SET hive.optimize.topnkey=false;
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+
+SET hive.optimize.topnkey=true;
+EXPLAIN extended
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SET hive.optimize.topnkey=false;
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+
+
+SET hive.optimize.topnkey=true;
+EXPLAIN
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SET hive.optimize.topnkey=false;
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+DROP TABLE topnkey_windowing;
diff --git a/ql/src/test/queries/clientpositive/topnkey_windowing_order.q b/ql/src/test/queries/clientpositive/topnkey_windowing_order.q
new file mode 100644
index 0000000..686d72a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/topnkey_windowing_order.q
@@ -0,0 +1,109 @@
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1431655765;
+SET hive.vectorized.execution.enabled=false;
+
+
+CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double);
+INSERT INTO topnkey_windowing VALUES
+ (NULL, NULL, NULL, NULL),
+ (NULL, 'D', 109, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'D', 104, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'C', 109, 9),
+ ('A', 'C', 103, 9),
+ (NULL, NULL, NULL, NULL),
+ (NULL, 'D', 109, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'D', 101, 9),
+ ('A', 'D', 101, 9),
+ ('A', 'D', 114, 9),
+ ('A', 'D', 120, 9),
+ ('B', 'E', 105, 9),
+ ('B', 'E', 106, 9),
+ ('B', 'E', 106, 9),
+ ('B', 'E', NULL, NULL),
+ ('B', 'E', 106, 9),
+ ('A', 'C', 107, 9),
+ ('B', 'E', 108, 9),
+ ('A', 'C', 102, 9),
+ ('B', 'E', 110, 9),
+ (NULL, NULL, NULL, NULL),
+ (NULL, NULL, 109, 9),
+ ('A', 'D', 109, 9);
+
+SET hive.optimize.topnkey=true;
+EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SET hive.optimize.topnkey=false;
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+
+SET hive.optimize.topnkey=true;
+EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SET hive.optimize.topnkey=false;
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+
+SET hive.optimize.topnkey=true;
+EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+SET hive.optimize.topnkey=false;
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3;
+
+DROP TABLE topnkey_windowing;
diff --git a/ql/src/test/queries/clientpositive/vector_windowing_streaming.q b/ql/src/test/queries/clientpositive/vector_windowing_streaming.q
index e1011f9..2f7b628 100644
--- a/ql/src/test/queries/clientpositive/vector_windowing_streaming.q
+++ b/ql/src/test/queries/clientpositive/vector_windowing_streaming.q
@@ -5,6 +5,7 @@
SET hive.vectorized.execution.reduce.enabled=true;
set hive.vectorized.execution.ptf.enabled=true;
set hive.fetch.task.conversion=none;
+set hive.optimize.topnkey=false;
drop table over10k_n8;
diff --git a/ql/src/test/queries/clientpositive/windowing_filter.q b/ql/src/test/queries/clientpositive/windowing_filter.q
index 2483c18..14d0c5a 100644
--- a/ql/src/test/queries/clientpositive/windowing_filter.q
+++ b/ql/src/test/queries/clientpositive/windowing_filter.q
@@ -1,5 +1,6 @@
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=1431655765;
+set hive.optimize.topnkey=false;
create table testtable_n1000 (s_state string, ss_net_profit double);
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
index 4f0e752..ea8fe5e 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -3434,19 +3434,13 @@
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
- Top N Key Operator
- sort order: +
- keys: _col3 (type: string)
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
null sort order: z
+ sort order: +
Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
- top n: 4
- Reduce Output Operator
- key expressions: _col3 (type: string)
- null sort order: z
- sort order: +
- Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index 67e349b..c24b79d 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -4512,7 +4512,7 @@
78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
-Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
PREHOOK: query: explain select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand, p_partkey limit 4
PREHOOK: type: QUERY
PREHOOK: Input: default@part
@@ -4615,23 +4615,17 @@
Filter Operator
predicate: ((_col12 is null or (_col9 = 0L)) and ((_col10 >= _col9) or (_col9 = 0L) or _col12 is not null or _col5 is null) and (_col5 is not null or (_col9 = 0L) or _col12 is not null)) (type: boolean)
Statistics: Num rows: 33 Data size: 20987 Basic stats: COMPLETE Column stats: COMPLETE
- Top N Key Operator
- sort order: ++
- keys: _col3 (type: string), _col0 (type: int)
- null sort order: zz
- Statistics: Num rows: 33 Data size: 20987 Basic stats: COMPLETE Column stats: COMPLETE
- top n: 4
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 33 Data size: 20427 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col3 (type: string), _col0 (type: int)
+ null sort order: zz
+ sort order: ++
Statistics: Num rows: 33 Data size: 20427 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col3 (type: string), _col0 (type: int)
- null sort order: zz
- sort order: ++
- Statistics: Num rows: 33 Data size: 20427 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
@@ -4727,7 +4721,7 @@
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand, p_partkey limit 4
PREHOOK: type: QUERY
PREHOOK: Input: default@part
diff --git a/ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out b/ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out
new file mode 100644
index 0000000..52ba490
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/topnkey_windowing.q.out
@@ -0,0 +1,626 @@
+PREHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@topnkey_windowing
+PREHOOK: query: INSERT INTO topnkey_windowing VALUES
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109),
+ ('A', 104),
+ ('A', 109),
+ ('A', 109),
+ ('A', 103),
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109),
+ ('A', 101),
+ ('A', 101),
+ ('A', 114),
+ ('A', 120),
+ ('B', 105),
+ ('B', 106),
+ ('B', 106),
+ ('B', NULL),
+ ('B', 106),
+ ('A', 107),
+ ('B', 108),
+ ('A', 102),
+ ('B', 110),
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: INSERT INTO topnkey_windowing VALUES
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109),
+ ('A', 104),
+ ('A', 109),
+ ('A', 109),
+ ('A', 103),
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109),
+ ('A', 101),
+ ('A', 101),
+ ('A', 114),
+ ('A', 120),
+ ('B', 105),
+ ('B', 106),
+ ('B', 106),
+ ('B', NULL),
+ ('B', 106),
+ ('A', 107),
+ ('B', 108),
+ ('A', 102),
+ ('B', 110),
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@topnkey_windowing
+POSTHOOK: Lineage: topnkey_windowing.tw_code SCRIPT []
+POSTHOOK: Lineage: topnkey_windowing.tw_value SCRIPT []
+PREHOOK: query: EXPLAIN
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: ++
+ keys: tw_code (type: string), tw_value (type: double)
+ null sort order: az
+ Map-reduce partition columns: tw_code (type: string)
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 4
+ Reduce Output Operator
+ key expressions: tw_code (type: string), tw_value (type: double)
+ null sort order: az
+ sort order: ++
+ Map-reduce partition columns: tw_code (type: string)
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS LAST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: EXPLAIN extended
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN extended
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT *
+FROM (SELECT `tw_code`, RANK() OVER (PARTITION BY 0 ORDER BY `tw_value` ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING) AS `rank_window_0`
+FROM `default`.`topnkey_windowing`) AS `t`
+WHERE `rank_window_0` <= 3
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Top N Key Operator
+ sort order: +
+ keys: tw_value (type: double)
+ null sort order: z
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 4
+ Reduce Output Operator
+ key expressions: 0 (type: int), tw_value (type: double)
+ null sort order: az
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ TopN: 4
+ TopN Hash Memory Usage: 0.1
+ value expressions: tw_code (type: string)
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: topnkey_windowing
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}}
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns tw_code,tw_value
+ columns.comments
+ columns.types string:double
+#### A masked pattern was here ####
+ name default.topnkey_windowing
+ numFiles 1
+ numRows 26
+ rawDataSize 176
+ serialization.ddl struct topnkey_windowing { string tw_code, double tw_value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 202
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}}
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns tw_code,tw_value
+ columns.comments
+ columns.types string:double
+#### A masked pattern was here ####
+ name default.topnkey_windowing
+ numFiles 1
+ numRows 26
+ rawDataSize 176
+ serialization.ddl struct topnkey_windowing { string tw_code, double tw_value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 202
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.topnkey_windowing
+ name: default.topnkey_windowing
+ Truncated Path -> Alias:
+ /topnkey_windowing [topnkey_windowing]
+ Reducer 2
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS LAST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ isSamplingPred: false
+ predicate: (rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:int
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+A 1
+A 1
+A 3
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+A 1
+A 1
+A 3
+PREHOOK: query: EXPLAIN
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: ++
+ keys: tw_code (type: string), tw_value (type: double)
+ null sort order: az
+ Map-reduce partition columns: tw_code (type: string)
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 4
+ Reduce Output Operator
+ key expressions: tw_code (type: string), tw_value (type: double)
+ null sort order: az
+ sort order: ++
+ Map-reduce partition columns: tw_code (type: string)
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS LAST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: dense_rank_window_0
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (dense_rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), dense_rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+NULL 2
+NULL 2
+NULL 2
+A 1
+A 1
+A 2
+A 3
+B 1
+B 2
+B 2
+B 2
+B 3
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+NULL 2
+NULL 2
+NULL 2
+A 1
+A 1
+A 2
+A 3
+B 1
+B 2
+B 2
+B 2
+B 3
+PREHOOK: query: DROP TABLE topnkey_windowing
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@topnkey_windowing
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: DROP TABLE topnkey_windowing
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@topnkey_windowing
+POSTHOOK: Output: default@topnkey_windowing
diff --git a/ql/src/test/results/clientpositive/llap/topnkey_windowing_order.q.out b/ql/src/test/results/clientpositive/llap/topnkey_windowing_order.q.out
new file mode 100644
index 0000000..9721705
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/topnkey_windowing_order.q.out
@@ -0,0 +1,568 @@
+PREHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@topnkey_windowing
+PREHOOK: query: INSERT INTO topnkey_windowing VALUES
+ (NULL, NULL, NULL, NULL),
+ (NULL, 'D', 109, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'D', 104, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'C', 109, 9),
+ ('A', 'C', 103, 9),
+ (NULL, NULL, NULL, NULL),
+ (NULL, 'D', 109, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'D', 101, 9),
+ ('A', 'D', 101, 9),
+ ('A', 'D', 114, 9),
+ ('A', 'D', 120, 9),
+ ('B', 'E', 105, 9),
+ ('B', 'E', 106, 9),
+ ('B', 'E', 106, 9),
+ ('B', 'E', NULL, NULL),
+ ('B', 'E', 106, 9),
+ ('A', 'C', 107, 9),
+ ('B', 'E', 108, 9),
+ ('A', 'C', 102, 9),
+ ('B', 'E', 110, 9),
+ (NULL, NULL, NULL, NULL),
+ (NULL, NULL, 109, 9),
+ ('A', 'D', 109, 9)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: INSERT INTO topnkey_windowing VALUES
+ (NULL, NULL, NULL, NULL),
+ (NULL, 'D', 109, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'D', 104, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'C', 109, 9),
+ ('A', 'C', 103, 9),
+ (NULL, NULL, NULL, NULL),
+ (NULL, 'D', 109, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'D', 101, 9),
+ ('A', 'D', 101, 9),
+ ('A', 'D', 114, 9),
+ ('A', 'D', 120, 9),
+ ('B', 'E', 105, 9),
+ ('B', 'E', 106, 9),
+ ('B', 'E', 106, 9),
+ ('B', 'E', NULL, NULL),
+ ('B', 'E', 106, 9),
+ ('A', 'C', 107, 9),
+ ('B', 'E', 108, 9),
+ ('A', 'C', 102, 9),
+ ('B', 'E', 110, 9),
+ (NULL, NULL, NULL, NULL),
+ (NULL, NULL, 109, 9),
+ ('A', 'D', 109, 9)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@topnkey_windowing
+POSTHOOK: Lineage: topnkey_windowing.tw_a SCRIPT []
+POSTHOOK: Lineage: topnkey_windowing.tw_b SCRIPT []
+POSTHOOK: Lineage: topnkey_windowing.tw_v1 SCRIPT []
+POSTHOOK: Lineage: topnkey_windowing.tw_v2 SCRIPT []
+PREHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: ++
+ keys: tw_a (type: string), tw_v1 (type: double)
+ null sort order: aa
+ Map-reduce partition columns: tw_a (type: string)
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 4
+ Reduce Output Operator
+ key expressions: tw_a (type: string), tw_v1 (type: double)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: tw_a (type: string)
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col2: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col2
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 8395 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2346 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 3
+B 3
+B 3
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 3
+B 3
+B 3
+PREHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: ++-
+ keys: tw_a (type: string), tw_v1 (type: double), tw_v2 (type: double)
+ null sort order: aza
+ Map-reduce partition columns: tw_a (type: string)
+ Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 4
+ Reduce Output Operator
+ key expressions: tw_a (type: string), tw_v1 (type: double), tw_v2 (type: double)
+ null sort order: aza
+ sort order: ++-
+ Map-reduce partition columns: tw_a (type: string)
+ Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: double)
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 26 Data size: 8547 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col2: double, _col3: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS LAST, _col3 DESC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col2, _col3
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 8547 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2378 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: +++
+ keys: tw_a (type: string), tw_b (type: string), tw_v1 (type: double)
+ null sort order: aaz
+ Map-reduce partition columns: tw_a (type: string), tw_b (type: string)
+ Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 4
+ Reduce Output Operator
+ key expressions: tw_a (type: string), tw_b (type: string), tw_v1 (type: double)
+ null sort order: aaz
+ sort order: +++
+ Map-reduce partition columns: tw_a (type: string), tw_b (type: string)
+ Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 26 Data size: 10010 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS LAST
+ partition by: _col0, _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col2
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 10010 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2686 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 2
+NULL 2
+NULL 2
+NULL 1
+NULL 1
+A 1
+A 2
+A 3
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 2
+NULL 2
+NULL 2
+NULL 1
+NULL 1
+A 1
+A 2
+A 3
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: DROP TABLE topnkey_windowing
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@topnkey_windowing
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: DROP TABLE topnkey_windowing
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@topnkey_windowing
+POSTHOOK: Output: default@topnkey_windowing
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out
index 786bbe1..195a7e4 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out
@@ -19,7 +19,7 @@
POSTHOOK: Input: default@store_sales
POSTHOOK: Output: database:default
POSTHOOK: Output: default@mv_store_sales_item_customer
-Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[112][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
PREHOOK: query: explain
select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing
from(select *
@@ -112,10 +112,10 @@
limit:100
Stage-1
Reducer 8 vectorized
- File Output Operator [FS_147]
- Limit [LIM_146] (rows=100 width=218)
+ File Output Operator [FS_149]
+ Limit [LIM_148] (rows=100 width=218)
Number of rows:100
- Select Operator [SEL_145] (rows=6951 width=218)
+ Select Operator [SEL_147] (rows=6951 width=218)
Output:["_col0","_col1","_col2"]
<-Reducer 7 [SIMPLE_EDGE]
SHUFFLE [RS_70]
@@ -123,100 +123,104 @@
Output:["_col0","_col1","_col2"]
Top N Key Operator [TNK_99] (rows=6951 width=218)
keys:_col1,top n:100
- Merge Join Operator [MERGEJOIN_114] (rows=6951 width=218)
- Conds:RS_66._col2=RS_144._col0(Inner),Output:["_col1","_col5","_col7"]
+ Merge Join Operator [MERGEJOIN_116] (rows=6951 width=218)
+ Conds:RS_66._col2=RS_146._col0(Inner),Output:["_col1","_col5","_col7"]
<-Map 12 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_144]
+ SHUFFLE [RS_146]
PartitionCols:_col0
- Select Operator [SEL_142] (rows=462000 width=111)
+ Select Operator [SEL_144] (rows=462000 width=111)
Output:["_col0","_col1"]
TableScan [TS_56] (rows=462000 width=111)
default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"]
<-Reducer 6 [SIMPLE_EDGE]
SHUFFLE [RS_66]
PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_113] (rows=6951 width=115)
- Conds:RS_63._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col5"]
+ Merge Join Operator [MERGEJOIN_115] (rows=6951 width=115)
+ Conds:RS_63._col0=RS_145._col0(Inner),Output:["_col1","_col2","_col5"]
<-Map 12 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_143]
+ SHUFFLE [RS_145]
PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_142]
+ Please refer to the previous Select Operator [SEL_144]
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_63]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_112] (rows=6951 width=12)
- Conds:RS_136._col1=RS_141._col1(Inner),Output:["_col0","_col1","_col2"]
+ Merge Join Operator [MERGEJOIN_114] (rows=6951 width=12)
+ Conds:RS_138._col1=RS_143._col1(Inner),Output:["_col0","_col1","_col2"]
<-Reducer 4 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_136]
+ SHUFFLE [RS_138]
PartitionCols:_col1
- Select Operator [SEL_135] (rows=6951 width=8)
+ Select Operator [SEL_137] (rows=6951 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_134] (rows=6951 width=116)
+ Filter Operator [FIL_136] (rows=6951 width=116)
predicate:(rank_window_0 < 11)
- PTF Operator [PTF_133] (rows=20854 width=116)
+ PTF Operator [PTF_135] (rows=20854 width=116)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}]
- Select Operator [SEL_132] (rows=20854 width=116)
+ Select Operator [SEL_134] (rows=20854 width=116)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE]
SHUFFLE [RS_21]
PartitionCols:0
- Filter Operator [FIL_20] (rows=20854 width=228)
- predicate:(_col1 > (0.9 * _col2))
- Merge Join Operator [MERGEJOIN_110] (rows=62562 width=228)
- Conds:(Inner),Output:["_col0","_col1","_col2"]
- <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_131]
- Select Operator [SEL_130] (rows=1 width=112)
- Output:["_col0"]
- Filter Operator [FIL_129] (rows=1 width=120)
- predicate:(_col1 is not null and _col2 is not null)
- Select Operator [SEL_128] (rows=1 width=120)
- Output:["_col1","_col2"]
- Group By Operator [GBY_127] (rows=1 width=124)
+ Top N Key Operator [TNK_100] (rows=20854 width=228)
+ keys:_col1,top n:11
+ Filter Operator [FIL_20] (rows=20854 width=228)
+ predicate:(_col1 > (0.9 * _col2))
+ Merge Join Operator [MERGEJOIN_112] (rows=62562 width=228)
+ Conds:(Inner),Output:["_col0","_col1","_col2"]
+ <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_133]
+ Select Operator [SEL_132] (rows=1 width=112)
+ Output:["_col0"]
+ Filter Operator [FIL_131] (rows=1 width=120)
+ predicate:(_col1 is not null and _col2 is not null)
+ Select Operator [SEL_130] (rows=1 width=120)
+ Output:["_col1","_col2"]
+ Group By Operator [GBY_129] (rows=1 width=124)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_128]
+ PartitionCols:_col0
+ Group By Operator [GBY_127] (rows=258 width=124)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true
+ Select Operator [SEL_126] (rows=287946 width=114)
+ Output:["_col1"]
+ Filter Operator [FIL_125] (rows=287946 width=114)
+ predicate:(ss_hdemo_sk is null and (ss_store_sk = 410))
+ TableScan [TS_8] (rows=575995635 width=114)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"]
+ <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_124]
+ Select Operator [SEL_123] (rows=62562 width=116)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_122] (rows=62562 width=124)
+ predicate:(_col1 is not null and _col2 is not null)
+ Group By Operator [GBY_121] (rows=62562 width=124)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
- <-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_126]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_120]
PartitionCols:_col0
- Group By Operator [GBY_125] (rows=258 width=124)
- Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true
- Select Operator [SEL_124] (rows=287946 width=114)
- Output:["_col1"]
- Filter Operator [FIL_123] (rows=287946 width=114)
- predicate:(ss_hdemo_sk is null and (ss_store_sk = 410))
- TableScan [TS_8] (rows=575995635 width=114)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"]
- <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_122]
- Select Operator [SEL_121] (rows=62562 width=116)
- Output:["_col0","_col1"]
- Filter Operator [FIL_120] (rows=62562 width=124)
- predicate:(_col1 is not null and _col2 is not null)
- Group By Operator [GBY_119] (rows=62562 width=124)
- Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_118]
- PartitionCols:_col0
- Group By Operator [GBY_117] (rows=3199976 width=124)
- Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk
- Select Operator [SEL_116] (rows=6399952 width=114)
- Output:["ss_item_sk","ss_net_profit"]
- Filter Operator [FIL_115] (rows=6399952 width=114)
- predicate:(ss_store_sk = 410)
- TableScan [TS_0] (rows=575995635 width=114)
- default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"]
+ Group By Operator [GBY_119] (rows=3199976 width=124)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk
+ Select Operator [SEL_118] (rows=6399952 width=114)
+ Output:["ss_item_sk","ss_net_profit"]
+ Filter Operator [FIL_117] (rows=6399952 width=114)
+ predicate:(ss_store_sk = 410)
+ TableScan [TS_0] (rows=575995635 width=114)
+ default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"]
<-Reducer 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_141]
+ SHUFFLE [RS_143]
PartitionCols:_col1
- Select Operator [SEL_140] (rows=6951 width=8)
+ Select Operator [SEL_142] (rows=6951 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_139] (rows=6951 width=116)
+ Filter Operator [FIL_141] (rows=6951 width=116)
predicate:(rank_window_0 < 11)
- PTF Operator [PTF_138] (rows=20854 width=116)
+ PTF Operator [PTF_140] (rows=20854 width=116)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}]
- Select Operator [SEL_137] (rows=20854 width=116)
+ Select Operator [SEL_139] (rows=20854 width=116)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE]
SHUFFLE [RS_49]
PartitionCols:0
- Please refer to the previous Filter Operator [FIL_20]
+ Top N Key Operator [TNK_101] (rows=20854 width=228)
+ keys:_col1,top n:11
+ Please refer to the previous Filter Operator [FIL_20]
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out
index eace7a0..8591d36 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[112][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
PREHOOK: query: explain
select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing
from(select *
@@ -91,10 +91,10 @@
limit:100
Stage-1
Reducer 8 vectorized
- File Output Operator [FS_147]
- Limit [LIM_146] (rows=100 width=218)
+ File Output Operator [FS_149]
+ Limit [LIM_148] (rows=100 width=218)
Number of rows:100
- Select Operator [SEL_145] (rows=6951 width=218)
+ Select Operator [SEL_147] (rows=6951 width=218)
Output:["_col0","_col1","_col2"]
<-Reducer 7 [SIMPLE_EDGE]
SHUFFLE [RS_70]
@@ -102,100 +102,104 @@
Output:["_col0","_col1","_col2"]
Top N Key Operator [TNK_99] (rows=6951 width=218)
keys:_col1,top n:100
- Merge Join Operator [MERGEJOIN_114] (rows=6951 width=218)
- Conds:RS_66._col2=RS_144._col0(Inner),Output:["_col1","_col5","_col7"]
+ Merge Join Operator [MERGEJOIN_116] (rows=6951 width=218)
+ Conds:RS_66._col2=RS_146._col0(Inner),Output:["_col1","_col5","_col7"]
<-Map 12 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_144]
+ SHUFFLE [RS_146]
PartitionCols:_col0
- Select Operator [SEL_142] (rows=462000 width=111)
+ Select Operator [SEL_144] (rows=462000 width=111)
Output:["_col0","_col1"]
TableScan [TS_56] (rows=462000 width=111)
default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"]
<-Reducer 6 [SIMPLE_EDGE]
SHUFFLE [RS_66]
PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_113] (rows=6951 width=115)
- Conds:RS_63._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col5"]
+ Merge Join Operator [MERGEJOIN_115] (rows=6951 width=115)
+ Conds:RS_63._col0=RS_145._col0(Inner),Output:["_col1","_col2","_col5"]
<-Map 12 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_143]
+ SHUFFLE [RS_145]
PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_142]
+ Please refer to the previous Select Operator [SEL_144]
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_63]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_112] (rows=6951 width=12)
- Conds:RS_136._col1=RS_141._col1(Inner),Output:["_col0","_col1","_col2"]
+ Merge Join Operator [MERGEJOIN_114] (rows=6951 width=12)
+ Conds:RS_138._col1=RS_143._col1(Inner),Output:["_col0","_col1","_col2"]
<-Reducer 4 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_136]
+ SHUFFLE [RS_138]
PartitionCols:_col1
- Select Operator [SEL_135] (rows=6951 width=8)
+ Select Operator [SEL_137] (rows=6951 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_134] (rows=6951 width=116)
+ Filter Operator [FIL_136] (rows=6951 width=116)
predicate:(rank_window_0 < 11)
- PTF Operator [PTF_133] (rows=20854 width=116)
+ PTF Operator [PTF_135] (rows=20854 width=116)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}]
- Select Operator [SEL_132] (rows=20854 width=116)
+ Select Operator [SEL_134] (rows=20854 width=116)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE]
SHUFFLE [RS_21]
PartitionCols:0
- Filter Operator [FIL_20] (rows=20854 width=228)
- predicate:(_col1 > (0.9 * _col2))
- Merge Join Operator [MERGEJOIN_110] (rows=62562 width=228)
- Conds:(Inner),Output:["_col0","_col1","_col2"]
- <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_131]
- Select Operator [SEL_130] (rows=1 width=112)
- Output:["_col0"]
- Filter Operator [FIL_129] (rows=1 width=120)
- predicate:(_col1 is not null and _col2 is not null)
- Select Operator [SEL_128] (rows=1 width=120)
- Output:["_col1","_col2"]
- Group By Operator [GBY_127] (rows=1 width=124)
+ Top N Key Operator [TNK_100] (rows=20854 width=228)
+ keys:_col1,top n:11
+ Filter Operator [FIL_20] (rows=20854 width=228)
+ predicate:(_col1 > (0.9 * _col2))
+ Merge Join Operator [MERGEJOIN_112] (rows=62562 width=228)
+ Conds:(Inner),Output:["_col0","_col1","_col2"]
+ <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_133]
+ Select Operator [SEL_132] (rows=1 width=112)
+ Output:["_col0"]
+ Filter Operator [FIL_131] (rows=1 width=120)
+ predicate:(_col1 is not null and _col2 is not null)
+ Select Operator [SEL_130] (rows=1 width=120)
+ Output:["_col1","_col2"]
+ Group By Operator [GBY_129] (rows=1 width=124)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_128]
+ PartitionCols:_col0
+ Group By Operator [GBY_127] (rows=258 width=124)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true
+ Select Operator [SEL_126] (rows=287946 width=114)
+ Output:["_col1"]
+ Filter Operator [FIL_125] (rows=287946 width=114)
+ predicate:(ss_hdemo_sk is null and (ss_store_sk = 410))
+ TableScan [TS_8] (rows=575995635 width=114)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"]
+ <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_124]
+ Select Operator [SEL_123] (rows=62562 width=116)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_122] (rows=62562 width=124)
+ predicate:(_col1 is not null and _col2 is not null)
+ Group By Operator [GBY_121] (rows=62562 width=124)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
- <-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_126]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_120]
PartitionCols:_col0
- Group By Operator [GBY_125] (rows=258 width=124)
- Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true
- Select Operator [SEL_124] (rows=287946 width=114)
- Output:["_col1"]
- Filter Operator [FIL_123] (rows=287946 width=114)
- predicate:(ss_hdemo_sk is null and (ss_store_sk = 410))
- TableScan [TS_8] (rows=575995635 width=114)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"]
- <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_122]
- Select Operator [SEL_121] (rows=62562 width=116)
- Output:["_col0","_col1"]
- Filter Operator [FIL_120] (rows=62562 width=124)
- predicate:(_col1 is not null and _col2 is not null)
- Group By Operator [GBY_119] (rows=62562 width=124)
- Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_118]
- PartitionCols:_col0
- Group By Operator [GBY_117] (rows=3199976 width=124)
- Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk
- Select Operator [SEL_116] (rows=6399952 width=114)
- Output:["ss_item_sk","ss_net_profit"]
- Filter Operator [FIL_115] (rows=6399952 width=114)
- predicate:(ss_store_sk = 410)
- TableScan [TS_0] (rows=575995635 width=114)
- default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"]
+ Group By Operator [GBY_119] (rows=3199976 width=124)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk
+ Select Operator [SEL_118] (rows=6399952 width=114)
+ Output:["ss_item_sk","ss_net_profit"]
+ Filter Operator [FIL_117] (rows=6399952 width=114)
+ predicate:(ss_store_sk = 410)
+ TableScan [TS_0] (rows=575995635 width=114)
+ default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"]
<-Reducer 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_141]
+ SHUFFLE [RS_143]
PartitionCols:_col1
- Select Operator [SEL_140] (rows=6951 width=8)
+ Select Operator [SEL_142] (rows=6951 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_139] (rows=6951 width=116)
+ Filter Operator [FIL_141] (rows=6951 width=116)
predicate:(rank_window_0 < 11)
- PTF Operator [PTF_138] (rows=20854 width=116)
+ PTF Operator [PTF_140] (rows=20854 width=116)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}]
- Select Operator [SEL_137] (rows=20854 width=116)
+ Select Operator [SEL_139] (rows=20854 width=116)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE]
SHUFFLE [RS_49]
PartitionCols:0
- Please refer to the previous Filter Operator [FIL_20]
+ Top N Key Operator [TNK_101] (rows=20854 width=228)
+ keys:_col1,top n:11
+ Please refer to the previous Filter Operator [FIL_20]
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out
index 9ed7306..f5fe4b0 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out
@@ -111,88 +111,90 @@
limit:100
Stage-1
Reducer 7 vectorized
- File Output Operator [FS_109]
- Limit [LIM_108] (rows=100 width=617)
+ File Output Operator [FS_111]
+ Limit [LIM_110] (rows=100 width=617)
Number of rows:100
- Select Operator [SEL_107] (rows=1575989691 width=617)
+ Select Operator [SEL_109] (rows=1575989691 width=617)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
<-Reducer 6 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_106]
- Select Operator [SEL_105] (rows=1575989691 width=617)
+ SHUFFLE [RS_108]
+ Select Operator [SEL_107] (rows=1575989691 width=617)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
- Top N Key Operator [TNK_104] (rows=1575989691 width=613)
+ Top N Key Operator [TNK_106] (rows=1575989691 width=613)
keys:_col6, _col5, _col4, _col7, _col0, _col2, _col1, _col3, _col8, rank_window_0,top n:100
- Filter Operator [FIL_103] (rows=1575989691 width=613)
+ Filter Operator [FIL_105] (rows=1575989691 width=613)
predicate:(rank_window_0 <= 100)
- PTF Operator [PTF_102] (rows=4727969073 width=613)
+ PTF Operator [PTF_104] (rows=4727969073 width=613)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col8 DESC NULLS LAST","partition by:":"_col6"}]
- Select Operator [SEL_101] (rows=4727969073 width=613)
+ Select Operator [SEL_103] (rows=4727969073 width=613)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_100]
+ SHUFFLE [RS_102]
PartitionCols:_col6
- Select Operator [SEL_99] (rows=4727969073 width=613)
+ Select Operator [SEL_101] (rows=4727969073 width=613)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
- Group By Operator [GBY_98] (rows=4727969073 width=621)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8
- <-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_21]
- PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Group By Operator [GBY_20] (rows=4727969073 width=621)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col9, _col11, _col12, _col13, _col14, 0L
- Merge Join Operator [MERGEJOIN_82] (rows=525329897 width=613)
- Conds:RS_16._col1=RS_97._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"]
- <-Map 11 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_97]
- PartitionCols:_col0
- Select Operator [SEL_96] (rows=462000 width=393)
- Output:["_col0","_col1","_col2","_col3","_col4"]
- TableScan [TS_8] (rows=462000 width=393)
- default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_16]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_81] (rows=525329897 width=228)
- Conds:RS_13._col2=RS_95._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"]
- <-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_95]
- PartitionCols:_col0
- Select Operator [SEL_94] (rows=1704 width=104)
- Output:["_col0","_col1"]
- TableScan [TS_6] (rows=1704 width=104)
- default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_13]
- PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_80] (rows=525329897 width=131)
- Conds:RS_93._col0=RS_85._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"]
- <-Map 8 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_85]
- PartitionCols:_col0
- Select Operator [SEL_84] (rows=317 width=16)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_83] (rows=317 width=20)
- predicate:d_month_seq BETWEEN 1212 AND 1223
- TableScan [TS_3] (rows=73049 width=20)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_93]
- PartitionCols:_col0
- Select Operator [SEL_92] (rows=525329897 width=123)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_91] (rows=525329897 width=122)
- predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_11_date_dim_d_date_sk_min) AND DynamicValue(RS_11_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_11_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_0] (rows=575995635 width=122)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"]
- <-Reducer 9 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_90]
- Group By Operator [GBY_89] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_88]
- Group By Operator [GBY_87] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_86] (rows=317 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_84]
+ Top N Key Operator [TNK_100] (rows=4727969073 width=621)
+ PartitionCols:_col6,keys:_col6, _col9,top n:101
+ Group By Operator [GBY_99] (rows=4727969073 width=621)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_21]
+ PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Group By Operator [GBY_20] (rows=4727969073 width=621)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col9, _col11, _col12, _col13, _col14, 0L
+ Merge Join Operator [MERGEJOIN_83] (rows=525329897 width=613)
+ Conds:RS_16._col1=RS_98._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"]
+ <-Map 11 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_98]
+ PartitionCols:_col0
+ Select Operator [SEL_97] (rows=462000 width=393)
+ Output:["_col0","_col1","_col2","_col3","_col4"]
+ TableScan [TS_8] (rows=462000 width=393)
+ default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_16]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_82] (rows=525329897 width=228)
+ Conds:RS_13._col2=RS_96._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_96]
+ PartitionCols:_col0
+ Select Operator [SEL_95] (rows=1704 width=104)
+ Output:["_col0","_col1"]
+ TableScan [TS_6] (rows=1704 width=104)
+ default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_13]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_81] (rows=525329897 width=131)
+ Conds:RS_94._col0=RS_86._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"]
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_86]
+ PartitionCols:_col0
+ Select Operator [SEL_85] (rows=317 width=16)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_84] (rows=317 width=20)
+ predicate:d_month_seq BETWEEN 1212 AND 1223
+ TableScan [TS_3] (rows=73049 width=20)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_94]
+ PartitionCols:_col0
+ Select Operator [SEL_93] (rows=525329897 width=123)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_92] (rows=525329897 width=122)
+ predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_11_date_dim_d_date_sk_min) AND DynamicValue(RS_11_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_11_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_0] (rows=575995635 width=122)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"]
+ <-Reducer 9 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_91]
+ Group By Operator [GBY_90] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_89]
+ Group By Operator [GBY_88] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_87] (rows=317 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_85]
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out
index 40dfaa2..b2bde8d 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out
@@ -100,27 +100,27 @@
limit:-1
Stage-1
Reducer 6 vectorized
- File Output Operator [FS_172]
- Limit [LIM_171] (rows=100 width=492)
+ File Output Operator [FS_174]
+ Limit [LIM_173] (rows=100 width=492)
Number of rows:100
- Select Operator [SEL_170] (rows=720 width=492)
+ Select Operator [SEL_172] (rows=720 width=492)
Output:["_col0","_col1","_col2","_col3","_col4"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_169]
- Select Operator [SEL_168] (rows=720 width=492)
+ SHUFFLE [RS_171]
+ Select Operator [SEL_170] (rows=720 width=492)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Top N Key Operator [TNK_167] (rows=720 width=304)
+ Top N Key Operator [TNK_169] (rows=720 width=304)
keys:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN (((grouping(_col3, 1L) + grouping(_col3, 0L)) = 0L)) THEN (_col0) ELSE (null) END, rank_window_0,top n:100
- PTF Operator [PTF_166] (rows=720 width=304)
+ PTF Operator [PTF_168] (rows=720 width=304)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END"}]
- Select Operator [SEL_165] (rows=720 width=304)
+ Select Operator [SEL_167] (rows=720 width=304)
Output:["_col0","_col1","_col2","_col3"]
<-Reducer 4 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_164]
+ SHUFFLE [RS_166]
PartitionCols:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END
- Select Operator [SEL_163] (rows=720 width=304)
+ Select Operator [SEL_165] (rows=720 width=304)
Output:["_col0","_col1","_col2","_col3"]
- Group By Operator [GBY_162] (rows=720 width=304)
+ Group By Operator [GBY_164] (rows=720 width=304)
Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
<-Reducer 3 [SIMPLE_EDGE]
SHUFFLE [RS_50]
@@ -129,90 +129,92 @@
Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L
Select Operator [SEL_47] (rows=525329897 width=290)
Output:["_col0","_col1","_col2"]
- Merge Join Operator [MERGEJOIN_137] (rows=525329897 width=290)
+ Merge Join Operator [MERGEJOIN_138] (rows=525329897 width=290)
Conds:RS_44._col1=RS_45._col0(Inner),Output:["_col2","_col6","_col7"]
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_44]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_133] (rows=525329897 width=110)
- Conds:RS_148._col0=RS_140._col0(Inner),Output:["_col1","_col2"]
+ Merge Join Operator [MERGEJOIN_134] (rows=525329897 width=110)
+ Conds:RS_149._col0=RS_141._col0(Inner),Output:["_col1","_col2"]
<-Map 11 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_140]
+ SHUFFLE [RS_141]
PartitionCols:_col0
- Select Operator [SEL_139] (rows=317 width=8)
+ Select Operator [SEL_140] (rows=317 width=8)
Output:["_col0"]
- Filter Operator [FIL_138] (rows=317 width=8)
+ Filter Operator [FIL_139] (rows=317 width=8)
predicate:d_month_seq BETWEEN 1212 AND 1223
TableScan [TS_3] (rows=73049 width=8)
default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"]
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_148]
+ SHUFFLE [RS_149]
PartitionCols:_col0
- Select Operator [SEL_147] (rows=525329897 width=114)
+ Select Operator [SEL_148] (rows=525329897 width=114)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_146] (rows=525329897 width=114)
+ Filter Operator [FIL_147] (rows=525329897 width=114)
predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_42_d1_d_date_sk_min) AND DynamicValue(RS_42_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_42_d1_d_date_sk_bloom_filter)))
TableScan [TS_0] (rows=575995635 width=114)
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"]
<-Reducer 12 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_145]
- Group By Operator [GBY_144] (rows=1 width=12)
+ BROADCAST [RS_146]
+ Group By Operator [GBY_145] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
<-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_143]
- Group By Operator [GBY_142] (rows=1 width=12)
+ SHUFFLE [RS_144]
+ Group By Operator [GBY_143] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_141] (rows=317 width=4)
+ Select Operator [SEL_142] (rows=317 width=4)
Output:["_col0"]
- Please refer to the previous Select Operator [SEL_139]
+ Please refer to the previous Select Operator [SEL_140]
<-Reducer 10 [SIMPLE_EDGE]
SHUFFLE [RS_45]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_136] (rows=556 width=188)
- Conds:RS_161._col2=RS_158._col0(Inner),Output:["_col0","_col1","_col2"]
+ Merge Join Operator [MERGEJOIN_137] (rows=556 width=188)
+ Conds:RS_163._col2=RS_160._col0(Inner),Output:["_col0","_col1","_col2"]
<-Map 13 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_161]
+ SHUFFLE [RS_163]
PartitionCols:_col2
- Select Operator [SEL_160] (rows=1704 width=188)
+ Select Operator [SEL_162] (rows=1704 width=188)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_159] (rows=1704 width=188)
+ Filter Operator [FIL_161] (rows=1704 width=188)
predicate:s_state is not null
TableScan [TS_6] (rows=1704 width=188)
default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county","s_state"]
<-Reducer 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_158]
+ SHUFFLE [RS_160]
PartitionCols:_col0
- Select Operator [SEL_157] (rows=16 width=86)
+ Select Operator [SEL_159] (rows=16 width=86)
Output:["_col0"]
- Filter Operator [FIL_156] (rows=16 width=198)
+ Filter Operator [FIL_158] (rows=16 width=198)
predicate:(rank_window_0 <= 5)
- PTF Operator [PTF_155] (rows=49 width=198)
+ PTF Operator [PTF_157] (rows=49 width=198)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}]
- Select Operator [SEL_154] (rows=49 width=198)
+ Select Operator [SEL_156] (rows=49 width=198)
Output:["_col0","_col1"]
<-Reducer 8 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_153]
+ SHUFFLE [RS_155]
PartitionCols:_col0
- Group By Operator [GBY_152] (rows=49 width=198)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 7 [SIMPLE_EDGE]
- SHUFFLE [RS_26]
- PartitionCols:_col0
- Group By Operator [GBY_25] (rows=19404 width=198)
- Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_135] (rows=525329897 width=192)
- Conds:RS_21._col1=RS_151._col0(Inner),Output:["_col2","_col5"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_21]
- PartitionCols:_col1
- Please refer to the previous Merge Join Operator [MERGEJOIN_133]
- <-Map 14 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_151]
- PartitionCols:_col0
- Select Operator [SEL_150] (rows=1704 width=90)
- Output:["_col0","_col1"]
- Filter Operator [FIL_149] (rows=1704 width=90)
- predicate:s_state is not null
- TableScan [TS_15] (rows=1704 width=90)
- default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"]
+ Top N Key Operator [TNK_154] (rows=49 width=198)
+ PartitionCols:_col0,keys:_col0, _col1,top n:6
+ Group By Operator [GBY_153] (rows=49 width=198)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 7 [SIMPLE_EDGE]
+ SHUFFLE [RS_26]
+ PartitionCols:_col0
+ Group By Operator [GBY_25] (rows=19404 width=198)
+ Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
+ Merge Join Operator [MERGEJOIN_136] (rows=525329897 width=192)
+ Conds:RS_21._col1=RS_152._col0(Inner),Output:["_col2","_col5"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_21]
+ PartitionCols:_col1
+ Please refer to the previous Merge Join Operator [MERGEJOIN_134]
+ <-Map 14 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_152]
+ PartitionCols:_col0
+ Select Operator [SEL_151] (rows=1704 width=90)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_150] (rows=1704 width=90)
+ predicate:s_state is not null
+ TableScan [TS_15] (rows=1704 width=90)
+ default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/query44.q.out
index a0defab..1c16328 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query44.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query44.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product
+Warning: Shuffle Join MERGEJOIN[112][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product
PREHOOK: query: explain
select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing
from(select *
@@ -91,10 +91,10 @@
limit:100
Stage-1
Reducer 4 vectorized
- File Output Operator [FS_148]
- Limit [LIM_147] (rows=100 width=218)
+ File Output Operator [FS_150]
+ Limit [LIM_149] (rows=100 width=218)
Number of rows:100
- Select Operator [SEL_146] (rows=6951 width=218)
+ Select Operator [SEL_148] (rows=6951 width=218)
Output:["_col0","_col1","_col2"]
<-Reducer 3 [SIMPLE_EDGE]
SHUFFLE [RS_73]
@@ -102,102 +102,106 @@
Output:["_col0","_col1","_col2"]
Top N Key Operator [TNK_99] (rows=6951 width=218)
keys:_col3,top n:100
- Merge Join Operator [MERGEJOIN_114] (rows=6951 width=218)
+ Merge Join Operator [MERGEJOIN_116] (rows=6951 width=218)
Conds:RS_69._col3=RS_70._col3(Inner),Output:["_col1","_col3","_col5"]
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_69]
PartitionCols:_col3
- Merge Join Operator [MERGEJOIN_111] (rows=6951 width=111)
- Conds:RS_117._col0=RS_140._col0(Inner),Output:["_col1","_col3"]
+ Merge Join Operator [MERGEJOIN_113] (rows=6951 width=111)
+ Conds:RS_119._col0=RS_142._col0(Inner),Output:["_col1","_col3"]
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_117]
+ SHUFFLE [RS_119]
PartitionCols:_col0
- Select Operator [SEL_116] (rows=462000 width=111)
+ Select Operator [SEL_118] (rows=462000 width=111)
Output:["_col0","_col1"]
- Filter Operator [FIL_115] (rows=462000 width=111)
+ Filter Operator [FIL_117] (rows=462000 width=111)
predicate:i_item_sk is not null
TableScan [TS_0] (rows=462000 width=111)
default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"]
<-Reducer 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_140]
+ SHUFFLE [RS_142]
PartitionCols:_col0
- Select Operator [SEL_139] (rows=6951 width=8)
+ Select Operator [SEL_141] (rows=6951 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_138] (rows=6951 width=116)
+ Filter Operator [FIL_140] (rows=6951 width=116)
predicate:((rank_window_0 < 11) and _col0 is not null)
- PTF Operator [PTF_137] (rows=20854 width=116)
+ PTF Operator [PTF_139] (rows=20854 width=116)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}]
- Select Operator [SEL_136] (rows=20854 width=116)
+ Select Operator [SEL_138] (rows=20854 width=116)
Output:["_col0","_col1"]
<-Reducer 8 [SIMPLE_EDGE]
SHUFFLE [RS_24]
PartitionCols:0
- Filter Operator [FIL_23] (rows=20854 width=228)
- predicate:(_col1 > (0.9 * _col2))
- Merge Join Operator [MERGEJOIN_110] (rows=62562 width=228)
- Conds:(Inner),Output:["_col0","_col1","_col2"]
- <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_135]
- Select Operator [SEL_134] (rows=1 width=112)
- Output:["_col0"]
- Filter Operator [FIL_133] (rows=1 width=120)
- predicate:(_col1 is not null and _col2 is not null)
- Select Operator [SEL_132] (rows=1 width=120)
- Output:["_col1","_col2"]
- Group By Operator [GBY_131] (rows=1 width=124)
+ Top N Key Operator [TNK_100] (rows=20854 width=228)
+ keys:_col1,top n:11
+ Filter Operator [FIL_23] (rows=20854 width=228)
+ predicate:(_col1 > (0.9 * _col2))
+ Merge Join Operator [MERGEJOIN_112] (rows=62562 width=228)
+ Conds:(Inner),Output:["_col0","_col1","_col2"]
+ <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_137]
+ Select Operator [SEL_136] (rows=1 width=112)
+ Output:["_col0"]
+ Filter Operator [FIL_135] (rows=1 width=120)
+ predicate:(_col1 is not null and _col2 is not null)
+ Select Operator [SEL_134] (rows=1 width=120)
+ Output:["_col1","_col2"]
+ Group By Operator [GBY_133] (rows=1 width=124)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
+ <-Map 11 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_132]
+ PartitionCols:_col0
+ Group By Operator [GBY_131] (rows=258 width=124)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true
+ Select Operator [SEL_130] (rows=287946 width=114)
+ Output:["_col1"]
+ Filter Operator [FIL_129] (rows=287946 width=114)
+ predicate:(ss_hdemo_sk is null and (ss_store_sk = 410))
+ TableScan [TS_11] (rows=575995635 width=114)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"]
+ <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_128]
+ Select Operator [SEL_127] (rows=62562 width=116)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_126] (rows=62562 width=124)
+ predicate:(_col1 is not null and _col2 is not null)
+ Group By Operator [GBY_125] (rows=62562 width=124)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
- <-Map 11 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_130]
+ <-Map 6 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_124]
PartitionCols:_col0
- Group By Operator [GBY_129] (rows=258 width=124)
- Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true
- Select Operator [SEL_128] (rows=287946 width=114)
- Output:["_col1"]
- Filter Operator [FIL_127] (rows=287946 width=114)
- predicate:(ss_hdemo_sk is null and (ss_store_sk = 410))
- TableScan [TS_11] (rows=575995635 width=114)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"]
- <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_126]
- Select Operator [SEL_125] (rows=62562 width=116)
- Output:["_col0","_col1"]
- Filter Operator [FIL_124] (rows=62562 width=124)
- predicate:(_col1 is not null and _col2 is not null)
- Group By Operator [GBY_123] (rows=62562 width=124)
- Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
- <-Map 6 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_122]
- PartitionCols:_col0
- Group By Operator [GBY_121] (rows=3199976 width=124)
- Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk
- Select Operator [SEL_120] (rows=6399952 width=114)
- Output:["ss_item_sk","ss_net_profit"]
- Filter Operator [FIL_119] (rows=6399952 width=114)
- predicate:(ss_store_sk = 410)
- TableScan [TS_3] (rows=575995635 width=114)
- default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"]
+ Group By Operator [GBY_123] (rows=3199976 width=124)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk
+ Select Operator [SEL_122] (rows=6399952 width=114)
+ Output:["ss_item_sk","ss_net_profit"]
+ Filter Operator [FIL_121] (rows=6399952 width=114)
+ predicate:(ss_store_sk = 410)
+ TableScan [TS_3] (rows=575995635 width=114)
+ default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"]
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_70]
PartitionCols:_col3
- Merge Join Operator [MERGEJOIN_113] (rows=6951 width=111)
- Conds:RS_118._col0=RS_145._col0(Inner),Output:["_col1","_col3"]
+ Merge Join Operator [MERGEJOIN_115] (rows=6951 width=111)
+ Conds:RS_120._col0=RS_147._col0(Inner),Output:["_col1","_col3"]
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_118]
+ SHUFFLE [RS_120]
PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_116]
+ Please refer to the previous Select Operator [SEL_118]
<-Reducer 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_145]
+ SHUFFLE [RS_147]
PartitionCols:_col0
- Select Operator [SEL_144] (rows=6951 width=8)
+ Select Operator [SEL_146] (rows=6951 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_143] (rows=6951 width=116)
+ Filter Operator [FIL_145] (rows=6951 width=116)
predicate:((rank_window_0 < 11) and _col0 is not null)
- PTF Operator [PTF_142] (rows=20854 width=116)
+ PTF Operator [PTF_144] (rows=20854 width=116)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}]
- Select Operator [SEL_141] (rows=20854 width=116)
+ Select Operator [SEL_143] (rows=20854 width=116)
Output:["_col0","_col1"]
<-Reducer 8 [SIMPLE_EDGE]
SHUFFLE [RS_55]
PartitionCols:0
- Please refer to the previous Filter Operator [FIL_23]
+ Top N Key Operator [TNK_101] (rows=20854 width=228)
+ keys:_col1,top n:11
+ Please refer to the previous Filter Operator [FIL_23]
diff --git a/ql/src/test/results/clientpositive/perf/tez/query67.q.out b/ql/src/test/results/clientpositive/perf/tez/query67.q.out
index 54d76e9..bd225cd 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query67.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query67.q.out
@@ -111,92 +111,94 @@
limit:100
Stage-1
Reducer 7 vectorized
- File Output Operator [FS_113]
- Limit [LIM_112] (rows=100 width=617)
+ File Output Operator [FS_115]
+ Limit [LIM_114] (rows=100 width=617)
Number of rows:100
- Select Operator [SEL_111] (rows=1575989691 width=617)
+ Select Operator [SEL_113] (rows=1575989691 width=617)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
<-Reducer 6 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_110]
- Select Operator [SEL_109] (rows=1575989691 width=617)
+ SHUFFLE [RS_112]
+ Select Operator [SEL_111] (rows=1575989691 width=617)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
- Top N Key Operator [TNK_108] (rows=1575989691 width=613)
+ Top N Key Operator [TNK_110] (rows=1575989691 width=613)
keys:_col2, _col1, _col0, _col3, _col4, _col6, _col5, _col7, _col8, rank_window_0,top n:100
- Filter Operator [FIL_107] (rows=1575989691 width=613)
+ Filter Operator [FIL_109] (rows=1575989691 width=613)
predicate:(rank_window_0 <= 100)
- PTF Operator [PTF_106] (rows=4727969073 width=613)
+ PTF Operator [PTF_108] (rows=4727969073 width=613)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col8 DESC NULLS LAST","partition by:":"_col2"}]
- Select Operator [SEL_105] (rows=4727969073 width=613)
+ Select Operator [SEL_107] (rows=4727969073 width=613)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_104]
+ SHUFFLE [RS_106]
PartitionCols:_col2
- Select Operator [SEL_103] (rows=4727969073 width=613)
+ Select Operator [SEL_105] (rows=4727969073 width=613)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
- Group By Operator [GBY_102] (rows=4727969073 width=621)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8
- <-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_23]
- PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Group By Operator [GBY_22] (rows=4727969073 width=621)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col11, _col12, _col13, _col14, _col5, _col6, _col7, _col9, 0L
- Merge Join Operator [MERGEJOIN_84] (rows=525329897 width=613)
- Conds:RS_18._col1=RS_101._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"]
- <-Map 11 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_101]
- PartitionCols:_col0
- Select Operator [SEL_100] (rows=462000 width=393)
- Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_99] (rows=462000 width=393)
- predicate:i_item_sk is not null
- TableScan [TS_9] (rows=462000 width=393)
- default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_18]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_83] (rows=525329897 width=228)
- Conds:RS_15._col2=RS_98._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"]
- <-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_98]
- PartitionCols:_col0
- Select Operator [SEL_97] (rows=1704 width=104)
- Output:["_col0","_col1"]
- Filter Operator [FIL_96] (rows=1704 width=104)
- predicate:s_store_sk is not null
- TableScan [TS_6] (rows=1704 width=104)
- default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_15]
- PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_82] (rows=525329897 width=131)
- Conds:RS_95._col0=RS_87._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"]
- <-Map 8 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_87]
- PartitionCols:_col0
- Select Operator [SEL_86] (rows=317 width=16)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_85] (rows=317 width=20)
- predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null)
- TableScan [TS_3] (rows=73049 width=20)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_95]
- PartitionCols:_col0
- Select Operator [SEL_94] (rows=525329897 width=123)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_93] (rows=525329897 width=122)
- predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_0] (rows=575995635 width=122)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"]
- <-Reducer 9 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_92]
- Group By Operator [GBY_91] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_90]
- Group By Operator [GBY_89] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_88] (rows=317 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_86]
+ Top N Key Operator [TNK_104] (rows=4727969073 width=621)
+ PartitionCols:_col2,keys:_col2, _col9,top n:101
+ Group By Operator [GBY_103] (rows=4727969073 width=621)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_23]
+ PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Group By Operator [GBY_22] (rows=4727969073 width=621)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col11, _col12, _col13, _col14, _col5, _col6, _col7, _col9, 0L
+ Merge Join Operator [MERGEJOIN_85] (rows=525329897 width=613)
+ Conds:RS_18._col1=RS_102._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"]
+ <-Map 11 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_102]
+ PartitionCols:_col0
+ Select Operator [SEL_101] (rows=462000 width=393)
+ Output:["_col0","_col1","_col2","_col3","_col4"]
+ Filter Operator [FIL_100] (rows=462000 width=393)
+ predicate:i_item_sk is not null
+ TableScan [TS_9] (rows=462000 width=393)
+ default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_84] (rows=525329897 width=228)
+ Conds:RS_15._col2=RS_99._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_99]
+ PartitionCols:_col0
+ Select Operator [SEL_98] (rows=1704 width=104)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_97] (rows=1704 width=104)
+ predicate:s_store_sk is not null
+ TableScan [TS_6] (rows=1704 width=104)
+ default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_15]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_83] (rows=525329897 width=131)
+ Conds:RS_96._col0=RS_88._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"]
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_88]
+ PartitionCols:_col0
+ Select Operator [SEL_87] (rows=317 width=16)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_86] (rows=317 width=20)
+ predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=20)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_96]
+ PartitionCols:_col0
+ Select Operator [SEL_95] (rows=525329897 width=123)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_94] (rows=525329897 width=122)
+ predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_0] (rows=575995635 width=122)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"]
+ <-Reducer 9 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_93]
+ Group By Operator [GBY_92] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_91]
+ Group By Operator [GBY_90] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_89] (rows=317 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_87]
diff --git a/ql/src/test/results/clientpositive/perf/tez/query70.q.out b/ql/src/test/results/clientpositive/perf/tez/query70.q.out
index 23f9166..6107ec0 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query70.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query70.q.out
@@ -100,27 +100,27 @@
limit:-1
Stage-1
Reducer 7 vectorized
- File Output Operator [FS_170]
- Limit [LIM_169] (rows=100 width=492)
+ File Output Operator [FS_172]
+ Limit [LIM_171] (rows=100 width=492)
Number of rows:100
- Select Operator [SEL_168] (rows=720 width=492)
+ Select Operator [SEL_170] (rows=720 width=492)
Output:["_col0","_col1","_col2","_col3","_col4"]
<-Reducer 6 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_167]
- Select Operator [SEL_166] (rows=720 width=492)
+ SHUFFLE [RS_169]
+ Select Operator [SEL_168] (rows=720 width=492)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Top N Key Operator [TNK_165] (rows=720 width=304)
+ Top N Key Operator [TNK_167] (rows=720 width=304)
keys:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN (((grouping(_col3, 1L) + grouping(_col3, 0L)) = 0L)) THEN (_col0) ELSE (null) END, rank_window_0,top n:100
- PTF Operator [PTF_164] (rows=720 width=304)
+ PTF Operator [PTF_166] (rows=720 width=304)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END"}]
- Select Operator [SEL_163] (rows=720 width=304)
+ Select Operator [SEL_165] (rows=720 width=304)
Output:["_col0","_col1","_col2","_col3"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_162]
+ SHUFFLE [RS_164]
PartitionCols:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END
- Select Operator [SEL_161] (rows=720 width=304)
+ Select Operator [SEL_163] (rows=720 width=304)
Output:["_col0","_col1","_col2","_col3"]
- Group By Operator [GBY_160] (rows=720 width=304)
+ Group By Operator [GBY_162] (rows=720 width=304)
Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_49]
@@ -129,89 +129,91 @@
Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L
Select Operator [SEL_46] (rows=171536292 width=280)
Output:["_col0","_col1","_col2"]
- Merge Join Operator [MERGEJOIN_135] (rows=171536292 width=280)
- Conds:RS_43._col7=RS_159._col0(Inner),Output:["_col2","_col6","_col7"]
+ Merge Join Operator [MERGEJOIN_136] (rows=171536292 width=280)
+ Conds:RS_43._col7=RS_161._col0(Inner),Output:["_col2","_col6","_col7"]
<-Reducer 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_159]
+ SHUFFLE [RS_161]
PartitionCols:_col0
- Select Operator [SEL_158] (rows=16 width=86)
+ Select Operator [SEL_160] (rows=16 width=86)
Output:["_col0"]
- Filter Operator [FIL_157] (rows=16 width=198)
+ Filter Operator [FIL_159] (rows=16 width=198)
predicate:(rank_window_0 <= 5)
- PTF Operator [PTF_156] (rows=49 width=198)
+ PTF Operator [PTF_158] (rows=49 width=198)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}]
- Select Operator [SEL_155] (rows=49 width=198)
+ Select Operator [SEL_157] (rows=49 width=198)
Output:["_col0","_col1"]
<-Reducer 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_154]
+ SHUFFLE [RS_156]
PartitionCols:_col0
- Group By Operator [GBY_153] (rows=49 width=198)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 8 [SIMPLE_EDGE]
- SHUFFLE [RS_26]
- PartitionCols:_col0
- Group By Operator [GBY_25] (rows=19404 width=198)
- Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_134] (rows=525329897 width=192)
- Conds:RS_21._col1=RS_152._col0(Inner),Output:["_col2","_col5"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_21]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_131] (rows=525329897 width=110)
- Conds:RS_146._col0=RS_138._col0(Inner),Output:["_col1","_col2"]
- <-Map 11 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_138]
- PartitionCols:_col0
- Select Operator [SEL_137] (rows=317 width=8)
- Output:["_col0"]
- Filter Operator [FIL_136] (rows=317 width=8)
- predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null)
- TableScan [TS_3] (rows=73049 width=8)
- default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_146]
- PartitionCols:_col0
- Select Operator [SEL_145] (rows=525329897 width=114)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_144] (rows=525329897 width=114)
- predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_38_d1_d_date_sk_bloom_filter)))
- TableScan [TS_0] (rows=575995635 width=114)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"]
- <-Reducer 12 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_143]
- Group By Operator [GBY_142] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized
- SHUFFLE [RS_141]
- Group By Operator [GBY_140] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_139] (rows=317 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_137]
- <-Map 14 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_152]
- PartitionCols:_col0
- Select Operator [SEL_151] (rows=1704 width=90)
- Output:["_col0","_col1"]
- Filter Operator [FIL_150] (rows=1704 width=90)
- predicate:(s_store_sk is not null and s_state is not null)
- TableScan [TS_15] (rows=1704 width=90)
- default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"]
+ Top N Key Operator [TNK_155] (rows=49 width=198)
+ PartitionCols:_col0,keys:_col0, _col1,top n:6
+ Group By Operator [GBY_154] (rows=49 width=198)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_26]
+ PartitionCols:_col0
+ Group By Operator [GBY_25] (rows=19404 width=198)
+ Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
+ Merge Join Operator [MERGEJOIN_135] (rows=525329897 width=192)
+ Conds:RS_21._col1=RS_153._col0(Inner),Output:["_col2","_col5"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_21]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_132] (rows=525329897 width=110)
+ Conds:RS_147._col0=RS_139._col0(Inner),Output:["_col1","_col2"]
+ <-Map 11 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_139]
+ PartitionCols:_col0
+ Select Operator [SEL_138] (rows=317 width=8)
+ Output:["_col0"]
+ Filter Operator [FIL_137] (rows=317 width=8)
+ predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=8)
+ default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_147]
+ PartitionCols:_col0
+ Select Operator [SEL_146] (rows=525329897 width=114)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_145] (rows=525329897 width=114)
+ predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_38_d1_d_date_sk_bloom_filter)))
+ TableScan [TS_0] (rows=575995635 width=114)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"]
+ <-Reducer 12 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_144]
+ Group By Operator [GBY_143] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_142]
+ Group By Operator [GBY_141] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_140] (rows=317 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_138]
+ <-Map 14 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_153]
+ PartitionCols:_col0
+ Select Operator [SEL_152] (rows=1704 width=90)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_151] (rows=1704 width=90)
+ predicate:(s_store_sk is not null and s_state is not null)
+ TableScan [TS_15] (rows=1704 width=90)
+ default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"]
<-Reducer 3 [SIMPLE_EDGE]
SHUFFLE [RS_43]
PartitionCols:_col7
- Merge Join Operator [MERGEJOIN_132] (rows=525329897 width=290)
- Conds:RS_40._col1=RS_149._col0(Inner),Output:["_col2","_col6","_col7"]
+ Merge Join Operator [MERGEJOIN_133] (rows=525329897 width=290)
+ Conds:RS_40._col1=RS_150._col0(Inner),Output:["_col2","_col6","_col7"]
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_40]
PartitionCols:_col1
- Please refer to the previous Merge Join Operator [MERGEJOIN_131]
+ Please refer to the previous Merge Join Operator [MERGEJOIN_132]
<-Map 13 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_149]
+ SHUFFLE [RS_150]
PartitionCols:_col0
- Select Operator [SEL_148] (rows=1704 width=188)
+ Select Operator [SEL_149] (rows=1704 width=188)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_147] (rows=1704 width=188)
+ Filter Operator [FIL_148] (rows=1704 width=188)
predicate:(s_state is not null and s_store_sk is not null)
TableScan [TS_6] (rows=1704 width=188)
default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county","s_state"]
diff --git a/ql/src/test/results/clientpositive/topnkey_windowing.q.out b/ql/src/test/results/clientpositive/topnkey_windowing.q.out
new file mode 100644
index 0000000..c186790
--- /dev/null
+++ b/ql/src/test/results/clientpositive/topnkey_windowing.q.out
@@ -0,0 +1,576 @@
+PREHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_code string, tw_value double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@topnkey_windowing
+PREHOOK: query: INSERT INTO topnkey_windowing VALUES
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109),
+ ('A', 104),
+ ('A', 109),
+ ('A', 109),
+ ('A', 103),
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109),
+ ('A', 101),
+ ('A', 101),
+ ('A', 114),
+ ('A', 120),
+ ('B', 105),
+ ('B', 106),
+ ('B', 106),
+ ('B', NULL),
+ ('B', 106),
+ ('A', 107),
+ ('B', 108),
+ ('A', 102),
+ ('B', 110),
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: INSERT INTO topnkey_windowing VALUES
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109),
+ ('A', 104),
+ ('A', 109),
+ ('A', 109),
+ ('A', 103),
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109),
+ ('A', 101),
+ ('A', 101),
+ ('A', 114),
+ ('A', 120),
+ ('B', 105),
+ ('B', 106),
+ ('B', 106),
+ ('B', NULL),
+ ('B', 106),
+ ('A', 107),
+ ('B', 108),
+ ('A', 102),
+ ('B', 110),
+ (NULL, NULL),
+ (NULL, 109),
+ ('A', 109)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@topnkey_windowing
+POSTHOOK: Lineage: topnkey_windowing.tw_code SCRIPT []
+POSTHOOK: Lineage: topnkey_windowing.tw_value SCRIPT []
+PREHOOK: query: EXPLAIN
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: tw_code (type: string), tw_value (type: double)
+ null sort order: az
+ sort order: ++
+ Map-reduce partition columns: tw_code (type: string)
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS LAST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: EXPLAIN extended
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN extended
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT *
+FROM (SELECT `tw_code`, RANK() OVER (PARTITION BY 0 ORDER BY `tw_value` ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING) AS `rank_window_0`
+FROM `default`.`topnkey_windowing`) AS `t`
+WHERE `rank_window_0` <= 3
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions: 0 (type: int), tw_value (type: double)
+ null sort order: az
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ TopN: 4
+ TopN Hash Memory Usage: 0.1
+ value expressions: tw_code (type: string)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: topnkey_windowing
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}}
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns tw_code,tw_value
+ columns.comments
+ columns.types string:double
+#### A masked pattern was here ####
+ name default.topnkey_windowing
+ numFiles 1
+ numRows 26
+ rawDataSize 176
+ serialization.ddl struct topnkey_windowing { string tw_code, double tw_value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 202
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tw_code":"true","tw_value":"true"}}
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns tw_code,tw_value
+ columns.comments
+ columns.types string:double
+#### A masked pattern was here ####
+ name default.topnkey_windowing
+ numFiles 1
+ numRows 26
+ rawDataSize 176
+ serialization.ddl struct topnkey_windowing { string tw_code, double tw_value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 202
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.topnkey_windowing
+ name: default.topnkey_windowing
+ Truncated Path -> Alias:
+ /topnkey_windowing [$hdt$_0:topnkey_windowing]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS LAST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ isSamplingPred: false
+ predicate: (rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:int
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+A 1
+A 1
+A 3
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code as tw_code,
+ rank() OVER (ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+A 1
+A 1
+A 3
+PREHOOK: query: EXPLAIN
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: tw_code (type: string), tw_value (type: double)
+ null sort order: az
+ sort order: ++
+ Map-reduce partition columns: tw_code (type: string)
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS LAST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: dense_rank_window_0
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (dense_rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), dense_rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+NULL 2
+NULL 2
+NULL 2
+A 1
+A 1
+A 2
+A 3
+B 1
+B 2
+B 2
+B 2
+B 3
+PREHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_code, ranking
+FROM (
+ SELECT tw_code AS tw_code,
+ dense_rank() OVER (PARTITION BY tw_code ORDER BY tw_value) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+NULL 2
+NULL 2
+NULL 2
+A 1
+A 1
+A 2
+A 3
+B 1
+B 2
+B 2
+B 2
+B 3
+PREHOOK: query: DROP TABLE topnkey_windowing
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@topnkey_windowing
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: DROP TABLE topnkey_windowing
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@topnkey_windowing
+POSTHOOK: Output: default@topnkey_windowing
diff --git a/ql/src/test/results/clientpositive/topnkey_windowing_order.q.out b/ql/src/test/results/clientpositive/topnkey_windowing_order.q.out
new file mode 100644
index 0000000..7a9a67d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/topnkey_windowing_order.q.out
@@ -0,0 +1,517 @@
+PREHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: CREATE TABLE topnkey_windowing (tw_a string, tw_b string, tw_v1 double, tw_v2 double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@topnkey_windowing
+PREHOOK: query: INSERT INTO topnkey_windowing VALUES
+ (NULL, NULL, NULL, NULL),
+ (NULL, 'D', 109, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'D', 104, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'C', 109, 9),
+ ('A', 'C', 103, 9),
+ (NULL, NULL, NULL, NULL),
+ (NULL, 'D', 109, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'D', 101, 9),
+ ('A', 'D', 101, 9),
+ ('A', 'D', 114, 9),
+ ('A', 'D', 120, 9),
+ ('B', 'E', 105, 9),
+ ('B', 'E', 106, 9),
+ ('B', 'E', 106, 9),
+ ('B', 'E', NULL, NULL),
+ ('B', 'E', 106, 9),
+ ('A', 'C', 107, 9),
+ ('B', 'E', 108, 9),
+ ('A', 'C', 102, 9),
+ ('B', 'E', 110, 9),
+ (NULL, NULL, NULL, NULL),
+ (NULL, NULL, 109, 9),
+ ('A', 'D', 109, 9)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: INSERT INTO topnkey_windowing VALUES
+ (NULL, NULL, NULL, NULL),
+ (NULL, 'D', 109, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'D', 104, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'C', 109, 9),
+ ('A', 'C', 103, 9),
+ (NULL, NULL, NULL, NULL),
+ (NULL, 'D', 109, 9),
+ ('A', 'D', 109, 9),
+ ('A', 'D', 101, 9),
+ ('A', 'D', 101, 9),
+ ('A', 'D', 114, 9),
+ ('A', 'D', 120, 9),
+ ('B', 'E', 105, 9),
+ ('B', 'E', 106, 9),
+ ('B', 'E', 106, 9),
+ ('B', 'E', NULL, NULL),
+ ('B', 'E', 106, 9),
+ ('A', 'C', 107, 9),
+ ('B', 'E', 108, 9),
+ ('A', 'C', 102, 9),
+ ('B', 'E', 110, 9),
+ (NULL, NULL, NULL, NULL),
+ (NULL, NULL, 109, 9),
+ ('A', 'D', 109, 9)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@topnkey_windowing
+POSTHOOK: Lineage: topnkey_windowing.tw_a SCRIPT []
+POSTHOOK: Lineage: topnkey_windowing.tw_b SCRIPT []
+POSTHOOK: Lineage: topnkey_windowing.tw_v1 SCRIPT []
+POSTHOOK: Lineage: topnkey_windowing.tw_v2 SCRIPT []
+PREHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: tw_a (type: string), tw_v1 (type: double)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: tw_a (type: string)
+ Statistics: Num rows: 26 Data size: 1969 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col2: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col2
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 8937 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2625 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 3
+B 3
+B 3
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 3
+B 3
+B 3
+PREHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: tw_a (type: string), tw_v1 (type: double), tw_v2 (type: double)
+ null sort order: aza
+ sort order: ++-
+ Map-reduce partition columns: tw_a (type: string)
+ Statistics: Num rows: 26 Data size: 2153 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: double)
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 26 Data size: 9121 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col2: double, _col3: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS LAST, _col3 DESC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col2, _col3
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 9121 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 2681 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a ORDER BY tw_v1 ASC NULLS LAST, tw_v2 DESC NULLS FIRST) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 1
+NULL 1
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: topnkey_windowing
+ Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: tw_a (type: string), tw_b (type: string), tw_v1 (type: double)
+ null sort order: aaz
+ sort order: +++
+ Map-reduce partition columns: tw_a (type: string), tw_b (type: string)
+ Statistics: Num rows: 26 Data size: 3924 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 26 Data size: 10892 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS LAST
+ partition by: _col0, _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col2
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 10892 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (rank_window_0 <= 3) (type: boolean)
+ Statistics: Num rows: 8 Data size: 3220 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 457 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 2
+NULL 2
+NULL 2
+NULL 1
+NULL 1
+A 1
+A 2
+A 3
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tw_a, ranking
+FROM (
+ SELECT tw_a AS tw_a,
+ rank() OVER (PARTITION BY tw_a, tw_b ORDER BY tw_v1) AS ranking
+ FROM topnkey_windowing) tmp1
+ WHERE ranking <= 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@topnkey_windowing
+#### A masked pattern was here ####
+NULL 1
+NULL 2
+NULL 2
+NULL 2
+NULL 1
+NULL 1
+A 1
+A 2
+A 3
+A 1
+A 1
+A 3
+B 1
+B 2
+B 2
+B 2
+PREHOOK: query: DROP TABLE topnkey_windowing
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@topnkey_windowing
+PREHOOK: Output: default@topnkey_windowing
+POSTHOOK: query: DROP TABLE topnkey_windowing
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@topnkey_windowing
+POSTHOOK: Output: default@topnkey_windowing