blob: 5b6986361e8410a7fc60c4a63799c0ef775a3dfd [file] [log] [blame]
CREATE TABLE ptf_count_distinct (
id int,
txt1 string,
txt2 string);
INSERT INTO ptf_count_distinct VALUES
(1,'2010005759','7164335675012038'),
(2,'2010005759','7164335675012038');
SELECT "*** Testing STRING, same values ***";
set hive.vectorized.execution.ptf.enabled=true;
SELECT "*** Distinct on partitioning column, vectorized ptf: expecting 1 in aggregations ***";
explain SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt1) as n,
count(distinct txt2) over(partition by txt2) as m
FROM ptf_count_distinct;
EXPLAIN VECTORIZATION DETAIL SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt1) as n,
count(distinct txt2) over(partition by txt2) as m
FROM ptf_count_distinct;
SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt1) as n,
count(distinct txt2) over(partition by txt2) as m
FROM ptf_count_distinct;
set hive.vectorized.execution.ptf.enabled=false;
SELECT "*** Distinct on partitioning column, non-vectorized ptf: expecting 1 in aggregations ***";
EXPLAIN SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt1) as n,
count(distinct txt2) over(partition by txt2) as m
FROM ptf_count_distinct;
SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt1) as n,
count(distinct txt2) over(partition by txt2) as m
FROM ptf_count_distinct;
set hive.vectorized.execution.ptf.enabled=true;
SELECT "*** Distinct on another column, vectorized ptf: expecting 1 in aggregations (both columns only have 1 distinct value) ***";
EXPLAIN VECTORIZATION DETAIL SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt2) as n,
count(distinct txt2) over(partition by txt1) as m
FROM ptf_count_distinct;
SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt2) as n,
count(distinct txt2) over(partition by txt1) as m
FROM ptf_count_distinct;
set hive.vectorized.execution.ptf.enabled=false;
SELECT "*** Distinct on another column, non-vectorized ptf: expecting 1 in aggregations (both columns have only 1 distinct value) ***";
EXPLAIN VECTORIZATION DETAIL SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt2) as n,
count(distinct txt2) over(partition by txt1) as m
FROM ptf_count_distinct;
SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt2) as n,
count(distinct txt2) over(partition by txt1) as m
FROM ptf_count_distinct;
CREATE TABLE ptf_count_distinct_different_values (
id int,
txt1 string,
txt2 string);
INSERT INTO ptf_count_distinct_different_values VALUES
(1,'1010005758','7164335675012038'),
(2,'2010005759','7164335675012038');
SELECT "*** Testing STRING, different values ***";
set hive.vectorized.execution.ptf.enabled=true;
SELECT "*** Distinct on another column, vectorized ptf: expecting 2 for distinct_txt1_over_txt2, 1 for distinct_txt2_over_txt1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt2) as distinct_txt1_over_txt2,
count(distinct txt2) over(partition by txt1) as distinct_txt2_over_txt1
FROM ptf_count_distinct_different_values
ORDER BY txt1;
SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt2) as distinct_txt1_over_txt2,
count(distinct txt2) over(partition by txt1) as distinct_txt2_over_txt1
FROM ptf_count_distinct_different_values
ORDER BY txt1;
SELECT "*** Distinct on partitioning column, vectorized ptf: expecting 1 for both distinct_txt1_over_txt2 and distinct_txt2_over_txt1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt1) as n,
count(distinct txt2) over(partition by txt2) as m
FROM ptf_count_distinct_different_values
ORDER BY txt1;
SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt1) as n,
count(distinct txt2) over(partition by txt2) as m
FROM ptf_count_distinct_different_values
ORDER BY txt1;
set hive.vectorized.execution.ptf.enabled=false;
SELECT "*** Distinct on another column, non-vectorized ptf: expecting 2 for distinct_txt1_over_txt2, 1 for distinct_txt2_over_txt1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt2) as distinct_txt1_over_txt2,
count(distinct txt2) over(partition by txt1) as distinct_txt2_over_txt1
FROM ptf_count_distinct_different_values
ORDER BY txt1;
SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt2) as distinct_txt1_over_txt2,
count(distinct txt2) over(partition by txt1) as distinct_txt2_over_txt1
FROM ptf_count_distinct_different_values
ORDER BY txt1;
SELECT "*** Distinct on partitioning column, non-vectorized ptf: expecting 1 for both distinct_txt1_over_txt2 and distinct_txt2_over_txt1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt1) as distinct_txt1_over_txt2,
count(distinct txt2) over(partition by txt2) as distinct_txt2_over_txt1
FROM ptf_count_distinct_different_values
ORDER BY txt1;
SELECT
txt1,
txt2,
count(distinct txt1) over(partition by txt1) as distinct_txt1_over_txt2,
count(distinct txt2) over(partition by txt2) as distinct_txt2_over_txt1
FROM ptf_count_distinct_different_values
ORDER BY txt1;
SELECT "*** Testing INT/LONG ***";
CREATE TABLE ptf_count_distinct_different_values_long (
id int,
long1 int,
long2 int);
INSERT INTO ptf_count_distinct_different_values_long VALUES
(1, 1010005758, 716433567),
(2, 2010005759, 716433567);
set hive.vectorized.execution.ptf.enabled=true;
SELECT "*** Distinct on another column, vectorized ptf: expecting 2 for distinct_long1_over_long2, 1 for distinct_long2_over_long1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
long1,
long2,
count(distinct long1) over(partition by long2) as distinct_long1_over_long2,
count(distinct long2) over(partition by long1) as distinct_long2_over_long1
FROM ptf_count_distinct_different_values_long
ORDER BY long1;
SELECT
long1,
long2,
count(distinct long1) over(partition by long2) as distinct_long1_over_long2,
count(distinct long2) over(partition by long1) as distinct_long2_over_long1
FROM ptf_count_distinct_different_values_long
ORDER BY long1;
SELECT "*** Distinct on partitioning column, vectorized ptf: expecting 1 for both distinct_long1_over_long1 and distinct_long2_over_long2 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
long1,
long2,
count(distinct long1) over(partition by long1) as distinct_long1_over_long1,
count(distinct long2) over(partition by long2) as distinct_long2_over_long2
FROM ptf_count_distinct_different_values_long
ORDER BY long1;
SELECT
long1,
long2,
count(distinct long1) over(partition by long1) as distinct_long1_over_long1,
count(distinct long2) over(partition by long2) as distinct_long2_over_long2
FROM ptf_count_distinct_different_values_long
ORDER BY long1;
set hive.vectorized.execution.ptf.enabled=false;
SELECT "*** Distinct on another column, non-vectorized ptf: expecting 2 for distinct_long1_over_long2, 1 for distinct_long2_over_long1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
long1,
long2,
count(distinct long1) over(partition by long2) as distinct_long1_over_long2,
count(distinct long2) over(partition by long1) as distinct_long2_over_long1
FROM ptf_count_distinct_different_values_long
ORDER BY long1;
SELECT
long1,
long2,
count(distinct long1) over(partition by long2) as distinct_long1_over_long2,
count(distinct long2) over(partition by long1) as distinct_long2_over_long1
FROM ptf_count_distinct_different_values_long
ORDER BY long1;
SELECT "*** Distinct on partitioning column, non-vectorized ptf: expecting 1 for both distinct_long1_over_long1 and distinct_long2_over_long2 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
long1,
long2,
count(distinct long1) over(partition by long1) as distinct_long1_over_long1,
count(distinct long2) over(partition by long2) as distinct_long2_over_long2
FROM ptf_count_distinct_different_values_long
ORDER BY long1;
SELECT
long1,
long2,
count(distinct long1) over(partition by long1) as distinct_long1_over_long1,
count(distinct long2) over(partition by long2) as distinct_long2_over_long2
FROM ptf_count_distinct_different_values_long
ORDER BY long1;
SELECT "*** Testing DOUBLE ***";
CREATE TABLE ptf_count_distinct_different_values_double (
id int,
double1 double,
double2 double);
INSERT INTO ptf_count_distinct_different_values_double VALUES
(1, 1010005758.1, 716433567.1),
(2, 2010005759.1, 716433567.1);
set hive.vectorized.execution.ptf.enabled=true;
SELECT "*** Distinct on another column, vectorized ptf: expecting 2 for distinct_double1_over_double2, 1 for distinct_double2_over_double1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
double1,
double2,
count(distinct double1) over(partition by double2) as distinct_double1_over_double2,
count(distinct double2) over(partition by double1) as distinct_double2_over_double1
FROM ptf_count_distinct_different_values_double
ORDER BY double1;
SELECT
double1,
double2,
count(distinct double1) over(partition by double2) as distinct_double1_over_double2,
count(distinct double2) over(partition by double1) as distinct_double2_over_double1
FROM ptf_count_distinct_different_values_double
ORDER BY double1;
SELECT "*** Distinct on partitioning column, vectorized ptf: expecting 1 for both distinct_double1_over_double1 and distinct_double2_over_double2 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
double1,
double2,
count(distinct double1) over(partition by double1) as distinct_double1_over_double1,
count(distinct double2) over(partition by double2) as distinct_double2_over_double2
FROM ptf_count_distinct_different_values_double
ORDER BY double1;
SELECT
double1,
double2,
count(distinct double1) over(partition by double1) as distinct_double1_over_double1,
count(distinct double2) over(partition by double2) as distinct_double2_over_double2
FROM ptf_count_distinct_different_values_double
ORDER BY double1;
set hive.vectorized.execution.ptf.enabled=false;
SELECT "*** Distinct on another column, non-vectorized ptf: expecting 2 for distinct_double1_over_double2, 1 for distinct_double2_over_double1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
double1,
double2,
count(distinct double1) over(partition by double2) as distinct_double1_over_double2,
count(distinct double2) over(partition by double1) as distinct_double2_over_double1
FROM ptf_count_distinct_different_values_double
ORDER BY double1;
SELECT
double1,
double2,
count(distinct double1) over(partition by double2) as distinct_double1_over_double2,
count(distinct double2) over(partition by double1) as distinct_double2_over_double1
FROM ptf_count_distinct_different_values_double
ORDER BY double1;
SELECT "*** Distinct on partitioning column, non-vectorized ptf: expecting 1 for both distinct_double1_over_double1 and distinct_double2_over_double2 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
double1,
double2,
count(distinct double1) over(partition by double1) as distinct_double1_over_double1,
count(distinct double2) over(partition by double2) as distinct_double2_over_double2
FROM ptf_count_distinct_different_values_double
ORDER BY double1;
SELECT
double1,
double2,
count(distinct double1) over(partition by double1) as distinct_double1_over_double1,
count(distinct double2) over(partition by double2) as distinct_double2_over_double2
FROM ptf_count_distinct_different_values_double
ORDER BY double1;
SELECT "*** Testing DECIMAL ***";
CREATE TABLE ptf_count_distinct_different_values_decimal (
id int,
decimal1 decimal,
decimal2 decimal);
INSERT INTO ptf_count_distinct_different_values_decimal VALUES
(1, 1010005758.1, 716433567.1),
(2, 2010005759.1, 716433567.1);
set hive.vectorized.execution.ptf.enabled=true;
SELECT "*** Distinct on another column, vectorized ptf: expecting 2 for distinct_decimal1_over_decimal2, 1 for distinct_decimal2_over_decimal1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
decimal1,
decimal2,
count(distinct decimal1) over(partition by decimal2) as distinct_decimal1_over_decimal2,
count(distinct decimal2) over(partition by decimal1) as distinct_decimal2_over_decimal1
FROM ptf_count_distinct_different_values_decimal
ORDER BY decimal1;
SELECT
decimal1,
decimal2,
count(distinct decimal1) over(partition by decimal2) as distinct_decimal1_over_decimal2,
count(distinct decimal2) over(partition by decimal1) as distinct_decimal2_over_decimal1
FROM ptf_count_distinct_different_values_decimal
ORDER BY decimal1;
SELECT "*** Distinct on partitioning column, vectorized ptf: expecting 1 for both distinct_decimal1_over_decimal1 and distinct_decimal2_over_decimal2 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
decimal1,
decimal2,
count(distinct decimal1) over(partition by decimal1) as distinct_decimal1_over_decimal1,
count(distinct decimal2) over(partition by decimal2) as distinct_decimal2_over_decimal2
FROM ptf_count_distinct_different_values_decimal
ORDER BY decimal1;
SELECT
decimal1,
decimal2,
count(distinct decimal1) over(partition by decimal1) as distinct_decimal1_over_decimal1,
count(distinct decimal2) over(partition by decimal2) as distinct_decimal2_over_decimal2
FROM ptf_count_distinct_different_values_decimal
ORDER BY decimal1;
set hive.vectorized.execution.ptf.enabled=false;
SELECT "*** Distinct on another column, non-vectorized ptf: expecting 2 for distinct_decimal1_over_decimal2, 1 for distinct_decimal2_over_decimal1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
decimal1,
decimal2,
count(distinct decimal1) over(partition by decimal2) as distinct_decimal1_over_decimal2,
count(distinct decimal2) over(partition by decimal1) as distinct_decimal2_over_decimal1
FROM ptf_count_distinct_different_values_decimal
ORDER BY decimal1;
SELECT
decimal1,
decimal2,
count(distinct decimal1) over(partition by decimal2) as distinct_decimal1_over_decimal2,
count(distinct decimal2) over(partition by decimal1) as distinct_decimal2_over_decimal1
FROM ptf_count_distinct_different_values_decimal
ORDER BY decimal1;
SELECT "*** Distinct on partitioning column, non-vectorized ptf: expecting 1 for both distinct_decimal1_over_decimal1 and distinct_decimal2_over_decimal2 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
decimal1,
decimal2,
count(distinct decimal1) over(partition by decimal1) as distinct_decimal1_over_decimal1,
count(distinct decimal2) over(partition by decimal2) as distinct_decimal2_over_decimal2
FROM ptf_count_distinct_different_values_decimal
ORDER BY decimal1;
SELECT
decimal1,
decimal2,
count(distinct decimal1) over(partition by decimal1) as distinct_decimal1_over_decimal1,
count(distinct decimal2) over(partition by decimal2) as distinct_decimal2_over_decimal2
FROM ptf_count_distinct_different_values_decimal
ORDER BY decimal1;
SELECT "*** Testing TIMESTAMP ***";
CREATE TABLE ptf_count_distinct_different_values_timestamp (
id int,
timestamp1 timestamp,
timestamp2 timestamp);
INSERT INTO ptf_count_distinct_different_values_timestamp VALUES
(1, '2015-11-29 09:30:00', '2020-11-29 09:30:00'),
(2, '2015-11-29 09:30:01', '2020-11-29 09:30:00');
set hive.vectorized.execution.ptf.enabled=true;
SELECT "*** Distinct on another column, vectorized ptf: expecting 2 for distinct_timestamp1_over_timestamp2, 1 for distinct_timestamp2_over_timestamp1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
timestamp1,
timestamp2,
count(distinct timestamp1) over(partition by timestamp2) as distinct_timestamp1_over_timestamp2,
count(distinct timestamp2) over(partition by timestamp1) as distinct_timestamp2_over_timestamp1
FROM ptf_count_distinct_different_values_timestamp
ORDER BY timestamp1;
SELECT
timestamp1,
timestamp2,
count(distinct timestamp1) over(partition by timestamp2) as distinct_timestamp1_over_timestamp2,
count(distinct timestamp2) over(partition by timestamp1) as distinct_timestamp2_over_timestamp1
FROM ptf_count_distinct_different_values_timestamp
ORDER BY timestamp1;
SELECT "*** Distinct on partitioning column, vectorized ptf: expecting 1 for both distinct_timestamp1_over_timestamp1 and distinct_timestamp2_over_timestamp2 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
timestamp1,
timestamp2,
count(distinct timestamp1) over(partition by timestamp1) as distinct_timestamp1_over_timestamp1,
count(distinct timestamp2) over(partition by timestamp2) as distinct_timestamp2_over_timestamp2
FROM ptf_count_distinct_different_values_timestamp
ORDER BY timestamp1;
SELECT
timestamp1,
timestamp2,
count(distinct timestamp1) over(partition by timestamp1) as distinct_timestamp1_over_timestamp1,
count(distinct timestamp2) over(partition by timestamp2) as distinct_timestamp2_over_timestamp2
FROM ptf_count_distinct_different_values_timestamp
ORDER BY timestamp1;
set hive.vectorized.execution.ptf.enabled=false;
SELECT "*** Distinct on another column, non-vectorized ptf: expecting 2 for distinct_timestamp1_over_timestamp2, 1 for distinct_timestamp2_over_timestamp1 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
timestamp1,
timestamp2,
count(distinct timestamp1) over(partition by timestamp2) as distinct_timestamp1_over_timestamp2,
count(distinct timestamp2) over(partition by timestamp1) as distinct_timestamp2_over_timestamp1
FROM ptf_count_distinct_different_values_timestamp
ORDER BY timestamp1;
SELECT
timestamp1,
timestamp2,
count(distinct timestamp1) over(partition by timestamp2) as distinct_timestamp1_over_timestamp2,
count(distinct timestamp2) over(partition by timestamp1) as distinct_timestamp2_over_timestamp1
FROM ptf_count_distinct_different_values_timestamp
ORDER BY timestamp1;
SELECT "*** Distinct on partitioning column, non-vectorized ptf: expecting 1 for both distinct_timestamp1_over_timestamp1 and distinct_timestamp2_over_timestamp2 ***";
EXPLAIN VECTORIZATION DETAIL SELECT
timestamp1,
timestamp2,
count(distinct timestamp1) over(partition by timestamp1) as distinct_timestamp1_over_timestamp1,
count(distinct timestamp2) over(partition by timestamp2) as distinct_timestamp2_over_timestamp2
FROM ptf_count_distinct_different_values_timestamp
ORDER BY timestamp1;
SELECT
timestamp1,
timestamp2,
count(distinct timestamp1) over(partition by timestamp1) as distinct_timestamp1_over_timestamp1,
count(distinct timestamp2) over(partition by timestamp2) as distinct_timestamp2_over_timestamp2
FROM ptf_count_distinct_different_values_timestamp
ORDER BY timestamp1;