blob: 9f76f2921f9dd3a217f405241c46733765e114d6 [file] [log] [blame]
set hive.fetch.task.conversion = none;
set hive.strict.checks.cartesian.product=false;
SET hive.vectorized.execution.enabled=true;
-- First, create source tables
DROP TABLE IF EXISTS dummy_n0;
CREATE TABLE dummy_n0 (i int);
INSERT INTO TABLE dummy_n0 VALUES (42);
DROP TABLE IF EXISTS nested_tbl_1;
CREATE TABLE nested_tbl_1 (
a int,
s1 struct<f1: boolean, f2: string, f3: struct<f4: int, f5: double>, f6: int>,
s2 struct<f7: string, f8: struct<f9 : boolean, f10: array<int>, f11: map<string, boolean>>>,
s3 struct<f12: array<struct<f13:string, f14:int>>>,
s4 map<string, struct<f15:int>>,
s5 struct<f16: array<struct<f17:string, f18:struct<f19:int>>>>,
s6 map<string, struct<f20:array<struct<f21:struct<f22:int>>>>>
) STORED AS ORC;
INSERT INTO TABLE nested_tbl_1 SELECT
1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4),
named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))),
named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))),
map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)),
named_struct('f16', array(named_struct('f17', 'foo', 'f18', named_struct('f19', 14)), named_struct('f17', 'bar', 'f18', named_struct('f19', 28)))),
map('key1', named_struct('f20', array(named_struct('f21', named_struct('f22', 1)))),
'key2', named_struct('f20', array(named_struct('f21', named_struct('f22', 2)))))
FROM dummy_n0;
DROP TABLE IF EXISTS nested_tbl_2;
CREATE TABLE nested_tbl_2 LIKE nested_tbl_1;
INSERT INTO TABLE nested_tbl_2 SELECT
2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4),
named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))),
named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))),
map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)),
named_struct('f16', array(named_struct('f17', 'bar', 'f18', named_struct('f19', 28)), named_struct('f17', 'foo', 'f18', named_struct('f19', 56)))),
map('key3', named_struct('f20', array(named_struct('f21', named_struct('f22', 3)))),
'key4', named_struct('f20', array(named_struct('f21', named_struct('f22', 4)))))
FROM dummy_n0;
-- Testing only select statements
EXPLAIN VECTORIZATION
SELECT a FROM nested_tbl_1;
SELECT a FROM nested_tbl_1;
EXPLAIN VECTORIZATION EXPRESSION
SELECT s1.f1 FROM nested_tbl_1;
SELECT s1.f1 FROM nested_tbl_1;
EXPLAIN VECTORIZATION EXPRESSION
SELECT s1.f1, s1.f2 FROM nested_tbl_1;
SELECT s1.f1, s1.f2 FROM nested_tbl_1;
-- In this case 's1.f3' and 's1.f3.f4' should be merged
EXPLAIN VECTORIZATION EXPRESSION
SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1;
SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1;
-- Testing select array and index shifting
EXPLAIN VECTORIZATION EXPRESSION
SELECT s1.f3.f5 FROM nested_tbl_1;
SELECT s1.f3.f5 FROM nested_tbl_1;
-- Testing select from multiple structs
EXPLAIN VECTORIZATION EXPRESSION
SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1;
SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1;
-- Testing select with filter
EXPLAIN VECTORIZATION EXPRESSION
SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE;
SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE;
EXPLAIN VECTORIZATION EXPRESSION
SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4;
SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4;
EXPLAIN VECTORIZATION EXPRESSION
SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE;
SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE;
-- Testing lateral view
EXPLAIN VECTORIZATION EXPRESSION
SELECT col1, col2 FROM nested_tbl_1
LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1
LATERAL VIEW explode(s3.f12) tbl2 AS col2;
SELECT col1, col2 FROM nested_tbl_1
LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1
LATERAL VIEW explode(s3.f12) tbl2 AS col2;
-- Testing UDFs
EXPLAIN VECTORIZATION EXPRESSION
SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1;
SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1;
-- Testing aggregations
EXPLAIN VECTORIZATION EXPRESSION
SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5;
SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5;
EXPLAIN VECTORIZATION EXPRESSION
SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3;
SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3;
EXPLAIN VECTORIZATION EXPRESSION
SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3;
SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3;
-- Testing joins
EXPLAIN VECTORIZATION EXPRESSION
SELECT t1.s1.f3.f5, t2.s2.f8
FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2
ON t1.s1.f3.f4 = t2.s1.f6
WHERE t2.s2.f8.f9 == FALSE;
SELECT t1.s1.f3.f5, t2.s2.f8
FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2
ON t1.s1.f3.f4 = t2.s1.f6
WHERE t2.s2.f8.f9 == FALSE;
EXPLAIN VECTORIZATION EXPRESSION
SELECT t1.s1.f3.f5, t2.s2.f8
FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2
ON t1.s1.f3.f4 = t2.s1.f6
WHERE t2.s2.f8.f9 == TRUE;
SELECT t1.s1.f3.f5, t2.s2.f8
FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2
ON t1.s1.f3.f4 = t2.s1.f6
WHERE t2.s2.f8.f9 == TRUE;
EXPLAIN VECTORIZATION EXPRESSION
SELECT t1.s1.f3.f5
FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE;
SELECT t1.s1.f3.f5
FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE;
EXPLAIN VECTORIZATION EXPRESSION
SELECT t1.s1.f3.f5
FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
ON t1.s1.f1 <> t2.s2.f8.f9;
SELECT t1.s1.f3.f5
FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
ON t1.s1.f1 <> t2.s2.f8.f9;
EXPLAIN VECTORIZATION EXPRESSION
SELECT t1.s1.f3.f5
FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9;
SELECT t1.s1.f3.f5
FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9;
-- Testing insert with aliases
DROP TABLE IF EXISTS nested_tbl_3;
CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS ORC;
INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3)
SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3
FROM nested_tbl_1;
SELECT * FROM nested_tbl_3;
-- Testing select struct field from elements in array or map
EXPLAIN VECTORIZATION EXPRESSION
SELECT count(s1.f6), s3.f12[0].f14
FROM nested_tbl_1
GROUP BY s3.f12[0].f14;
SELECT count(s1.f6), s3.f12[0].f14
FROM nested_tbl_1
GROUP BY s3.f12[0].f14;
EXPLAIN VECTORIZATION EXPRESSION
SELECT count(s1.f6), s4['key1'].f15
FROM nested_tbl_1
GROUP BY s4['key1'].f15;
SELECT count(s1.f6), s4['key1'].f15
FROM nested_tbl_1
GROUP BY s4['key1'].f15;
EXPLAIN VECTORIZATION EXPRESSION
SELECT count(s1.f6), s5.f16[0].f18.f19
FROM nested_tbl_1
GROUP BY s5.f16[0].f18.f19;
SELECT count(s1.f6), s5.f16[0].f18.f19
FROM nested_tbl_1
GROUP BY s5.f16[0].f18.f19;
EXPLAIN VECTORIZATION EXPRESSION
SELECT count(s1.f6), s5.f16.f18.f19
FROM nested_tbl_1
GROUP BY s5.f16.f18.f19;
SELECT count(s1.f6), s5.f16.f18.f19
FROM nested_tbl_1
GROUP BY s5.f16.f18.f19;
EXPLAIN VECTORIZATION EXPRESSION
SELECT count(s1.f6), s6['key1'].f20[0].f21.f22
FROM nested_tbl_1
GROUP BY s6['key1'].f20[0].f21.f22;
SELECT count(s1.f6), s6['key1'].f20[0].f21.f22
FROM nested_tbl_1
GROUP BY s6['key1'].f20[0].f21.f22;
EXPLAIN VECTORIZATION EXPRESSION
SELECT count(s1.f6), s6['key1'].f20.f21.f22
FROM nested_tbl_1
GROUP BY s6['key1'].f20.f21.f22;
SELECT count(s1.f6), s6['key1'].f20.f21.f22
FROM nested_tbl_1
GROUP BY s6['key1'].f20.f21.f22;