blob: 853bddbbfabfb1b644488e47724034bcff34d2e4 [file] [log] [blame]
# test conversion of disjunctive predicate to conjunctive normal form
# inner join
select count(*) from lineitem, orders
where l_orderkey = o_orderkey
and ((l_suppkey > 10 and o_custkey > 20)
or (l_suppkey > 30 and o_custkey > 40))
and l_partkey > 0;
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: l_orderkey = o_orderkey
| other predicates: l_suppkey > 10 OR o_custkey > 40, o_custkey > 20 OR l_suppkey > 30
| runtime filters: RF000 <- o_orderkey
| row-size=40B cardinality=57.58K
|
|--01:SCAN HDFS [tpch_parquet.orders]
| HDFS partitions=1/1 files=2 size=54.21MB
| predicates: o_custkey > 20 OR o_custkey > 40
| row-size=16B cardinality=150.00K
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.98MB
predicates: l_partkey > 0, l_suppkey > 10 OR l_suppkey > 30
runtime filters: RF000 -> l_orderkey
row-size=24B cardinality=575.77K(filtered from 600.12K)
====
# outer join
select count(*) from lineitem left outer join orders
on l_orderkey = o_orderkey
where ((l_suppkey > 10 and o_custkey > 20)
or (l_suppkey > 30 and o_custkey > 40))
and l_partkey > 0;
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: l_orderkey = o_orderkey
| other predicates: l_suppkey > 10 OR o_custkey > 40, o_custkey > 20 OR l_suppkey > 30, o_custkey > 20 OR o_custkey > 40
| row-size=40B cardinality=600.12K
|
|--01:SCAN HDFS [tpch_parquet.orders]
| HDFS partitions=1/1 files=2 size=54.21MB
| predicates: o_custkey > 20 OR o_custkey > 40
| row-size=16B cardinality=150.00K
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.98MB
predicates: l_partkey > 0, l_suppkey > 10 OR l_suppkey > 30
row-size=24B cardinality=600.12K
====
# BETWEEN predicate within each side of the OR
select count(*) from lineitem, orders
where l_orderkey = o_orderkey
and ((l_suppkey between 10 and 30 and o_custkey > 20)
or (l_suppkey between 30 and 50 and o_custkey > 40))
and l_partkey > 0;
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: l_orderkey = o_orderkey
| other predicates: l_suppkey <= 30 OR o_custkey > 40, l_suppkey >= 10 OR o_custkey > 40, o_custkey > 20 OR l_suppkey <= 50, o_custkey > 20 OR l_suppkey >= 30
| runtime filters: RF000 <- o_orderkey
| row-size=40B cardinality=57.58K
|
|--01:SCAN HDFS [tpch_parquet.orders]
| HDFS partitions=1/1 files=2 size=54.21MB
| predicates: o_custkey > 20 OR o_custkey > 40
| row-size=16B cardinality=150.00K
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.98MB
predicates: l_partkey > 0, l_suppkey <= 30 OR l_suppkey >= 30 AND l_suppkey <= 50, l_suppkey >= 10 OR l_suppkey >= 30 AND l_suppkey <= 50
runtime filters: RF000 -> l_orderkey
row-size=24B cardinality=575.77K(filtered from 600.12K)
====
# equality predicates on l_suppkey should eventually be converted to IN
# after CNF rewrite enables another rule to be applied
# IN predicates on o_custkey should be preserved
select count(*) from lineitem, orders
where l_orderkey = o_orderkey
and ((l_suppkey = 10 and o_custkey in (20, 21))
or (l_suppkey = 30 and o_custkey in (40, 41))
or (l_suppkey = 50 and o_custkey in (60, 61)))
and l_partkey > 0;
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: l_orderkey = o_orderkey
| other predicates: l_suppkey IN (10, 30) OR o_custkey IN (60, 61), o_custkey IN (20, 21, 40, 41) OR l_suppkey = 50, l_suppkey = 10 OR o_custkey IN (40, 41) OR l_suppkey = 50, o_custkey IN (20, 21) OR l_suppkey = 30 OR l_suppkey = 50, l_suppkey = 10 OR o_custkey IN (40, 41) OR o_custkey IN (60, 61), o_custkey IN (20, 21) OR l_suppkey = 30 OR o_custkey IN (60, 61)
| runtime filters: RF000 <- o_orderkey
| row-size=40B cardinality=1
|
|--01:SCAN HDFS [tpch_parquet.orders]
| HDFS partitions=1/1 files=2 size=54.21MB
| predicates: o_custkey IN (20, 21, 40, 41, 60, 61)
| row-size=16B cardinality=91
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.98MB
predicates: l_partkey > 0, l_suppkey IN (10, 30, 50)
runtime filters: RF000 -> l_orderkey
row-size=24B cardinality=349(filtered from 586)
====
# NOT predicate
select count(*) from lineitem, orders
where l_orderkey = o_orderkey
and not ((l_suppkey > 10 and o_custkey > 20)
or (l_suppkey > 30 and o_custkey > 40))
and l_partkey > 0;
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: o_orderkey = l_orderkey
| other predicates: NOT o_custkey > 20 OR o_custkey > 40 AND o_custkey > 20 OR l_suppkey > 30 AND l_suppkey > 10 OR o_custkey > 40 AND l_suppkey > 10 OR l_suppkey > 30
| runtime filters: RF000 <- l_orderkey
| row-size=40B cardinality=600.12K
|
|--00:SCAN HDFS [tpch_parquet.lineitem]
| HDFS partitions=1/1 files=3 size=193.98MB
| predicates: l_partkey > 0
| row-size=24B cardinality=600.12K
|
01:SCAN HDFS [tpch_parquet.orders]
HDFS partitions=1/1 files=2 size=54.21MB
runtime filters: RF000 -> o_orderkey
row-size=16B cardinality=600.12K(filtered from 1.50M)
====
# set the max_cnf_exprs limit
# in this case partial conversion is expected
select count(*) from lineitem, orders
where l_orderkey = o_orderkey
and ((l_suppkey between 10 and 50 and o_custkey between 20 and 40)
or (l_suppkey between 30 and 90 and o_custkey between 60 and 100))
and l_partkey > 0;
---- QUERYOPTIONS
MAX_CNF_EXPRS=4
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: l_orderkey = o_orderkey
| other predicates: o_custkey <= 40 OR l_suppkey >= 30 AND l_suppkey <= 90, l_suppkey <= 50 OR (l_suppkey >= 30 AND l_suppkey <= 90 AND o_custkey >= 60 AND o_custkey <= 100), l_suppkey >= 10 OR (l_suppkey >= 30 AND l_suppkey <= 90 AND o_custkey >= 60 AND o_custkey <= 100), o_custkey >= 20 OR (l_suppkey >= 30 AND l_suppkey <= 90 AND o_custkey >= 60 AND o_custkey <= 100)
| runtime filters: RF000 <- o_orderkey
| row-size=40B cardinality=57.58K
|
|--01:SCAN HDFS [tpch_parquet.orders]
| HDFS partitions=1/1 files=2 size=54.21MB
| predicates: o_custkey <= 40 OR o_custkey >= 60 AND o_custkey <= 100
| row-size=16B cardinality=150.00K
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.98MB
predicates: l_partkey > 0
runtime filters: RF000 -> l_orderkey
row-size=24B cardinality=575.77K(filtered from 600.12K)
====
# reset the max_cnf_exprs limit to unlimited (-1)
# in this case full conversion is expected
select count(*) from lineitem, orders
where l_orderkey = o_orderkey
and ((l_suppkey between 10 and 50 and o_custkey between 20 and 40)
or (l_suppkey between 30 and 90 and o_custkey between 60 and 100))
and l_partkey > 0;
---- QUERYOPTIONS
MAX_CNF_EXPRS=-1
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: l_orderkey = o_orderkey
| other predicates: l_suppkey <= 50 OR o_custkey <= 100, l_suppkey <= 50 OR o_custkey >= 60, l_suppkey >= 10 OR o_custkey <= 100, l_suppkey >= 10 OR o_custkey >= 60, o_custkey <= 40 OR l_suppkey <= 90, o_custkey <= 40 OR l_suppkey >= 30, o_custkey >= 20 OR l_suppkey <= 90, o_custkey >= 20 OR l_suppkey >= 30
| runtime filters: RF000 <- o_orderkey
| row-size=40B cardinality=57.58K
|
|--01:SCAN HDFS [tpch_parquet.orders]
| HDFS partitions=1/1 files=2 size=54.21MB
| predicates: o_custkey <= 40 OR o_custkey >= 60 AND o_custkey <= 100, o_custkey >= 20 OR o_custkey >= 60 AND o_custkey <= 100
| row-size=16B cardinality=150.00K
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.98MB
predicates: l_partkey > 0, l_suppkey <= 50 OR l_suppkey >= 30 AND l_suppkey <= 90, l_suppkey >= 10 OR l_suppkey >= 30 AND l_suppkey <= 90
runtime filters: RF000 -> l_orderkey
row-size=24B cardinality=575.77K(filtered from 600.12K)
====
# same as above, but set max_cnf_exprs to 0 (also implies unlimited)
# in this case full conversion is expected
select count(*) from lineitem, orders
where l_orderkey = o_orderkey
and ((l_suppkey between 10 and 50 and o_custkey between 20 and 40)
or (l_suppkey between 30 and 90 and o_custkey between 60 and 100))
and l_partkey > 0;
---- QUERYOPTIONS
MAX_CNF_EXPRS=0
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: l_orderkey = o_orderkey
| other predicates: l_suppkey <= 50 OR o_custkey <= 100, l_suppkey <= 50 OR o_custkey >= 60, l_suppkey >= 10 OR o_custkey <= 100, l_suppkey >= 10 OR o_custkey >= 60, o_custkey <= 40 OR l_suppkey <= 90, o_custkey <= 40 OR l_suppkey >= 30, o_custkey >= 20 OR l_suppkey <= 90, o_custkey >= 20 OR l_suppkey >= 30
| runtime filters: RF000 <- o_orderkey
| row-size=40B cardinality=57.58K
|
|--01:SCAN HDFS [tpch_parquet.orders]
| HDFS partitions=1/1 files=2 size=54.21MB
| predicates: o_custkey <= 40 OR o_custkey >= 60 AND o_custkey <= 100, o_custkey >= 20 OR o_custkey >= 60 AND o_custkey <= 100
| row-size=16B cardinality=150.00K
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.98MB
predicates: l_partkey > 0, l_suppkey <= 50 OR l_suppkey >= 30 AND l_suppkey <= 90, l_suppkey >= 10 OR l_suppkey >= 30 AND l_suppkey <= 90
runtime filters: RF000 -> l_orderkey
row-size=24B cardinality=575.77K(filtered from 600.12K)
====
# disable the rewrite, so no conversion is expected
select count(*) from lineitem, orders
where l_orderkey = o_orderkey
and ((l_suppkey between 10 and 50 and o_custkey between 20 and 40)
or (l_suppkey between 30 and 90 and o_custkey between 60 and 100))
and l_partkey > 0;
---- QUERYOPTIONS
ENABLE_CNF_REWRITES=false
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: o_orderkey = l_orderkey
| other predicates: ((l_suppkey >= 10 AND l_suppkey <= 50 AND o_custkey >= 20 AND o_custkey <= 40) OR (l_suppkey >= 30 AND l_suppkey <= 90 AND o_custkey >= 60 AND o_custkey <= 100))
| runtime filters: RF000 <- l_orderkey
| row-size=40B cardinality=600.12K
|
|--00:SCAN HDFS [tpch_parquet.lineitem]
| HDFS partitions=1/1 files=3 size=193.98MB
| predicates: l_partkey > 0
| row-size=24B cardinality=600.12K
|
01:SCAN HDFS [tpch_parquet.orders]
HDFS partitions=1/1 files=2 size=54.21MB
runtime filters: RF000 -> o_orderkey
row-size=16B cardinality=600.12K(filtered from 1.50M)
====
# IMPALA-9620: query1
# Test predicates in the SELECT and GROUP-BY
# with enable_cnf_rewrites = true. No rewrite is expected
# but query was failing without the patch.
select l_quantity,
if(l_quantity < 5 or l_quantity > 45, 'invalid', 'valid')
from lineitem
group by l_quantity,
if(l_quantity < 5 or l_quantity > 45, 'invalid', 'valid')
limit 5
---- QUERYOPTIONS
ENABLE_CNF_REWRITES=true
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| group by: l_quantity, if(l_quantity < 5 OR l_quantity > 45, 'invalid', 'valid')
| limit: 5
| row-size=20B cardinality=5
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.98MB
row-size=8B cardinality=6.00M
====
# IMPALA-9620: query2
select case when not (l_quantity = 5) then 0 else 1 end
from lineitem
group by case when not (l_quantity = 5) then 0 else 1 end
---- QUERYOPTIONS
ENABLE_CNF_REWRITES=true
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| group by: CASE WHEN NOT (l_quantity = 5) THEN 0 ELSE 1 END
| row-size=1B cardinality=2
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.98MB
row-size=8B cardinality=6.00M
====
# Test predicates in the SELECT and ORDER-BY
select l_quantity,
if(l_quantity < 5 or l_quantity > 45, 'invalid', 'valid')
from lineitem
order by l_quantity,
if(l_quantity < 5 or l_quantity > 45, 'invalid', 'valid')
limit 5
---- QUERYOPTIONS
ENABLE_CNF_REWRITES=true
---- PLAN
PLAN-ROOT SINK
|
01:TOP-N [LIMIT=5]
| order by: l_quantity ASC, if(l_quantity < 5 OR l_quantity > 45, 'invalid', 'valid') ASC
| row-size=20B cardinality=5
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.98MB
row-size=8B cardinality=6.00M
====
# Test predicate in the ORDER BY of an analytic function.
select rank() over
(order by if(l_quantity < 5 or l_quantity > 45, 'invalid', 'valid'))
from tpch.lineitem
limit 5;
---- QUERYOPTIONS
ENABLE_CNF_REWRITES=true
---- PLAN
PLAN-ROOT SINK
|
02:ANALYTIC
| functions: rank()
| order by: if(l_quantity < 5 OR l_quantity > 45, 'invalid', 'valid') ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| limit: 5
| row-size=28B cardinality=5
|
01:TOP-N [LIMIT=5]
| order by: if(l_quantity < 5 OR l_quantity > 45, 'invalid', 'valid') ASC
| row-size=20B cardinality=6.00M
|
00:SCAN HDFS [tpch.lineitem]
HDFS partitions=1/1 files=1 size=718.94MB
row-size=8B cardinality=6.00M
====
# IMPALA-11274: Test with string functions in the disjunctive predicate.
# In this case the predicate is not converted to CNF
select count(*) from lineitem, orders
where l_orderkey = o_orderkey and
((upper(l_returnflag) = 'Y' and upper(o_orderpriority) = 'HIGH')
or (upper(l_returnflag) = 'N' and upper(o_orderpriority) = 'LOW'))
and l_partkey > 0;
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: o_orderkey = l_orderkey
| other predicates: ((upper(l_returnflag) = 'Y' AND upper(o_orderpriority) = 'HIGH') OR (upper(l_returnflag) = 'N' AND upper(o_orderpriority) = 'LOW'))
| runtime filters: RF000 <- l_orderkey
| row-size=57B cardinality=600.12K
|
|--00:SCAN HDFS [tpch_parquet.lineitem]
| HDFS partitions=1/1 files=3 size=193.99MB
| predicates: l_partkey > 0
| row-size=29B cardinality=600.12K
|
01:SCAN HDFS [tpch_parquet.orders]
HDFS partitions=1/1 files=2 size=54.21MB
runtime filters: RF000 -> o_orderkey
row-size=28B cardinality=600.12K(filtered from 1.50M)
====
# IMPALA-11274: Functions like CAST should still be eligible for CNF
select count(*) from lineitem, orders
where l_orderkey = o_orderkey and
((cast(l_returnflag as varchar(2)) = 'Y' and cast(o_orderpriority as varchar(5)) = 'HIGH')
or (cast(l_returnflag as varchar(2)) = 'N' and cast(o_orderpriority as varchar(5)) = 'LOW'))
and l_partkey > 0;
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: l_orderkey = o_orderkey
| other predicates: CAST(l_returnflag AS VARCHAR(2)) = 'Y' OR CAST(o_orderpriority AS VARCHAR(5)) = 'LOW', CAST(o_orderpriority AS VARCHAR(5)) = 'HIGH' OR CAST(l_returnflag AS VARCHAR(2)) = 'N'
| runtime filters: RF000 <- o_orderkey
| row-size=57B cardinality=57.58K
|
|--01:SCAN HDFS [tpch_parquet.orders]
| HDFS partitions=1/1 files=2 size=54.21MB
| predicates: CAST(o_orderpriority AS VARCHAR(5)) IN ('HIGH', 'LOW')
| row-size=28B cardinality=150.00K
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.99MB
predicates: l_partkey > 0, CAST(l_returnflag AS VARCHAR(2)) IN ('Y', 'N')
runtime filters: RF000 -> l_orderkey
row-size=29B cardinality=575.77K(filtered from 600.12K)
====
# IMPALA-11274: Simple arithmetic expressions should still be eligible for CNF
select count(*) from lineitem, orders
where l_orderkey = o_orderkey and
(2 * log10(l_quantity) < 3 and cast(l_returnflag as varchar(2)) = 'Y')
or l_quantity >= 50;
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:NESTED LOOP JOIN [INNER JOIN]
| predicates: l_orderkey = o_orderkey OR l_quantity >= 50
| row-size=37B cardinality=600.12K
|
|--01:SCAN HDFS [tpch_parquet.orders]
| HDFS partitions=1/1 files=2 size=54.21MB
| row-size=8B cardinality=1.50M
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.99MB
predicates: (2 * log10(l_quantity) < 3 AND CAST(l_returnflag AS VARCHAR(2)) = 'Y') OR l_quantity >= 50
row-size=29B cardinality=600.12K
====