| # Predicate tests |
| # In this file, "inequality" means not equals. It is a general |
| # term for <, <=, >, >=. The term "not equals" is used for the |
| # != (AKA <>) operator. |
| # No predicate at all. Establishes cardinality baseline. |
| select * from tpch.customer |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| row-size=218B cardinality=150.00K |
| ==== |
| # Predicate on a single value: card = |T|/ndv |
| # Unique key, NDV=|T| |
| select * |
| from tpch.customer c |
| where c.c_custkey = 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey = 10 |
| row-size=218B cardinality=1 |
| ==== |
| # Predicate on a single value: card = |T|/ndv |
| # Non-unique key, NDV=25 |
| select * |
| from tpch.customer c |
| where c.c_nationkey = 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_nationkey = 10 |
| row-size=218B cardinality=6.00K |
| ==== |
| # OR'ed predicate, card = 2/ndv |
| select * |
| from tpch.customer c |
| where c.c_custkey = 10 OR c.c_custkey = 20 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey IN (10, 20) |
| row-size=218B cardinality=2 |
| ==== |
| # OR'ed predicate, distinct columns |
| # card = max(card of each OR term) |
| select * |
| from tpch.customer c |
| where c.c_custkey = 10 |
| or c.c_nationkey = 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey = 10 OR c.c_nationkey = 10 |
| row-size=218B cardinality=6.00K |
| ==== |
| # As above, but with smaller table to see effect |
| # NDV(id) = |T| = 8 |
| # NDV(bool_val) = 2 |
| # card = max(|T|/NDV(id),|T|/NDV(bool_val)) = max(1, 4) |
| # But done by adding cardinalities, which seems right, but is wrong |
| # Bug: IMPALA-8038 |
| select * |
| from functional.alltypestiny t |
| where t.id = 10 |
| or t.bool_col = true |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [functional.alltypestiny t] |
| HDFS partitions=4/4 files=4 size=460B |
| predicates: t.id = 10 OR t.bool_col = TRUE |
| row-size=89B cardinality=5 |
| ==== |
| # IN, card = x/ndv |
| select * |
| from tpch.customer c |
| where c.c_custkey in (10, 20, 30) |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey IN (10, 20, 30) |
| row-size=218B cardinality=3 |
| ==== |
| # IN with duplicate values. Remove dups. |
| # Bug: IMPALA-8030 |
| select * |
| from tpch.customer c |
| where c.c_custkey in (10, 20, 30, 30, 10, 20) |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey IN (10, 20, 30, 30, 10, 20) |
| row-size=218B cardinality=6 |
| ==== |
| # OR on same value: card = 1/ndv |
| select * |
| from tpch.customer c |
| where c.c_custkey = 10 OR c.c_custkey = 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey = 10 |
| row-size=218B cardinality=1 |
| ==== |
| # OR on same value: card = 1/ndv |
| # Different expression order |
| # Bug: IMPALA-8030 |
| select * |
| from tpch.customer c |
| where c.c_custkey = 10 OR 10 = c.c_custkey |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey IN (10, 10) |
| row-size=218B cardinality=2 |
| ==== |
| # AND'ed predicate, card = 0 |
| select * |
| from tpch.customer c |
| where c.c_custkey = 10 AND c.c_custkey = 20 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:EMPTYSET |
| ==== |
| # AND on same value: card = 1/ndv |
| select * |
| from tpch.customer c |
| where c.c_custkey = 10 AND c.c_custkey = 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey = 10 |
| row-size=218B cardinality=1 |
| ==== |
| # Not-equal, card = 1 - 1/ndv |
| # Use smaller table so effect is clear |
| # |T|=8, NDV=8 |
| # Bug: IMPALA-8039 |
| # Bug, expected cardinality ~7 |
| select * |
| from functional.alltypestiny |
| where id != 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [functional.alltypestiny] |
| HDFS partitions=4/4 files=4 size=460B |
| predicates: id != 10 |
| row-size=89B cardinality=1 |
| ==== |
| # Inequality. No useful stats. |
| # Bug: IMPALA-8037, Assumes sel = 0.1 |
| # Bug: Expected cardinality ~49.5K |
| select * |
| from tpch.customer c |
| where c.c_custkey < 1234 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey < 1234 |
| row-size=218B cardinality=15.00K |
| ==== |
| # Inequality twice on same value. Remove duplicate. |
| # Bug: IMPALA-8037 |
| # Bug: Expected cardinality ~49.5K |
| select * |
| from tpch.customer c |
| where c.c_custkey < 1234 |
| and c.c_custkey < 1234 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey < 1234 |
| row-size=218B cardinality=15.00K |
| ==== |
| # Inequality twice on same value, but reversed "direction". Remove duplicate. |
| # Bug: IMPALA-8037 |
| # Bug: Expected cardinality ~49.5K |
| select * |
| from tpch.customer c |
| where c.c_custkey < 1234 |
| and 1234 > c.c_custkey |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey < 1234 |
| row-size=218B cardinality=15.00K |
| ==== |
| # Two inequalities of the same "direction" Assume only one applies. |
| # Bugs: IMPALA-8031, IMPALA-8037 |
| # Bug: Expected cardinality ~28.5K |
| select * |
| from tpch.customer c |
| where c.c_custkey < 1234 |
| and c.c_custkey < 2345 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey < 1234, c.c_custkey < 2345 |
| row-size=218B cardinality=15.00K |
| ==== |
| # Two inequalities. No useful stats. |
| # But with effect of exponential-backoff |
| # Bug: IMPALA-8037 |
| # Bug: Expected cardinality ~28.5K |
| select * |
| from tpch.customer c |
| where c.c_custkey < 1234 |
| and c.c_nationkey < 100 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey < 1234, c.c_nationkey < 100 |
| row-size=218B cardinality=15.00K |
| ==== |
| # Between. No useful stats. Should assume, say 0.16 |
| # But, gets rewritten before computing selectivity, |
| # So same as AND'ed inequalities |
| # sel = 0.1, but Ramakrishnan and Gherke suggest 1/4 |
| # Bug: IMPALA-8037 |
| # Bug: Expected cardinality ~28.5K |
| select * |
| from tpch.customer c |
| where c.c_custkey between 1234 and 2345 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey <= 2345, c.c_custkey >= 1234 |
| row-size=218B cardinality=15.00K |
| ==== |
| # Compound inequality (< and >) that is the same as BETWEEN. |
| # Gets compouted as AND of inequalities. |
| # sel = 0.1 (single sel for all) |
| # But, these conditions are not independent, so a better estimate |
| # would be, say 1/6 |
| # Bugs: TBD |
| # Bug: Expected cardinality ~28.5K |
| # |<<< <<< ===| c > x |
| # |=== >>> >>>| c < y |
| # |<<< === >>>| c > x AND c < y |
| select * |
| from tpch.customer c |
| where c.c_custkey >= 1234 and c.c_custkey <= 2345 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey <= 2345, c.c_custkey >= 1234 |
| row-size=218B cardinality=15.00K |
| ==== |
| # Between and redundant inequality |
| # Bug: expected cardinality ~15K |
| select * |
| from tpch.customer c |
| where c.c_custkey between 1234 and 2345 |
| and c.c_custkey <= 2345 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_custkey <= 2345, c.c_custkey >= 1234 |
| row-size=218B cardinality=15.00K |
| ==== |
| # Partitioned scan. |
| # |table| = 11K |
| # |partition| = 1000 |
| # Five patitions match |
| # |table'| = 5000 |
| select * |
| from functional.alltypesagg a |
| where a.day >= 6 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [functional.alltypesagg a] |
| partition predicates: a.`day` >= 6 |
| HDFS partitions=5/11 files=5 size=372.38KB |
| row-size=95B cardinality=5.00K |
| ==== |
| # Partitioned table, one partition matches |
| select * |
| from functional.alltypesagg a |
| where a.day = 6 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [functional.alltypesagg a] |
| partition predicates: a.`day` = 6 |
| HDFS partitions=1/11 files=1 size=74.48KB |
| row-size=95B cardinality=1.00K |
| ==== |
| # Partitioned table, no partitions match |
| select * |
| from functional.alltypesagg a |
| where a.day = 23 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [functional.alltypesagg a] |
| partition predicates: a.`day` = 23 |
| partitions=0/11 files=0 size=0B |
| row-size=95B cardinality=0 |
| ==== |
| # IS NULL |
| # Estimated from null count, which is 0 for TPCH tables. |
| # TODO: Add tests for ndv stats but no null counts |
| # Impala does not suport NOT NULL fields, so we have to assume the |
| # field can be null. That means null is one of the distinct values. |
| # sel(mktsegment is null) = 1/NDV = 0.2 |
| # Bug: Many, expected cardinality ~15K |
| select * |
| from tpch.customer c |
| where c.c_mktsegment is null |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_mktsegment IS NULL |
| row-size=218B cardinality=1 |
| ==== |
| # IS NOT NULL |
| # Similar to above. |
| # Bug: Expected cardinality ~15K |
| select * |
| from tpch.customer c |
| where c.c_mktsegment is not null |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: c.c_mktsegment IS NOT NULL |
| row-size=218B cardinality=150.00K |
| ==== |
| # |alltypesagg| = 11K. Null count of tinyint_col = 2000. |
| select * |
| from functional.alltypesagg |
| where tinyint_col is null |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| predicates: tinyint_col IS NULL |
| row-size=95B cardinality=2.00K |
| ==== |
| # As above. |
| select * |
| from functional.alltypesagg |
| where tinyint_col is not null |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| predicates: tinyint_col IS NOT NULL |
| row-size=95B cardinality=9.00K |
| ==== |
| # IS NULL on an expression. Guess 0.1 selectivity |
| select * |
| from tpch.customer c |
| where concat(c.c_mktsegment, c_comment) is null |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: concat(c.c_mktsegment, c_comment) IS NULL |
| row-size=218B cardinality=15.00K |
| ==== |
| # IS NOT NULL. |
| # Bug: Should guess 0.9 selectivity, actually guesses 0.1 |
| # Expected cardinality ~15K |
| select * |
| from tpch.customer c |
| where concat(c.c_mktsegment, c_comment) is not null |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [tpch.customer c] |
| HDFS partitions=1/1 files=1 size=23.08MB |
| predicates: concat(c.c_mktsegment, c_comment) IS NOT NULL |
| row-size=218B cardinality=15.00K |
| ==== |
| # Scan of a table with no stats and zero rows |
| select * |
| from functional.alltypesnopart |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [functional.alltypesnopart] |
| HDFS partitions=1/1 files=0 size=0B |
| row-size=72B cardinality=0 |
| ==== |
| # Filter on the no-stats table |
| select * |
| from functional.alltypesnopart |
| where int_col = 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [functional.alltypesnopart] |
| HDFS partitions=1/1 files=0 size=0B |
| predicates: int_col = 10 |
| row-size=72B cardinality=0 |
| ==== |
| # VARCHAR column with stats |
| # Regression test for IMPALA-8849 - previously produced a negative row size. |
| select varchar_col from functional.chars_medium |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 00:SCAN HDFS [functional.chars_medium] |
| HDFS partitions=1/1 files=1 size=320.68KB |
| row-size=15B cardinality=11.00K |
| ==== |