testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test - impala - Git at Google

 # Predicate tests
 # In this file, "inequality" means not equals. It is a general
 # term for <, <=, >, >=. The term "not equals" is used for the
 # != (AKA <>) operator.
 # No predicate at all. Establishes cardinality baseline.
 select * from tpch.customer
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer]
    HDFS partitions=1/1 files=1 size=23.08MB
    row-size=218B cardinality=150.00K
 ====
 # Predicate on a single value: card = |T|/ndv
 # Unique key, NDV=|T|
 select *
 from tpch.customer c
 where c.c_custkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey = 10
    row-size=218B cardinality=1
 ====
 # Predicate on a single value: card = |T|/ndv
 # Non-unique key, NDV=25
 select *
 from tpch.customer c
 where c.c_nationkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_nationkey = 10
    row-size=218B cardinality=6.00K
 ====
 # OR'ed predicate, card = 2/ndv
 select *
 from tpch.customer c
 where c.c_custkey = 10 OR c.c_custkey = 20
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey IN (10, 20)
    row-size=218B cardinality=2
 ====
 # OR'ed predicate, distinct columns
 # card = max(card of each OR term)
 select *
 from tpch.customer c
 where c.c_custkey = 10
    or c.c_nationkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey = 10 OR c.c_nationkey = 10
    row-size=218B cardinality=6.00K
 ====
 # As above, but with smaller table to see effect
 # NDV(id) = |T| = 8
 # NDV(bool_val) = 2
 # card = max(|T|/NDV(id),|T|/NDV(bool_val)) = max(1, 4)
 # But done by adding cardinalities, which seems right, but is wrong
 # Bug: IMPALA-8038
 select *
 from functional.alltypestiny t
 where t.id = 10
    or t.bool_col = true
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypestiny t]
    HDFS partitions=4/4 files=4 size=460B
    predicates: t.id = 10 OR t.bool_col = TRUE
    row-size=89B cardinality=5
 ====
 # IN, card = x/ndv
 select *
 from tpch.customer c
 where c.c_custkey in (10, 20, 30)
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey IN (10, 20, 30)
    row-size=218B cardinality=3
 ====
 # IN with duplicate values. Remove dups.
 # Bug: IMPALA-8030
 select *
 from tpch.customer c
 where c.c_custkey in (10, 20, 30, 30, 10, 20)
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey IN (10, 20, 30, 30, 10, 20)
    row-size=218B cardinality=6
 ====
 # OR on same value: card = 1/ndv
 select *
 from tpch.customer c
 where c.c_custkey = 10 OR c.c_custkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey = 10
    row-size=218B cardinality=1
 ====
 # OR on same value: card = 1/ndv
 # Different expression order
 # Bug: IMPALA-8030
 select *
 from tpch.customer c
 where c.c_custkey = 10 OR 10 = c.c_custkey
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey IN (10, 10)
    row-size=218B cardinality=2
 ====
 # AND'ed predicate, card = 0
 select *
 from tpch.customer c
 where c.c_custkey = 10 AND c.c_custkey = 20
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:EMPTYSET
 ====
 # AND on same value: card = 1/ndv
 select *
 from tpch.customer c
 where c.c_custkey = 10 AND c.c_custkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey = 10
    row-size=218B cardinality=1
 ====
 # Not-equal, card = 1 - 1/ndv
 # Use smaller table so effect is clear
 # |T|=8, NDV=8
 # Bug: IMPALA-8039
 # Bug, expected cardinality ~7
 select *
 from functional.alltypestiny
 where id != 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypestiny]
    HDFS partitions=4/4 files=4 size=460B
    predicates: id != 10
    row-size=89B cardinality=1
 ====
 # Inequality. No useful stats.
 # Bug: IMPALA-8037, Assumes sel = 0.1
 # Bug: Expected cardinality ~49.5K
 select *
 from tpch.customer c
 where c.c_custkey < 1234
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey < 1234
    row-size=218B cardinality=15.00K
 ====
 # Inequality twice on same value. Remove duplicate.
 # Bug: IMPALA-8037
 # Bug: Expected cardinality ~49.5K
 select *
 from tpch.customer c
 where c.c_custkey < 1234
   and c.c_custkey < 1234
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey < 1234
    row-size=218B cardinality=15.00K
 ====
 # Inequality twice on same value, but reversed "direction". Remove duplicate.
 # Bug: IMPALA-8037
 # Bug: Expected cardinality ~49.5K
 select *
 from tpch.customer c
 where c.c_custkey < 1234
   and 1234 > c.c_custkey
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey < 1234
    row-size=218B cardinality=15.00K
 ====
 # Two inequalities of the same "direction" Assume only one applies.
 # Bugs: IMPALA-8031, IMPALA-8037
 # Bug: Expected cardinality ~28.5K
 select *
 from tpch.customer c
 where c.c_custkey < 1234
   and c.c_custkey < 2345
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey < 1234, c.c_custkey < 2345
    row-size=218B cardinality=15.00K
 ====
 # Two inequalities. No useful stats.
 # But with effect of exponential-backoff
 # Bug: IMPALA-8037
 # Bug: Expected cardinality ~28.5K
 select *
 from tpch.customer c
 where c.c_custkey < 1234
   and c.c_nationkey < 100
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey < 1234, c.c_nationkey < 100
    row-size=218B cardinality=15.00K
 ====
 # Between. No useful stats. Should assume, say 0.16
 # But, gets rewritten before computing selectivity,
 # So same as AND'ed inequalities
 # sel = 0.1, but Ramakrishnan and Gherke suggest 1/4
 # Bug: IMPALA-8037
 # Bug: Expected cardinality ~28.5K
 select *
 from tpch.customer c
 where c.c_custkey between 1234 and 2345
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
    row-size=218B cardinality=15.00K
 ====
 # Compound inequality (< and >) that is the same as BETWEEN.
 # Gets compouted as AND of inequalities.
 # sel = 0.1 (single sel for all)
 # But, these conditions are not independent, so a better estimate
 # would be, say 1/6
 # Bugs: TBD
 # Bug: Expected cardinality ~28.5K
 # |<<< <<< ===| c > x
 # |=== >>> >>>| c < y
 # |<<< === >>>| c > x AND c < y
 select *
 from tpch.customer c
 where c.c_custkey >= 1234 and c.c_custkey <= 2345
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
    row-size=218B cardinality=15.00K
 ====
 # Between and redundant inequality
 # Bug: expected cardinality ~15K
 select *
 from tpch.customer c
 where c.c_custkey between 1234 and 2345
   and c.c_custkey <= 2345
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
    row-size=218B cardinality=15.00K
 ====
 # Partitioned scan.
 # |table| = 11K
 # |partition| = 1000
 # Five patitions match
 # |table'| = 5000
 select *
 from functional.alltypesagg a
 where a.day >= 6
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesagg a]
    partition predicates: a.`day` >= 6
    HDFS partitions=5/11 files=5 size=372.38KB
    row-size=95B cardinality=5.00K
 ====
 # Partitioned table, one partition matches
 select *
 from functional.alltypesagg a
 where a.day = 6
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesagg a]
    partition predicates: a.`day` = 6
    HDFS partitions=1/11 files=1 size=74.48KB
    row-size=95B cardinality=1.00K
 ====
 # Partitioned table, no partitions match
 select *
 from functional.alltypesagg a
 where a.day = 23
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesagg a]
    partition predicates: a.`day` = 23
    partitions=0/11 files=0 size=0B
    row-size=95B cardinality=0
 ====
 # IS NULL
 # Estimated from null count, which is 0 for TPCH tables.
 # TODO: Add tests for ndv stats but no null counts
 # Impala does not suport NOT NULL fields, so we have to assume the
 # field can be null. That means null is one of the distinct values.
 # sel(mktsegment is null) = 1/NDV = 0.2
 # Bug: Many, expected cardinality ~15K
 select *
 from tpch.customer c
 where c.c_mktsegment is null
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_mktsegment IS NULL
    row-size=218B cardinality=1
 ====
 # IS NOT NULL
 # Similar to above.
 # Bug: Expected cardinality ~15K
 select *
 from tpch.customer c
 where c.c_mktsegment is not null
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_mktsegment IS NOT NULL
    row-size=218B cardinality=150.00K
 ====
 # |alltypesagg| = 11K. Null count of tinyint_col = 2000.
 select *
 from functional.alltypesagg
 where tinyint_col is null
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesagg]
    HDFS partitions=11/11 files=11 size=814.73KB
    predicates: tinyint_col IS NULL
    row-size=95B cardinality=2.00K
 ====
 # As above.
 select *
 from functional.alltypesagg
 where tinyint_col is not null
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesagg]
    HDFS partitions=11/11 files=11 size=814.73KB
    predicates: tinyint_col IS NOT NULL
    row-size=95B cardinality=9.00K
 ====
 # IS NULL on an expression. Guess 0.1 selectivity
 select *
 from tpch.customer c
 where concat(c.c_mktsegment, c_comment) is null
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: concat(c.c_mktsegment, c_comment) IS NULL
    row-size=218B cardinality=15.00K
 ====
 # IS NOT NULL.
 # Bug: Should guess 0.9 selectivity, actually guesses 0.1
 # Expected cardinality ~15K
 select *
 from tpch.customer c
 where concat(c.c_mktsegment, c_comment) is not null
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
    HDFS partitions=1/1 files=1 size=23.08MB
    predicates: concat(c.c_mktsegment, c_comment) IS NOT NULL
    row-size=218B cardinality=15.00K
 ====
 # Scan of a table with no stats and zero rows
 select *
 from functional.alltypesnopart
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesnopart]
    HDFS partitions=1/1 files=0 size=0B
    row-size=72B cardinality=0
 ====
 # Filter on the no-stats table
 select *
 from functional.alltypesnopart
 where int_col = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesnopart]
    HDFS partitions=1/1 files=0 size=0B
    predicates: int_col = 10
    row-size=72B cardinality=0
 ====
 # VARCHAR column with stats
 # Regression test for IMPALA-8849 - previously produced a negative row size.
 select varchar_col from functional.chars_medium
 ---- PLAN
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.chars_medium]
    HDFS partitions=1/1 files=1 size=320.68KB
    row-size=15B cardinality=11.00K
 ====
	# Predicate tests
	# In this file, "inequality" means not equals. It is a general
	# term for <, <=, >, >=. The term "not equals" is used for the
	# != (AKA <>) operator.
	# No predicate at all. Establishes cardinality baseline.
	select * from tpch.customer
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer]
	HDFS partitions=1/1 files=1 size=23.08MB
	row-size=218B cardinality=150.00K
	====
	# Predicate on a single value: card = \|T\|/ndv
	# Unique key, NDV=\|T\|
	select *
	from tpch.customer c
	where c.c_custkey = 10
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey = 10
	row-size=218B cardinality=1
	====
	# Predicate on a single value: card = \|T\|/ndv
	# Non-unique key, NDV=25
	select *
	from tpch.customer c
	where c.c_nationkey = 10
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_nationkey = 10
	row-size=218B cardinality=6.00K
	====
	# OR'ed predicate, card = 2/ndv
	select *
	from tpch.customer c
	where c.c_custkey = 10 OR c.c_custkey = 20
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey IN (10, 20)
	row-size=218B cardinality=2
	====
	# OR'ed predicate, distinct columns
	# card = max(card of each OR term)
	select *
	from tpch.customer c
	where c.c_custkey = 10
	or c.c_nationkey = 10
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey = 10 OR c.c_nationkey = 10
	row-size=218B cardinality=6.00K
	====
	# As above, but with smaller table to see effect
	# NDV(id) = \|T\| = 8
	# NDV(bool_val) = 2
	# card = max(\|T\|/NDV(id),\|T\|/NDV(bool_val)) = max(1, 4)
	# But done by adding cardinalities, which seems right, but is wrong
	# Bug: IMPALA-8038
	select *
	from functional.alltypestiny t
	where t.id = 10
	or t.bool_col = true
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [functional.alltypestiny t]
	HDFS partitions=4/4 files=4 size=460B
	predicates: t.id = 10 OR t.bool_col = TRUE
	row-size=89B cardinality=5
	====
	# IN, card = x/ndv
	select *
	from tpch.customer c
	where c.c_custkey in (10, 20, 30)
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey IN (10, 20, 30)
	row-size=218B cardinality=3
	====
	# IN with duplicate values. Remove dups.
	# Bug: IMPALA-8030
	select *
	from tpch.customer c
	where c.c_custkey in (10, 20, 30, 30, 10, 20)
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey IN (10, 20, 30, 30, 10, 20)
	row-size=218B cardinality=6
	====
	# OR on same value: card = 1/ndv
	select *
	from tpch.customer c
	where c.c_custkey = 10 OR c.c_custkey = 10
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey = 10
	row-size=218B cardinality=1
	====
	# OR on same value: card = 1/ndv
	# Different expression order
	# Bug: IMPALA-8030
	select *
	from tpch.customer c
	where c.c_custkey = 10 OR 10 = c.c_custkey
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey IN (10, 10)
	row-size=218B cardinality=2
	====
	# AND'ed predicate, card = 0
	select *
	from tpch.customer c
	where c.c_custkey = 10 AND c.c_custkey = 20
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:EMPTYSET
	====
	# AND on same value: card = 1/ndv
	select *
	from tpch.customer c
	where c.c_custkey = 10 AND c.c_custkey = 10
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey = 10
	row-size=218B cardinality=1
	====
	# Not-equal, card = 1 - 1/ndv
	# Use smaller table so effect is clear
	# \|T\|=8, NDV=8
	# Bug: IMPALA-8039
	# Bug, expected cardinality ~7
	select *
	from functional.alltypestiny
	where id != 10
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [functional.alltypestiny]
	HDFS partitions=4/4 files=4 size=460B
	predicates: id != 10
	row-size=89B cardinality=1
	====
	# Inequality. No useful stats.
	# Bug: IMPALA-8037, Assumes sel = 0.1
	# Bug: Expected cardinality ~49.5K
	select *
	from tpch.customer c
	where c.c_custkey < 1234
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey < 1234
	row-size=218B cardinality=15.00K
	====
	# Inequality twice on same value. Remove duplicate.
	# Bug: IMPALA-8037
	# Bug: Expected cardinality ~49.5K
	select *
	from tpch.customer c
	where c.c_custkey < 1234
	and c.c_custkey < 1234
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey < 1234
	row-size=218B cardinality=15.00K
	====
	# Inequality twice on same value, but reversed "direction". Remove duplicate.
	# Bug: IMPALA-8037
	# Bug: Expected cardinality ~49.5K
	select *
	from tpch.customer c
	where c.c_custkey < 1234
	and 1234 > c.c_custkey
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey < 1234
	row-size=218B cardinality=15.00K
	====
	# Two inequalities of the same "direction" Assume only one applies.
	# Bugs: IMPALA-8031, IMPALA-8037
	# Bug: Expected cardinality ~28.5K
	select *
	from tpch.customer c
	where c.c_custkey < 1234
	and c.c_custkey < 2345
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey < 1234, c.c_custkey < 2345
	row-size=218B cardinality=15.00K
	====
	# Two inequalities. No useful stats.
	# But with effect of exponential-backoff
	# Bug: IMPALA-8037
	# Bug: Expected cardinality ~28.5K
	select *
	from tpch.customer c
	where c.c_custkey < 1234
	and c.c_nationkey < 100
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey < 1234, c.c_nationkey < 100
	row-size=218B cardinality=15.00K
	====
	# Between. No useful stats. Should assume, say 0.16
	# But, gets rewritten before computing selectivity,
	# So same as AND'ed inequalities
	# sel = 0.1, but Ramakrishnan and Gherke suggest 1/4
	# Bug: IMPALA-8037
	# Bug: Expected cardinality ~28.5K
	select *
	from tpch.customer c
	where c.c_custkey between 1234 and 2345
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
	row-size=218B cardinality=15.00K
	====
	# Compound inequality (< and >) that is the same as BETWEEN.
	# Gets compouted as AND of inequalities.
	# sel = 0.1 (single sel for all)
	# But, these conditions are not independent, so a better estimate
	# would be, say 1/6
	# Bugs: TBD
	# Bug: Expected cardinality ~28.5K
	# \|<<< <<< ===\| c > x
	# \|=== >>> >>>\| c < y
	# \|<<< === >>>\| c > x AND c < y
	select *
	from tpch.customer c
	where c.c_custkey >= 1234 and c.c_custkey <= 2345
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
	row-size=218B cardinality=15.00K
	====
	# Between and redundant inequality
	# Bug: expected cardinality ~15K
	select *
	from tpch.customer c
	where c.c_custkey between 1234 and 2345
	and c.c_custkey <= 2345
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
	row-size=218B cardinality=15.00K
	====
	# Partitioned scan.
	# \|table\| = 11K
	# \|partition\| = 1000
	# Five patitions match
	# \|table'\| = 5000
	select *
	from functional.alltypesagg a
	where a.day >= 6
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [functional.alltypesagg a]
	partition predicates: a.`day` >= 6
	HDFS partitions=5/11 files=5 size=372.38KB
	row-size=95B cardinality=5.00K
	====
	# Partitioned table, one partition matches
	select *
	from functional.alltypesagg a
	where a.day = 6
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [functional.alltypesagg a]
	partition predicates: a.`day` = 6
	HDFS partitions=1/11 files=1 size=74.48KB
	row-size=95B cardinality=1.00K
	====
	# Partitioned table, no partitions match
	select *
	from functional.alltypesagg a
	where a.day = 23
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [functional.alltypesagg a]
	partition predicates: a.`day` = 23
	partitions=0/11 files=0 size=0B
	row-size=95B cardinality=0
	====
	# IS NULL
	# Estimated from null count, which is 0 for TPCH tables.
	# TODO: Add tests for ndv stats but no null counts
	# Impala does not suport NOT NULL fields, so we have to assume the
	# field can be null. That means null is one of the distinct values.
	# sel(mktsegment is null) = 1/NDV = 0.2
	# Bug: Many, expected cardinality ~15K
	select *
	from tpch.customer c
	where c.c_mktsegment is null
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_mktsegment IS NULL
	row-size=218B cardinality=1
	====
	# IS NOT NULL
	# Similar to above.
	# Bug: Expected cardinality ~15K
	select *
	from tpch.customer c
	where c.c_mktsegment is not null
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: c.c_mktsegment IS NOT NULL
	row-size=218B cardinality=150.00K
	====
	# \|alltypesagg\| = 11K. Null count of tinyint_col = 2000.
	select *
	from functional.alltypesagg
	where tinyint_col is null
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [functional.alltypesagg]
	HDFS partitions=11/11 files=11 size=814.73KB
	predicates: tinyint_col IS NULL
	row-size=95B cardinality=2.00K
	====
	# As above.
	select *
	from functional.alltypesagg
	where tinyint_col is not null
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [functional.alltypesagg]
	HDFS partitions=11/11 files=11 size=814.73KB
	predicates: tinyint_col IS NOT NULL
	row-size=95B cardinality=9.00K
	====
	# IS NULL on an expression. Guess 0.1 selectivity
	select *
	from tpch.customer c
	where concat(c.c_mktsegment, c_comment) is null
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: concat(c.c_mktsegment, c_comment) IS NULL
	row-size=218B cardinality=15.00K
	====
	# IS NOT NULL.
	# Bug: Should guess 0.9 selectivity, actually guesses 0.1
	# Expected cardinality ~15K
	select *
	from tpch.customer c
	where concat(c.c_mktsegment, c_comment) is not null
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [tpch.customer c]
	HDFS partitions=1/1 files=1 size=23.08MB
	predicates: concat(c.c_mktsegment, c_comment) IS NOT NULL
	row-size=218B cardinality=15.00K
	====
	# Scan of a table with no stats and zero rows
	select *
	from functional.alltypesnopart
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [functional.alltypesnopart]
	HDFS partitions=1/1 files=0 size=0B
	row-size=72B cardinality=0
	====
	# Filter on the no-stats table
	select *
	from functional.alltypesnopart
	where int_col = 10
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [functional.alltypesnopart]
	HDFS partitions=1/1 files=0 size=0B
	predicates: int_col = 10
	row-size=72B cardinality=0
	====
	# VARCHAR column with stats
	# Regression test for IMPALA-8849 - previously produced a negative row size.
	select varchar_col from functional.chars_medium
	---- PLAN
	PLAN-ROOT SINK
	\|
	00:SCAN HDFS [functional.chars_medium]
	HDFS partitions=1/1 files=1 size=320.68KB
	row-size=15B cardinality=11.00K
	====