blob: 90792b855b2de5c4d05db5e20e4ad772621638b1 [file] [log] [blame]
# Test a variety of predicates:
# simple predicate (accepted)
# predicate with randomness (rejected)
# predicate that evaluates to true on null (rejected)
# two slot predicate (rejected)
select count(*) from functional_parquet.alltypes
where int_col > 1 and int_col * rand() > 50 and int_col is null
and int_col > tinyint_col;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=42.00MB mem-reservation=16.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: count(*)
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=8B cardinality=1
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional_parquet.alltypes]
HDFS partitions=24/24 files=24 size=200.33KB
predicates: int_col IS NULL, int_col > CAST(1 AS INT), int_col > CAST(tinyint_col AS INT), CAST(int_col AS DOUBLE) * rand() > CAST(50 AS DOUBLE)
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/24 rows=12.84K
columns: unavailable
extrapolated-rows=disabled max-scan-range-rows=unavailable
parquet statistics predicates: int_col > CAST(1 AS INT)
parquet dictionary predicates: int_col > CAST(1 AS INT)
mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
tuple-ids=0 row-size=5B cardinality=1.27K
in pipelines: 00(GETNEXT)
====
# Test non-parquet types to ensure that parquet predicates are skipped
select count(*) from functional.alltypes
where int_col > 1 and int_col * rand() > 50 and int_col is null
and int_col > tinyint_col;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=138.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: count(*)
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=8B cardinality=1
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: int_col IS NULL, int_col > CAST(1 AS INT), int_col > CAST(tinyint_col AS INT), CAST(int_col AS DOUBLE) * rand() > CAST(50 AS DOUBLE)
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=310
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=5B cardinality=730
in pipelines: 00(GETNEXT)
====
# Test a variety of types
select count(*) from functional_parquet.alltypes
where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
and double_col > 100.00 and date_string_col > '1993-10-01'
and string_col in ('aaaa', 'bbbb', 'cccc')
and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
and year > 2000 and month < 12;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: count(*)
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=8B cardinality=1
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional_parquet.alltypes]
partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT)
HDFS partitions=22/24 files=22 size=183.48KB
predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/22 rows=11.74K
columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
extrapolated-rows=disabled max-scan-range-rows=unavailable
parquet statistics predicates: bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), date_string_col > '1993-10-01'
parquet dictionary predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1
tuple-ids=0 row-size=72B cardinality=1.17K
in pipelines: 00(GETNEXT)
====
# Test non-parquet files for a variety of predicates
select count(*) from functional.alltypes
where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
and double_col > 100.00 and date_string_col > '1993-10-01'
and string_col in ('aaaa', 'bbbb', 'cccc')
and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
and year > 2000 and month < 12;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=138.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: count(*)
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=8B cardinality=1
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT)
HDFS partitions=22/24 files=22 size=437.72KB
predicates: id = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 22/22 rows=6.68K
columns: all
extrapolated-rows=disabled max-scan-range-rows=339
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=81B cardinality=1
in pipelines: 00(GETNEXT)
====
# Test negative cases for IN predicate min/max filtering
# - NOT IN
# - IN list with NULL
# - IN list contains non-Literals
# - complex expression on left side of IN
select count(*) from functional_parquet.alltypes
where id NOT IN (0,1,2) and string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
and mod(int_col,50) IN (0,1)
and id IN (int_col);
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=58.00MB mem-reservation=24.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: count(*)
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=8B cardinality=1
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional_parquet.alltypes]
HDFS partitions=24/24 files=24 size=200.33KB
predicates: id IN (int_col), id NOT IN (CAST(0 AS INT), CAST(1 AS INT), CAST(2 AS INT)), int_col % CAST(50 AS INT) IN (CAST(0 AS INT), CAST(1 AS INT)), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/24 rows=12.84K
columns: unavailable
extrapolated-rows=disabled max-scan-range-rows=unavailable
parquet dictionary predicates: id NOT IN (CAST(0 AS INT), CAST(1 AS INT), CAST(2 AS INT)), int_col % CAST(50 AS INT) IN (CAST(0 AS INT), CAST(1 AS INT)), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
tuple-ids=0 row-size=20B cardinality=1.27K
in pipelines: 00(GETNEXT)
====
# Test collection types where all collections on the path are required (inner
# join descent). Expect the scan node to include !empty checks for both collections and
# the min-max filtering for the leaf predicate.
select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn, cn.item a
where a.item.e < -10;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: id
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:SUBPLAN
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=2,1,0 row-size=36B cardinality=440.00K
| in pipelines: 00(GETNEXT)
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | mem-estimate=20B mem-reservation=0B thread-reservation=0
| | tuple-ids=2,1,0 row-size=36B cardinality=100
| | in pipelines: 00(GETNEXT)
| |
| |--02:SINGULAR ROW SRC
| | parent-subplan=01
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=0 row-size=20B cardinality=1
| | in pipelines: 00(GETNEXT)
| |
| 04:SUBPLAN
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=2,1 row-size=16B cardinality=100
| | in pipelines: 00(GETNEXT)
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | mem-estimate=12B mem-reservation=0B thread-reservation=0
| | | tuple-ids=2,1 row-size=16B cardinality=10
| | | in pipelines: 00(GETNEXT)
| | |
| | |--05:SINGULAR ROW SRC
| | | parent-subplan=04
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | | tuple-ids=1 row-size=12B cardinality=1
| | | in pipelines: 00(GETNEXT)
| | |
| | 06:UNNEST [cn.item a]
| | parent-subplan=04
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=2 row-size=0B cardinality=10
| | in pipelines: 00(GETNEXT)
| |
| 03:UNNEST [c.nested_struct.c.d cn]
| parent-subplan=01
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=1 row-size=0B cardinality=10
| in pipelines: 00(GETNEXT)
|
00:SCAN HDFS [functional_parquet.complextypestbl c]
HDFS partitions=1/1 files=2 size=6.92KB
predicates: !empty(c.nested_struct.c.d)
predicates on cn: !empty(cn.item)
predicates on a: a.item.e < CAST(-10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
columns missing stats: id
extrapolated-rows=disabled max-scan-range-rows=unavailable
parquet statistics predicates on a: a.item.e < CAST(-10 AS INT)
parquet dictionary predicates on a: a.item.e < CAST(-10 AS INT)
mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
tuple-ids=0 row-size=20B cardinality=4.40K
in pipelines: 00(GETNEXT)
====
# Test collection types where the lower collection in the path is optional
# (outer join descent) and the upper is required (inner join descent).
# Expect the scan node to include !empty test for the root, but no min-max
# filter for the leaf (since it does not have a !empty check).
select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn
left outer join cn.item a
where a.item.e < -10;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: id
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:SUBPLAN
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=2N,1,0 row-size=36B cardinality=44.00K
| in pipelines: 00(GETNEXT)
|
|--08:SUBPLAN
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=2N,1,0 row-size=36B cardinality=10
| | in pipelines: 00(GETNEXT)
| |
| |--06:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| | | predicates: a.item.e < CAST(-10 AS INT)
| | | mem-estimate=32B mem-reservation=0B thread-reservation=0
| | | tuple-ids=2N,1,0 row-size=36B cardinality=1
| | | in pipelines: 00(GETNEXT)
| | |
| | |--04:SINGULAR ROW SRC
| | | parent-subplan=08
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | | tuple-ids=1,0 row-size=32B cardinality=1
| | | in pipelines: 00(GETNEXT)
| | |
| | 05:UNNEST [cn.item a]
| | parent-subplan=08
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=2 row-size=0B cardinality=10
| | in pipelines: 00(GETNEXT)
| |
| 07:NESTED LOOP JOIN [CROSS JOIN]
| | mem-estimate=20B mem-reservation=0B thread-reservation=0
| | tuple-ids=1,0 row-size=32B cardinality=10
| | in pipelines: 00(GETNEXT)
| |
| |--02:SINGULAR ROW SRC
| | parent-subplan=01
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=0 row-size=20B cardinality=1
| | in pipelines: 00(GETNEXT)
| |
| 03:UNNEST [c.nested_struct.c.d cn]
| parent-subplan=01
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=1 row-size=0B cardinality=10
| in pipelines: 00(GETNEXT)
|
00:SCAN HDFS [functional_parquet.complextypestbl c]
HDFS partitions=1/1 files=2 size=6.92KB
predicates: !empty(c.nested_struct.c.d)
predicates on a: a.item.e < CAST(-10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
columns missing stats: id
extrapolated-rows=disabled max-scan-range-rows=unavailable
mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
tuple-ids=0 row-size=20B cardinality=4.40K
in pipelines: 00(GETNEXT)
====
# Tests collection types where the outer is optional (outer join descent)
# and the inner is required (inner join descent). In this case, !empty is
# not pushed for either collection, so there is no min-max pruning either.
select id from functional_parquet.complextypestbl c
left outer join c.nested_struct.c.d cn, cn.item a where a.item.e < -10;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: id
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:SUBPLAN
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=2,1N,0 row-size=36B cardinality=44.00K
| in pipelines: 00(GETNEXT)
|
|--08:SUBPLAN
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=2,1N,0 row-size=36B cardinality=10
| | in pipelines: 00(GETNEXT)
| |
| |--06:NESTED LOOP JOIN [CROSS JOIN]
| | | mem-estimate=32B mem-reservation=0B thread-reservation=0
| | | tuple-ids=2,1N,0 row-size=36B cardinality=10
| | | in pipelines: 00(GETNEXT)
| | |
| | |--04:SINGULAR ROW SRC
| | | parent-subplan=08
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | | tuple-ids=1N,0 row-size=32B cardinality=1
| | | in pipelines: 00(GETNEXT)
| | |
| | 05:UNNEST [cn.item a]
| | parent-subplan=08
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=2 row-size=0B cardinality=10
| | in pipelines: 00(GETNEXT)
| |
| 07:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| | mem-estimate=20B mem-reservation=0B thread-reservation=0
| | tuple-ids=1N,0 row-size=32B cardinality=1
| | in pipelines: 00(GETNEXT)
| |
| |--02:SINGULAR ROW SRC
| | parent-subplan=01
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=0 row-size=20B cardinality=1
| | in pipelines: 00(GETNEXT)
| |
| 03:UNNEST [c.nested_struct.c.d cn]
| parent-subplan=01
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=1 row-size=0B cardinality=10
| in pipelines: 00(GETNEXT)
|
00:SCAN HDFS [functional_parquet.complextypestbl c]
HDFS partitions=1/1 files=2 size=6.92KB
predicates on a: a.item.e < CAST(-10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
columns missing stats: id
extrapolated-rows=disabled max-scan-range-rows=unavailable
mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
tuple-ids=0 row-size=20B cardinality=4.40K
in pipelines: 00(GETNEXT)
====
# Test collections so that each level has a filter applied.
select c_custkey from tpch_nested_parquet.customer c, c.c_orders o,
o.o_lineitems l where c_custkey > 0 and o.o_orderkey > 0 and l.l_partkey > 0;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=2
PLAN-ROOT SINK
| output exprs: c_custkey
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:SUBPLAN
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=2,1,0 row-size=48B cardinality=1.50M
| in pipelines: 00(GETNEXT)
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | mem-estimate=20B mem-reservation=0B thread-reservation=0
| | tuple-ids=2,1,0 row-size=48B cardinality=100
| | in pipelines: 00(GETNEXT)
| |
| |--02:SINGULAR ROW SRC
| | parent-subplan=01
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=0 row-size=20B cardinality=1
| | in pipelines: 00(GETNEXT)
| |
| 04:SUBPLAN
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=2,1 row-size=28B cardinality=100
| | in pipelines: 00(GETNEXT)
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | mem-estimate=20B mem-reservation=0B thread-reservation=0
| | | tuple-ids=2,1 row-size=28B cardinality=10
| | | in pipelines: 00(GETNEXT)
| | |
| | |--05:SINGULAR ROW SRC
| | | parent-subplan=04
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | | tuple-ids=1 row-size=20B cardinality=1
| | | in pipelines: 00(GETNEXT)
| | |
| | 06:UNNEST [o.o_lineitems l]
| | parent-subplan=04
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=2 row-size=0B cardinality=10
| | in pipelines: 00(GETNEXT)
| |
| 03:UNNEST [c.c_orders o]
| parent-subplan=01
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=1 row-size=0B cardinality=10
| in pipelines: 00(GETNEXT)
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=288.99MB
predicates: c_custkey > CAST(0 AS BIGINT), !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o.o_orderkey > CAST(0 AS BIGINT)
predicates on l: l.l_partkey > CAST(0 AS BIGINT)
stored statistics:
table: rows=150.00K size=288.99MB
columns missing stats: c_orders
extrapolated-rows=disabled max-scan-range-rows=50.12K
parquet statistics predicates: c_custkey > CAST(0 AS BIGINT)
parquet statistics predicates on o: o.o_orderkey > CAST(0 AS BIGINT)
parquet statistics predicates on l: l.l_partkey > CAST(0 AS BIGINT)
parquet dictionary predicates: c_custkey > CAST(0 AS BIGINT)
parquet dictionary predicates on o: o.o_orderkey > CAST(0 AS BIGINT)
parquet dictionary predicates on l: l.l_partkey > CAST(0 AS BIGINT)
mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=1
tuple-ids=0 row-size=20B cardinality=15.00K
in pipelines: 00(GETNEXT)
====
# Test collections in a way that would incorrectly apply a min-max
# filter at the scan. Expect no min-max filter and no !empty tests.
select count(*) from functional_parquet.complextypestbl c left outer join
(select * from c.int_array where item > 10) v;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=26.00MB mem-reservation=8.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: count(*)
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
| tuple-ids=3 row-size=8B cardinality=1
| in pipelines: 05(GETNEXT), 00(OPEN)
|
01:SUBPLAN
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=1N,0 row-size=16B cardinality=4.40K
| in pipelines: 00(GETNEXT)
|
|--04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| | mem-estimate=12B mem-reservation=0B thread-reservation=0
| | tuple-ids=1N,0 row-size=16B cardinality=1
| | in pipelines: 00(GETNEXT)
| |
| |--02:SINGULAR ROW SRC
| | parent-subplan=01
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=0 row-size=12B cardinality=1
| | in pipelines: 00(GETNEXT)
| |
| 03:UNNEST [c.int_array]
| parent-subplan=01
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=1 row-size=4B cardinality=10
| in pipelines: 00(GETNEXT)
|
00:SCAN HDFS [functional_parquet.complextypestbl c]
HDFS partitions=1/1 files=2 size=6.92KB
predicates on int_array: item > CAST(10 AS INT)
stored statistics:
table: rows=unavailable size=unavailable
columns: unavailable
extrapolated-rows=disabled max-scan-range-rows=unavailable
mem-estimate=16.00MB mem-reservation=8.00KB thread-reservation=1
tuple-ids=0 row-size=12B cardinality=4.40K
in pipelines: 00(GETNEXT)
====
# Multiple nested collection values (at the same nesting level) where dictionary
# pruning is applicable.
select c_name, o.o_clerk from tpch_nested_parquet.customer c,
c.c_orders o, o.o_lineitems l
where l.l_shipdate = '1994-08-19' and
l.l_receiptdate = '1994-08-24' and l.l_shipmode = 'RAIL' and l.l_returnflag = 'R' and
l.l_comment is null;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=616.00MB mem-reservation=32.00MB thread-reservation=2
PLAN-ROOT SINK
| output exprs: c_name, o.o_clerk
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:SUBPLAN
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=2,1,0 row-size=126B cardinality=15.00M
| in pipelines: 00(GETNEXT)
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | mem-estimate=42B mem-reservation=0B thread-reservation=0
| | tuple-ids=2,1,0 row-size=126B cardinality=100
| | in pipelines: 00(GETNEXT)
| |
| |--02:SINGULAR ROW SRC
| | parent-subplan=01
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=0 row-size=42B cardinality=1
| | in pipelines: 00(GETNEXT)
| |
| 04:SUBPLAN
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=2,1 row-size=84B cardinality=100
| | in pipelines: 00(GETNEXT)
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | mem-estimate=24B mem-reservation=0B thread-reservation=0
| | | tuple-ids=2,1 row-size=84B cardinality=10
| | | in pipelines: 00(GETNEXT)
| | |
| | |--05:SINGULAR ROW SRC
| | | parent-subplan=04
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | | tuple-ids=1 row-size=24B cardinality=1
| | | in pipelines: 00(GETNEXT)
| | |
| | 06:UNNEST [o.o_lineitems l]
| | parent-subplan=04
| | mem-estimate=0B mem-reservation=0B thread-reservation=0
| | tuple-ids=2 row-size=0B cardinality=10
| | in pipelines: 00(GETNEXT)
| |
| 03:UNNEST [c.c_orders o]
| parent-subplan=01
| mem-estimate=0B mem-reservation=0B thread-reservation=0
| tuple-ids=1 row-size=0B cardinality=10
| in pipelines: 00(GETNEXT)
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=288.99MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems)
predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R', l.l_comment IS NULL
stored statistics:
table: rows=150.00K size=288.99MB
columns missing stats: c_orders
extrapolated-rows=disabled max-scan-range-rows=50.12K
parquet statistics predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R'
parquet dictionary predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R'
mem-estimate=616.00MB mem-reservation=32.00MB thread-reservation=1
tuple-ids=0 row-size=42B cardinality=150.00K
in pipelines: 00(GETNEXT)
====
# Test a variety of predicates on a mixed format table.
# Scan multiple partitions with atleast one Parquet partition.
select count(*) from functional.alltypesmixedformat
where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
and double_col > 100.00 and date_string_col > '1993-10-01'
and string_col in ('aaaa', 'bbbb', 'cccc')
and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
and year > 2000 and month < 12;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: count(*)
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=8B cardinality=1
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypesmixedformat]
partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT)
HDFS partitions=4/4 files=4 size=66.33KB
predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/4 rows=2.55K
columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
extrapolated-rows=disabled max-scan-range-rows=unavailable
parquet statistics predicates: bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), date_string_col > '1993-10-01'
parquet dictionary predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1
tuple-ids=0 row-size=72B cardinality=254
in pipelines: 00(GETNEXT)
====
# Test a variety of predicates on a mixed format table.
# Scan all partitions other than the parquet partition.
select count(*) from functional.alltypesmixedformat
where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
and double_col > 100.00 and date_string_col > '1993-10-01'
and string_col in ('aaaa', 'bbbb', 'cccc')
and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
and year != 2009 and month != 4;
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=10.00MB mem-reservation=0B thread-reservation=1
PLAN-ROOT SINK
| output exprs: count(*)
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=8B cardinality=1
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypesmixedformat]
partition predicates: `year` != CAST(2009 AS INT), `month` != CAST(4 AS INT)
partitions=0/4 files=0 size=0B
predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/0 rows=0
columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
extrapolated-rows=disabled max-scan-range-rows=0
mem-estimate=0B mem-reservation=0B thread-reservation=0
tuple-ids=0 row-size=72B cardinality=0
in pipelines: 00(GETNEXT)
====