testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering-disabled.test - impala - Git at Google

 # All queries in this test are run with PARQUET_DICTIONARY_FILTERING and
 # PARQUET_READ_STATISTICS disabled. The expected behavior is for the planner to skip
 # assigning statistics and dictionary conjuncts while querying parquet files.
 # Parquet predicates to be skipped:
 # parquet statistics predicate on int_col
 # parquet dictionary predicate on int_col
 select count(*) from functional_parquet.alltypes
 where int_col > 1 and int_col * rand() > 50 and int_col is null
 and int_col > tinyint_col;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=42.00MB mem-reservation=16.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=1 row-size=8B cardinality=1
 |  in pipelines: 01(GETNEXT), 00(OPEN)
 |
 00:SCAN HDFS [functional_parquet.alltypes]
    HDFS partitions=24/24 files=24 size=200.33KB
    predicates: int_col IS NULL, int_col > CAST(1 AS INT), int_col > CAST(tinyint_col AS INT), CAST(int_col AS DOUBLE) * rand() > CAST(50 AS DOUBLE)
    stored statistics:
      table: rows=unavailable size=unavailable
      partitions: 0/24 rows=12.84K
      columns: unavailable
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
    tuple-ids=0 row-size=5B cardinality=1.27K
    in pipelines: 00(GETNEXT)
 ====
 # Parquet predicates to be skipped:
 # parquet statistics predicate on bigint_col, double_col, float_col, id, tinyint_col,
 #                                 string_col, smallint_col & date_string_col
 # parquet dictionary predicate on bool_col, bigint_col, double_col, float_col, id,
 #                                 tinyint_col, string_col, smallint_col, int_col &
 #                                 date_string_col
 select count(*) from functional_parquet.alltypes
 where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
 and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
 and double_col > 100.00 and date_string_col > '1993-10-01'
 and string_col in ('aaaa', 'bbbb', 'cccc')
 and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
 and year > 2000 and month < 12;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=1 row-size=8B cardinality=1
 |  in pipelines: 01(GETNEXT), 00(OPEN)
 |
 00:SCAN HDFS [functional_parquet.alltypes]
    partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT)
    HDFS partitions=22/24 files=22 size=183.48KB
    predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
    stored statistics:
      table: rows=unavailable size=unavailable
      partitions: 0/22 rows=11.74K
      columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1
    tuple-ids=0 row-size=72B cardinality=1.17K
    in pipelines: 00(GETNEXT)
 ====
 # Parquet predicates to be skipped:
 # parquet dictionary predicates on id, string_col & int_col
 select count(*) from functional_parquet.alltypes
 where id NOT IN (0,1,2) and string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
 and mod(int_col,50) IN (0,1)
 and id IN (int_col);
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=58.00MB mem-reservation=24.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=1 row-size=8B cardinality=1
 |  in pipelines: 01(GETNEXT), 00(OPEN)
 |
 00:SCAN HDFS [functional_parquet.alltypes]
    HDFS partitions=24/24 files=24 size=200.33KB
    predicates: id IN (int_col), id NOT IN (CAST(0 AS INT), CAST(1 AS INT), CAST(2 AS INT)), int_col % CAST(50 AS INT) IN (CAST(0 AS INT), CAST(1 AS INT)), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
    stored statistics:
      table: rows=unavailable size=unavailable
      partitions: 0/24 rows=12.84K
      columns: unavailable
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=1.27K
    in pipelines: 00(GETNEXT)
 ====
 # Nested parquet predicates to be skipped:
 # parquet statistics predicates on a.item.e
 # parquet dictionary predicates on a.item.e
 select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn, cn.item a
 where a.item.e < -10;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: id
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:SUBPLAN
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  tuple-ids=2,1,0 row-size=36B cardinality=440.00K
 |  in pipelines: 00(GETNEXT)
 |
 |--08:NESTED LOOP JOIN [CROSS JOIN]
 |  |  mem-estimate=20B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1,0 row-size=36B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--02:SINGULAR ROW SRC
 |  |     parent-subplan=01
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=0 row-size=20B cardinality=1
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  04:SUBPLAN
 |  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1 row-size=16B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--07:NESTED LOOP JOIN [CROSS JOIN]
 |  |  |  mem-estimate=12B mem-reservation=0B thread-reservation=0
 |  |  |  tuple-ids=2,1 row-size=16B cardinality=10
 |  |  |  in pipelines: 00(GETNEXT)
 |  |  |
 |  |  |--05:SINGULAR ROW SRC
 |  |  |     parent-subplan=04
 |  |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  |     tuple-ids=1 row-size=12B cardinality=1
 |  |  |     in pipelines: 00(GETNEXT)
 |  |  |
 |  |  06:UNNEST [cn.item a]
 |  |     parent-subplan=04
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=2 row-size=0B cardinality=10
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  03:UNNEST [c.nested_struct.c.d cn]
 |     parent-subplan=01
 |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |     tuple-ids=1 row-size=0B cardinality=10
 |     in pipelines: 00(GETNEXT)
 |
 00:SCAN HDFS [functional_parquet.complextypestbl c]
    HDFS partitions=1/1 files=2 size=6.92KB
    predicates: !empty(c.nested_struct.c.d)
    predicates on cn: !empty(cn.item)
    predicates on a: a.item.e < CAST(-10 AS INT)
    stored statistics:
      table: rows=unavailable size=unavailable
      columns missing stats: id
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=4.40K
    in pipelines: 00(GETNEXT)
 ====
 # Parquet predicates to be skipped at each level:
 # parquet statistics predicates on c_custkey, o.o_orderkey & l.l_partkey
 # parquet dictionary predicates on c_custkey, o.o_orderkey & l.l_partkey
 select c_custkey from tpch_nested_parquet.customer c, c.c_orders o,
 o.o_lineitems l where c_custkey > 0 and o.o_orderkey > 0 and l.l_partkey > 0;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: c_custkey
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:SUBPLAN
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  tuple-ids=2,1,0 row-size=48B cardinality=1.50M
 |  in pipelines: 00(GETNEXT)
 |
 |--08:NESTED LOOP JOIN [CROSS JOIN]
 |  |  mem-estimate=20B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1,0 row-size=48B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--02:SINGULAR ROW SRC
 |  |     parent-subplan=01
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=0 row-size=20B cardinality=1
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  04:SUBPLAN
 |  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1 row-size=28B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--07:NESTED LOOP JOIN [CROSS JOIN]
 |  |  |  mem-estimate=20B mem-reservation=0B thread-reservation=0
 |  |  |  tuple-ids=2,1 row-size=28B cardinality=10
 |  |  |  in pipelines: 00(GETNEXT)
 |  |  |
 |  |  |--05:SINGULAR ROW SRC
 |  |  |     parent-subplan=04
 |  |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  |     tuple-ids=1 row-size=20B cardinality=1
 |  |  |     in pipelines: 00(GETNEXT)
 |  |  |
 |  |  06:UNNEST [o.o_lineitems l]
 |  |     parent-subplan=04
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=2 row-size=0B cardinality=10
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  03:UNNEST [c.c_orders o]
 |     parent-subplan=01
 |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |     tuple-ids=1 row-size=0B cardinality=10
 |     in pipelines: 00(GETNEXT)
 |
 00:SCAN HDFS [tpch_nested_parquet.customer c]
    HDFS partitions=1/1 files=4 size=288.99MB
    predicates: c_custkey > CAST(0 AS BIGINT), !empty(c.c_orders)
    predicates on o: !empty(o.o_lineitems), o.o_orderkey > CAST(0 AS BIGINT)
    predicates on l: l.l_partkey > CAST(0 AS BIGINT)
    stored statistics:
      table: rows=150.00K size=288.99MB
      columns missing stats: c_orders
    extrapolated-rows=disabled max-scan-range-rows=50.12K
    mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=15.00K
    in pipelines: 00(GETNEXT)
 ====
 # Parquet filtering to be skipped on multiple collections at the same nested level:
 # parquet statistics filtering on l.l_shipdate, l.l_receiptdate, l.l_shipmode
 #                                 & l.l_returnflag
 # parquet dictionary filtering on l.l_shipdate, l.l_receiptdate, l.l_shipmode
 #                                 & l.l_returnflag
 select c_name, o.o_clerk from tpch_nested_parquet.customer c,
 c.c_orders o, o.o_lineitems l
 where l.l_shipdate = '1994-08-19' and
 l.l_receiptdate = '1994-08-24' and l.l_shipmode = 'RAIL' and l.l_returnflag = 'R' and
 l.l_comment is null;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=616.00MB mem-reservation=32.00MB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: c_name, o.o_clerk
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:SUBPLAN
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  tuple-ids=2,1,0 row-size=126B cardinality=15.00M
 |  in pipelines: 00(GETNEXT)
 |
 |--08:NESTED LOOP JOIN [CROSS JOIN]
 |  |  mem-estimate=42B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1,0 row-size=126B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--02:SINGULAR ROW SRC
 |  |     parent-subplan=01
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=0 row-size=42B cardinality=1
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  04:SUBPLAN
 |  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1 row-size=84B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--07:NESTED LOOP JOIN [CROSS JOIN]
 |  |  |  mem-estimate=24B mem-reservation=0B thread-reservation=0
 |  |  |  tuple-ids=2,1 row-size=84B cardinality=10
 |  |  |  in pipelines: 00(GETNEXT)
 |  |  |
 |  |  |--05:SINGULAR ROW SRC
 |  |  |     parent-subplan=04
 |  |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  |     tuple-ids=1 row-size=24B cardinality=1
 |  |  |     in pipelines: 00(GETNEXT)
 |  |  |
 |  |  06:UNNEST [o.o_lineitems l]
 |  |     parent-subplan=04
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=2 row-size=0B cardinality=10
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  03:UNNEST [c.c_orders o]
 |     parent-subplan=01
 |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |     tuple-ids=1 row-size=0B cardinality=10
 |     in pipelines: 00(GETNEXT)
 |
 00:SCAN HDFS [tpch_nested_parquet.customer c]
    HDFS partitions=1/1 files=4 size=288.99MB
    predicates: !empty(c.c_orders)
    predicates on o: !empty(o.o_lineitems)
    predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R', l.l_comment IS NULL
    stored statistics:
      table: rows=150.00K size=288.99MB
      columns missing stats: c_orders
    extrapolated-rows=disabled max-scan-range-rows=50.12K
    mem-estimate=616.00MB mem-reservation=32.00MB thread-reservation=1
    tuple-ids=0 row-size=42B cardinality=150.00K
    in pipelines: 00(GETNEXT)
 ====
 # Parquet filtering to be skipped on a mixed file format table:
 # parquet statistics predicates on bigint_col, double_col, float_col, id, tinyint_col,
 #                                  string_col, smallint_col & date_string_col
 # parquet dictionary predicates on bool_col, bigint_col, double_col, float_col, id,
 #                                  tinyint_col, string_col, smallint_col, int_col,
 #                                  timestamp_col & date_string_col
 select count(*) from functional.alltypesmixedformat
 where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
 and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
 and double_col > 100.00 and date_string_col > '1993-10-01'
 and string_col in ('aaaa', 'bbbb', 'cccc')
 and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
 and year > 2000 and month < 12;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=1 row-size=8B cardinality=1
 |  in pipelines: 01(GETNEXT), 00(OPEN)
 |
 00:SCAN HDFS [functional.alltypesmixedformat]
    partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT)
    HDFS partitions=4/4 files=4 size=66.33KB
    predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
    stored statistics:
      table: rows=unavailable size=unavailable
      partitions: 0/4 rows=2.55K
      columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1
    tuple-ids=0 row-size=72B cardinality=254
    in pipelines: 00(GETNEXT)
 ====
	# All queries in this test are run with PARQUET_DICTIONARY_FILTERING and
	# PARQUET_READ_STATISTICS disabled. The expected behavior is for the planner to skip
	# assigning statistics and dictionary conjuncts while querying parquet files.
	# Parquet predicates to be skipped:
	# parquet statistics predicate on int_col
	# parquet dictionary predicate on int_col
	select count(*) from functional_parquet.alltypes
	where int_col > 1 and int_col * rand() > 50 and int_col is null
	and int_col > tinyint_col;
	---- PLAN
	F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
	|  Per-Host Resources: mem-estimate=42.00MB mem-reservation=16.00KB thread-reservation=2
	PLAN-ROOT SINK
	|  output exprs: count(*)
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|
	01:AGGREGATE [FINALIZE]
	|  output: count(*)
	|  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
	|  tuple-ids=1 row-size=8B cardinality=1
	|  in pipelines: 01(GETNEXT), 00(OPEN)
	|
	00:SCAN HDFS [functional_parquet.alltypes]
	   HDFS partitions=24/24 files=24 size=200.33KB
	   predicates: int_col IS NULL, int_col > CAST(1 AS INT), int_col > CAST(tinyint_col AS INT), CAST(int_col AS DOUBLE) * rand() > CAST(50 AS DOUBLE)
	   stored statistics:
	     table: rows=unavailable size=unavailable
	     partitions: 0/24 rows=12.84K
	     columns: unavailable
	   extrapolated-rows=disabled max-scan-range-rows=unavailable
	   mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
	   tuple-ids=0 row-size=5B cardinality=1.27K
	   in pipelines: 00(GETNEXT)
	====
	# Parquet predicates to be skipped:
	# parquet statistics predicate on bigint_col, double_col, float_col, id, tinyint_col,
	# string_col, smallint_col & date_string_col
	# parquet dictionary predicate on bool_col, bigint_col, double_col, float_col, id,
	# tinyint_col, string_col, smallint_col, int_col &
	# date_string_col
	select count(*) from functional_parquet.alltypes
	where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
	and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
	and double_col > 100.00 and date_string_col > '1993-10-01'
	and string_col in ('aaaa', 'bbbb', 'cccc')
	and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
	and year > 2000 and month < 12;
	---- PLAN
	F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
	|  Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB thread-reservation=2
	PLAN-ROOT SINK
	|  output exprs: count(*)
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|
	01:AGGREGATE [FINALIZE]
	|  output: count(*)
	|  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
	|  tuple-ids=1 row-size=8B cardinality=1
	|  in pipelines: 01(GETNEXT), 00(OPEN)
	|
	00:SCAN HDFS [functional_parquet.alltypes]
	   partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT)
	   HDFS partitions=22/24 files=22 size=183.48KB
	   predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
	   stored statistics:
	     table: rows=unavailable size=unavailable
	     partitions: 0/22 rows=11.74K
	     columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
	   extrapolated-rows=disabled max-scan-range-rows=unavailable
	   mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1
	   tuple-ids=0 row-size=72B cardinality=1.17K
	   in pipelines: 00(GETNEXT)
	====
	# Parquet predicates to be skipped:
	# parquet dictionary predicates on id, string_col & int_col
	select count(*) from functional_parquet.alltypes
	where id NOT IN (0,1,2) and string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
	and mod(int_col,50) IN (0,1)
	and id IN (int_col);
	---- PLAN
	F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
	|  Per-Host Resources: mem-estimate=58.00MB mem-reservation=24.00KB thread-reservation=2
	PLAN-ROOT SINK
	|  output exprs: count(*)
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|
	01:AGGREGATE [FINALIZE]
	|  output: count(*)
	|  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
	|  tuple-ids=1 row-size=8B cardinality=1
	|  in pipelines: 01(GETNEXT), 00(OPEN)
	|
	00:SCAN HDFS [functional_parquet.alltypes]
	   HDFS partitions=24/24 files=24 size=200.33KB
	   predicates: id IN (int_col), id NOT IN (CAST(0 AS INT), CAST(1 AS INT), CAST(2 AS INT)), int_col % CAST(50 AS INT) IN (CAST(0 AS INT), CAST(1 AS INT)), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
	   stored statistics:
	     table: rows=unavailable size=unavailable
	     partitions: 0/24 rows=12.84K
	     columns: unavailable
	   extrapolated-rows=disabled max-scan-range-rows=unavailable
	   mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
	   tuple-ids=0 row-size=20B cardinality=1.27K
	   in pipelines: 00(GETNEXT)
	====
	# Nested parquet predicates to be skipped:
	# parquet statistics predicates on a.item.e
	# parquet dictionary predicates on a.item.e
	select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn, cn.item a
	where a.item.e < -10;
	---- PLAN
	F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
	|  Per-Host Resources: mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=2
	PLAN-ROOT SINK
	|  output exprs: id
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|
	01:SUBPLAN
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  tuple-ids=2,1,0 row-size=36B cardinality=440.00K
	|  in pipelines: 00(GETNEXT)
	|
	|--08:NESTED LOOP JOIN [CROSS JOIN]
	|  |  mem-estimate=20B mem-reservation=0B thread-reservation=0
	|  |  tuple-ids=2,1,0 row-size=36B cardinality=100
	|  |  in pipelines: 00(GETNEXT)
	|  |
	|  |--02:SINGULAR ROW SRC
	|  |     parent-subplan=01
	|  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |     tuple-ids=0 row-size=20B cardinality=1
	|  |     in pipelines: 00(GETNEXT)
	|  |
	|  04:SUBPLAN
	|  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |  tuple-ids=2,1 row-size=16B cardinality=100
	|  |  in pipelines: 00(GETNEXT)
	|  |
	|  |--07:NESTED LOOP JOIN [CROSS JOIN]
	|  |  |  mem-estimate=12B mem-reservation=0B thread-reservation=0
	|  |  |  tuple-ids=2,1 row-size=16B cardinality=10
	|  |  |  in pipelines: 00(GETNEXT)
	|  |  |
	|  |  |--05:SINGULAR ROW SRC
	|  |  |     parent-subplan=04
	|  |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |  |     tuple-ids=1 row-size=12B cardinality=1
	|  |  |     in pipelines: 00(GETNEXT)
	|  |  |
	|  |  06:UNNEST [cn.item a]
	|  |     parent-subplan=04
	|  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |     tuple-ids=2 row-size=0B cardinality=10
	|  |     in pipelines: 00(GETNEXT)
	|  |
	|  03:UNNEST [c.nested_struct.c.d cn]
	|     parent-subplan=01
	|     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|     tuple-ids=1 row-size=0B cardinality=10
	|     in pipelines: 00(GETNEXT)
	|
	00:SCAN HDFS [functional_parquet.complextypestbl c]
	   HDFS partitions=1/1 files=2 size=6.92KB
	   predicates: !empty(c.nested_struct.c.d)
	   predicates on cn: !empty(cn.item)
	   predicates on a: a.item.e < CAST(-10 AS INT)
	   stored statistics:
	     table: rows=unavailable size=unavailable
	     columns missing stats: id
	   extrapolated-rows=disabled max-scan-range-rows=unavailable
	   mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
	   tuple-ids=0 row-size=20B cardinality=4.40K
	   in pipelines: 00(GETNEXT)
	====
	# Parquet predicates to be skipped at each level:
	# parquet statistics predicates on c_custkey, o.o_orderkey & l.l_partkey
	# parquet dictionary predicates on c_custkey, o.o_orderkey & l.l_partkey
	select c_custkey from tpch_nested_parquet.customer c, c.c_orders o,
	o.o_lineitems l where c_custkey > 0 and o.o_orderkey > 0 and l.l_partkey > 0;
	---- PLAN
	F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
	|  Per-Host Resources: mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=2
	PLAN-ROOT SINK
	|  output exprs: c_custkey
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|
	01:SUBPLAN
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  tuple-ids=2,1,0 row-size=48B cardinality=1.50M
	|  in pipelines: 00(GETNEXT)
	|
	|--08:NESTED LOOP JOIN [CROSS JOIN]
	|  |  mem-estimate=20B mem-reservation=0B thread-reservation=0
	|  |  tuple-ids=2,1,0 row-size=48B cardinality=100
	|  |  in pipelines: 00(GETNEXT)
	|  |
	|  |--02:SINGULAR ROW SRC
	|  |     parent-subplan=01
	|  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |     tuple-ids=0 row-size=20B cardinality=1
	|  |     in pipelines: 00(GETNEXT)
	|  |
	|  04:SUBPLAN
	|  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |  tuple-ids=2,1 row-size=28B cardinality=100
	|  |  in pipelines: 00(GETNEXT)
	|  |
	|  |--07:NESTED LOOP JOIN [CROSS JOIN]
	|  |  |  mem-estimate=20B mem-reservation=0B thread-reservation=0
	|  |  |  tuple-ids=2,1 row-size=28B cardinality=10
	|  |  |  in pipelines: 00(GETNEXT)
	|  |  |
	|  |  |--05:SINGULAR ROW SRC
	|  |  |     parent-subplan=04
	|  |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |  |     tuple-ids=1 row-size=20B cardinality=1
	|  |  |     in pipelines: 00(GETNEXT)
	|  |  |
	|  |  06:UNNEST [o.o_lineitems l]
	|  |     parent-subplan=04
	|  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |     tuple-ids=2 row-size=0B cardinality=10
	|  |     in pipelines: 00(GETNEXT)
	|  |
	|  03:UNNEST [c.c_orders o]
	|     parent-subplan=01
	|     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|     tuple-ids=1 row-size=0B cardinality=10
	|     in pipelines: 00(GETNEXT)
	|
	00:SCAN HDFS [tpch_nested_parquet.customer c]
	   HDFS partitions=1/1 files=4 size=288.99MB
	   predicates: c_custkey > CAST(0 AS BIGINT), !empty(c.c_orders)
	   predicates on o: !empty(o.o_lineitems), o.o_orderkey > CAST(0 AS BIGINT)
	   predicates on l: l.l_partkey > CAST(0 AS BIGINT)
	   stored statistics:
	     table: rows=150.00K size=288.99MB
	     columns missing stats: c_orders
	   extrapolated-rows=disabled max-scan-range-rows=50.12K
	   mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=1
	   tuple-ids=0 row-size=20B cardinality=15.00K
	   in pipelines: 00(GETNEXT)
	====
	# Parquet filtering to be skipped on multiple collections at the same nested level:
	# parquet statistics filtering on l.l_shipdate, l.l_receiptdate, l.l_shipmode
	# & l.l_returnflag
	# parquet dictionary filtering on l.l_shipdate, l.l_receiptdate, l.l_shipmode
	# & l.l_returnflag
	select c_name, o.o_clerk from tpch_nested_parquet.customer c,
	c.c_orders o, o.o_lineitems l
	where l.l_shipdate = '1994-08-19' and
	l.l_receiptdate = '1994-08-24' and l.l_shipmode = 'RAIL' and l.l_returnflag = 'R' and
	l.l_comment is null;
	---- PLAN
	F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
	|  Per-Host Resources: mem-estimate=616.00MB mem-reservation=32.00MB thread-reservation=2
	PLAN-ROOT SINK
	|  output exprs: c_name, o.o_clerk
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|
	01:SUBPLAN
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  tuple-ids=2,1,0 row-size=126B cardinality=15.00M
	|  in pipelines: 00(GETNEXT)
	|
	|--08:NESTED LOOP JOIN [CROSS JOIN]
	|  |  mem-estimate=42B mem-reservation=0B thread-reservation=0
	|  |  tuple-ids=2,1,0 row-size=126B cardinality=100
	|  |  in pipelines: 00(GETNEXT)
	|  |
	|  |--02:SINGULAR ROW SRC
	|  |     parent-subplan=01
	|  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |     tuple-ids=0 row-size=42B cardinality=1
	|  |     in pipelines: 00(GETNEXT)
	|  |
	|  04:SUBPLAN
	|  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |  tuple-ids=2,1 row-size=84B cardinality=100
	|  |  in pipelines: 00(GETNEXT)
	|  |
	|  |--07:NESTED LOOP JOIN [CROSS JOIN]
	|  |  |  mem-estimate=24B mem-reservation=0B thread-reservation=0
	|  |  |  tuple-ids=2,1 row-size=84B cardinality=10
	|  |  |  in pipelines: 00(GETNEXT)
	|  |  |
	|  |  |--05:SINGULAR ROW SRC
	|  |  |     parent-subplan=04
	|  |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |  |     tuple-ids=1 row-size=24B cardinality=1
	|  |  |     in pipelines: 00(GETNEXT)
	|  |  |
	|  |  06:UNNEST [o.o_lineitems l]
	|  |     parent-subplan=04
	|  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|  |     tuple-ids=2 row-size=0B cardinality=10
	|  |     in pipelines: 00(GETNEXT)
	|  |
	|  03:UNNEST [c.c_orders o]
	|     parent-subplan=01
	|     mem-estimate=0B mem-reservation=0B thread-reservation=0
	|     tuple-ids=1 row-size=0B cardinality=10
	|     in pipelines: 00(GETNEXT)
	|
	00:SCAN HDFS [tpch_nested_parquet.customer c]
	   HDFS partitions=1/1 files=4 size=288.99MB
	   predicates: !empty(c.c_orders)
	   predicates on o: !empty(o.o_lineitems)
	   predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R', l.l_comment IS NULL
	   stored statistics:
	     table: rows=150.00K size=288.99MB
	     columns missing stats: c_orders
	   extrapolated-rows=disabled max-scan-range-rows=50.12K
	   mem-estimate=616.00MB mem-reservation=32.00MB thread-reservation=1
	   tuple-ids=0 row-size=42B cardinality=150.00K
	   in pipelines: 00(GETNEXT)
	====
	# Parquet filtering to be skipped on a mixed file format table:
	# parquet statistics predicates on bigint_col, double_col, float_col, id, tinyint_col,
	# string_col, smallint_col & date_string_col
	# parquet dictionary predicates on bool_col, bigint_col, double_col, float_col, id,
	# tinyint_col, string_col, smallint_col, int_col,
	# timestamp_col & date_string_col
	select count(*) from functional.alltypesmixedformat
	where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
	and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
	and double_col > 100.00 and date_string_col > '1993-10-01'
	and string_col in ('aaaa', 'bbbb', 'cccc')
	and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
	and year > 2000 and month < 12;
	---- PLAN
	F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
	|  Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB thread-reservation=2
	PLAN-ROOT SINK
	|  output exprs: count(*)
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|
	01:AGGREGATE [FINALIZE]
	|  output: count(*)
	|  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
	|  tuple-ids=1 row-size=8B cardinality=1
	|  in pipelines: 01(GETNEXT), 00(OPEN)
	|
	00:SCAN HDFS [functional.alltypesmixedformat]
	   partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT)
	   HDFS partitions=4/4 files=4 size=66.33KB
	   predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
	   stored statistics:
	     table: rows=unavailable size=unavailable
	     partitions: 0/4 rows=2.55K
	     columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
	   extrapolated-rows=disabled max-scan-range-rows=unavailable
	   mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1
	   tuple-ids=0 row-size=72B cardinality=254
	   in pipelines: 00(GETNEXT)
	====