testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test - impala - Git at Google

 # Test a variety of predicates:
 # simple predicate (accepted)
 # predicate with randomness (rejected)
 # predicate that evaluates to true on null (rejected)
 # two slot predicate (rejected)
 select count(*) from functional_parquet.alltypes
 where int_col > 1 and int_col * rand() > 50 and int_col is null
 and int_col > tinyint_col;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=42.00MB mem-reservation=16.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=1 row-size=8B cardinality=1
 |  in pipelines: 01(GETNEXT), 00(OPEN)
 |
 00:SCAN HDFS [functional_parquet.alltypes]
    HDFS partitions=24/24 files=24 size=200.33KB
    predicates: int_col IS NULL, int_col > CAST(1 AS INT), int_col > CAST(tinyint_col AS INT), CAST(int_col AS DOUBLE) * rand() > CAST(50 AS DOUBLE)
    stored statistics:
      table: rows=unavailable size=unavailable
      partitions: 0/24 rows=12.84K
      columns: unavailable
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    parquet statistics predicates: int_col > CAST(1 AS INT)
    parquet dictionary predicates: int_col > CAST(1 AS INT)
    mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
    tuple-ids=0 row-size=5B cardinality=1.27K
    in pipelines: 00(GETNEXT)
 ====
 # Test non-parquet types to ensure that parquet predicates are skipped
 select count(*) from functional.alltypes
 where int_col > 1 and int_col * rand() > 50 and int_col is null
 and int_col > tinyint_col;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=138.00MB mem-reservation=32.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=1 row-size=8B cardinality=1
 |  in pipelines: 01(GETNEXT), 00(OPEN)
 |
 00:SCAN HDFS [functional.alltypes]
    HDFS partitions=24/24 files=24 size=478.45KB
    predicates: int_col IS NULL, int_col > CAST(1 AS INT), int_col > CAST(tinyint_col AS INT), CAST(int_col AS DOUBLE) * rand() > CAST(50 AS DOUBLE)
    stored statistics:
      table: rows=7.30K size=478.45KB
      partitions: 24/24 rows=7.30K
      columns: all
    extrapolated-rows=disabled max-scan-range-rows=310
    mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=5B cardinality=730
    in pipelines: 00(GETNEXT)
 ====
 # Test a variety of types
 select count(*) from functional_parquet.alltypes
 where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
 and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
 and double_col > 100.00 and date_string_col > '1993-10-01'
 and string_col in ('aaaa', 'bbbb', 'cccc')
 and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
 and year > 2000 and month < 12;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=1 row-size=8B cardinality=1
 |  in pipelines: 01(GETNEXT), 00(OPEN)
 |
 00:SCAN HDFS [functional_parquet.alltypes]
    partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT)
    HDFS partitions=22/24 files=22 size=183.48KB
    predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
    stored statistics:
      table: rows=unavailable size=unavailable
      partitions: 0/22 rows=11.74K
      columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    parquet statistics predicates: bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), date_string_col > '1993-10-01'
    parquet dictionary predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
    mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1
    tuple-ids=0 row-size=72B cardinality=1.17K
    in pipelines: 00(GETNEXT)
 ====
 # Test non-parquet files for a variety of predicates
 select count(*) from functional.alltypes
 where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
 and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
 and double_col > 100.00 and date_string_col > '1993-10-01'
 and string_col in ('aaaa', 'bbbb', 'cccc')
 and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
 and year > 2000 and month < 12;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=138.00MB mem-reservation=32.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=1 row-size=8B cardinality=1
 |  in pipelines: 01(GETNEXT), 00(OPEN)
 |
 00:SCAN HDFS [functional.alltypes]
    partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT)
    HDFS partitions=22/24 files=22 size=437.72KB
    predicates: id = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
    stored statistics:
      table: rows=7.30K size=478.45KB
      partitions: 22/22 rows=6.68K
      columns: all
    extrapolated-rows=disabled max-scan-range-rows=339
    mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
    tuple-ids=0 row-size=81B cardinality=1
    in pipelines: 00(GETNEXT)
 ====
 # Test negative cases for IN predicate min/max filtering
 #  - NOT IN
 #  - IN list with NULL
 #  - IN list contains non-Literals
 #  - complex expression on left side of IN
 select count(*) from functional_parquet.alltypes
 where id NOT IN (0,1,2) and string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
 and mod(int_col,50) IN (0,1)
 and id IN (int_col);
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=58.00MB mem-reservation=24.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=1 row-size=8B cardinality=1
 |  in pipelines: 01(GETNEXT), 00(OPEN)
 |
 00:SCAN HDFS [functional_parquet.alltypes]
    HDFS partitions=24/24 files=24 size=200.33KB
    predicates: id IN (int_col), id NOT IN (CAST(0 AS INT), CAST(1 AS INT), CAST(2 AS INT)), int_col % CAST(50 AS INT) IN (CAST(0 AS INT), CAST(1 AS INT)), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
    stored statistics:
      table: rows=unavailable size=unavailable
      partitions: 0/24 rows=12.84K
      columns: unavailable
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    parquet dictionary predicates: id NOT IN (CAST(0 AS INT), CAST(1 AS INT), CAST(2 AS INT)), int_col % CAST(50 AS INT) IN (CAST(0 AS INT), CAST(1 AS INT)), string_col IN ('aaaa', 'bbbb', 'cccc', NULL)
    mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=1.27K
    in pipelines: 00(GETNEXT)
 ====
 # Test collection types where all collections on the path are required (inner
 # join descent). Expect the scan node to include !empty checks for both collections and
 # the min-max filtering for the leaf predicate.
 select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn, cn.item a
 where a.item.e < -10;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: id
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:SUBPLAN
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  tuple-ids=2,1,0 row-size=36B cardinality=440.00K
 |  in pipelines: 00(GETNEXT)
 |
 |--08:NESTED LOOP JOIN [CROSS JOIN]
 |  |  mem-estimate=20B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1,0 row-size=36B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--02:SINGULAR ROW SRC
 |  |     parent-subplan=01
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=0 row-size=20B cardinality=1
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  04:SUBPLAN
 |  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1 row-size=16B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--07:NESTED LOOP JOIN [CROSS JOIN]
 |  |  |  mem-estimate=12B mem-reservation=0B thread-reservation=0
 |  |  |  tuple-ids=2,1 row-size=16B cardinality=10
 |  |  |  in pipelines: 00(GETNEXT)
 |  |  |
 |  |  |--05:SINGULAR ROW SRC
 |  |  |     parent-subplan=04
 |  |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  |     tuple-ids=1 row-size=12B cardinality=1
 |  |  |     in pipelines: 00(GETNEXT)
 |  |  |
 |  |  06:UNNEST [cn.item a]
 |  |     parent-subplan=04
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=2 row-size=0B cardinality=10
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  03:UNNEST [c.nested_struct.c.d cn]
 |     parent-subplan=01
 |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |     tuple-ids=1 row-size=0B cardinality=10
 |     in pipelines: 00(GETNEXT)
 |
 00:SCAN HDFS [functional_parquet.complextypestbl c]
    HDFS partitions=1/1 files=2 size=6.92KB
    predicates: !empty(c.nested_struct.c.d)
    predicates on cn: !empty(cn.item)
    predicates on a: a.item.e < CAST(-10 AS INT)
    stored statistics:
      table: rows=unavailable size=unavailable
      columns missing stats: id
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    parquet statistics predicates on a: a.item.e < CAST(-10 AS INT)
    parquet dictionary predicates on a: a.item.e < CAST(-10 AS INT)
    mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=4.40K
    in pipelines: 00(GETNEXT)
 ====
 # Test collection types where the lower collection in the path is optional
 # (outer join descent) and the upper is required (inner join descent).
 # Expect the scan node to include !empty test for the root, but no min-max
 # filter for the leaf (since it does not have a !empty check).
 select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn
 left outer join cn.item a
 where a.item.e < -10;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: id
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:SUBPLAN
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  tuple-ids=2N,1,0 row-size=36B cardinality=44.00K
 |  in pipelines: 00(GETNEXT)
 |
 |--08:SUBPLAN
 |  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2N,1,0 row-size=36B cardinality=10
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--06:NESTED LOOP JOIN [RIGHT OUTER JOIN]
 |  |  |  predicates: a.item.e < CAST(-10 AS INT)
 |  |  |  mem-estimate=32B mem-reservation=0B thread-reservation=0
 |  |  |  tuple-ids=2N,1,0 row-size=36B cardinality=1
 |  |  |  in pipelines: 00(GETNEXT)
 |  |  |
 |  |  |--04:SINGULAR ROW SRC
 |  |  |     parent-subplan=08
 |  |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  |     tuple-ids=1,0 row-size=32B cardinality=1
 |  |  |     in pipelines: 00(GETNEXT)
 |  |  |
 |  |  05:UNNEST [cn.item a]
 |  |     parent-subplan=08
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=2 row-size=0B cardinality=10
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  07:NESTED LOOP JOIN [CROSS JOIN]
 |  |  mem-estimate=20B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=1,0 row-size=32B cardinality=10
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--02:SINGULAR ROW SRC
 |  |     parent-subplan=01
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=0 row-size=20B cardinality=1
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  03:UNNEST [c.nested_struct.c.d cn]
 |     parent-subplan=01
 |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |     tuple-ids=1 row-size=0B cardinality=10
 |     in pipelines: 00(GETNEXT)
 |
 00:SCAN HDFS [functional_parquet.complextypestbl c]
    HDFS partitions=1/1 files=2 size=6.92KB
    predicates: !empty(c.nested_struct.c.d)
    predicates on a: a.item.e < CAST(-10 AS INT)
    stored statistics:
      table: rows=unavailable size=unavailable
      columns missing stats: id
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=4.40K
    in pipelines: 00(GETNEXT)
 ====
 # Tests collection types where the outer is optional (outer join descent)
 # and the inner is required (inner join descent). In this case, !empty is
 # not pushed for either collection, so there is no min-max pruning either.
 select id from functional_parquet.complextypestbl c
 left outer join c.nested_struct.c.d cn, cn.item a where a.item.e < -10;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: id
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:SUBPLAN
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  tuple-ids=2,1N,0 row-size=36B cardinality=44.00K
 |  in pipelines: 00(GETNEXT)
 |
 |--08:SUBPLAN
 |  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1N,0 row-size=36B cardinality=10
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--06:NESTED LOOP JOIN [CROSS JOIN]
 |  |  |  mem-estimate=32B mem-reservation=0B thread-reservation=0
 |  |  |  tuple-ids=2,1N,0 row-size=36B cardinality=10
 |  |  |  in pipelines: 00(GETNEXT)
 |  |  |
 |  |  |--04:SINGULAR ROW SRC
 |  |  |     parent-subplan=08
 |  |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  |     tuple-ids=1N,0 row-size=32B cardinality=1
 |  |  |     in pipelines: 00(GETNEXT)
 |  |  |
 |  |  05:UNNEST [cn.item a]
 |  |     parent-subplan=08
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=2 row-size=0B cardinality=10
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  07:NESTED LOOP JOIN [RIGHT OUTER JOIN]
 |  |  mem-estimate=20B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=1N,0 row-size=32B cardinality=1
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--02:SINGULAR ROW SRC
 |  |     parent-subplan=01
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=0 row-size=20B cardinality=1
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  03:UNNEST [c.nested_struct.c.d cn]
 |     parent-subplan=01
 |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |     tuple-ids=1 row-size=0B cardinality=10
 |     in pipelines: 00(GETNEXT)
 |
 00:SCAN HDFS [functional_parquet.complextypestbl c]
    HDFS partitions=1/1 files=2 size=6.92KB
    predicates on a: a.item.e < CAST(-10 AS INT)
    stored statistics:
      table: rows=unavailable size=unavailable
      columns missing stats: id
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=4.40K
    in pipelines: 00(GETNEXT)
 ====
 # Test collections so that each level has a filter applied.
 select c_custkey from tpch_nested_parquet.customer c, c.c_orders o,
 o.o_lineitems l where c_custkey > 0 and o.o_orderkey > 0 and l.l_partkey > 0;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: c_custkey
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:SUBPLAN
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  tuple-ids=2,1,0 row-size=48B cardinality=1.50M
 |  in pipelines: 00(GETNEXT)
 |
 |--08:NESTED LOOP JOIN [CROSS JOIN]
 |  |  mem-estimate=20B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1,0 row-size=48B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--02:SINGULAR ROW SRC
 |  |     parent-subplan=01
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=0 row-size=20B cardinality=1
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  04:SUBPLAN
 |  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1 row-size=28B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--07:NESTED LOOP JOIN [CROSS JOIN]
 |  |  |  mem-estimate=20B mem-reservation=0B thread-reservation=0
 |  |  |  tuple-ids=2,1 row-size=28B cardinality=10
 |  |  |  in pipelines: 00(GETNEXT)
 |  |  |
 |  |  |--05:SINGULAR ROW SRC
 |  |  |     parent-subplan=04
 |  |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  |     tuple-ids=1 row-size=20B cardinality=1
 |  |  |     in pipelines: 00(GETNEXT)
 |  |  |
 |  |  06:UNNEST [o.o_lineitems l]
 |  |     parent-subplan=04
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=2 row-size=0B cardinality=10
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  03:UNNEST [c.c_orders o]
 |     parent-subplan=01
 |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |     tuple-ids=1 row-size=0B cardinality=10
 |     in pipelines: 00(GETNEXT)
 |
 00:SCAN HDFS [tpch_nested_parquet.customer c]
    HDFS partitions=1/1 files=4 size=288.99MB
    predicates: c_custkey > CAST(0 AS BIGINT), !empty(c.c_orders)
    predicates on o: !empty(o.o_lineitems), o.o_orderkey > CAST(0 AS BIGINT)
    predicates on l: l.l_partkey > CAST(0 AS BIGINT)
    stored statistics:
      table: rows=150.00K size=288.99MB
      columns missing stats: c_orders
    extrapolated-rows=disabled max-scan-range-rows=50.12K
    parquet statistics predicates: c_custkey > CAST(0 AS BIGINT)
    parquet statistics predicates on o: o.o_orderkey > CAST(0 AS BIGINT)
    parquet statistics predicates on l: l.l_partkey > CAST(0 AS BIGINT)
    parquet dictionary predicates: c_custkey > CAST(0 AS BIGINT)
    parquet dictionary predicates on o: o.o_orderkey > CAST(0 AS BIGINT)
    parquet dictionary predicates on l: l.l_partkey > CAST(0 AS BIGINT)
    mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=1
    tuple-ids=0 row-size=20B cardinality=15.00K
    in pipelines: 00(GETNEXT)
 ====
 # Test collections in a way that would incorrectly apply a min-max
 # filter at the scan. Expect no min-max filter and no !empty tests.
 select count(*) from functional_parquet.complextypestbl c left outer join
 (select * from c.int_array where item > 10) v;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=26.00MB mem-reservation=8.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 05:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=3 row-size=8B cardinality=1
 |  in pipelines: 05(GETNEXT), 00(OPEN)
 |
 01:SUBPLAN
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  tuple-ids=1N,0 row-size=16B cardinality=4.40K
 |  in pipelines: 00(GETNEXT)
 |
 |--04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
 |  |  mem-estimate=12B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=1N,0 row-size=16B cardinality=1
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--02:SINGULAR ROW SRC
 |  |     parent-subplan=01
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=0 row-size=12B cardinality=1
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  03:UNNEST [c.int_array]
 |     parent-subplan=01
 |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |     tuple-ids=1 row-size=4B cardinality=10
 |     in pipelines: 00(GETNEXT)
 |
 00:SCAN HDFS [functional_parquet.complextypestbl c]
    HDFS partitions=1/1 files=2 size=6.92KB
    predicates on int_array: item > CAST(10 AS INT)
    stored statistics:
      table: rows=unavailable size=unavailable
      columns: unavailable
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    mem-estimate=16.00MB mem-reservation=8.00KB thread-reservation=1
    tuple-ids=0 row-size=12B cardinality=4.40K
    in pipelines: 00(GETNEXT)
 ====
 # Multiple nested collection values (at the same nesting level) where dictionary
 # pruning is applicable.
 select c_name, o.o_clerk from tpch_nested_parquet.customer c,
 c.c_orders o, o.o_lineitems l
 where l.l_shipdate = '1994-08-19' and
 l.l_receiptdate = '1994-08-24' and l.l_shipmode = 'RAIL' and l.l_returnflag = 'R' and
 l.l_comment is null;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=616.00MB mem-reservation=32.00MB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: c_name, o.o_clerk
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:SUBPLAN
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  tuple-ids=2,1,0 row-size=126B cardinality=15.00M
 |  in pipelines: 00(GETNEXT)
 |
 |--08:NESTED LOOP JOIN [CROSS JOIN]
 |  |  mem-estimate=42B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1,0 row-size=126B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--02:SINGULAR ROW SRC
 |  |     parent-subplan=01
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=0 row-size=42B cardinality=1
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  04:SUBPLAN
 |  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  tuple-ids=2,1 row-size=84B cardinality=100
 |  |  in pipelines: 00(GETNEXT)
 |  |
 |  |--07:NESTED LOOP JOIN [CROSS JOIN]
 |  |  |  mem-estimate=24B mem-reservation=0B thread-reservation=0
 |  |  |  tuple-ids=2,1 row-size=84B cardinality=10
 |  |  |  in pipelines: 00(GETNEXT)
 |  |  |
 |  |  |--05:SINGULAR ROW SRC
 |  |  |     parent-subplan=04
 |  |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |  |     tuple-ids=1 row-size=24B cardinality=1
 |  |  |     in pipelines: 00(GETNEXT)
 |  |  |
 |  |  06:UNNEST [o.o_lineitems l]
 |  |     parent-subplan=04
 |  |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |  |     tuple-ids=2 row-size=0B cardinality=10
 |  |     in pipelines: 00(GETNEXT)
 |  |
 |  03:UNNEST [c.c_orders o]
 |     parent-subplan=01
 |     mem-estimate=0B mem-reservation=0B thread-reservation=0
 |     tuple-ids=1 row-size=0B cardinality=10
 |     in pipelines: 00(GETNEXT)
 |
 00:SCAN HDFS [tpch_nested_parquet.customer c]
    HDFS partitions=1/1 files=4 size=288.99MB
    predicates: !empty(c.c_orders)
    predicates on o: !empty(o.o_lineitems)
    predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R', l.l_comment IS NULL
    stored statistics:
      table: rows=150.00K size=288.99MB
      columns missing stats: c_orders
    extrapolated-rows=disabled max-scan-range-rows=50.12K
    parquet statistics predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R'
    parquet dictionary predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R'
    mem-estimate=616.00MB mem-reservation=32.00MB thread-reservation=1
    tuple-ids=0 row-size=42B cardinality=150.00K
    in pipelines: 00(GETNEXT)
 ====
 # Test a variety of predicates on a mixed format table.
 # Scan multiple partitions with atleast one Parquet partition.
 select count(*) from functional.alltypesmixedformat
 where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
 and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
 and double_col > 100.00 and date_string_col > '1993-10-01'
 and string_col in ('aaaa', 'bbbb', 'cccc')
 and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
 and year > 2000 and month < 12;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB thread-reservation=2
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=1 row-size=8B cardinality=1
 |  in pipelines: 01(GETNEXT), 00(OPEN)
 |
 00:SCAN HDFS [functional.alltypesmixedformat]
    partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT)
    HDFS partitions=4/4 files=4 size=66.33KB
    predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
    stored statistics:
      table: rows=unavailable size=unavailable
      partitions: 0/4 rows=2.55K
      columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
    extrapolated-rows=disabled max-scan-range-rows=unavailable
    parquet statistics predicates: bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), date_string_col > '1993-10-01'
    parquet dictionary predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
    mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1
    tuple-ids=0 row-size=72B cardinality=254
    in pipelines: 00(GETNEXT)
 ====
 # Test a variety of predicates on a mixed format table.
 # Scan all partitions other than the parquet partition.
 select count(*) from functional.alltypesmixedformat
 where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5)
 and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
 and double_col > 100.00 and date_string_col > '1993-10-01'
 and string_col in ('aaaa', 'bbbb', 'cccc')
 and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
 and year != 2009 and month != 4;
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=10.00MB mem-reservation=0B thread-reservation=1
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=1 row-size=8B cardinality=1
 |  in pipelines: 01(GETNEXT), 00(OPEN)
 |
 00:SCAN HDFS [functional.alltypesmixedformat]
    partition predicates: `year` != CAST(2009 AS INT), `month` != CAST(4 AS INT)
    partitions=0/4 files=0 size=0B
    predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01'
    stored statistics:
      table: rows=unavailable size=unavailable
      partitions: 0/0 rows=0
      columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
    extrapolated-rows=disabled max-scan-range-rows=0
    mem-estimate=0B mem-reservation=0B thread-reservation=0
    tuple-ids=0 row-size=72B cardinality=0
    in pipelines: 00(GETNEXT)
 ====