| # Test a variety of predicates: |
| # simple predicate (accepted) |
| # predicate with randomness (rejected) |
| # predicate that evaluates to true on null (rejected) |
| # two slot predicate (rejected) |
| select count(*) from functional_parquet.alltypes |
| where int_col > 1 and int_col * rand() > 50 and int_col is null |
| and int_col > tinyint_col; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=42.00MB mem-reservation=16.00KB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: count(*) |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0 |
| | tuple-ids=1 row-size=8B cardinality=1 |
| | in pipelines: 01(GETNEXT), 00(OPEN) |
| | |
| 00:SCAN HDFS [functional_parquet.alltypes] |
| HDFS partitions=24/24 files=24 size=200.33KB |
| predicates: int_col IS NULL, int_col > CAST(1 AS INT), int_col > CAST(tinyint_col AS INT), CAST(int_col AS DOUBLE) * rand() > CAST(50 AS DOUBLE) |
| stored statistics: |
| table: rows=unavailable size=unavailable |
| partitions: 0/24 rows=12.84K |
| columns: unavailable |
| extrapolated-rows=disabled max-scan-range-rows=unavailable |
| parquet statistics predicates: int_col > CAST(1 AS INT) |
| parquet dictionary predicates: int_col > CAST(1 AS INT) |
| mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 |
| tuple-ids=0 row-size=5B cardinality=1.27K |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Test non-parquet types to ensure that parquet predicates are skipped |
| select count(*) from functional.alltypes |
| where int_col > 1 and int_col * rand() > 50 and int_col is null |
| and int_col > tinyint_col; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=138.00MB mem-reservation=32.00KB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: count(*) |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0 |
| | tuple-ids=1 row-size=8B cardinality=1 |
| | in pipelines: 01(GETNEXT), 00(OPEN) |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| predicates: int_col IS NULL, int_col > CAST(1 AS INT), int_col > CAST(tinyint_col AS INT), CAST(int_col AS DOUBLE) * rand() > CAST(50 AS DOUBLE) |
| stored statistics: |
| table: rows=7.30K size=478.45KB |
| partitions: 24/24 rows=7.30K |
| columns: all |
| extrapolated-rows=disabled max-scan-range-rows=310 |
| mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1 |
| tuple-ids=0 row-size=5B cardinality=730 |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Test a variety of types |
| select count(*) from functional_parquet.alltypes |
| where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5) |
| and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00 |
| and double_col > 100.00 and date_string_col > '1993-10-01' |
| and string_col in ('aaaa', 'bbbb', 'cccc') |
| and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1 |
| and year > 2000 and month < 12; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: count(*) |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0 |
| | tuple-ids=1 row-size=8B cardinality=1 |
| | in pipelines: 01(GETNEXT), 00(OPEN) |
| | |
| 00:SCAN HDFS [functional_parquet.alltypes] |
| partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT) |
| HDFS partitions=22/24 files=22 size=183.48KB |
| predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01' |
| stored statistics: |
| table: rows=unavailable size=unavailable |
| partitions: 0/22 rows=11.74K |
| columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col |
| extrapolated-rows=disabled max-scan-range-rows=unavailable |
| parquet statistics predicates: bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), date_string_col > '1993-10-01' |
| parquet dictionary predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01' |
| mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1 |
| tuple-ids=0 row-size=72B cardinality=1.17K |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Test non-parquet files for a variety of predicates |
| select count(*) from functional.alltypes |
| where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5) |
| and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00 |
| and double_col > 100.00 and date_string_col > '1993-10-01' |
| and string_col in ('aaaa', 'bbbb', 'cccc') |
| and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1 |
| and year > 2000 and month < 12; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=138.00MB mem-reservation=32.00KB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: count(*) |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0 |
| | tuple-ids=1 row-size=8B cardinality=1 |
| | in pipelines: 01(GETNEXT), 00(OPEN) |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT) |
| HDFS partitions=22/24 files=22 size=437.72KB |
| predicates: id = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01' |
| stored statistics: |
| table: rows=7.30K size=478.45KB |
| partitions: 22/22 rows=6.68K |
| columns: all |
| extrapolated-rows=disabled max-scan-range-rows=339 |
| mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1 |
| tuple-ids=0 row-size=81B cardinality=1 |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Test negative cases for IN predicate min/max filtering |
| # - NOT IN |
| # - IN list with NULL |
| # - IN list contains non-Literals |
| # - complex expression on left side of IN |
| select count(*) from functional_parquet.alltypes |
| where id NOT IN (0,1,2) and string_col IN ('aaaa', 'bbbb', 'cccc', NULL) |
| and mod(int_col,50) IN (0,1) |
| and id IN (int_col); |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=58.00MB mem-reservation=24.00KB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: count(*) |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0 |
| | tuple-ids=1 row-size=8B cardinality=1 |
| | in pipelines: 01(GETNEXT), 00(OPEN) |
| | |
| 00:SCAN HDFS [functional_parquet.alltypes] |
| HDFS partitions=24/24 files=24 size=200.33KB |
| predicates: id IN (int_col), id NOT IN (CAST(0 AS INT), CAST(1 AS INT), CAST(2 AS INT)), int_col % CAST(50 AS INT) IN (CAST(0 AS INT), CAST(1 AS INT)), string_col IN ('aaaa', 'bbbb', 'cccc', NULL) |
| stored statistics: |
| table: rows=unavailable size=unavailable |
| partitions: 0/24 rows=12.84K |
| columns: unavailable |
| extrapolated-rows=disabled max-scan-range-rows=unavailable |
| parquet dictionary predicates: id NOT IN (CAST(0 AS INT), CAST(1 AS INT), CAST(2 AS INT)), int_col % CAST(50 AS INT) IN (CAST(0 AS INT), CAST(1 AS INT)), string_col IN ('aaaa', 'bbbb', 'cccc', NULL) |
| mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1 |
| tuple-ids=0 row-size=20B cardinality=1.27K |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Test collection types where all collections on the path are required (inner |
| # join descent). Expect the scan node to include !empty checks for both collections and |
| # the min-max filtering for the leaf predicate. |
| select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn, cn.item a |
| where a.item.e < -10; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: id |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:SUBPLAN |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=2,1,0 row-size=36B cardinality=440.00K |
| | in pipelines: 00(GETNEXT) |
| | |
| |--08:NESTED LOOP JOIN [CROSS JOIN] |
| | | mem-estimate=20B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2,1,0 row-size=36B cardinality=100 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | |--02:SINGULAR ROW SRC |
| | | parent-subplan=01 |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=0 row-size=20B cardinality=1 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | 04:SUBPLAN |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2,1 row-size=16B cardinality=100 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | |--07:NESTED LOOP JOIN [CROSS JOIN] |
| | | | mem-estimate=12B mem-reservation=0B thread-reservation=0 |
| | | | tuple-ids=2,1 row-size=16B cardinality=10 |
| | | | in pipelines: 00(GETNEXT) |
| | | | |
| | | |--05:SINGULAR ROW SRC |
| | | | parent-subplan=04 |
| | | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | | tuple-ids=1 row-size=12B cardinality=1 |
| | | | in pipelines: 00(GETNEXT) |
| | | | |
| | | 06:UNNEST [cn.item a] |
| | | parent-subplan=04 |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2 row-size=0B cardinality=10 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | 03:UNNEST [c.nested_struct.c.d cn] |
| | parent-subplan=01 |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=1 row-size=0B cardinality=10 |
| | in pipelines: 00(GETNEXT) |
| | |
| 00:SCAN HDFS [functional_parquet.complextypestbl c] |
| HDFS partitions=1/1 files=2 size=6.92KB |
| predicates: !empty(c.nested_struct.c.d) |
| predicates on cn: !empty(cn.item) |
| predicates on a: a.item.e < CAST(-10 AS INT) |
| stored statistics: |
| table: rows=unavailable size=unavailable |
| columns missing stats: id |
| extrapolated-rows=disabled max-scan-range-rows=unavailable |
| parquet statistics predicates on a: a.item.e < CAST(-10 AS INT) |
| parquet dictionary predicates on a: a.item.e < CAST(-10 AS INT) |
| mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1 |
| tuple-ids=0 row-size=20B cardinality=4.40K |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Test collection types where the lower collection in the path is optional |
| # (outer join descent) and the upper is required (inner join descent). |
| # Expect the scan node to include !empty test for the root, but no min-max |
| # filter for the leaf (since it does not have a !empty check). |
| select id from functional_parquet.complextypestbl c, c.nested_struct.c.d cn |
| left outer join cn.item a |
| where a.item.e < -10; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: id |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:SUBPLAN |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=2N,1,0 row-size=36B cardinality=44.00K |
| | in pipelines: 00(GETNEXT) |
| | |
| |--08:SUBPLAN |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2N,1,0 row-size=36B cardinality=10 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | |--06:NESTED LOOP JOIN [RIGHT OUTER JOIN] |
| | | | predicates: a.item.e < CAST(-10 AS INT) |
| | | | mem-estimate=32B mem-reservation=0B thread-reservation=0 |
| | | | tuple-ids=2N,1,0 row-size=36B cardinality=1 |
| | | | in pipelines: 00(GETNEXT) |
| | | | |
| | | |--04:SINGULAR ROW SRC |
| | | | parent-subplan=08 |
| | | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | | tuple-ids=1,0 row-size=32B cardinality=1 |
| | | | in pipelines: 00(GETNEXT) |
| | | | |
| | | 05:UNNEST [cn.item a] |
| | | parent-subplan=08 |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2 row-size=0B cardinality=10 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | 07:NESTED LOOP JOIN [CROSS JOIN] |
| | | mem-estimate=20B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=1,0 row-size=32B cardinality=10 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | |--02:SINGULAR ROW SRC |
| | | parent-subplan=01 |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=0 row-size=20B cardinality=1 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | 03:UNNEST [c.nested_struct.c.d cn] |
| | parent-subplan=01 |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=1 row-size=0B cardinality=10 |
| | in pipelines: 00(GETNEXT) |
| | |
| 00:SCAN HDFS [functional_parquet.complextypestbl c] |
| HDFS partitions=1/1 files=2 size=6.92KB |
| predicates: !empty(c.nested_struct.c.d) |
| predicates on a: a.item.e < CAST(-10 AS INT) |
| stored statistics: |
| table: rows=unavailable size=unavailable |
| columns missing stats: id |
| extrapolated-rows=disabled max-scan-range-rows=unavailable |
| mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1 |
| tuple-ids=0 row-size=20B cardinality=4.40K |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Tests collection types where the outer is optional (outer join descent) |
| # and the inner is required (inner join descent). In this case, !empty is |
| # not pushed for either collection, so there is no min-max pruning either. |
| select id from functional_parquet.complextypestbl c |
| left outer join c.nested_struct.c.d cn, cn.item a where a.item.e < -10; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: id |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:SUBPLAN |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=2,1N,0 row-size=36B cardinality=44.00K |
| | in pipelines: 00(GETNEXT) |
| | |
| |--08:SUBPLAN |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2,1N,0 row-size=36B cardinality=10 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | |--06:NESTED LOOP JOIN [CROSS JOIN] |
| | | | mem-estimate=32B mem-reservation=0B thread-reservation=0 |
| | | | tuple-ids=2,1N,0 row-size=36B cardinality=10 |
| | | | in pipelines: 00(GETNEXT) |
| | | | |
| | | |--04:SINGULAR ROW SRC |
| | | | parent-subplan=08 |
| | | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | | tuple-ids=1N,0 row-size=32B cardinality=1 |
| | | | in pipelines: 00(GETNEXT) |
| | | | |
| | | 05:UNNEST [cn.item a] |
| | | parent-subplan=08 |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2 row-size=0B cardinality=10 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | 07:NESTED LOOP JOIN [RIGHT OUTER JOIN] |
| | | mem-estimate=20B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=1N,0 row-size=32B cardinality=1 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | |--02:SINGULAR ROW SRC |
| | | parent-subplan=01 |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=0 row-size=20B cardinality=1 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | 03:UNNEST [c.nested_struct.c.d cn] |
| | parent-subplan=01 |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=1 row-size=0B cardinality=10 |
| | in pipelines: 00(GETNEXT) |
| | |
| 00:SCAN HDFS [functional_parquet.complextypestbl c] |
| HDFS partitions=1/1 files=2 size=6.92KB |
| predicates on a: a.item.e < CAST(-10 AS INT) |
| stored statistics: |
| table: rows=unavailable size=unavailable |
| columns missing stats: id |
| extrapolated-rows=disabled max-scan-range-rows=unavailable |
| mem-estimate=48.00MB mem-reservation=24.00KB thread-reservation=1 |
| tuple-ids=0 row-size=20B cardinality=4.40K |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Test collections so that each level has a filter applied. |
| select c_custkey from tpch_nested_parquet.customer c, c.c_orders o, |
| o.o_lineitems l where c_custkey > 0 and o.o_orderkey > 0 and l.l_partkey > 0; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: c_custkey |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:SUBPLAN |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=2,1,0 row-size=48B cardinality=1.50M |
| | in pipelines: 00(GETNEXT) |
| | |
| |--08:NESTED LOOP JOIN [CROSS JOIN] |
| | | mem-estimate=20B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2,1,0 row-size=48B cardinality=100 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | |--02:SINGULAR ROW SRC |
| | | parent-subplan=01 |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=0 row-size=20B cardinality=1 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | 04:SUBPLAN |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2,1 row-size=28B cardinality=100 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | |--07:NESTED LOOP JOIN [CROSS JOIN] |
| | | | mem-estimate=20B mem-reservation=0B thread-reservation=0 |
| | | | tuple-ids=2,1 row-size=28B cardinality=10 |
| | | | in pipelines: 00(GETNEXT) |
| | | | |
| | | |--05:SINGULAR ROW SRC |
| | | | parent-subplan=04 |
| | | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | | tuple-ids=1 row-size=20B cardinality=1 |
| | | | in pipelines: 00(GETNEXT) |
| | | | |
| | | 06:UNNEST [o.o_lineitems l] |
| | | parent-subplan=04 |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2 row-size=0B cardinality=10 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | 03:UNNEST [c.c_orders o] |
| | parent-subplan=01 |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=1 row-size=0B cardinality=10 |
| | in pipelines: 00(GETNEXT) |
| | |
| 00:SCAN HDFS [tpch_nested_parquet.customer c] |
| HDFS partitions=1/1 files=4 size=288.99MB |
| predicates: c_custkey > CAST(0 AS BIGINT), !empty(c.c_orders) |
| predicates on o: !empty(o.o_lineitems), o.o_orderkey > CAST(0 AS BIGINT) |
| predicates on l: l.l_partkey > CAST(0 AS BIGINT) |
| stored statistics: |
| table: rows=150.00K size=288.99MB |
| columns missing stats: c_orders |
| extrapolated-rows=disabled max-scan-range-rows=50.12K |
| parquet statistics predicates: c_custkey > CAST(0 AS BIGINT) |
| parquet statistics predicates on o: o.o_orderkey > CAST(0 AS BIGINT) |
| parquet statistics predicates on l: l.l_partkey > CAST(0 AS BIGINT) |
| parquet dictionary predicates: c_custkey > CAST(0 AS BIGINT) |
| parquet dictionary predicates on o: o.o_orderkey > CAST(0 AS BIGINT) |
| parquet dictionary predicates on l: l.l_partkey > CAST(0 AS BIGINT) |
| mem-estimate=264.00MB mem-reservation=16.00MB thread-reservation=1 |
| tuple-ids=0 row-size=20B cardinality=15.00K |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Test collections in a way that would incorrectly apply a min-max |
| # filter at the scan. Expect no min-max filter and no !empty tests. |
| select count(*) from functional_parquet.complextypestbl c left outer join |
| (select * from c.int_array where item > 10) v; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=26.00MB mem-reservation=8.00KB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: count(*) |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 05:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0 |
| | tuple-ids=3 row-size=8B cardinality=1 |
| | in pipelines: 05(GETNEXT), 00(OPEN) |
| | |
| 01:SUBPLAN |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=1N,0 row-size=16B cardinality=4.40K |
| | in pipelines: 00(GETNEXT) |
| | |
| |--04:NESTED LOOP JOIN [RIGHT OUTER JOIN] |
| | | mem-estimate=12B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=1N,0 row-size=16B cardinality=1 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | |--02:SINGULAR ROW SRC |
| | | parent-subplan=01 |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=0 row-size=12B cardinality=1 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | 03:UNNEST [c.int_array] |
| | parent-subplan=01 |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=1 row-size=4B cardinality=10 |
| | in pipelines: 00(GETNEXT) |
| | |
| 00:SCAN HDFS [functional_parquet.complextypestbl c] |
| HDFS partitions=1/1 files=2 size=6.92KB |
| predicates on int_array: item > CAST(10 AS INT) |
| stored statistics: |
| table: rows=unavailable size=unavailable |
| columns: unavailable |
| extrapolated-rows=disabled max-scan-range-rows=unavailable |
| mem-estimate=16.00MB mem-reservation=8.00KB thread-reservation=1 |
| tuple-ids=0 row-size=12B cardinality=4.40K |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Multiple nested collection values (at the same nesting level) where dictionary |
| # pruning is applicable. |
| select c_name, o.o_clerk from tpch_nested_parquet.customer c, |
| c.c_orders o, o.o_lineitems l |
| where l.l_shipdate = '1994-08-19' and |
| l.l_receiptdate = '1994-08-24' and l.l_shipmode = 'RAIL' and l.l_returnflag = 'R' and |
| l.l_comment is null; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=616.00MB mem-reservation=32.00MB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: c_name, o.o_clerk |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:SUBPLAN |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=2,1,0 row-size=126B cardinality=15.00M |
| | in pipelines: 00(GETNEXT) |
| | |
| |--08:NESTED LOOP JOIN [CROSS JOIN] |
| | | mem-estimate=42B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2,1,0 row-size=126B cardinality=100 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | |--02:SINGULAR ROW SRC |
| | | parent-subplan=01 |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=0 row-size=42B cardinality=1 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | 04:SUBPLAN |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2,1 row-size=84B cardinality=100 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | |--07:NESTED LOOP JOIN [CROSS JOIN] |
| | | | mem-estimate=24B mem-reservation=0B thread-reservation=0 |
| | | | tuple-ids=2,1 row-size=84B cardinality=10 |
| | | | in pipelines: 00(GETNEXT) |
| | | | |
| | | |--05:SINGULAR ROW SRC |
| | | | parent-subplan=04 |
| | | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | | tuple-ids=1 row-size=24B cardinality=1 |
| | | | in pipelines: 00(GETNEXT) |
| | | | |
| | | 06:UNNEST [o.o_lineitems l] |
| | | parent-subplan=04 |
| | | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | | tuple-ids=2 row-size=0B cardinality=10 |
| | | in pipelines: 00(GETNEXT) |
| | | |
| | 03:UNNEST [c.c_orders o] |
| | parent-subplan=01 |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | tuple-ids=1 row-size=0B cardinality=10 |
| | in pipelines: 00(GETNEXT) |
| | |
| 00:SCAN HDFS [tpch_nested_parquet.customer c] |
| HDFS partitions=1/1 files=4 size=288.99MB |
| predicates: !empty(c.c_orders) |
| predicates on o: !empty(o.o_lineitems) |
| predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R', l.l_comment IS NULL |
| stored statistics: |
| table: rows=150.00K size=288.99MB |
| columns missing stats: c_orders |
| extrapolated-rows=disabled max-scan-range-rows=50.12K |
| parquet statistics predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R' |
| parquet dictionary predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R' |
| mem-estimate=616.00MB mem-reservation=32.00MB thread-reservation=1 |
| tuple-ids=0 row-size=42B cardinality=150.00K |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Test a variety of predicates on a mixed format table. |
| # Scan multiple partitions with atleast one Parquet partition. |
| select count(*) from functional.alltypesmixedformat |
| where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5) |
| and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00 |
| and double_col > 100.00 and date_string_col > '1993-10-01' |
| and string_col in ('aaaa', 'bbbb', 'cccc') |
| and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1 |
| and year > 2000 and month < 12; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=138.00MB mem-reservation=88.00KB thread-reservation=2 |
| PLAN-ROOT SINK |
| | output exprs: count(*) |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0 |
| | tuple-ids=1 row-size=8B cardinality=1 |
| | in pipelines: 01(GETNEXT), 00(OPEN) |
| | |
| 00:SCAN HDFS [functional.alltypesmixedformat] |
| partition predicates: `year` > CAST(2000 AS INT), `month` < CAST(12 AS INT) |
| HDFS partitions=4/4 files=4 size=66.33KB |
| predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01' |
| stored statistics: |
| table: rows=unavailable size=unavailable |
| partitions: 0/4 rows=2.55K |
| columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col |
| extrapolated-rows=disabled max-scan-range-rows=unavailable |
| parquet statistics predicates: bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), date_string_col > '1993-10-01' |
| parquet dictionary predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01' |
| mem-estimate=128.00MB mem-reservation=88.00KB thread-reservation=1 |
| tuple-ids=0 row-size=72B cardinality=254 |
| in pipelines: 00(GETNEXT) |
| ==== |
| # Test a variety of predicates on a mixed format table. |
| # Scan all partitions other than the parquet partition. |
| select count(*) from functional.alltypesmixedformat |
| where id = 1 and bool_col and tinyint_col < 50 and smallint_col in (1,2,3,4,5) |
| and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00 |
| and double_col > 100.00 and date_string_col > '1993-10-01' |
| and string_col in ('aaaa', 'bbbb', 'cccc') |
| and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1 |
| and year != 2009 and month != 4; |
| ---- PLAN |
| F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 |
| | Per-Host Resources: mem-estimate=10.00MB mem-reservation=0B thread-reservation=1 |
| PLAN-ROOT SINK |
| | output exprs: count(*) |
| | mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0 |
| | tuple-ids=1 row-size=8B cardinality=1 |
| | in pipelines: 01(GETNEXT), 00(OPEN) |
| | |
| 00:SCAN HDFS [functional.alltypesmixedformat] |
| partition predicates: `year` != CAST(2009 AS INT), `month` != CAST(4 AS INT) |
| partitions=0/4 files=0 size=0B |
| predicates: bool_col, bigint_col < CAST(5000 AS BIGINT), double_col > CAST(100.00 AS DOUBLE), float_col > CAST(50.00 AS FLOAT), id = CAST(1 AS INT), tinyint_col < CAST(50 AS TINYINT), int_col % CAST(2 AS INT) = CAST(1 AS INT), string_col IN ('aaaa', 'bbbb', 'cccc'), smallint_col IN (CAST(1 AS SMALLINT), CAST(2 AS SMALLINT), CAST(3 AS SMALLINT), CAST(4 AS SMALLINT), CAST(5 AS SMALLINT)), timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = CAST(1 AS INT), date_string_col > '1993-10-01' |
| stored statistics: |
| table: rows=unavailable size=unavailable |
| partitions: 0/0 rows=0 |
| columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col |
| extrapolated-rows=disabled max-scan-range-rows=0 |
| mem-estimate=0B mem-reservation=0B thread-reservation=0 |
| tuple-ids=0 row-size=72B cardinality=0 |
| in pipelines: 00(GETNEXT) |
| ==== |