# Sample 10%
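# The 10% sample is expected to select roughly 10% of the files (3/24 here) and scale
# the cardinality estimate accordingly: 7.30K * 0.10 = 730.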
select * from functional.alltypes tablesample system(10) repeatable(1234)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=32.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: functional.alltypes.id, functional.alltypes.bool_col, functional.alltypes.tinyint_col, functional.alltypes.smallint_col, functional.alltypes.int_col, functional.alltypes.bigint_col, functional.alltypes.float_col, functional.alltypes.double_col, functional.alltypes.date_string_col, functional.alltypes.string_col, functional.alltypes.timestamp_col, functional.alltypes.year, functional.alltypes.month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=3/24 files=3 size=60.68KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=2.45K
mem-estimate=32.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=730
in pipelines: 00(GETNEXT)
====
# Sample 50%
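# With a 50% sample, 12 of the 24 files are selected and the cardinality estimate is
# scaled to 7.30K * 0.50 = 3.65K.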
select * from functional.alltypes tablesample system(50) repeatable(1234)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=80.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: functional.alltypes.id, functional.alltypes.bool_col, functional.alltypes.tinyint_col, functional.alltypes.smallint_col, functional.alltypes.int_col, functional.alltypes.bigint_col, functional.alltypes.float_col, functional.alltypes.double_col, functional.alltypes.date_string_col, functional.alltypes.string_col, functional.alltypes.timestamp_col, functional.alltypes.year, functional.alltypes.month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=12/24 files=12 size=239.26KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=621
mem-estimate=80.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=3.65K
in pipelines: 00(GETNEXT)
====
# Sampling and scan predicates. Scan predicates are evaluated after sampling, and
# that is reflected in the cardinality.
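# Here the predicate's estimated selectivity (0.1 in this case) is applied to the
# sampled cardinality: 3.65K * 0.1 = 365.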
select * from functional.alltypes tablesample system(50) repeatable(1234)
where id < 10
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=80.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: functional.alltypes.id, functional.alltypes.bool_col, functional.alltypes.tinyint_col, functional.alltypes.smallint_col, functional.alltypes.int_col, functional.alltypes.bigint_col, functional.alltypes.float_col, functional.alltypes.double_col, functional.alltypes.date_string_col, functional.alltypes.string_col, functional.alltypes.timestamp_col, functional.alltypes.year, functional.alltypes.month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=12/24 files=12 size=239.26KB
predicates: id < CAST(10 AS INT)
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=621
mem-estimate=80.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=365
in pipelines: 00(GETNEXT)
====
# Partition pruning + sampling. Partition pruning happens before sampling.
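# The year = 2009 predicate prunes the scan to 12 partitions before the 50% sample is
# applied, so 6 of the remaining 12 files are selected and the cardinality estimate is
# 3.65K * 0.50 = 1.82K.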
select * from functional.alltypes tablesample system(50) repeatable(1234)
where year = 2009
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=48.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: functional.alltypes.id, functional.alltypes.bool_col, functional.alltypes.tinyint_col, functional.alltypes.smallint_col, functional.alltypes.int_col, functional.alltypes.bigint_col, functional.alltypes.float_col, functional.alltypes.double_col, functional.alltypes.date_string_col, functional.alltypes.string_col, functional.alltypes.timestamp_col, functional.alltypes.year, functional.alltypes.month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = CAST(2009 AS INT)
HDFS partitions=6/24 files=6 size=119.70KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 12/12 rows=3.65K
columns: all
extrapolated-rows=disabled max-scan-range-rows=1.24K
mem-estimate=48.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=1.82K
in pipelines: 00(GETNEXT)
====
# Edge case: sample 0%, no files should be selected
select * from functional.alltypes tablesample system(0)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=0B mem-reservation=0B thread-reservation=1
PLAN-ROOT SINK
| output exprs: functional.alltypes.id, functional.alltypes.bool_col, functional.alltypes.tinyint_col, functional.alltypes.smallint_col, functional.alltypes.int_col, functional.alltypes.bigint_col, functional.alltypes.float_col, functional.alltypes.double_col, functional.alltypes.date_string_col, functional.alltypes.string_col, functional.alltypes.timestamp_col, functional.alltypes.year, functional.alltypes.month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN HDFS [functional.alltypes]
partitions=0/24 files=0 size=0B
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=0
mem-estimate=0B mem-reservation=0B thread-reservation=0
tuple-ids=0 row-size=89B cardinality=0
in pipelines: 00(GETNEXT)
====
# Edge case: sample 1%, at least one file should be selected
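# A 1% sample of 24 files would round down to zero files, but one file is still kept;
# the cardinality estimate is scaled to 7.30K * 0.01 = 73.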
select * from functional.alltypes tablesample system(1) repeatable(1234)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=32.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: functional.alltypes.id, functional.alltypes.bool_col, functional.alltypes.tinyint_col, functional.alltypes.smallint_col, functional.alltypes.int_col, functional.alltypes.bigint_col, functional.alltypes.float_col, functional.alltypes.double_col, functional.alltypes.date_string_col, functional.alltypes.string_col, functional.alltypes.timestamp_col, functional.alltypes.year, functional.alltypes.month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=1/24 files=1 size=19.95KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=7.30K
mem-estimate=32.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=73
in pipelines: 00(GETNEXT)
====
# Edge case: sample 1% and prune partitions, at least one file should be selected
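# After pruning to year = 2010 (12 partitions), the 1% sample still keeps one file;
# the cardinality estimate is scaled to roughly 3.65K * 0.01 = 37.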
select * from functional.alltypes tablesample system(1) repeatable(1234)
where year = 2010
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=32.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: functional.alltypes.id, functional.alltypes.bool_col, functional.alltypes.tinyint_col, functional.alltypes.smallint_col, functional.alltypes.int_col, functional.alltypes.bigint_col, functional.alltypes.float_col, functional.alltypes.double_col, functional.alltypes.date_string_col, functional.alltypes.string_col, functional.alltypes.timestamp_col, functional.alltypes.year, functional.alltypes.month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = CAST(2010 AS INT)
HDFS partitions=1/24 files=1 size=20.36KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 12/12 rows=3.65K
columns: all
extrapolated-rows=disabled max-scan-range-rows=7.30K
mem-estimate=32.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=37
in pipelines: 00(GETNEXT)
====
# Edge case: sample 100%, all files should be selected
select * from functional.alltypes tablesample system (100)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: functional.alltypes.id, functional.alltypes.bool_col, functional.alltypes.tinyint_col, functional.alltypes.smallint_col, functional.alltypes.int_col, functional.alltypes.bigint_col, functional.alltypes.float_col, functional.alltypes.double_col, functional.alltypes.date_string_col, functional.alltypes.string_col, functional.alltypes.timestamp_col, functional.alltypes.year, functional.alltypes.month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=310
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=7.30K
in pipelines: 00(GETNEXT)
====
# Table that has no stats.
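# Without table stats, the row count (12.84K) and scan cardinality (1.27K) shown below
# are planner estimates rather than stats-based values.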
select id from functional_parquet.alltypes tablesample system(10) repeatable(1234)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=16.00MB mem-reservation=16.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: id
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN HDFS [functional_parquet.alltypes]
HDFS partitions=3/24 files=3 size=25.50KB
stored statistics:
table: rows=unavailable size=unavailable
partitions: 0/24 rows=12.84K
columns: unavailable
extrapolated-rows=disabled max-scan-range-rows=unavailable
mem-estimate=16.00MB mem-reservation=16.00KB thread-reservation=1
tuple-ids=0 row-size=4B cardinality=1.27K
in pipelines: 00(GETNEXT)
====
# Sampling in a subquery.
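# The sample applies only to the alltypessmall scan in the subquery (1 of 4 files,
# cardinality 100 * 0.10 = 10); the outer alltypes scan still reads all 24 files.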
select id from functional.alltypes t1 where exists (
select id from functional.alltypessmall t2 tablesample system(10) repeatable(1234)
where t1.id = t2.id)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=162.94MB mem-reservation=2.98MB thread-reservation=3 runtime-filters-memory=1.00MB
PLAN-ROOT SINK
| output exprs: id
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF000[bloom] <- t2.id
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
| tuple-ids=0 row-size=4B cardinality=10
| in pipelines: 00(GETNEXT), 01(OPEN)
|
|--01:SCAN HDFS [functional.alltypessmall t2]
| HDFS partitions=1/4 files=1 size=1.57KB
| stored statistics:
| table: rows=100 size=6.32KB
| partitions: 4/4 rows=100
| columns: all
| extrapolated-rows=disabled max-scan-range-rows=100
| mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
| tuple-ids=1 row-size=4B cardinality=10
| in pipelines: 01(GETNEXT)
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000[bloom] -> t1.id
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=310
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=4B cardinality=7.30K
in pipelines: 00(GETNEXT)
====
# Sampling in a WITH-clause view.
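# The sample is pushed down to the base-table scan of the inlined view, so the plan
# matches sampling functional.alltypes directly (3/24 files, cardinality 730).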
with t as (select * from functional.alltypes tablesample system(10) repeatable(1234))
select id from t
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=32.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: functional.alltypes.id
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=3/24 files=3 size=60.68KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=2.45K
mem-estimate=32.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=4B cardinality=730
in pipelines: 00(GETNEXT)
====