blob: a4c06e035d40354ea9f4acdbb169c8cf02e58168 [file] [log] [blame]
# Sample 10%
# repeatable(1234) supplies a fixed seed for the sample.
# Expected: 3 of the 24 partitions/files are scanned and the cardinality is 730,
# i.e. 10% of the 7300 stats-rows.
select * from functional.alltypes tablesample system(10) repeatable(1234)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
00:SCAN HDFS [functional.alltypes]
partitions=3/24 files=3 size=60.68KB
stats-rows=7300 extrapolated-rows=disabled
table stats: rows=7300 size=478.45KB
column stats: all
mem-estimate=32.00MB mem-reservation=0B
tuple-ids=0 row-size=97B cardinality=730
====
# Sample 50%
# Expected: 12 of the 24 partitions/files are scanned and the cardinality is 3650,
# i.e. 50% of the 7300 stats-rows.
select * from functional.alltypes tablesample system(50) repeatable(1234)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=80.00MB mem-reservation=0B
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
00:SCAN HDFS [functional.alltypes]
partitions=12/24 files=12 size=239.26KB
stats-rows=7300 extrapolated-rows=disabled
table stats: rows=7300 size=478.45KB
column stats: all
mem-estimate=80.00MB mem-reservation=0B
tuple-ids=0 row-size=97B cardinality=3650
====
# Sampling and scan predicates. Scan predicates are evaluated after sampling and
# that is reflected in the cardinality.
# Expected: the scanned file set (12 files, 239.26KB) matches the plain 50% sample;
# only the cardinality changes, dropping from the sampled 3650 rows to 365 once the
# 'id < 10' predicate is applied.
select * from functional.alltypes tablesample system(50) repeatable(1234)
where id < 10
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=80.00MB mem-reservation=0B
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
00:SCAN HDFS [functional.alltypes]
partitions=12/24 files=12 size=239.26KB
predicates: id < 10
stats-rows=7300 extrapolated-rows=disabled
table stats: rows=7300 size=478.45KB
column stats: all
parquet dictionary predicates: id < 10
mem-estimate=80.00MB mem-reservation=0B
tuple-ids=0 row-size=97B cardinality=365
====
# Partition pruning + sampling. Partition pruning happens before sampling: the sample
# is drawn only from the partitions that survive the 'year = 2009' predicate.
# Expected: stats-rows is the pruned row count (3650), 6 partitions/files are scanned,
# and the cardinality is 1825, i.e. 50% of the pruned 3650 rows.
select * from functional.alltypes tablesample system(50) repeatable(1234)
where year = 2009
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=48.00MB mem-reservation=0B
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
00:SCAN HDFS [functional.alltypes]
partitions=6/24 files=6 size=119.70KB
stats-rows=3650 extrapolated-rows=disabled
table stats: rows=7300 size=478.45KB
column stats: all
mem-estimate=48.00MB mem-reservation=0B
tuple-ids=0 row-size=97B cardinality=1825
====
# Edge case: sample 0%, no files should be selected
# Expected: partitions=0/24 files=0 size=0B, a 0B memory estimate for the scan and
# cardinality=0. The table-level stats-rows (7300) are still reported.
select * from functional.alltypes tablesample system(0)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=0B mem-reservation=0B
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
00:SCAN HDFS [functional.alltypes]
partitions=0/24 files=0 size=0B
stats-rows=7300 extrapolated-rows=disabled
table stats: rows=7300 size=478.45KB
column stats: all
mem-estimate=0B mem-reservation=0B
tuple-ids=0 row-size=97B cardinality=0
====
# Edge case: sample 1%, at least one file should be selected
# Expected: exactly one partition/file (19.95KB) is scanned and the cardinality is 73,
# i.e. 1% of the 7300 stats-rows.
select * from functional.alltypes tablesample system(1) repeatable(1234)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
00:SCAN HDFS [functional.alltypes]
partitions=1/24 files=1 size=19.95KB
stats-rows=7300 extrapolated-rows=disabled
table stats: rows=7300 size=478.45KB
column stats: all
mem-estimate=32.00MB mem-reservation=0B
tuple-ids=0 row-size=97B cardinality=73
====
# Edge case: sample 1% and prune partitions, at least one file should be selected
# Expected: one file is still scanned after restricting to 'year = 2010'. stats-rows
# is the pruned row count (3650) and the cardinality is 37, about 1% of those rows.
select * from functional.alltypes tablesample system(1) repeatable(1234)
where year = 2010
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
00:SCAN HDFS [functional.alltypes]
partitions=1/24 files=1 size=20.36KB
stats-rows=3650 extrapolated-rows=disabled
table stats: rows=7300 size=478.45KB
column stats: all
mem-estimate=32.00MB mem-reservation=0B
tuple-ids=0 row-size=97B cardinality=37
====
# Edge case: sample 100%, all files should be selected
# Expected: partitions=24/24 files=24, the scanned size equals the table size
# (478.45KB) and the cardinality equals the full 7300 stats-rows.
select * from functional.alltypes tablesample system (100)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=128.00MB mem-reservation=0B
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
stats-rows=7300 extrapolated-rows=disabled
table stats: rows=7300 size=478.45KB
column stats: all
mem-estimate=128.00MB mem-reservation=0B
tuple-ids=0 row-size=97B cardinality=7300
====
# Table that has no stats.
# Expected: files are still sampled (3 of 24 partitions/files), but stats-rows, table
# stats, column stats and the resulting cardinality are all reported as unavailable.
select id from functional_parquet.alltypes tablesample system(10) repeatable(1234)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
00:SCAN HDFS [functional_parquet.alltypes]
partitions=3/24 files=3 size=22.53KB
stats-rows=unavailable extrapolated-rows=disabled
table stats: rows=unavailable size=unavailable
column stats: unavailable
mem-estimate=16.00MB mem-reservation=0B
tuple-ids=0 row-size=4B cardinality=unavailable
====
# Sampling in a subquery.
# Expected: the sample applies only to the scan of t2 inside the EXISTS subquery
# (1 of 4 partitions, cardinality 10 out of 100 stats-rows). The outer scan of t1 is
# not sampled and still reads all 24 partitions with cardinality 7300.
select id from functional.alltypes t1 where exists (
select id from functional.alltypessmall t2 tablesample system(10) repeatable(1234)
where t1.id = t2.id)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=161.94MB mem-reservation=1.94MB
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB
| tuple-ids=0 row-size=4B cardinality=10
|
|--01:SCAN HDFS [functional.alltypessmall t2]
| partitions=1/4 files=1 size=1.57KB
| stats-rows=100 extrapolated-rows=disabled
| table stats: rows=100 size=6.32KB
| column stats: all
| mem-estimate=32.00MB mem-reservation=0B
| tuple-ids=1 row-size=4B cardinality=10
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t1.id
stats-rows=7300 extrapolated-rows=disabled
table stats: rows=7300 size=478.45KB
column stats: all
mem-estimate=128.00MB mem-reservation=0B
tuple-ids=0 row-size=4B cardinality=7300
====
# Sampling in WITH-clause view.
# Expected: the sample declared inside the WITH-clause view reaches the scan, which
# reads 3 of 24 partitions/files (60.68KB) with cardinality 730. The row-size is 4B
# because the outer query selects only 'id'.
with t as (select * from functional.alltypes tablesample system(10) repeatable(1234))
select id from t
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B
PLAN-ROOT SINK
| mem-estimate=0B mem-reservation=0B
|
00:SCAN HDFS [functional.alltypes]
partitions=3/24 files=3 size=60.68KB
stats-rows=7300 extrapolated-rows=disabled
table stats: rows=7300 size=478.45KB
column stats: all
mem-estimate=32.00MB mem-reservation=0B
tuple-ids=0 row-size=4B cardinality=730
====