blob: ce28957a9a7b74acf6ea9b9bd021b61b6dc5f532 [file] [log] [blame]
# sort on a non-deterministic expr, gets materialized
select * from functional.alltypes order by random()
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=140.00MB mem-reservation=12.03MB thread-reservation=2
PLAN-ROOT SINK
| output exprs: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:SORT
| order by: random() ASC
| materialized: random()
| mem-estimate=12.00MB mem-reservation=12.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=97B cardinality=7.30K
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=310
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=7.30K
in pipelines: 00(GETNEXT)
====
# sort on a deterministic expr that exceeds the cost threshold
select * from functional.alltypes order by abs(id) + abs(id)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=140.00MB mem-reservation=12.03MB thread-reservation=2
PLAN-ROOT SINK
| output exprs: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:SORT
| order by: abs(id) + abs(id) ASC
| materialized: abs(id) + abs(id)
| mem-estimate=12.00MB mem-reservation=12.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=97B cardinality=7.30K
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=310
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=7.30K
in pipelines: 00(GETNEXT)
====
# sort on a deterministic expr that doesn't exceed the cost threshold
select * from functional.alltypes order by tinyint_col + 1
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=140.00MB mem-reservation=12.03MB thread-reservation=2
PLAN-ROOT SINK
| output exprs: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:SORT
| order by: tinyint_col + 1 ASC
| mem-estimate=12.00MB mem-reservation=12.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=89B cardinality=7.30K
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=310
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=7.30K
in pipelines: 00(GETNEXT)
====
# sort on multiple exprs, subset is materialized
select * from functional.alltypes
order by dayofweek(timestamp_col), true, id + 1, string_col = date_string_col, id = tinyint_col
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=140.00MB mem-reservation=12.03MB thread-reservation=2
PLAN-ROOT SINK
| output exprs: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:SORT
| order by: dayofweek(timestamp_col) ASC, TRUE ASC, id + 1 ASC, string_col = date_string_col ASC, id = tinyint_col ASC
| materialized: dayofweek(timestamp_col), string_col = date_string_col
| mem-estimate=12.00MB mem-reservation=12.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=94B cardinality=7.30K
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=310
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=7.30K
in pipelines: 00(GETNEXT)
====
# expensive analytic order by expr gets materialized
select last_value(id) over (order by to_date(timestamp_col), bool_col is null)
from functional.alltypes
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=144.00MB mem-reservation=16.03MB thread-reservation=2
PLAN-ROOT SINK
| output exprs: last_value(id)
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
02:ANALYTIC
| functions: last_value(id)
| order by: to_date(timestamp_col) ASC, bool_col IS NULL ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=3,2 row-size=37B cardinality=7.30K
| in pipelines: 01(GETNEXT)
|
01:SORT
| order by: to_date(timestamp_col) ASC, bool_col IS NULL ASC
| materialized: to_date(timestamp_col)
| mem-estimate=12.00MB mem-reservation=12.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=3 row-size=33B cardinality=7.30K
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=310
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=21B cardinality=7.30K
in pipelines: 00(GETNEXT)
====
# expensive order by expr in top-n gets materialized
select id from functional.alltypes order by string_col like 'a.*b', id * bigint_col,
regexp_replace(string_col, 'a.*b', 'c') limit 10
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=2
PLAN-ROOT SINK
| output exprs: id
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:TOP-N [LIMIT=10]
| order by: string_col LIKE 'a.*b' ASC, id * bigint_col ASC, regexp_replace(string_col, 'a.*b', 'c') ASC
| materialized: string_col LIKE 'a.*b', regexp_replace(string_col, 'a.*b', 'c')
| mem-estimate=250B mem-reservation=0B thread-reservation=0
| tuple-ids=1 row-size=25B cardinality=10
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=310
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=25B cardinality=7.30K
in pipelines: 00(GETNEXT)
====
# sort on udf, gets materialized
select * from functional.alltypes order by TestFn(double_col)
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=140.00MB mem-reservation=12.03MB thread-reservation=2
PLAN-ROOT SINK
| output exprs: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:SORT
| order by: default.testfn(double_col) ASC
| materialized: default.testfn(double_col)
| mem-estimate=12.00MB mem-reservation=12.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=93B cardinality=7.30K
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=310
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=89B cardinality=7.30K
in pipelines: 00(GETNEXT)
====
# sort expr contains SlotRefs that don't need to be materialized separately
select concat(date_string_col, string_col) c from functional.alltypes order by c
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=140.00MB mem-reservation=12.03MB thread-reservation=2
PLAN-ROOT SINK
| output exprs: concat(date_string_col, string_col)
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
01:SORT
| order by: concat(date_string_col, string_col) ASC
| materialized: concat(date_string_col, string_col)
| mem-estimate=12.00MB mem-reservation=12.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=1 row-size=12B cardinality=7.30K
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
stored statistics:
table: rows=7.30K size=478.45KB
partitions: 24/24 rows=7.30K
columns: all
extrapolated-rows=disabled max-scan-range-rows=310
mem-estimate=128.00MB mem-reservation=32.00KB thread-reservation=1
tuple-ids=0 row-size=33B cardinality=7.30K
in pipelines: 00(GETNEXT)
====
# IMPALA-5270: Rand() and udf inside inline view referenced by analytic function.
select id, row_number() over (partition by u order by r) from
(select id, random() r, u from
(select id, TestFn(double_col) u from functional.alltypestiny) v1
) v2
order by id
---- PLAN
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
| Per-Host Resources: mem-estimate=42.00MB mem-reservation=16.00MB thread-reservation=2
PLAN-ROOT SINK
| output exprs: id, row_number()
| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
03:SORT
| order by: id ASC
| mem-estimate=6.00MB mem-reservation=6.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=4 row-size=12B cardinality=8
| in pipelines: 03(GETNEXT), 01(OPEN)
|
02:ANALYTIC
| functions: row_number()
| partition by: u
| order by: random() ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=6,5 row-size=32B cardinality=8
| in pipelines: 01(GETNEXT)
|
01:SORT
| order by: default.testfn(double_col) ASC NULLS FIRST, random() ASC
| materialized: default.testfn(double_col), random()
| mem-estimate=6.00MB mem-reservation=6.00MB spill-buffer=2.00MB thread-reservation=0
| tuple-ids=6 row-size=24B cardinality=8
| in pipelines: 01(GETNEXT), 00(OPEN)
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
stored statistics:
table: rows=8 size=460B
partitions: 4/4 rows=8
columns: all
extrapolated-rows=disabled max-scan-range-rows=2
mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
tuple-ids=0 row-size=12B cardinality=8
in pipelines: 00(GETNEXT)
====