blob: 3817a82f4d5ea385a498035948cb3594613cf5d0 [file] [log] [blame]
# Small limit on a functional_seq table: the distributed plan contains no exchange
# and is identical to the single-node plan.
select * from functional_seq.alltypes t1 limit 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional_seq.alltypes t1]
HDFS partitions=24/24 files=24 size=557.47KB
limit: 5
row-size=80B cardinality=5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional_seq.alltypes t1]
HDFS partitions=24/24 files=24 size=557.47KB
limit: 5
row-size=80B cardinality=5
====
# Query is over the limit of 8 rows to be optimized, will distribute the query
select * from functional.alltypes t1 limit 10
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
limit: 10
row-size=89B cardinality=10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
| limit: 10
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
limit: 10
row-size=89B cardinality=10
====
# Query is optimized, run on coordinator only
select * from functional.alltypes t1 limit 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
limit: 5
row-size=89B cardinality=5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
limit: 5
row-size=89B cardinality=5
====
# If a predicate is applied the optimization is disabled
select * from functional.alltypes t1 where t1.id < 99 limit 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: t1.id < 99
limit: 5
row-size=89B cardinality=5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
| limit: 5
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: t1.id < 99
limit: 5
row-size=89B cardinality=5
====
# No optimization for hbase tables when a predicate is applied
select * from functional_hbase.alltypes t1 where t1.id < 99 limit 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypes t1]
predicates: t1.id < 99
limit: 5
row-size=80B cardinality=5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
| limit: 5
|
00:SCAN HBASE [functional_hbase.alltypes t1]
predicates: t1.id < 99
limit: 5
row-size=80B cardinality=5
====
# Applies optimization for small queries in hbase
select * from functional_hbase.alltypes t1 limit 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypes t1]
limit: 5
row-size=80B cardinality=5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypes t1]
limit: 5
row-size=80B cardinality=5
====
# Constant single-row insert into an hbase table: the distributed plan contains no
# exchange and is identical to the single-node plan.
insert into
functional_hbase.alltypes
values (1, 1, true, "1999-12-01", 2.0, 1.0, 1, 12, 2, "abs",
cast(now() as timestamp), 1, 1999)
---- PLAN
WRITE TO HBASE table=functional_hbase.alltypes
|
00:UNION
constant-operands=1
row-size=57B cardinality=1
---- DISTRIBUTEDPLAN
WRITE TO HBASE table=functional_hbase.alltypes
|
00:UNION
constant-operands=1
row-size=57B cardinality=1
====
# CTAS from an hbase scan with limit 5: the distributed plan contains no exchange
# and is identical to the single-node plan.
create table tm as select * from functional_hbase.alltypes limit 5
---- PLAN
WRITE TO HDFS [default.tm, OVERWRITE=false]
| partitions=1
|
00:SCAN HBASE [functional_hbase.alltypes]
limit: 5
row-size=80B cardinality=5
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.tm, OVERWRITE=false]
| partitions=1
|
00:SCAN HBASE [functional_hbase.alltypes]
limit: 5
row-size=80B cardinality=5
====
# CTAS from an hbase scan with limit 50: the distributed plan adds an UNPARTITIONED
# exchange above the scan (presumably because 50 rows exceeds the small-query row
# limit -- confirm against the threshold used by this test).
create table tm as select * from functional_hbase.alltypes limit 50
---- PLAN
WRITE TO HDFS [default.tm, OVERWRITE=false]
| partitions=1
|
00:SCAN HBASE [functional_hbase.alltypes]
limit: 50
row-size=80B cardinality=50
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.tm, OVERWRITE=false]
| partitions=1
|
01:EXCHANGE [UNPARTITIONED]
| limit: 50
|
00:SCAN HBASE [functional_hbase.alltypes]
limit: 50
row-size=80B cardinality=50
====
# Union all of two limited hbase scans with a combined cardinality of 7: the
# distributed plan contains no exchanges and is identical to the single-node plan.
select * from functional_hbase.alltypes limit 5
union all
select * from functional_hbase.alltypes limit 2
---- PLAN
PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
| row-size=80B cardinality=7
|
|--02:SCAN HBASE [functional_hbase.alltypes]
| limit: 2
| row-size=80B cardinality=2
|
01:SCAN HBASE [functional_hbase.alltypes]
limit: 5
row-size=80B cardinality=5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
| row-size=80B cardinality=7
|
|--02:SCAN HBASE [functional_hbase.alltypes]
| limit: 2
| row-size=80B cardinality=2
|
01:SCAN HBASE [functional_hbase.alltypes]
limit: 5
row-size=80B cardinality=5
====
# Union all of two limited hbase scans with a combined cardinality of 10: in the
# distributed plan each scan feeds its own UNPARTITIONED exchange (presumably because
# the combined row count exceeds the small-query row limit -- confirm).
select * from functional_hbase.alltypes limit 5
union all
select * from functional_hbase.alltypes limit 5
---- PLAN
PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
| row-size=80B cardinality=10
|
|--02:SCAN HBASE [functional_hbase.alltypes]
| limit: 5
| row-size=80B cardinality=5
|
01:SCAN HBASE [functional_hbase.alltypes]
limit: 5
row-size=80B cardinality=5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
| row-size=80B cardinality=10
|
|--04:EXCHANGE [UNPARTITIONED]
| | limit: 5
| |
| 02:SCAN HBASE [functional_hbase.alltypes]
| limit: 5
| row-size=80B cardinality=5
|
03:EXCHANGE [UNPARTITIONED]
| limit: 5
|
01:SCAN HBASE [functional_hbase.alltypes]
limit: 5
row-size=80B cardinality=5
====
# testtbl does not have stats computed, so the small query optimization will be disabled
select * from
functional.testtbl a join functional.testtbl b on a.id = b.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.id = b.id
| runtime filters: RF000 <- b.id
| row-size=48B cardinality=0
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl b]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl a]
HDFS partitions=1/1 files=0 size=0B
runtime filters: RF000 -> a.id
row-size=24B cardinality=0
====
# Cross join of functional.testtbl with itself (no join predicate): the distributed
# plan uses a broadcast nested loop join with exchanges, i.e. it is not planned as a
# small query (presumably also because testtbl has no stats -- confirm).
select * from
functional.testtbl a, functional.testtbl b
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=48B cardinality=0
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl b]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl a]
HDFS partitions=1/1 files=0 size=0B
row-size=24B cardinality=0
====
# IMPALA-3335: test that queries with joins can be run as small queries
select * from
functional.alltypestiny a
where a.id in (select id from functional.alltypestiny limit 5) limit 5
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = id
| runtime filters: RF000 <- id
| limit: 5
| row-size=89B cardinality=5
|
|--01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| limit: 5
| row-size=4B cardinality=5
|
00:SCAN HDFS [functional.alltypestiny a]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> a.id
row-size=89B cardinality=8
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
| limit: 5
|
02:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: a.id = id
| runtime filters: RF000 <- id
| limit: 5
| row-size=89B cardinality=5
|
|--04:EXCHANGE [BROADCAST]
| |
| 03:EXCHANGE [UNPARTITIONED]
| | limit: 5
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| limit: 5
| row-size=4B cardinality=5
|
00:SCAN HDFS [functional.alltypestiny a]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> a.id
row-size=89B cardinality=8
====
# Test correct single-node planning for mixed union distinct/all (IMPALA-1553).
select
id, bool_col
from functional.alltypestiny a
where year=2009 and month=1
union distinct
select id, bool_col
from functional.alltypestiny b
where year=2009 and month=1
union all
select id, bool_col
from functional.alltypestiny c
where year=2009 and month=2
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:UNION
| pass-through-operands: 03
| row-size=5B cardinality=6
|
|--05:SCAN HDFS [functional.alltypestiny c]
| partition predicates: `year` = 2009, `month` = 2
| HDFS partitions=1/4 files=1 size=115B
| row-size=5B cardinality=2
|
03:AGGREGATE [FINALIZE]
| group by: id, bool_col
| row-size=5B cardinality=4
|
00:UNION
| row-size=5B cardinality=4
|
|--02:SCAN HDFS [functional.alltypestiny b]
| partition predicates: `year` = 2009, `month` = 1
| HDFS partitions=1/4 files=1 size=115B
| row-size=5B cardinality=2
|
01:SCAN HDFS [functional.alltypestiny a]
partition predicates: `year` = 2009, `month` = 1
HDFS partitions=1/4 files=1 size=115B
row-size=5B cardinality=2
====
# IMPALA-2527: Tests that the small query optimization is disabled for collection types
select key from functional.allcomplextypes.map_map_col.value limit 5;
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
| limit: 5
|
00:SCAN HDFS [functional.allcomplextypes.map_map_col.value]
partitions=0/0 files=0 size=0B
limit: 5
row-size=12B cardinality=0
====
# Test query on large Kudu table with all Kudu primary key columns in equivalence
# predicates: not more than 1 row could be returned from Kudu,
# small query optimization should be enabled.
select * from tpch_kudu.partsupp where ps_partkey=2 and ps_suppkey=5003
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:SCAN KUDU [tpch_kudu.partsupp]
kudu predicates: ps_partkey = 2, ps_suppkey = 5003
row-size=172B cardinality=1
====
# Test query on large Kudu table with partial Kudu primary key columns in equivalence
# predicates: any number of rows could be returned from Kudu,
# small query optimization should be disabled
select * from tpch_kudu.partsupp where ps_partkey=2
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN KUDU [tpch_kudu.partsupp]
kudu predicates: ps_partkey = 2
row-size=172B cardinality=4
====
# Test that the small query opt is disabled for distinct partition key scans, even
# with a small number of files scanned, because the cost of opening each file
# is taken into account.
select distinct month from functional.alltypestiny;
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| group by: `month`
| row-size=4B cardinality=4
|
02:EXCHANGE [HASH(`month`)]
|
01:AGGREGATE [STREAMING]
| group by: `month`
| row-size=4B cardinality=4
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
partition key scan
row-size=4B cardinality=4
====
# Test that the small query opt is disabled for distinct partition key scans with a large
# number of files scanned.
select distinct ss_sold_date_sk from tpcds.store_sales;
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| group by: ss_sold_date_sk
| row-size=4B cardinality=1.82K
|
02:EXCHANGE [HASH(ss_sold_date_sk)]
|
01:AGGREGATE [STREAMING]
| group by: ss_sold_date_sk
| row-size=4B cardinality=1.82K
|
00:SCAN HDFS [tpcds.store_sales]
HDFS partitions=1824/1824 files=1824 size=346.60MB
partition key scan
row-size=4B cardinality=1.82K
====