blob: c3811201bbdaa197029d110cfd429516f5dea00f [file] [log] [blame]
====
---- QUERY
# Explain a simple hash join query.
explain
select *
from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey;
---- RESULTS: VERIFY_IS_EQUAL
row_regex:.*Max Per-Host Resource Reservation: Memory=[0-9.]*MB Threads=[0-9]*.*
row_regex:.*Per-Host Resource Estimates: Memory=[0-9.]*MB.*
'Analyzed query: SELECT * FROM tpch.lineitem INNER JOIN tpch.orders ON l_orderkey'
'= o_orderkey'
''
'F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1
' PLAN-ROOT SINK'
' | output exprs: tpch.lineitem.l_orderkey, tpch.lineitem.l_partkey, tpch.lineitem.l_suppkey, tpch.lineitem.l_linenumber, tpch.lineitem.l_quantity, tpch.lineitem.l_extendedprice, tpch.lineitem.l_discount, tpch.lineitem.l_tax, tpch.lineitem.l_returnflag, tpch.lineitem.l_linestatus, tpch.lineitem.l_shipdate, tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate, tpch.lineitem.l_shipinstruct, tpch.lineitem.l_shipmode, tpch.lineitem.l_comment, tpch.orders.o_orderkey, tpch.orders.o_custkey, tpch.orders.o_orderstatus, tpch.orders.o_totalprice, tpch.orders.o_orderdate, tpch.orders.o_orderpriority, tpch.orders.o_clerk, tpch.orders.o_shippriority, tpch.orders.o_comment'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B spill-buffer=[0-9.]*MB thread-reservation=0
' |'
' 04:EXCHANGE [UNPARTITIONED]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' tuple-ids=0,1 row-size=402B cardinality=5.76M'
' in pipelines: 00(GETNEXT)'
''
'F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3'
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=04, UNPARTITIONED]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' 02:HASH JOIN [INNER JOIN, BROADCAST]'
' | hash predicates: l_orderkey = o_orderkey'
' | fk/pk conjuncts: l_orderkey = o_orderkey'
' | runtime filters: RF000[bloom] <- o_orderkey'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B spill-buffer=[0-9.]*MB thread-reservation=.*
' | tuple-ids=0,1 row-size=402B cardinality=5.76M'
' | in pipelines: 00(GETNEXT), 01(OPEN)'
' |'
' |--03:EXCHANGE [BROADCAST]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' | tuple-ids=1 row-size=171B cardinality=1.50M'
' | in pipelines: 01(GETNEXT)'
' |'
' 00:SCAN $FILESYSTEM_NAME [tpch.lineitem, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
' runtime filters: RF000[bloom] -> l_orderkey'
' stored statistics:'
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
' columns: all'
row_regex:.*extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
' file formats: [TEXT]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1.*
' tuple-ids=0 row-size=231B cardinality=6.00M'
' in pipelines: 00(GETNEXT)'
''
row_regex:.*F01:PLAN FRAGMENT \[RANDOM\] hosts=[1-2] instances=[1-2]
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=03, BROADCAST]'
row_regex:.* | mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' 01:SCAN $FILESYSTEM_NAME [tpch.orders, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
' stored statistics:'
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
' columns: all'
row_regex:.* extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
' file formats: [TEXT]'
row_regex:.* mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
' tuple-ids=1 row-size=171B cardinality=1.50M'
' in pipelines: 01(GETNEXT)'
====
---- QUERY
# Tests the warning about missing table stats in the explain header.
explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col)
from functional_avro.alltypes t1
inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id)
left outer join functional_avro.alltypes t3 on (t2.id = t3.id)
where t1.month = 1 and t2.year = 2009 and t3.bool_col = false
---- RESULTS: VERIFY_IS_SUBSET
'WARNING: The following tables are missing relevant table and/or column statistics.'
'functional_avro.alltypes, functional_parquet.alltypessmall'
====
---- QUERY
# Tests whether all file formats are present in the explain string.
explain select * from functional.alltypesmixedformat;
---- RESULTS: VERIFY_IS_SUBSET
' file formats: [PARQUET, RC_FILE, SEQUENCE_FILE, TEXT]'
====
---- QUERY
# Tests whether file formats are present in the explain string in case of Iceberg tables.
explain select * from functional_parquet.iceberg_partitioned;
---- RESULTS: VERIFY_IS_SUBSET
' file formats: [PARQUET]'
====
---- QUERY
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain cast exprs
# 'd_date' is STRING type, rewrite succeed
explain
select month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd')) a
from tpcds_parquet.date_dim
group by month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd'))
order by month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd'))
---- RESULTS: VERIFY_IS_SUBSET
'Analyzed query: SELECT month(to_timestamp(d_date, 'yyyy-MM-dd')) a FROM'
'tpcds_parquet.date_dim GROUP BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ORDER'
'BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ASC'
====
---- QUERY
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain aliases
# 'd_date' is STRING type, rewrite succeed
# Aliases in group by clauses will substituted by exprs
explain
select month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd')) a
from tpcds_parquet.date_dim
group by a
order by a
---- RESULTS: VERIFY_IS_SUBSET
'Analyzed query: SELECT month(to_timestamp(d_date, 'yyyy-MM-dd')) a FROM'
'tpcds_parquet.date_dim GROUP BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ORDER'
'BY a ASC'
====
---- QUERY
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain exprs
# 'd_date_sk' is INT type, not rewrite
explain
select month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd')) a
from tpcds_parquet.date_dim
group by month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd'))
order by month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd'))
---- RESULTS: VERIFY_IS_SUBSET
'Analyzed query: SELECT month(to_timestamp(CAST(d_date_sk AS STRING),'
''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY'
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY'
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ASC'
====
---- QUERY
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain aliases
# 'd_date_sk' is INT type, not rewrite
# Aliases in group by clauses will substituted by exprs
explain
select month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd')) a
from tpcds_parquet.date_dim
group by a
order by a
---- RESULTS: VERIFY_IS_SUBSET
'Analyzed query: SELECT month(to_timestamp(CAST(d_date_sk AS STRING),'
''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY'
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY a ASC'
====
---- QUERY
# Tests HIDE_ANALYZED_QUERY=TRUE
set HIDE_ANALYZED_QUERY=TRUE;
explain select count(*) from tpch.region
---- RESULTS: VERIFY_IS_NOT_IN
'Analyzed query:'
====