KYLIN-5730 Add more helpful message
diff --git a/dev-support/release-manager/tpch-benchmark/README.md b/dev-support/release-manager/tpch-benchmark/README.md
index b02372d..4d348b2 100644
--- a/dev-support/release-manager/tpch-benchmark/README.md
+++ b/dev-support/release-manager/tpch-benchmark/README.md
@@ -8,18 +8,15 @@
![TPC-H Schema](resources/TPCH-Schema.png)
+
+# How to create Kylin model based on query pattern
+
+
### Query 1 : Pricing Summary Report Query
#### Original Version
-
-
```sql
-/**
- * TPC-H Query 1 : Pricing Summary Report Query / 价格统计报告查询
- * 在单个表lineitem上查询某个时间段内,对已经付款的、已经运送的等各类商品进行统计,包括业务量的计费、发货、折扣、税、平均价格等信息。
- * 带有分组、排序、聚集操作并存的单表查询操作。这个查询会导致表上的数据有95%到97%行被读取到。
- */
-select /*+ MODEL_PRIORITY(Lineitem) */
+select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
@@ -31,7 +28,7 @@
avg(l_discount) as avg_disc,
count(*) as count_order
from tpch_cn.lineitem
-where l_shipdate <= '1998-12-01' - interval '90' day -- DELTA is randomly selected within [60. 120].
+where l_shipdate <= '1998-12-01' - interval 'DELTA' day -- DELTA is randomly selected within [60. 120].
group by l_returnflag,
l_linestatus
order by l_returnflag,
@@ -65,12 +62,6 @@
#### Original Version
```sql
-/**
- * TPC-H Query 2 : Minimum Cost Supplier Query / 最小代价供货商查询
- * Return the first 100 selected rows
- * 得到给定的区域内,对于指定的零件(某一类型和大小的零件),哪个供应者能以最低的价格供应它,就可以选择哪个供应者来订货。
- * 带有排序、聚集操作、子查询并存的多表查询操作。
- */
select s_acctbal,
s_name,
n_name,
@@ -112,7 +103,9 @@
#### Modified Version
```sql
-select s_acctbal,
+
+select
+ s_acctbal,
s_name,
n_name,
p_partkey,
@@ -121,73 +114,31 @@
s_phone,
s_comment
from
- tpch_cn.partsupp,
- tpch_cn.part,
- tpch_cn.supplier,
- tpch_cn.nation,
- tpch_cn.region
-where p_partkey = ps_partkey
- and s_suppkey = ps_suppkey
- and p_size = 15
+ tpch_cn.partsupp
+ join tpch_cn.part on p_partkey = ps_partkey
+ join tpch_cn.supplier on s_suppkey = ps_suppkey
+ join tpch_cn.nation on s_nationkey = n_nationkey
+ join tpch_cn.region on n_regionkey = r_regionkey
+where
+ p_size = 15
and p_type like '%BRASS'
- and s_nationkey = n_nationkey
- and n_regionkey = r_regionkey
and r_name = 'EUROPE'
and ps_supplycost
= (
- select min(ps_supplycost)
- from tpch_cn.partsupp,
- tpch_cn.supplier,
- tpch_cn.nation,
- tpch_cn.region
+ select
+ min(ps_supplycost)
+ from tpch_cn.partsupp
+ join tpch_cn.supplier on s_suppkey = ps_suppkey
+ join tpch_cn.nation on s_nationkey = n_nationkey
+ join tpch_cn.region on n_regionkey = r_regionkey
where p_partkey = ps_partkey
- and s_suppkey = ps_suppkey
- and s_nationkey = n_nationkey
- and n_regionkey = r_regionkey
- and r_name = 'EUROPE')
+ and r_name = 'EUROPE'
+ )
order by s_acctbal desc,
n_name,
s_name,
p_partkey
limit 100;
-
-select min(ps_supplycost)
-from tpch_cn.partsupp,
- tpch_cn.supplier,
- tpch_cn.nation,
- tpch_cn.region
-where s_suppkey = ps_suppkey
- and s_nationkey = n_nationkey
- and n_regionkey = r_regionkey
- and r_name = 'EUROPE';
-
-select s_acctbal,
- s_name,
- n_name,
- p_partkey,
- p_mfgr,
- s_address,
- s_phone,
- s_comment
-from
- tpch_cn.partsupp,
- tpch_cn.part,
- tpch_cn.supplier,
- tpch_cn.nation,
- tpch_cn.region
-where p_partkey = ps_partkey
- and s_suppkey = ps_suppkey
- and p_size = 15
- and p_type like '%BRASS'
- and s_nationkey = n_nationkey
- and n_regionkey = r_regionkey
- and r_name = 'EUROPE'
-order by s_acctbal desc,
- n_name,
- s_name,
- p_partkey
-limit 100;
-
```
@@ -196,12 +147,6 @@
#### Original Version
```sql
-/**
- * TPC-H Query 3 : Shipping Priority Query / 运送优先级查询
- * Return the first 10 selected rows
- * 查询得到收入在前10位的尚未运送的订单。在指定的日期之前还没有运送的订单中具有最大收入的订单的运送优先级(订单按照收入的降序排序)和潜在的收入(潜在的收入为l_extendedprice * (1-l_discount)的和)。
- * 带有分组、排序、聚集操作并存的三表查询操作。
- */
select l_orderkey,
sum(l_extendedprice * (1 - l_discount)) as revenue,
o_orderdate,
@@ -214,14 +159,17 @@
and l_orderkey = o_orderkey
and o_orderdate < '1995-03-15' -- DATE is a randomly selected day within [1995-03-01 .. 1995-03-31].
and l_shipdate > '1995-03-15'
-group by l_orderkey,
- o_orderdate,
- o_shippriority
-order by revenue desc, o_orderdate
+group by
+ l_orderkey,
+ o_orderdate,
+ o_shippriority
+order by
+ revenue desc,
+ o_orderdate
limit 10;
```
-
+#### Modified Version
```sql
select l_orderkey,
sum(l_extendedprice * (1 - l_discount)) as revenue,
@@ -246,36 +194,28 @@
#### Original Version
```sql
-/**
- * TPC-H Query 4 : Order Priority Checking Query / 订单优先级查询
- * 查询得到订单优先级统计值。计算给定的某三个月的订单的数量,在每个订单中至少有一行由顾客在它的提交日期之后收到。
- * 带有分组、排序、聚集操作、子查询并存的单表查询操作。子查询是相关子查询。
- */
select o_orderpriority,
count(*) as order_count
from tpch_cn.orders
where o_orderdate >= '1993-07-01' -- DATE is the first day of a randomly selected month between the first month of 1993 and the 10th month of 1997.
and o_orderdate < DATE '1993-07-01' + interval '3' month
- and exists ( -- exists 子查询
- select *
- from tpch_cn.lineitem
- where l_orderkey = o_orderkey
- and l_commitdate < l_receiptdate
- )
+ and exists ( -- exists subquery
+ select *
+ from tpch_cn.lineitem
+ where l_orderkey = o_orderkey
+ and l_commitdate < l_receiptdate
+ )
group by o_orderpriority
order by o_orderpriority;
```
-### Query 5
+#### Modified Version
+
+### Local Supplier Volume Query (Q5)
#### Original Version
```sql
-/**
- * TPC-H Query 5 : Local Supplier Volume Query / 某地区供货商为公司带来的收入查询
- * 通过某个地区零件供货商而获得的收入(收入按sum(l_extendedprice * (1 -l_discount))计算)统计信息。可用于决定在给定的区域是否需要建立一个当地分配中心
- * 带有分组、排序、聚集操作、子查询并存的多表连接查询操作。
- */
select n_name,
sum(l_extendedprice * (1 - l_discount)) as revenue
from tpch_cn.customer,
@@ -287,7 +227,7 @@
where c_custkey = o_custkey
and l_orderkey = o_orderkey
and l_suppkey = s_suppkey
- and c_nationkey = s_nationkey -- 这里形成一个环, Kylin 不支持这样的 Join Graph
+ and c_nationkey = s_nationkey -- Table Ring
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'ASIA' -- REGION is randomly selected within the list of values defined for R_NAME in C;aise 4.2.3;
@@ -297,6 +237,8 @@
order by revenue desc;
```
+#### Modified Version
+
```sql
select n1.n_name,
sum(l_extendedprice * (1 - l_discount)) as revenue
@@ -317,18 +259,12 @@
```
-### Query 6
+### Forecasting Revenue Change Query (Q6)
#### Original Version
```sql
-/**
- * TPC-H Query 6 : Forecasting Revenue Change Query / 预测收入变化查询
- * 得到某一年中通过变换折扣带来的增量收入。这是典型的“what-if”判断,用来寻找增加收入的途径。
- * 预测收入变化查询考虑了指定的一年中折扣在“DISCOUNT-0.01”和“DISCOUNT+0.01”之间的已运送的所有订单,求解把l_quantity小于quantity的订单的折扣消除之后总收入增加的数量。
- * 带有聚集操作的单表查询操作。查询语句使用了BETWEEN-AND操作符,有的数据库可以对BETWEEN-AND进行优化。
- */
-select sum(l_extendedprice * l_discount) as revenue -- 潜在的收入增加量
+select sum(l_extendedprice * l_discount) as revenue
from tpch_cn.lineitem
where l_shipdate >= '1994-01-01' -- DATE is the first of January of a randomly selected year within [1993 .. 1997];
and l_shipdate < DATE '1994-01-01' + interval '1' year
@@ -337,8 +273,10 @@
;
```
+#### Modified Version
+
```sql
-select sum(l_extendedprice * l_discount) as revenue -- 潜在的收入增加量
+select sum(l_extendedprice * l_discount) as revenue
from tpch_cn.lineitem
where l_shipdate >= '1994-01-01' -- DATE is the first of January of a randomly selected year within [1993 .. 1997];
and l_shipdate < DATE '1995-01-01'
@@ -348,24 +286,21 @@
```
-### Query 7(Skipped)
+### Volume Shipping Query (Q7)(Bugged)
+
+> This query cannot be answered because Query engine has a bug at 5.0.0-beta
#### Original Version
```sql
-/**
- * TPC-H Query 7 : Volume Shipping Query / 货运盈利情况查询
- * 从供货商国家与销售商品的国家之间通过销售获利情况的查询。此查询确定在两国之间货运商品的量用以帮助重新谈判货运合同。
- * 带有分组、排序、聚集、子查询操作并存的多表查询操作。子查询的父层查询不存在其他查询对象,是格式相对简单的子查询。
- */
select supp_nation,
cust_nation,
l_year,
sum(volume) as revenue
-from ( -- 子查询
+from ( -- subquery
select n1.n_name as supp_nation,
n2.n_name as cust_nation,
- l_shipdate as l_year,
+ l_shipdate as l_year, -- extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from tpch_cn.supplier,
tpch_cn.lineitem,
@@ -393,20 +328,55 @@
l_year;
```
-### Query 8
+#### Modified Version
+
+```sql
+select supp_nation,
+ cust_nation,
+ l_year,
+ sum(volume) as revenue
+from ( -- subquery
+ select n1.n_name as supp_nation,
+ n2.n_name as cust_nation,
+ l_shipdate as l_year,
+ l_extendedprice * (1 - l_discount) as volume
+ from tpch_cn.lineitem
+ join tpch_cn.supplier on s_suppkey = l_suppkey
+ join tpch_cn.orders on o_orderkey = l_orderkey
+ join tpch_cn.customer on c_custkey = o_custkey
+ join tpch_cn.nation n1 on s_nationkey = n1.n_nationkey
+ join tpch_cn.nation n2 on c_nationkey = n2.n_nationkey
+ where
+ (
+ (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') -- NATION1 is randomly selected within the list of values defined for N_NAME in Clause 4.2.3;
+ or
+ (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') -- NATION2 is randomly selected within the list of values defined for N_NAME in Clause 4.2.3 and must be dif- ferent from the value selected for NATION1 in item 1 above.
+ )
+ and l_shipdate between '1995-01-01' and '1996-12-31'
+ ) as shipping
+group by supp_nation,
+ cust_nation,
+ l_year
+order by supp_nation,
+ cust_nation,
+ l_year;
+```
+
+### National Market Share Query (Q8)
#### Original Version
```sql
-/**
- * TPC-H Query 8 : National Market Share Query / 国家市场份额查询
- * 在过去的两年中一个给定零件类型在某国某地区市场份额的变化情况。
- * 带有分组、排序、聚集、子查询操作并存的查询操作。子查询的父层查询不存在其他查询对象,是格式相对简单的子查询,但子查询自身是多表连接的查询。
- */
+
select o_year,
- sum(case when nation = 'BRAZIL' then volume else 0 end) / sum(volume) -- NATION is randomly selected within the list of values defined for N_NAME in Clause 4.2.3;
- as mkt_share
-from ( -- 子查询
+ sum(
+ case
+ when nation = 'BRAZIL'
+ then volume
+ else 0
+ end) / sum(volume) -- NATION is randomly selected within the list of values defined for N_NAME in Clause 4.2.3;
+ as mkt_share
+from ( -- subquery
select year(o_orderdate) as o_year,
l_extendedprice * (1 - l_discount) as volume,
n2.n_name as nation
@@ -433,21 +403,44 @@
order by o_year;
```
+#### Modified Version
-### Query 9
+```sql
+select year(o_orderdate) as o_year,
+ sum(case
+ when n2.n_name = 'BRAZIL'
+ then l_extendedprice * (1 - l_discount)
+ else 0
+ end)
+ / sum(l_extendedprice * (1 - l_discount)) as mkt_share
+from
+ tpch_cn.lineitem
+ join tpch_cn.part on p_partkey = l_partkey
+ join tpch_cn.supplier on s_suppkey = l_suppkey
+ join tpch_cn.orders on l_orderkey = o_orderkey
+ join tpch_cn.customer on o_custkey = c_custkey
+ join tpch_cn.nation n1 on c_nationkey = n1.n_nationkey
+ join tpch_cn.nation n2 on s_nationkey = n2.n_nationkey
+ join tpch_cn.region on n1.n_regionkey = r_regionkey
+where
+ r_name = 'AMERICA'
+ and o_orderdate between '1995-01-01' and '1996-12-31'
+ and p_type = 'ECONOMY ANODIZED STEEL'
+group by
+ year(o_orderdate)
+order by
+ year(o_orderdate);
+```
+
+### Product Type Profit Measure Query (Q9)
#### Original Version
```sql
-/**
- * TPC-H Query 9 : Product Type Profit Measure Query / 产品类型利润估量查询
- * 查询每个国家每一年所有被定购的零件在一年中的总利润。
- * 带有分组、排序、聚集、子查询操作并存的查询操作。子查询的父层查询不存在其他查询对象,是格式相对简单的子查询,但子查询自身是多表连接的查询。
- * 子查询中使用了LIKE操作符,有的查询优化器不支持对LIKE操作符进行优化。
- */
+
select nation,
o_year,
sum(amount) as sum_profit
-from ( -- 子查询
+from ( -- subquery
select n_name as nation,
year(o_orderdate) as o_year,
l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
@@ -463,7 +456,8 @@
and p_partkey = l_partkey
and o_orderkey = l_orderkey
and s_nationkey = n_nationkey
- and p_name like '%green%') as profit -- COLOR is randomly selected within the list of values defined for the generation of P_NAME in Clause 4.2.3.
+ and p_name like '%green%'
+ ) as profit -- COLOR is randomly selected within the list of values defined for the generation of P_NAME in Clause 4.2.3.
group by nation,
o_year
order by nation,
@@ -495,16 +489,11 @@
```
-### Query 10
+### Returned Item Reporting Query (Q10)
#### Original Version
```sql
-/**
- * TPC-H Query 10 : Returned Item Reporting Query / 货运存在问题的查询
- * The Returned Item Reporting Query finds the top 20 customers
- * 每个国家在某时刻起的三个月内货运存在问题的客户和造成的损失。
- * 带有分组、排序、聚集操作并存的多表连接查询操作。
- */
+
select c_custkey,
c_name,
sum(l_extendedprice * (1 - l_discount)) as revenue,
@@ -521,7 +510,7 @@
and l_orderkey = o_orderkey
and o_orderdate >= date '1993-10-01' -- DATE is the first day of a randomly selected month from the second month of 1993 to the first month of 1995.
and o_orderdate < date '1993-10-01' + interval '3' month
- and l_returnflag = 'R' -- 被退货
+ and l_returnflag = 'R' -- be returned by customer
and c_nationkey = n_nationkey
group by c_custkey,
c_name,
@@ -534,47 +523,63 @@
limit 20;
```
+#### Modified Version
-### Query 11
+### Important Stock Identification Query (Q11)
#### Original Version
```sql
-/**
- * TPC-H Query 11 : Important Stock Identification Query / 库存价值查询
- * 查询库存中某个国家供应的零件的价值。
- * 带有分组、排序、聚集、子查询操作并存的多表连接查询操作。子查询位于分组操作的HAVING条件中。
- */
select ps_partkey,
- sum(ps_supplycost * ps_availqty) as value -- 聚集操作,商品的总价值
+ sum(ps_supplycost * ps_availqty) as value
from tpch_cn.partsupp,
tpch_cn.supplier,
tpch_cn.nation
where ps_suppkey = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY' -- NATION is randomly selected within the list of values defined for N_NAME in Clause 4.2.3;
-group by ps_partkey
-having sum(ps_supplycost * ps_availqty) > ( -- 带有HAVING子句的分组操作
- select sum(ps_supplycost * ps_availqty) * (0.0001 / 50) -- FRACTION is chosen as 0.0001 / SF.
- from tpch_cn.partsupp,
- tpch_cn.supplier,
- tpch_cn.nation
- where ps_suppkey = s_suppkey
- and s_nationkey = n_nationkey
- and n_name = 'GERMANY' -- 与父查询的 WHERE 条件一致
-)
+group by
+ ps_partkey
+ having sum(ps_supplycost * ps_availqty) > (
+ select
+ sum(ps_supplycost * ps_availqty) * (0.0001 / 50) -- FRACTION is chosen as 0.0001 / SF.
+ from tpch_cn.partsupp,
+ tpch_cn.supplier,
+ tpch_cn.nation
+ where ps_suppkey = s_suppkey
+ and s_nationkey = n_nationkey
+ and n_name = 'GERMANY'
+ )
order by value desc;
```
-### Query 12
+#### Modified Version
+```sql
+select ps_partkey,
+ sum(ps_supplycost * ps_availqty) as _value
+from tpch_cn.partsupp
+ join tpch_cn.supplier on ps_suppkey = s_suppkey
+ join tpch_cn.nation on s_nationkey = n_nationkey
+where
+ n_name = 'GERMANY'
+group by
+ ps_partkey
+having sum(ps_supplycost * ps_availqty) > (
+ select
+ sum(ps_supplycost * ps_availqty) * 0.005
+ from tpch_cn.partsupp
+ join tpch_cn.supplier on ps_suppkey = s_suppkey
+ join tpch_cn.nation on s_nationkey = n_nationkey
+ where n_name = 'GERMANY'
+ )
+order by _value desc;
+```
+
+
+### Shipping Modes and Order Priority Query (Q12)
#### Original Version
```sql
-/**
- * TPC-H Query 12 : Shipping Modes and Order Priority Query / 货运模式和订单优先级查询
- * 查询获得货运模式和订单优先级。可以帮助决策:选择便宜的货运模式是否会导致消费者更多的在合同日期之后收到货物,而对紧急优先命令产生负面影响。
- * 带有分组、排序、聚集操作并存的两表连接查询操作。
- */
select l_shipmode,
sum(case
when o_orderpriority = '1-URGENT'
@@ -598,23 +603,57 @@
order by l_shipmode;
```
-### Query 13
+```sql
+select l_shipmode,
+ sum(case
+ when o_orderpriority = '1-URGENT'
+ or o_orderpriority = '2-HIGH' then 1
+ else 0
+ end) as high_line_count,
+ sum(case
+ when o_orderpriority <> '1-URGENT'
+ and o_orderpriority <> '2-HIGH' then 1
+ else 0
+ end) as low_line_count
+from tpch_cn.lineitem join
+ tpch_cn.orders on o_orderkey = l_orderkey
+where l_shipmode in ('MAIL', 'SHIP')
+ and l_commitdate < l_receiptdate
+ and l_shipdate < l_commitdate
+ and l_receiptdate >= '1994-01-01'
+ and l_receiptdate < date '1995-01-01' + interval '1' year
+group by l_shipmode
+order by l_shipmode;
+```
+
+case
+when orders.o_orderpriority = '1-URGENT'
+or orders.o_orderpriority = '2-HIGH' then 1
+else 0
+end
+
+case
+when orders.o_orderpriority <> '1-URGENT'
+and orders.o_orderpriority <> '2-HIGH' then 1
+else 0
+end
+
+#### Modified Version
+
+### Customer Distribution Query (Q13)
#### Original Version
```sql
-/**
- * TPC-H Query 13 : Customer Distribution Query
- * 查询获得消费者的订单数量,包括过去和现在都没有订单记录的消费者
- * 带有分组、排序、聚集、子查询、左外连接操作并存的查询操作
- */
select c_count,
count(*) as custdist
from
-( -- 子查询
+( -- subquery
select c_custkey, count(o_orderkey) as c_count
- from tpch_cn.customer left outer join tpch_cn.orders -- 子查询中包括左外连接操作
- on c_custkey = o_custkey and o_comment not like '%special%requests%'
+ from tpch_cn.customer
+ left outer join tpch_cn.orders
+ on c_custkey = o_custkey
+ and o_comment not like '%special%requests%'
-- WORD1 is randomly selected from 4 possible values: special, pending, unusual, express.
-- WORD2 is randomly selected from 4 possible values: packages, requests, accounts, deposits.
group by c_custkey
@@ -626,18 +665,38 @@
c_count desc;
```
+#### Modified Version
-### Query 14
+```sql
+select c_count,
+ count(*) as custdist
+from (
+ select
+ c_custkey,
+ count(o_orderkey) as c_count
+ from
+ tpch_cn.customer
+ left join tpch_cn.orders on c_custkey = o_custkey
+ where
+ o_comment not like '%special%requests%'
+ group by
+ c_custkey
+) as c_orders
+group by
+ c_count
+order by
+ custdist desc,
+ c_count desc;
+```
+
+### Promotion Effect Query (Q14)
#### Original Version
```sql
-/**
- * TPC-H Query 14 : Promotion Effect Query / 促销效果查询
- * 查询获得某一个月的收入中有多大的百分比是来自促销零件。用以监视促销带来的市场反应。
- */
+
select 100.00 * sum(case
- when p_type like 'PROMO%' -- 促销零件
+ when p_type like 'PROMO%'
then l_extendedprice * (1 - l_discount)
else 0
end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
@@ -648,18 +707,21 @@
and l_shipdate < '1995-10-01'
```
+#### Modified Version
-### Query 15
+case
+when p_type like 'PROMO%'
+then lineitem.l_extendedprice * (1 - lineitem.l_discount)
+else 0
+end
+
+### Top Supplier Query (Q15)
#### Original Version
```sql
-/**
- * TPC-H Query 15 : Top Supplier Query
- * 查询获得某段时间内为总收入贡献最多的供货商(排名第一)的信息。可用以决定对哪些头等供货商给予奖励、给予更多订单、给予特别认证、给予鼓舞等激励。
- * 带有分排序、聚集、聚集子查询操作并存的普通表与视图的连接操作。
- */
-WITH revenue(supplier_no, total_revenue) as ( -- 复杂视图
+
+WITH revenue(supplier_no, total_revenue) as (
SELECT l_suppkey,
SUM(l_extendedprice * (1 - l_discount))
FROM tpch_cn.lineitem
@@ -674,51 +736,50 @@
FROM tpch_cn.supplier,
revenue
WHERE s_suppkey = supplier_no
- AND total_revenue = ( -- 非关联子查询
+ AND total_revenue = (
SELECT MAX(total_revenue)
FROM revenue)
ORDER BY s_suppkey;
```
+#### Modified Version
```sql
WITH revenue(supplier_no, total_revenue) as (
- -- OLAPContext start
- SELECT l_suppkey,
- SUM(l_extendedprice * (1 - l_discount))
- FROM tpch_cn.lineitem
- WHERE l_shipdate >= '1996-01-01'
- AND l_shipdate < '1996-04-01'
- GROUP BY l_suppkey
- -- OLAPContext end
+ SELECT
+ l_suppkey,
+ SUM(l_extendedprice * (1 - l_discount))
+ FROM
+ tpch_cn.lineitem
+ WHERE
+ l_shipdate >= '1996-01-01'
+ AND l_shipdate < '1996-04-01'
+ GROUP BY
+ l_suppkey
)
SELECT s_suppkey,
s_name,
s_address,
s_phone,
total_revenue
-FROM tpch_cn.supplier,
- revenue
-WHERE s_suppkey = supplier_no
- AND total_revenue = (
- -- OLAPContext start
- SELECT MAX(total_revenue)
- FROM revenue
- -- OLAPContext end
+FROM
+ tpch_cn.supplier
+ join revenue on s_suppkey = supplier_no
+WHERE
+ total_revenue = (
+ SELECT MAX(total_revenue)
+ FROM revenue
)
-ORDER BY s_suppkey;
+ORDER BY
+ s_suppkey;
```
-### Query 16
+### Parts/Supplier Relationship Query (Q16)
#### Original Version
```sql
-/**
- * TPC-H Query 16 : Parts/Supplier Relationship Query / 零件/供货商关系查询
- * 查询获得能够以指定的贡献条件供应零件的供货商数量。可用于决定在订单量大,任务紧急时,是否有充足的供货商。
- * 带有分组、排序、聚集、去重、NOT IN 子查询操作并存的两表连接操作。
- */
+
select p_brand,
p_type,
p_size,
@@ -729,10 +790,11 @@
and p_brand <> 'Brand#45' -- BRAND = Brand#MN where M and N are two single character strings representing two numbers randomly and independently selected within [1 .. 5];
and p_type not like 'MEDIUM POLISHED%' -- TYPE is made of the first 2 syllables of a string randomly selected within the list of 3-syllable strings defined for Types in Clause 4.2.2.13;
and p_size in (49, 14, 23, 45, 19, 3, 36, 9) -- SIZE is randomly selected as a set of eight different values within [1 .. 50]
- and ps_suppkey not in (-- NOT IN子查询,消费者排除某些供货商
+ and ps_suppkey not in (
select s_suppkey
from tpch_cn.supplier
- where s_comment like '%Customer%Complaints%')
+ where s_comment like '%Customer%Complaints%'
+ )
group by p_brand,
p_type,
p_size
@@ -742,40 +804,65 @@
p_size;
```
-### Query 17
+#### Modified Version
+
+```sql
+select p_brand,
+ p_type,
+ p_size,
+ count(distinct ps_suppkey) as supplier_cnt
+from tpch_cn.partsupp
+ join tpch_cn.part on p_partkey = ps_partkey
+where
+ p_brand <> 'Brand#45'
+ and p_type not like 'MEDIUM POLISHED%'
+ and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
+ and ps_suppkey not in (
+ select
+ s_suppkey
+ from
+ tpch_cn.supplier
+ where
+ s_comment like '%Customer%Complaints%'
+ )
+group by p_brand,
+ p_type,
+ p_size
+order by supplier_cnt desc,
+ p_brand,
+ p_type,
+ p_size;
+```
+
+
+### Small-Quantity-Order Revenue Query (Q17)
#### Original Version
```sql
-/**
- * TPC-H Query 17 : Small-Quantity-Order Revenue Query / 小订单收入查询
- * 查询获得比平均供货量的百分之二十还低的小批量订单。对于指定品牌和指定包装类型的零件,决定在一个七年数据库的所有订单中这些订单零件的平均项目数量(过去的和未决的)。
- * 如果这些零件中少于平均数20%的订单不再被接纳,那平均一年会损失多少呢?所以此查询可用于计算出如果没有没有小量订单,平均年收入将损失多少(因为大量商品的货运,将降低管理费用)。
- * 带有聚集、聚集子查询操作并存的两表连接操作。
- */
+
select sum(l_extendedprice) / 7.0 as avg_yearly
from tpch_cn.lineitem,
tpch_cn.part
where p_partkey = l_partkey
and p_brand = 'Brand#23' -- BRAND = 'Brand#MN' where MN is a two character string representing two numbers randomly and independently selected within [1 .. 5];
and p_container = 'MED BOX' -- CONTAINER is randomly selected within the list of 2-syllable strings defined for Containers in Clause 4.2.2.13.
- and l_quantity < (-- 关联子查询
+ and l_quantity < (
select 0.2 * avg(l_quantity)
from tpch_cn.lineitem
- where l_partkey = p_partkey);
+ where l_partkey = p_partkey
+ );
```
-### Query 18
+#### Modified Version
+
+
+### Large Volume Customer Query (Q18)
#### Original Version
```sql
-/**
- * TPC-H Query 18 : Large Volume Customer Query / 大订单顾客查询
- * Return the first 100 selected rows
- * 查询获得比指定供货量大的供货商信息。可用于决定在订单量大,任务紧急时,验证否有充足的供货商。
- * 带有分组、排序、聚集、IN子查询操作并存的三表连接操作
- */
+
select c_name,
c_custkey,
o_orderkey,
@@ -785,7 +872,7 @@
from tpch_cn.customer,
tpch_cn.orders,
tpch_cn.lineitem
-where o_orderkey in (-- 带有分组操作的IN子查询
+where o_orderkey in (
select l_orderkey
from tpch_cn.lineitem
group by l_orderkey
@@ -803,17 +890,43 @@
limit 100;
```
-### Query 19
+```sql
+select c_name,
+ c_custkey,
+ o_orderkey,
+ o_orderdate,
+ o_totalprice,
+ sum(l_quantity)
+from tpch_cn.lineitem
+ join tpch_cn.orders on o_orderkey = l_orderkey
+ join tpch_cn.customer on c_custkey = o_custkey
+where o_orderkey in (
+ select
+ l_orderkey
+ from
+ tpch_cn.lineitem
+ group by
+ l_orderkey
+ having
+ sum(l_quantity) > 312
+ )
+group by c_name,
+ c_custkey,
+ o_orderkey,
+ o_orderdate,
+ o_totalprice
+order by o_totalprice desc,
+ o_orderdate
+limit 100;
+```
+
+#### Modified Version
+
+### Discounted Revenue Query (Q19)
#### Original Version
```sql
-/**
- * TPC-H Query 19 : Discounted Revenue Query / 折扣收入查询
- * BRAND1, BRAND2, BRAND3 = 'Brand#MN' where each MN is a two character string representing two numbers randomly and independently selected within [1 .. 5]
- * 查询得到对一些空运或人工运输零件三个不同种类的所有订单的总折扣收入。零件的选择考虑特定品牌、包装和尺寸范围。本查询是用数据挖掘工具产生格式化代码的一个例子。
- * 带有分组、排序、聚集、IN子查询操作并存的三表连接操作。
- */
select sum(l_extendedprice * (1 - l_discount)) as revenue
from tpch_cn.lineitem,
tpch_cn.part
@@ -846,29 +959,55 @@
);
```
-### Query 20
+```sql
+select sum(l_extendedprice * (1 - l_discount)) as revenue
+from tpch_cn.lineitem join tpch_cn.part on p_partkey = l_partkey
+where (
+ p_brand = 'Brand#12'
+ and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
+ and l_quantity >= 1 and l_quantity <= 1 + 5 -- QUANTITY1 is randomly selected within [1..10].
+ and p_size between 1 and 5
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ )
+ or (
+ p_brand = 'Brand#23'
+ and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
+ and l_quantity >= 10 and l_quantity <= 10 + 15 -- QUANTITY2 is randomly selected within [10..20].
+ and p_size between 1 and 10
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ )
+ or (
+ p_brand = 'Brand#34'
+ and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
+ and l_quantity >= 20 and l_quantity <= 20 + 25 -- QUANTITY3 is randomly selected within [20..30].
+ and p_size between 1 and 15
+ and l_shipmode in ('AIR', 'AIR REG')
+ and l_shipinstruct = 'DELIVER IN PERSON'
+ );
+```
+
+#### Modified Version
+
+### Potential Part Promotion Query (Q20)
#### Original Version
```sql
-/**
- * TPC-H Query 20 : Potential Part Promotion Query / 供货商竞争力查询
- * 查询确定在某一年内,找出指定国家的能对某一零件商品提供更有竞争力价格的供货货。所谓更有竞争力的供货商,是指那些零件有过剩的供货商,超过供或商在某一年中货运给定国的某一零件的50%则为过剩。
- * 带有排序、聚集、IN子查询、普通子查询操作并存的两表连接操作。
- */
select s_name,
s_address
from tpch_cn.supplier,
tpch_cn.nation
-where s_suppkey in ( -- 第一层子查询
+where s_suppkey in ( -- start 1st subquery
select ps_suppkey
from tpch_cn.partsupp
- where ps_partkey in ( -- 第二层子查询,非关联子查询
+ where ps_partkey in ( -- 2nd subquery
select p_partkey
from tpch_cn.part
where p_name like 'forest%'
)
- and ps_availqty > ( -- 第二层子查询,关联子查询
+ and ps_availqty > ( -- 2nd subquery
select 0.5 * sum(l_quantity)
from tpch_cn.lineitem
where l_partkey = ps_partkey
@@ -876,56 +1015,79 @@
and l_shipdate >= date('1994-01-01') -- DATE is the first of January of a randomly selected year within 1993..1997.
and l_shipdate < date('1994-01-01') + interval '1' year
)
- ) -- 第一层子查询结束
+ ) -- end 1st subquery
and s_nationkey = n_nationkey
and n_name = 'CANADA' -- NATION is randomly selected within the list of values defined for N_NAME in Clause 4.2.3.
order by s_name;
```
-
+
+#### Modified Version
+
```sql
+select s_name,
+ s_address
+from tpch_cn.supplier join tpch_cn.nation
+ on s_nationkey = n_nationkey
+where s_suppkey in (
+ select
+ distinct ps_suppkey
+ from tpch_cn.lineitem
+ join tpch_cn.partsupp
+ on l_partkey = ps_partkey and l_suppkey = ps_suppkey
+ join tpch_cn.part
+ on ps_partkey = l_partkey
+ where p_name like 'forest%'
+ and l_shipdate >= '1994-01-01'
+ and l_shipdate <= '1995-01-01'
+ group by
+ ps_suppkey,
+ ps_partkey,
+ ps_availqty
+ having ps_availqty > 0.5 * sum(l_quantity)
+ )
+ and n_name = 'CANADA'
+order by s_name;
+```
-with part_quantity as ( -- model1
-select
- l_partkey,
- l_suppkey,
- 0.5 * sum(l_quantity) as half_sum_quantity
-from
- tpch_cn.lineitem
-where l_shipdate >= '1994-01-01'
- and l_shipdate < '1995-01-01'
-group by
- l_partkey,
- l_suppkey
-)
-
-with forest_suppilers as(
-select ps_suppkey
-from tpch_cn.partsupp
- join part_quantity
- on partsupp.ps_partkey = part_quantity.l_suppkey
- and partsupp.ps_suppkey = part_quantity.l_suppkey
- join tpch_cn.part
- on partsupp.ps_partkey = part.p_partkey
-where ps_availqty > half_sum_quantity
- and p_name like 'forest%'
+```sql
+with part_quantity as (
+ select
+ l_partkey,
+ l_suppkey,
+ 0.5 * sum(l_quantity) as half_sum_quantity
+ from
+ tpch_cn.lineitem
+ where l_shipdate >= '1994-01-01'
+ and l_shipdate < '1995-01-01'
+ group by
+ l_partkey,
+ l_suppkey
+),
+forest_suppilers as(
+ select
+ ps_suppkey
+ from
+ tpch_cn.partsupp
+ join part_quantity
+ on partsupp.ps_partkey = part_quantity.l_suppkey
+ and partsupp.ps_suppkey = part_quantity.l_suppkey
+ join tpch_cn.part
+ on partsupp.ps_partkey = part.p_partkey
+ where ps_availqty > half_sum_quantity
+ and p_name like 'forest%'
)
select s_name, s_address
from tpch_cn.supplier
join tpch_cn.nation on s_nationkey = n_nationkey
join forest_suppilers on supplier.s_suppkey= forest_suppilers.ps_suppkey
where n_name = 'CANADA'
-;
```
### Query 21 : Suppliers Who Kept Orders Waiting Query
+#### Original Version
+
```sql
-/**
- * TPC-H Query 21 : Suppliers Who Kept Orders Waiting Query / 不能按时交货供货商查询
- * Return the first 100 selected rows.
- * 查询获得不能及时交货的供货商。
- * 带有分组、排序、聚集、EXISTS子查询、NOT EXISTS子查询操作并存的四表连接操作。查询语句没有从语法上限制返回多少条元组,但是TPC-H标准规定,查询结果只返回前100行(通常依赖于应用程序实现)。
- */
select s_name,
count(*) as numwait
from tpch_cn.supplier,
@@ -936,12 +1098,12 @@
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.l_receiptdate > l1.l_commitdate
- and exists ( -- Exist 子查询
+ and exists ( -- Exist subquery
select *
from tpch_cn.lineitem l2
where l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey)
- and not exists ( -- Not Exist 子查询
+ and not exists ( -- Not Exist subquery
select *
from tpch_cn.lineitem l3
where l3.l_orderkey = l1.l_orderkey
@@ -954,37 +1116,38 @@
s_name;
```
+#### Modified Version
+
### Query 22 : Global Sales Opportunity Query
+#### Original Version
```sql
-/**
-* TPC-H Query 22 : Global Sales Opportunity Query / 全球销售机会查询
-* 查询获得消费者可能购买的地理分布。本查询计算在指定的国家,比平均水平更持肯定态度但还没下七年订单的消费者数量。能反应出普通消费者的的态度,即购买意向。
-* 带有分组、排序、聚集、EXISTS子查询、NOT EXISTS子查询操作并存的四表连接操作。
-*/
select cntrycode,
count(*) as numcust,
sum(c_acctbal) as totacctbal
-from ( -- 第一层子查询
+from
+ ( -- start 1st subquery
select substring(c_phone from 1 for 2) as cntrycode,
c_acctbal
from tpch_cn.customer
-- I1 ... I7 are randomly selected without repetition from the possible values for Country code as defined in Clause 4.2.2.9.
where substring(c_phone from 1 for 2) in ('13', '31', '23', '29', '30', '18', '17')
- and c_acctbal > ( -- 第二层子查询
+ and c_acctbal > ( --
select avg(c_acctbal)
from tpch_cn.customer
where c_acctbal > 0.00
and substring(c_phone from 1 for 2)
in ('13', '31', '23', '29', '30', '18', '17'))
- and not exists ( -- 第二层子查询
+ and not exists ( --
select *
from tpch_cn.orders
where o_custkey = c_custkey)
- -- 第一层子查询结束
- ) as custsale
+
+ ) as custsale -- end 1st subquery
group by cntrycode
order by cntrycode;
-```
\ No newline at end of file
+```
+
+#### Modified Version
\ No newline at end of file
diff --git a/kystudio/src/components/query/query_tab.vue b/kystudio/src/components/query/query_tab.vue
index d9cd4eb..5422c9a 100644
--- a/kystudio/src/components/query/query_tab.vue
+++ b/kystudio/src/components/query/query_tab.vue
@@ -30,8 +30,8 @@
</div>
<div class="submit-tips" v-if="isWorkspace">
<i class="el-icon-ksd-info ksd-fs-12" ></i>
+ Dryrun is an experimental feature for user to create proper model. To enable/disable Dryrun, please set 'kylin.query.dryrun-enabled=true/false' in Setting -> Advanced Settings -> Custom Project Configuration. <br>
Control / Command + Enter = <span>{{!isLoading ? $t('runQuery') : $t('stopQuery')}}</span>
- Tip : Dryrun is an experimental feature for user to create proper model. <br>To enable/disable dry run, please set 'kylin.query.dryrun-enabled=true/false' in Setting -> Advanced Settings -> Custom Project Configuration.
</div>
</div>
<div v-show="isLoading" class="ksd-center ksd-mt-10">
@@ -292,7 +292,7 @@
} else {
clearInterval(_this.ST)
}
- }, 300)
+ }, 600)
}
get showHtrace () {
return this.$store.state.system.showHtrace === 'true'
diff --git a/src/core-common/src/main/java/org/apache/kylin/common/QueryContext.java b/src/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
index d364fd2..1f575c0 100644
--- a/src/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
+++ b/src/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
@@ -152,7 +152,7 @@
*/
@Getter
@Setter
- private String lastUsedRelNode;
+ private String lastUsedRelNode = "";
/**
* For debug purpose, when dry run mode is enabled, the query
diff --git a/src/query-common/src/main/java/org/apache/kylin/query/relnode/ContextUtil.java b/src/query-common/src/main/java/org/apache/kylin/query/relnode/ContextUtil.java
index d08d0d0..88da90d 100644
--- a/src/query-common/src/main/java/org/apache/kylin/query/relnode/ContextUtil.java
+++ b/src/query-common/src/main/java/org/apache/kylin/query/relnode/ContextUtil.java
@@ -111,7 +111,8 @@
}
public static void dumpCalcitePlan(String msg, RelNode relNode, Logger logger) {
- if (Boolean.TRUE.equals(CalciteSystemProperty.DEBUG.value()) && logger.isDebugEnabled()) {
+ if ((Boolean.TRUE.equals(CalciteSystemProperty.DEBUG.value()) || QueryContext.current().isDryRun())
+ && logger.isDebugEnabled()) {
logger.debug("{} :{}{}", msg, System.getProperty("line.separator"), RelOptUtil.toString(relNode));
}
if (QueryContext.current().isDryRun() && msg.contains("FIRST ROUND")) {
diff --git a/src/query-common/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java b/src/query-common/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
index eadb698..5159c9c 100644
--- a/src/query-common/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
+++ b/src/query-common/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
@@ -45,6 +45,7 @@
import org.apache.kylin.guava30.shaded.common.collect.Sets;
import org.apache.kylin.metadata.cube.cuboid.NLayoutCandidate;
import org.apache.kylin.metadata.cube.model.DimensionRangeInfo;
+import org.apache.kylin.metadata.cube.model.LayoutEntity;
import org.apache.kylin.metadata.cube.model.NDataSegment;
import org.apache.kylin.metadata.cube.model.NDataflow;
import org.apache.kylin.metadata.model.FunctionDesc;
@@ -530,29 +531,55 @@
}
public final static String SEP = System.getProperty("line.separator");
- public final static String INDENT = " ";
+ public final static String INDENT = " ";
- public final static String olapContextFormat = SEP
- + "{"
- + SEP + INDENT + "\"Fact Table\" = \"%s\","
- + SEP + INDENT + "\"Dimension Tables\" = [%s],"
- + SEP + INDENT + "\"Recommend Dimension(Group by)\" = [%s],"
- + SEP + INDENT + "\"Recommend Dimension(Filter cond)\" = [%s],"
- + SEP + INDENT + "\"Measures\" = [%s],"
- + SEP + "}"
+ public final static String RECOMMEND_OLAP_INFO = SEP
+ + " {"
+ + SEP + INDENT + "\"Fact Table\" : \"%s\","
+ + SEP + INDENT + "\"Dimension Tables\" : [%s],"
+ + SEP + INDENT + "\"Query Columns\" : [%s],"
+ + SEP + INDENT + "\"Dimension(Group by)\" : [%s],"
+ + SEP + INDENT + "\"Dimension(Filter cond)\" : [%s],"
+ + SEP + INDENT + "\"Measure\" : [%s],"
+ + SEP + INDENT + "\"Join\" : \"" + SEP + "%s\","
+ + SEP + INDENT + "\"Index Id\" : %s,"
+ + SEP + INDENT + "\"Query Column / Index Column\" : \"%d / %d\","
+ + SEP + INDENT + "\"Index Columns\" : [%s]"
+ + SEP + " }"
+ SEP;
public String tipsForUser() {
Set<String> allTables = allTableScans.stream().map(OLAPTableScan::getTableName).collect(Collectors.toSet());
if (!allTables.isEmpty() && firstTableScan != null) {
allTables.remove(firstTableScan.getTableName());
- return String.format(olapContextFormat,
+ LayoutEntity target = null;
+ List<String> indexAllColumns = Lists.newArrayList();
+ int queryColCnt = -1;
+ long indexColCnt = -1;
+ try {
+ queryColCnt = allColumns.size();
+ target = storageContext.getCandidate().getLayoutEntity();
+ indexColCnt = target.getColOrder().stream().filter(i -> i < 10_000).count();
+ List<NDataModel.NamedColumn> cols = realization.getModel().getAllNamedColumns();
+ indexAllColumns = target.getColOrder().stream().map(i -> i < cols.size() ? cols.get(i).getAliasDotColumn() : "M_" + i).collect(Collectors.toList());
+ } catch (Exception e) {
+ logger.info("Fetch target index failed: " + e.getMessage());
+ }
+
+ return String.format(RECOMMEND_OLAP_INFO,
firstTableScan.getTableName(),
- Strings.join(allTables.stream().map(c -> "\"" + c + "\"").iterator(), ','),
- Strings.join(groupByColumns.stream().map(c -> "\"" + c.getColumnWithTableAndSchema() + "\"").iterator(), ','),
- Strings.join(filterColumns.stream().map(c -> "\"" + c.getColumnWithTableAndSchema() + "\"").iterator(), ','),
- Strings.join(aggregations.stream().map(c -> "\"" + c.getFullExpression() + "\"").iterator(), ',')
- );
+ Strings.join(allTables.stream().map(c -> " \"" + c + "\"").iterator(), ','),
+ Strings.join(allColumns.stream().map(c -> " \"" + c.getColumnWithTableAndSchema() + "\"").iterator(), ','),
+ Strings.join(groupByColumns.stream().map(c -> " \"" + c.getColumnWithTableAndSchema() + "\"").iterator(), ','),
+ Strings.join(filterColumns.stream().map(c -> " \"" + c.getColumnWithTableAndSchema() + "\"").iterator(), ','),
+ Strings.join(aggregations.stream().map(c -> " \"" + c.getFullExpression() + "\"").iterator(), ','),
+ joinsGraph == null ? "" : joinsGraph.toString().replaceAll("Root", "Fact")
+ .replaceAll("Edge", "Dim")
+ .replaceAll("TableRef", ""),
+ target == null ? "-1" : target.getId(),
+ queryColCnt, indexColCnt,
+ Strings.join(indexAllColumns.stream().map(c -> " \"" + c + "\"").iterator(), ',')
+ );
} else {
return "empty";
}
diff --git a/src/query-common/src/main/java/org/apache/kylin/query/routing/QueryRouter.java b/src/query-common/src/main/java/org/apache/kylin/query/routing/QueryRouter.java
index aec64d8..69efa1c 100644
--- a/src/query-common/src/main/java/org/apache/kylin/query/routing/QueryRouter.java
+++ b/src/query-common/src/main/java/org/apache/kylin/query/routing/QueryRouter.java
@@ -23,6 +23,7 @@
import java.util.Locale;
import java.util.Set;
+import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.QueryContext;
@@ -75,6 +76,7 @@
return new Strategy(NProjectManager.getProjectConfig(project));
}
+ @Slf4j
public static class Strategy {
private static final PruningRule SEGMENT_PRUNING = new SegmentPruningRule();
private static final PruningRule PARTITION_PRUNING = new PartitionPruningRule();
@@ -92,6 +94,14 @@
public Strategy(KylinConfig config) {
+ // add all rules
+ rules.add(SEGMENT_PRUNING);
+ rules.add(PARTITION_PRUNING);
+ rules.add(REMOVE_INCAPABLE_REALIZATIONS);
+ if (QueryRouter.isVacantIndexPruningEnabled(config)) {
+ rules.add(VACANT_INDEX_PRUNING);
+ }
+
// add debug rule
if (config.isQueryDryRunEnabled() && QueryContext.current().getModelPriorities().length > 0) {
// This will remove all candidate which not listed in SQL Hint 'MODEL_PRIORITY', for debug
@@ -104,6 +114,8 @@
NDataflow df = (NDataflow) r;
if (models.contains(df.getModelAlias().toUpperCase(Locale.ROOT))) {
matched = true;
+ } else {
+ log.debug("Remove " + df.getModelAlias());
}
}
if (!matched) {
@@ -115,14 +127,6 @@
});
}
- // add all rules
- rules.add(SEGMENT_PRUNING);
- rules.add(PARTITION_PRUNING);
- rules.add(REMOVE_INCAPABLE_REALIZATIONS);
- if (QueryRouter.isVacantIndexPruningEnabled(config)) {
- rules.add(VACANT_INDEX_PRUNING);
- }
-
// add all sorters
if (config.useTableIndexAnswerSelectStarEnabled()) {
sorters.add(Candidate.tableIndexUnmatchedColSizeSorter());
diff --git a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationChooser.java b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationChooser.java
index 7ae0238..a52951e 100644
--- a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationChooser.java
+++ b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationChooser.java
@@ -670,9 +670,6 @@
private static String helpfulMessageForUser(OLAPContext ctx) {
StringBuilder buf = new StringBuilder(ctx.toHumanReadString());
buf.append(System.getProperty("line.separator"));
- if(ctx.getJoinsGraph() != null) {
- buf.append(" Join graph : ").append(ctx.getJoinsGraph().toString()).append(System.getProperty("line.separator"));
- }
buf.append(" Incapable message : ");
for (List<RealizationCheck.IncapableReason> reasons : ctx.realizationCheck.getModelIncapableReasons().values()) {
for (RealizationCheck.IncapableReason reason : reasons) {
diff --git a/src/query/src/main/java/org/apache/kylin/query/engine/QueryExec.java b/src/query/src/main/java/org/apache/kylin/query/engine/QueryExec.java
index 51e4e42..dc8ac47 100644
--- a/src/query/src/main/java/org/apache/kylin/query/engine/QueryExec.java
+++ b/src/query/src/main/java/org/apache/kylin/query/engine/QueryExec.java
@@ -529,13 +529,9 @@
.append(SEP).append(" ");
diagnosticInfo.append(msg).append(SEP);
- diagnosticInfo.append(SEP)
- .append("2. RelNode(with ctx id) :")
- .append(SEP).append(" ");
- diagnosticInfo.append(QueryContext.current().getLastUsedRelNode());
diagnosticInfo.append(SEP)
- .append("3. OLAPContext(s) and matched model(s) :");
+ .append("2. OLAPContext(s) and matched model(s) :");
if(OLAPContext.getThreadLocalContexts() != null) {
String olapMatchInfo =
OLAPContext.getNativeRealizations().stream()
@@ -552,11 +548,11 @@
diagnosticInfo.append(SEP);
diagnosticInfo.append(" Ctx=").append(ctx.id);
if(ctx.realization == null) {
- diagnosticInfo.append(" is not matched by any model/snapshot, expected ")
+ diagnosticInfo.append(" is not matched by any model/snapshot, recommend :")
.append(ctx.tipsForUser());
} else {
diagnosticInfo.append(" is matched by ").append(ctx.realization.getCanonicalName())
- .append(", expected ").append(ctx.tipsForUser());
+ .append(", comparison :").append(ctx.tipsForUser());
}
// diagnosticInfo.append(", verbose text:").append(ctx);
}
@@ -564,6 +560,12 @@
}
diagnosticInfo.append(SEP)
+ .append("3. RelNode(with ctx id) :")
+ .append(SEP).append(" ");
+ diagnosticInfo.append(QueryContext.current().getLastUsedRelNode().replace("Kap", ""));
+
+
+ diagnosticInfo.append(SEP)
.append("4. SQL Text :")
.append(SEP);
diagnosticInfo.append(sql)
diff --git a/src/query/src/main/java/org/apache/kylin/query/engine/exec/sparder/SparderQueryPlanExec.java b/src/query/src/main/java/org/apache/kylin/query/engine/exec/sparder/SparderQueryPlanExec.java
index ea4624c..a1a7348 100644
--- a/src/query/src/main/java/org/apache/kylin/query/engine/exec/sparder/SparderQueryPlanExec.java
+++ b/src/query/src/main/java/org/apache/kylin/query/engine/exec/sparder/SparderQueryPlanExec.java
@@ -88,7 +88,8 @@
if (!(dataContext instanceof SimpleDataContext) || !(((SimpleDataContext) dataContext)).isContentQuery()
|| KapConfig.wrap(((SimpleDataContext) dataContext).getKylinConfig()).runConstantQueryLocally()) {
for (OLAPContext context : contexts) {
- if (context.olapSchema != null && context.storageContext.isEmptyLayout() && !context.isHasAgg()) {
+ if (context.olapSchema != null && context.storageContext.isEmptyLayout() && !context.isHasAgg() &
+ !QueryContext.current().isDryRun()) {
QueryContext.fillEmptyResultSetMetrics();
return new ExecuteResult(Lists.newArrayList(), 0);
}