blob: 6b032575f123fad56c7819dfc8041445d4c6f06f [file] [log] [blame]
CREATE TABLE my_tt_agg_small (
symbol character(16),
event_ts bigint,
trade_price numeric,
trade_volume bigint
) DISTRIBUTED BY (symbol);
CREATE TABLE my_tq_agg_small (
ets bigint,
sym character varying(16),
bid_price numeric,
ask_price numeric,
end_ts bigint
) DISTRIBUTED BY (ets);
COPY my_tt_agg_small (symbol, event_ts, trade_price, trade_volume) FROM stdin;
COPY my_tq_agg_small (ets, sym, bid_price, ask_price, end_ts) FROM stdin;
CREATE INDEX my_tq_agg_small_ets_end_ts_ix ON my_tq_agg_small USING btree (ets, end_ts);
analyze my_tq_agg_small;
analyze my_tt_agg_small;
set optimizer_enable_indexjoin=on;
set optimizer_nestloop_factor = 1.0;
-- force_explain
EXPLAIN
SELECT (tt.event_ts / 100000) / 5 * 5 as fivemin, COUNT(*)
FROM my_tt_agg_small tt, my_tq_agg_small tq
WHERE tq.sym = tt.symbol AND
tt.event_ts >= tq.ets AND
tt.event_ts < tq.end_ts
GROUP BY 1
ORDER BY 1 asc ;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=1261.82..1267.67 rows=413 width=16)
Merge Key: ((((tt.event_ts / 100000) / 5) * 5))
-> Sort (cost=1261.82..1262.16 rows=138 width=16)
Sort Key: ((((tt.event_ts / 100000) / 5) * 5))
-> Finalize HashAggregate (cost=1254.52..1256.93 rows=138 width=16)
Group Key: ((((tt.event_ts / 100000) / 5) * 5))
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=1236.97..1252.45 rows=413 width=16)
Hash Key: ((((tt.event_ts / 100000) / 5) * 5))
-> Partial HashAggregate (cost=1236.97..1244.19 rows=413 width=16)
Group Key: (((tt.event_ts / 100000) / 5) * 5)
-> Hash Join (cost=17.20..1161.31 rows=15131 width=8)
Hash Cond: (tt.symbol = (tq.sym)::bpchar)
Join Filter: ((tt.event_ts >= tq.ets) AND (tt.event_ts < tq.end_ts))
-> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..8.00 rows=420 width=25)
-> Seq Scan on my_tt_agg_small tt (cost=0.00..2.40 rows=140 width=25)
-> Hash (cost=8.76..8.76 rows=676 width=20)
-> Seq Scan on my_tq_agg_small tq (cost=0.00..8.76 rows=676 width=20)
Optimizer: Postgres query optimizer
(18 rows)
SELECT (tt.event_ts / 100000) / 5 * 5 as fivemin, COUNT(*)
FROM my_tt_agg_small tt, my_tq_agg_small tq
WHERE tq.sym = tt.symbol AND
tt.event_ts >= tq.ets AND
tt.event_ts < tq.end_ts
GROUP BY 1
ORDER BY 1 asc ;
fivemin | count
--------------+-------
201011261015 | 2
201011261045 | 1
201011261110 | 1
201011261125 | 1
201011261240 | 1
201011261245 | 1
201011261315 | 2
201011261320 | 3
(8 rows)
set optimizer_enable_hashjoin = off;
set enable_hashjoin=off;
set enable_seqscan=off;
set enable_mergejoin=off;
set enable_nestloop=on;
set enable_indexscan=on;
-- start_ignore
-- Known_opt_diff: OPT-929
-- end_ignore
-- force_explain
set optimizer_segments = 2;
set optimizer_nestloop_factor = 1.0;
EXPLAIN
SELECT (tt.event_ts / 100000) / 5 * 5 as fivemin, COUNT(*)
FROM my_tt_agg_small tt, my_tq_agg_small tq
WHERE tq.sym = tt.symbol AND
tt.event_ts >= tq.ets AND
tt.event_ts < tq.end_ts
GROUP BY 1
ORDER BY 1 asc ;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate (cost=10000004659.04..10000004809.69 rows=413 width=16)
Group Key: ((((tt.event_ts / 100000) / 5) * 5))
-> Gather Motion 3:1 (slice1; segments: 3) (cost=10000004659.04..10000004796.27 rows=1239 width=16)
Merge Key: ((((tt.event_ts / 100000) / 5) * 5))
-> Partial GroupAggregate (cost=10000004659.04..10000004779.75 rows=413 width=16)
Group Key: ((((tt.event_ts / 100000) / 5) * 5))
-> Sort (cost=10000004659.04..10000004696.87 rows=15131 width=8)
Sort Key: ((((tt.event_ts / 100000) / 5) * 5))
-> Nested Loop (cost=10000000000.15..10000003608.53 rows=15131 width=8)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=10000000000.00..10000000008.00 rows=420 width=25)
-> Seq Scan on my_tt_agg_small tt (cost=10000000000.00..10000000002.40 rows=140 width=25)
-> Index Scan using my_tq_agg_small_ets_end_ts_ix on my_tq_agg_small tq (cost=0.15..7.93 rows=38 width=20)
Index Cond: ((tt.event_ts >= ets) AND (tt.event_ts < end_ts))
Filter: (tt.symbol = (sym)::bpchar)
Optimizer: Postgres query optimizer
(15 rows)
reset optimizer_segments;
reset optimizer_nestloop_factor;
SELECT (tt.event_ts / 100000) / 5 * 5 as fivemin, COUNT(*)
FROM my_tt_agg_small tt, my_tq_agg_small tq
WHERE tq.sym = tt.symbol AND
tt.event_ts >= tq.ets AND
tt.event_ts < tq.end_ts
GROUP BY 1
ORDER BY 1 asc ;
fivemin | count
--------------+-------
201011261015 | 2
201011261045 | 1
201011261110 | 1
201011261125 | 1
201011261240 | 1
201011261245 | 1
201011261315 | 2
201011261320 | 3
(8 rows)
-- Test Index Scan on CO table as the right tree of a NestLoop join.
create table no_index_table(fake_col1 int, fake_col2 int, fake_col3 int, a int, b int) distributed by (a, b);
insert into no_index_table values (1,1,1,1,1);
analyze no_index_table;
create table with_index_table(x int, y int) with (appendonly=true, orientation=column);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create index with_index_table_index on with_index_table (x);
insert into with_index_table select i, 1 from generate_series(1, 20)i;
analyze with_index_table;
set enable_material to off;
set enable_seqscan to off;
set enable_mergejoin to off;
set enable_hashjoin to off;
set enable_nestloop to on;
set optimizer_enable_materialize to off;
set optimizer_enable_hashjoin to off;
explain (costs off)
SELECT * from with_index_table td JOIN no_index_table ro ON td.y = ro.a AND td.x = ro.b;
QUERY PLAN
---------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: ro.b
-> Seq Scan on no_index_table ro
-> Bitmap Heap Scan on with_index_table td
Recheck Cond: (x = ro.b)
Filter: (ro.a = y)
-> Bitmap Index Scan on with_index_table_index
Index Cond: (x = ro.b)
Optimizer: Postgres query optimizer
(11 rows)
SELECT * from with_index_table td JOIN no_index_table ro ON td.y = ro.a AND td.x = ro.b;
x | y | fake_col1 | fake_col2 | fake_col3 | a | b
---+---+-----------+-----------+-----------+---+---
1 | 1 | 1 | 1 | 1 | 1 | 1
(1 row)
reset all;