blob: fb1071fb8ba7d5897a566bde8b6dd20008135e13 [file] [log] [blame]
-- start_matchsubs
-- m/ERROR: can not drop a internal task "gp_dynamic_table_refresh_.*/
-- s/ERROR: can not drop a internal task "gp_dynamic_table_refresh_.*/ERROR: can not drop a internal task "gp_dynamic_table_refresh_xxx"/g
-- m/WARNING: relation of oid "\d+" is not dynamic table/
-- s/WARNING: relation of oid "\d+" is not dynamic table/WARNING: relation of oid "XXX" is not dynamic table/g
-- end_matchsubs
CREATE SCHEMA dynamic_table_schema;
SET search_path TO dynamic_table_schema;
SET optimizer = OFF;
CREATE TABLE t1(a int, b int, c int) DISTRIBUTED BY (b);
INSERT INTO t1 SELECT i, i + 1, i + 2 FROM GENERATE_SERIES(1, 10) i;
INSERT INTO t1 SELECT i, i + 1, i + 2 FROM GENERATE_SERIES(1, 5) i;
ANALYZE t1;
CREATE DYNAMIC TABLE dt0 SCHEDULE '5 * * * *' AS
SELECT a, b, sum(c) FROM t1 GROUP BY a, b DISTRIBUTED BY(b);
\d+ dt0
Dynamic table "dynamic_table_schema.dt0"
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
--------+---------+-----------+----------+---------+---------+--------------+-------------
a | integer | | | | plain | |
b | integer | | | | plain | |
sum | bigint | | | | plain | |
View definition:
SELECT t1.a,
t1.b,
sum(t1.c) AS sum
FROM t1
GROUP BY t1.a, t1.b;
Distributed by: (b)
ANALYZE dt0;
-- test backgroud auto-refresh
SELECT schedule, command FROM pg_task WHERE jobname LIKE 'gp_dynamic_table_refresh%';
schedule | command
-----------+------------------------------------------------
5 * * * * | REFRESH DYNAMIC TABLE dynamic_table_schema.dt0
(1 row)
EXPLAIN(COSTS OFF, VERBOSE)
SELECT a, b, sum(c) FROM t1 GROUP BY a, b;
QUERY PLAN
-------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: a, b, (sum(c))
-> HashAggregate
Output: a, b, sum(c)
Group Key: t1.a, t1.b
-> Seq Scan on dynamic_table_schema.t1
Output: a, b, c
Settings: optimizer = 'off'
Optimizer: Postgres query optimizer
(9 rows)
SELECT a, b, sum(c) FROM t1 GROUP BY a, b;
a | b | sum
----+----+-----
9 | 10 | 11
8 | 9 | 10
5 | 6 | 14
10 | 11 | 12
4 | 5 | 12
1 | 2 | 6
2 | 3 | 8
7 | 8 | 9
3 | 4 | 10
6 | 7 | 8
(10 rows)
SELECT * FROM dt0;
a | b | sum
----+----+-----
1 | 2 | 6
2 | 3 | 8
7 | 8 | 9
3 | 4 | 10
6 | 7 | 8
9 | 10 | 11
8 | 9 | 10
5 | 6 | 14
10 | 11 | 12
4 | 5 | 12
(10 rows)
-- test join on distributed keys
EXPLAIN(COSTS OFF, VERBOSE)
SELECT * FROM dt0 JOIN t1 USING(b);
QUERY PLAN
--------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: dt0.b, dt0.a, dt0.sum, t1.a, t1.c
-> Hash Join
Output: dt0.b, dt0.a, dt0.sum, t1.a, t1.c
Hash Cond: (t1.b = dt0.b)
-> Seq Scan on dynamic_table_schema.t1
Output: t1.a, t1.b, t1.c
-> Hash
Output: dt0.b, dt0.a, dt0.sum
-> Seq Scan on dynamic_table_schema.dt0
Output: dt0.b, dt0.a, dt0.sum
Settings: optimizer = 'off'
Optimizer: Postgres query optimizer
(13 rows)
-- Create Dynamic Table without SCHEDULE.
CREATE DYNAMIC TABLE dt1 AS
SELECT * FROM t1 WHERE a = 1 DISTRIBUTED BY(b);
ANALYZE dt1;
-- Create Dynamic Table without DISTRIBUTION KEYS.
CREATE DYNAMIC TABLE dt2 AS
SELECT * FROM t1 WHERE a = 2 WITH NO DATA;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'b' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- Refresh Dynamic Table WITH NO DATA
REFRESH DYNAMIC TABLE dt0 WITH NO DATA;
-- Refresh Dynamic Table
REFRESH DYNAMIC TABLE dt2;
ANALYZE dt2;
-- Test Answer Query using Dynamic Tables.
SET enable_answer_query_using_materialized_views = ON;
EXPLAIN(COSTS OFF, VERBOSE)
SELECT * FROM t1 WHERE a = 1;
QUERY PLAN
----------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: a, b, c
-> Seq Scan on dynamic_table_schema.dt1
Output: a, b, c
Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
Optimizer: Postgres query optimizer
(6 rows)
SELECT * FROM t1 WHERE a = 1;
a | b | c
---+---+---
1 | 2 | 3
1 | 2 | 3
(2 rows)
EXPLAIN(COSTS OFF, VERBOSE)
SELECT * FROM t1 WHERE a = 2;
QUERY PLAN
----------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: a, b, c
-> Seq Scan on dynamic_table_schema.dt2
Output: a, b, c
Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
Optimizer: Postgres query optimizer
(6 rows)
SELECT * FROM t1 WHERE a = 2;
a | b | c
---+---+---
2 | 3 | 4
2 | 3 | 4
(2 rows)
-- test DROP DYNAMIC TABLE
SELECT schedule, command FROM pg_task WHERE jobname LIKE 'gp_dynamic_table_refresh%' AND command LIKE '%dt0%';
schedule | command
-----------+------------------------------------------------
5 * * * * | REFRESH DYNAMIC TABLE dynamic_table_schema.dt0
(1 row)
DROP DYNAMIC TABLE dt0;
SELECT schedule, command FROM pg_task WHERE jobname LIKE 'gp_dynamic_table_refresh%' AND command LIKE '%dt0%';
schedule | command
----------+---------
(0 rows)
-- drop base tables will drop DYNAMIC TABLEs too.
SELECT schedule, command FROM pg_task WHERE jobname LIKE 'gp_dynamic_table_refresh%';
schedule | command
-------------+------------------------------------------------
*/5 * * * * | REFRESH DYNAMIC TABLE dynamic_table_schema.dt1
*/5 * * * * | REFRESH DYNAMIC TABLE dynamic_table_schema.dt2
(2 rows)
DROP TABLE t1 CASCADE;
NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to dynamic table dt1
drop cascades to dynamic table dt2
SELECT schedule, command FROM pg_task WHERE jobname LIKE 'gp_dynamic_table_refresh%';
schedule | command
----------+---------
(0 rows)
-- construct dynamic table
CREATE TABLE t2(a int, b int, c int) DISTRIBUTED BY (b);
CREATE MATERIALIZED VIEW mv_t2 AS
SELECT * FROM t2 WHERE a > 1;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'b' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- construct dynamic table from materialized view
CREATE DYNAMIC TABLE dt3 AS
SELECT * FROM mv_t2 WHERE a = 2;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'b' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- construct dynamic table from dynamic table
CREATE DYNAMIC TABLE dt4 AS
SELECT * FROM dt3 WHERE b = 3;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'b' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- construct dynamic table from joins
CREATE DYNAMIC TABLE dt5 AS
SELECT * FROM dt3 natural join t2 natural join mv_t2;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'b' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- construct dynamic table from external table
begin;
--start_ignore
CREATE OR REPLACE FUNCTION write_to_file() RETURNS integer as '$libdir/gpextprotocol.so', 'demoprot_export' LANGUAGE C STABLE NO SQL;
CREATE OR REPLACE FUNCTION read_from_file() RETURNS integer as '$libdir/gpextprotocol.so', 'demoprot_import' LANGUAGE C STABLE NO SQL;
DROP PROTOCOL IF EXISTS demoprot;
NOTICE: protocol "demoprot" does not exist, skipping
--end_ignore
CREATE TRUSTED PROTOCOL demoprot (readfunc = 'read_from_file', writefunc = 'write_to_file'); -- should succeed
CREATE WRITABLE EXTERNAL TABLE ext_w(id int)
LOCATION('demoprot://dynamic_table_text_file.txt')
FORMAT 'text'
DISTRIBUTED BY (id);
INSERT INTO ext_w SELECT * FROM generate_series(1, 10);
CREATE READABLE EXTERNAL TABLE ext_r(id int)
LOCATION('demoprot://dynamic_table_text_file.txt')
FORMAT 'text';
EXPLAIN(COSTS OFF, VERBOSE)
SELECT sum(id) FROM ext_r where id > 5;
QUERY PLAN
----------------------------------------------------------------------------------
Finalize Aggregate
Output: sum(id)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL sum(id))
-> Partial Aggregate
Output: PARTIAL sum(id)
-> Foreign Scan on dynamic_table_schema.ext_r
Output: id
Filter: (ext_r.id > 5)
Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
Optimizer: Postgres query optimizer
(11 rows)
SELECT sum(id) FROM ext_r where id > 5;
sum
-----
40
(1 row)
CREATE DYNAMIC TABLE dt_external AS
SELECT * FROM ext_r where id > 5;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
ANALYZE dt_external;
SHOW optimizer;
optimizer
-----------
off
(1 row)
SET LOCAL enable_answer_query_using_materialized_views = ON;
SET LOCAL aqumv_allow_foreign_table = ON;
EXPLAIN(COSTS OFF, VERBOSE)
SELECT sum(id) FROM ext_r where id > 5;
QUERY PLAN
----------------------------------------------------------------------------------
Finalize Aggregate
Output: sum(id)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL sum(id))
-> Partial Aggregate
Output: PARTIAL sum(id)
-> Seq Scan on dynamic_table_schema.dt_external
Output: id
Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
Optimizer: Postgres query optimizer
(10 rows)
SELECT sum(id) FROM ext_r where id > 5;
sum
-----
40
(1 row)
DROP FOREIGN TABLE ext_r CASCADE;
NOTICE: drop cascades to dynamic table dt_external
DROP FOREIGN TABLE ext_w;
ABORT;
-- Test resevered job name for Dynamic Tables.
SELECT 'dt5'::regclass::oid AS dtoid \gset
-- should fail
CREATE TASK gp_dynamic_table_refresh_xxx SCHEDULE '1 second' AS 'REFRESH DYNAMIC TABLE dt5';
ERROR: unacceptable task name "gp_dynamic_table_refresh_xxx"
DETAIL: The prefix "gp_dynamic_table_refresh_" is reserved for system tasks.
-- can not alter the REFRESH SQL of Dynamic Tables.
ALTER TASK gp_dynamic_table_refresh_:dtoid AS '* * * * *';
ERROR: can not alter REFRESH SQL of dynamic tables
ALTER TASK gp_dynamic_table_refresh_:dtoid AS '';
ERROR: can not alter REFRESH SQL of dynamic tables
-- should fail
DROP TASK gp_dynamic_table_refresh_:dtoid;
ERROR: can not drop a internal task "gp_dynamic_table_refresh_17387" paried with dynamic table
DETAIL: please drop the dynamic table instead
\unset dtoid
CREATE DYNAMIC TABLE dt_schedule SCHEDULE '1 2 3 4 5' AS SELECT * FROM t2;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'b' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
SELECT pg_catalog.pg_get_dynamic_table_schedule('dt_schedule'::regclass::oid);
pg_get_dynamic_table_schedule
-------------------------------
1 2 3 4 5
(1 row)
-- not a dynamic table
SELECT pg_catalog.pg_get_dynamic_table_schedule('t2'::regclass::oid);
WARNING: relation of oid "XXX" is not dynamic table
pg_get_dynamic_table_schedule
-------------------------------
(1 row)
SELECT * FROM pg_dynamic_tables;
schemaname | dynamictablename | dynamictableowner | tablespace | hasindexes | ispopulated | definition
----------------------+------------------+-------------------+------------+------------+-------------+-----------------------------------
dynamic_table_schema | dt3 | gpadmin | | f | t | SELECT mv_t2.a, +
| | | | | | mv_t2.b, +
| | | | | | mv_t2.c +
| | | | | | FROM mv_t2 +
| | | | | | WHERE (mv_t2.a = 2);
dynamic_table_schema | dt4 | gpadmin | | f | t | SELECT dt3.a, +
| | | | | | dt3.b, +
| | | | | | dt3.c +
| | | | | | FROM dt3 +
| | | | | | WHERE (dt3.b = 3);
dynamic_table_schema | dt5 | gpadmin | | f | t | SELECT dt3.a, +
| | | | | | dt3.b, +
| | | | | | dt3.c +
| | | | | | FROM ((dt3 +
| | | | | | JOIN t2 USING (a, b, c)) +
| | | | | | JOIN mv_t2 USING (a, b, c));
dynamic_table_schema | dt_schedule | gpadmin | | f | t | SELECT t2.a, +
| | | | | | t2.b, +
| | | | | | t2.c +
| | | | | | FROM t2;
(4 rows)
CREATE TABLE t3(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE DYNAMIC TABLE dt_1_min SCHEDULE '* * * * *' AS SELECT * FROM t3 WITH NO DATA;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO T3 VALUES(1);
-- wait for backgroud refresh
SELECT pg_sleep(80);
pg_sleep
----------
(1 row)
SELECT * FROM dt_1_min;
a
---
1
(1 row)
RESET enable_answer_query_using_materialized_views;
RESET optimizer;
--start_ignore
DROP SCHEMA dynamic_table_schema cascade;
NOTICE: drop cascades to 5 other objects
DETAIL: drop cascades to table t2
drop cascades to materialized view mv_t2
drop cascades to dynamic table dt3
drop cascades to dynamic table dt4
drop cascades to dynamic table dt5
--end_ignore