| -- disable ORCA |
| SET optimizer TO off; |
| create schema matview_data_schema; |
| set search_path to matview_data_schema; |
| create table t1(a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table t2(a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into t1 select i, i+1 from generate_series(1, 5) i; |
| insert into t1 select i, i+1 from generate_series(1, 3) i; |
| create materialized view mv0 as select * from t1; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create materialized view mv1 as select a, count(*), sum(b) from t1 group by a; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create materialized view mv2 as select * from t2; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| -- all mv are up to date |
| select mvname, datastatus from gp_matview_aux where mvname in ('mv0','mv1', 'mv2'); |
| mvname | datastatus |
| --------+------------ |
| mv0 | u |
| mv1 | u |
| mv2 | u |
| (3 rows) |
| |
| -- truncate in self transaction |
| begin; |
| create table t3(a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create materialized view mv3 as select * from t3; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select datastatus from gp_matview_aux where mvname = 'mv3'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| truncate t3; |
| select datastatus from gp_matview_aux where mvname = 'mv3'; |
| datastatus |
| ------------ |
| e |
| (1 row) |
| |
| end; |
| -- trcuncate |
| refresh materialized view mv3; |
| select datastatus from gp_matview_aux where mvname = 'mv3'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| truncate t3; |
| select datastatus from gp_matview_aux where mvname = 'mv3'; |
| datastatus |
| ------------ |
| e |
| (1 row) |
| |
| -- insert and refresh |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| insert into t1 values (1, 2); |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| i |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| i |
| (1 row) |
| |
| -- insert but no rows changes |
| refresh materialized view mv0; |
| refresh materialized view mv1; |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| insert into t1 select * from t3; |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| -- update |
| refresh materialized view mv0; |
| refresh materialized view mv1; |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| update t1 set a = 10 where a = 1; |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| e |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| e |
| (1 row) |
| |
| -- delete |
| refresh materialized view mv0; |
| refresh materialized view mv1; |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| delete from t1 where a = 10; |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| e |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| e |
| (1 row) |
| |
| -- vacuum |
| refresh materialized view mv0; |
| refresh materialized view mv1; |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| vacuum t1; |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| vacuum full t1; |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| r |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| r |
| (1 row) |
| |
| -- insert after vacuum full |
| insert into t1 values(1, 2); |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| e |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| e |
| (1 row) |
| |
| -- vacuum full after insert |
| refresh materialized view mv0; |
| refresh materialized view mv1; |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| insert into t1 values(1, 2); |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| i |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| i |
| (1 row) |
| |
| vacuum full t1; |
| select datastatus from gp_matview_aux where mvname = 'mv0'; |
| datastatus |
| ------------ |
| e |
| (1 row) |
| |
| select datastatus from gp_matview_aux where mvname = 'mv1'; |
| datastatus |
| ------------ |
| e |
| (1 row) |
| |
| -- Refresh With No Data |
| refresh materialized view mv2; |
| select datastatus from gp_matview_aux where mvname = 'mv2'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| refresh materialized view mv2 with no data; |
| select datastatus from gp_matview_aux where mvname = 'mv2'; |
| datastatus |
| ------------ |
| e |
| (1 row) |
| |
| -- Copy |
| refresh materialized view mv2; |
| select datastatus from gp_matview_aux where mvname = 'mv2'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| -- 0 rows |
| COPY t2 from stdin; |
| select datastatus from gp_matview_aux where mvname = 'mv2'; |
| datastatus |
| ------------ |
| u |
| (1 row) |
| |
| COPY t2 from stdin; |
| select datastatus from gp_matview_aux where mvname = 'mv2'; |
| datastatus |
| ------------ |
| i |
| (1 row) |
| |
| -- |
| -- test issue https://github.com/apache/cloudberry/issues/582 |
| -- test inherits |
| -- |
| begin; |
| create table tp_issue_582(i int, j int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table tc_issue_582(i int) inherits (tp_issue_582); |
| NOTICE: table has parent, setting distribution columns to match parent table |
| NOTICE: merging column "i" with inherited definition |
| insert into tp_issue_582 values(1, 1), (2, 2); |
| insert into tc_issue_582 values(1, 1); |
| create materialized view mv_tp_issue_582 as select * from tp_issue_582; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'i' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| -- should be null. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_tp_issue_582'; |
| mvname | datastatus |
| --------+------------ |
| (0 rows) |
| |
| abort; |
| -- |
| -- Test multi-table JOIN materialized views |
| -- |
| create table jt1(id int, val int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table jt2(id int, val int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table jt3(id int, val int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into jt1 select i, i*10 from generate_series(1,5) i; |
| insert into jt2 select i, i*100 from generate_series(1,5) i; |
| insert into jt3 select i, i*1000 from generate_series(1,5) i; |
| -- Two-table INNER JOIN: verify registration |
| create materialized view mv_join2 as |
| select jt1.id, jt1.val as v1, jt2.val as v2 |
| from jt1 join jt2 on jt1.id = jt2.id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join2 | u |
| (1 row) |
| |
| select count(*) from gp_matview_tables mt |
| join pg_class c on mt.relid = c.oid |
| where mt.mvoid = 'mv_join2'::regclass; |
| count |
| ------- |
| 2 |
| (1 row) |
| |
| -- INSERT on table A: status -> 'i' |
| insert into jt1 values(6, 60); |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join2 | i |
| (1 row) |
| |
| -- REFRESH: status -> 'u' |
| refresh materialized view mv_join2; |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join2 | u |
| (1 row) |
| |
| -- INSERT on table B: status -> 'i' |
| insert into jt2 values(7, 700); |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join2 | i |
| (1 row) |
| |
| -- UPDATE on table A: status -> 'e' |
| refresh materialized view mv_join2; |
| update jt1 set val = 99 where id = 1; |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join2 | e |
| (1 row) |
| |
| -- DELETE on table B: status -> 'e' |
| refresh materialized view mv_join2; |
| delete from jt2 where id = 7; |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join2 | e |
| (1 row) |
| |
| -- Implicit join (FROM t1, t2 WHERE ...): verify registration |
| refresh materialized view mv_join2; |
| create materialized view mv_implicit_join as |
| select jt1.id, jt1.val as v1, jt2.val as v2 |
| from jt1, jt2 where jt1.id = jt2.id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_implicit_join'; |
| mvname | datastatus |
| ------------------+------------ |
| mv_implicit_join | u |
| (1 row) |
| |
| select count(*) from gp_matview_tables mt |
| join pg_class c on mt.relid = c.oid |
| where mt.mvoid = 'mv_implicit_join'::regclass; |
| count |
| ------- |
| 2 |
| (1 row) |
| |
| -- Three-table join: verify 3 entries in gp_matview_tables |
| create materialized view mv_join3 as |
| select jt1.id, jt1.val as v1, jt2.val as v2, jt3.val as v3 |
| from jt1 join jt2 on jt1.id = jt2.id join jt3 on jt2.id = jt3.id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join3 | u |
| (1 row) |
| |
| select count(*) from gp_matview_tables mt |
| join pg_class c on mt.relid = c.oid |
| where mt.mvoid = 'mv_join3'::regclass; |
| count |
| ------- |
| 3 |
| (1 row) |
| |
| -- DML on middle table expires mv_join3 |
| insert into jt2 values(8, 800); |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join3 | i |
| (1 row) |
| |
| -- Self-join: verify only 1 entry in gp_matview_tables |
| create materialized view mv_selfjoin as |
| select a.id as aid, b.id as bid |
| from jt1 a join jt1 b on a.id = b.id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'aid' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_selfjoin'; |
| mvname | datastatus |
| -------------+------------ |
| mv_selfjoin | u |
| (1 row) |
| |
| select count(*) from gp_matview_tables mt |
| join pg_class c on mt.relid = c.oid |
| where mt.mvoid = 'mv_selfjoin'::regclass; |
| count |
| ------- |
| 1 |
| (1 row) |
| |
| -- LEFT/RIGHT/FULL OUTER JOIN: verify all register correctly |
| create materialized view mv_left_join as |
| select jt1.id, jt1.val as v1, jt2.val as v2 |
| from jt1 left join jt2 on jt1.id = jt2.id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_left_join'; |
| mvname | datastatus |
| --------------+------------ |
| mv_left_join | u |
| (1 row) |
| |
| create materialized view mv_right_join as |
| select jt1.id, jt2.val as v2 |
| from jt1 right join jt2 on jt1.id = jt2.id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_right_join'; |
| mvname | datastatus |
| ---------------+------------ |
| mv_right_join | u |
| (1 row) |
| |
| create materialized view mv_full_join as |
| select jt1.id as id1, jt2.id as id2 |
| from jt1 full join jt2 on jt1.id = jt2.id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id1' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_full_join'; |
| mvname | datastatus |
| --------------+------------ |
| mv_full_join | u |
| (1 row) |
| |
| -- Partitioned table in join: verify partition DML propagates |
| create table jt_par(a int, b int) partition by range(a) |
| (start(1) end(3) every(1)); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into jt_par values(1, 10), (2, 20); |
| create materialized view mv_join_par as |
| select jt1.id, jt1.val as v1, jt_par.a, jt_par.b |
| from jt1 join jt_par on jt1.id = jt_par.a; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_par'; |
| mvname | datastatus |
| -------------+------------ |
| mv_join_par | u |
| (1 row) |
| |
| insert into jt_par values(1, 11); |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_par'; |
| mvname | datastatus |
| -------------+------------ |
| mv_join_par | i |
| (1 row) |
| |
| refresh materialized view mv_join_par; |
| insert into jt1 values(9, 90); |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_par'; |
| mvname | datastatus |
| -------------+------------ |
| mv_join_par | i |
| (1 row) |
| |
| -- VACUUM FULL on one base table of a join MV: status -> 'r' |
| refresh materialized view mv_join2; |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join2 | u |
| (1 row) |
| |
| vacuum full jt1; |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join2 | r |
| (1 row) |
| |
| -- TRUNCATE on one base table of a join MV: status -> 'e' |
| refresh materialized view mv_join2; |
| truncate jt2; |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join2 | e |
| (1 row) |
| |
| -- CREATE WITH NO DATA: status -> 'e' |
| create materialized view mv_join_nodata as |
| select jt1.id, jt3.val from jt1 join jt3 on jt1.id = jt3.id |
| with no data; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_nodata'; |
| mvname | datastatus |
| ----------------+------------ |
| mv_join_nodata | e |
| (1 row) |
| |
| -- DROP CASCADE: matview and aux entries removed |
| drop materialized view mv_join_nodata; |
| select count(*) from gp_matview_aux where mvname = 'mv_join_nodata'; |
| count |
| ------- |
| 0 |
| (1 row) |
| |
| -- Mixed join types in one view (INNER + LEFT) |
| create materialized view mv_mixed_join as |
| select jt1.id, jt2.val as v2, jt3.val as v3 |
| from jt1 join jt2 on jt1.id = jt2.id left join jt3 on jt2.id = jt3.id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_mixed_join'; |
| mvname | datastatus |
| ---------------+------------ |
| mv_mixed_join | u |
| (1 row) |
| |
| select count(*) from gp_matview_tables mt |
| join pg_class c on mt.relid = c.oid |
| where mt.mvoid = 'mv_mixed_join'::regclass; |
| count |
| ------- |
| 3 |
| (1 row) |
| |
| -- Join with GROUP BY and aggregates |
| create materialized view mv_join_agg as |
| select jt1.id, count(*) as cnt, sum(jt2.val) as total |
| from jt1 join jt2 on jt1.id = jt2.id group by jt1.id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_agg'; |
| mvname | datastatus |
| -------------+------------ |
| mv_join_agg | u |
| (1 row) |
| |
| select count(*) from gp_matview_tables mt |
| join pg_class c on mt.relid = c.oid |
| where mt.mvoid = 'mv_join_agg'::regclass; |
| count |
| ------- |
| 2 |
| (1 row) |
| |
| -- Multiple MVs sharing base tables: DML on one table affects all dependent MVs |
| refresh materialized view mv_join2; |
| refresh materialized view mv_join3; |
| refresh materialized view mv_mixed_join; |
| refresh materialized view mv_join_agg; |
| select mvname, datastatus from gp_matview_aux |
| where mvname in ('mv_join2', 'mv_join3', 'mv_mixed_join', 'mv_join_agg') |
| order by mvname; |
| mvname | datastatus |
| ---------------+------------ |
| mv_join2 | u |
| mv_join3 | u |
| mv_join_agg | u |
| mv_mixed_join | u |
| (4 rows) |
| |
| insert into jt2 values(10, 1000); |
| -- all four share jt2 as a base table |
| select mvname, datastatus from gp_matview_aux |
| where mvname in ('mv_join2', 'mv_join3', 'mv_mixed_join', 'mv_join_agg') |
| order by mvname; |
| mvname | datastatus |
| ---------------+------------ |
| mv_join2 | i |
| mv_join3 | i |
| mv_join_agg | i |
| mv_mixed_join | i |
| (4 rows) |
| |
| -- Transaction: multiple DML on different base tables |
| refresh materialized view mv_join3; |
| begin; |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join3 | u |
| (1 row) |
| |
| insert into jt1 values(20, 200); |
| -- after insert: 'i' (insert-only) |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join3 | i |
| (1 row) |
| |
| delete from jt2 where id = 10; |
| -- after delete: escalates to 'e' (expired) |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join3 | e |
| (1 row) |
| |
| end; |
| -- committed: status persists as 'e' |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join3 | e |
| (1 row) |
| |
| -- Transaction rollback: status should revert |
| refresh materialized view mv_join3; |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join3 | u |
| (1 row) |
| |
| begin; |
| update jt1 set val = 999 where id = 1; |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join3 | e |
| (1 row) |
| |
| abort; |
| -- after rollback: back to 'u' |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join3'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join3 | u |
| (1 row) |
| |
| -- Transaction: insert then insert on different tables stays 'i' |
| refresh materialized view mv_join2; |
| begin; |
| insert into jt1 values(30, 300); |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join2 | i |
| (1 row) |
| |
| insert into jt2 values(31, 3100); |
| -- still 'i' since both are inserts |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join2'; |
| mvname | datastatus |
| ----------+------------ |
| mv_join2 | i |
| (1 row) |
| |
| abort; |
| -- Cross join (FROM t1, t2 with no WHERE): verify registration |
| create materialized view mv_cross_join as |
| select jt1.id as id1, jt2.id as id2 from jt1, jt2; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id2' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_cross_join'; |
| mvname | datastatus |
| ---------------+------------ |
| mv_cross_join | u |
| (1 row) |
| |
| select count(*) from gp_matview_tables mt |
| join pg_class c on mt.relid = c.oid |
| where mt.mvoid = 'mv_cross_join'::regclass; |
| count |
| ------- |
| 2 |
| (1 row) |
| |
| -- Drop base table CASCADE removes dependent join MVs and aux entries |
| create table jt_drop(id int, val int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into jt_drop values(1, 10); |
| create materialized view mv_join_drop as |
| select jt1.id, jt_drop.val from jt1 join jt_drop on jt1.id = jt_drop.id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_join_drop'; |
| mvname | datastatus |
| --------------+------------ |
| mv_join_drop | u |
| (1 row) |
| |
| drop table jt_drop cascade; |
| NOTICE: drop cascades to materialized view mv_join_drop |
| select count(*) from gp_matview_aux where mvname = 'mv_join_drop'; |
| count |
| ------- |
| 0 |
| (1 row) |
| |
| -- Clean up join test objects |
| drop materialized view mv_cross_join; |
| drop materialized view mv_join_agg; |
| drop materialized view mv_mixed_join; |
| drop materialized view mv_join_par; |
| drop table jt_par cascade; |
| drop materialized view mv_full_join; |
| drop materialized view mv_right_join; |
| drop materialized view mv_left_join; |
| drop materialized view mv_selfjoin; |
| drop materialized view mv_join3; |
| drop materialized view mv_implicit_join; |
| drop materialized view mv_join2; |
| drop table jt3; |
| drop table jt2; |
| drop table jt1; |
| -- |
| -- Test AQUMV (Answer Query Using Materialized Views) with join queries. |
| -- Each matching test shows EXPLAIN + SELECT with GUC off (original plan), |
| -- then EXPLAIN + SELECT with GUC on (MV rewrite). Results must match. |
| -- |
| create table aqj_t1(a int, b int) distributed by (a); |
| create table aqj_t2(a int, b int) distributed by (a); |
| create table aqj_t3(a int, b int) distributed by (a); |
| insert into aqj_t1 select i, i*10 from generate_series(1, 100) i; |
| insert into aqj_t2 select i, i*100 from generate_series(1, 100) i; |
| insert into aqj_t3 select i, i*1000 from generate_series(1, 100) i; |
| analyze aqj_t1; |
| analyze aqj_t2; |
| analyze aqj_t3; |
| -- 1. Two-table INNER JOIN exact match |
| create materialized view mv_aqj_join2 as |
| select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_join2; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (aqj_t1.a = aqj_t2.a) |
| -> Seq Scan on aqj_t1 |
| -> Hash |
| -> Seq Scan on aqj_t2 |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| |
| select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a order by 1 limit 5; |
| a | b |
| ---+----- |
| 1 | 100 |
| 2 | 200 |
| 3 | 300 |
| 4 | 400 |
| 5 | 500 |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_join2 |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a order by 1 limit 5; |
| a | b |
| ---+----- |
| 1 | 100 |
| 2 | 200 |
| 3 | 300 |
| 4 | 400 |
| 5 | 500 |
| (5 rows) |
| |
| -- 2. Join with WHERE clause |
| create materialized view mv_aqj_where as |
| select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_where; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (aqj_t1.a = aqj_t2.a) |
| -> Seq Scan on aqj_t1 |
| Filter: (a > 5) |
| -> Hash |
| -> Seq Scan on aqj_t2 |
| Filter: (a > 5) |
| Optimizer: Postgres query optimizer |
| (9 rows) |
| |
| select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5 order by 1 limit 5; |
| a | b |
| ----+------ |
| 6 | 600 |
| 7 | 700 |
| 8 | 800 |
| 9 | 900 |
| 10 | 1000 |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_where |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 5 order by 1 limit 5; |
| a | b |
| ----+------ |
| 6 | 600 |
| 7 | 700 |
| 8 | 800 |
| 9 | 900 |
| 10 | 1000 |
| (5 rows) |
| |
| -- 3. Join with GROUP BY + aggregate |
| create materialized view mv_aqj_agg as |
| select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_agg; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a; |
| QUERY PLAN |
| ------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> HashAggregate |
| Group Key: aqj_t1.a |
| -> Hash Join |
| Hash Cond: (aqj_t1.a = aqj_t2.a) |
| -> Seq Scan on aqj_t1 |
| -> Hash |
| -> Seq Scan on aqj_t2 |
| Optimizer: Postgres query optimizer |
| (9 rows) |
| |
| select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a order by 1 limit 5; |
| a | cnt |
| ---+----- |
| 1 | 1 |
| 2 | 1 |
| 3 | 1 |
| 4 | 1 |
| 5 | 1 |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_agg |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a group by aqj_t1.a order by 1 limit 5; |
| a | cnt |
| ---+----- |
| 1 | 1 |
| 2 | 1 |
| 3 | 1 |
| 4 | 1 |
| 5 | 1 |
| (5 rows) |
| |
| -- 4. Non-match: different WHERE clause (should show Hash Join, not MV) |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where aqj_t1.a > 10; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (aqj_t1.a = aqj_t2.a) |
| -> Seq Scan on aqj_t1 |
| Filter: (a > 10) |
| -> Hash |
| -> Seq Scan on aqj_t2 |
| Filter: (a > 10) |
| Optimizer: Postgres query optimizer |
| (9 rows) |
| |
| -- 5. Non-match: different target list |
| explain(costs off) select aqj_t1.b, aqj_t2.a from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (aqj_t1.a = aqj_t2.a) |
| -> Seq Scan on aqj_t1 |
| -> Hash |
| -> Seq Scan on aqj_t2 |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| |
| -- 6. Non-match: different join type (INNER vs LEFT) |
| explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 left join aqj_t2 on aqj_t1.a = aqj_t2.a; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Left Join |
| Hash Cond: (aqj_t1.a = aqj_t2.a) |
| -> Seq Scan on aqj_t1 |
| -> Hash |
| -> Seq Scan on aqj_t2 |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| |
| -- 7. Three-table join |
| create materialized view mv_aqj_join3 as |
| select aqj_t1.a, aqj_t2.b, aqj_t3.b as c |
| from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_join3; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) select aqj_t1.a, aqj_t2.b, aqj_t3.b as c |
| from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a; |
| QUERY PLAN |
| ------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (aqj_t1.a = aqj_t3.a) |
| -> Hash Join |
| Hash Cond: (aqj_t1.a = aqj_t2.a) |
| -> Seq Scan on aqj_t1 |
| -> Hash |
| -> Seq Scan on aqj_t2 |
| -> Hash |
| -> Seq Scan on aqj_t3 |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| select aqj_t1.a, aqj_t2.b, aqj_t3.b as c |
| from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a |
| order by 1 limit 5; |
| a | b | c |
| ---+-----+------ |
| 1 | 100 | 1000 |
| 2 | 200 | 2000 |
| 3 | 300 | 3000 |
| 4 | 400 | 4000 |
| 5 | 500 | 5000 |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) select aqj_t1.a, aqj_t2.b, aqj_t3.b as c |
| from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_join3 |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select aqj_t1.a, aqj_t2.b, aqj_t3.b as c |
| from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = aqj_t3.a |
| order by 1 limit 5; |
| a | b | c |
| ---+-----+------ |
| 1 | 100 | 1000 |
| 2 | 200 | 2000 |
| 3 | 300 | 3000 |
| 4 | 400 | 4000 |
| 5 | 500 | 5000 |
| (5 rows) |
| |
| -- 8. Implicit join (FROM t1, t2 WHERE ...) |
| create materialized view mv_aqj_implicit as |
| select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_implicit; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (aqj_t1.a = aqj_t2.a) |
| -> Seq Scan on aqj_t1 |
| -> Hash |
| -> Seq Scan on aqj_t2 |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| |
| select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a order by 1 limit 5; |
| a | b |
| ---+----- |
| 1 | 100 |
| 2 | 200 |
| 3 | 300 |
| 4 | 400 |
| 5 | 500 |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_implicit |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a order by 1 limit 5; |
| a | b |
| ---+----- |
| 1 | 100 |
| 2 | 200 |
| 3 | 300 |
| 4 | 400 |
| 5 | 500 |
| (5 rows) |
| |
| -- 9. MV not up-to-date: after INSERT on base table |
| insert into aqj_t1 values(999, 9990); |
| set enable_answer_query_using_materialized_views = on; |
| -- Should NOT use mv_aqj_join2 (status is 'i') |
| explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (aqj_t1.a = aqj_t2.a) |
| -> Seq Scan on aqj_t1 |
| -> Hash |
| -> Seq Scan on aqj_t2 |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| |
| -- 10. After REFRESH: should use MV again |
| refresh materialized view mv_aqj_join2; |
| analyze mv_aqj_join2; |
| explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_join2 |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| -- 11. GUC off: should NOT use MV |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (aqj_t1.a = aqj_t2.a) |
| -> Seq Scan on aqj_t1 |
| -> Hash |
| -> Seq Scan on aqj_t2 |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| |
| -- |
| -- More complex join AQUMV test cases with richer schemas |
| -- |
| create table aqj_orders( |
| order_id int, |
| customer_id int, |
| amount numeric(10,2), |
| status text, |
| order_date date |
| ) distributed by (order_id); |
| create table aqj_customers( |
| customer_id int, |
| name text, |
| region text, |
| credit_limit numeric(10,2) |
| ) distributed by (customer_id); |
| create table aqj_products( |
| product_id int, |
| name text, |
| category text, |
| price numeric(10,2) |
| ) distributed by (product_id); |
| create table aqj_order_items( |
| item_id int, |
| order_id int, |
| product_id int, |
| quantity int |
| ) distributed by (item_id); |
| insert into aqj_customers select i, 'cust_' || i, case when i % 3 = 0 then 'east' when i % 3 = 1 then 'west' else 'north' end, (i * 100)::numeric(10,2) from generate_series(1, 50) i; |
| insert into aqj_orders select i, (i % 50) + 1, (i * 10.5)::numeric(10,2), case when i % 4 = 0 then 'shipped' when i % 4 = 1 then 'pending' when i % 4 = 2 then 'delivered' else 'cancelled' end, '2024-01-01'::date + (i % 365) from generate_series(1, 200) i; |
| insert into aqj_products select i, 'prod_' || i, case when i % 5 = 0 then 'electronics' when i % 5 = 1 then 'books' when i % 5 = 2 then 'clothing' when i % 5 = 3 then 'food' else 'toys' end, (i * 5.99)::numeric(10,2) from generate_series(1, 30) i; |
| insert into aqj_order_items select i, (i % 200) + 1, (i % 30) + 1, (i % 10) + 1 from generate_series(1, 500) i; |
| analyze aqj_customers; |
| analyze aqj_orders; |
| analyze aqj_products; |
| analyze aqj_order_items; |
| -- 12. Join with multiple columns + WHERE on text column |
| create materialized view mv_aqj_orders_cust as |
| select o.order_id, o.amount, c.name, c.region |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'shipped'; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_orders_cust; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) select o.order_id, o.amount, c.name, c.region |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'shipped'; |
| QUERY PLAN |
| ------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: o.customer_id |
| -> Seq Scan on aqj_orders o |
| Filter: (status = 'shipped'::text) |
| -> Hash |
| -> Seq Scan on aqj_customers c |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| select o.order_id, o.amount, c.name, c.region |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'shipped' |
| order by o.order_id limit 5; |
| order_id | amount | name | region |
| ----------+--------+---------+-------- |
| 4 | 42.00 | cust_5 | north |
| 8 | 84.00 | cust_9 | east |
| 12 | 126.00 | cust_13 | west |
| 16 | 168.00 | cust_17 | north |
| 20 | 210.00 | cust_21 | east |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) select o.order_id, o.amount, c.name, c.region |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'shipped'; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_orders_cust |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select o.order_id, o.amount, c.name, c.region |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'shipped' |
| order by o.order_id limit 5; |
| order_id | amount | name | region |
| ----------+--------+---------+-------- |
| 4 | 42.00 | cust_5 | north |
| 8 | 84.00 | cust_9 | east |
| 12 | 126.00 | cust_13 | west |
| 16 | 168.00 | cust_17 | north |
| 20 | 210.00 | cust_21 | east |
| (5 rows) |
| |
| -- 13. Four-table join |
| create materialized view mv_aqj_order_details as |
| select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_order_details; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) |
| select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id; |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (oi.product_id = p.product_id) |
| -> Hash Join |
| Hash Cond: (o.order_id = oi.order_id) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: o.customer_id |
| -> Seq Scan on aqj_orders o |
| -> Hash |
| -> Seq Scan on aqj_customers c |
| -> Hash |
| -> Seq Scan on aqj_order_items oi |
| -> Hash |
| -> Broadcast Motion 3:3 (slice4; segments: 3) |
| -> Seq Scan on aqj_products p |
| Optimizer: Postgres query optimizer |
| (19 rows) |
| |
| select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id |
| order by o.order_id, p.name limit 5; |
| order_id | customer_name | product_name | quantity | price |
| ----------+---------------+--------------+----------+-------- |
| 1 | cust_2 | prod_11 | 1 | 65.89 |
| 1 | cust_2 | prod_21 | 1 | 125.79 |
| 2 | cust_3 | prod_12 | 2 | 71.88 |
| 2 | cust_3 | prod_2 | 2 | 11.98 |
| 2 | cust_3 | prod_22 | 2 | 131.78 |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) |
| select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_order_details |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id |
| order by o.order_id, p.name limit 5; |
| order_id | customer_name | product_name | quantity | price |
| ----------+---------------+--------------+----------+-------- |
| 1 | cust_2 | prod_11 | 1 | 65.89 |
| 1 | cust_2 | prod_21 | 1 | 125.79 |
| 2 | cust_3 | prod_12 | 2 | 71.88 |
| 2 | cust_3 | prod_2 | 2 | 11.98 |
| 2 | cust_3 | prod_22 | 2 | 131.78 |
| (5 rows) |
| |
| -- 14. GROUP BY on join with multiple aggregates: sum, count, avg |
| create materialized view mv_aqj_cust_summary as |
| select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'region' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_cust_summary; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) |
| select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region; |
| QUERY PLAN |
| --------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Finalize HashAggregate |
| Group Key: c.region |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c.region |
| -> Streaming Partial HashAggregate |
| Group Key: c.region |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Seq Scan on aqj_orders o |
| -> Hash |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| -> Seq Scan on aqj_customers c |
| Optimizer: Postgres query optimizer |
| (14 rows) |
| |
| select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region |
| order by c.region; |
| region | order_count | total_amount | avg_amount |
| --------+-------------+--------------+----------------------- |
| east | 64 | 66864.00 | 1044.7500000000000000 |
| north | 68 | 71400.00 | 1050.0000000000000000 |
| west | 68 | 72786.00 | 1070.3823529411764706 |
| (3 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) |
| select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_cust_summary |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select c.region, count(*) as order_count, sum(o.amount) as total_amount, avg(o.amount) as avg_amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region |
| order by c.region; |
| region | order_count | total_amount | avg_amount |
| --------+-------------+--------------+----------------------- |
| east | 64 | 66864.00 | 1044.7500000000000000 |
| north | 68 | 71400.00 | 1050.0000000000000000 |
| west | 68 | 72786.00 | 1070.3823529411764706 |
| (3 rows) |
| |
| -- 15. Join with expression in target list (arithmetic + function) |
| create materialized view mv_aqj_expr as |
| select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_expr; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id; |
| QUERY PLAN |
| --------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Seq Scan on aqj_orders o |
| -> Hash |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on aqj_customers c |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| order by o.order_id limit 5; |
| order_id | amount_with_tax | name | region_upper |
| ----------+-----------------+--------+-------------- |
| 1 | 11.550 | cust_2 | NORTH |
| 2 | 23.100 | cust_3 | EAST |
| 3 | 34.650 | cust_4 | WEST |
| 4 | 46.200 | cust_5 | NORTH |
| 5 | 57.750 | cust_6 | EAST |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_expr |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| order by o.order_id limit 5; |
| order_id | amount_with_tax | name | region_upper |
| ----------+-----------------+--------+-------------- |
| 1 | 11.550 | cust_2 | NORTH |
| 2 | 23.100 | cust_3 | EAST |
| 3 | 34.650 | cust_4 | WEST |
| 4 | 46.200 | cust_5 | NORTH |
| 5 | 57.750 | cust_6 | EAST |
| (5 rows) |
| |
| -- 16. Non-match: same tables + expressions, but extra WHERE (should NOT match mv_aqj_expr) |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where c.region = 'east'; |
| QUERY PLAN |
| --------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Seq Scan on aqj_orders o |
| -> Hash |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on aqj_customers c |
| Filter: (region = 'east'::text) |
| Optimizer: Postgres query optimizer |
| (9 rows) |
| |
| -- 17. Non-match: same tables but different aggregate target list |
| explain(costs off) |
| select c.region, sum(o.amount) as total_amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region; |
| QUERY PLAN |
| --------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Finalize HashAggregate |
| Group Key: c.region |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c.region |
| -> Streaming Partial HashAggregate |
| Group Key: c.region |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Seq Scan on aqj_orders o |
| -> Hash |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| -> Seq Scan on aqj_customers c |
| Optimizer: Postgres query optimizer |
| (14 rows) |
| |
| -- 18. Non-match: different join order (o JOIN c vs c JOIN o) |
| explain(costs off) select o.order_id, o.amount, c.name, c.region |
| from aqj_customers c join aqj_orders o on o.customer_id = c.customer_id |
| where o.status = 'shipped'; |
| QUERY PLAN |
| ------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: o.customer_id |
| -> Seq Scan on aqj_orders o |
| Filter: (status = 'shipped'::text) |
| -> Hash |
| -> Seq Scan on aqj_customers c |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| -- 19. Join with compound WHERE (multiple AND conditions) |
| create materialized view mv_aqj_compound_where as |
| select o.order_id, o.amount, c.name |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'pending' and c.region = 'west' and o.amount > 50; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_compound_where; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) select o.order_id, o.amount, c.name |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'pending' and c.region = 'west' and o.amount > 50; |
| QUERY PLAN |
| --------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Seq Scan on aqj_orders o |
| Filter: ((amount > '50'::numeric) AND (status = 'pending'::text)) |
| -> Hash |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on aqj_customers c |
| Filter: (region = 'west'::text) |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| select o.order_id, o.amount, c.name |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'pending' and c.region = 'west' and o.amount > 50 |
| order by o.order_id limit 5; |
| order_id | amount | name |
| ----------+--------+--------- |
| 9 | 94.50 | cust_10 |
| 21 | 220.50 | cust_22 |
| 33 | 346.50 | cust_34 |
| 45 | 472.50 | cust_46 |
| 53 | 556.50 | cust_4 |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) select o.order_id, o.amount, c.name |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'pending' and c.region = 'west' and o.amount > 50; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_compound_where |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select o.order_id, o.amount, c.name |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'pending' and c.region = 'west' and o.amount > 50 |
| order by o.order_id limit 5; |
| order_id | amount | name |
| ----------+--------+--------- |
| 9 | 94.50 | cust_10 |
| 21 | 220.50 | cust_22 |
| 33 | 346.50 | cust_34 |
| 45 | 472.50 | cust_46 |
| 53 | 556.50 | cust_4 |
| (5 rows) |
| |
| -- 20. Self-join |
| create materialized view mv_aqj_selfjoin as |
| select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 |
| from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id |
| where o1.order_id < o2.order_id; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id1' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_selfjoin; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) |
| select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 |
| from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id |
| where o1.order_id < o2.order_id; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (o1.customer_id = o2.customer_id) |
| Join Filter: (o1.order_id < o2.order_id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: o1.customer_id |
| -> Seq Scan on aqj_orders o1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: o2.customer_id |
| -> Seq Scan on aqj_orders o2 |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 |
| from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id |
| where o1.order_id < o2.order_id |
| order by o1.order_id, o2.order_id limit 5; |
| id1 | id2 | amt1 | amt2 |
| -----+-----+-------+--------- |
| 1 | 51 | 10.50 | 535.50 |
| 1 | 101 | 10.50 | 1060.50 |
| 1 | 151 | 10.50 | 1585.50 |
| 2 | 52 | 21.00 | 546.00 |
| 2 | 102 | 21.00 | 1071.00 |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) |
| select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 |
| from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id |
| where o1.order_id < o2.order_id; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_selfjoin |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as amt2 |
| from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id |
| where o1.order_id < o2.order_id |
| order by o1.order_id, o2.order_id limit 5; |
| id1 | id2 | amt1 | amt2 |
| -----+-----+-------+--------- |
| 1 | 51 | 10.50 | 535.50 |
| 1 | 101 | 10.50 | 1060.50 |
| 1 | 151 | 10.50 | 1585.50 |
| 2 | 52 | 21.00 | 546.00 |
| 2 | 102 | 21.00 | 1071.00 |
| (5 rows) |
| |
| -- 21. GROUP BY with multi-column key on join |
| create materialized view mv_aqj_grp_multi as |
| select c.region, o.status, count(*) as cnt, sum(o.amount) as total |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region, o.status; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'region, status' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_grp_multi; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) |
| select c.region, o.status, count(*) as cnt, sum(o.amount) as total |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region, o.status; |
| QUERY PLAN |
| --------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Finalize HashAggregate |
| Group Key: c.region, o.status |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c.region, o.status |
| -> Streaming Partial HashAggregate |
| Group Key: c.region, o.status |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Seq Scan on aqj_orders o |
| -> Hash |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| -> Seq Scan on aqj_customers c |
| Optimizer: Postgres query optimizer |
| (14 rows) |
| |
| select c.region, o.status, count(*) as cnt, sum(o.amount) as total |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region, o.status |
| order by c.region, o.status limit 6; |
| region | status | cnt | total |
| --------+-----------+-----+---------- |
| east | cancelled | 16 | 16968.00 |
| east | delivered | 16 | 16464.00 |
| east | pending | 16 | 16968.00 |
| east | shipped | 16 | 16464.00 |
| north | cancelled | 18 | 19425.00 |
| north | delivered | 16 | 16800.00 |
| (6 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) |
| select c.region, o.status, count(*) as cnt, sum(o.amount) as total |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region, o.status; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_grp_multi |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select c.region, o.status, count(*) as cnt, sum(o.amount) as total |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region, o.status |
| order by c.region, o.status limit 6; |
| region | status | cnt | total |
| --------+-----------+-----+---------- |
| east | cancelled | 16 | 16968.00 |
| east | delivered | 16 | 16464.00 |
| east | pending | 16 | 16968.00 |
| east | shipped | 16 | 16464.00 |
| north | cancelled | 18 | 19425.00 |
| north | delivered | 16 | 16800.00 |
| (6 rows) |
| |
| -- 22. Four-table join with WHERE and aggregate |
| create materialized view mv_aqj_3way_agg as |
| select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id |
| where o.status = 'delivered' |
| group by c.region, p.category; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'region, category' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_3way_agg; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) |
| select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id |
| where o.status = 'delivered' |
| group by c.region, p.category; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Finalize HashAggregate |
| Group Key: c.region, p.category |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c.region, p.category |
| -> Streaming Partial HashAggregate |
| Group Key: c.region, p.category |
| -> Hash Join |
| Hash Cond: (oi.product_id = p.product_id) |
| -> Hash Join |
| Hash Cond: (oi.order_id = o.order_id) |
| -> Seq Scan on aqj_order_items oi |
| -> Hash |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: o.customer_id |
| -> Seq Scan on aqj_orders o |
| Filter: (status = 'delivered'::text) |
| -> Hash |
| -> Seq Scan on aqj_customers c |
| -> Hash |
| -> Broadcast Motion 3:3 (slice5; segments: 3) |
| -> Seq Scan on aqj_products p |
| Optimizer: Postgres query optimizer |
| (26 rows) |
| |
| select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id |
| where o.status = 'delivered' |
| group by c.region, p.category |
| order by c.region, p.category limit 6; |
| region | category | total_qty | line_count |
| --------+-------------+-----------+------------ |
| east | books | 30 | 5 |
| east | clothing | 20 | 10 |
| east | electronics | 50 | 5 |
| east | food | 80 | 10 |
| east | toys | 40 | 10 |
| north | books | 60 | 10 |
| (6 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) |
| select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id |
| where o.status = 'delivered' |
| group by c.region, p.category; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_3way_agg |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as line_count |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id |
| where o.status = 'delivered' |
| group by c.region, p.category |
| order by c.region, p.category limit 6; |
| region | category | total_qty | line_count |
| --------+-------------+-----------+------------ |
| east | books | 30 | 5 |
| east | clothing | 20 | 10 |
| east | electronics | 50 | 5 |
| east | food | 80 | 10 |
| east | toys | 40 | 10 |
| north | books | 60 | 10 |
| (6 rows) |
| |
| -- 23. Implicit four-table join (comma style) |
| create materialized view mv_aqj_implicit3 as |
| select o.order_id, c.name, p.name as product_name |
| from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p |
| where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id |
| and o.status = 'pending'; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_implicit3; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) |
| select o.order_id, c.name, p.name as product_name |
| from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p |
| where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id |
| and o.status = 'pending'; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (oi.product_id = p.product_id) |
| -> Hash Join |
| Hash Cond: (oi.order_id = o.order_id) |
| -> Seq Scan on aqj_order_items oi |
| -> Hash |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: o.customer_id |
| -> Seq Scan on aqj_orders o |
| Filter: (status = 'pending'::text) |
| -> Hash |
| -> Seq Scan on aqj_customers c |
| -> Hash |
| -> Broadcast Motion 3:3 (slice4; segments: 3) |
| -> Seq Scan on aqj_products p |
| Optimizer: Postgres query optimizer |
| (20 rows) |
| |
| select o.order_id, c.name, p.name as product_name |
| from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p |
| where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id |
| and o.status = 'pending' |
| order by o.order_id, p.name limit 5; |
| order_id | name | product_name |
| ----------+--------+-------------- |
| 1 | cust_2 | prod_11 |
| 1 | cust_2 | prod_21 |
| 5 | cust_6 | prod_15 |
| 5 | cust_6 | prod_25 |
| 5 | cust_6 | prod_5 |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) |
| select o.order_id, c.name, p.name as product_name |
| from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p |
| where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id |
| and o.status = 'pending'; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_implicit3 |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| select o.order_id, c.name, p.name as product_name |
| from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p |
| where o.customer_id = c.customer_id and o.order_id = oi.order_id and oi.product_id = p.product_id |
| and o.status = 'pending' |
| order by o.order_id, p.name limit 5; |
| order_id | name | product_name |
| ----------+--------+-------------- |
| 1 | cust_2 | prod_11 |
| 1 | cust_2 | prod_21 |
| 5 | cust_6 | prod_15 |
| 5 | cust_6 | prod_25 |
| 5 | cust_6 | prod_5 |
| (5 rows) |
| |
| -- 24. Result correctness across DML + REFRESH cycle |
| insert into aqj_orders values(201, 1, 9999.99, 'shipped', '2025-12-31'); |
| set enable_answer_query_using_materialized_views = on; |
| -- Stale: should NOT use MV |
| explain(costs off) select o.order_id, o.amount, c.name, c.region |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'shipped'; |
| QUERY PLAN |
| ------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: o.customer_id |
| -> Seq Scan on aqj_orders o |
| Filter: (status = 'shipped'::text) |
| -> Hash |
| -> Seq Scan on aqj_customers c |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| -- Refresh and verify MV is used again |
| refresh materialized view mv_aqj_orders_cust; |
| analyze mv_aqj_orders_cust; |
| explain(costs off) select o.order_id, o.amount, c.name, c.region |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'shipped'; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on mv_aqj_orders_cust |
| Optimizer: Postgres query optimizer |
| (3 rows) |
| |
| -- The new row should appear in results via MV scan |
| select o.order_id, o.amount, c.name, c.region |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'shipped' and o.order_id = 201; |
| order_id | amount | name | region |
| ----------+---------+--------+-------- |
| 201 | 9999.99 | cust_1 | west |
| (1 row) |
| |
| -- 25. Post-DML comprehensive: refresh all, then verify GUC off vs on results |
| refresh materialized view mv_aqj_order_details; |
| refresh materialized view mv_aqj_expr; |
| refresh materialized view mv_aqj_selfjoin; |
| refresh materialized view mv_aqj_grp_multi; |
| refresh materialized view mv_aqj_3way_agg; |
| refresh materialized view mv_aqj_implicit3; |
| analyze mv_aqj_order_details; |
| analyze mv_aqj_expr; |
| analyze mv_aqj_selfjoin; |
| analyze mv_aqj_grp_multi; |
| analyze mv_aqj_3way_agg; |
| analyze mv_aqj_implicit3; |
| -- Verify four-table join results after DML+refresh |
| set enable_answer_query_using_materialized_views = off; |
| select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id |
| order by o.order_id, p.name limit 5; |
| order_id | customer_name | product_name | quantity | price |
| ----------+---------------+--------------+----------+-------- |
| 1 | cust_2 | prod_11 | 1 | 65.89 |
| 1 | cust_2 | prod_21 | 1 | 125.79 |
| 2 | cust_3 | prod_12 | 2 | 71.88 |
| 2 | cust_3 | prod_2 | 2 | 11.98 |
| 2 | cust_3 | prod_22 | 2 | 131.78 |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| select o.order_id, c.name as customer_name, p.name as product_name, oi.quantity, p.price |
| from aqj_orders o |
| join aqj_customers c on o.customer_id = c.customer_id |
| join aqj_order_items oi on o.order_id = oi.order_id |
| join aqj_products p on oi.product_id = p.product_id |
| order by o.order_id, p.name limit 5; |
| order_id | customer_name | product_name | quantity | price |
| ----------+---------------+--------------+----------+-------- |
| 1 | cust_2 | prod_11 | 1 | 65.89 |
| 1 | cust_2 | prod_21 | 1 | 125.79 |
| 2 | cust_3 | prod_12 | 2 | 71.88 |
| 2 | cust_3 | prod_2 | 2 | 11.98 |
| 2 | cust_3 | prod_22 | 2 | 131.78 |
| (5 rows) |
| |
| -- Verify expression MV results after DML+refresh |
| set enable_answer_query_using_materialized_views = off; |
| select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| order by o.order_id limit 5; |
| order_id | amount_with_tax | name | region_upper |
| ----------+-----------------+--------+-------------- |
| 1 | 11.550 | cust_2 | NORTH |
| 2 | 23.100 | cust_3 | EAST |
| 3 | 34.650 | cust_4 | WEST |
| 4 | 46.200 | cust_5 | NORTH |
| 5 | 57.750 | cust_6 | EAST |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) as region_upper |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| order by o.order_id limit 5; |
| order_id | amount_with_tax | name | region_upper |
| ----------+-----------------+--------+-------------- |
| 1 | 11.550 | cust_2 | NORTH |
| 2 | 23.100 | cust_3 | EAST |
| 3 | 34.650 | cust_4 | WEST |
| 4 | 46.200 | cust_5 | NORTH |
| 5 | 57.750 | cust_6 | EAST |
| (5 rows) |
| |
| -- Verify multi-key GROUP BY results after DML+refresh |
| set enable_answer_query_using_materialized_views = off; |
| select c.region, o.status, count(*) as cnt, sum(o.amount) as total |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region, o.status |
| order by c.region, o.status limit 6; |
| region | status | cnt | total |
| --------+-----------+-----+---------- |
| east | cancelled | 16 | 16968.00 |
| east | delivered | 16 | 16464.00 |
| east | pending | 16 | 16968.00 |
| east | shipped | 16 | 16464.00 |
| north | cancelled | 18 | 19425.00 |
| north | delivered | 16 | 16800.00 |
| (6 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| select c.region, o.status, count(*) as cnt, sum(o.amount) as total |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by c.region, o.status |
| order by c.region, o.status limit 6; |
| region | status | cnt | total |
| --------+-----------+-----+---------- |
| east | cancelled | 16 | 16968.00 |
| east | delivered | 16 | 16464.00 |
| east | pending | 16 | 16968.00 |
| east | shipped | 16 | 16464.00 |
| north | cancelled | 18 | 19425.00 |
| north | delivered | 16 | 16800.00 |
| (6 rows) |
| |
| -- 26. Non-match: LIMIT vs FETCH FIRST WITH TIES (limitOption differs) |
| create materialized view mv_aqj_limit_test as |
| select o.order_id, o.amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'shipped' |
| order by o.order_id limit 5; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_limit_test; |
| set enable_answer_query_using_materialized_views = on; |
| -- Same tables/WHERE/ORDER BY but FETCH FIRST WITH TIES: should NOT match |
| explain(costs off) |
| select o.order_id, o.amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'shipped' |
| order by o.order_id fetch first 5 rows with ties; |
| QUERY PLAN |
| --------------------------------------------------------------------------- |
| Limit |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: o.order_id |
| -> Limit |
| -> Sort |
| Sort Key: o.order_id |
| -> Hash Join |
| Hash Cond: (c.customer_id = o.customer_id) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on aqj_customers c |
| -> Hash |
| -> Seq Scan on aqj_orders o |
| Filter: (status = 'shipped'::text) |
| Optimizer: Postgres query optimizer |
| (14 rows) |
| |
| -- Identical LIMIT query: should match |
| explain(costs off) |
| select o.order_id, o.amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'shipped' |
| order by o.order_id limit 5; |
| QUERY PLAN |
| ------------------------------------------------------- |
| Limit |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: order_id |
| -> Limit |
| -> Sort |
| Sort Key: order_id |
| -> Seq Scan on mv_aqj_limit_test |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| -- 27. Match: FETCH FIRST WITH TIES exact match |
| create materialized view mv_aqj_with_ties as |
| select o.order_id, o.amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'pending' |
| order by o.order_id fetch first 5 rows with ties; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'order_id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| analyze mv_aqj_with_ties; |
| set enable_answer_query_using_materialized_views = off; |
| explain(costs off) |
| select o.order_id, o.amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'pending' |
| order by o.order_id fetch first 5 rows with ties; |
| QUERY PLAN |
| --------------------------------------------------------------------------- |
| Limit |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: o.order_id |
| -> Limit |
| -> Sort |
| Sort Key: o.order_id |
| -> Hash Join |
| Hash Cond: (c.customer_id = o.customer_id) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on aqj_customers c |
| -> Hash |
| -> Seq Scan on aqj_orders o |
| Filter: (status = 'pending'::text) |
| Optimizer: Postgres query optimizer |
| (14 rows) |
| |
| select o.order_id, o.amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'pending' |
| order by o.order_id fetch first 5 rows with ties; |
| order_id | amount |
| ----------+-------- |
| 1 | 10.50 |
| 5 | 52.50 |
| 9 | 94.50 |
| 13 | 136.50 |
| 17 | 178.50 |
| (5 rows) |
| |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) |
| select o.order_id, o.amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'pending' |
| order by o.order_id fetch first 5 rows with ties; |
| QUERY PLAN |
| ------------------------------------------------------ |
| Limit |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: order_id |
| -> Limit |
| -> Sort |
| Sort Key: order_id |
| -> Seq Scan on mv_aqj_with_ties |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| select o.order_id, o.amount |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| where o.status = 'pending' |
| order by o.order_id fetch first 5 rows with ties; |
| order_id | amount |
| ----------+-------- |
| 1 | 10.50 |
| 5 | 52.50 |
| 9 | 94.50 |
| 13 | 136.50 |
| 17 | 178.50 |
| (5 rows) |
| |
| -- 28. Non-match: GROUP BY vs GROUP BY DISTINCT (groupDistinct differs) |
| -- MV mv_aqj_grp_multi uses GROUP BY (groupDistinct=false, registered in catalog) |
| -- Query uses GROUP BY DISTINCT — should NOT match |
| set enable_answer_query_using_materialized_views = on; |
| explain(costs off) |
| select c.region, o.status, count(*) as cnt, sum(o.amount) as total |
| from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id |
| group by distinct c.region, o.status; |
| QUERY PLAN |
| --------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Finalize HashAggregate |
| Group Key: c.region, o.status |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c.region, o.status |
| -> Streaming Partial HashAggregate |
| Group Key: c.region, o.status |
| -> Hash Join |
| Hash Cond: (o.customer_id = c.customer_id) |
| -> Seq Scan on aqj_orders o |
| -> Hash |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| -> Seq Scan on aqj_customers c |
| Optimizer: Postgres query optimizer |
| (14 rows) |
| |
| -- Clean up AQUMV join test objects |
| drop materialized view mv_aqj_with_ties; |
| drop materialized view mv_aqj_limit_test; |
| drop materialized view mv_aqj_implicit3; |
| drop materialized view mv_aqj_3way_agg; |
| drop materialized view mv_aqj_grp_multi; |
| drop materialized view mv_aqj_selfjoin; |
| drop materialized view mv_aqj_compound_where; |
| drop materialized view mv_aqj_expr; |
| drop materialized view mv_aqj_cust_summary; |
| drop materialized view mv_aqj_order_details; |
| drop materialized view mv_aqj_orders_cust; |
| drop materialized view mv_aqj_implicit; |
| drop materialized view mv_aqj_join3; |
| drop materialized view mv_aqj_agg; |
| drop materialized view mv_aqj_where; |
| drop materialized view mv_aqj_join2; |
| drop table aqj_order_items; |
| drop table aqj_products; |
| drop table aqj_customers; |
| drop table aqj_orders; |
| drop table aqj_t3; |
| drop table aqj_t2; |
| drop table aqj_t1; |
| -- test drop table |
| select mvname, datastatus from gp_matview_aux where mvname in ('mv0','mv1', 'mv2', 'mv3'); |
| mvname | datastatus |
| --------+------------ |
| mv3 | e |
| mv0 | e |
| mv1 | e |
| mv2 | i |
| (4 rows) |
| |
| drop materialized view mv2; |
| drop table t1 cascade; |
| NOTICE: drop cascades to 2 other objects |
| DETAIL: drop cascades to materialized view mv0 |
| drop cascades to materialized view mv1 |
| select mvname, datastatus from gp_matview_aux where mvname in ('mv0','mv1', 'mv2', 'mv3'); |
| mvname | datastatus |
| --------+------------ |
| mv3 | e |
| (1 row) |
| |
| -- |
| -- test issue https://github.com/apache/cloudberry/issues/582 |
| -- test rules |
| begin; |
| create table t1_issue_582(i int, j int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table t2_issue_582(i int, j int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table t3_issue_582(i int, j int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create materialized view mv_t2_issue_582 as select j from t2_issue_582 where i = 1; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'j' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create rule r1 as on insert TO t1_issue_582 do also insert into t2_issue_582 values(1,1); |
| select count(*) from t1_issue_582; |
| count |
| ------- |
| 0 |
| (1 row) |
| |
| select count(*) from t2_issue_582; |
| count |
| ------- |
| 0 |
| (1 row) |
| |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_t2_issue_582'; |
| mvname | datastatus |
| -----------------+------------ |
| mv_t2_issue_582 | u |
| (1 row) |
| |
| insert into t1_issue_582 values(1,1); |
| select count(*) from t1_issue_582; |
| count |
| ------- |
| 1 |
| (1 row) |
| |
| select count(*) from t2_issue_582; |
| count |
| ------- |
| 1 |
| (1 row) |
| |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_t2_issue_582'; |
| mvname | datastatus |
| -----------------+------------ |
| mv_t2_issue_582 | i |
| (1 row) |
| |
| abort; |
| -- |
| -- test issue https://github.com/apache/cloudberry/issues/582 |
| -- test writable CTE |
| -- |
| begin; |
| create table t_cte_issue_582(i int, j int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create materialized view mv_t_cte_issue_582 as select j from t_cte_issue_582 where i = 1; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'j' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_t_cte_issue_582'; |
| mvname | datastatus |
| --------------------+------------ |
| mv_t_cte_issue_582 | u |
| (1 row) |
| |
| with mod1 as (insert into t_cte_issue_582 values(1, 1) returning *) select * from mod1; |
| i | j |
| ---+--- |
| 1 | 1 |
| (1 row) |
| |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_t_cte_issue_582'; |
| mvname | datastatus |
| --------------------+------------ |
| mv_t_cte_issue_582 | e |
| (1 row) |
| |
| abort; |
| -- test partitioned tables |
| create table par(a int, b int, c int) partition by range(b) |
| subpartition by range(c) subpartition template (start (1) end (3) every (1)) |
| (start(1) end(3) every(1)); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); |
| create materialized view mv_par as select * from par; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create materialized view mv_par1 as select * from par_1_prt_1; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create materialized view mv_par1_1 as select * from par_1_prt_1_2_prt_1; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create materialized view mv_par1_2 as select * from par_1_prt_1_2_prt_2; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create materialized view mv_par2 as select * from par_1_prt_2; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create materialized view mv_par2_1 as select * from par_1_prt_2_2_prt_1; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par | u |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| (6 rows) |
| |
| insert into par_1_prt_1 values (1, 1, 1); |
| -- mv_par1* shoud be updated |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| mv_par1_1 | i |
| mv_par1 | i |
| mv_par | i |
| (6 rows) |
| |
| insert into par values (1, 2, 2); |
| -- mv_par* should be updated |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1_2 | u |
| mv_par2_1 | u |
| mv_par1_1 | i |
| mv_par1 | i |
| mv_par | i |
| mv_par2 | i |
| (6 rows) |
| |
| refresh materialized view mv_par; |
| refresh materialized view mv_par1; |
| refresh materialized view mv_par1_1; |
| refresh materialized view mv_par1_2; |
| refresh materialized view mv_par2; |
| refresh materialized view mv_par2_1; |
| begin; |
| insert into par_1_prt_2_2_prt_1 values (1, 2, 1); |
| -- mv_par1* should not be updated |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1_2 | u |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par2_1 | i |
| mv_par2 | i |
| mv_par | i |
| (6 rows) |
| |
| abort; |
| begin; |
| truncate par_1_prt_2; |
| -- mv_par1* should not be updated |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1_2 | u |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par2_1 | e |
| mv_par2 | e |
| mv_par | e |
| (6 rows) |
| |
| abort; |
| truncate par_1_prt_2; |
| -- mv_par1* should not be updated |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1_2 | u |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par2_1 | e |
| mv_par2 | e |
| mv_par | e |
| (6 rows) |
| |
| refresh materialized view mv_par; |
| refresh materialized view mv_par1; |
| refresh materialized view mv_par1_1; |
| refresh materialized view mv_par1_2; |
| refresh materialized view mv_par2; |
| refresh materialized view mv_par2_1; |
| vacuum full par_1_prt_1_2_prt_1; |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| mv_par1_1 | r |
| mv_par1 | r |
| mv_par | r |
| (6 rows) |
| |
| refresh materialized view mv_par; |
| refresh materialized view mv_par1; |
| refresh materialized view mv_par1_1; |
| refresh materialized view mv_par1_2; |
| refresh materialized view mv_par2; |
| refresh materialized view mv_par2_1; |
| vacuum full par; |
| -- all should be updated. |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par2 | r |
| mv_par | r |
| mv_par2_1 | r |
| mv_par1_2 | r |
| mv_par1 | r |
| mv_par1_1 | r |
| (6 rows) |
| |
| refresh materialized view mv_par; |
| refresh materialized view mv_par1; |
| refresh materialized view mv_par1_1; |
| refresh materialized view mv_par1_2; |
| refresh materialized view mv_par2; |
| refresh materialized view mv_par2_1; |
| begin; |
| create table par_1_prt_1_2_prt_3 partition of par_1_prt_1 for values from (3) to (4); |
| NOTICE: table has parent, setting distribution columns to match parent table |
| -- update status when partition of |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| mv_par1 | e |
| mv_par | e |
| (6 rows) |
| |
| abort; |
| begin; |
| drop table par_1_prt_1 cascade; |
| NOTICE: drop cascades to 3 other objects |
| DETAIL: drop cascades to materialized view mv_par1_1 |
| drop cascades to materialized view mv_par1_2 |
| drop cascades to materialized view mv_par1 |
| -- update status when drop table |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par2 | u |
| mv_par2_1 | u |
| mv_par | e |
| (3 rows) |
| |
| abort; |
| begin; |
| alter table par_1_prt_1 detach partition par_1_prt_1_2_prt_1; |
| -- update status when detach |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| mv_par1 | e |
| mv_par | e |
| (6 rows) |
| |
| abort; |
| begin; |
| create table new_par(a int, b int, c int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| -- update status when attach |
| alter table par_1_prt_1 attach partition new_par for values from (4) to (5); |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| mv_par1 | e |
| mv_par | e |
| (6 rows) |
| |
| abort; |
| -- |
| -- Maintain materialized views on partitioned tables from bottom to up. |
| -- |
| insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); |
| refresh materialized view mv_par; |
| refresh materialized view mv_par1; |
| refresh materialized view mv_par1_1; |
| refresh materialized view mv_par1_2; |
| refresh materialized view mv_par2; |
| refresh materialized view mv_par2_1; |
| begin; |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par | u |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| (6 rows) |
| |
| insert into par values(1, 1, 1), (1, 1, 2); |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par2 | u |
| mv_par2_1 | u |
| mv_par1_2 | i |
| mv_par1 | i |
| mv_par | i |
| mv_par1_1 | i |
| (6 rows) |
| |
| abort; |
| begin; |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par | u |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| (6 rows) |
| |
| insert into par_1_prt_2_2_prt_1 values(2, 2, 1); |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2_1 | i |
| mv_par2 | i |
| mv_par | i |
| (6 rows) |
| |
| abort; |
| begin; |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par | u |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| (6 rows) |
| |
| delete from par where b = 2 and c = 1; |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2_1 | e |
| mv_par2 | e |
| mv_par | e |
| (6 rows) |
| |
| abort; |
| begin; |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par | u |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| (6 rows) |
| |
| delete from par_1_prt_1_2_prt_2; |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par1_1 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| mv_par1_2 | e |
| mv_par1 | e |
| mv_par | e |
| (6 rows) |
| |
| abort; |
| -- Across partition update. |
| begin; |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par | u |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| (6 rows) |
| |
| update par set c = 2 where b = 1 and c = 1; |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par2 | u |
| mv_par2_1 | u |
| mv_par1_2 | i |
| mv_par1_1 | e |
| mv_par1 | e |
| mv_par | e |
| (6 rows) |
| |
| abort; |
| -- Split Update with acrosss partition update. |
| begin; |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par | u |
| mv_par1 | u |
| mv_par1_1 | u |
| mv_par1_2 | u |
| mv_par2 | u |
| mv_par2_1 | u |
| (6 rows) |
| |
| update par set c = 2, a = 2 where b = 1 and c = 1; |
| select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%'; |
| mvname | datastatus |
| -----------+------------ |
| mv_par2 | u |
| mv_par2_1 | u |
| mv_par1_2 | i |
| mv_par1_1 | e |
| mv_par1 | e |
| mv_par | e |
| (6 rows) |
| |
| abort; |
| -- Test report warning if extend protocol data is not consumed. |
| --start_ignore |
| drop extension gp_inject_fault; |
| ERROR: extension "gp_inject_fault" does not exist |
| create extension gp_inject_fault; |
| --end_ignore |
| select gp_inject_fault_infinite('consume_extend_protocol_data', 'skip', dbid) |
| from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| |
| begin; |
| update par set c = 2 where b = 1 and c = 1; |
| WARNING: fail to find leafs of partitioned table: par |
| end; |
| WARNING: Extend Protocol Data unconsumed, subtag: 2 |
| WARNING: Extend Protocol Data unconsumed, subtag: 0 |
| begin; |
| insert into par values(1, 1, 1), (1, 1, 2); |
| WARNING: fail to find leafs of partitioned table: par |
| end; |
| WARNING: Extend Protocol Data unconsumed, subtag: 0 |
| begin; |
| update par set c = 2, a = 2 where b = 1 and c = 1; |
| WARNING: fail to find leafs of partitioned table: par |
| end; |
| WARNING: Extend Protocol Data unconsumed, subtag: 2 |
| WARNING: Extend Protocol Data unconsumed, subtag: 0 |
| begin; |
| insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); |
| WARNING: fail to find leafs of partitioned table: par |
| delete from par_1_prt_1_2_prt_2; |
| end; |
| WARNING: Extend Protocol Data unconsumed, subtag: 2 |
| WARNING: Extend Protocol Data unconsumed, subtag: 0 |
| begin; |
| insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); |
| WARNING: fail to find leafs of partitioned table: par |
| update par set c = 2, a = 2 where b = 1 and c = 1; |
| WARNING: fail to find leafs of partitioned table: par |
| end; |
| WARNING: Extend Protocol Data unconsumed, subtag: 2 |
| WARNING: Extend Protocol Data unconsumed, subtag: 0 |
| select gp_inject_fault('consume_extend_protocol_data', 'reset', dbid) |
| from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- |
| -- End of Maintain materialized views on partitioned tables from bottom to up. |
| -- |
| -- Test Rename matview. |
| begin; |
| create materialized view mv_name1 as |
| select * from par with no data; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select count(*) from gp_matview_aux where mvname = 'mv_name1'; |
| count |
| ------- |
| 1 |
| (1 row) |
| |
| alter materialized view mv_name1 rename to mv_name2; |
| select count(*) from gp_matview_aux where mvname = 'mv_name1'; |
| count |
| ------- |
| 0 |
| (1 row) |
| |
| select count(*) from gp_matview_aux where mvname = 'mv_name2'; |
| count |
| ------- |
| 1 |
| (1 row) |
| |
| abort; |
| -- start_ignore |
| CREATE EXTENSION IF NOT EXISTS gp_inject_fault; |
| NOTICE: extension "gp_inject_fault" already exists, skipping |
| -- end_ignore |
| create table par_normal_oid(a int, b int) partition by range(a) using ao_row distributed randomly; |
| select gp_inject_fault('bump_oid', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| create table sub_par1_large_oid partition of par_normal_oid for values from (1) to (2) using ao_row; |
| NOTICE: table has parent, setting distribution columns to match parent table |
| select 'sub_par1_large_oid'::regclass::oid > x'7FFFFFFF'::bigint; |
| ?column? |
| ---------- |
| t |
| (1 row) |
| |
| select gp_inject_fault('bump_oid', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| create materialized view mv_par_normal_oid as |
| select count(*) from par_normal_oid; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_par_normal_oid'; |
| mvname | datastatus |
| -------------------+------------ |
| mv_par_normal_oid | u |
| (1 row) |
| |
| insert into par_normal_oid values(1, 2); |
| select mvname, datastatus from gp_matview_aux where mvname = 'mv_par_normal_oid'; |
| mvname | datastatus |
| -------------------+------------ |
| mv_par_normal_oid | i |
| (1 row) |
| |
| --start_ignore |
| drop schema matview_data_schema cascade; |
| NOTICE: drop cascades to 13 other objects |
| DETAIL: drop cascades to table t2 |
| drop cascades to table t3 |
| drop cascades to materialized view mv3 |
| drop cascades to table par |
| drop cascades to materialized view mv_par |
| drop cascades to materialized view mv_par1 |
| drop cascades to materialized view mv_par1_1 |
| drop cascades to materialized view mv_par1_2 |
| drop cascades to materialized view mv_par2 |
| drop cascades to materialized view mv_par2_1 |
| drop cascades to extension gp_inject_fault |
| drop cascades to table par_normal_oid |
| drop cascades to materialized view mv_par_normal_oid |
| --end_ignore |
| reset enable_answer_query_using_materialized_views; |
| reset optimizer; |