CREATE TABLE tenk_heap (
unique1 int4,
unique2 int4,
two int4,
four int4,
ten int4,
twenty int4,
hundred int4,
thousand int4,
twothousand int4,
fivethous int4,
tenthous int4,
odd int4,
even int4,
stringu1 name,
stringu2 name,
string4 name
) with (appendonly=false) distributed by(unique1);
--
-- create a few AO tables and test various reloptions combinations. use a
-- sample of them (the first 4) for later testing.
--
-- valid
CREATE TABLE tenk_ao1 (like tenk_heap) with (appendonly=true, checksum=true) distributed by(unique1);
-- We used to check xmin here and in other places in this test, but now the new
-- gp_fastsequence rows are frozen via hint bits (HEAP_XMIN_FROZEN), so we don't
-- check that anymore.
SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid
IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class
WHERE relname='tenk_ao1'));
objmod | last_sequence | gp_segment_id
--------+---------------+---------------
0 | 0 | 1
0 | 0 | 2
0 | 0 | 0
(3 rows)
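-- A hedged aside (not part of the original checks): the nested IN-subqueries
-- above can also be written as an explicit join; this sketch assumes only the
-- pg_appendonly.segrelid column used throughout this file plus a regclass cast.
SELECT f.objmod, f.last_sequence, f.gp_segment_id
FROM gp_dist_random('gp_fastsequence') f
JOIN pg_appendonly a ON f.objid = a.segrelid
WHERE a.relid = 'tenk_ao1'::regclass;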
CREATE TABLE tenk_ao2 (like tenk_heap) with (appendonly=true, compresslevel=0, blocksize=262144) distributed by(unique1);
CREATE TABLE tenk_ao3 (like tenk_heap) with (appendonly=true, compresslevel=6, blocksize=1048576, checksum=true) distributed by(unique1);
CREATE TABLE tenk_ao4 (like tenk_heap) with (appendonly=true, compresslevel=1, compresstype=zlib) distributed by(unique1);
CREATE TABLE tenk_ao5 (like tenk_heap) with (appendonly=true, compresslevel=6, compresstype=zlib, blocksize=1048576, checksum=true) distributed by(unique1);
-- invalid
CREATE TABLE tenk_ao6 (like tenk_heap) with (appendonly=false, compresslevel=6, checksum=true) distributed by(unique1);
ERROR: unrecognized parameter "compresslevel"
CREATE TABLE tenk_ao7 (like tenk_heap) with (appendonly=true, compresslevel=16, compresstype=zlib) distributed by(unique1);
ERROR: compresslevel=16 is out of range for zlib (should be in the range 1 to 9)
CREATE TABLE tenk_ao8 (like tenk_heap) with (appendonly=true, blocksize=100) distributed by(unique1);
ERROR: value 100 out of bounds for option "blocksize"
DETAIL: Valid values are between "8192" and "2097152".
CREATE TABLE tenk_ao9 (like tenk_heap) with (appendonly=true, compresslevel=0, compresstype=zlib) distributed by(unique1);
ERROR: compresstype "zlib" can't be used with compresslevel 0
-- these should not work without appendonly=true
CREATE TABLE tenk_ao10 (like tenk_heap) with (compresslevel=5);
ERROR: unrecognized parameter "compresslevel"
CREATE TABLE tenk_ao11 (like tenk_heap) with (blocksize=8192);
ERROR: unrecognized parameter "blocksize"
CREATE TABLE tenk_ao12 (like tenk_heap) with (appendonly=false,blocksize=8192);
ERROR: unrecognized parameter "blocksize"
-- appendoptimized is an alias for appendonly
CREATE TABLE tenk_ao13 (like tenk_heap) with (appendoptimized=true);
CREATE TABLE tenk_ao14 (like tenk_heap) with (appendonly=true, appendoptimized=false);
ERROR: parameter "appendonly" specified more than once
CREATE TABLE tenk_ao14 (like tenk_heap) with (appendoptimized=maybe);
ERROR: appendoptimized requires a Boolean value
CREATE TABLE tenk_ao14 (like tenk_heap) with (appendoptimized=true, appendoptimized=true);
ERROR: parameter "appendonly" specified more than once
CREATE TABLE tenk_ao14 (like tenk_heap) with (orientation=row);
ERROR: invalid option "orientation" for base relation
HINT: Table orientation only valid for Append Optimized relations, create an AO relation to use table orientation.
CREATE TABLE tenk_ao14 (like tenk_heap) with (appendoptimized=false, orientation=row);
ERROR: invalid option "orientation" for base relation
HINT: Table orientation only valid for Append Optimized relations, create an AO relation to use table orientation.
CREATE TABLE tenk_ao14 (like tenk_heap) with (appendoptimized=true, orientation=foo);
ERROR: invalid parameter value for "orientation": "foo"
CREATE TABLE tenk_ao14 (like tenk_heap) with (appendoptimized=true, orientation);
ERROR: orientation requires a parameter
CREATE TABLE tenk_ao14 (like tenk_heap) with (appendoptimized=true, orientation=row, orientation=row);
ERROR: parameter "orientation" specified more than once
CREATE TABLE tenk_ao15 (like tenk_heap) with (appendoptimized=true)
PARTITION BY RANGE(unique2)
(
START (0) end (1) WITH (appendoptimized=true),
START (1) end (2) WITH (appendoptimized=true),
START (2) end (3) WITH (appendoptimized=true)
);
CREATE TABLE tenk_ao16 (like tenk_heap) with (appendoptimized=true)
PARTITION BY RANGE(unique2)
(
START (0) end (1) WITH (appendonly=true, appendoptimized=false),
START (1) end (2) WITH (appendoptimized=true),
START (2) end (3) WITH (appendoptimized=true)
);
ERROR: parameter "appendonly" specified more than once
-- also make sure appendoptimized works in the gp_default_storage_options GUC
SET gp_default_storage_options = 'blocksize=32768,compresstype=none,checksum=true,orientation=row,appendoptimized=false';
ERROR: invalid storage option "orientation"
HINT: For table access methods use "default_table_access_method" instead.
SET gp_default_storage_options = 'blocksize=32768,compresstype=none,checksum=true,orientation=row';
ERROR: invalid storage option "orientation"
HINT: For table access methods use "default_table_access_method" instead.
SET gp_default_storage_options = 'blocksize=32768,compresstype=none,checksum=true,blocksize=8';
ERROR: parameter "blocksize" specified more than once
SET gp_default_storage_options = 'blocksize=65536,compresstype=none,checksum=true';
SHOW gp_default_storage_options;
gp_default_storage_options
-------------------------------------------------
blocksize=65536,compresstype=none,checksum=true
(1 row)
SET default_table_access_method = ao_row;
SHOW default_table_access_method;
default_table_access_method
-----------------------------
ao_row
(1 row)
CREATE TABLE tenk_ao14 (like tenk_heap);
SELECT amhandler, reloptions FROM pg_class c, PG_AM a WHERE c.relname = 'tenk_ao14' AND c.relam = a.oid;
amhandler | reloptions
------------------------+-------------------
ao_row_tableam_handler | {blocksize=65536}
(1 row)
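-- Hedged sketch: with table access methods, the same effect can be requested
-- per-table instead of via the GUC; this assumes the ao_row access method name
-- shown above.
CREATE TABLE tenk_ao14_explicit (like tenk_heap) USING ao_row;
DROP TABLE tenk_ao14_explicit;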
RESET gp_default_storage_options;
RESET default_table_access_method;
--------------------
-- catalog checks
--------------------
-- check pg_appendonly
SELECT c.relname, a.blocksize, a.compresstype, a.compresslevel, a.checksum FROM pg_class c, pg_appendonly a
WHERE c.relname LIKE 'tenk_ao%' AND c.oid=a.relid AND c.relname not like 'tenk_aocs%' ORDER BY c.relname;
relname | blocksize | compresstype | compresslevel | checksum
-------------------+-----------+--------------+---------------+----------
tenk_ao1 | 32768 | | 0 | t
tenk_ao13 | 32768 | | 0 | t
tenk_ao14 | 65536 | | 0 | t
tenk_ao15_1_prt_1 | 32768 | | 0 | t
tenk_ao15_1_prt_2 | 32768 | | 0 | t
tenk_ao15_1_prt_3 | 32768 | | 0 | t
tenk_ao2 | 262144 | | 0 | t
tenk_ao3 | 1048576 | zlib | 6 | t
tenk_ao4 | 32768 | zlib | 1 | t
tenk_ao5 | 1048576 | zlib | 6 | t
(10 rows)
--------------------
-- fn needed later
--------------------
-- aototal() returns the tuple count of an AO table by summing the tupcount
-- column of its pg_aoseg auxiliary relation across all segments.
create or replace function aototal(relname text) returns float8 as $$
declare
  aosegname text;
  tupcount float8 := 0;
  rc int := 0;
begin
  -- look up the name of the pg_aoseg relation backing this AO table
  execute 'select relname from pg_class where oid=(select segrelid from pg_class, pg_appendonly where relname=''' || relname || ''' and relid = pg_class.oid)' into aosegname;
  if aosegname is not null then
    -- sum the per-segfile tuple counts from every segment
    execute 'select sum(tupcount) from gp_dist_random(''pg_aoseg.' || aosegname || ''')' into tupcount;
  end if;
  return tupcount;
end; $$ language plpgsql volatile READS SQL DATA;
-- ------------------
-- functions for snapshotting the current mod counts, and checking the diff
-- vs last snapshot
-- ------------------
create temp table modcounts_snapshot (segment_id int, aosegno int, modcount bigint) distributed randomly;
create or replace function snapshot_modcounts(relname regclass) returns void as $$
begin
delete from modcounts_snapshot;
insert into modcounts_snapshot select segment_id, segno, modcount from gp_toolkit.__gp_aoseg(relname);
end;
$$ language plpgsql volatile;
create or replace function diff_modcounts(relname regclass, segment_id out int, aosegno out int, modcount_diff out bigint) returns setof record as $$
select a.segment_id, a.segno, a.modcount - coalesce(b.modcount, 0) mod_diff from gp_toolkit.__gp_aoseg($1) a left outer join modcounts_snapshot b on a.segment_id = b.segment_id and a.segno = b.aosegno;
$$ language sql;
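-- Minimal usage sketch for the two helpers above (the same pattern is
-- exercised in the UAO section later in this file): snapshot the modcounts,
-- then diff against the snapshot after any DML.
select snapshot_modcounts('tenk_ao1');
select count(*) from diff_modcounts('tenk_ao1') where modcount_diff > 0;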
--------------------
-- supported sql
--------------------
-- COPY
COPY tenk_heap FROM '@abs_srcdir@/data/tenk.data';
COPY tenk_ao1 FROM '@abs_srcdir@/data/tenk.data';
COPY tenk_ao2 FROM '@abs_srcdir@/data/tenk.data';
COPY tenk_ao3 FROM '@abs_srcdir@/data/tenk.data';
COPY tenk_ao4 FROM '@abs_srcdir@/data/tenk.data';
ANALYZE tenk_heap;
ANALYZE tenk_ao1;
ANALYZE tenk_ao2;
ANALYZE tenk_ao3;
ANALYZE tenk_ao4;
-- SELECT
SELECT count(*) FROM tenk_heap;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao1;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao2;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao3;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao4;
count
-------
10000
(1 row)
SELECT aototal('tenk_ao1'), aototal('tenk_ao2'), aototal('tenk_ao3'), aototal('tenk_ao4');
aototal | aototal | aototal | aototal
---------+---------+---------+---------
10000 | 10000 | 10000 | 10000
(1 row)
-- INSERT SELECT
INSERT INTO tenk_ao1 SELECT * FROM tenk_heap;
INSERT INTO tenk_ao2 SELECT * FROM tenk_heap;
INSERT INTO tenk_ao3 SELECT * FROM tenk_heap;
INSERT INTO tenk_ao4 SELECT * FROM tenk_heap;
-- mix and match some
INSERT INTO tenk_ao1 SELECT * FROM tenk_ao1;
INSERT INTO tenk_ao2 SELECT * FROM tenk_ao3;
INSERT INTO tenk_ao3 SELECT * FROM tenk_ao2;
INSERT INTO tenk_ao4 SELECT * FROM tenk_ao3;
SELECT aototal('tenk_ao1'), aototal('tenk_ao2'), aototal('tenk_ao3'), aototal('tenk_ao4');
aototal | aototal | aototal | aototal
---------+---------+---------+---------
40000 | 40000 | 60000 | 80000
(1 row)
-- SELECT
SELECT count(*) FROM tenk_heap;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao1;
count
-------
40000
(1 row)
SELECT count(*) FROM tenk_ao2;
count
-------
40000
(1 row)
SELECT count(*) FROM tenk_ao3;
count
-------
60000
(1 row)
SELECT count(*) FROM tenk_ao4;
count
-------
80000
(1 row)
--
-- Test that the catalog eof entry doesn't change even if the file gets
-- larger due to bad data that isn't cleaned up until the next VACUUM.
-- make sure the SELECT stops at eof (count is the same).
-- The first row is good (so it grows the file), the second is bad.
--
COPY tenk_ao1 FROM STDIN;
ERROR: invalid input syntax for type integer: "bad data row"
CONTEXT: COPY tenk_ao1, line 2, column unique1: "bad data row"
COPY tenk_ao2 FROM STDIN;
ERROR: invalid input syntax for type integer: "bad data row"
CONTEXT: COPY tenk_ao2, line 2, column unique1: "bad data row"
COPY tenk_ao3 FROM STDIN;
ERROR: invalid input syntax for type integer: "bad data row"
CONTEXT: COPY tenk_ao3, line 2, column unique1: "bad data row"
COPY tenk_ao4 FROM STDIN;
ERROR: invalid input syntax for type integer: "bad data row"
CONTEXT: COPY tenk_ao4, line 2, column unique1: "bad data row"
SELECT count(*) FROM tenk_ao1;
count
-------
40000
(1 row)
SELECT count(*) FROM tenk_ao2;
count
-------
40000
(1 row)
SELECT count(*) FROM tenk_ao3;
count
-------
60000
(1 row)
SELECT count(*) FROM tenk_ao4;
count
-------
80000
(1 row)
SELECT aototal('tenk_ao1'), aototal('tenk_ao2'), aototal('tenk_ao3'), aototal('tenk_ao4');
aototal | aototal | aototal | aototal
---------+---------+---------+---------
40000 | 40000 | 60000 | 80000
(1 row)
--------------------
-- transactionality
--------------------
-- rollback
BEGIN;
INSERT INTO tenk_ao1 SELECT * FROM tenk_heap;
SELECT count(*) FROM tenk_ao1; -- should show new count
count
-------
50000
(1 row)
ROLLBACK;
SELECT count(*) FROM tenk_ao1; -- should show previous count
count
-------
40000
(1 row)
SELECT aototal('tenk_ao1');
aototal
---------
40000
(1 row)
-- gp_fastsequence should reflect the bump in last_sequence even if the above
-- transaction aborted, as its tuples are updated in place.
SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid
IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class
WHERE relname='tenk_ao1'));
objmod | last_sequence | gp_segment_id
--------+---------------+---------------
0 | 0 | 0
0 | 0 | 1
0 | 0 | 2
1 | 16400 | 2
1 | 17000 | 0
1 | 17000 | 1
(6 rows)
-- commit
BEGIN;
INSERT INTO tenk_ao1 SELECT * FROM tenk_heap;
SELECT count(*) FROM tenk_ao1; -- should show new count
count
-------
50000
(1 row)
COMMIT;
SELECT count(*) FROM tenk_ao1; -- should show new count
count
-------
50000
(1 row)
SELECT aototal('tenk_ao1');
aototal
---------
50000
(1 row)
-- same txn inserts
BEGIN;
INSERT INTO tenk_ao1(unique1) VALUES(12345678);
INSERT INTO tenk_ao1(unique1) VALUES(12345678);
INSERT INTO tenk_ao1(unique1) VALUES(12345678);
INSERT INTO tenk_ao1(unique1) VALUES(12345678);
INSERT INTO tenk_ao1(unique1) VALUES(12345678);
ROLLBACK;
BEGIN;
INSERT INTO tenk_ao1(unique1) VALUES(87654321);
INSERT INTO tenk_ao1(unique1) VALUES(87654321);
INSERT INTO tenk_ao1(unique1) VALUES(87654321);
INSERT INTO tenk_ao1(unique1) VALUES(87654321);
INSERT INTO tenk_ao1(unique1) VALUES(87654321);
COMMIT;
SELECT count(*) FROM tenk_ao1 WHERE unique1 = 12345678; -- should be 0
count
-------
0
(1 row)
SELECT count(*) FROM tenk_ao1 WHERE unique1 = 87654321; -- should be 5
count
-------
5
(1 row)
--------------------
-- cursors (basic)
--------------------
BEGIN;
DECLARE foo1 CURSOR FOR SELECT * FROM tenk_ao1 ORDER BY 1,2,3,4;
DECLARE foo2 CURSOR FOR SELECT * FROM tenk_ao2 ORDER BY 1,2,3,4;
FETCH 1 in foo1;
unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
0 | 9998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AAAAAA | OUOAAA | OOOOxx
(1 row)
FETCH 2 in foo2;
unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
0 | 9998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AAAAAA | OUOAAA | OOOOxx
0 | 9998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AAAAAA | OUOAAA | OOOOxx
(2 rows)
FETCH 1 in foo1;
unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
0 | 9998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AAAAAA | OUOAAA | OOOOxx
(1 row)
FETCH 2 in foo2;
unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
0 | 9998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AAAAAA | OUOAAA | OOOOxx
0 | 9998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AAAAAA | OUOAAA | OOOOxx
(2 rows)
CLOSE foo1;
CLOSE foo2;
END;
BEGIN;
DECLARE foo3 NO SCROLL CURSOR FOR SELECT * FROM tenk_ao1 ORDER BY 1,2,3,4;
FETCH 1 FROM foo3;
unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
0 | 9998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AAAAAA | OUOAAA | OOOOxx
(1 row)
FETCH BACKWARD 1 FROM foo3; -- should fail
ERROR: backward scan is not supported in this version of Apache Cloudberry
END;
-- Cursors outside transaction blocks
BEGIN;
DECLARE foo4 CURSOR WITH HOLD FOR SELECT * FROM tenk_ao1 ORDER BY 1,2,3,4;
FETCH FROM foo4;
unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
0 | 9998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AAAAAA | OUOAAA | OOOOxx
(1 row)
FETCH FROM foo4;
unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
0 | 9998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AAAAAA | OUOAAA | OOOOxx
(1 row)
COMMIT;
FETCH FROM foo4;
unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
0 | 9998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AAAAAA | OUOAAA | OOOOxx
(1 row)
SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors ORDER BY name;
name | statement | is_holdable | is_binary | is_scrollable
------+----------------------------------------------------------------------------+-------------+-----------+---------------
foo4 | DECLARE foo4 CURSOR WITH HOLD FOR SELECT * FROM tenk_ao1 ORDER BY 1,2,3,4; | t | f | f
(1 row)
CLOSE foo4;
-- DROP
DROP TABLE tenk_ao1;
DROP TABLE tenk_ao2;
DROP TABLE tenk_ao3;
DROP TABLE tenk_ao4;
-- CTAS
CREATE TABLE tenk_ao1 with(appendonly=true, checksum=true) AS SELECT * FROM tenk_heap DISTRIBUTED BY (unique1);
-- In case of CTAS as well, gp_fastsequence entries must get created and use segfile 0.
-- last_sequence fluctuates a bit with and without ORCA, hence we check >= 3300:
-- inserting 10k tuples into a 3-node system must leave last_sequence >= 3300
-- on each node.
SELECT objmod,
CASE WHEN last_sequence = 0 THEN 'zero'
WHEN last_sequence >= 3300 THEN '>= 3300'
ELSE '1-2900' END AS last_sequence,
gp_segment_id
FROM gp_dist_random('gp_fastsequence')
WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (
SELECT oid FROM pg_class WHERE relname='tenk_ao1'));
objmod | last_sequence | gp_segment_id
--------+---------------+---------------
0 | >= 3300 | 0
0 | >= 3300 | 1
0 | >= 3300 | 2
(3 rows)
-- Since we check last_sequence for the tenk_ao2 table later, the data distribution should be the same for ORCA and the planner.
-- If we don't specify the DISTRIBUTED BY clause, ORCA will mark the table as distributed randomly.
CREATE TABLE tenk_ao2 with(appendonly=true, compresslevel=0, blocksize=262144) AS SELECT * FROM tenk_heap DISTRIBUTED BY (unique1);
CREATE TABLE tenk_ao3 with(appendonly=true, compresslevel=6, blocksize=1048576, checksum=true) AS SELECT * FROM tenk_heap;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'unique1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE tenk_ao4 with(appendonly=true, compresslevel=1, compresstype=zlib) AS SELECT * FROM tenk_heap;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'unique1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
ANALYZE tenk_ao1;
ANALYZE tenk_ao2;
ANALYZE tenk_ao3;
ANALYZE tenk_ao4;
SELECT c.relname, a.blocksize, a.compresstype, a.compresslevel, a.checksum FROM pg_class c, pg_appendonly a
WHERE c.relname LIKE 'tenk_ao%' AND c.oid=a.relid AND c.relname not like 'tenk_aocs%' ORDER BY c.relname;
relname | blocksize | compresstype | compresslevel | checksum
-------------------+-----------+--------------+---------------+----------
tenk_ao1 | 32768 | | 0 | t
tenk_ao13 | 32768 | | 0 | t
tenk_ao14 | 65536 | | 0 | t
tenk_ao15_1_prt_1 | 32768 | | 0 | t
tenk_ao15_1_prt_2 | 32768 | | 0 | t
tenk_ao15_1_prt_3 | 32768 | | 0 | t
tenk_ao2 | 262144 | | 0 | t
tenk_ao3 | 1048576 | zlib | 6 | t
tenk_ao4 | 32768 | zlib | 1 | t
tenk_ao5 | 1048576 | zlib | 6 | t
(10 rows)
SELECT count(*) FROM tenk_ao1;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao2;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao3;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao4;
count
-------
10000
(1 row)
-- Perform a CREATE followed by an INSERT in the same transaction to validate
-- that gp_fastsequence uses a normal xid and not the frozen transaction id.
BEGIN;
CREATE TABLE appendonly_sametxn_create_insert(a int, b int) with (appendonly=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO appendonly_sametxn_create_insert select * from generate_series(1, 10);
-- Make sure the insert uses segfile 0, as part of the create table itself.
SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid
IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class
WHERE relname='appendonly_sametxn_create_insert'));
objmod | last_sequence | gp_segment_id
--------+---------------+---------------
0 | 100 | 1
0 | 100 | 0
0 | 100 | 2
(3 rows)
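-- Hedged note (assumption: gp_fastsequence allocates row numbers in batches of
-- 100): only 10 rows were inserted, yet last_sequence already reads 100 on
-- every segment because a whole batch is reserved up front.
SELECT bool_and(last_sequence % 100 = 0) AS batched
FROM gp_dist_random('gp_fastsequence')
WHERE objid IN (SELECT segrelid FROM pg_appendonly
                WHERE relid = 'appendonly_sametxn_create_insert'::regclass);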
INSERT INTO appendonly_sametxn_create_insert select * from generate_series(1, 10);
SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid
IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class
WHERE relname='appendonly_sametxn_create_insert'));
objmod | last_sequence | gp_segment_id
--------+---------------+---------------
0 | 200 | 1
0 | 200 | 2
0 | 200 | 0
(3 rows)
ABORT;
-- test get_ao_compression_ratio. use an uncompressed table, so the result is always 1.
SELECT get_ao_compression_ratio('tenk_ao2');
get_ao_compression_ratio
--------------------------
1
(1 row)
-- VACUUM
VACUUM tenk_ao1;
VACUUM tenk_ao2;
VACUUM tenk_ao3;
VACUUM tenk_ao4;
VACUUM FULL tenk_ao1;
ANALYZE tenk_ao2;
ANALYZE tenk_ao4;
VACUUM ANALYZE tenk_ao3;
SELECT count(*) FROM tenk_ao1;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao2;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao3;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao4;
count
-------
10000
(1 row)
-- JOIN
SELECT count(*) FROM tenk_ao1 t1, tenk_ao2 t2 where t1.unique1 = t2.unique2;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao1 t1, tenk_heap t2 where t1.unique1 = t2.unique2;
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao1 t1 INNER JOIN tenk_ao2 t2 ON (t1.unique1 = t2.unique2);
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao1 t1 LEFT OUTER JOIN tenk_ao2 t2 ON (t1.unique1 = t2.unique2);
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao1 t1 RIGHT OUTER JOIN tenk_ao2 t2 ON (t1.unique1 = t2.unique2);
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao1 t1 FULL OUTER JOIN tenk_ao2 t2 ON (t1.unique1 = t2.unique2);
count
-------
10000
(1 row)
SELECT count(*) FROM tenk_ao1 t1 INNER JOIN tenk_ao2 t2 ON (t1.unique1 = t2.unique2) where t1.unique1 = 8095;
count
-------
1
(1 row)
SELECT count(*) FROM tenk_ao1 t1 LEFT OUTER JOIN tenk_ao2 t2 ON (t1.unique1 = t2.unique2) where t1.unique1 = 8095;
count
-------
1
(1 row)
SELECT count(*) FROM tenk_ao1 t1 RIGHT OUTER JOIN tenk_ao2 t2 ON (t1.unique1 = t2.unique2) where t1.unique1 = 8095;
count
-------
1
(1 row)
SELECT count(*) FROM tenk_ao1 t1 FULL OUTER JOIN tenk_ao2 t2 ON (t1.unique1 = t2.unique2) where t1.unique1 = 8095;
count
-------
1
(1 row)
CREATE TABLE empty_ao_table_for_join (like tenk_heap) with (appendonly=true) distributed by(unique1);
SELECT count(*) FROM tenk_ao1 t1 INNER JOIN empty_ao_table_for_join t2 ON (t1.unique1 = t2.unique2);
count
-------
0
(1 row)
SELECT count(*) FROM tenk_ao1 t1 LEFT OUTER JOIN empty_ao_table_for_join t2 ON (t1.unique1 = t2.unique2);
count
-------
10000
(1 row)
-- EXCEPT
SELECT unique1 FROM tenk_ao1 EXCEPT SELECT unique1 FROM tenk_ao1;
unique1
---------
(0 rows)
SELECT unique1 FROM tenk_heap EXCEPT SELECT unique1 FROM tenk_ao3;
unique1
---------
(0 rows)
-- Get gp_fastsequence details before truncate
SELECT objmod,
CASE WHEN last_sequence = 0 THEN 'zero'
WHEN last_sequence >= 3300 THEN '>= 3300'
ELSE '1-2900' END AS last_sequence,
gp_segment_id
FROM gp_dist_random('gp_fastsequence')
WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (
SELECT oid FROM pg_class WHERE relname='tenk_ao2'));
objmod | last_sequence | gp_segment_id
--------+---------------+---------------
0 | >= 3300 | 1
0 | >= 3300 | 2
0 | >= 3300 | 0
(3 rows)
-- TRUNCATE
TRUNCATE tenk_ao2;
-- Truncate changes the relfilenode; as a result the old pg_aoseg table is
-- truncated and gp_fastsequence entries are also reinitialized.
SELECT objmod,
CASE WHEN last_sequence = 0 THEN 'zero'
WHEN last_sequence >= 3300 THEN '>= 3300'
ELSE '1-2900' END AS last_sequence,
gp_segment_id
FROM gp_dist_random('gp_fastsequence')
WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (
SELECT oid FROM pg_class WHERE relname='tenk_ao2'));
objmod | last_sequence | gp_segment_id
--------+---------------+---------------
0 | zero | 0
0 | zero | 2
0 | zero | 1
(3 rows)
-- WITH OIDS is no longer supported
CREATE TABLE aowithoids(a int, b int) WITH (appendonly=true,oids=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
ERROR: tables declared WITH OIDS are not supported
-- CREATE INDEX
CREATE INDEX tenk_ao1_unique1 ON tenk_ao1 USING btree(unique1 int4_ops);
-- Verify that the visimap and block-directory indexes have unique and
-- primary key constraints on them
select indisunique, indisprimary from pg_index
where indexrelid = (select oid from pg_class where relname = 'pg_aovisimap_' || (select oid from pg_class where relname = 'tenk_ao1') || '_index');
indisunique | indisprimary
-------------+--------------
t | t
(1 row)
select indisunique, indisprimary from pg_index
where indexrelid = (select oid from pg_class where relname = 'pg_aoblkdir_' || (select oid from pg_class where relname = 'tenk_ao1') || '_index');
indisunique | indisprimary
-------------+--------------
t | t
(1 row)
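-- Hedged sketch (assumption: pg_appendonly also exposes blkdirrelid and
-- visimaprelid, alongside the segrelid column used throughout this file):
-- list all auxiliary relations of tenk_ao1 by name.
select c.relname from pg_appendonly a
join pg_class c on c.oid in (a.segrelid, a.blkdirrelid, a.visimaprelid)
where a.relid = 'tenk_ao1'::regclass;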
drop table if exists ao;
create table ao (i int, j int, k varchar) with(appendonly=true);
insert into ao values (1,1,'a'), (2,2,'aa'), (3,3,'aaa'), (4,4,'aaaa'),
(5,5,'aaaaa'), (6,6,'aaaaaa'), (7,7,'aaaaaaa'), (8,8,'aaaaaaaa');
analyze ao;
create index ao_j on ao using btree(j);
create index ao_k on ao using btree(k);
create index ao_jk on ao using btree((j + length(k)));
set enable_seqscan=off;
select * from ao where j = 2;
i | j | k
---+---+----
2 | 2 | aa
(1 row)
insert into ao values (9,1,'b'), (10,2,'bb'), (11,3,'bbb'), (12,4,'bbbb'),
(13,5,'aaaaa'), (14,6,'aaaaaa'), (15,7,'aaaaaaa'), (16,8,'aaaaaaaa');
select * from ao where j = 2;
i | j | k
----+---+----
2 | 2 | aa
10 | 2 | bb
(2 rows)
insert into ao values (9,2,'b'), (10,2,'bb'), (11,2,'bbb'), (12,2,'bbbb'),
(13,5,'aaaaa'), (14,6,'aaaaaa'), (15,7,'aaaaaaa'), (16,8,'aaaaaaaa');
select * from ao where j = 2;
i | j | k
----+---+------
11 | 2 | bbb
9 | 2 | b
12 | 2 | bbbb
2 | 2 | aa
10 | 2 | bb
10 | 2 | bb
(6 rows)
create index ao_ij on ao (i, j) with (fillfactor=10);
alter index ao_ij set (fillfactor=20);
reindex index ao_ij;
select indexname from pg_indexes where tablename = 'ao' order by indexname;
indexname
-----------
ao_ij
ao_j
ao_jk
ao_k
(4 rows)
alter table ao alter j type bigint;
alter table ao rename j to j_renamed;
alter table ao drop column j_renamed;
select tablename, attname, avg_width, n_distinct from pg_stats where tablename = 'ao' order by attname, tablename;
tablename | attname | avg_width | n_distinct
-----------+---------+-----------+------------
ao | i | 4 | -1
ao | k | 5 | -1
(2 rows)
create index ao_i on ao (i) where i = 9;
analyze ao;
select tablename, attname, avg_width, n_distinct from pg_stats where tablename = 'ao' order by attname, tablename;
tablename | attname | avg_width | n_distinct
-----------+---------+-----------+------------
ao | i | 4 | -0.6666667
ao | k | 5 | -0.5
(2 rows)
select indexname from pg_indexes where tablename = 'ao' order by indexname;
indexname
-----------
ao_i
ao_k
(2 rows)
select * from ao where i = 9;
i | k
---+---
9 | b
9 | b
(2 rows)
alter index ao_i rename to ao_i_renamed;
select indexname from pg_indexes where tablename = 'ao' order by indexname;
indexname
--------------
ao_i_renamed
ao_k
(2 rows)
drop index if exists ao_i_renamed;
drop table if exists ao;
create table ao (i int, j int, k varchar) with(appendonly=true);
insert into ao values (1,1,'a'), (2,2,'aa'), (3,3,'aaa'), (4,4,'aaaa'),
(5,5,'aaaaa'), (6,6,'aaaaaa'), (7,7,'aaaaaaa'), (8,8,'aaaaaaaa');
create index ao_j on ao using bitmap(j);
create index ao_k on ao using bitmap(k);
create index ao_jk on ao using bitmap((j + length(k)));
set enable_seqscan=off;
select * from ao where j = 2;
i | j | k
---+---+----
2 | 2 | aa
(1 row)
insert into ao values (9,1,'b'), (10,2,'bb'), (11,3,'bbb'), (12,4,'bbbb'),
(13,5,'aaaaa'), (14,6,'aaaaaa'), (15,7,'aaaaaaa'), (16,8,'aaaaaaaa');
select * from ao where j = 2;
i | j | k
----+---+----
2 | 2 | aa
10 | 2 | bb
(2 rows)
insert into ao values (9,2,'b'), (10,2,'bb'), (11,2,'bbb'), (12,2,'bbbb'),
(13,5,'aaaaa'), (14,6,'aaaaaa'), (15,7,'aaaaaaa'), (16,8,'aaaaaaaa');
select * from ao where j = 2;
i | j | k
----+---+------
11 | 2 | bbb
9 | 2 | b
12 | 2 | bbbb
2 | 2 | aa
10 | 2 | bb
10 | 2 | bb
(6 rows)
-- small test on a performance bug in bitmap indexes due to large tid gaps
insert into ao select i, 0, 'aaaaaaa' from generate_series(1, 20) i;
insert into ao select i, 1, 'aaa' from generate_series(1, 20) i;
insert into ao select i, 2, 'a' from generate_series(1, 20) i;
select distinct j from ao where j > -1 and j < 3 order by j;
j
---
0
1
2
(3 rows)
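-- Hedged sketch: with enable_seqscan off, the predicate above should be served
-- by a bitmap scan over the bitmap indexes; the exact plan shape is an
-- assumption and varies by planner.
explain (costs off) select distinct j from ao where j > -1 and j < 3;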
-- TEMP TABLES w/ INDEXES
create temp table temp_tenk_ao5 with (appendonly=true, compresstype=zlib, compresslevel=1)
as select * from tenk_ao5 distributed by (unique1);
create index temp_even_index on temp_tenk_ao5 (even);
select count(*) from temp_tenk_ao5;
count
-------
0
(1 row)
select i.tablename, i.indexname, pg_get_indexdef(c.oid, 0, TRUE) from pg_indexes i join pg_class c on i.indexname=c.relname and i.tablename='temp_tenk_ao5';
tablename | indexname | pg_get_indexdef
---------------+-----------------+------------------------------------------------------------------
temp_tenk_ao5 | temp_even_index | CREATE INDEX temp_even_index ON temp_tenk_ao5 USING btree (even)
(1 row)
insert into temp_tenk_ao5(unique1, unique2) values (99998888, 99998888);
update temp_tenk_ao5 set unique2 = 99998889 where unique2 = 99998888;
delete from temp_tenk_ao5 where unique2 = 99998889;
select count(*) from temp_tenk_ao5;
count
-------
0
(1 row)
vacuum analyze temp_tenk_ao5;
truncate table temp_tenk_ao5;
\d temp_tenk_ao5
Table "pg_temp_274.temp_tenk_ao5"
Column | Type | Collation | Nullable | Default
-------------+---------+-----------+----------+---------
unique1 | integer | | |
unique2 | integer | | |
two | integer | | |
four | integer | | |
ten | integer | | |
twenty | integer | | |
hundred | integer | | |
thousand | integer | | |
twothousand | integer | | |
fivethous | integer | | |
tenthous | integer | | |
odd | integer | | |
even | integer | | |
stringu1 | name | | |
stringu2 | name | | |
string4 | name | | |
Compression Type: zlib
Compression Level: 1
Block Size: 32768
Checksum: t
Indexes:
"temp_even_index" btree (even)
Distributed by: (unique1)
insert into temp_tenk_ao5(unique1, unique2) values (99998888, 99998888);
select unique1 from temp_tenk_ao5;
unique1
----------
99998888
(1 row)
-- TEMP TABLES w/ COMMIT DROP AND USING PREPARE
begin;
prepare tenk_ao5_prep(int4) as select * from tenk_ao5 where unique1 > 8000;
create temp table tenk_ao5_temp_drop with (appendonly=true, compresstype=zlib, compresslevel=1)
on commit drop as execute tenk_ao5_prep(8095);
select count(*) from tenk_ao5_temp_drop;
count
-------
0
(1 row)
commit;
select count(*) from tenk_ao5_temp_drop;
ERROR: relation "tenk_ao5_temp_drop" does not exist
LINE 1: select count(*) from tenk_ao5_temp_drop;
^
-- TEMP TABLES w/ COMMIT DELETE ROWS
begin;
create temp table tenk_ao5_temp_delete_rows with (appendonly=true, compresstype=zlib, compresslevel=1)
on commit delete rows as select * from tenk_ao5 where unique1 > 8000 distributed by (unique1);
select count(*) from tenk_ao5_temp_delete_rows;
count
-------
0
(1 row)
commit;
select count(*) from tenk_ao5_temp_delete_rows;
count
-------
0
(1 row)
-- TEMP TABLES w/ COMMIT PRESERVE ROWS
begin;
create temp table tenk_ao5_temp_pres_rows with (appendonly=true, compresstype=zlib, compresslevel=1)
on commit preserve rows as select * from tenk_ao5 where unique1 > 8000 distributed by (unique1);
select count(*) from tenk_ao5_temp_pres_rows;
count
-------
0
(1 row)
commit;
select count(*) from tenk_ao5_temp_pres_rows;
count
-------
0
(1 row)
-- RULES
insert into tenk_ao5(unique1, unique2) values (1, 99998889);
create rule ao_rule_update as on insert to tenk_ao5 do instead update tenk_ao5 set two=2;
insert into tenk_ao5(unique1, unique2) values (2, 99998889);
select distinct two from tenk_ao5;
two
-----
2
(1 row)
create rule ao_rule_delete as on update to tenk_ao5 do instead delete from tenk_ao5 where unique1=1;
insert into tenk_ao5(unique1, unique2) values (3, 99998889); -- should go through both rules
select * from tenk_ao5 where unique1=1;
unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
(0 rows)
---------------------
-- UAO
---------------------
-- DELETE
select count(*) from tenk_ao1;
count
-------
10000
(1 row)
select snapshot_modcounts('tenk_ao1');
snapshot_modcounts
--------------------
(1 row)
select count(*) from diff_modcounts('tenk_ao1') where modcount_diff > 0; --sanity check our test function
count
-------
0
(1 row)
DELETE FROM tenk_ao1 WHERE unique1 = 1;
-- modcount after DELETE must increment to flag table should be included in
-- incremental backup
select count(*) from diff_modcounts('tenk_ao1') where modcount_diff > 0;
count
-------
1
(1 row)
select count(*) from tenk_ao1;
count
-------
9999
(1 row)
-- UPDATE
select snapshot_modcounts('tenk_ao1');
snapshot_modcounts
--------------------
(1 row)
select count(*) from tenk_ao1 where unique2 < 0;
count
-------
0
(1 row)
select count(*) from diff_modcounts('tenk_ao1') where modcount_diff > 0; --sanity check our test function
count
-------
0
(1 row)
UPDATE tenk_ao1 SET unique2 = -unique1 WHERE unique2 <= 5;
UPDATE tenk_ao1 SET two = 2;
-- modcount after UPDATE must increment to flag table should be included in
-- incremental backup
select count(*) from diff_modcounts('tenk_ao1') where modcount_diff > 0;
count
-------
3
(1 row)
select count(*) from tenk_ao1 where unique2 < 0;
count
-------
6
(1 row)
-- ALTER
ALTER TABLE tenk_ao1 RENAME TO tenk_renamed;
ALTER TABLE tenk_renamed ADD COLUMN newcol int default 10;
-- Validate that gp_fastsequence reflects correctly after the ALTERs
SELECT objmod, CASE
WHEN objmod = 0 THEN last_sequence >= 3300 WHEN objmod = 1 THEN last_sequence =
0 END, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT
segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE
relname='tenk_renamed'));
objmod | case | gp_segment_id
--------+------+---------------
0 | t | 0
0 | t | 1
0 | t | 2
(3 rows)
ALTER TABLE tenk_renamed ALTER COLUMN twothousand SET NOT NULL;
ALTER TABLE tenk_renamed ADD COLUMN sercol serial; -- MPP-10015
ALTER TABLE tenk_renamed ADD COLUMN newcol2 int NOT NULL; -- should fail
ERROR: column "newcol2" of relation "tenk_renamed" contains null values (seg1 127.0.0.1:40001 pid=10274)
SELECT count(*) FROM tenk_renamed;
count
-------
9999
(1 row)
ALTER TABLE tenk_renamed RENAME TO tenk_ao1;
--------------------
-- system columns
--------------------
CREATE TABLE syscoltest(a int) WITH (appendonly=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO syscoltest VALUES(1);
SELECT ctid FROM syscoltest;
ctid
--------------
(33554432,2)
(1 row)
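-- Hedged aside (assumption about the AO tuple-id layout: the top bits of the
-- ctid block number carry the segment file number, so block 33554432 = 1 << 25
-- maps to segfile 1):
SELECT 33554432 >> 25 AS segfile_no;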
DROP TABLE syscoltest;
--------------------
-- relation size tests -- make sure they can execute without a block
-- directory, with sanity checks on relative sizes
--------------------
CREATE TABLE aosizetest_1(a int) WITH (appendonly=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE aosizetest_2(a int) WITH (appendonly=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
--
-- size will be < total size because of the segrelid (aoseg) auxiliary relation
--
SELECT pg_relation_size('aosizetest_1') < pg_total_relation_size('aosizetest_1') as total_exceeds_regular;
total_exceeds_regular
-----------------------
t
(1 row)
--
-- CREATE INDEX currently builds the block directory, but dropping the last
-- index does not delete the block directory, so we verify that the size of _2
-- is greater than the size of _1 after this
--
CREATE INDEX aosizetest_2_idx on aosizetest_2(a);
DROP INDEX aosizetest_2_idx;
SELECT pg_total_relation_size('aosizetest_1') < pg_total_relation_size('aosizetest_2') as with_block_dir_exceeds_without;
with_block_dir_exceeds_without
--------------------------------
t
(1 row)
DROP TABLE aosizetest_1;
DROP TABLE aosizetest_2;
-- Check that pg_total_relation_size() returns a sane value for an AO table.
-- (As of this writing, 1436104 bytes, but leave some slack for minor changes
-- to indexes etc.)
CREATE TABLE aosizetest_3(a text) WITH (appendonly=true);
insert into aosizetest_3 select repeat('x', 100) from generate_series(1, 10000);
SELECT pg_total_relation_size('aosizetest_3') between 1400000 and 1500000;
?column?
----------
t
(1 row)
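-- Hedged breakdown sketch: the gap between the main fork and the grand total
-- is expected to come from the AO auxiliary relations and their indexes.
SELECT pg_relation_size('aosizetest_3') AS main_size,
       pg_total_relation_size('aosizetest_3') AS total_size;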
-- These tests validate the current segfile selection algorithm
DROP TABLE IF EXISTS ao_selection;
CREATE TABLE ao_selection (a INT, b INT) WITH (appendonly=true);
INSERT INTO ao_selection VALUES (generate_series(1,100000), generate_series(1,10000));
-- Validate via gp_fastsequence that the insert used a single segfile.
SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid
IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class
WHERE relname='ao_selection'));
objmod | last_sequence | gp_segment_id
--------+---------------+---------------
0 | 0 | 2
1 | 33300 | 2
0 | 0 | 0
1 | 33500 | 0
0 | 0 | 1
1 | 33400 | 1
(6 rows)
-- The following insert, without concurrency, also uses the same segfile as above
INSERT INTO ao_selection values (generate_series(1,100000), generate_series(1,10000));
SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid
IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class
WHERE relname='ao_selection'));
objmod | last_sequence | gp_segment_id
--------+---------------+---------------
0 | 0 | 0
1 | 67000 | 0
0 | 0 | 1
1 | 66800 | 1
0 | 0 | 2
1 | 66600 | 2
(6 rows)
-- Check compression and distribution
create table ao_compress_table (id int, v varchar)
with (appendonly=true, compresstype=zlib, compresslevel=1) distributed by (id);
create table ao_compress_results(table_size int, ao_compress_id_index_size int, ao_compress_v_index_size int) distributed randomly;
create index ao_compress_id_index on ao_compress_table (id);
create index ao_compress_v_index on ao_compress_table (v);
insert into ao_compress_results values (pg_relation_size('ao_compress_table'), pg_relation_size('ao_compress_id_index'), pg_relation_size('ao_compress_v_index'));
insert into ao_compress_table (id, v) values (1, 'ifyouwantto99knowwhatist8329histhenkeepreadingit;;untilyou]findoutyoureyeshurtandyoustil0ldontknow103kwhatitisdoyouunderstandmeyetandifyoustillwanttoknowthenyoupleasekeepreading');
insert into ao_compress_results values (pg_relation_size('ao_compress_table'), pg_relation_size('ao_compress_id_index'), pg_relation_size('ao_compress_v_index'));
select get_ao_compression_ratio('ao_compress_table');
get_ao_compression_ratio
--------------------------
1.27
(1 row)
select get_ao_distribution('ao_compress_table');
get_ao_distribution
---------------------
(1,1)
(1 row)
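-- Hedged equivalent of get_ao_distribution (assumption: it reports
-- (segment_id, tupcount) pairs): count the rows per segment directly.
select gp_segment_id, count(*) from ao_compress_table group by 1;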
truncate table ao_compress_table; -- after truncate, reclaim space from the table and index
insert into ao_compress_results values (pg_relation_size('ao_compress_table'), pg_relation_size('ao_compress_id_index'), pg_relation_size('ao_compress_v_index'));
select count(*) from (select distinct * from ao_compress_results) temp; -- should give 2 after reclaiming space
count
-------
2
(1 row)
-- Truncate multiple times in the same transaction. This performs an unsafe
-- truncate, i.e. a direct file truncate rather than flipping the relfilenode.
create table ao_truncate_multiple_times(a int) with (appendonly=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into ao_truncate_multiple_times select * from generate_series(0, 9);
begin;
select * from ao_truncate_multiple_times;
a
---
8
9
3
4
5
6
7
0
1
2
(10 rows)
truncate table ao_truncate_multiple_times;
select * from ao_truncate_multiple_times;
a
---
(0 rows)
insert into ao_truncate_multiple_times select * from generate_series(10, 19);
select * from ao_truncate_multiple_times;
a
----
18
19
13
14
15
16
17
10
11
12
(10 rows)
truncate table ao_truncate_multiple_times;
select * from ao_truncate_multiple_times;
a
---
(0 rows)
insert into ao_truncate_multiple_times select * from generate_series(20, 29);
select * from ao_truncate_multiple_times;
a
----
28
29
23
24
25
26
27
20
21
22
(10 rows)
abort;
select * from ao_truncate_multiple_times;
a
---
0
1
2
8
9
3
4
5
6
7
(10 rows)
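-- Hedged sanity sketch (gp_toolkit.__gp_aoseg is already used by the helper
-- functions above; tupcount is assumed to be one of its columns): after the
-- abort, the pre-transaction segfile state is visible again.
select segment_id, segno, tupcount from gp_toolkit.__gp_aoseg('ao_truncate_multiple_times');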
--------------------
-- supported sql
--------------------
DROP TABLE tenk_heap;
DROP TABLE tenk_ao1;
DROP TABLE tenk_ao2;
DROP TABLE tenk_ao3;
DROP TABLE tenk_ao4;
DROP TABLE tenk_ao5;
DROP TABLE ao_selection;
---------------------------------------------------------------
-- Sub-transaction tests to validate AO behavior
---------------------------------------------------------------
-- create table in a sub-transaction that aborts while the top transaction commits
BEGIN;
SAVEPOINT sp1;
CREATE TABLE appendonly_subxans_test(a int, b int) WITH (appendonly=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO appendonly_subxans_test SELECT * FROM generate_series(1, 10);
SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid
IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class
WHERE relname='appendonly_subxans_test'));
objmod | last_sequence | gp_segment_id
--------+---------------+---------------
0 | 100 | 0
0 | 100 | 1
0 | 100 | 2
(3 rows)
ROLLBACK TO SAVEPOINT sp1;
COMMIT;
-- create table and insert in a nested subtransaction.
-- The original purpose of this test was to validate that the insert to
-- gp_fastsequence uses NormalXid and not FrozenXid. Since we no longer set the
-- xmin to FrozenTransactionXid, it makes less sense in that regard, but the
-- nested transaction case might still be valuable, so keep running it.
BEGIN;
SAVEPOINT sp1;
CREATE TABLE appendonly_subxans_test(a int, b int) WITH (appendonly=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select oid as appendonly_subxans_test_oid from pg_class
where relname = 'appendonly_subxans_test' \gset
SAVEPOINT sp2;
INSERT INTO appendonly_subxans_test SELECT * FROM generate_series(1, 10);
SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod,
last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence')
WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid =:appendonly_subxans_test_oid);
case | objmod | last_sequence | gp_segment_id
-----------+--------+---------------+---------------
NormalXid | 0 | 100 | 1
NormalXid | 0 | 100 | 0
NormalXid | 0 | 100 | 2
(3 rows)
ROLLBACK TO SAVEPOINT sp1;
COMMIT;
-- No entry should have leaked into gp_fastsequence after the subtransaction
-- rollback.
SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod,
last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence')
WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid =:appendonly_subxans_test_oid);
case | objmod | last_sequence | gp_segment_id
------+--------+---------------+---------------
(0 rows)
-- create table and insert in independent subtransactions; validate that the
-- insert to gp_fastsequence uses NormalXid and not FrozenXid, and persists
-- despite the inserting subtransaction aborting.
BEGIN;
SAVEPOINT sp1;
CREATE TABLE appendonly_subxans_test1(a int, b int) WITH (appendonly=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
RELEASE SAVEPOINT sp1;
SAVEPOINT sp2;
INSERT INTO appendonly_subxans_test1 SELECT * FROM generate_series(1, 10);
SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod,
last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid
IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class
WHERE relname='appendonly_subxans_test1'));
case | objmod | last_sequence | gp_segment_id
-----------+--------+---------------+---------------
NormalXid | 0 | 100 | 0
NormalXid | 0 | 100 | 2
NormalXid | 0 | 100 | 1
(3 rows)
ROLLBACK TO SAVEPOINT sp2;
COMMIT;
ANALYZE appendonly_subxans_test1;
SELECT * FROM appendonly_subxans_test1;
a | b
---+---
(0 rows)
SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod,
last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid
IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class
WHERE relname='appendonly_subxans_test1'));
case | objmod | last_sequence | gp_segment_id
-----------+--------+---------------+---------------
NormalXid | 0 | 100 | 0
NormalXid | 0 | 100 | 1
NormalXid | 0 | 100 | 2
(3 rows)
-- create table in the top transaction but do the first insert in a sub-transaction
BEGIN;
CREATE TABLE appendonly_subxans_test(a int, b int) WITH (appendonly=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create index appendonly_subxans_test_idx on appendonly_subxans_test using btree(a);
SAVEPOINT sp1;
insert into appendonly_subxans_test select * from generate_series(1, 10);
ROLLBACK TO SAVEPOINT sp1;
insert into appendonly_subxans_test select * from generate_series(11, 20);
COMMIT;
ANALYZE appendonly_subxans_test;
-- Validation to make sure gp_fastsequence is not broken. If gp_fastsequence is
-- malfunctioning then this will return a wrong result.
SELECT * FROM appendonly_subxans_test WHERE a < 10;
a | b
---+---
(0 rows)
CREATE ROLE appendony_test_user2;
NOTICE: resource queue required -- using default resource queue "pg_default"
BEGIN;
CREATE TABLE fo(a int, b int) WITH (appendonly=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE INDEX foidx ON fo USING btree(a);
SAVEPOINT sp1;
ALTER TABLE fo OWNER TO appendony_test_user2;
INSERT INTO fo SELECT i,i FROM generate_series(1, 10)i;
ROLLBACK TO SAVEPOINT sp1;
INSERT INTO fo SELECT i,i FROM generate_series(11, 20)i;
COMMIT;
SET enable_seqscan=off;
SET enable_indexscan=on;
SET enable_bitmapscan=on;
SELECT * FROM fo WHERE a < 10;
a | b
---+---
(0 rows)
RESET enable_seqscan;
RESET enable_indexscan;
RESET enable_bitmapscan;
DROP OWNED BY appendony_test_user2 CASCADE;
DROP ROLE IF EXISTS appendony_test_user2;
---------------------------------------------------------------
-- Triggers
---------------------------------------------------------------
CREATE OR REPLACE FUNCTION ao_trigger_func() RETURNS trigger LANGUAGE plpgsql AS $$
begin
raise notice 'new: %', NEW;
return new;
end;
$$;
create table trigger_ao_test (id int4, t text) with (appendonly=true, orientation=row);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table trigger_aocs_test (id int4, t text) with (appendonly=true, orientation=column);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TRIGGER show_trigger_data_trig_on_insert BEFORE INSERT ON trigger_ao_test
FOR EACH ROW EXECUTE PROCEDURE ao_trigger_func();
CREATE TRIGGER show_trigger_data_trig_on_insert BEFORE INSERT ON trigger_aocs_test
FOR EACH ROW EXECUTE PROCEDURE ao_trigger_func();
-- UPDATE or DELETE triggers are not supported on AO tables
CREATE TRIGGER show_trigger_data_trig_on_update BEFORE UPDATE ON trigger_ao_test
FOR EACH ROW EXECUTE PROCEDURE ao_trigger_func();
ERROR: ON UPDATE triggers are not supported on append-only tables
CREATE TRIGGER show_trigger_data_trig_on_update BEFORE UPDATE ON trigger_aocs_test
FOR EACH ROW EXECUTE PROCEDURE ao_trigger_func();
ERROR: ON UPDATE triggers are not supported on append-only tables
CREATE TRIGGER show_trigger_data_trig_on_delete BEFORE DELETE ON trigger_ao_test
FOR EACH ROW EXECUTE PROCEDURE ao_trigger_func();
ERROR: ON DELETE triggers are not supported on append-only tables
CREATE TRIGGER show_trigger_data_trig_on_delete BEFORE DELETE ON trigger_aocs_test
FOR EACH ROW EXECUTE PROCEDURE ao_trigger_func();
ERROR: ON DELETE triggers are not supported on append-only tables
INSERT INTO trigger_ao_test VALUES (1, 'foo');
NOTICE: new: (1,foo) (seg0 127.0.0.1:40000 pid=24242)
INSERT INTO trigger_aocs_test VALUES (1, 'bar');
NOTICE: new: (1,bar) (seg0 127.0.0.1:40000 pid=24242)
COPY trigger_ao_test FROM STDIN;
NOTICE: new: (2,foocopy) (seg0 127.0.0.1:40000 pid=24242)
CONTEXT: COPY trigger_ao_test, line 1: "2 foocopy"
COPY trigger_aocs_test FROM STDIN;
NOTICE: new: (2,barcopy) (seg0 127.0.0.1:40000 pid=24242)
CONTEXT: COPY trigger_aocs_test, line 1: "2 barcopy"
SELECT * FROM trigger_ao_test;
id | t
----+---------
1 | foo
2 | foocopy
(2 rows)
SELECT * FROM trigger_aocs_test;
id | t
----+---------
1 | bar
2 | barcopy
(2 rows)
-----------------
-- Inheritance
-----------------
CREATE TABLE ao_inh_p1(a int, b int) WITH (appendonly = true) DISTRIBUTED RANDOMLY;
CREATE TABLE ao_inh_p2(a int, b int) INHERITS (ao_inh_p1) WITH (appendonly = true) DISTRIBUTED RANDOMLY;
NOTICE: merging column "a" with inherited definition
NOTICE: merging column "b" with inherited definition
CREATE TABLE ao_inh_p3(a int, b int) INHERITS (ao_inh_p1) WITH (appendonly = true);
NOTICE: merging column "a" with inherited definition
NOTICE: merging column "b" with inherited definition
CREATE TABLE ao_inh_p4(a int, b int, c int) INHERITS (ao_inh_p2) WITH (appendonly = true);
NOTICE: merging column "a" with inherited definition
NOTICE: merging column "b" with inherited definition
insert into ao_inh_p1 values (1, 1);
insert into ao_inh_p2 values (2, 2);
insert into ao_inh_p3 values (3, 3);
insert into ao_inh_p4 values (4, 4);
insert into ao_inh_p4 select g, g, g from generate_series(10, 15) g;
select * from ao_inh_p1;
a | b
----+----
1 | 1
2 | 2
3 | 3
4 | 4
10 | 10
11 | 11
12 | 12
13 | 13
14 | 14
15 | 15
(10 rows)
select * from ao_inh_p2;
a | b
----+----
2 | 2
4 | 4
10 | 10
11 | 11
12 | 12
13 | 13
14 | 14
15 | 15
(8 rows)
select * from ao_inh_p4;
a | b | c
----+----+----
4 | 4 |
10 | 10 | 10
11 | 11 | 11
12 | 12 | 12
13 | 13 | 13
14 | 14 | 14
15 | 15 | 15
(7 rows)
update ao_inh_p1 set a = a + 1;
update ao_inh_p2 set b = b + 1;
delete from ao_inh_p1 where a = 13;
select * from ao_inh_p1;
a | b
----+----
2 | 1
3 | 3
4 | 3
5 | 5
11 | 11
12 | 12
14 | 14
15 | 15
16 | 16
(9 rows)
select * from ao_inh_p4;
a | b | c
----+----+----
5 | 5 |
11 | 11 | 10
12 | 12 | 11
14 | 14 | 13
15 | 15 | 14
16 | 16 | 15
(6 rows)
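-- Hedged aside: ONLY restricts the scan to the parent table itself; given the
-- updates above, the parent's single row should now read (2,1).
select * from only ao_inh_p1;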
--------------------------------------------------------------------------------
-- Finally, check whether any dangling gp_fastsequence entries were left
-- behind by this SQL file
--------------------------------------------------------------------------------
SELECT objid FROM gp_fastsequence AS gfs LEFT OUTER JOIN (SELECT oid FROM
pg_class) AS pgc ON (gfs.objid = pgc.oid) WHERE pgc.oid IS NULL;
objid
-------
(0 rows)
-- test case for append-optimized columnar bitmap scan when the row-level bitmap is promoted to page level
drop table if exists bms_ao_bug;
NOTICE: table "bms_ao_bug" does not exist, skipping
create table bms_ao_bug
(c1 int not null,
c2 int not null,
c3 char(100) not null)
with (appendonly=true, compresstype=zlib)
distributed by (c1)
;
insert into bms_ao_bug select 1, 1, a.c1::char(100) from generate_series(1, 2000000) a(c1);
create index bms_ao_bug_ix1 on bms_ao_bug (c2);
set enable_seqscan=off;
set work_mem=256;
select
a.c1,
count(*) row_cnt
from bms_ao_bug a
where a.c2 = 1
group by
a.c1
;
c1 | row_cnt
----+---------
1 | 2000000
(1 row)
drop table bms_ao_bug;
-- test truncate of an AO table in the current transaction and in a sub-transaction;
-- for more details see https://github.com/greenplum-db/gpdb/issues/13699
-- should succeed: create and truncate the AO table in the same transaction
begin;
create table fix_ao_truncate_last_sequence(a int, b int) with (appendonly = true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create index index_fix_ao_truncate_last_sequence on fix_ao_truncate_last_sequence(b);
insert into fix_ao_truncate_last_sequence select i, i from generate_series(1, 5) i;
select count(*) from fix_ao_truncate_last_sequence;
count
-------
5
(1 row)
truncate table fix_ao_truncate_last_sequence;
select count(*) from fix_ao_truncate_last_sequence;
count
-------
0
(1 row)
abort;
-- should succeed: create and truncate the AO table in different (sub-)transactions,
-- and create an index for it.
begin;
create table fix_ao_truncate_last_sequence(a int, b int) with (appendonly = true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create index index_fix_ao_truncate_last_sequence on fix_ao_truncate_last_sequence(b);
insert into fix_ao_truncate_last_sequence select i, i from generate_series(1, 5) i;
select count(*) from fix_ao_truncate_last_sequence;
count
-------
5
(1 row)
savepoint s1;
truncate table fix_ao_truncate_last_sequence;
insert into fix_ao_truncate_last_sequence select 1, 1 from generate_series(1, 10);
select count(*) from fix_ao_truncate_last_sequence;
count
-------
10
(1 row)
rollback to s1;
select count(*) from fix_ao_truncate_last_sequence;
count
-------
5
(1 row)
truncate table fix_ao_truncate_last_sequence;
insert into fix_ao_truncate_last_sequence select 1, 1 from generate_series(1, 5);
select count(*) from fix_ao_truncate_last_sequence;
count
-------
5
(1 row)
abort;
create table fix_issue_605
(
c1 varchar(8),
c2 varchar(12) collate "C",
c3 varchar(10),
c4 varchar(30)
)
with (appendonly=true,compresstype=zstd,compresslevel=5)
distributed by (c1,c3,c4)
partition by range(c1)
(partition p1990 start('19900101') inclusive,
default partition other
);
select c3::varchar from fix_issue_605 where c1='20190830' order by c2;
c3
----
(0 rows)