CREATE TABLE toastable_heap(a text, b varchar, c int) distributed by (b);
CREATE TABLE toastable_ao(a text, b varchar, c int) with(appendonly=true, compresslevel=1) distributed by (b);
ALTER TABLE toastable_ao ALTER COLUMN a SET STORAGE EXTERNAL;
-- Helper function to generate a random string of a given length. (Due to the
-- implementation, the length is rounded down to the nearest multiple of 32.
-- That's good enough for our purposes.)
CREATE FUNCTION randomtext(len int) returns text as $$
select string_agg(md5(random()::text),'') from generate_series(1, $1 / 32)
$$ language sql;
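-- (For example, randomtext(100) concatenates floor(100/32) = 3 md5() digests,
-- so it returns a 96-character string.)
-- Helper function to count the rows (chunks) in a relation's toast table.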
create function get_rel_toast_count(relname text) returns int as $$
reltoastoid = plpy.execute("select \'"+ relname +"\'::regclass::oid;")[0]['oid']
count = plpy.execute("select count(*) from pg_toast.pg_toast_"+str(reltoastoid)+";")[0]['count']
return count
$$ language plpython3u;
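-- The chunk counts are checked per segment below by calling this function
-- through gp_dist_random('gp_id'), which runs it once on every primary segment.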
-- INSERT
-- uses the TOAST machinery to store the large values out of line
INSERT INTO toastable_heap VALUES(repeat('a',100000), repeat('b',100001), 1);
INSERT INTO toastable_heap VALUES(repeat('A',100000), repeat('B',100001), 2);
INSERT INTO toastable_ao VALUES(repeat('a',100000), repeat('b',100001), 1);
INSERT INTO toastable_ao VALUES(repeat('A',100000), repeat('B',100001), 2);
-- uncompressible values
INSERT INTO toastable_heap VALUES(randomtext(10000000), randomtext(10000032), 3);
INSERT INTO toastable_ao VALUES(randomtext(10000000), randomtext(10000032), 3);
-- Check that the tuples were toasted and are detoasted correctly. We use
-- char_length() because it guarantees a detoast without printing the whole result.
SELECT char_length(a), char_length(b), c FROM toastable_heap ORDER BY c;
char_length | char_length | c
-------------+-------------+---
100000 | 100001 | 1
100000 | 100001 | 2
10000000 | 10000032 | 3
(3 rows)
SELECT char_length(a), char_length(b), c FROM toastable_ao ORDER BY c;
char_length | char_length | c
-------------+-------------+---
100000 | 100001 | 1
100000 | 100001 | 2
10000000 | 10000032 | 3
(3 rows)
-- Check that small tuples can be correctly toasted as long as they are beyond the
-- toast target size (about 1/4 of the table's blocksize)
CREATE TABLE toastable_ao2(a int, b int[]) WITH (appendonly=true, blocksize=8192);
INSERT INTO toastable_ao2 SELECT 1, array_agg(x) from generate_series(1, 1000) x;
INSERT INTO toastable_ao2 SELECT 1, array_agg(x) from generate_series(1, 2030) x;
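-- Only the second (2030-element) array is expected to exceed the toast target;
-- since both rows carry the same value in column a they should land on the same
-- segment, so exactly one segment's toast table should contain chunks.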
SELECT gp_segment_id, get_rel_toast_count('toastable_ao2') FROM gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 2
2 | 0
(3 rows)
-- UPDATE
-- (heap rel only) replace the large values; the new values get toasted again
UPDATE toastable_heap SET a=repeat('A',100000) WHERE c=1;
UPDATE toastable_heap SET a=randomtext(100032) WHERE c=3;
SELECT char_length(a), char_length(b) FROM toastable_heap ORDER BY c;
char_length | char_length
-------------+-------------
100000 | 100001
100000 | 100001
100032 | 10000032
(3 rows)
-- ALTER
-- this will cause a full table rewrite; make sure the toasted values and references
-- stay intact through all the OID switching that goes on.
ALTER TABLE toastable_heap ADD COLUMN d int DEFAULT 10;
ALTER TABLE toastable_ao ADD COLUMN d int DEFAULT 10;
SELECT char_length(a), char_length(b), c, d FROM toastable_heap ORDER BY c;
char_length | char_length | c | d
-------------+-------------+---+----
100000 | 100001 | 1 | 10
100000 | 100001 | 2 | 10
100032 | 10000032 | 3 | 10
(3 rows)
SELECT char_length(a), char_length(b), c, d FROM toastable_ao ORDER BY c;
char_length | char_length | c | d
-------------+-------------+---+----
100000 | 100001 | 1 | 10
100000 | 100001 | 2 | 10
10000000 | 10000032 | 3 | 10
(3 rows)
-- TRUNCATE
-- drops the reference to the old toast table and creates a new one; re-insert with different values
TRUNCATE toastable_heap;
TRUNCATE toastable_ao;
select gp_segment_id, get_rel_toast_count('toastable_heap') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 0
2 | 0
(3 rows)
select gp_segment_id, get_rel_toast_count('toastable_ao') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 0
2 | 0
(3 rows)
INSERT INTO toastable_heap VALUES(repeat('a',100002), repeat('b',100003), 2, 20);
INSERT INTO toastable_ao VALUES(repeat('a',100002), repeat('b',100003), 2, 20);
SELECT char_length(a), char_length(b), c, d FROM toastable_heap;
char_length | char_length | c | d
-------------+-------------+---+----
100002 | 100003 | 2 | 20
(1 row)
SELECT char_length(a), char_length(b), c, d FROM toastable_ao;
char_length | char_length | c | d
-------------+-------------+---+----
100002 | 100003 | 2 | 20
(1 row)
select gp_segment_id, get_rel_toast_count('toastable_heap') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 0
2 | 0
(3 rows)
select gp_segment_id, get_rel_toast_count('toastable_ao') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 14
1 | 0
2 | 0
(3 rows)
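-- DELETE + VACUUM
-- after vacuuming, both toast tables should be empty again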
delete from toastable_heap;
delete from toastable_ao;
vacuum toastable_heap, toastable_ao;
select gp_segment_id, get_rel_toast_count('toastable_heap') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 0
2 | 0
(3 rows)
select gp_segment_id, get_rel_toast_count('toastable_ao') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 0
2 | 0
(3 rows)
DROP TABLE toastable_heap;
DROP TABLE toastable_ao;
-- TODO: figure out a way to verify that the toast tables are dropped
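-- (One possible sketch, not wired into this test: capture the toast relation's
-- OID before the DROP and confirm it no longer exists afterwards, e.g.
--   SELECT reltoastrelid AS toastoid FROM pg_class WHERE relname = 'toastable_heap' \gset
--   DROP TABLE toastable_heap;
--   SELECT count(*) FROM pg_class WHERE oid = :toastoid;  -- expect 0
-- and similarly per segment via gp_dist_random('pg_class').)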
-- Test TOAST_MAX_CHUNK_SIZE changes for upgrade.
CREATE TABLE toast_chunk_test (a bytea);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
ALTER TABLE toast_chunk_test ALTER COLUMN a SET STORAGE EXTERNAL;
-- Alter our TOAST_MAX_CHUNK_SIZE and insert a value we know will be toasted.
SELECT DISTINCT gp_inject_fault('decrease_toast_max_chunk_size', 'skip', dbid)
FROM pg_catalog.gp_segment_configuration
WHERE role = 'p';
gp_inject_fault
-----------------
Success:
(1 row)
INSERT INTO toast_chunk_test VALUES (repeat('abcdefghijklmnopqrstuvwxyz', 1000)::bytea);
SELECT DISTINCT gp_inject_fault('decrease_toast_max_chunk_size', 'reset', dbid)
FROM pg_catalog.gp_segment_configuration
WHERE role = 'p';
gp_inject_fault
-----------------
Success:
(1 row)
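-- The row above was written while the fault artificially shrank
-- TOAST_MAX_CHUNK_SIZE; after the reset it is read back with the normal chunk
-- size, mimicking an upgrade across a chunk-size change.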
-- The toasted value should still be read correctly.
SELECT * FROM toast_chunk_test WHERE a <> repeat('abcdefghijklmnopqrstuvwxyz', 1000)::bytea;
a
---
(0 rows)
-- Random access into the toast table should work equally well.
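-- (substring() offsets are 1-based, so 521*26+1 starts at the 522nd copy of the
-- 26-character pattern and the slice should be exactly one full 'a'..'z' run.)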
SELECT encode(substring(a from 521*26+1 for 26), 'escape') FROM toast_chunk_test;
encode
----------------------------
abcdefghijklmnopqrstuvwxyz
(1 row)
-- Test for GitHub Issue 16906
create table t_16906(a int, b text) distributed by(a);
-- Insert two rows and make sure they land on the same segment (same dist key):
-- the 1st row's column b must be NULL;
-- the 2nd row's column b must be a long string even after toast compression.
-- For details, please refer to the issue page.
insert into t_16906 values(1, null);
insert into t_16906 values(1, randomtext(10240));
-- We don't actually want to fetch all the data; we just need to check that the
-- query does not hit an assertion failure or error. Using EXPLAIN ANALYZE
-- might introduce a new answer file (ansfile) for ORCA, so \o is used here instead.
\o /tmp/t_16906.tmp
select * from t_16906;
\o
drop table t_16906;