| CREATE TABLE toastable_heap(a text, b varchar, c int) distributed by (b); |
| CREATE TABLE toastable_ao(a text, b varchar, c int) with(appendonly=true, compresslevel=1) distributed by (b); |
| ALTER TABLE toastable_ao ALTER COLUMN a SET STORAGE EXTERNAL; |
| -- Helper function to generate a random string with given length. (Due to the |
| -- implementation, the length is rounded down to nearest multiple of 32. |
| -- That's good enough for our purposes.) |
| CREATE FUNCTION randomtext(len int) returns text as $$ |
| select string_agg(md5(random()::text),'') from generate_series(1, $1 / 32) |
| $$ language sql; |
| create function get_rel_toast_count(relname text) returns int as $$ |
| reltoastoid = plpy.execute("select \'"+ relname +"\'::regclass::oid;")[0]['oid'] |
| count = plpy.execute("select count(*) from pg_toast.pg_toast_"+str(reltoastoid)+";")[0]['count'] |
| return count |
| $$ language plpython3u; |
| -- INSERT |
| -- uses the toast call to store the large tuples |
| INSERT INTO toastable_heap VALUES(repeat('a',100000), repeat('b',100001), 1); |
| INSERT INTO toastable_heap VALUES(repeat('A',100000), repeat('B',100001), 2); |
| INSERT INTO toastable_ao VALUES(repeat('a',100000), repeat('b',100001), 1); |
| INSERT INTO toastable_ao VALUES(repeat('A',100000), repeat('B',100001), 2); |
| -- uncompressable values |
| INSERT INTO toastable_heap VALUES(randomtext(10000000), randomtext(10000032), 3); |
| INSERT INTO toastable_ao VALUES(randomtext(10000000), randomtext(10000032), 3); |
| -- Check that tuples were toasted and are detoasted correctly. we use |
| -- char_length() because it guarantees a detoast without showing tho whole result |
| SELECT char_length(a), char_length(b), c FROM toastable_heap ORDER BY c; |
| char_length | char_length | c |
| -------------+-------------+--- |
| 100000 | 100001 | 1 |
| 100000 | 100001 | 2 |
| 10000000 | 10000032 | 3 |
| (3 rows) |
| |
| SELECT char_length(a), char_length(b), c FROM toastable_ao ORDER BY c; |
| char_length | char_length | c |
| -------------+-------------+--- |
| 100000 | 100001 | 1 |
| 100000 | 100001 | 2 |
| 10000000 | 10000032 | 3 |
| (3 rows) |
| |
| -- Check that small tuples can be correctly toasted as long as it's beyond the toast |
| -- target size (about 1/4 of the table's blocksize) |
| CREATE TABLE toastable_ao2(a int, b int[]) WITH (appendonly=true, blocksize=8192); |
| INSERT INTO toastable_ao2 SELECT 1, array_agg(x) from generate_series(1, 1000) x; |
| INSERT INTO toastable_ao2 SELECT 1, array_agg(x) from generate_series(1, 2030) x; |
| SELECT gp_segment_id, get_rel_toast_count('toastable_ao2') FROM gp_dist_random('gp_id') order by gp_segment_id; |
| gp_segment_id | get_rel_toast_count |
| ---------------+--------------------- |
| 0 | 0 |
| 1 | 2 |
| 2 | 0 |
| (3 rows) |
| |
| -- UPDATE |
| -- (heap rel only) and toast the large tuple |
| UPDATE toastable_heap SET a=repeat('A',100000) WHERE c=1; |
| UPDATE toastable_heap SET a=randomtext(100032) WHERE c=3; |
| SELECT char_length(a), char_length(b) FROM toastable_heap ORDER BY c; |
| char_length | char_length |
| -------------+------------- |
| 100000 | 100001 |
| 100000 | 100001 |
| 100032 | 10000032 |
| (3 rows) |
| |
| -- ALTER |
| -- this will cause a full table rewrite. we make sure the tosted values and references |
| -- stay intact after all the oid switching business going on. |
| ALTER TABLE toastable_heap ADD COLUMN d int DEFAULT 10; |
| ALTER TABLE toastable_ao ADD COLUMN d int DEFAULT 10; |
| SELECT char_length(a), char_length(b), c, d FROM toastable_heap ORDER BY c; |
| char_length | char_length | c | d |
| -------------+-------------+---+---- |
| 100000 | 100001 | 1 | 10 |
| 100000 | 100001 | 2 | 10 |
| 100032 | 10000032 | 3 | 10 |
| (3 rows) |
| |
| SELECT char_length(a), char_length(b), c, d FROM toastable_ao ORDER BY c; |
| char_length | char_length | c | d |
| -------------+-------------+---+---- |
| 100000 | 100001 | 1 | 10 |
| 100000 | 100001 | 2 | 10 |
| 10000000 | 10000032 | 3 | 10 |
| (3 rows) |
| |
| -- TRUNCATE |
| -- remove reference to toast table and create a new one with different values |
| TRUNCATE toastable_heap; |
| TRUNCATE toastable_ao; |
| select gp_segment_id, get_rel_toast_count('toastable_heap') from gp_dist_random('gp_id') order by gp_segment_id; |
| gp_segment_id | get_rel_toast_count |
| ---------------+--------------------- |
| 0 | 0 |
| 1 | 0 |
| 2 | 0 |
| (3 rows) |
| |
| select gp_segment_id, get_rel_toast_count('toastable_ao') from gp_dist_random('gp_id') order by gp_segment_id; |
| gp_segment_id | get_rel_toast_count |
| ---------------+--------------------- |
| 0 | 0 |
| 1 | 0 |
| 2 | 0 |
| (3 rows) |
| |
| INSERT INTO toastable_heap VALUES(repeat('a',100002), repeat('b',100003), 2, 20); |
| INSERT INTO toastable_ao VALUES(repeat('a',100002), repeat('b',100003), 2, 20); |
| SELECT char_length(a), char_length(b), c, d FROM toastable_heap; |
| char_length | char_length | c | d |
| -------------+-------------+---+---- |
| 100002 | 100003 | 2 | 20 |
| (1 row) |
| |
| SELECT char_length(a), char_length(b), c, d FROM toastable_ao; |
| char_length | char_length | c | d |
| -------------+-------------+---+---- |
| 100002 | 100003 | 2 | 20 |
| (1 row) |
| |
| select gp_segment_id, get_rel_toast_count('toastable_heap') from gp_dist_random('gp_id') order by gp_segment_id; |
| gp_segment_id | get_rel_toast_count |
| ---------------+--------------------- |
| 0 | 0 |
| 1 | 0 |
| 2 | 0 |
| (3 rows) |
| |
| select gp_segment_id, get_rel_toast_count('toastable_ao') from gp_dist_random('gp_id') order by gp_segment_id; |
| gp_segment_id | get_rel_toast_count |
| ---------------+--------------------- |
| 0 | 14 |
| 1 | 0 |
| 2 | 0 |
| (3 rows) |
| |
| delete from toastable_heap; |
| delete from toastable_ao; |
| vacuum toastable_heap, toastable_ao; |
| select gp_segment_id, get_rel_toast_count('toastable_heap') from gp_dist_random('gp_id') order by gp_segment_id; |
| gp_segment_id | get_rel_toast_count |
| ---------------+--------------------- |
| 0 | 0 |
| 1 | 0 |
| 2 | 0 |
| (3 rows) |
| |
| select gp_segment_id, get_rel_toast_count('toastable_ao') from gp_dist_random('gp_id') order by gp_segment_id; |
| gp_segment_id | get_rel_toast_count |
| ---------------+--------------------- |
| 0 | 0 |
| 1 | 0 |
| 2 | 0 |
| (3 rows) |
| |
| DROP TABLE toastable_heap; |
| DROP TABLE toastable_ao; |
| -- TODO: figure out a way to verify that the toast tables are dropped |
| -- Test TOAST_MAX_CHUNK_SIZE changes for upgrade. |
| CREATE TABLE toast_chunk_test (a bytea); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| ALTER TABLE toast_chunk_test ALTER COLUMN a SET STORAGE EXTERNAL; |
| -- Alter our TOAST_MAX_CHUNK_SIZE and insert a value we know will be toasted. |
| SELECT DISTINCT gp_inject_fault('decrease_toast_max_chunk_size', 'skip', dbid) |
| FROM pg_catalog.gp_segment_configuration |
| WHERE role = 'p'; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| INSERT INTO toast_chunk_test VALUES (repeat('abcdefghijklmnopqrstuvwxyz', 1000)::bytea); |
| SELECT DISTINCT gp_inject_fault('decrease_toast_max_chunk_size', 'reset', dbid) |
| FROM pg_catalog.gp_segment_configuration |
| WHERE role = 'p'; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- The toasted value should still be read correctly. |
| SELECT * FROM toast_chunk_test WHERE a <> repeat('abcdefghijklmnopqrstuvwxyz', 1000)::bytea; |
| a |
| --- |
| (0 rows) |
| |
| -- Random access into the toast table should work equally well. |
| SELECT encode(substring(a from 521*26+1 for 26), 'escape') FROM toast_chunk_test; |
| encode |
| ---------------------------- |
| abcdefghijklmnopqrstuvwxyz |
| (1 row) |
| |
| -- Test for Github Issue 16906 |
| create table t_16906(a int, b text) distributed by(a); |
| -- Insert two rows and make sure they are in the same segment (same dist key) |
| -- the 1st row's column b must be NULL; |
| -- the 2nd row's column b must be a long string even after toast compression |
| -- for details please refer to the issue page. |
| insert into t_16906 values(1, null); |
| insert into t_16906 values(1, randomtext(10240)); |
| -- Don't want actually fetch all data just need to test |
| -- it does not hit assert fail or error. Using explain |
| -- analyze might introduce a new ansfile for ORCA so here |
| -- I decide to use \o. |
| \o /tmp/t_16906.tmp |
| select * from t_16906; |
| \o |
| drop table t_16906; |