CREATE TABLE toastable_heap(a text, b varchar, c int) distributed by (b);
CREATE TABLE toastable_ao(a text, b varchar, c int) with(appendonly=true, compresslevel=1) distributed by (b);
ALTER TABLE toastable_ao ALTER COLUMN a SET STORAGE EXTERNAL;
-- Helper function to generate a random string of a given length. (Due to the
-- implementation, the length is rounded down to the nearest multiple of 32.
-- That's good enough for our purposes.)
CREATE FUNCTION randomtext(len int) returns text as $$
select string_agg(md5(random()::text),'') from generate_series(1, $1 / 32)
$$ language sql;
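-- (For example, randomtext(100) concatenates floor(100/32) = 3 md5() digests,
-- so it returns a 96-character string.)
-- Helper function to count the rows (chunks) in a relation's toast table.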
create function get_rel_toast_count(relname text) returns int as $$
reltoastoid = plpy.execute("select \'"+ relname +"\'::regclass::oid;")[0]['oid']
count = plpy.execute("select count(*) from pg_toast.pg_toast_"+str(reltoastoid)+";")[0]['count']
return count
$$ language plpython3u;
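-- The chunk counts are checked per segment below by calling this function
-- through gp_dist_random('gp_id'), which runs it once on every primary segment.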
-- INSERT
-- uses the TOAST machinery to store the large values out of line
INSERT INTO toastable_heap VALUES(repeat('a',100000), repeat('b',100001), 1);
INSERT INTO toastable_heap VALUES(repeat('A',100000), repeat('B',100001), 2);
INSERT INTO toastable_ao VALUES(repeat('a',100000), repeat('b',100001), 1);
INSERT INTO toastable_ao VALUES(repeat('A',100000), repeat('B',100001), 2);
-- uncompressible values
INSERT INTO toastable_heap VALUES(randomtext(10000000), randomtext(10000032), 3);
INSERT INTO toastable_ao VALUES(randomtext(10000000), randomtext(10000032), 3);
-- Check that the tuples were toasted and are detoasted correctly. We use
-- char_length() because it guarantees a detoast without printing the whole result.
SELECT char_length(a), char_length(b), c FROM toastable_heap ORDER BY c;
char_length | char_length | c
-------------+-------------+---
100000 | 100001 | 1
100000 | 100001 | 2
10000000 | 10000032 | 3
(3 rows)
SELECT char_length(a), char_length(b), c FROM toastable_ao ORDER BY c;
char_length | char_length | c
-------------+-------------+---
100000 | 100001 | 1
100000 | 100001 | 2
10000000 | 10000032 | 3
(3 rows)
-- Check that small tuples can be correctly toasted as long as they are beyond the
-- toast target size (about 1/4 of the table's blocksize)
CREATE TABLE toastable_ao2(a int, b int[]) WITH (appendonly=true, blocksize=8192);
INSERT INTO toastable_ao2 SELECT 1, array_agg(x) from generate_series(1, 1000) x;
INSERT INTO toastable_ao2 SELECT 1, array_agg(x) from generate_series(1, 2030) x;
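-- Only the second (2030-element) array is expected to exceed the toast target;
-- since both rows carry the same value in column a they should land on the same
-- segment, so exactly one segment's toast table should contain chunks.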
SELECT gp_segment_id, get_rel_toast_count('toastable_ao2') FROM gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 2
2 | 0
(3 rows)
-- UPDATE
-- (heap rel only) replace the large values; the new values get toasted again
UPDATE toastable_heap SET a=repeat('A',100000) WHERE c=1;
UPDATE toastable_heap SET a=randomtext(100032) WHERE c=3;
SELECT char_length(a), char_length(b) FROM toastable_heap ORDER BY c;
char_length | char_length
-------------+-------------
100000 | 100001
100000 | 100001
100032 | 10000032
(3 rows)
-- ALTER
-- this will cause a full table rewrite; make sure the toasted values and references
-- stay intact through all the OID switching that goes on.
ALTER TABLE toastable_heap ADD COLUMN d int DEFAULT 10;
ALTER TABLE toastable_ao ADD COLUMN d int DEFAULT 10;
SELECT char_length(a), char_length(b), c, d FROM toastable_heap ORDER BY c;
char_length | char_length | c | d
-------------+-------------+---+----
100000 | 100001 | 1 | 10
100000 | 100001 | 2 | 10
100032 | 10000032 | 3 | 10
(3 rows)
SELECT char_length(a), char_length(b), c, d FROM toastable_ao ORDER BY c;
char_length | char_length | c | d
-------------+-------------+---+----
100000 | 100001 | 1 | 10
100000 | 100001 | 2 | 10
10000000 | 10000032 | 3 | 10
(3 rows)
-- TRUNCATE
-- drops the reference to the old toast table and creates a new one; re-insert with different values
TRUNCATE toastable_heap;
TRUNCATE toastable_ao;
select gp_segment_id, get_rel_toast_count('toastable_heap') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 0
2 | 0
(3 rows)
select gp_segment_id, get_rel_toast_count('toastable_ao') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 0
2 | 0
(3 rows)
INSERT INTO toastable_heap VALUES(repeat('a',100002), repeat('b',100003), 2, 20);
INSERT INTO toastable_ao VALUES(repeat('a',100002), repeat('b',100003), 2, 20);
SELECT char_length(a), char_length(b), c, d FROM toastable_heap;
char_length | char_length | c | d
-------------+-------------+---+----
100002 | 100003 | 2 | 20
(1 row)
SELECT char_length(a), char_length(b), c, d FROM toastable_ao;
char_length | char_length | c | d
-------------+-------------+---+----
100002 | 100003 | 2 | 20
(1 row)
select gp_segment_id, get_rel_toast_count('toastable_heap') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 0
2 | 0
(3 rows)
select gp_segment_id, get_rel_toast_count('toastable_ao') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 14
1 | 0
2 | 0
(3 rows)
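-- DELETE + VACUUM
-- after vacuuming, both toast tables should be empty again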
delete from toastable_heap;
delete from toastable_ao;
vacuum toastable_heap, toastable_ao;
select gp_segment_id, get_rel_toast_count('toastable_heap') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 0
2 | 0
(3 rows)
select gp_segment_id, get_rel_toast_count('toastable_ao') from gp_dist_random('gp_id') order by gp_segment_id;
gp_segment_id | get_rel_toast_count
---------------+---------------------
0 | 0
1 | 0
2 | 0
(3 rows)
DROP TABLE toastable_heap;
DROP TABLE toastable_ao;
-- TODO: figure out a way to verify that the toast tables are dropped
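-- (One possible sketch, not wired into this test: capture the toast relation's
-- OID before the DROP and confirm it no longer exists afterwards, e.g.
--   SELECT reltoastrelid AS toastoid FROM pg_class WHERE relname = 'toastable_heap' \gset
--   DROP TABLE toastable_heap;
--   SELECT count(*) FROM pg_class WHERE oid = :toastoid;  -- expect 0
-- and similarly per segment via gp_dist_random('pg_class').)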
-- Test TOAST_MAX_CHUNK_SIZE changes for upgrade.
CREATE TABLE toast_chunk_test (a bytea);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
ALTER TABLE toast_chunk_test ALTER COLUMN a SET STORAGE EXTERNAL;
-- Alter our TOAST_MAX_CHUNK_SIZE and insert a value we know will be toasted.
SELECT DISTINCT gp_inject_fault('decrease_toast_max_chunk_size', 'skip', dbid)
FROM pg_catalog.gp_segment_configuration
WHERE role = 'p';
gp_inject_fault
-----------------
Success:
(1 row)
INSERT INTO toast_chunk_test VALUES (repeat('abcdefghijklmnopqrstuvwxyz', 1000)::bytea);
SELECT DISTINCT gp_inject_fault('decrease_toast_max_chunk_size', 'reset', dbid)
FROM pg_catalog.gp_segment_configuration
WHERE role = 'p';
gp_inject_fault
-----------------
Success:
(1 row)
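-- The row above was written while the fault artificially shrank
-- TOAST_MAX_CHUNK_SIZE; after the reset it is read back with the normal chunk
-- size, mimicking an upgrade across a chunk-size change.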
-- The toasted value should still be read correctly.
SELECT * FROM toast_chunk_test WHERE a <> repeat('abcdefghijklmnopqrstuvwxyz', 1000)::bytea;
a
---
(0 rows)
-- Random access into the toast table should work equally well.
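-- (substring() offsets are 1-based, so 521*26+1 starts at the 522nd copy of the
-- 26-character pattern and the slice should be exactly one full 'a'..'z' run.)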
SELECT encode(substring(a from 521*26+1 for 26), 'escape') FROM toast_chunk_test;
encode
----------------------------
abcdefghijklmnopqrstuvwxyz
(1 row)
-- Test for GitHub Issue 16906
create table t_16906(a int, b text) distributed by(a);
-- Insert two rows and make sure they land on the same segment (same dist key):
-- the 1st row's column b must be NULL;
-- the 2nd row's column b must be a long string even after toast compression.
-- For details, please refer to the issue page.
insert into t_16906 values(1, null);
insert into t_16906 values(1, randomtext(10240));
-- We don't actually want to fetch all the data; we just need to check that the
-- query does not hit an assertion failure or error. Using EXPLAIN ANALYZE
-- might introduce a new answer file (ansfile) for ORCA, so \o is used here instead.
\o /tmp/t_16906.tmp
select * from t_16906;
\o
drop table t_16906;