blob: 70bf5cf9aeee8189c41aea8b3b924751b073487f [file] [log] [blame]
-- Test views/functions to check missing/orphaned data files
-- start_matchsubs
-- m/aoseg_\d+/
-- s/aoseg_\d+/aoseg_xxx/g
-- m/aocsseg_\d+/
-- s/aocsseg_\d+/aocsseg_xxx/g
-- m/aovisimap_\d+/
-- s/aovisimap_\d+/aovisimap_xxx/g
-- m/seg1_pg_tblspc_.*/
-- s/seg1_pg_tblspc_.*/seg1_pg_tblspc_XXX/g
-- m/ERROR\: could not rename .*/
-- s/ERROR\: could not rename .*/ERROR\: could not rename XXX/g
-- m/ERROR\: cannot rename .*/
-- s/ERROR\: cannot rename .*/ERROR\: cannot rename XXX/g
-- end_matchsubs
-- helper function to repeatedly run gp_toolkit.gp_check_orphaned_files for up to 10 minutes,
-- in case any flakiness happens (like background worker makes LOCK pg_class unsuccessful etc.)
CREATE OR REPLACE FUNCTION run_orphaned_files_view()
RETURNS TABLE(gp_segment_id INT, filename TEXT) AS $$
DECLARE
retry_counter INT := 0;
BEGIN
WHILE retry_counter < 120 LOOP
BEGIN
RETURN QUERY SELECT q.gp_segment_id, q.filename FROM gp_toolkit.gp_check_orphaned_files q;
RETURN; -- If successful
EXCEPTION
WHEN OTHERS THEN
RAISE LOG 'attempt failed % with error: %', retry_counter + 1, SQLERRM;
-- When an exception occurs, wait for 5 seconds and then retry
PERFORM pg_sleep(5);
-- Refresh to get the latest pg_stat_activity
PERFORM pg_stat_clear_snapshot();
retry_counter := retry_counter + 1;
END;
END LOOP;
-- all retries failed
RAISE EXCEPTION 'failed to retrieve orphaned files after 10 minutes of retries.';
END;
$$ LANGUAGE plpgsql;
-- we'll use a specific tablespace to test
CREATE TABLESPACE checkfile_ts LOCATION '@testtablespace@';
set default_tablespace = checkfile_ts;
-- create a table that we'll delete the files to test missing files.
-- this have to be created beforehand in order for the tablespace directories to be created.
CREATE TABLE checkmissing_heap(a int, b int, c int);
insert into checkmissing_heap select i,i,i from generate_series(1,100)i;
--
-- Tests for orphaned files
--
-- go to seg1's data directory for the tablespace we just created
\cd @testtablespace@
select dbid from gp_segment_configuration where content = 1 and role = 'p' \gset
\cd :dbid
select get_tablespace_version_directory_name() as version_dir \gset
\cd :version_dir
select oid from pg_database where datname = current_database() \gset
\cd :oid
-- create some orphaned files
\! touch 987654
\! touch 987654.3
-- check orphaned files, note that this forces a checkpoint internally.
set client_min_messages = ERROR;
select gp_segment_id, filename from run_orphaned_files_view();
gp_segment_id | filename
---------------+----------
1 | 987654
1 | 987654.3
(2 rows)
-- test moving the orphaned files
-- firstly, should not move anything if the target directory doesn't exist
select * from gp_toolkit.gp_move_orphaned_files('@testtablespace@/non_exist_dir');
ERROR: could not rename XXX
select gp_segment_id, filename from run_orphaned_files_view();
gp_segment_id | filename
---------------+----------
1 | 987654.3
1 | 987654
(2 rows)
-- should also fail to move if no proper permission to the target directory
\! mkdir @testtablespace@/moving_orphaned_file_test
\! chmod 000 @testtablespace@/moving_orphaned_file_test
select * from gp_toolkit.gp_move_orphaned_files('@testtablespace@/moving_orphaned_file_test');
ERROR: cannot rename XXX
CONTEXT: PL/pgSQL function gp_toolkit.gp_move_orphaned_files(text) line 20 at RETURN QUERY
select gp_segment_id, filename from run_orphaned_files_view();
gp_segment_id | filename
---------------+----------
1 | 987654.3
1 | 987654
(2 rows)
-- should not allow non-superuser to run,
-- though it would complain as soon as non-superuser tries to lock pg_class in gp_toolkit.gp_move_orphaned_files
create role check_file_test_role nosuperuser;
set role = check_file_test_role;
select * from gp_toolkit.gp_move_orphaned_files('@testtablespace@/moving_orphaned_file_test');
ERROR: permission denied for table pg_class
CONTEXT: SQL statement "LOCK TABLE pg_class IN SHARE MODE NOWAIT"
PL/pgSQL function gp_toolkit.gp_move_orphaned_files(text) line 4 at SQL statement
reset role;
drop role check_file_test_role;
\! chmod 700 @testtablespace@/moving_orphaned_file_test
-- should correctly move the orphaned files,
-- filter out exact paths as that could vary
\a
select gp_segment_id, move_success, regexp_replace(oldpath, '^.*/(.+)$', '\1') as oldpath, regexp_replace(newpath, '^.*/(.+)$', '\1') as newpath
from gp_toolkit.gp_move_orphaned_files('@testtablespace@/moving_orphaned_file_test');
gp_segment_id|move_success|oldpath|newpath
1|t|987654|seg1_pg_tblspc_17816_GPDB_7_302307241_17470_987654
1|t|987654.3|seg1_pg_tblspc_17816_GPDB_7_302307241_17470_987654.3
(2 rows)
\a
-- The moved orphaned files are in the target directory tree with a name that indicates its original location in data directory
\cd @testtablespace@/moving_orphaned_file_test/
-- should see the orphaned files being moved
\! ls
seg1_pg_tblspc_37385_GPDB_7_302307241_37039_987654
seg1_pg_tblspc_37385_GPDB_7_302307241_37039_987654.3
-- no orphaned files can be found now
select gp_segment_id, filename from run_orphaned_files_view();
gp_segment_id | filename
---------------+----------
(0 rows)
-- should not affect existing tables
select count(*) from checkmissing_heap;
count
-------
100
(1 row)
reset client_min_messages;
-- go back to the valid data directory
\cd @testtablespace@
select dbid from gp_segment_configuration where content = 1 and role = 'p' \gset
\cd :dbid
select get_tablespace_version_directory_name() as version_dir \gset
\cd :version_dir
select oid from pg_database where datname = current_database() \gset
\cd :oid
--
-- Tests for missing files
--
-- Now remove the data file for the table we just created.
-- But check to see if the working directory is what we expect (under
-- the test tablespace). Also just delete one and only one file that
-- is number-named.
\! if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+' -exec rm {} \; -quit; fi
-- now create AO/CO tables and delete only their extended files
CREATE TABLE checkmissing_ao(a int, b int, c int) using ao_row;
CREATE TABLE checkmissing_co(a int, b int, c int) using ao_column;
insert into checkmissing_ao select i,i,i from generate_series(1,100)i;
insert into checkmissing_co select i,i,i from generate_series(1,100)i;
-- Now remove the extended data file '.1' for the AO/CO tables we just created.
-- Still, check to see if the working directory is what we expect, and only
-- delete exact two '.1' files.
\! if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+\.1' -exec rm {} \; -quit; fi
\! if pwd | grep -q "^@testtablespace@/.*$"; then find . -maxdepth 1 -type f -regex '.*\/[0-9]+\.1' -exec rm {} \; -quit; fi
-- create some normal tables
CREATE TABLE checknormal_heap(a int, b int, c int);
CREATE TABLE checknormal_ao(a int, b int, c int) using ao_row;
CREATE TABLE checknormal_co(a int, b int, c int) using ao_column;
insert into checknormal_heap select i,i,i from generate_series(1,100)i;
insert into checknormal_ao select i,i,i from generate_series(1,100)i;
insert into checknormal_co select i,i,i from generate_series(1,100)i;
-- check non-extended missing files
select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_toolkit.gp_check_missing_files;
gp_segment_id | regexp_replace | relname
---------------+----------------+-------------------
1 | x | checkmissing_heap
(1 row)
SET client_min_messages = ERROR;
-- check extended files
select gp_segment_id, regexp_replace(filename, '\d+', 'x'), relname from gp_toolkit.gp_check_missing_files_ext;
gp_segment_id | regexp_replace | relname
---------------+----------------+-------------------
1 | x | checkmissing_heap
1 | x.1 | checkmissing_ao
1 | x.1 | checkmissing_co
(3 rows)
RESET client_min_messages;
-- cleanup
drop table checkmissing_heap;
drop table checkmissing_ao;
drop table checkmissing_co;
drop table checknormal_heap;
drop table checknormal_ao;
drop table checknormal_co;
\! rm -rf @testtablespace@/*;
DROP TABLESPACE checkfile_ts;