blob: cffe55453951b17c61deeb705fff824dd3a71638 [file] [log] [blame]
-- start_matchsubs
-- m/WARNING:.*Any temporary tables for this session have been dropped because the gang was disconnected/
-- s/session id \=\s*\d+/session id \= DUMMY/gm
-- end_matchsubs
-- Setup, cleanup and validation helpers
-- We need to update the restart point on the mirrors so downstream tests do not
-- attempt to replay records generated by this suite.
-- start_ignore
\! gpconfig -c create_restartpoint_on_ckpt_record_replay -v on --skipvalidation;
\! gpstop -u;
-- end_ignore
CREATE SCHEMA adst;
SET search_path TO adst,public;
-- start_ignore
CREATE LANGUAGE plpython3u;
-- end_ignore
CREATE OR REPLACE FUNCTION setup_tablespace_location_dir_for_test(tablespace_location_dir text) RETURNS VOID AS $$
import os;
import shutil;
import traceback
try:
shutil.rmtree(tablespace_location_dir)
except OSError:
plpy.debug(traceback.format_exc())
plpy.debug('failed to remove tablespace location directory: %s' % (tablespace_location_dir))
os.mkdir(tablespace_location_dir)
$$ LANGUAGE plpython3u;
\set adst_source_tablespace_location @testtablespace@/adst_source
\set adst_destination_tablespace_location @testtablespace@/adst_dest
CREATE or REPLACE FUNCTION setup() RETURNS VOID AS $$
DECLARE
adst_source_tablespace_location text := '@testtablespace@/adst_source';
adst_destination_tablespace_location text := '@testtablespace@/adst_dest';
BEGIN
-- Setup tablespace directories
PERFORM setup_tablespace_location_dir_for_test(adst_source_tablespace_location);
PERFORM setup_tablespace_location_dir_for_test(adst_destination_tablespace_location);
-- setup faults
PERFORM gp_inject_fault('all', 'reset', dbid) FROM gp_segment_configuration;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION list_db_tablespace(database_name text, tablespace_name text) RETURNS TABLE(gp_segment_id int, db_name name, tablespace_name name) AS $$
SELECT d.gp_segment_id, d.datname AS db_name, t.spcname AS tablespace_name
FROM gp_dist_random('pg_database') d, pg_tablespace t WHERE d.dattablespace=t.oid AND t.spcname=tablespace_name AND d.datname=database_name
UNION ALL
SELECT -1 AS gp_segment_id, d.datname AS db_name, t.spcname AS tablespace_name
FROM pg_database d, pg_tablespace t WHERE d.dattablespace=t.oid AND t.spcname=tablespace_name AND d.datname=database_name
ORDER BY gp_segment_id;
$$ LANGUAGE sql;
-- start_ignore
CREATE LANGUAGE plpython3u;
-- end_ignore
CREATE OR REPLACE FUNCTION stat_db_objects(datname text, spcname text)
RETURNS TABLE (dbid int2, relfilenode_dboid_relative_path text, size int)
VOLATILE LANGUAGE plpython3u
AS
$fn$
import os
db_instances = {}
PG_DEFAULT_TSOID = 1663
PG_GLOBAL_TSOID = 1664
result = plpy.execute("SELECT get_tablespace_version_directory_name() AS tablespace_version_dir_name;")
tablespace_version_dir_name = result[0]['tablespace_version_dir_name']
result = plpy.execute("SELECT oid AS dboid FROM pg_database WHERE datname='%s'" % datname)
dboid = result[0]['dboid']
result = plpy.execute("SELECT oid AS tsoid FROM pg_tablespace WHERE spcname='%s'" % spcname)
tsoid = result[0]['tsoid']
result = plpy.execute("select dbid, datadir from gp_segment_configuration;")
for col in range(0, result.nrows()):
db_instances[result[col]['dbid']] = result[col]['datadir']
rows = []
for dbid, datadir in db_instances.items():
relative_path_to_dboid_dir = ''
if tsoid == PG_DEFAULT_TSOID:
absolute_path_to_dboid_dir = '%s/base/%d' % (datadir, dboid)
elif tsoid == PG_GLOBAL_TSOID:
plpy.error("You can't have a database within the global tablespace")
else:
absolute_path_to_dboid_dir = '%(datadir)s/pg_tblspc/%(tsoid)d/%(tablespace_version_dir_name)s/%(dboid)d' % {
'datadir': datadir,
'tsoid': tsoid,
'tablespace_version_dir_name': tablespace_version_dir_name,
'dboid': dboid
}
try:
for relfilenode in os.listdir(absolute_path_to_dboid_dir):
relfilenode_absolute_path = absolute_path_to_dboid_dir + '/' + relfilenode
size_relfilenode = os.stat(relfilenode_absolute_path).st_size
row = {
'relfilenode_dboid_relative_path': '%d/%s' % (dboid, relfilenode),
'dbid': dbid,
'size': size_relfilenode
}
rows.append(row)
except OSError:
plpy.notice("dboid dir for database %s does not exist on dbid = %d" % (datname, dbid))
rows.append({
'relfilenode_dboid_relative_path': None,
'dbid': dbid,
'size': None
})
return rows
$fn$;
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Tests for ALTER DATABASE SET TABLESPACE adhere to the following format. A
-- scenario is provided for each test with a table that compactly specifies
-- expected behavior.
-- The left most column specifies the database instances involved: the master,
-- primaries P0..P3, standby master and mirrors M0..M3.
-- The source_ts and target_ts columns designate the dboid dirs for the database
-- under the source and destination tablespaces respectively. They do not
-- refer to the tablespace location directory, dbid directory or version
-- directory.
-- Values under these two columns are {"deleted", "moved", "remains",
-- "orphaned"}. "deleted" refers to the dboid directory being removed from
-- the filesystem. "moved" refers to the dboid directory being successfully
-- relocated as a part of the ALTER command. "remains" refers to the source
-- dboid directory remaining unchanged in the event of a rollback. "orphaned"
-- refers to dboid directories that could not be cleaned up with our current
-- infrastructure.
-- To aid understanding/tracing for the reader, the cleanup mechanism is
-- provided as additional context. The mechanism is either the in-memory
-- pendingDbDeletes data structure, the de-serialization of a particular
-- XLOG record or N/A if orphaned files can't be avoided.
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Scenario: A successful ALTER operation.
--
-- Expectation
-- +---------+-----------+-----------+------------------------------+-------+
-- | | source_ts | target_ts | cleanup mechanism | fault |
-- +---------+-----------+-----------+------------------------------+-------+
-- | Master | deleted | moved | pendingDbDeletes | |
-- | P0 | deleted | moved | XLOG_XACT_PREPARE | |
-- | P1 | deleted | moved | XLOG_XACT_PREPARE | |
-- | P2 | deleted | moved | XLOG_XACT_PREPARE | |
-- | SMaster | deleted | moved | XLOG_XACT_DISTRIBUTED_COMMIT | |
-- | M0 | deleted | moved | XLOG_XACT_COMMIT_PREPARED | |
-- | M1 | deleted | moved | XLOG_XACT_COMMIT_PREPARED | |
-- | M2 | deleted | moved | XLOG_XACT_COMMIT_PREPARED | |
-- +---------+-----------+-----------+------------------------------+-------+
SELECT setup();
setup
-------
(1 row)
-- Given we create the source and destination tablespaces
CREATE TABLESPACE adst_source_tablespace LOCATION :'adst_source_tablespace_location';
CREATE TABLESPACE adst_destination_tablespace LOCATION :'adst_destination_tablespace_location';
-- And we create a database in the source tablespace
CREATE DATABASE alter_db TABLESPACE adst_source_tablespace;
-- And we ensure that the mirrors have applied the filesystem changes for CREATE DATABASE
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- And we record the filesystem state for the database in the source tablespace
CREATE TEMPORARY TABLE before_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
-- And we track the removal of the dboid dir under the source tablespace by the mirrors.
SELECT gp_inject_fault('after_drop_database_directories', 'sleep', c.dbid) FROM gp_segment_configuration c WHERE role='m';
gp_inject_fault
-----------------
Success:
Success:
Success:
Success:
(4 rows)
-- When we execute an ALTER DATABASE SET TABLESPACE command on alter_db.
ALTER DATABASE alter_db SET TABLESPACE adst_destination_tablespace;
-- Then the tablespace for the database has been updated to the destination tablespace in all segments and the master
SELECT * FROM list_db_tablespace('alter_db', 'adst_destination_tablespace');
gp_segment_id | db_name | tablespace_name
---------------+----------+-----------------------------
-1 | alter_db | adst_destination_tablespace
0 | alter_db | adst_destination_tablespace
1 | alter_db | adst_destination_tablespace
2 | alter_db | adst_destination_tablespace
(4 rows)
-- Ensure that the mirrors have removed the dboid dir under the source tablespace.
SELECT gp_wait_until_triggered_fault('after_drop_database_directories', 1, dbid) FROM gp_segment_configuration WHERE role='m';
gp_wait_until_triggered_fault
-------------------------------
Success:
Success:
Success:
Success:
(4 rows)
-- Then all the files of the database should now be in the dboid directory in the target tablespace directory for all database instances.
CREATE TEMPORARY TABLE after_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_destination_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
(SELECT * FROM before_alter) EXCEPT (SELECT * FROM after_alter);
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
(0 rows)
-- And the dboid directory under the source tablespace directory should remain empty for all database instances.
SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: dboid dir for database alter_db does not exist on dbid = 1
NOTICE: dboid dir for database alter_db does not exist on dbid = 2
NOTICE: dboid dir for database alter_db does not exist on dbid = 3
NOTICE: dboid dir for database alter_db does not exist on dbid = 4
NOTICE: dboid dir for database alter_db does not exist on dbid = 5
NOTICE: dboid dir for database alter_db does not exist on dbid = 6
NOTICE: dboid dir for database alter_db does not exist on dbid = 7
NOTICE: dboid dir for database alter_db does not exist on dbid = 8
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
(8 rows)
-- Cleanup
DROP TABLE before_alter;
DROP TABLE after_alter;
DROP DATABASE alter_db;
DROP TABLESPACE adst_source_tablespace;
DROP TABLESPACE adst_destination_tablespace;
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Scenario: An error on master directly after writing the
-- XLOG_DBASE_CREATE record.
--
-- Note: The primaries (and by extension their mirrors) are not involved
-- here as the error occurs before the dispatch of the ALTER command.
--
-- Expectation
-- +---------+-----------+-----------+-------------------+-------+
-- | | source_ts | target_ts | cleanup mechanism | fault |
-- +---------+-----------+-----------+-------------------+-------+
-- | Master | remains | deleted | pendingDbDeletes | E |
-- | SMaster | remains | deleted | XLOG_XACT_ABORT | |
-- +---------+-----------+-----------+-------------------+-------+
SELECT setup();
setup
-------
(1 row)
-- Given we create the source and destination tablespaces
CREATE TABLESPACE adst_source_tablespace LOCATION :'adst_source_tablespace_location';
CREATE TABLESPACE adst_destination_tablespace LOCATION :'adst_destination_tablespace_location';
-- And we create a database in the source tablespace
CREATE DATABASE alter_db TABLESPACE adst_source_tablespace;
-- And we ensure that the mirrors have applied the filesystem changes for CREATE DATABASE
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- And we record the filesystem state for the database in the source tablespace
CREATE TEMPORARY TABLE before_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
-- And we track the removal of the dboid dir by the standby master.
SELECT gp_inject_fault('after_drop_database_directories', 'sleep', c.dbid) FROM gp_segment_configuration c WHERE content=-1 AND role='m';
gp_inject_fault
-----------------
Success:
(1 row)
-- And introduce an error on the master directly after the XLOG_DBASE_CREATE is written by the master and before the master dispatches the ALTER command.
SELECT gp_inject_fault('inside_move_db_transaction', 'error', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = -1;
gp_inject_fault
-----------------
Success:
(1 row)
-- When we execute an ALTER DATABASE SET TABLESPACE command on alter_db and the fault is triggered.
ALTER DATABASE alter_db SET TABLESPACE adst_destination_tablespace;
ERROR: fault triggered, fault name:'inside_move_db_transaction' fault type:'error'
-- Then the tablespace for the database remains to be the source tablespace in all segments and the master
SELECT * FROM list_db_tablespace('alter_db', 'adst_source_tablespace');
gp_segment_id | db_name | tablespace_name
---------------+----------+------------------------
-1 | alter_db | adst_source_tablespace
0 | alter_db | adst_source_tablespace
1 | alter_db | adst_source_tablespace
2 | alter_db | adst_source_tablespace
(4 rows)
-- Ensure that the standby master has removed the dboid dir under the target tablespace.
SELECT gp_wait_until_triggered_fault('after_drop_database_directories', 1, dbid) FROM gp_segment_configuration WHERE content=-1 AND role='m';
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
-- Then all the files of the database should remain in the dboid directory in the source tablespace directory for all database instances.
CREATE TEMPORARY TABLE after_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
(SELECT * FROM before_alter) EXCEPT (SELECT * FROM after_alter);
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
(0 rows)
-- And the dboid directory under the target tablespace directory is empty for all database instances.
SELECT * FROM stat_db_objects('alter_db', 'adst_destination_tablespace');
NOTICE: dboid dir for database alter_db does not exist on dbid = 1
NOTICE: dboid dir for database alter_db does not exist on dbid = 2
NOTICE: dboid dir for database alter_db does not exist on dbid = 3
NOTICE: dboid dir for database alter_db does not exist on dbid = 4
NOTICE: dboid dir for database alter_db does not exist on dbid = 5
NOTICE: dboid dir for database alter_db does not exist on dbid = 6
NOTICE: dboid dir for database alter_db does not exist on dbid = 7
NOTICE: dboid dir for database alter_db does not exist on dbid = 8
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
(8 rows)
-- Cleanup
DROP TABLE before_alter;
DROP TABLE after_alter;
DROP DATABASE alter_db;
DROP TABLESPACE adst_source_tablespace;
DROP TABLESPACE adst_destination_tablespace;
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Scenario: An error after XLOG_DBASE_CREATE on a primary and error
-- before XLOG_XACT_ABORT on master
--
-- Expectation
-- +---------+-----------+-----------+-------------------+-------+
-- | | source_ts | target_ts | cleanup mechanism | fault |
-- +---------+-----------+-----------+-------------------+-------+
-- | Master | remains | deleted | pendingDbDeletes | |
-- | P0 | remains | deleted | pendingDbDeletes | E |
-- | P1 | remains | deleted | pendingDbDeletes | |
-- | P2 | remains | deleted | pendingDbDeletes | |
-- | SMaster | remains | deleted | XLOG_XACT_ABORT | |
-- | M0 | remains | deleted | XLOG_XACT_ABORT | |
-- | M1 | remains | deleted | XLOG_XACT_ABORT | |
-- | M2 | remains | deleted | XLOG_XACT_ABORT | |
-- +---------+-----------+-----------+-------------------+-------+
SELECT setup();
setup
-------
(1 row)
-- Given we create the source and destination tablespaces
CREATE TABLESPACE adst_source_tablespace LOCATION :'adst_source_tablespace_location';
CREATE TABLESPACE adst_destination_tablespace LOCATION :'adst_destination_tablespace_location';
-- And we create a database in the source tablespace
CREATE DATABASE alter_db TABLESPACE adst_source_tablespace;
-- And we ensure that the mirrors have applied the filesystem changes for CREATE DATABASE
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- And we record the filesystem state for the database in the source tablespace
CREATE TEMPORARY TABLE before_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
-- And we track the removal of the dboid dir by all mirrors including the standby master.
SELECT gp_inject_fault('after_drop_database_directories', 'sleep', c.dbid) FROM gp_segment_configuration c WHERE role='m';
gp_inject_fault
-----------------
Success:
Success:
Success:
Success:
(4 rows)
-- And introduce an error on a primary directly after the XLOG_DBASE_CREATE is written by the primary during dispatch of the ALTER command.
SELECT gp_inject_fault('inside_move_db_transaction', 'error', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- When we execute an ALTER DATABASE SET TABLESPACE command on alter_db and the fault is triggered.
ALTER DATABASE alter_db SET TABLESPACE adst_destination_tablespace;
ERROR: fault triggered, fault name:'inside_move_db_transaction' fault type:'error' (seg0 127.0.0.1:25432 pid=7774)
-- Then the tablespace for the database remains to be the source tablespace in all segments and the master
SELECT * FROM list_db_tablespace('alter_db', 'adst_source_tablespace');
gp_segment_id | db_name | tablespace_name
---------------+----------+------------------------
-1 | alter_db | adst_source_tablespace
0 | alter_db | adst_source_tablespace
1 | alter_db | adst_source_tablespace
2 | alter_db | adst_source_tablespace
(4 rows)
-- Ensure for content 0, the mirror has removed the dboid dir under
-- the target tablespace. We cannot wait for this fault for all
-- mirrors and standby, as don't know till what stage other primaries
-- has completed the command when error happens for content 0. If
-- other primaries end up aborting before starting the directory copy,
-- replay of abort record will not delete the directory.
SELECT gp_wait_until_triggered_fault('after_drop_database_directories', 1, dbid) FROM gp_segment_configuration WHERE role='m' and content = 0;
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
-- Then all the files of the database should remain in the dboid directory in the source tablespace directory for all database instances.
CREATE TEMPORARY TABLE after_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
(SELECT * FROM before_alter) EXCEPT (SELECT * FROM after_alter);
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
(0 rows)
-- Wait for all the mirrors to replay the xlog before checking destination directory
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- And the dboid directory under the target tablespace directory is empty for all database instances.
SELECT * FROM stat_db_objects('alter_db', 'adst_destination_tablespace');
NOTICE: dboid dir for database alter_db does not exist on dbid = 1
NOTICE: dboid dir for database alter_db does not exist on dbid = 2
NOTICE: dboid dir for database alter_db does not exist on dbid = 3
NOTICE: dboid dir for database alter_db does not exist on dbid = 4
NOTICE: dboid dir for database alter_db does not exist on dbid = 5
NOTICE: dboid dir for database alter_db does not exist on dbid = 6
NOTICE: dboid dir for database alter_db does not exist on dbid = 7
NOTICE: dboid dir for database alter_db does not exist on dbid = 8
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
(8 rows)
-- Cleanup
DROP TABLE before_alter;
DROP TABLE after_alter;
DROP DATABASE alter_db;
DROP TABLESPACE adst_source_tablespace;
DROP TABLESPACE adst_destination_tablespace;
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Scenario: An error after XLOG_DBASE_CREATE on Primary and error before
-- XLOG_XACT_ABORT on master
--
-- Expectation
-- +---------+-----------+-----------+-------------------+-------+
-- | | source_ts | target_ts | cleanup mechanism | fault |
-- +---------+-----------+-----------+-------------------+-------+
-- | Master | remains | deleted | pendingDbDeletes | E |
-- | P0 | remains | deleted | pendingDbDeletes | E |
-- | P1 | remains | deleted | pendingDbDeletes | |
-- | P2 | remains | deleted | pendingDbDeletes | |
-- | SMaster | remains | deleted | XLOG_XACT_ABORT | |
-- | M0 | remains | deleted | XLOG_XACT_ABORT | |
-- | M1 | remains | deleted | XLOG_XACT_ABORT | |
-- | M2 | remains | deleted | XLOG_XACT_ABORT | |
-- +---------+-----------+-----------+-------------------+-------+
SELECT setup();
setup
-------
(1 row)
-- Given we create the source and destination tablespaces
CREATE TABLESPACE adst_source_tablespace LOCATION :'adst_source_tablespace_location';
CREATE TABLESPACE adst_destination_tablespace LOCATION :'adst_destination_tablespace_location';
-- And we create a database in the source tablespace
CREATE DATABASE alter_db TABLESPACE adst_source_tablespace;
-- And we ensure that the mirrors have applied the filesystem changes for CREATE DATABASE
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- And we record the filesystem state for the database in the source tablespace
CREATE TEMPORARY TABLE before_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
-- And we track the removal of the dboid dir by all mirrors including the standby master.
SELECT gp_inject_fault('after_drop_database_directories', 'sleep', c.dbid) FROM gp_segment_configuration c WHERE role='m';
gp_inject_fault
-----------------
Success:
Success:
Success:
Success:
(4 rows)
-- And introduce an error on a primary directly after the XLOG_DBASE_CREATE is written by the primary during dispatch of the ALTER command.
SELECT gp_inject_fault('inside_move_db_transaction', 'error', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- And introduce an error on the master after dispatch of the ALTER command and before XLOG_XACT_ABORT is written by the master.
SELECT gp_inject_fault('transaction_abort_failure', 'error', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = -1;
gp_inject_fault
-----------------
Success:
(1 row)
-- When we execute an ALTER DATABASE SET TABLESPACE command on alter_db and the faults are triggered.
ALTER DATABASE alter_db SET TABLESPACE adst_destination_tablespace;
ERROR: fault triggered, fault name:'inside_move_db_transaction' fault type:'error' (seg0 127.0.0.1:25432 pid=7774)
ERROR: fault triggered, fault name:'transaction_abort_failure' fault type:'error'
-- Then the tablespace for the database remains to be the source tablespace in all segments and the master
SELECT * FROM list_db_tablespace('alter_db', 'adst_source_tablespace');
gp_segment_id | db_name | tablespace_name
---------------+----------+------------------------
-1 | alter_db | adst_source_tablespace
0 | alter_db | adst_source_tablespace
1 | alter_db | adst_source_tablespace
2 | alter_db | adst_source_tablespace
(4 rows)
-- Ensure for content 0, the mirror has removed the dboid dir under
-- the target tablespace. We cannot wait for this fault for all
-- mirrors and standby, as don't know till what stage other primaries
-- has completed the command when error happens for content 0. If
-- other primaries end up aborting before starting the directory copy,
-- abort record will not delete the directory.
SELECT gp_wait_until_triggered_fault('after_drop_database_directories', 1, dbid) FROM gp_segment_configuration WHERE role='m' and content = 0;
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
-- Then all the files of the database should remain in the dboid directory in the source tablespace directory for all database instances.
CREATE TEMPORARY TABLE after_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
(SELECT * FROM before_alter) EXCEPT (SELECT * FROM after_alter);
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
(0 rows)
-- Wait for all the mirrors to replay the xlog before checking destination directory
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- And the dboid directory under the target tablespace directory is empty for all database instances.
SELECT * FROM stat_db_objects('alter_db', 'adst_destination_tablespace');
NOTICE: dboid dir for database alter_db does not exist on dbid = 1
NOTICE: dboid dir for database alter_db does not exist on dbid = 2
NOTICE: dboid dir for database alter_db does not exist on dbid = 3
NOTICE: dboid dir for database alter_db does not exist on dbid = 4
NOTICE: dboid dir for database alter_db does not exist on dbid = 5
NOTICE: dboid dir for database alter_db does not exist on dbid = 6
NOTICE: dboid dir for database alter_db does not exist on dbid = 7
NOTICE: dboid dir for database alter_db does not exist on dbid = 8
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
(8 rows)
-- Cleanup
DROP TABLE before_alter;
DROP TABLE after_alter;
DROP DATABASE alter_db;
DROP TABLESPACE adst_source_tablespace;
DROP TABLESPACE adst_destination_tablespace;
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Scenario: An error before XLOG_XACT_PREPARE on Primary
--
-- Expectation
-- +---------+-----------+-----------+--------------------------+-------+
-- | | source_ts | target_ts | cleanup mechanism | fault |
-- +---------+-----------+-----------+--------------------------+-------+
-- | Master | remains | deleted | pendingDbDeletes | |
-- | P0 | remains | deleted | pendingDbDeletes | E |
-- | P1 | remains | deleted | XLOG_XACT_PREPARE | |
-- | P2 | remains | deleted | XLOG_XACT_PREPARE | |
-- | SMaster | remains | deleted | XLOG_XACT_ABORT | |
-- | M0 | remains | deleted | XLOG_XACT_ABORT | |
-- | M1 | remains | deleted | XLOG_XACT_ABORT_PREPARED | |
-- | M2 | remains | deleted | XLOG_XACT_ABORT_PREPARED | |
-- +---------+-----------+-----------+--------------------------+-------+
SELECT setup();
setup
-------
(1 row)
-- Given we create the source and destination tablespaces
CREATE TABLESPACE adst_source_tablespace LOCATION :'adst_source_tablespace_location';
CREATE TABLESPACE adst_destination_tablespace LOCATION :'adst_destination_tablespace_location';
-- And we create a database in the source tablespace
CREATE DATABASE alter_db TABLESPACE adst_source_tablespace;
-- And we ensure that the mirrors have applied the filesystem changes for CREATE DATABASE
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- And we record the filesystem state for the database in the source tablespace
CREATE TEMPORARY TABLE before_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
-- And we track the removal of the dboid dir by all mirrors including the standby master.
SELECT gp_inject_fault('after_drop_database_directories', 'sleep', c.dbid) FROM gp_segment_configuration c WHERE role='m';
gp_inject_fault
-----------------
Success:
Success:
Success:
Success:
(4 rows)
-- And introduce an error on a primary directly before the primary writes the XLOG_XACT_PREPARE record during the dispatch of the PREPARE TRANSACTION command.
SELECT gp_inject_fault('start_prepare', 'error', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- When we execute an ALTER DATABASE SET TABLESPACE command on alter_db and the fault is triggered.
ALTER DATABASE alter_db SET TABLESPACE adst_destination_tablespace;
ERROR: fault triggered, fault name:'start_prepare' fault type:'error' (seg0 127.0.0.1:25432 pid=7774)
-- Then the tablespace for the database remains to be the source tablespace in all segments and the master
SELECT * FROM list_db_tablespace('alter_db', 'adst_source_tablespace');
gp_segment_id | db_name | tablespace_name
---------------+----------+------------------------
-1 | alter_db | adst_source_tablespace
0 | alter_db | adst_source_tablespace
1 | alter_db | adst_source_tablespace
2 | alter_db | adst_source_tablespace
(4 rows)
-- Ensure that the mirrors including the standby master have removed the dboid dir under the target tablespace.
SELECT gp_wait_until_triggered_fault('after_drop_database_directories', 1, dbid) FROM gp_segment_configuration WHERE role='m';
gp_wait_until_triggered_fault
-------------------------------
Success:
Success:
Success:
Success:
(4 rows)
-- Then all the files of the database should remain in the dboid directory in the source tablespace directory for all database instances.
CREATE TEMPORARY TABLE after_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
(SELECT * FROM before_alter) EXCEPT (SELECT * FROM after_alter);
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
(0 rows)
-- And the dboid directory under the target tablespace directory is empty for all database instances.
SELECT * FROM stat_db_objects('alter_db', 'adst_destination_tablespace');
NOTICE: dboid dir for database alter_db does not exist on dbid = 1
NOTICE: dboid dir for database alter_db does not exist on dbid = 2
NOTICE: dboid dir for database alter_db does not exist on dbid = 3
NOTICE: dboid dir for database alter_db does not exist on dbid = 4
NOTICE: dboid dir for database alter_db does not exist on dbid = 5
NOTICE: dboid dir for database alter_db does not exist on dbid = 6
NOTICE: dboid dir for database alter_db does not exist on dbid = 7
NOTICE: dboid dir for database alter_db does not exist on dbid = 8
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
(8 rows)
-- Cleanup
DROP TABLE before_alter;
DROP TABLE after_alter;
DROP DATABASE alter_db;
DROP TABLESPACE adst_source_tablespace;
DROP TABLESPACE adst_destination_tablespace;
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Scenario: An error before XLOG_XACT_PREPARE on Primary and before
-- XLOG_XACT_ABORT on master.
--
-- Expectation
-- +---------+-----------+-----------+--------------------------+-------+
-- | | source_ts | target_ts | cleanup mechanism | fault |
-- +---------+-----------+-----------+--------------------------+-------+
-- | Master | remains | deleted | pendingDbDeletes | E |
-- | P0 | remains | deleted | pendingDbDeletes | E |
-- | P1 | remains | deleted | XLOG_XACT_PREPARE | |
-- | P2 | remains | deleted | XLOG_XACT_PREPARE | |
-- | SMaster | remains | deleted | XLOG_XACT_ABORT | |
-- | M0 | remains | deleted | XLOG_XACT_ABORT | |
-- | M1 | remains | deleted | XLOG_XACT_ABORT_PREPARED | |
-- | M2 | remains | deleted | XLOG_XACT_ABORT_PREPARED | |
-- +---------+-----------+-----------+--------------------------+-------+
SELECT setup();
setup
-------
(1 row)
-- Given we create the source and destination tablespaces
CREATE TABLESPACE adst_source_tablespace LOCATION :'adst_source_tablespace_location';
CREATE TABLESPACE adst_destination_tablespace LOCATION :'adst_destination_tablespace_location';
-- And we create a database in the source tablespace
CREATE DATABASE alter_db TABLESPACE adst_source_tablespace;
-- And we ensure that the mirrors have applied the filesystem changes for CREATE DATABASE
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- And we record the filesystem state for the database in the source tablespace
CREATE TEMPORARY TABLE before_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
-- And we track the removal of the dboid dir by all mirrors including the standby master.
SELECT gp_inject_fault('after_drop_database_directories', 'sleep', c.dbid) FROM gp_segment_configuration c WHERE role='m';
gp_inject_fault
-----------------
Success:
Success:
Success:
Success:
(4 rows)
-- And introduce an error on a primary directly before the primary writes the XLOG_XACT_PREPARE record during the dispatch of the PREPARE TRANSACTION command.
SELECT gp_inject_fault('start_prepare', 'error', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 0;
gp_inject_fault
-----------------
Success:
(1 row)
-- And introduce an error on the master after dispatch of the ALTER command and before XLOG_XACT_ABORT is written by the master.
SELECT gp_inject_fault('transaction_abort_failure', 'error', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = -1;
gp_inject_fault
-----------------
Success:
(1 row)
-- When we execute an ALTER DATABASE SET TABLESPACE command on alter_db and the faults are triggered.
ALTER DATABASE alter_db SET TABLESPACE adst_destination_tablespace;
ERROR: fault triggered, fault name:'start_prepare' fault type:'error' (seg0 127.0.0.1:25432 pid=18917)
ERROR: fault triggered, fault name:'transaction_abort_failure' fault type:'error'
-- Then the tablespace for the database remains to be the source tablespace in all segments and the master
SELECT * FROM list_db_tablespace('alter_db', 'adst_source_tablespace');
gp_segment_id | db_name | tablespace_name
---------------+----------+------------------------
-1 | alter_db | adst_source_tablespace
0 | alter_db | adst_source_tablespace
1 | alter_db | adst_source_tablespace
2 | alter_db | adst_source_tablespace
(4 rows)
-- Ensure that the mirrors including the standby master have removed the dboid dir under the target tablespace.
SELECT gp_wait_until_triggered_fault('after_drop_database_directories', 1, dbid) FROM gp_segment_configuration WHERE role='m';
gp_wait_until_triggered_fault
-------------------------------
Success:
Success:
Success:
Success:
(4 rows)
-- Then all the files of the database should remain in the dboid directory in the source tablespace directory for all database instances.
CREATE TEMPORARY TABLE after_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
(SELECT * FROM before_alter) EXCEPT (SELECT * FROM after_alter);
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
(0 rows)
-- And the dboid directory under the target tablespace directory is empty for all database instances.
SELECT * FROM stat_db_objects('alter_db', 'adst_destination_tablespace');
NOTICE: dboid dir for database alter_db does not exist on dbid = 1
NOTICE: dboid dir for database alter_db does not exist on dbid = 2
NOTICE: dboid dir for database alter_db does not exist on dbid = 3
NOTICE: dboid dir for database alter_db does not exist on dbid = 4
NOTICE: dboid dir for database alter_db does not exist on dbid = 5
NOTICE: dboid dir for database alter_db does not exist on dbid = 6
NOTICE: dboid dir for database alter_db does not exist on dbid = 7
NOTICE: dboid dir for database alter_db does not exist on dbid = 8
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
(8 rows)
-- Cleanup
DROP TABLE before_alter;
DROP TABLE after_alter;
DROP DATABASE alter_db;
DROP TABLESPACE adst_source_tablespace;
DROP TABLESPACE adst_destination_tablespace;
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Scenario: An error just before XLOG_XACT_DISTRIBUTED_COMMIT on master
--
-- Expectation
-- +---------+-----------+-----------+--------------------------+-------+
-- | | source_ts | target_ts | cleanup mechanism | fault |
-- +---------+-----------+-----------+--------------------------+-------+
-- | Master | remains | deleted | pendingDbDeletes | E |
-- | P0 | remains | deleted | XLOG_XACT_PREPARE | |
-- | P1 | remains | deleted | XLOG_XACT_PREPARE | |
-- | P2 | remains | deleted | XLOG_XACT_PREPARE | |
-- | SMaster | remains | deleted | XLOG_XACT_ABORT | |
-- | M0 | remains | deleted | XLOG_XACT_ABORT_PREPARED | |
-- | M1 | remains | deleted | XLOG_XACT_ABORT_PREPARED | |
-- | M2 | remains | deleted | XLOG_XACT_ABORT_PREPARED | |
-- +---------+-----------+-----------+--------------------------+-------+
SELECT setup();
setup
-------
(1 row)
-- Given we create the source and destination tablespaces
CREATE TABLESPACE adst_source_tablespace LOCATION :'adst_source_tablespace_location';
CREATE TABLESPACE adst_destination_tablespace LOCATION :'adst_destination_tablespace_location';
-- And we create a database in the source tablespace
CREATE DATABASE alter_db TABLESPACE adst_source_tablespace;
-- And we ensure that the mirrors have applied the filesystem changes for CREATE DATABASE
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- And we record the filesystem state for the database in the source tablespace
CREATE TEMPORARY TABLE before_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
-- And we track the removal of the dboid dir by all mirrors including the standby master.
SELECT gp_inject_fault('after_drop_database_directories', 'sleep', c.dbid) FROM gp_segment_configuration c WHERE role='m';
gp_inject_fault
-----------------
Success:
Success:
Success:
Success:
(4 rows)
-- And introduce an error on the master after dispatch of the PREPARE TRANSACTION command and before XLOG_XACT_DISTRIBUTED_COMMIT is written by the master.
SELECT gp_inject_fault('transaction_abort_after_distributed_prepared', 'error', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = -1;
gp_inject_fault
-----------------
Success:
(1 row)
-- When we execute an ALTER DATABASE SET TABLESPACE command on alter_db and the fault is triggered.
ALTER DATABASE alter_db SET TABLESPACE adst_destination_tablespace;
ERROR: fault triggered, fault name:'transaction_abort_after_distributed_prepared' fault type:'error'
-- Then the tablespace for the database remains to be the source tablespace in all segments and the master
SELECT * FROM list_db_tablespace('alter_db', 'adst_source_tablespace');
gp_segment_id | db_name | tablespace_name
---------------+----------+------------------------
-1 | alter_db | adst_source_tablespace
0 | alter_db | adst_source_tablespace
1 | alter_db | adst_source_tablespace
2 | alter_db | adst_source_tablespace
(4 rows)
-- Ensure that the mirrors including the standby master have removed the dboid dir under the target tablespace.
SELECT gp_wait_until_triggered_fault('after_drop_database_directories', 1, dbid) FROM gp_segment_configuration WHERE role='m';
gp_wait_until_triggered_fault
-------------------------------
Success:
Success:
Success:
Success:
(4 rows)
-- Then all the files of the database should remain in the dboid directory in the source tablespace directory for all database instances.
CREATE TEMPORARY TABLE after_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
(SELECT * FROM before_alter) EXCEPT (SELECT * FROM after_alter);
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
(0 rows)
-- And the dboid directory under the target tablespace directory is empty for all database instances.
SELECT * FROM stat_db_objects('alter_db', 'adst_destination_tablespace');
NOTICE: dboid dir for database alter_db does not exist on dbid = 1
NOTICE: dboid dir for database alter_db does not exist on dbid = 2
NOTICE: dboid dir for database alter_db does not exist on dbid = 3
NOTICE: dboid dir for database alter_db does not exist on dbid = 4
NOTICE: dboid dir for database alter_db does not exist on dbid = 5
NOTICE: dboid dir for database alter_db does not exist on dbid = 6
NOTICE: dboid dir for database alter_db does not exist on dbid = 7
NOTICE: dboid dir for database alter_db does not exist on dbid = 8
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
(8 rows)
-- Cleanup
DROP TABLE before_alter;
DROP TABLE after_alter;
DROP DATABASE alter_db;
DROP TABLESPACE adst_source_tablespace;
DROP TABLESPACE adst_destination_tablespace;
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Scenario: A panic just before XLOG_XACT_PREPARE on primary
--
-- Expectation
-- +-------------+-----------+-----------+--------------------------+-------+
-- | | source_ts | target_ts | cleanup mechanism | fault |
-- +-------------+-----------+-----------+--------------------------+-------+
-- | Master | remains | deleted | pendingDbDeletes | |
-- | P0 | remains | orphaned | N/A | P |
-- | P1 | remains | deleted | XLOG_XACT_PREPARE | |
-- | P2 | remains | deleted | XLOG_XACT_PREPARE | |
-- | SMaster | remains | deleted | XLOG_XACT_ABORT | |
-- | M0 | remains | orphaned | N/A | |
-- | M1 | remains | deleted | XLOG_XACT_ABORT_PREPARED | |
-- | M2 | remains | deleted | XLOG_XACT_ABORT_PREPARED | |
-- +-------------+-----------+-----------+--------------------------+-------+
SELECT setup();
setup
-------
(1 row)
-- Given we create the source and destination tablespaces
CREATE TABLESPACE adst_source_tablespace LOCATION :'adst_source_tablespace_location';
CREATE TABLESPACE adst_destination_tablespace LOCATION :'adst_destination_tablespace_location';
-- And we create a database in the source tablespace
CREATE DATABASE alter_db TABLESPACE adst_source_tablespace;
-- And we ensure that the mirrors have applied the filesystem changes for CREATE DATABASE
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- And we record the filesystem state for the database in the source tablespace
-- Note: We can't use a temporary table as PANICS wipe them out
CREATE TABLE before_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
-- Skip fts probe such that we prevent the promotion of the mirror of the primary that we induce the panic on.
SELECT gp_inject_fault_infinite('fts_probe', 'skip', dbid) FROM gp_segment_configuration WHERE role='p' AND content=-1;
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
-- Force scan to trigger the fault.
SELECT gp_request_fts_probe_scan();
gp_request_fts_probe_scan
---------------------------
t
(1 row)
-- And we track the removal of the dboid dir by all mirrors including the standby master, except the mirror of the primary about to panic.
SELECT gp_inject_fault('after_drop_database_directories', 'sleep', c.dbid) FROM gp_segment_configuration c WHERE role='m' AND content!=0;
gp_inject_fault
-----------------
Success:
Success:
Success:
(3 rows)
-- And introduce a panic on a primary directly before the primary writes the XLOG_XACT_PREPARE record during the dispatch of the PREPARE TRANSACTION command.
SELECT gp_inject_fault('start_prepare', 'panic', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 0;
HINT: Inject an infinite 'skip' into the 'fts_probe' fault to disable FTS probing.
WARNING: consider disabling FTS probes while injecting a panic.
gp_inject_fault
-----------------
Success:
(1 row)
-- When we execute an ALTER DATABASE SET TABLESPACE command on alter_db and the panic is triggered
ALTER DATABASE alter_db SET TABLESPACE adst_destination_tablespace;
WARNING: Any temporary tables for this session have been dropped because the gang was disconnected (session id = 1802)
ERROR: fault triggered, fault name:'start_prepare' fault type:'panic' (seg0 127.0.0.1:25432 pid=61308)
-- Then the tablespace for the database remains to be the source tablespace in all segments and the master.
SELECT * FROM list_db_tablespace('alter_db', 'adst_source_tablespace');
gp_segment_id | db_name | tablespace_name
---------------+----------+------------------------
-1 | alter_db | adst_source_tablespace
0 | alter_db | adst_source_tablespace
1 | alter_db | adst_source_tablespace
2 | alter_db | adst_source_tablespace
(4 rows)
-- Ensure that the mirrors including the standby master have removed the dboid dir under the target tablespace, except the mirror of the panicked primary.
SELECT gp_wait_until_triggered_fault('after_drop_database_directories', 1, dbid) FROM gp_segment_configuration WHERE role='m' AND content!=0;
gp_wait_until_triggered_fault
-------------------------------
Success:
Success:
Success:
(3 rows)
-- Then all the files of the database should remain in the dboid directory in the source tablespace directory for all database instances.
-- Note: Sometimes the pg_internal.init is not yet formed on the recovering primary. It is not important for our test.
CREATE TEMPORARY TABLE after_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
SELECT * FROM ((SELECT * FROM before_alter) EXCEPT (SELECT * FROM after_alter)) r WHERE relfilenode_dboid_relative_path NOT LIKE '%pg_internal.init';
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
(0 rows)
-- And the dboid directory under the target tablespace directory is empty for all database instances except the primary that panicked and its mirror.
SELECT * FROM stat_db_objects('alter_db', 'adst_destination_tablespace') WHERE dbid NOT IN (SELECT dbid FROM gp_segment_configuration WHERE content=0);
NOTICE: dboid dir for database alter_db does not exist on dbid = 1
NOTICE: dboid dir for database alter_db does not exist on dbid = 3
NOTICE: dboid dir for database alter_db does not exist on dbid = 4
NOTICE: dboid dir for database alter_db does not exist on dbid = 6
NOTICE: dboid dir for database alter_db does not exist on dbid = 7
NOTICE: dboid dir for database alter_db does not exist on dbid = 8
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
1 | |
3 | |
4 | |
6 | |
7 | |
8 | |
(6 rows)
-- Cleanup
DROP TABLE before_alter;
DROP TABLE after_alter;
DROP DATABASE alter_db;
DROP TABLESPACE adst_source_tablespace;
DROP TABLESPACE adst_destination_tablespace;
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Scenario: A panic just after XLOG_XACT_PREPARE on primary
--
-- +-------------+-----------+-----------+--------------------------+-------+
-- | | source_ts | target_ts | cleanup mechanism | fault |
-- +-------------+-----------+-----------+--------------------------+-------+
-- | Master | remains | deleted | pendingDbDeletes | |
-- | P0 | remains | deleted | XLOG_XACT_PREPARE | P |
-- | P1 | remains | deleted | XLOG_XACT_PREPARE | |
-- | P2 | remains | deleted | XLOG_XACT_PREPARE | |
-- | SMaster | remains | deleted | XLOG_XACT_ABORT | |
-- | M0 | remains | deleted | XLOG_XACT_ABORT | |
-- | M1 | remains | deleted | XLOG_XACT_ABORT_PREPARED | |
-- | M2 | remains | deleted | XLOG_XACT_ABORT_PREPARED | |
-- +-------------+-----------+-----------+--------------------------+-------+
SELECT setup();
setup
-------
(1 row)
-- Given we create the source and destination tablespaces
CREATE TABLESPACE adst_source_tablespace LOCATION :'adst_source_tablespace_location';
CREATE TABLESPACE adst_destination_tablespace LOCATION :'adst_destination_tablespace_location';
-- And we create a database in the source tablespace
CREATE DATABASE alter_db TABLESPACE adst_source_tablespace;
-- And we ensure that the mirrors have applied the filesystem changes for CREATE DATABASE
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- And we record the filesystem state for the database in the source tablespace
CREATE TABLE before_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
-- Skip fts probe such that we prevent the promotion of the mirror of the primary that we induce the panic on.
SELECT gp_inject_fault_infinite('fts_probe', 'skip', dbid) FROM gp_segment_configuration WHERE role='p' AND content=-1;
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
-- Force scan to trigger the fault.
SELECT gp_request_fts_probe_scan();
gp_request_fts_probe_scan
---------------------------
t
(1 row)
-- And we track the removal of the dboid dir by all mirrors including the standby master.
SELECT gp_inject_fault('after_drop_database_directories', 'sleep', c.dbid) FROM gp_segment_configuration c WHERE role='m';
gp_inject_fault
-----------------
Success:
Success:
Success:
Success:
(4 rows)
-- And introduce a panic on a primary directly after the primary writes the XLOG_XACT_PREPARE record during the dispatch of the PREPARE TRANSACTION command.
SELECT gp_inject_fault('after_xlog_xact_prepare_flushed', 'panic', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 0;
HINT: Inject an infinite 'skip' into the 'fts_probe' fault to disable FTS probing.
WARNING: consider disabling FTS probes while injecting a panic.
gp_inject_fault
-----------------
Success:
(1 row)
-- When we execute an ALTER DATABASE SET TABLESPACE command on alter_db and the panic is triggered
ALTER DATABASE alter_db SET TABLESPACE adst_destination_tablespace;
WARNING: Any temporary tables for this session have been dropped because the gang was disconnected (session id = 3078)
ERROR: fault triggered, fault name:'after_xlog_xact_prepare_flushed' fault type:'panic' (seg0 127.0.0.1:25432 pid=78007)
-- Then the tablespace for the database remains to be the source tablespace in all segments and the master.
SELECT * FROM list_db_tablespace('alter_db', 'adst_source_tablespace');
gp_segment_id | db_name | tablespace_name
---------------+----------+------------------------
-1 | alter_db | adst_source_tablespace
0 | alter_db | adst_source_tablespace
1 | alter_db | adst_source_tablespace
2 | alter_db | adst_source_tablespace
(4 rows)
-- Ensure that the mirrors including the standby master have removed the dboid dir under the target tablespace
SELECT gp_wait_until_triggered_fault('after_drop_database_directories', 1, dbid) FROM gp_segment_configuration WHERE role='m';
gp_wait_until_triggered_fault
-------------------------------
Success:
Success:
Success:
Success:
(4 rows)
-- Then all the files of the database should remain in the dboid directory in the source tablespace directory for all database instances.
-- Note: Sometimes the pg_internal.init is not yet formed on the recovering primary. It is not important for our test.
CREATE TEMPORARY TABLE after_alter AS SELECT * FROM stat_db_objects('alter_db', 'adst_source_tablespace');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
SELECT * FROM ((SELECT * FROM before_alter) EXCEPT (SELECT * FROM after_alter)) r WHERE relfilenode_dboid_relative_path NOT LIKE '%pg_internal.init';
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
(0 rows)
-- And the dboid directory under the target tablespace directory is empty for all database instances.
SELECT * FROM stat_db_objects('alter_db', 'adst_destination_tablespace');
NOTICE: dboid dir for database alter_db does not exist on dbid = 1
NOTICE: dboid dir for database alter_db does not exist on dbid = 2
NOTICE: dboid dir for database alter_db does not exist on dbid = 3
NOTICE: dboid dir for database alter_db does not exist on dbid = 4
NOTICE: dboid dir for database alter_db does not exist on dbid = 5
NOTICE: dboid dir for database alter_db does not exist on dbid = 6
NOTICE: dboid dir for database alter_db does not exist on dbid = 7
NOTICE: dboid dir for database alter_db does not exist on dbid = 8
dbid | relfilenode_dboid_relative_path | size
------+---------------------------------+------
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
(8 rows)
-- Cleanup
DROP TABLE before_alter;
DROP TABLE after_alter;
DROP DATABASE alter_db;
DROP TABLESPACE adst_source_tablespace;
DROP TABLESPACE adst_destination_tablespace;
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- End of tests which require create_restartpoint_on_ckpt_record_replay=on
--- start_ignore
\! gpconfig -r create_restartpoint_on_ckpt_record_replay --skipvalidation;
\! gpstop -u;
--- end_ignore
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- If a mirror checkpointer doesn't create checkpoint between making changes in a database and moving this database to another tablespace, then attempt to create checkpoint the
-- next time should not result in the fsync error and restarting of checkpointer. The checkpointer must process all requests.
--- start_ignore
CREATE OR REPLACE FUNCTION mirror0() RETURNS gp_segment_configuration.dbid%type
AS $$
SELECT dbid FROM gp_segment_configuration WHERE content = 0 AND role = 'm'
$$ LANGUAGE SQL;
--- end_ignore
-- Checkpointer must not start processing requests until database is moved to another tablespace
SELECT gp_inject_fault('ckpt_loop_begin', 'infinite_loop', mirror0());
gp_inject_fault
-----------------
Success:
(1 row)
-- Check the initial value. It will be restored after the test.
show fsync;
fsync
-------
on
(1 row)
--- start_ignore
-- Set fsync on since we need to test the fsync code logic
\! gpconfig -c fsync -v on --skipvalidation;
-- Apply settings. Wake up checkpointer to speed up the test.
\! gpstop -u;
--- end_ignore
-- Wait until checkpointer is ready to start processing requests
SELECT gp_wait_until_triggered_fault('ckpt_loop_begin', 1, mirror0());
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
SELECT setup();
setup
-------
(1 row)
-- Create the source and destination tablespaces
CREATE TABLESPACE adst_source_tablespace LOCATION :'adst_source_tablespace_location';
CREATE TABLESPACE adst_destination_tablespace LOCATION :'adst_destination_tablespace_location';
-- Create a database in the source tablespace
CREATE DATABASE alter_db TABLESPACE adst_source_tablespace;
-- Make a change in the database, return to the previous database, set search_path again
\c alter_db
CREATE TABLE t(i int) DISTRIBUTED RANDOMLY;
\c regression
SET search_path TO adst,public;
-- Prepare fault for waiting
SELECT gp_inject_fault('ckpt_loop_end', 'skip', mirror0());
gp_inject_fault
-----------------
Success:
(1 row)
ALTER DATABASE alter_db SET TABLESPACE adst_destination_tablespace;
-- Ensure that the mirrors have applied the filesystem changes
SELECT force_mirrors_to_catch_up();
force_mirrors_to_catch_up
---------------------------
(1 row)
-- Checkpointer starts processing requests
SELECT gp_inject_fault('ckpt_loop_begin', 'reset', mirror0());
gp_inject_fault
-----------------
Success:
(1 row)
-- Wait until checkpointer finishes processing requests. If the fsync error happens, then server closes the connection and this fault is never reached
SELECT gp_wait_until_triggered_fault('ckpt_loop_end', 1, mirror0());
gp_wait_until_triggered_fault
-------------------------------
Success:
(1 row)
-- Cleanup
DROP FUNCTION mirror0();
DROP DATABASE alter_db;
DROP TABLESPACE adst_source_tablespace;
DROP TABLESPACE adst_destination_tablespace;
-- Final cleanup
DROP SCHEMA adst CASCADE;
DETAIL: drop cascades to function setup_tablespace_location_dir_for_test(text)
NOTICE: drop cascades to 4 other objects
drop cascades to function setup()
drop cascades to function list_db_tablespace(text,text)
drop cascades to function stat_db_objects(text,text)
SELECT gp_inject_fault('all', 'reset', dbid) FROM gp_segment_configuration;
gp_inject_fault
-----------------
Success:
Success:
Success:
Success:
Success:
Success:
Success:
Success:
(8 rows)
\!rm -rf @testtablespace@/adst_source
\!rm -rf @testtablespace@/adst_dest
--- start_ignore
-- Set fsync on because it is the value before the test
\! gpconfig -c fsync -v on --skipvalidation;
-- Apply settings
\! gpstop -u;
--- end_ignore