blob: fc993f9e1e4a4e717957e08dc2988ac26e89780f [file] [log] [blame]
-- The following test simulates corruption of header and data content and
-- verifies the select behavior on this corrupted table.
--
-- Mask out the expected and actual values of the checksums when comparing the
-- result. All we care about is that they don't match.
--
-- start_matchsubs
-- m/^ERROR: (block|header) checksum does not match./
-- s/expected 0x(........) and found 0x(........)/expected 0xXXXXXXXX and found 0xXXXXXXXX/
-- m/^ERROR: VarBlock is not valid, valid block check error 12, detail .* /
-- s/detail.*//
-- end_matchsubs
-- Ignore the status messages from the helper function. They're useful for
-- debugging, but the output is different on every invocation.
--
-- start_matchignore
-- m/^INFO: corrupting file/
-- m/^INFO: skipping non-existent file/
-- end_matchignore
-- start_ignore
CREATE LANGUAGE plpython3u;
-- end_ignore
-- Create our test tables (and functions) in a bespoken schema that we can drop
-- at the end. We don't want to leave any corrupt files lying around!
CREATE SCHEMA corrupt_checksums;
set search_path='corrupt_checksums';
-- to ignore the CONTEXT from messages from the plpython helpers, and to ignore
-- DETAILs from the checksum errors.
\set VERBOSITY terse
-- Return path to the file holding data for the given table (relative to
-- $PGDATA).
--
-- The returned path is for segfile #1. Because that's what we will choose for
-- the first insertion into an AO table.
CREATE FUNCTION get_aoseg1_path(tbl regclass) returns text as $$
(select 'base/' || db.oid || '/' || c.relfilenode || '.1' from pg_class c, pg_database db where c.oid = $1 AND db.datname = current_database())
$$ language sql VOLATILE;
-- Corrupt data file at given path (if it exists on this segment)
--
-- If corruption_offset is negative, it's an offset from the end of file.
-- Otherwise it's from the beginning of file.
--
-- Returns 0. (That's handy in the way this function is called, because we can
-- do a SUM() over the return values, and it's always 0, regardless of the
-- number of segemnts in the cluster.)
CREATE FUNCTION corrupt_file(data_file text, corruption_offset int4)
RETURNS setof integer as $$
import os;
if not os.path.isfile(data_file):
plpy.info('skipping non-existent file %s' % (data_file))
else:
plpy.info('corrupting file %s at %s' % (data_file, corruption_offset))
with open(data_file , "rb+") as f:
char_location=0
write_char='*'.encode() # CONST.CORRUPTION
if corruption_offset >= 0:
f.seek(corruption_offset, 0)
else:
f.seek(corruption_offset, 2)
f.write(write_char)
f.close()
return []
$$ LANGUAGE plpython3u EXECUTE ON ALL SEGMENTS;
-- Corrupt a file by replacing the last occurrence of 'str' within the file
-- with 'replacement'
CREATE FUNCTION corrupt_file(data_file text, target bytea, replacement bytea)
RETURNS setof integer as $$
import os;
if not os.path.isfile(data_file):
plpy.info('skipping non-existent file %s' % (data_file))
else:
with open(data_file , "rb+") as f:
content = f.read();
# replace the last occurrence of the target string.
pos = content.rfind(target);
if pos == -1:
plpy.info('target string not found in file %s, skipping' % data_file)
else:
plpy.info('corrupting file %s by replacing %s with %s' % (data_file, str, replacement))
f.seek(pos, 0)
f.write(replacement)
f.close()
return []
$$ LANGUAGE plpython3u EXECUTE ON ALL SEGMENTS;
-- Large content, corrupt block header
create table corrupt_header_large_co(comment bytea ) with (appendonly=true, orientation=column, checksum=true) DISTRIBUTED RANDOMLY;
insert into corrupt_header_large_co select ("decode"(repeat('a',33554432),'escape')) from generate_series(1,8) ;
select corrupt_file(get_aoseg1_path('corrupt_header_large_co'), 8);
SELECT COUNT(*) FROM corrupt_header_large_co;
-- Large content, corrupt content
create table corrupt_content_large_co(comment bytea ) with (appendonly=true, orientation=column, checksum=true) DISTRIBUTED RANDOMLY;
insert into corrupt_content_large_co select ("decode"(repeat('a',33554432),'escape')) from generate_series(1,8) ;
select corrupt_file(get_aoseg1_path('corrupt_content_large_co'), -3);
SELECT COUNT(*) FROM corrupt_content_large_co;
-- Small content, corrupt block header
create table corrupt_header_small_co(a int) with (appendonly=true, orientation=column, checksum=true);
insert into corrupt_header_small_co values (1),(1),(1),(-1),(1),(1),(1),(2),(2),(2),(2),(2),(2),(2),(33),(3),(3),(3),(1),(8),(19),(20),(31),(32),(33),(34),(5),(5),(5),(5),(5),(6),(6),(6),(6),(6),(6),(7),(7),(7),(7),(7),(7),(7),(7), (null),(7),(7),(7),(null),(8),(8),(8),(8),(8),(8),(4),(4),(null),(4),(17),(17),(17),(null),(null),(null);
select corrupt_file(get_aoseg1_path('corrupt_header_small_co'), 8);
SELECT COUNT(*) FROM corrupt_header_small_co;
-- Small content, corrupt content
create table corrupt_content_small_co(a int) with (appendonly=true, orientation=column, checksum=true);
insert into corrupt_content_small_co values (1),(1),(1),(-1),(1),(1),(1),(2),(2),(2),(2),(2),(2),(2),(33),(3),(3),(3),(1),(8),(19),(20),(31),(32),(33),(34),(5),(5),(5),(5),(5),(6),(6),(6),(6),(6),(6),(7),(7),(7),(7),(7),(7),(7),(7), (null),(7),(7),(7),(null),(8),(8),(8),(8),(8),(8),(4),(4),(null),(4),(17),(17),(17),(null),(null),(null);
select corrupt_file(get_aoseg1_path('corrupt_content_small_co'), -3);
SELECT COUNT(*) FROM corrupt_content_small_co;
-- Row-oriented, Small content, corrupt block header
create table corrupt_header_small_ao(a int) with (appendonly=true, orientation=row, checksum=true);
insert into corrupt_header_small_ao values (1),(1),(1),(-1),(1),(1),(1),(2),(2),(2),(2),(2),(2),(2),(33),(3),(3),(3),(1),(8),(19),(20),(31),(32),(33),(34),(5),(5),(5),(5),(5),(6),(6),(6),(6),(6),(6),(7),(7),(7),(7),(7),(7),(7),(7), (null),(7),(7),(7),(null),(8),(8),(8),(8),(8),(8),(4),(4),(null),(4),(17),(17),(17),(null),(null),(null);
select corrupt_file(get_aoseg1_path('corrupt_header_small_ao'), 8);
SELECT COUNT(*) FROM corrupt_header_small_ao;
-- Row-oriented, Small content, corrupt content
create table corrupt_content_small_ao(a int) with (appendonly=true, orientation=row, checksum=true);
insert into corrupt_content_small_ao values (1),(1),(1),(-1),(1),(1),(1),(2),(2),(2),(2),(2),(2),(2),(33),(3),(3),(3),(1),(8),(19),(20),(31),(32),(33),(34),(5),(5),(5),(5),(5),(6),(6),(6),(6),(6),(6),(7),(7),(7),(7),(7),(7),(7),(7), (null),(7),(7),(7),(null),(8),(8),(8),(8),(8),(8),(4),(4),(null),(4),(17),(17),(17),(null),(null),(null);
select corrupt_file(get_aoseg1_path('corrupt_content_small_ao'), -3);
SELECT COUNT(*) FROM corrupt_content_small_ao;
-- try again, check varblock content offset validation w/ checksum disabled
set gp_appendonly_verify_block_checksums = off;
SELECT COUNT(*) FROM corrupt_content_small_ao;
reset gp_appendonly_verify_block_checksums;
-- Also test gp_appendonly_verify_block_checksums=off.
create table appendonly_verify_block_checksums_co (t text) with (checksum=true, appendonly=true, orientation=column, compresstype=none);
insert into appendonly_verify_block_checksums_co
select 'abcdefghijlmnopqrstuvxyz' from generate_series(1, 5);
-- Corrupt the table by flip the 'xyz' on the last row with ###
select corrupt_file(get_aoseg1_path('appendonly_verify_block_checksums_co'), 'xyz', '###');
-- Fails, checksum is wrong.
SELECT * FROM appendonly_verify_block_checksums_co;
-- try again, ignoring the checksum failure.
set gp_appendonly_verify_block_checksums = off;
SELECT * FROM appendonly_verify_block_checksums_co;
-- Clean up. We don't want to leave the corrupt tables lying around!
reset search_path;
DROP SCHEMA corrupt_checksums CASCADE;