blob: f5a32b85238d09bc1af6f22c555e3ae601f52e4d [file]
-- test single row error handling, both in COPY and external tables.
--
-- # different errors depending on dispatch timings (missing data vs
-- # invalid input syntax)
--
-- start_matchsubs
--
-- m/CONTEXT\:\s+COPY sreh_copy\,\s+line/
-- s/line \d+(.*)/line SOME_LINE/
--
-- m/DETAIL: Failing row contains \(.*\)/
-- s/DETAIL: Failing row contains \(.*\)/DETAIL: Failing row contains (#####)/
--
-- end_matchsubs
-- ###########################################################
-- COPY
-- ###########################################################
CREATE TABLE sreh_copy(a int, b int, c int) distributed by(a);
--
-- ROW reject limit only
--
\getenv abs_srcdir PG_ABS_SRCDIR
\set bad_data1 :abs_srcdir '/data/bad_data1.data'
COPY sreh_copy FROM :'bad_data1' DELIMITER '|' SEGMENT REJECT LIMIT 1000;
SELECT * FROM sreh_copy ORDER BY a,b,c;
--
-- ROW reject limit only - low value that gets reached
--
COPY sreh_copy FROM :'bad_data1' DELIMITER '|' SEGMENT REJECT LIMIT 2;
SELECT * FROM sreh_copy ORDER BY a,b,c;
--
-- error logs
--
DROP TABLE IF EXISTS sreh_copy; CREATE TABLE sreh_copy(a int, b int, c int) distributed by(a);
COPY sreh_copy FROM :'bad_data1' DELIMITER '|' LOG ERRORS INTO WHATEVER SEGMENT REJECT LIMIT 1000;
SET gp_ignore_error_table=true;
COPY sreh_copy FROM :'bad_data1' DELIMITER '|' LOG ERRORS INTO WHATEVER SEGMENT REJECT LIMIT 1000;
SELECT * FROM sreh_copy ORDER BY a,b,c;
WITH error_log AS (SELECT gp_read_error_log('sreh_copy')) select count(*) from error_log;
--
-- error logs - do the same thing again. this time error logs exist and should get data appended
--
COPY sreh_copy FROM :'bad_data1' DELIMITER '|' LOG ERRORS SEGMENT REJECT LIMIT 1000;
SELECT * FROM sreh_copy ORDER BY a,b,c;
SELECT linenum, rawdata FROM gp_read_error_log('sreh_copy') ORDER BY linenum;
--
-- error logs - do the same thing again. this time use data from STDIN (should show in error logs)
--
DROP TABLE IF EXISTS sreh_copy; CREATE TABLE sreh_copy(a int, b int, c int) distributed by(a);
COPY sreh_copy FROM STDIN DELIMITER '|' LOG ERRORS SEGMENT REJECT LIMIT 1000;
100|100|100
200 bad data from stdin
300|300|300
500|500| more bad data from stdin
\.
SELECT * FROM sreh_copy ORDER BY a,b,c;
WITH error_log AS (SELECT gp_read_error_log('sreh_copy')) select count(*) from error_log;
--
-- constraint errors - data is rolled back (CHECK)
--
CREATE TABLE sreh_constr(a int check (a > 10));
COPY sreh_constr FROM STDIN DELIMITER '|' LOG ERRORS SEGMENT REJECT LIMIT 1000;
12
bad format (not int) data should get inserted into error logs but later rolledback
11
10
9
\.
WITH error_log AS (SELECT gp_read_error_log('sreh_constr')) select count(*) from error_log;
SELECT * FROM sreh_constr; -- should exist and be empty
DROP TABLE sreh_constr;
--
-- constraint errors - data is rolled back from target table (UNIQUE)
--
CREATE TABLE sreh_constr(a int unique);
COPY sreh_constr FROM STDIN DELIMITER '|' LOG ERRORS SEGMENT REJECT LIMIT 1000;
12
bad format (not int) data should get inserted into error logs but later rolledback
11
9
9
\.
WITH error_log AS (SELECT gp_read_error_log('sreh_constr')) select count(*) from error_log;
SELECT * FROM sreh_constr; -- should exist and be empty
--
-- test PERCENT reject limit logic with threshold 100.
-- (we don't test the default threshold (300) yet. we could if needed).
-- bad records in bad_data3.data (300 rows): 5,15,55,56,107,136,160,213,214,286
-- this data file is made with errors that will occur only on the QD (with COPY)
-- so the percent calculation should always be the same regardless of number of
-- QE's in the system.
--
\set bad_data3 :abs_srcdir '/data/bad_data3.data'
set gp_reject_percent_threshold = 100;
COPY sreh_copy FROM :'bad_data3' DELIMITER '|' SEGMENT REJECT LIMIT 10 PERCENT; --pass
COPY sreh_copy FROM :'bad_data3' DELIMITER '|' SEGMENT REJECT LIMIT 2 PERCENT; --fail
--
-- test PERCENT reject limit logic with custom threshold 10 (only practical for test purposes)
--
set gp_reject_percent_threshold = 10;
COPY sreh_copy FROM :'bad_data3' DELIMITER '|' SEGMENT REJECT LIMIT 10 PERCENT; --fail
COPY sreh_copy FROM :'bad_data3' DELIMITER '|' SEGMENT REJECT LIMIT 20 PERCENT; --pass
-- MPP-2933 (multiple dist-key attr conversion errors)
create table t2933 (col1 varchar(3) NULL , col2 char(1) NULL, col3 varchar(4) NULL, col4 char(1) NULL, col5 varchar(20) NULL)
Distributed By (col1, col2, col3, col4);
COPY t2933 FROM STDIN DELIMITER '|' SEGMENT REJECT LIMIT 10;
MANDT|SPRAS|BSART|BSTYP|BATXT
\.
-- cleanup
DROP TABLE sreh_copy;
DROP TABLE sreh_constr;
-- ###########################################################
-- External Tables
-- ###########################################################
\getenv binddir PG_BINDDIR
\set gpfdist_sreh_start_e '((' :binddir '/gpfdist -p 8080 -d ' :abs_srcdir '/data </dev/null >/dev/null 2>&1 &); for i in `seq 1 30`; do curl 127.0.0.1:8080 >/dev/null 2>&1 && break; sleep 1; done; echo "starting...") '
CREATE EXTERNAL WEB TABLE gpfdist_sreh_start (x text)
execute E:'gpfdist_sreh_start_e'
on MASTER
FORMAT 'text' (delimiter '|');
CREATE EXTERNAL WEB TABLE gpfdist_sreh_stop (x text)
execute E'(pkill gpfdist || killall gpfdist) > /dev/null 2>&1; echo "stopping..."'
on MASTER
FORMAT 'text' (delimiter '|');
select * from gpfdist_sreh_stop;
select * from gpfdist_sreh_start;
-- create a target table to load into
CREATE TABLE sreh_target(a int, b int, c int) distributed by(a);
--
-- reject limit only
--
\getenv hostname PG_HOSTNAME
\set bad_data1 'gpfdist://' :hostname ':8080/bad_data1.data'
CREATE EXTERNAL TABLE sreh_ext(a int, b int, c int)
LOCATION (:'bad_data1' )
FORMAT 'text' (delimiter '|')
SEGMENT REJECT LIMIT 10000;
SELECT * FROM sreh_ext;
INSERT INTO sreh_target SELECT * FROM sreh_ext;
SELECT count(*) FROM sreh_target;
TRUNCATE sreh_target;
DROP EXTERNAL TABLE sreh_ext;
--
-- reject limit only - low value that gets reached
--
CREATE EXTERNAL TABLE sreh_ext(a int, b int, c int)
LOCATION (:'bad_data1' )
FORMAT 'text' (delimiter '|')
SEGMENT REJECT LIMIT 2;
SELECT * FROM sreh_ext ORDER BY a;
INSERT INTO sreh_target SELECT * FROM sreh_ext;
SELECT count(*) FROM sreh_target;
TRUNCATE sreh_target;
DROP EXTERNAL TABLE sreh_ext;
--
-- error logs
--
CREATE EXTERNAL TABLE sreh_ext_err_tbl(a int, b int, c int)
LOCATION (:'bad_data1' )
FORMAT 'text' (delimiter '|')
LOG ERRORS INTO WHATEVER
SEGMENT REJECT LIMIT 1000;
SELECT * FROM sreh_ext_err_tbl ORDER BY a;
-- Verify the fields that we easily can.
\set bad_data1_like 'gpfdist://' :hostname ':8080/bad_data1.data [%]'
WITH error_log AS (SELECT * FROM gp_read_error_log('sreh_ext_err_tbl'))
SELECT relname, filename LIKE :'bad_data1_like' as filename_ok, linenum, errmsg from error_log;
CREATE EXTERNAL TABLE sreh_ext(a int, b int, c int)
LOCATION (:'bad_data1' )
FORMAT 'text' (delimiter '|')
LOG ERRORS
SEGMENT REJECT LIMIT 1000;
SELECT * FROM sreh_ext ORDER BY a;
WITH error_log AS (SELECT gp_read_error_log('sreh_ext')) select count(*) from error_log;
INSERT INTO sreh_target SELECT * FROM sreh_ext;
SELECT count(*) FROM sreh_target;
TRUNCATE sreh_target;
--
-- error logs - do the same thing again. this time error logs exist and should get data appended
--
SELECT * FROM sreh_ext ORDER BY a;
SELECT linenum, rawdata FROM gp_read_error_log('sreh_ext') ORDER BY linenum;
INSERT INTO sreh_target SELECT * FROM sreh_ext;
SELECT count(*) FROM sreh_target;
TRUNCATE sreh_target;
DROP EXTERNAL TABLE sreh_ext;
DROP EXTERNAL TABLE sreh_ext_err_tbl;
--
-- constraint errors - data is rolled back from both target and error tables (CHECK)
--
CREATE TABLE sreh_constr(a int, b int, c int check (c < 10));
CREATE EXTERNAL TABLE sreh_ext(a int, b int, c int)
LOCATION (:'bad_data1' )
FORMAT 'text' (delimiter '|')
LOG ERRORS
SEGMENT REJECT LIMIT 1000;
INSERT INTO sreh_constr SELECT * FROM sreh_ext;
SELECT linenum, rawdata FROM gp_read_error_log('sreh_constr') ORDER BY linenum;
SELECT COUNT(*) FROM sreh_constr; -- should be empty
--
-- test PERCENT reject limit logic with threshold 100.
-- (we don't test the default threshold (300) yet. we could if needed).
-- bad records in bad_data3.data (300 rows): 5,15,55,56,107,136,160,213,214,286
-- in here we assume that gpfdist will distribute all the data to one QE (since
-- it is much smaller than the data buffer it uses) so the percent calculation
-- should always be the same regardless of number of QE's in the system.
--
set gp_reject_percent_threshold = 100;
\set bad_data3 'gpfdist://' :hostname ':8080/bad_data3.data'
CREATE EXTERNAL TABLE sreh_ext_10percent(a int, b int, c int)
LOCATION (:'bad_data3' )
FORMAT 'text' (delimiter '|')
SEGMENT REJECT LIMIT 10 PERCENT;
SELECT count(*) FROM sreh_ext_10percent; -- pass
CREATE EXTERNAL TABLE sreh_ext_2percent(a int, b int, c int)
LOCATION (:'bad_data3' )
FORMAT 'text' (delimiter '|')
SEGMENT REJECT LIMIT 2 PERCENT;
SELECT count(*) FROM sreh_ext_2percent; -- fail
--
-- test PERCENT reject limit logic with custom threshold 10 (only practical for test purposes)
--
set gp_reject_percent_threshold = 10;
SELECT count(*) FROM sreh_ext_10percent; -- fail
CREATE EXTERNAL TABLE sreh_ext_20percent(a int, b int, c int)
LOCATION (:'bad_data3' )
FORMAT 'text' (delimiter '|')
SEGMENT REJECT LIMIT 20 PERCENT;
SELECT count(*) FROM sreh_ext_20percent; -- pass
set gp_reject_percent_threshold = 300; -- reset
-- cleanup
select * from gpfdist_sreh_stop;
DROP EXTERNAL WEB TABLE gpfdist_sreh_stop;
DROP EXTERNAL WEB TABLE gpfdist_sreh_start;
DROP EXTERNAL TABLE sreh_ext;
DROP EXTERNAL TABLE sreh_ext_2percent;
DROP EXTERNAL TABLE sreh_ext_10percent;
DROP EXTERNAL TABLE sreh_ext_20percent;
DROP TABLE sreh_target;
DROP TABLE sreh_constr;