--
-- COPY
--
-- CLASS POPULATION
-- (any resemblance to real life is purely coincidental)
--
COPY aggtest FROM '@abs_srcdir@/data/agg.data';
COPY onek FROM '@abs_srcdir@/data/onek.data';
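-- Round-trip onek through a file to exercise COPY in both directions.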
COPY onek TO '@abs_builddir@/results/onek.data';
DELETE FROM onek;
COPY onek FROM '@abs_builddir@/results/onek.data';
COPY tenk1 FROM '@abs_srcdir@/data/tenk.data';
-- Create a table that's identical to 'tenk1', but with all the data on a
-- single segment. This comes in handy for making some PostgreSQL tests pass
-- on GPDB, where data distribution can affect a test's result.
CREATE SCHEMA singleseg;
create table singleseg.tenk1 (like tenk1, distkey int4) distributed by (distkey);
COPY singleseg.tenk1 (unique1,unique2,two,four,ten,twenty,hundred,thousand,twothousand,fivethous,tenthous,odd,even,stringu1,stringu2,string4) FROM '@abs_srcdir@/data/tenk.data';
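-- Illustrative sanity check (a sketch, not part of the original test's
-- expected output): the distkey column is omitted from the COPY column list,
-- so every row gets a NULL distribution key and hashes to the same segment.
-- GPDB's gp_segment_id system column can confirm the single-segment layout.
select count(distinct gp_segment_id) as segments_used from singleseg.tenk1;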
COPY slow_emp4000 FROM '@abs_srcdir@/data/rect.data';
COPY person FROM '@abs_srcdir@/data/person.data';
COPY emp FROM '@abs_srcdir@/data/emp.data';
COPY student FROM '@abs_srcdir@/data/student.data';
COPY stud_emp FROM '@abs_srcdir@/data/stud_emp.data';
COPY road FROM '@abs_srcdir@/data/streets.data';
COPY real_city FROM '@abs_srcdir@/data/real_city.data';
COPY hash_i4_heap FROM '@abs_srcdir@/data/hash.data';
COPY hash_name_heap FROM '@abs_srcdir@/data/hash.data';
COPY hash_txt_heap FROM '@abs_srcdir@/data/hash.data';
COPY hash_f8_heap FROM '@abs_srcdir@/data/hash.data';
COPY test_tsvector FROM '@abs_srcdir@/data/tsearch.data';
COPY testjsonb FROM '@abs_srcdir@/data/jsonb.data';
-- the data in this file has a lot of duplicates in the index key
-- fields, leading to long bucket chains and lots of table expansion.
-- this is therefore a stress test of the bucket overflow code (unlike
-- the data in hash.data, which has unique index keys).
--
-- COPY hash_ovfl_heap FROM '@abs_srcdir@/data/hashovfl.data';
COPY bt_i4_heap FROM '@abs_srcdir@/data/desc.data';
COPY bt_name_heap FROM '@abs_srcdir@/data/hash.data';
COPY bt_txt_heap FROM '@abs_srcdir@/data/desc.data';
COPY bt_f8_heap FROM '@abs_srcdir@/data/hash.data';
COPY array_op_test FROM '@abs_srcdir@/data/array.data';
COPY array_index_op_test FROM '@abs_srcdir@/data/array.data';
-- analyze all the data we just loaded, to ensure plan consistency
-- in later tests
-- Force pgstat_report_stat() to send tabstat before pgstat_report_analyze.
-- Normally this is not needed, but ORCA is very sensitive to statistics.
-- If the analyze message is received first, n_mod_since_analyze will not be 0.
-- And since we create indexes for some tables later, the triggered auto-ANALYZE
-- would change the tables' index statistics and ORCA may generate different
-- plans for some queries.
select pg_sleep(0.77);
ANALYZE aggtest;
ANALYZE onek;
ANALYZE tenk1;
ANALYZE slow_emp4000;
ANALYZE person;
ANALYZE emp;
ANALYZE student;
ANALYZE stud_emp;
ANALYZE road;
ANALYZE real_city;
ANALYZE hash_i4_heap;
ANALYZE hash_name_heap;
ANALYZE hash_txt_heap;
ANALYZE hash_f8_heap;
ANALYZE test_tsvector;
ANALYZE testjsonb;
ANALYZE bt_i4_heap;
ANALYZE bt_name_heap;
ANALYZE bt_txt_heap;
ANALYZE bt_f8_heap;
ANALYZE array_op_test;
ANALYZE array_index_op_test;
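-- Illustrative check (a sketch, assuming the stats collector has caught up;
-- not part of the expected test output): with tabstat flushed before the
-- analyze message, the modification counter should read zero after ANALYZE.
select n_mod_since_analyze from pg_stat_all_tables where relname = 'tenk1';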
--- test copying in CSV mode with various styles
--- of embedded line ending characters
create temp table copytest (
style text,
test text,
filler int);
insert into copytest values('DOS',E'abc\r\ndef',1);
insert into copytest values('Unix',E'abc\ndef',2);
insert into copytest values('Mac',E'abc\rdef',3);
insert into copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4);
copy copytest to '@abs_builddir@/results/copytest.csv' csv;
create temp table copytest2 (like copytest);
copy copytest2 from '@abs_builddir@/results/copytest.csv' csv;
copy copytest2 from '@abs_builddir@/results/copytest.csv' csv LOG ERRORS SEGMENT REJECT LIMIT 10 ROWS;
select * from copytest except select * from copytest2 order by 1,2,3;
truncate copytest2;
--- same test but with an escape char different from quote char
copy copytest to '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\';
copy copytest2 from '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\';
select * from copytest except select * from copytest2 order by 1,2,3;
-- test header line feature
create temp table copytest3 (
c1 int,
"col with , comma" text,
"col with "" quote" int) distributed by (c1);
copy copytest3 from stdin csv header;
this is just a line full of junk that would error out if parsed
1,a,1
2,b,2
\.
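-- With the header option, the first line of the input above is discarded as
-- a header rather than parsed, so the junk line loads without error.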
copy copytest3 to stdout csv header;
-- test copy force quote
create temp table copytest4 (id int, id1 int);
insert into copytest4 values (1,2);
insert into copytest4 values (1,3);
insert into copytest4 values (1,4);
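-- Referencing the nonexistent column id2 should fail.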
copy (select * from copytest4) to stdout csv delimiter ',' force quote id, id1, id2;
copy (select * from copytest4) to stdout csv delimiter ',' force quote id, id1;
-- test null string with CRLF for text mode
CREATE TEMP TABLE venue(
venueid smallint not null,
venuename varchar(100),
venuecity varchar(30),
venuestate char(2),
venueseats integer) DISTRIBUTED BY (venueid);
COPY venue FROM '@abs_srcdir@/data/venue_pipe.txt' WITH DELIMITER AS '|';
SELECT count(*) FROM venue;
-- test copy from with a partitioned table
create table parted_copytest (
a int,
b int,
c text
) partition by list (b);
create table parted_copytest_a1 (c text, b int, a int);
create table parted_copytest_a2 (a int, c text, b int);
alter table parted_copytest_a1 set distributed by (a);
alter table parted_copytest attach partition parted_copytest_a1 for values in(1);
alter table parted_copytest attach partition parted_copytest_a2 for values in(2);
-- We must insert enough rows to trigger multi-inserts. These are only
-- enabled adaptively when there are few enough partition changes.
insert into parted_copytest select x,1,'One' from generate_series(1,1000) x;
insert into parted_copytest select x,2,'Two' from generate_series(1001,1010) x;
insert into parted_copytest select x,1,'One' from generate_series(1011,1020) x;
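-- The pattern above (one long run in a single partition, then short runs that
-- switch partitions) exercises both the buffered multi-insert path and the
-- per-row fallback.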
copy (select * from parted_copytest order by a) to '@abs_builddir@/results/parted_copytest.csv';
truncate parted_copytest;
copy parted_copytest from '@abs_builddir@/results/parted_copytest.csv';
-- Ensure COPY FREEZE errors for partitioned tables.
begin;
truncate parted_copytest;
copy parted_copytest from '@abs_builddir@/results/parted_copytest.csv' (freeze);
rollback;
select tableoid::regclass,count(*),sum(a) from parted_copytest
group by tableoid order by tableoid::regclass::name;
truncate parted_copytest;
-- create before insert row trigger on parted_copytest_a2
create function part_ins_func() returns trigger language plpgsql as $$
begin
return new;
end;
$$;
create trigger part_ins_trig
before insert on parted_copytest_a2
for each row
execute procedure part_ins_func();
copy parted_copytest from '@abs_builddir@/results/parted_copytest.csv';
select tableoid::regclass,count(*),sum(a) from parted_copytest
group by tableoid order by tableoid::regclass::name;
truncate table parted_copytest;
create index on parted_copytest (b);
drop trigger part_ins_trig on parted_copytest_a2;
copy parted_copytest from stdin;
1 1 str1
2 2 str2
\.
-- Ensure index entries were properly added during the copy.
select * from parted_copytest where b = 1;
select * from parted_copytest where b = 2;
drop table parted_copytest;
--
-- Progress reporting for COPY
--
create table tab_progress_reporting (
name text,
age int4,
location point,
salary int4,
manager name
);
-- Add a trigger to catch and print the contents of the catalog view
-- pg_stat_progress_copy during data insertion. This makes it possible to
-- validate some of the progress reports for COPY FROM at the point where
-- the trigger fires.
create function notice_after_tab_progress_reporting() returns trigger AS
$$
declare report record;
begin
-- The fields ignored here are the ones that may not remain
-- consistent across multiple runs. The sizes reported may differ
-- across platforms, so just check if these are strictly positive.
with progress_data as (
select
relid::regclass::text as relname,
command,
type,
bytes_processed > 0 as has_bytes_processed,
bytes_total > 0 as has_bytes_total,
tuples_processed,
tuples_excluded
from pg_stat_progress_copy
where pid = pg_backend_pid())
select into report (to_jsonb(r)) as value
from progress_data r;
raise info 'progress: %', report.value::text;
return new;
end;
$$ language plpgsql;
create trigger check_after_tab_progress_reporting
after insert on tab_progress_reporting
for each statement
execute function notice_after_tab_progress_reporting();
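-- The statement-level AFTER trigger fires while the COPY command is still in
-- progress, so the pg_stat_progress_copy row for this backend is visible from
-- inside the trigger.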
-- Generate COPY FROM report with PIPE.
copy tab_progress_reporting from stdin;
sharon 25 (15,12) 1000 sam
sam 30 (10,5) 2000 bill
bill 20 (11,10) 1000 sharon
\.
-- Generate COPY FROM report with FILE, with some excluded tuples.
truncate tab_progress_reporting;
copy tab_progress_reporting from '@abs_srcdir@/data/emp.data'
where (salary < 2000);
drop trigger check_after_tab_progress_reporting on tab_progress_reporting;
drop function notice_after_tab_progress_reporting();
drop table tab_progress_reporting;
-- check COPY behavior
-- there are 2 fields in copy.data
-- table_1_col_dist_c1: 1 col in table < 2 fields in file
-- table_2_cols_dist_c1 and
-- table_2_cols_dist_c2: 2 cols in table = 2 fields in file
-- table_3_cols_dist_c1 and
-- table_3_cols_dist_c2 and
-- table_3_cols_dist_c3: 3 cols in table > 2 fields in file
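-- COPY requires each input line's field count to match the target column
-- list, so the 1-column and 3-column loads below are expected to fail with
-- "extra data" / "missing data" errors, leaving those tables empty.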
CREATE TABLE table_1_col_dist_c1(c1 text) distributed by (c1);
CREATE TABLE table_2_cols_dist_c1(c1 text, c2 text) distributed by (c1);
CREATE TABLE table_2_cols_dist_c2(c1 text, c2 text) distributed by (c2);
CREATE TABLE table_3_cols_dist_c1(c1 text, c2 text, c3 text) distributed by (c1);
CREATE TABLE table_3_cols_dist_c2(c1 text, c2 text, c3 text) distributed by (c2);
CREATE TABLE table_3_cols_dist_c3(c1 text, c2 text, c3 text) distributed by (c3);
COPY table_1_col_dist_c1 FROM '@abs_srcdir@/data/copy.data' DELIMITER ',';
COPY table_2_cols_dist_c1 FROM '@abs_srcdir@/data/copy.data' DELIMITER ',';
COPY table_2_cols_dist_c2 FROM '@abs_srcdir@/data/copy.data' DELIMITER ',';
COPY table_3_cols_dist_c1 FROM '@abs_srcdir@/data/copy.data' DELIMITER ',';
COPY table_3_cols_dist_c2 FROM '@abs_srcdir@/data/copy.data' DELIMITER ',';
COPY table_3_cols_dist_c3 FROM '@abs_srcdir@/data/copy.data' DELIMITER ',';
SELECT count(*) FROM table_1_col_dist_c1;
SELECT count(*) FROM table_2_cols_dist_c1;
SELECT count(*) FROM table_2_cols_dist_c2;
SELECT count(*) FROM table_3_cols_dist_c1;
SELECT count(*) FROM table_3_cols_dist_c2;
SELECT count(*) FROM table_3_cols_dist_c3;
DROP TABLE table_1_col_dist_c1;
DROP TABLE table_2_cols_dist_c1;
DROP TABLE table_2_cols_dist_c2;
DROP TABLE table_3_cols_dist_c1;
DROP TABLE table_3_cols_dist_c2;
DROP TABLE table_3_cols_dist_c3;
-- special case, tables without columns
CREATE TABLE table_no_cols();
INSERT INTO table_no_cols DEFAULT VALUES;
SELECT count(*) FROM table_no_cols;
COPY table_no_cols TO '@abs_builddir@/results/copy_no_cols.data' DELIMITER ',';
COPY table_no_cols FROM '@abs_builddir@/results/copy_no_cols.data' DELIMITER ',';
SELECT count(*) FROM table_no_cols;
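-- Copying a file whose lines still contain fields into a zero-column table
-- should fail with an extra-data error.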
COPY table_no_cols FROM '@abs_srcdir@/data/copy.data' DELIMITER ',';
DROP TABLE table_no_cols;