blob: fe0187b380d78dbcd243b6b5fa9638d3e4cafa20 [file] [log] [blame]
--
-- external tables - short and simple functional tests.
--
-- start_matchsubs
--
-- # replace return code in error message (platform specific)
--
-- m/ERROR\:\s+external table .* command ended with .* not found/
-- s/nosuchcommand\:\s*(command)? not found/nosuchcommand\: NOT FOUND/
--
-- m/ERROR\:\s+external table .* command ended with .*No such file.*/
-- s/nosuchfile\.txt\:\s*No such file (or directory)?/nosuchfile\.txt\: NO SUCH FILE/
-- m/ERROR\:\s+external table .* command ended with .*No such file.*/i
-- s/cat\: (cannot open)? nosuchfile\.txt/cat\: nosuchfile\.txt/
--
-- # remove line number - redhat
-- m/ERROR\:\s+external table .* command ended with .*NOT FOUND.*/i
-- s/\s+line \d+\://
-- # remove line number - Debian
-- m/ERROR\:\s+external table .* command ended with .*sh: 1: .*NOT FOUND.*/i
-- s/ sh: 1: / sh: /
--
-- m/DETAIL: Found \d+ URLs and \d+ primary segments./
-- s/Found.+//
--
-- end_matchsubs
set optimizer_trace_fallback=on;
create schema part_external_table;
set search_path=part_external_table;
create table part (a int, b int) partition by range (b);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create external table p1_e (a int, b int) location ('file://@hostname@@abs_srcdir@/data/part1.csv') format 'csv';
create external table p2_e (a int, b int) location ('file://@hostname@@abs_srcdir@/data/part2.csv') format 'csv';
alter table part attach partition p1_e for values from (0) to (10);
NOTICE: partition constraints are not validated when attaching a readable external table
alter table part attach partition p2_e for values from (10) to (19);
NOTICE: partition constraints are not validated when attaching a readable external table
analyze part;
WARNING: skipping "p2_e" --- cannot analyze this foreign table
WARNING: skipping "p1_e" --- cannot analyze this foreign table
explain select * from part;
QUERY PLAN
------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2722000.00 rows=180000000 width=8)
-> Append (cost=0.00..322000.00 rows=60000000 width=8)
-> Foreign Scan on p1_e part_1 (cost=0.00..11000.00 rows=30000000 width=8)
-> Foreign Scan on p2_e part_2 (cost=0.00..11000.00 rows=30000000 width=8)
Optimizer: Postgres query optimizer
(5 rows)
select * from part;
a | b
----+----
1 | 2
3 | 4
5 | 5
7 | 8
11 | 12
13 | 14
15 | 15
17 | 18
(8 rows)
-- test SPE
explain select * from part where b>10 and a>0;
QUERY PLAN
----------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..246940.11 rows=17320508 width=8)
-> Foreign Scan on p2_e part (cost=0.00..16000.00 rows=5773503 width=8)
Filter: ((b > 10) AND (a > 0))
Optimizer: Postgres query optimizer
(4 rows)
select * from part where b>10 and a>0;
a | b
----+----
11 | 12
13 | 14
15 | 15
17 | 18
(4 rows)
create table non_part (a int, b int) distributed by (a);
insert into non_part values (15,15);
analyze non_part;
-- mixed partitions with DPE with multiple dynamic scans, select one partition
-- start_ignore
-- unstable test case
explain analyze select * from part, non_part where part.b=non_part.b;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2545.25..649094545.25 rows=172200000 width=16) (actual time=7.635..7.787 rows=1 loops=1)
-> Hash Join (cost=2545.25..646798545.25 rows=57400000 width=16) (actual time=4.331..6.693 rows=1 loops=1)
Hash Cond: (part.b = non_part.b)
Extra Text: (seg0) Hash chain length 1.0 avg, 1 max, using 1 of 524288 buckets.
-> Append (cost=0.00..322000.00 rows=60000000 width=8) (actual time=0.331..0.350 rows=4 loops=1)
Partition Selectors: $0
-> Foreign Scan on p1_e part_1 (cost=0.00..11000.00 rows=30000000 width=8) (never executed)
-> Foreign Scan on p2_e part_2 (cost=0.00..11000.00 rows=30000000 width=8) (actual time=0.327..0.343 rows=4 loops=1)
-> Hash (cost=1469.00..1469.00 rows=86100 width=8) (actual time=0.080..0.084 rows=1 loops=1)
Buckets: 524288 Batches: 1 Memory Usage: 4097kB
-> Partition Selector (selector id: $0) (cost=0.00..1469.00 rows=86100 width=8) (actual time=0.058..0.061 rows=1 loops=1)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=8) (actual time=0.033..0.034 rows=1 loops=1)
-> Seq Scan on non_part (cost=0.00..321.00 rows=28700 width=8) (actual time=0.060..0.063 rows=1 loops=1)
Planning Time: 2.398 ms
(slice0) Executor memory: 211K bytes.
(slice1) Executor memory: 4360K bytes avg x 3x(0) workers, 4454K bytes max (seg0). Work_mem: 4097K bytes max.
(slice2) Executor memory: 112K bytes avg x 3x(0) workers, 112K bytes max (seg0).
Memory used: 128000kB
Optimizer: Postgres query optimizer
Execution Time: 19.240 ms
(20 rows)
-- end_ignore
select * from part, non_part where part.b=non_part.b;
a | b | a | b
----+----+----+----
15 | 15 | 15 | 15
(1 row)
insert into non_part values (5,5);
analyze non_part;
-- mixed partitions with DPE with multiple dynamic scans, select both partition
-- start_ignore
-- unstable test case
explain analyze select * from part, non_part where part.b=non_part.b;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2545.25..649094545.25 rows=172200000 width=16) (actual time=3.810..3.815 rows=2 loops=1)
-> Hash Join (cost=2545.25..646798545.25 rows=57400000 width=16) (actual time=1.414..3.220 rows=2 loops=1)
Hash Cond: (part.b = non_part.b)
Extra Text: (seg0) Hash chain length 1.0 avg, 1 max, using 2 of 524288 buckets.
-> Append (cost=0.00..322000.00 rows=60000000 width=8) (actual time=0.233..0.447 rows=8 loops=1)
Partition Selectors: $0
-> Foreign Scan on p1_e part_1 (cost=0.00..11000.00 rows=30000000 width=8) (actual time=0.231..0.244 rows=4 loops=1)
-> Foreign Scan on p2_e part_2 (cost=0.00..11000.00 rows=30000000 width=8) (actual time=0.185..0.197 rows=4 loops=1)
-> Hash (cost=1469.00..1469.00 rows=86100 width=8) (actual time=0.791..0.793 rows=2 loops=1)
Buckets: 524288 Batches: 1 Memory Usage: 4097kB
-> Partition Selector (selector id: $0) (cost=0.00..1469.00 rows=86100 width=8) (actual time=0.640..0.786 rows=2 loops=1)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=8) (actual time=0.631..0.774 rows=2 loops=1)
-> Seq Scan on non_part (cost=0.00..321.00 rows=28700 width=8) (actual time=0.014..0.015 rows=1 loops=1)
Planning Time: 1.557 ms
(slice0) Executor memory: 211K bytes.
(slice1) Executor memory: 4373K bytes avg x 3x(0) workers, 4492K bytes max (seg0). Work_mem: 4097K bytes max.
(slice2) Executor memory: 111K bytes avg x 3x(0) workers, 111K bytes max (seg0).
Memory used: 128000kB
Optimizer: Postgres query optimizer
Execution Time: 4.815 ms
(20 rows)
-- end_ignore
select * from part, non_part where part.b=non_part.b;
a | b | a | b
----+----+----+----
5 | 5 | 5 | 5
15 | 15 | 15 | 15
(2 rows)
create table p3 (a int, b int) distributed by (a);
create table p4 (a int, b int) distributed by (a);
alter table part attach partition p3 for values from (20) to (30);
alter table part attach partition p4 for values from (30) to (40);
insert into part select i,i from generate_series(25,35)i;
analyze part;
WARNING: skipping "p2_e" --- cannot analyze this foreign table
WARNING: skipping "p1_e" --- cannot analyze this foreign table
insert into non_part values (32,32);
-- mixed partitions
explain select * from part;
QUERY PLAN
------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2722002.20 rows=180000011 width=8)
-> Append (cost=0.00..322002.05 rows=60000004 width=8)
-> Foreign Scan on p1_e part_1 (cost=0.00..11000.00 rows=30000000 width=8)
-> Foreign Scan on p2_e part_2 (cost=0.00..11000.00 rows=30000000 width=8)
-> Seq Scan on p3 part_3 (cost=0.00..1.02 rows=2 width=8)
-> Seq Scan on p4 part_4 (cost=0.00..1.02 rows=2 width=8)
Optimizer: Postgres query optimizer
(7 rows)
select * from part;
a | b
----+----
25 | 25
28 | 28
32 | 32
33 | 33
26 | 26
30 | 30
31 | 31
35 | 35
1 | 2
3 | 4
5 | 5
7 | 8
11 | 12
13 | 14
15 | 15
17 | 18
27 | 27
29 | 29
34 | 34
(19 rows)
--mixed partitions with SPE
explain select * from part where b>10 and b<25;
QUERY PLAN
-------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..29501.07 rows=900003 width=8)
-> Append (cost=0.00..17501.03 rows=300001 width=8)
-> Foreign Scan on p2_e part_1 (cost=0.00..16000.00 rows=300000 width=8)
Filter: ((b > 10) AND (b < 25))
-> Seq Scan on p3 part_2 (cost=0.00..1.02 rows=1 width=8)
Filter: ((b > 10) AND (b < 25))
Optimizer: Postgres query optimizer
(7 rows)
select * from part where b>10 and b<25 and a > 2;
a | b
----+----
11 | 12
13 | 14
15 | 15
17 | 18
(4 rows)
--mixed partitions with SPE, only do seq scans
explain select * from part where b>25;
QUERY PLAN
-----------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2.20 rows=10 width=8)
-> Append (cost=0.00..2.06 rows=3 width=8)
-> Seq Scan on p3 part_1 (cost=0.00..1.02 rows=1 width=8)
Filter: (b > 25)
-> Seq Scan on p4 part_2 (cost=0.00..1.02 rows=2 width=8)
Filter: (b > 25)
Optimizer: Postgres query optimizer
(7 rows)
select * from part where b>25;
a | b
----+----
26 | 26
30 | 30
31 | 31
35 | 35
28 | 28
32 | 32
33 | 33
27 | 27
29 | 29
34 | 34
(10 rows)
--mixed partitions with SPE, only do foreign scans
explain select a from part where b<18;
QUERY PLAN
----------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..927000.00 rows=60000000 width=4)
-> Append (cost=0.00..127000.00 rows=20000000 width=4)
-> Foreign Scan on p1_e part_1 (cost=0.00..13500.00 rows=10000000 width=4)
Filter: (b < 18)
-> Foreign Scan on p2_e part_2 (cost=0.00..13500.00 rows=10000000 width=4)
Filter: (b < 18)
Optimizer: Postgres query optimizer
(7 rows)
select a from part where b<18;
a
----
1
3
5
7
11
13
15
(7 rows)
truncate non_part;
-- mixed partitions with DPE with multiple dynamic scans, select no partitions
explain analyze select * from part, non_part where part.b=non_part.b;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2545.25..550982.32 rows=86100 width=16) (actual time=3.922..3.926 rows=0 loops=1)
-> Hash Join (cost=2545.25..549834.32 rows=28700 width=16) (actual time=3.045..3.051 rows=0 loops=1)
Hash Cond: (part.b = non_part.b)
-> Append (cost=0.00..322002.05 rows=60000004 width=8) (never executed)
Partition Selectors: $0
-> Foreign Scan on p1_e part_1 (cost=0.00..11000.00 rows=30000000 width=8) (never executed)
-> Foreign Scan on p2_e part_2 (cost=0.00..11000.00 rows=30000000 width=8) (never executed)
-> Seq Scan on p3 part_3 (cost=0.00..1.02 rows=2 width=8) (never executed)
-> Seq Scan on p4 part_4 (cost=0.00..1.02 rows=2 width=8) (never executed)
-> Hash (cost=1469.00..1469.00 rows=86100 width=8) (actual time=0.010..0.013 rows=0 loops=1)
Buckets: 524288 Batches: 1 Memory Usage: 4096kB
-> Partition Selector (selector id: $0) (cost=0.00..1469.00 rows=86100 width=8) (actual time=0.009..0.010 rows=0 loops=1)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=8) (actual time=0.008..0.009 rows=0 loops=1)
-> Seq Scan on non_part (cost=0.00..321.00 rows=28700 width=8) (actual time=0.016..0.018 rows=0 loops=1)
Planning Time: 2.743 ms
(slice0) Executor memory: 286K bytes.
(slice1) Executor memory: 4284K bytes avg x 3x(0) workers, 4284K bytes max (seg0). Work_mem: 4096K bytes max.
(slice2) Executor memory: 112K bytes avg x 3x(0) workers, 112K bytes max (seg0).
Memory used: 128000kB
Optimizer: Postgres query optimizer
Execution Time: 14.920 ms
(21 rows)
select * from part, non_part where part.b=non_part.b;
a | b | a | b
---+---+---+---
(0 rows)
insert into non_part values (15,15), (32,32);
-- mixed partitions with DPE with multiple dynamic scans, select one partition from each dynamic scan
explain analyze select * from part, non_part where part.b=non_part.b;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2545.25..550982.32 rows=86100 width=16) (actual time=3.227..3.647 rows=2 loops=1)
-> Hash Join (cost=2545.25..549834.32 rows=28700 width=16) (actual time=1.326..2.957 rows=1 loops=1)
Hash Cond: (part.b = non_part.b)
Extra Text: (seg0) Hash chain length 1.0 avg, 1 max, using 2 of 524288 buckets.
-> Append (cost=0.00..322002.05 rows=60000004 width=8) (actual time=0.346..0.383 rows=5 loops=1)
Partition Selectors: $0
-> Foreign Scan on p1_e part_1 (cost=0.00..11000.00 rows=30000000 width=8) (never executed)
-> Foreign Scan on p2_e part_2 (cost=0.00..11000.00 rows=30000000 width=8) (actual time=0.343..0.358 rows=4 loops=1)
-> Seq Scan on p3 part_3 (cost=0.00..1.02 rows=2 width=8) (never executed)
-> Seq Scan on p4 part_4 (cost=0.00..1.02 rows=2 width=8) (actual time=0.035..0.036 rows=3 loops=1)
-> Hash (cost=1469.00..1469.00 rows=86100 width=8) (actual time=0.680..0.682 rows=2 loops=1)
Buckets: 524288 Batches: 1 Memory Usage: 4097kB
-> Partition Selector (selector id: $0) (cost=0.00..1469.00 rows=86100 width=8) (actual time=0.667..0.673 rows=2 loops=1)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=8) (actual time=0.658..0.661 rows=2 loops=1)
-> Seq Scan on non_part (cost=0.00..321.00 rows=28700 width=8) (actual time=0.046..0.047 rows=1 loops=1)
Planning Time: 2.386 ms
(slice0) Executor memory: 286K bytes.
(slice1) Executor memory: 4366K bytes avg x 3x(0) workers, 4457K bytes max (seg0). Work_mem: 4097K bytes max.
(slice2) Executor memory: 111K bytes avg x 3x(0) workers, 111K bytes max (seg0).
Memory used: 128000kB
Optimizer: Postgres query optimizer
Execution Time: 5.052 ms
(22 rows)
select * from part, non_part where part.b=non_part.b;
a | b | a | b
----+----+----+----
32 | 32 | 32 | 32
15 | 15 | 15 | 15
(2 rows)
-- test different servers, should create different scans
-- note: do not select here, just test plan
CREATE EXTENSION file_fdw;
CREATE SERVER file_server1 FOREIGN DATA WRAPPER file_fdw;
CREATE SERVER file_server2 FOREIGN DATA WRAPPER file_fdw;
CREATE FOREIGN TABLE ft1 (
a int,
b int
) SERVER file_server1
OPTIONS ( filename '/does/not/exist.csv', format 'csv');
CREATE FOREIGN TABLE ft2 (
a int,
b int
) SERVER file_server2
OPTIONS ( filename '/does/not/exist.csv', format 'csv');
alter table part attach partition ft1 for values from (40) to (50);
alter table part attach partition ft2 for values from (50) to (60);
analyze part;
ERROR: could not stat file "/does/not/exist.csv": No such file or directory
-- should have multiple dynamic scans
explain select * from part;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Append (cost=0.00..3324172.64 rows=180020491 width=8)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1211000.00 rows=90000000 width=8)
-> Foreign Scan on p1_e part_1 (cost=0.00..11000.00 rows=30000000 width=8)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1211000.00 rows=90000000 width=8)
-> Foreign Scan on p2_e part_2 (cost=0.00..11000.00 rows=30000000 width=8)
-> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..1.08 rows=5 width=8)
-> Seq Scan on p3 part_3 (cost=0.00..1.02 rows=2 width=8)
-> Gather Motion 3:1 (slice4; segments: 3) (cost=0.00..1.10 rows=6 width=8)
-> Seq Scan on p4 part_4 (cost=0.00..1.02 rows=2 width=8)
-> Foreign Scan on ft1 part_5 (cost=0.00..1034.00 rows=10240 width=8)
Foreign File: /does/not/exist.csv
-> Foreign Scan on ft2 part_6 (cost=0.00..1034.00 rows=10240 width=8)
Foreign File: /does/not/exist.csv
Optimizer: Postgres query optimizer
(14 rows)
-- eliminate unnecessary dynamic scans and foreign sans
explain select * from part where b = 53;
QUERY PLAN
----------------------------------------------------------------
Foreign Scan on ft2 part (cost=0.00..1059.60 rows=10 width=8)
Filter: (b = 53)
Foreign File: /does/not/exist.csv
Optimizer: Postgres query optimizer
(4 rows)
-- only select foreign scans
explain select * from part where b > 22;
QUERY PLAN
----------------------------------------------------------------------------------
Append (cost=0.00..2155.58 rows=6838 width=8)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.09 rows=5 width=8)
-> Seq Scan on p3 part_1 (cost=0.00..1.02 rows=2 width=8)
Filter: (b > 22)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.10 rows=6 width=8)
-> Seq Scan on p4 part_2 (cost=0.00..1.02 rows=2 width=8)
Filter: (b > 22)
-> Foreign Scan on ft1 part_3 (cost=0.00..1059.60 rows=3413 width=8)
Filter: (b > 22)
Foreign File: /does/not/exist.csv
-> Foreign Scan on ft2 part_4 (cost=0.00..1059.60 rows=3413 width=8)
Filter: (b > 22)
Foreign File: /does/not/exist.csv
Optimizer: Postgres query optimizer
(14 rows)
--
-- exchange & attach partition
--
alter table part add partition exch1 start(60) end (70);
alter table part add partition exch2 start(70) end (80);
-- exchange with external tables
create external web table p3_e (a int, b int) execute 'cat > @abs_srcdir@/data/part-ext.csv' format 'csv' (delimiter as '|' null as 'null' escape as ' ');
create writable external web table p4_e (a int, b int) execute 'cat > @abs_srcdir@/data/part-ext.csv' format 'csv' (delimiter as '|' null as 'null' escape as ' ');
-- allow exchange readable external table
alter table part exchange partition exch1 with table p3_e;
NOTICE: partition constraints are not validated when attaching a readable external table
-- should disallow writable external table
alter table part exchange partition exch1 with table p4_e;
ERROR: cannot attach a WRITABLE external table
-- exchange with foreign tables
CREATE SERVER file_server3 FOREIGN DATA WRAPPER file_fdw;
CREATE FOREIGN TABLE ft3 (
a int,
b int
) SERVER file_server3
OPTIONS ( filename '/does/not/exist.csv', format 'csv');
-- exchange works, but no error checking like for external tables
alter table part exchange partition exch2 with table ft3;
-- same tests for attach partition
create external web table p5_e (a int, b int) execute 'cat > @abs_srcdir@/data/part-ext.csv' format 'csv' (delimiter as '|' null as 'null' escape as ' ');
create writable external web table p6_e (a int, b int) execute 'cat > @abs_srcdir@/data/part-ext.csv' format 'csv' (delimiter as '|' null as 'null' escape as ' ');
-- allow attach readable external table
alter table part attach partition p5_e for values from (80) to (90);
NOTICE: partition constraints are not validated when attaching a readable external table
-- should disallow writable external table
alter table part attach partition p6_e for values from (90) to (100);
ERROR: cannot attach a WRITABLE external table
-- attach foreign table
CREATE SERVER file_server4 FOREIGN DATA WRAPPER file_fdw;
CREATE FOREIGN TABLE ft4 (
a int,
b int
) SERVER file_server4
OPTIONS ( filename '/does/not/exist.csv', format 'csv');
-- exchange works, but no error checking like for external tables
alter table part attach partition ft4 for values from (100) to (110);