blob: 44746ce6c8fbf1926bee4dcae4662e71c2599eaf [file]
-- tests for tidscans
CREATE TABLE tidscan(id integer);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- GPDB: we need some preparation, to make the output the same as in upstream.
-- Firstly, force all the rows to the same segment, so that selecting by ctid
-- produces the same result as in upstream.
ALTER TABLE tidscan ADD COLUMN distkey int;
ALTER TABLE tidscan SET DISTRIBUTED BY (distkey);
-- Secondly, Coerce the planner to produce same plans as in upstream.
set enable_seqscan=off;
set enable_mergejoin=on;
set enable_nestloop=on;
-- Finally, silence NOTICEs that GPDB normally emits if you use ctid in a
-- query:
-- NOTICE: SELECT uses system-defined column "tidscan.ctid" without the necessary companion column "tidscan.gp_segment_id"
-- HINT: To uniquely identify a row within a distributed table, use the "gp_segment_id" column together with the "ctid" column.
set client_min_messages='warning';
-- only insert a few rows, we don't want to spill onto a second table page
INSERT INTO tidscan (id) VALUES (1), (2), (3);
ANALYZE tidscan;
-- The 'distkey' column has served its purpose, by ensuring that all the rows
-- end up on the same segment. Now drop it, so that it doesn't affect the
-- output of the "select *" queries that follow.
ALTER TABLE tidscan DROP COLUMN distkey;
-- show ctids
SELECT ctid, * FROM tidscan;
ctid | id
-------+----
(0,1) | 1
(0,2) | 2
(0,3) | 3
(3 rows)
-- ctid equality - implemented as tidscan
EXPLAIN (COSTS OFF)
SELECT ctid, * FROM tidscan WHERE ctid = '(0,1)';
QUERY PLAN
------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Tid Scan on tidscan
TID Cond: (ctid = '(0,1)'::tid)
Optimizer: Postgres query optimizer
(4 rows)
SELECT ctid, * FROM tidscan WHERE ctid = '(0,1)';
ctid | id
-------+----
(0,1) | 1
(1 row)
EXPLAIN (COSTS OFF)
SELECT ctid, * FROM tidscan WHERE '(0,1)' = ctid;
QUERY PLAN
------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Tid Scan on tidscan
TID Cond: ('(0,1)'::tid = ctid)
Optimizer: Postgres query optimizer
(4 rows)
SELECT ctid, * FROM tidscan WHERE '(0,1)' = ctid;
ctid | id
-------+----
(0,1) | 1
(1 row)
-- OR'd clauses
EXPLAIN (COSTS OFF)
SELECT ctid, * FROM tidscan WHERE ctid = '(0,2)' OR '(0,1)' = ctid;
QUERY PLAN
--------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Tid Scan on tidscan
TID Cond: ((ctid = '(0,2)'::tid) OR ('(0,1)'::tid = ctid))
Optimizer: Postgres query optimizer
(4 rows)
SELECT ctid, * FROM tidscan WHERE ctid = '(0,2)' OR '(0,1)' = ctid;
ctid | id
-------+----
(0,1) | 1
(0,2) | 2
(2 rows)
-- ctid = ScalarArrayOp - implemented as tidscan
EXPLAIN (COSTS OFF)
SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,1)', '(0,2)']::tid[]);
QUERY PLAN
-------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Tid Scan on tidscan
TID Cond: (ctid = ANY ('{"(0,1)","(0,2)"}'::tid[]))
Optimizer: Postgres query optimizer
(4 rows)
SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,1)', '(0,2)']::tid[]);
ctid | id
-------+----
(0,1) | 1
(0,2) | 2
(2 rows)
-- ctid != ScalarArrayOp - can't be implemented as tidscan
EXPLAIN (COSTS OFF)
SELECT ctid, * FROM tidscan WHERE ctid != ANY(ARRAY['(0,1)', '(0,2)']::tid[]);
QUERY PLAN
------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on tidscan
Filter: (ctid <> ANY ('{"(0,1)","(0,2)"}'::tid[]))
Optimizer: Postgres query optimizer
(4 rows)
SELECT ctid, * FROM tidscan WHERE ctid != ANY(ARRAY['(0,1)', '(0,2)']::tid[]);
ctid | id
-------+----
(0,1) | 1
(0,2) | 2
(0,3) | 3
(3 rows)
-- tid equality extracted from sub-AND clauses
EXPLAIN (COSTS OFF)
SELECT ctid, * FROM tidscan
WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1);
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Tid Scan on tidscan
TID Cond: ((ctid = ANY ('{"(0,2)","(0,3)"}'::tid[])) OR (ctid = '(0,1)'::tid))
Filter: (((id = 3) AND (ctid = ANY ('{"(0,2)","(0,3)"}'::tid[]))) OR ((ctid = '(0,1)'::tid) AND (id = 1)))
Optimizer: Postgres query optimizer
(5 rows)
SELECT ctid, * FROM tidscan
WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1);
ctid | id
-------+----
(0,1) | 1
(0,3) | 3
(2 rows)
-- nestloop-with-inner-tidscan joins on tid
SET enable_hashjoin TO off; -- otherwise hash join might win
EXPLAIN (COSTS OFF)
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
QUERY PLAN
---------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on tidscan t1
Filter: (id = 1)
-> Tid Scan on tidscan t2
TID Cond: (ctid = t1.ctid)
Optimizer: Postgres query optimizer
(8 rows)
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
ctid | id | ctid | id
-------+----+-------+----
(0,1) | 1 | (0,1) | 1
(1 row)
EXPLAIN (COSTS OFF)
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
QUERY PLAN
------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop Left Join
Join Filter: (t1.ctid = t2.ctid)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: t1.ctid
-> Seq Scan on tidscan t1
Filter: (id = 1)
-> Materialize
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: t2.ctid
-> Seq Scan on tidscan t2
Optimizer: Postgres query optimizer
(12 rows)
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
ctid | id | ctid | id
-------+----+-------+----
(0,1) | 1 | (0,1) | 1
(1 row)
RESET enable_hashjoin;
-- exercise backward scan and rewind
BEGIN;
DECLARE c CURSOR FOR
SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,1)', '(0,2)']::tid[]);
FETCH ALL FROM c;
ctid | id
-------+----
(0,1) | 1
(0,2) | 2
(2 rows)
FETCH BACKWARD 1 FROM c;
ERROR: backward scan is not supported in this version of Apache Cloudberry
FETCH FIRST FROM c;
ERROR: current transaction is aborted, commands ignored until end of transaction block
ROLLBACK;
-- tidscan via CURRENT OF
BEGIN;
DECLARE c CURSOR FOR SELECT ctid, * FROM tidscan;
FETCH NEXT FROM c; -- skip one row
ctid | id
-------+----
(0,1) | 1
(1 row)
FETCH NEXT FROM c;
ctid | id
-------+----
(0,2) | 2
(1 row)
-- perform update
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
QUERY PLAN
------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=1 loops=1)
-> Update on tidscan (actual rows=1 loops=1)
-> Tid Scan on tidscan (actual rows=1 loops=1)
TID Cond: CURRENT OF c
Optimizer: Postgres query optimizer
(5 rows)
FETCH NEXT FROM c;
ctid | id
-------+----
(0,3) | 3
(1 row)
-- perform update
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
QUERY PLAN
------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=1 loops=1)
-> Update on tidscan (actual rows=1 loops=1)
-> Tid Scan on tidscan (actual rows=1 loops=1)
TID Cond: CURRENT OF c
Optimizer: Postgres query optimizer
(5 rows)
SELECT * FROM tidscan;
id
----
1
-2
-3
(3 rows)
-- position cursor past any rows
FETCH NEXT FROM c;
ctid | id
------+----
(0 rows)
-- should error out
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
ERROR: cursor "c" is not positioned on a row
ROLLBACK;
-- bulk joins on CTID
-- (these plans don't use TID scans, but this still seems like an
-- appropriate place for these tests)
reset enable_seqscan;
EXPLAIN (COSTS OFF)
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid and t1.gp_segment_id = t2.gp_segment_id;
QUERY PLAN
------------------------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Hash Join
Hash Cond: ((t1.ctid = t2.ctid) AND (t1.gp_segment_id = t2.gp_segment_id))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: t1.ctid, t1.gp_segment_id
-> Seq Scan on tenk1 t1
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: t2.ctid, t2.gp_segment_id
-> Seq Scan on tenk1 t2
Optimizer: Postgres query optimizer
(13 rows)
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid and t1.gp_segment_id = t2.gp_segment_id;
count
-------
10000
(1 row)
SET enable_hashjoin TO off;
EXPLAIN (COSTS OFF)
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid and t1.gp_segment_id = t2.gp_segment_id;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Merge Join
Merge Cond: ((t1.ctid = t2.ctid) AND (t1.gp_segment_id = t2.gp_segment_id))
-> Sort
Sort Key: t1.ctid, t1.gp_segment_id
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: t1.ctid, t1.gp_segment_id
-> Seq Scan on tenk1 t1
-> Sort
Sort Key: t2.ctid, t2.gp_segment_id
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: t2.ctid, t2.gp_segment_id
-> Seq Scan on tenk1 t2
Optimizer: Postgres query optimizer
(16 rows)
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid and t1.gp_segment_id = t2.gp_segment_id;
count
-------
10000
(1 row)
RESET enable_hashjoin;
-- check predicate lock on CTID
-- GPDB_12_MERGE_FEATURE_NOT_SUPPORTED: Greenplum does not support serializable transactions,
-- ignore the below test case.
-- start_ignore
BEGIN ISOLATION LEVEL SERIALIZABLE;
SELECT * FROM tidscan WHERE ctid = '(0,1)';
id
----
1
(1 row)
-- locktype should be 'tuple'
SELECT locktype, mode FROM pg_locks WHERE pid = pg_backend_pid() AND mode = 'SIReadLock';
locktype | mode
----------+------
(0 rows)
ROLLBACK;
-- end_ignore
DROP TABLE tidscan;