| -- tests for tidscans |
| CREATE TABLE tidscan(id integer); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| -- GPDB: we need some preparation, to make the output the same as in upstream. |
| -- Firstly, force all the rows to the same segment, so that selecting by ctid |
| -- produces the same result as in upstream. |
| ALTER TABLE tidscan ADD COLUMN distkey int; |
| ALTER TABLE tidscan SET DISTRIBUTED BY (distkey); |
| -- Secondly, Coerce the planner to produce same plans as in upstream. |
| set enable_seqscan=off; |
| set enable_mergejoin=on; |
| set enable_nestloop=on; |
| -- Finally, silence NOTICEs that GPDB normally emits if you use ctid in a |
| -- query: |
| -- NOTICE: SELECT uses system-defined column "tidscan.ctid" without the necessary companion column "tidscan.gp_segment_id" |
| -- HINT: To uniquely identify a row within a distributed table, use the "gp_segment_id" column together with the "ctid" column. |
| set client_min_messages='warning'; |
| -- only insert a few rows, we don't want to spill onto a second table page |
| INSERT INTO tidscan (id) VALUES (1), (2), (3); |
| ANALYZE tidscan; |
| -- The 'distkey' column has served its purpose, by ensuring that all the rows |
| -- end up on the same segment. Now drop it, so that it doesn't affect the |
| -- output of the "select *" queries that follow. |
| ALTER TABLE tidscan DROP COLUMN distkey; |
| -- show ctids |
| SELECT ctid, * FROM tidscan; |
| ctid | id |
| -------+---- |
| (0,1) | 1 |
| (0,2) | 2 |
| (0,3) | 3 |
| (3 rows) |
| |
| -- ctid equality - implemented as tidscan |
| EXPLAIN (COSTS OFF) |
| SELECT ctid, * FROM tidscan WHERE ctid = '(0,1)'; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Tid Scan on tidscan |
| TID Cond: (ctid = '(0,1)'::tid) |
| Optimizer: Postgres query optimizer |
| (4 rows) |
| |
| SELECT ctid, * FROM tidscan WHERE ctid = '(0,1)'; |
| ctid | id |
| -------+---- |
| (0,1) | 1 |
| (1 row) |
| |
| EXPLAIN (COSTS OFF) |
| SELECT ctid, * FROM tidscan WHERE '(0,1)' = ctid; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Tid Scan on tidscan |
| TID Cond: ('(0,1)'::tid = ctid) |
| Optimizer: Postgres query optimizer |
| (4 rows) |
| |
| SELECT ctid, * FROM tidscan WHERE '(0,1)' = ctid; |
| ctid | id |
| -------+---- |
| (0,1) | 1 |
| (1 row) |
| |
| -- OR'd clauses |
| EXPLAIN (COSTS OFF) |
| SELECT ctid, * FROM tidscan WHERE ctid = '(0,2)' OR '(0,1)' = ctid; |
| QUERY PLAN |
| -------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Tid Scan on tidscan |
| TID Cond: ((ctid = '(0,2)'::tid) OR ('(0,1)'::tid = ctid)) |
| Optimizer: Postgres query optimizer |
| (4 rows) |
| |
| SELECT ctid, * FROM tidscan WHERE ctid = '(0,2)' OR '(0,1)' = ctid; |
| ctid | id |
| -------+---- |
| (0,1) | 1 |
| (0,2) | 2 |
| (2 rows) |
| |
| -- ctid = ScalarArrayOp - implemented as tidscan |
| EXPLAIN (COSTS OFF) |
| SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,1)', '(0,2)']::tid[]); |
| QUERY PLAN |
| ------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Tid Scan on tidscan |
| TID Cond: (ctid = ANY ('{"(0,1)","(0,2)"}'::tid[])) |
| Optimizer: Postgres query optimizer |
| (4 rows) |
| |
| SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,1)', '(0,2)']::tid[]); |
| ctid | id |
| -------+---- |
| (0,1) | 1 |
| (0,2) | 2 |
| (2 rows) |
| |
| -- ctid != ScalarArrayOp - can't be implemented as tidscan |
| EXPLAIN (COSTS OFF) |
| SELECT ctid, * FROM tidscan WHERE ctid != ANY(ARRAY['(0,1)', '(0,2)']::tid[]); |
| QUERY PLAN |
| ------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on tidscan |
| Filter: (ctid <> ANY ('{"(0,1)","(0,2)"}'::tid[])) |
| Optimizer: Postgres query optimizer |
| (4 rows) |
| |
| SELECT ctid, * FROM tidscan WHERE ctid != ANY(ARRAY['(0,1)', '(0,2)']::tid[]); |
| ctid | id |
| -------+---- |
| (0,1) | 1 |
| (0,2) | 2 |
| (0,3) | 3 |
| (3 rows) |
| |
| -- tid equality extracted from sub-AND clauses |
| EXPLAIN (COSTS OFF) |
| SELECT ctid, * FROM tidscan |
| WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1); |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Tid Scan on tidscan |
| TID Cond: ((ctid = ANY ('{"(0,2)","(0,3)"}'::tid[])) OR (ctid = '(0,1)'::tid)) |
| Filter: (((id = 3) AND (ctid = ANY ('{"(0,2)","(0,3)"}'::tid[]))) OR ((ctid = '(0,1)'::tid) AND (id = 1))) |
| Optimizer: Postgres query optimizer |
| (5 rows) |
| |
| SELECT ctid, * FROM tidscan |
| WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1); |
| ctid | id |
| -------+---- |
| (0,1) | 1 |
| (0,3) | 3 |
| (2 rows) |
| |
| -- nestloop-with-inner-tidscan joins on tid |
| SET enable_hashjoin TO off; -- otherwise hash join might win |
| EXPLAIN (COSTS OFF) |
| SELECT t1.ctid, t1.*, t2.ctid, t2.* |
| FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1; |
| QUERY PLAN |
| --------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Nested Loop |
| -> Broadcast Motion 3:3 (slice2; segments: 3) |
| -> Seq Scan on tidscan t1 |
| Filter: (id = 1) |
| -> Tid Scan on tidscan t2 |
| TID Cond: (ctid = t1.ctid) |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| SELECT t1.ctid, t1.*, t2.ctid, t2.* |
| FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1; |
| ctid | id | ctid | id |
| -------+----+-------+---- |
| (0,1) | 1 | (0,1) | 1 |
| (1 row) |
| |
| EXPLAIN (COSTS OFF) |
| SELECT t1.ctid, t1.*, t2.ctid, t2.* |
| FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Nested Loop Left Join |
| Join Filter: (t1.ctid = t2.ctid) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: t1.ctid |
| -> Seq Scan on tidscan t1 |
| Filter: (id = 1) |
| -> Materialize |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: t2.ctid |
| -> Seq Scan on tidscan t2 |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| SELECT t1.ctid, t1.*, t2.ctid, t2.* |
| FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1; |
| ctid | id | ctid | id |
| -------+----+-------+---- |
| (0,1) | 1 | (0,1) | 1 |
| (1 row) |
| |
| RESET enable_hashjoin; |
| -- exercise backward scan and rewind |
| BEGIN; |
| DECLARE c CURSOR FOR |
| SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,1)', '(0,2)']::tid[]); |
| FETCH ALL FROM c; |
| ctid | id |
| -------+---- |
| (0,1) | 1 |
| (0,2) | 2 |
| (2 rows) |
| |
| FETCH BACKWARD 1 FROM c; |
| ERROR: backward scan is not supported in this version of Apache Cloudberry |
| FETCH FIRST FROM c; |
| ERROR: current transaction is aborted, commands ignored until end of transaction block |
| ROLLBACK; |
| -- tidscan via CURRENT OF |
| BEGIN; |
| DECLARE c CURSOR FOR SELECT ctid, * FROM tidscan; |
| FETCH NEXT FROM c; -- skip one row |
| ctid | id |
| -------+---- |
| (0,1) | 1 |
| (1 row) |
| |
| FETCH NEXT FROM c; |
| ctid | id |
| -------+---- |
| (0,2) | 2 |
| (1 row) |
| |
| -- perform update |
| EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) |
| UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) (actual rows=1 loops=1) |
| -> Update on tidscan (actual rows=1 loops=1) |
| -> Tid Scan on tidscan (actual rows=1 loops=1) |
| TID Cond: CURRENT OF c |
| Optimizer: Postgres query optimizer |
| (5 rows) |
| |
| FETCH NEXT FROM c; |
| ctid | id |
| -------+---- |
| (0,3) | 3 |
| (1 row) |
| |
| -- perform update |
| EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) |
| UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) (actual rows=1 loops=1) |
| -> Update on tidscan (actual rows=1 loops=1) |
| -> Tid Scan on tidscan (actual rows=1 loops=1) |
| TID Cond: CURRENT OF c |
| Optimizer: Postgres query optimizer |
| (5 rows) |
| |
| SELECT * FROM tidscan; |
| id |
| ---- |
| 1 |
| -2 |
| -3 |
| (3 rows) |
| |
| -- position cursor past any rows |
| FETCH NEXT FROM c; |
| ctid | id |
| ------+---- |
| (0 rows) |
| |
| -- should error out |
| EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) |
| UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *; |
| ERROR: cursor "c" is not positioned on a row |
| ROLLBACK; |
| -- bulk joins on CTID |
| -- (these plans don't use TID scans, but this still seems like an |
| -- appropriate place for these tests) |
| reset enable_seqscan; |
| EXPLAIN (COSTS OFF) |
| SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid and t1.gp_segment_id = t2.gp_segment_id; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: ((t1.ctid = t2.ctid) AND (t1.gp_segment_id = t2.gp_segment_id)) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: t1.ctid, t1.gp_segment_id |
| -> Seq Scan on tenk1 t1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: t2.ctid, t2.gp_segment_id |
| -> Seq Scan on tenk1 t2 |
| Optimizer: Postgres query optimizer |
| (13 rows) |
| |
| SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid and t1.gp_segment_id = t2.gp_segment_id; |
| count |
| ------- |
| 10000 |
| (1 row) |
| |
| SET enable_hashjoin TO off; |
| EXPLAIN (COSTS OFF) |
| SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid and t1.gp_segment_id = t2.gp_segment_id; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Merge Join |
| Merge Cond: ((t1.ctid = t2.ctid) AND (t1.gp_segment_id = t2.gp_segment_id)) |
| -> Sort |
| Sort Key: t1.ctid, t1.gp_segment_id |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: t1.ctid, t1.gp_segment_id |
| -> Seq Scan on tenk1 t1 |
| -> Sort |
| Sort Key: t2.ctid, t2.gp_segment_id |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: t2.ctid, t2.gp_segment_id |
| -> Seq Scan on tenk1 t2 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid and t1.gp_segment_id = t2.gp_segment_id; |
| count |
| ------- |
| 10000 |
| (1 row) |
| |
| RESET enable_hashjoin; |
| -- check predicate lock on CTID |
| -- GPDB_12_MERGE_FEATURE_NOT_SUPPORTED: Greenplum does not support serializable transactions, |
| -- ignore the below test case. |
| -- start_ignore |
| BEGIN ISOLATION LEVEL SERIALIZABLE; |
| SELECT * FROM tidscan WHERE ctid = '(0,1)'; |
| id |
| ---- |
| 1 |
| (1 row) |
| |
| -- locktype should be 'tuple' |
| SELECT locktype, mode FROM pg_locks WHERE pid = pg_backend_pid() AND mode = 'SIReadLock'; |
| locktype | mode |
| ----------+------ |
| (0 rows) |
| |
| ROLLBACK; |
| -- end_ignore |
| DROP TABLE tidscan; |