| -- Scenario to test, CHECKPOINT getting distributed transaction |
| -- information between COMMIT processing time window |
| -- `XLogInsert(RM_XACT_ID, XLOG_XACT_DISTRIBUTED_COMMIT)` and |
| -- insertedDistributedCommitted(). `delayChkpt` protects this |
| -- case. There used to bug in placement of getDtxCheckPointInfo() in |
| -- checkpoint code causing, transaction to be committed on coordinator |
| -- and aborted on segments. Test case is meant to validate |
| -- getDtxCheckPointInfo() gets called after |
| -- GetVirtualXIDsDelayingChkpt(). |
| -- |
| -- Test controls the progress of COMMIT executed in session 1 and of |
| -- CHECKPOINT executed in the checkpointer process, with high-level |
| -- flow: |
| -- |
| -- 1. session 1: COMMIT is blocked at start_insertedDistributedCommitted |
| -- 2. checkpointer: Start a CHECKPOINT and wait to reach before_wait_VirtualXIDsDelayingChkpt |
| -- 3. session 1: COMMIT is resumed |
| -- 4. checkpointer: CHECKPOINT is resumed and executes to keep_log_seg to finally introduce panic and perform crash recovery |
| -- |
| -- Bug existed when getDtxCheckPointInfo() was invoked before |
| -- GetVirtualXIDsDelayingChkpt(), getDtxCheckPointInfo() will not |
| -- contain the distributed transaction in session1 whose state is |
| -- DTX_STATE_INSERTED_COMMITTED. Therefore, after crash recovery, the |
| -- 2PC transaction that has been committed on coordinator will be |
| -- considered as orphaned prepared transaction hence is aborted at |
| -- segments. As a result the SELECT executed by session3 used to fail |
| -- because the twopcbug table only existed on the coordinator. |
| -- |
| 1: select gp_inject_fault_infinite('start_insertedDistributedCommitted', 'suspend', 1); |
| 1: begin; |
| 1: create table twopcbug(i int, j int); |
| 1&: commit; |
| 2: select gp_inject_fault_infinite('before_wait_VirtualXIDsDelayingChkpt', 'skip', 1); |
| 33&: checkpoint; |
| 2: select gp_inject_fault_infinite('keep_log_seg', 'panic', 1); |
| -- wait to make sure we don't resume commit processing before this |
| -- step in checkpoint |
| 2: select gp_wait_until_triggered_fault('before_wait_VirtualXIDsDelayingChkpt', 1, 1); |
| -- reason for this inifinite wait is just to avoid test flake. Without |
| -- this joining step "1<" may see "COMMIT" sometimes or "server closed |
| -- the connection unexpectedly" otherwise. With this its always |
| -- "server closed the connection unexpectedly". |
| 2: select gp_inject_fault_infinite('after_xlog_xact_distributed_commit', 'infinite_loop', 1); |
| 2: select gp_inject_fault_infinite('start_insertedDistributedCommitted', 'resume', 1); |
| 1<: |
| 33<: |
| -- wait until coordinator is up for querying. |
| 3: select 1; |
| 3: select count(1) from twopcbug; |