| /*------------------------------------------------------------------------- |
| * |
| * tuplesortvariants.c |
| * Implementation of tuple sorting variants. |
| * |
| * This module handles the sorting of heap tuples, index tuples, or single |
| * Datums. The implementation is based on the generalized tuple sorting |
| * facility given in tuplesort.c. Support other kinds of sortable objects |
| * could be easily added here, another module, or even an extension. |
| * |
| * |
| * Copyright (c) 2022-2023, PostgreSQL Global Development Group |
| * |
| * IDENTIFICATION |
| * src/backend/utils/sort/tuplesortvariants.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "access/hash.h" |
| #include "access/htup_details.h" |
| #include "access/nbtree.h" |
| #include "catalog/index.h" |
| #include "executor/executor.h" |
| #include "pg_trace.h" |
| #include "utils/datum.h" |
| #include "utils/lsyscache.h" |
| #include "utils/guc.h" |
| #include "utils/tuplesort.h" |
| |
| |
| /* sort-type codes for sort__start probes */ |
| #define HEAP_SORT 0 |
| #define INDEX_SORT 1 |
| #define DATUM_SORT 2 |
| #define CLUSTER_SORT 3 |
| |
| static void removeabbrev_heap(Tuplesortstate *state, SortTuple *stups, |
| int count); |
| static void removeabbrev_cluster(Tuplesortstate *state, SortTuple *stups, |
| int count); |
| static void removeabbrev_index(Tuplesortstate *state, SortTuple *stups, |
| int count); |
| static void removeabbrev_datum(Tuplesortstate *state, SortTuple *stups, |
| int count); |
| static int comparetup_heap(const SortTuple *a, const SortTuple *b, |
| Tuplesortstate *state); |
| static void writetup_heap(Tuplesortstate *state, LogicalTape *tape, |
| SortTuple *stup); |
| static void readtup_heap(Tuplesortstate *state, SortTuple *stup, |
| LogicalTape *tape, unsigned int len); |
| static int comparetup_cluster(const SortTuple *a, const SortTuple *b, |
| Tuplesortstate *state); |
| static void writetup_cluster(Tuplesortstate *state, LogicalTape *tape, |
| SortTuple *stup); |
| static void readtup_cluster(Tuplesortstate *state, SortTuple *stup, |
| LogicalTape *tape, unsigned int tuplen); |
| static int comparetup_index_btree(const SortTuple *a, const SortTuple *b, |
| Tuplesortstate *state); |
| static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, |
| Tuplesortstate *state); |
| static void writetup_index(Tuplesortstate *state, LogicalTape *tape, |
| SortTuple *stup); |
| static void readtup_index(Tuplesortstate *state, SortTuple *stup, |
| LogicalTape *tape, unsigned int len); |
| static int comparetup_datum(const SortTuple *a, const SortTuple *b, |
| Tuplesortstate *state); |
| static void writetup_datum(Tuplesortstate *state, LogicalTape *tape, |
| SortTuple *stup); |
| static void readtup_datum(Tuplesortstate *state, SortTuple *stup, |
| LogicalTape *tape, unsigned int len); |
| static void freestate_cluster(Tuplesortstate *state); |
| |
| /* |
| * Data struture pointed by "TuplesortPublic.arg" for the CLUSTER case. Set by |
| * the tuplesort_begin_cluster. |
| */ |
| typedef struct |
| { |
| TupleDesc tupDesc; |
| |
| IndexInfo *indexInfo; /* info about index being used for reference */ |
| EState *estate; /* for evaluating index expressions */ |
| } TuplesortClusterArg; |
| |
| /* |
| * Data struture pointed by "TuplesortPublic.arg" for the IndexTuple case. |
| * Set by tuplesort_begin_index_xxx and used only by the IndexTuple routines. |
| */ |
| typedef struct |
| { |
| Relation heapRel; /* table the index is being built on */ |
| Relation indexRel; /* index being built */ |
| } TuplesortIndexArg; |
| |
| /* |
| * Data struture pointed by "TuplesortPublic.arg" for the index_btree subcase. |
| */ |
| typedef struct |
| { |
| TuplesortIndexArg index; |
| |
| bool enforceUnique; /* complain if we find duplicate tuples */ |
| bool uniqueNullsNotDistinct; /* unique constraint null treatment */ |
| } TuplesortIndexBTreeArg; |
| |
| /* |
| * Data struture pointed by "TuplesortPublic.arg" for the index_hash subcase. |
| */ |
| typedef struct |
| { |
| TuplesortIndexArg index; |
| |
| uint32 high_mask; /* masks for sortable part of hash code */ |
| uint32 low_mask; |
| uint32 max_buckets; |
| } TuplesortIndexHashArg; |
| |
| /* |
| * Data struture pointed by "TuplesortPublic.arg" for the Datum case. |
| * Set by tuplesort_begin_datum and used only by the DatumTuple routines. |
| */ |
| typedef struct |
| { |
| /* the datatype oid of Datum's to be sorted */ |
| Oid datumType; |
| /* we need typelen in order to know how to copy the Datums. */ |
| int datumTypeLen; |
| } TuplesortDatumArg; |
| |
| Tuplesortstate * |
| tuplesort_begin_heap(TupleDesc tupDesc, |
| int nkeys, AttrNumber *attNums, |
| Oid *sortOperators, Oid *sortCollations, |
| bool *nullsFirstFlags, |
| int workMem, SortCoordinate coordinate, int sortopt) |
| { |
| Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, |
| sortopt); |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| MemoryContext oldcontext; |
| int i; |
| |
| oldcontext = MemoryContextSwitchTo(base->maincontext); |
| |
| Assert(nkeys > 0); |
| |
| #ifdef TRACE_SORT |
| if (trace_sort) |
| elog(LOG, |
| "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", |
| nkeys, workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f'); |
| #endif |
| |
| base->nKeys = nkeys; |
| |
| TRACE_POSTGRESQL_SORT_START(HEAP_SORT, |
| false, /* no unique check */ |
| nkeys, |
| workMem, |
| sortopt & TUPLESORT_RANDOMACCESS, |
| PARALLEL_SORT(coordinate)); |
| |
| base->removeabbrev = removeabbrev_heap; |
| base->comparetup = comparetup_heap; |
| base->writetup = writetup_heap; |
| base->readtup = readtup_heap; |
| base->haveDatum1 = true; |
| base->arg = tupDesc; /* assume we need not copy tupDesc */ |
| |
| /* Prepare SortSupport data for each column */ |
| base->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData)); |
| |
| for (i = 0; i < nkeys; i++) |
| { |
| SortSupport sortKey = base->sortKeys + i; |
| |
| Assert(attNums[i] != 0); |
| Assert(sortOperators[i] != 0); |
| |
| sortKey->ssup_cxt = CurrentMemoryContext; |
| sortKey->ssup_collation = sortCollations[i]; |
| sortKey->ssup_nulls_first = nullsFirstFlags[i]; |
| sortKey->ssup_attno = attNums[i]; |
| /* Convey if abbreviation optimization is applicable in principle */ |
| sortKey->abbreviate = (i == 0 && base->haveDatum1); |
| |
| PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); |
| } |
| |
| /* |
| * The "onlyKey" optimization cannot be used with abbreviated keys, since |
| * tie-breaker comparisons may be required. Typically, the optimization |
| * is only of value to pass-by-value types anyway, whereas abbreviated |
| * keys are typically only of value to pass-by-reference types. |
| */ |
| if (nkeys == 1 && !base->sortKeys->abbrev_converter) |
| base->onlyKey = base->sortKeys; |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return state; |
| } |
| |
| Tuplesortstate * |
| tuplesort_begin_cluster(TupleDesc tupDesc, |
| Relation indexRel, |
| int workMem, |
| SortCoordinate coordinate, int sortopt) |
| { |
| Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, |
| sortopt); |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| BTScanInsert indexScanKey; |
| MemoryContext oldcontext; |
| TuplesortClusterArg *arg; |
| int i; |
| |
| Assert(indexRel->rd_rel->relam == BTREE_AM_OID); |
| |
| oldcontext = MemoryContextSwitchTo(base->maincontext); |
| arg = (TuplesortClusterArg *) palloc0(sizeof(TuplesortClusterArg)); |
| |
| #ifdef TRACE_SORT |
| if (trace_sort) |
| elog(LOG, |
| "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", |
| RelationGetNumberOfAttributes(indexRel), |
| workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f'); |
| #endif |
| |
| base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); |
| |
| TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT, |
| false, /* no unique check */ |
| base->nKeys, |
| workMem, |
| sortopt & TUPLESORT_RANDOMACCESS, |
| PARALLEL_SORT(coordinate)); |
| |
| base->removeabbrev = removeabbrev_cluster; |
| base->comparetup = comparetup_cluster; |
| base->writetup = writetup_cluster; |
| base->readtup = readtup_cluster; |
| base->freestate = freestate_cluster; |
| base->arg = arg; |
| |
| arg->indexInfo = BuildIndexInfo(indexRel); |
| |
| /* |
| * If we don't have a simple leading attribute, we don't currently |
| * initialize datum1, so disable optimizations that require it. |
| */ |
| if (arg->indexInfo->ii_IndexAttrNumbers[0] == 0) |
| base->haveDatum1 = false; |
| else |
| base->haveDatum1 = true; |
| |
| arg->tupDesc = tupDesc; /* assume we need not copy tupDesc */ |
| |
| indexScanKey = _bt_mkscankey(indexRel, NULL); |
| |
| if (arg->indexInfo->ii_Expressions != NULL) |
| { |
| TupleTableSlot *slot; |
| ExprContext *econtext; |
| |
| /* |
| * We will need to use FormIndexDatum to evaluate the index |
| * expressions. To do that, we need an EState, as well as a |
| * TupleTableSlot to put the table tuples into. The econtext's |
| * scantuple has to point to that slot, too. |
| */ |
| arg->estate = CreateExecutorState(); |
| slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsHeapTuple); |
| econtext = GetPerTupleExprContext(arg->estate); |
| econtext->ecxt_scantuple = slot; |
| } |
| |
| /* Prepare SortSupport data for each column */ |
| base->sortKeys = (SortSupport) palloc0(base->nKeys * |
| sizeof(SortSupportData)); |
| |
| for (i = 0; i < base->nKeys; i++) |
| { |
| SortSupport sortKey = base->sortKeys + i; |
| ScanKey scanKey = indexScanKey->scankeys + i; |
| int16 strategy; |
| |
| sortKey->ssup_cxt = CurrentMemoryContext; |
| sortKey->ssup_collation = scanKey->sk_collation; |
| sortKey->ssup_nulls_first = |
| (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; |
| sortKey->ssup_attno = scanKey->sk_attno; |
| /* Convey if abbreviation optimization is applicable in principle */ |
| sortKey->abbreviate = (i == 0 && base->haveDatum1); |
| |
| Assert(sortKey->ssup_attno != 0); |
| |
| strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? |
| BTGreaterStrategyNumber : BTLessStrategyNumber; |
| |
| PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); |
| } |
| |
| pfree(indexScanKey); |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return state; |
| } |
| |
| Tuplesortstate * |
| tuplesort_begin_index_btree(Relation heapRel, |
| Relation indexRel, |
| bool enforceUnique, |
| bool uniqueNullsNotDistinct, |
| int workMem, |
| SortCoordinate coordinate, |
| int sortopt) |
| { |
| Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, |
| sortopt); |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| BTScanInsert indexScanKey; |
| TuplesortIndexBTreeArg *arg; |
| MemoryContext oldcontext; |
| int i; |
| |
| oldcontext = MemoryContextSwitchTo(base->maincontext); |
| arg = (TuplesortIndexBTreeArg *) palloc(sizeof(TuplesortIndexBTreeArg)); |
| |
| #ifdef TRACE_SORT |
| if (trace_sort) |
| elog(LOG, |
| "begin index sort: unique = %c, workMem = %d, randomAccess = %c", |
| enforceUnique ? 't' : 'f', |
| workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f'); |
| #endif |
| |
| base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); |
| |
| TRACE_POSTGRESQL_SORT_START(INDEX_SORT, |
| enforceUnique, |
| base->nKeys, |
| workMem, |
| sortopt & TUPLESORT_RANDOMACCESS, |
| PARALLEL_SORT(coordinate)); |
| |
| base->removeabbrev = removeabbrev_index; |
| base->comparetup = comparetup_index_btree; |
| base->writetup = writetup_index; |
| base->readtup = readtup_index; |
| base->haveDatum1 = true; |
| base->arg = arg; |
| |
| arg->index.heapRel = heapRel; |
| arg->index.indexRel = indexRel; |
| arg->enforceUnique = enforceUnique; |
| arg->uniqueNullsNotDistinct = uniqueNullsNotDistinct; |
| |
| indexScanKey = _bt_mkscankey(indexRel, NULL); |
| |
| /* Prepare SortSupport data for each column */ |
| base->sortKeys = (SortSupport) palloc0(base->nKeys * |
| sizeof(SortSupportData)); |
| |
| for (i = 0; i < base->nKeys; i++) |
| { |
| SortSupport sortKey = base->sortKeys + i; |
| ScanKey scanKey = indexScanKey->scankeys + i; |
| int16 strategy; |
| |
| sortKey->ssup_cxt = CurrentMemoryContext; |
| sortKey->ssup_collation = scanKey->sk_collation; |
| sortKey->ssup_nulls_first = |
| (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; |
| sortKey->ssup_attno = scanKey->sk_attno; |
| /* Convey if abbreviation optimization is applicable in principle */ |
| sortKey->abbreviate = (i == 0 && base->haveDatum1); |
| |
| Assert(sortKey->ssup_attno != 0); |
| |
| strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? |
| BTGreaterStrategyNumber : BTLessStrategyNumber; |
| |
| PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); |
| } |
| |
| pfree(indexScanKey); |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return state; |
| } |
| |
| Tuplesortstate * |
| tuplesort_begin_index_hash(Relation heapRel, |
| Relation indexRel, |
| uint32 high_mask, |
| uint32 low_mask, |
| uint32 max_buckets, |
| int workMem, |
| SortCoordinate coordinate, |
| int sortopt) |
| { |
| Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, |
| sortopt); |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| MemoryContext oldcontext; |
| TuplesortIndexHashArg *arg; |
| |
| oldcontext = MemoryContextSwitchTo(base->maincontext); |
| arg = (TuplesortIndexHashArg *) palloc(sizeof(TuplesortIndexHashArg)); |
| |
| #ifdef TRACE_SORT |
| if (trace_sort) |
| elog(LOG, |
| "begin index sort: high_mask = 0x%x, low_mask = 0x%x, " |
| "max_buckets = 0x%x, workMem = %d, randomAccess = %c", |
| high_mask, |
| low_mask, |
| max_buckets, |
| workMem, |
| sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f'); |
| #endif |
| |
| base->nKeys = 1; /* Only one sort column, the hash code */ |
| |
| base->removeabbrev = removeabbrev_index; |
| base->comparetup = comparetup_index_hash; |
| base->writetup = writetup_index; |
| base->readtup = readtup_index; |
| base->haveDatum1 = true; |
| base->arg = arg; |
| |
| arg->index.heapRel = heapRel; |
| arg->index.indexRel = indexRel; |
| |
| arg->high_mask = high_mask; |
| arg->low_mask = low_mask; |
| arg->max_buckets = max_buckets; |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return state; |
| } |
| |
| Tuplesortstate * |
| tuplesort_begin_index_gist(Relation heapRel, |
| Relation indexRel, |
| int workMem, |
| SortCoordinate coordinate, |
| int sortopt) |
| { |
| Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, |
| sortopt); |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| MemoryContext oldcontext; |
| TuplesortIndexBTreeArg *arg; |
| int i; |
| |
| oldcontext = MemoryContextSwitchTo(base->maincontext); |
| arg = (TuplesortIndexBTreeArg *) palloc(sizeof(TuplesortIndexBTreeArg)); |
| |
| #ifdef TRACE_SORT |
| if (trace_sort) |
| elog(LOG, |
| "begin index sort: workMem = %d, randomAccess = %c", |
| workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f'); |
| #endif |
| |
| base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); |
| |
| base->removeabbrev = removeabbrev_index; |
| base->comparetup = comparetup_index_btree; |
| base->writetup = writetup_index; |
| base->readtup = readtup_index; |
| base->haveDatum1 = true; |
| base->arg = arg; |
| |
| arg->index.heapRel = heapRel; |
| arg->index.indexRel = indexRel; |
| arg->enforceUnique = false; |
| arg->uniqueNullsNotDistinct = false; |
| |
| /* Prepare SortSupport data for each column */ |
| base->sortKeys = (SortSupport) palloc0(base->nKeys * |
| sizeof(SortSupportData)); |
| |
| for (i = 0; i < base->nKeys; i++) |
| { |
| SortSupport sortKey = base->sortKeys + i; |
| |
| sortKey->ssup_cxt = CurrentMemoryContext; |
| sortKey->ssup_collation = indexRel->rd_indcollation[i]; |
| sortKey->ssup_nulls_first = false; |
| sortKey->ssup_attno = i + 1; |
| /* Convey if abbreviation optimization is applicable in principle */ |
| sortKey->abbreviate = (i == 0 && base->haveDatum1); |
| |
| Assert(sortKey->ssup_attno != 0); |
| |
| /* Look for a sort support function */ |
| PrepareSortSupportFromGistIndexRel(indexRel, sortKey); |
| } |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return state; |
| } |
| |
| Tuplesortstate * |
| tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, |
| bool nullsFirstFlag, int workMem, |
| SortCoordinate coordinate, int sortopt) |
| { |
| Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, |
| sortopt); |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| TuplesortDatumArg *arg; |
| MemoryContext oldcontext; |
| int16 typlen; |
| bool typbyval; |
| |
| oldcontext = MemoryContextSwitchTo(base->maincontext); |
| arg = (TuplesortDatumArg *) palloc(sizeof(TuplesortDatumArg)); |
| |
| #ifdef TRACE_SORT |
| if (trace_sort) |
| elog(LOG, |
| "begin datum sort: workMem = %d, randomAccess = %c", |
| workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f'); |
| #endif |
| |
| base->nKeys = 1; /* always a one-column sort */ |
| |
| TRACE_POSTGRESQL_SORT_START(DATUM_SORT, |
| false, /* no unique check */ |
| 1, |
| workMem, |
| sortopt & TUPLESORT_RANDOMACCESS, |
| PARALLEL_SORT(coordinate)); |
| |
| base->removeabbrev = removeabbrev_datum; |
| base->comparetup = comparetup_datum; |
| base->writetup = writetup_datum; |
| base->readtup = readtup_datum; |
| base->haveDatum1 = true; |
| base->arg = arg; |
| |
| arg->datumType = datumType; |
| |
| /* lookup necessary attributes of the datum type */ |
| get_typlenbyval(datumType, &typlen, &typbyval); |
| arg->datumTypeLen = typlen; |
| base->tuples = !typbyval; |
| |
| /* Prepare SortSupport data */ |
| base->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData)); |
| |
| base->sortKeys->ssup_cxt = CurrentMemoryContext; |
| base->sortKeys->ssup_collation = sortCollation; |
| base->sortKeys->ssup_nulls_first = nullsFirstFlag; |
| |
| /* |
| * Abbreviation is possible here only for by-reference types. In theory, |
| * a pass-by-value datatype could have an abbreviated form that is cheaper |
| * to compare. In a tuple sort, we could support that, because we can |
| * always extract the original datum from the tuple as needed. Here, we |
| * can't, because a datum sort only stores a single copy of the datum; the |
| * "tuple" field of each SortTuple is NULL. |
| */ |
| base->sortKeys->abbreviate = !typbyval; |
| |
| PrepareSortSupportFromOrderingOp(sortOperator, base->sortKeys); |
| |
| /* |
| * The "onlyKey" optimization cannot be used with abbreviated keys, since |
| * tie-breaker comparisons may be required. Typically, the optimization |
| * is only of value to pass-by-value types anyway, whereas abbreviated |
| * keys are typically only of value to pass-by-reference types. |
| */ |
| if (!base->sortKeys->abbrev_converter) |
| base->onlyKey = base->sortKeys; |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return state; |
| } |
| |
| /* |
| * Accept one tuple while collecting input data for sort. |
| * |
| * Note that the input data is always copied; the caller need not save it. |
| */ |
| void |
| tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext); |
| TupleDesc tupDesc = (TupleDesc) base->arg; |
| SortTuple stup; |
| MinimalTuple tuple; |
| HeapTupleData htup; |
| |
| /* copy the tuple into sort storage */ |
| tuple = ExecCopySlotMinimalTuple(slot); |
| stup.tuple = (void *) tuple; |
| /* set up first-column key value */ |
| htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; |
| htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); |
| stup.datum1 = heap_getattr(&htup, |
| base->sortKeys[0].ssup_attno, |
| tupDesc, |
| &stup.isnull1); |
| |
| tuplesort_puttuple_common(state, &stup, |
| base->sortKeys->abbrev_converter && |
| !stup.isnull1); |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| /* |
| * Accept one tuple while collecting input data for sort. |
| * |
| * Note that the input data is always copied; the caller need not save it. |
| */ |
| void |
| tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup) |
| { |
| SortTuple stup; |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext); |
| TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg; |
| |
| /* copy the tuple into sort storage */ |
| tup = heap_copytuple(tup); |
| stup.tuple = (void *) tup; |
| |
| /* |
| * set up first-column key value, and potentially abbreviate, if it's a |
| * simple column |
| */ |
| if (base->haveDatum1) |
| { |
| stup.datum1 = heap_getattr(tup, |
| arg->indexInfo->ii_IndexAttrNumbers[0], |
| arg->tupDesc, |
| &stup.isnull1); |
| } |
| |
| tuplesort_puttuple_common(state, &stup, |
| base->haveDatum1 && |
| base->sortKeys->abbrev_converter && |
| !stup.isnull1); |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| /* |
| * Collect one index tuple while collecting input data for sort, building |
| * it from caller-supplied values. |
| */ |
| void |
| tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, |
| ItemPointer self, Datum *values, |
| bool *isnull) |
| { |
| SortTuple stup; |
| IndexTuple tuple; |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg; |
| |
| stup.tuple = index_form_tuple_context(RelationGetDescr(rel), values, |
| isnull, base->tuplecontext); |
| tuple = ((IndexTuple) stup.tuple); |
| tuple->t_tid = *self; |
| /* set up first-column key value */ |
| stup.datum1 = index_getattr(tuple, |
| 1, |
| RelationGetDescr(arg->indexRel), |
| &stup.isnull1); |
| |
| tuplesort_puttuple_common(state, &stup, |
| base->sortKeys && |
| base->sortKeys->abbrev_converter && |
| !stup.isnull1); |
| } |
| |
| /* |
| * Accept one Datum while collecting input data for sort. |
| * |
| * If the Datum is pass-by-ref type, the value will be copied. |
| */ |
| void |
| tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext); |
| TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg; |
| SortTuple stup; |
| |
| /* |
| * Pass-by-value types or null values are just stored directly in |
| * stup.datum1 (and stup.tuple is not used and set to NULL). |
| * |
| * Non-null pass-by-reference values need to be copied into memory we |
| * control, and possibly abbreviated. The copied value is pointed to by |
| * stup.tuple and is treated as the canonical copy (e.g. to return via |
| * tuplesort_getdatum or when writing to tape); stup.datum1 gets the |
| * abbreviated value if abbreviation is happening, otherwise it's |
| * identical to stup.tuple. |
| */ |
| |
| if (isNull || !base->tuples) |
| { |
| /* |
| * Set datum1 to zeroed representation for NULLs (to be consistent, |
| * and to support cheap inequality tests for NULL abbreviated keys). |
| */ |
| stup.datum1 = !isNull ? val : (Datum) 0; |
| stup.isnull1 = isNull; |
| stup.tuple = NULL; /* no separate storage */ |
| } |
| else |
| { |
| stup.isnull1 = false; |
| stup.datum1 = datumCopy(val, false, arg->datumTypeLen); |
| stup.tuple = DatumGetPointer(stup.datum1); |
| } |
| |
| tuplesort_puttuple_common(state, &stup, |
| base->tuples && |
| base->sortKeys->abbrev_converter && !isNull); |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| /* |
| * Fetch the next tuple in either forward or back direction. |
| * If successful, put tuple in slot and return true; else, clear the slot |
| * and return false. |
| * |
| * Caller may optionally be passed back abbreviated value (on true return |
| * value) when abbreviation was used, which can be used to cheaply avoid |
| * equality checks that might otherwise be required. Caller can safely make a |
| * determination of "non-equal tuple" based on simple binary inequality. A |
| * NULL value in leading attribute will set abbreviated value to zeroed |
| * representation, which caller may rely on in abbreviated inequality check. |
| * |
| * If copy is true, the slot receives a tuple that's been copied into the |
| * caller's memory context, so that it will stay valid regardless of future |
| * manipulations of the tuplesort's state (up to and including deleting the |
| * tuplesort). If copy is false, the slot will just receive a pointer to a |
| * tuple held within the tuplesort, which is more efficient, but only safe for |
| * callers that are prepared to have any subsequent manipulation of the |
| * tuplesort's state invalidate slot contents. |
| */ |
| bool |
| tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy, |
| TupleTableSlot *slot, Datum *abbrev) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext); |
| SortTuple stup; |
| |
| if (!tuplesort_gettuple_common(state, forward, &stup)) |
| stup.tuple = NULL; |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| if (stup.tuple) |
| { |
| /* Record abbreviated key for caller */ |
| if (base->sortKeys->abbrev_converter && abbrev) |
| *abbrev = stup.datum1; |
| |
| if (copy) |
| stup.tuple = heap_copy_minimal_tuple((MinimalTuple) stup.tuple); |
| |
| ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, copy); |
| return true; |
| } |
| else |
| { |
| ExecClearTuple(slot); |
| return false; |
| } |
| } |
| |
| /* |
| * Fetch the next tuple in either forward or back direction. |
| * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory |
| * context, and must not be freed by caller. Caller may not rely on tuple |
| * remaining valid after any further manipulation of tuplesort. |
| */ |
| HeapTuple |
| tuplesort_getheaptuple(Tuplesortstate *state, bool forward) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext); |
| SortTuple stup; |
| |
| if (!tuplesort_gettuple_common(state, forward, &stup)) |
| stup.tuple = NULL; |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return stup.tuple; |
| } |
| |
| /* |
| * Fetch the next index tuple in either forward or back direction. |
| * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory |
| * context, and must not be freed by caller. Caller may not rely on tuple |
| * remaining valid after any further manipulation of tuplesort. |
| */ |
| IndexTuple |
| tuplesort_getindextuple(Tuplesortstate *state, bool forward) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext); |
| SortTuple stup; |
| |
| if (!tuplesort_gettuple_common(state, forward, &stup)) |
| stup.tuple = NULL; |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return (IndexTuple) stup.tuple; |
| } |
| |
| /* |
| * Fetch the next Datum in either forward or back direction. |
| * Returns false if no more datums. |
| * |
| * If the Datum is pass-by-ref type, the returned value is freshly palloc'd |
| * in caller's context, and is now owned by the caller (this differs from |
| * similar routines for other types of tuplesorts). |
| * |
| * Caller may optionally be passed back abbreviated value (on true return |
| * value) when abbreviation was used, which can be used to cheaply avoid |
| * equality checks that might otherwise be required. Caller can safely make a |
| * determination of "non-equal tuple" based on simple binary inequality. A |
| * NULL value will have a zeroed abbreviated value representation, which caller |
| * may rely on in abbreviated inequality check. |
| * |
| * For byref Datums, if copy is true, *val is set to a copy of the Datum |
| * copied into the caller's memory context, so that it will stay valid |
| * regardless of future manipulations of the tuplesort's state (up to and |
| * including deleting the tuplesort). If copy is false, *val will just be |
| * set to a pointer to the Datum held within the tuplesort, which is more |
| * efficient, but only safe for callers that are prepared to have any |
| * subsequent manipulation of the tuplesort's state invalidate slot contents. |
| * For byval Datums, the value of the 'copy' parameter has no effect. |
| |
| */ |
| bool |
| tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy, |
| Datum *val, bool *isNull, Datum *abbrev) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext); |
| TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg; |
| SortTuple stup; |
| |
| if (!tuplesort_gettuple_common(state, forward, &stup)) |
| { |
| MemoryContextSwitchTo(oldcontext); |
| return false; |
| } |
| |
| /* Ensure we copy into caller's memory context */ |
| MemoryContextSwitchTo(oldcontext); |
| |
| /* Record abbreviated key for caller */ |
| if (base->sortKeys->abbrev_converter && abbrev) |
| *abbrev = stup.datum1; |
| |
| if (stup.isnull1 || !base->tuples) |
| { |
| *val = stup.datum1; |
| *isNull = stup.isnull1; |
| } |
| else |
| { |
| /* use stup.tuple because stup.datum1 may be an abbreviation */ |
| if (copy) |
| *val = datumCopy(PointerGetDatum(stup.tuple), false, |
| arg->datumTypeLen); |
| else |
| *val = PointerGetDatum(stup.tuple); |
| *isNull = false; |
| } |
| |
| return true; |
| } |
| |
| |
| /* |
| * Routines specialized for HeapTuple (actually MinimalTuple) case |
| */ |
| |
| static void |
| removeabbrev_heap(Tuplesortstate *state, SortTuple *stups, int count) |
| { |
| int i; |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| |
| for (i = 0; i < count; i++) |
| { |
| HeapTupleData htup; |
| |
| htup.t_len = ((MinimalTuple) stups[i].tuple)->t_len + |
| MINIMAL_TUPLE_OFFSET; |
| htup.t_data = (HeapTupleHeader) ((char *) stups[i].tuple - |
| MINIMAL_TUPLE_OFFSET); |
| stups[i].datum1 = heap_getattr(&htup, |
| base->sortKeys[0].ssup_attno, |
| (TupleDesc) base->arg, |
| &stups[i].isnull1); |
| } |
| } |
| |
| static int |
| comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| SortSupport sortKey = base->sortKeys; |
| HeapTupleData ltup; |
| HeapTupleData rtup; |
| TupleDesc tupDesc; |
| int nkey; |
| int32 compare; |
| AttrNumber attno; |
| Datum datum1, |
| datum2; |
| bool isnull1, |
| isnull2; |
| |
| |
| /* Compare the leading sort key */ |
| compare = ApplySortComparator(a->datum1, a->isnull1, |
| b->datum1, b->isnull1, |
| sortKey); |
| if (compare != 0) |
| return compare; |
| |
| /* Compare additional sort keys */ |
| ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET; |
| ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET); |
| rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET; |
| rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET); |
| tupDesc = (TupleDesc) base->arg; |
| |
| if (sortKey->abbrev_converter) |
| { |
| attno = sortKey->ssup_attno; |
| |
| datum1 = heap_getattr(<up, attno, tupDesc, &isnull1); |
| datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); |
| |
| compare = ApplySortAbbrevFullComparator(datum1, isnull1, |
| datum2, isnull2, |
| sortKey); |
| if (compare != 0) |
| return compare; |
| } |
| |
| sortKey++; |
| for (nkey = 1; nkey < base->nKeys; nkey++, sortKey++) |
| { |
| attno = sortKey->ssup_attno; |
| |
| datum1 = heap_getattr(<up, attno, tupDesc, &isnull1); |
| datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); |
| |
| compare = ApplySortComparator(datum1, isnull1, |
| datum2, isnull2, |
| sortKey); |
| if (compare != 0) |
| return compare; |
| } |
| |
| return 0; |
| } |
| |
| static void |
| writetup_heap(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| MinimalTuple tuple = (MinimalTuple) stup->tuple; |
| |
| /* the part of the MinimalTuple we'll write: */ |
| char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; |
| unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET; |
| |
| /* total on-disk footprint: */ |
| unsigned int tuplen = tupbodylen + sizeof(int); |
| |
| LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); |
| LogicalTapeWrite(tape, tupbody, tupbodylen); |
| if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ |
| LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); |
| } |
| |
| static void |
| readtup_heap(Tuplesortstate *state, SortTuple *stup, |
| LogicalTape *tape, unsigned int len) |
| { |
| unsigned int tupbodylen = len - sizeof(int); |
| unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET; |
| MinimalTuple tuple = (MinimalTuple) tuplesort_readtup_alloc(state, tuplen); |
| char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| HeapTupleData htup; |
| |
| /* read in the tuple proper */ |
| tuple->t_len = tuplen; |
| LogicalTapeReadExact(tape, tupbody, tupbodylen); |
| if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ |
| LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); |
| stup->tuple = (void *) tuple; |
| /* set up first-column key value */ |
| htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; |
| htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); |
| stup->datum1 = heap_getattr(&htup, |
| base->sortKeys[0].ssup_attno, |
| (TupleDesc) base->arg, |
| &stup->isnull1); |
| } |
| |
| /* |
| * Routines specialized for the CLUSTER case (HeapTuple data, with |
| * comparisons per a btree index definition) |
| */ |
| |
| static void |
| removeabbrev_cluster(Tuplesortstate *state, SortTuple *stups, int count) |
| { |
| int i; |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg; |
| |
| for (i = 0; i < count; i++) |
| { |
| HeapTuple tup; |
| |
| tup = (HeapTuple) stups[i].tuple; |
| stups[i].datum1 = heap_getattr(tup, |
| arg->indexInfo->ii_IndexAttrNumbers[0], |
| arg->tupDesc, |
| &stups[i].isnull1); |
| } |
| } |
| |
| static int |
| comparetup_cluster(const SortTuple *a, const SortTuple *b, |
| Tuplesortstate *state) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg; |
| SortSupport sortKey = base->sortKeys; |
| HeapTuple ltup; |
| HeapTuple rtup; |
| TupleDesc tupDesc; |
| int nkey; |
| int32 compare; |
| Datum datum1, |
| datum2; |
| bool isnull1, |
| isnull2; |
| |
| /* Be prepared to compare additional sort keys */ |
| ltup = (HeapTuple) a->tuple; |
| rtup = (HeapTuple) b->tuple; |
| tupDesc = arg->tupDesc; |
| |
| /* Compare the leading sort key, if it's simple */ |
| if (base->haveDatum1) |
| { |
| compare = ApplySortComparator(a->datum1, a->isnull1, |
| b->datum1, b->isnull1, |
| sortKey); |
| if (compare != 0) |
| return compare; |
| |
| if (sortKey->abbrev_converter) |
| { |
| AttrNumber leading = arg->indexInfo->ii_IndexAttrNumbers[0]; |
| |
| datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1); |
| datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2); |
| |
| compare = ApplySortAbbrevFullComparator(datum1, isnull1, |
| datum2, isnull2, |
| sortKey); |
| } |
| if (compare != 0 || base->nKeys == 1) |
| return compare; |
| /* Compare additional columns the hard way */ |
| sortKey++; |
| nkey = 1; |
| } |
| else |
| { |
| /* Must compare all keys the hard way */ |
| nkey = 0; |
| } |
| |
| if (arg->indexInfo->ii_Expressions == NULL) |
| { |
| /* If not expression index, just compare the proper heap attrs */ |
| |
| for (; nkey < base->nKeys; nkey++, sortKey++) |
| { |
| AttrNumber attno = arg->indexInfo->ii_IndexAttrNumbers[nkey]; |
| |
| datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1); |
| datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2); |
| |
| compare = ApplySortComparator(datum1, isnull1, |
| datum2, isnull2, |
| sortKey); |
| if (compare != 0) |
| return compare; |
| } |
| } |
| else |
| { |
| /* |
| * In the expression index case, compute the whole index tuple and |
| * then compare values. It would perhaps be faster to compute only as |
| * many columns as we need to compare, but that would require |
| * duplicating all the logic in FormIndexDatum. |
| */ |
| Datum l_index_values[INDEX_MAX_KEYS]; |
| bool l_index_isnull[INDEX_MAX_KEYS]; |
| Datum r_index_values[INDEX_MAX_KEYS]; |
| bool r_index_isnull[INDEX_MAX_KEYS]; |
| TupleTableSlot *ecxt_scantuple; |
| |
| /* Reset context each time to prevent memory leakage */ |
| ResetPerTupleExprContext(arg->estate); |
| |
| ecxt_scantuple = GetPerTupleExprContext(arg->estate)->ecxt_scantuple; |
| |
| ExecStoreHeapTuple(ltup, ecxt_scantuple, false); |
| FormIndexDatum(arg->indexInfo, ecxt_scantuple, arg->estate, |
| l_index_values, l_index_isnull); |
| |
| ExecStoreHeapTuple(rtup, ecxt_scantuple, false); |
| FormIndexDatum(arg->indexInfo, ecxt_scantuple, arg->estate, |
| r_index_values, r_index_isnull); |
| |
| for (; nkey < base->nKeys; nkey++, sortKey++) |
| { |
| compare = ApplySortComparator(l_index_values[nkey], |
| l_index_isnull[nkey], |
| r_index_values[nkey], |
| r_index_isnull[nkey], |
| sortKey); |
| if (compare != 0) |
| return compare; |
| } |
| } |
| |
| return 0; |
| } |
| |
| static void |
| writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| HeapTuple tuple = (HeapTuple) stup->tuple; |
| unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); |
| |
| /* We need to store t_self, but not other fields of HeapTupleData */ |
| LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); |
| LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData)); |
| LogicalTapeWrite(tape, tuple->t_data, tuple->t_len); |
| if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ |
| LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); |
| } |
| |
| static void |
| readtup_cluster(Tuplesortstate *state, SortTuple *stup, |
| LogicalTape *tape, unsigned int tuplen) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg; |
| unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); |
| HeapTuple tuple = (HeapTuple) tuplesort_readtup_alloc(state, |
| t_len + HEAPTUPLESIZE); |
| |
| /* Reconstruct the HeapTupleData header */ |
| tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); |
| tuple->t_len = t_len; |
| LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData)); |
| /* We don't currently bother to reconstruct t_tableOid */ |
| tuple->t_tableOid = InvalidOid; |
| /* Read in the tuple body */ |
| LogicalTapeReadExact(tape, tuple->t_data, tuple->t_len); |
| if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ |
| LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); |
| stup->tuple = (void *) tuple; |
| /* set up first-column key value, if it's a simple column */ |
| if (base->haveDatum1) |
| stup->datum1 = heap_getattr(tuple, |
| arg->indexInfo->ii_IndexAttrNumbers[0], |
| arg->tupDesc, |
| &stup->isnull1); |
| } |
| |
| static void |
| freestate_cluster(Tuplesortstate *state) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg; |
| |
| /* Free any execution state created for CLUSTER case */ |
| if (arg->estate != NULL) |
| { |
| ExprContext *econtext = GetPerTupleExprContext(arg->estate); |
| |
| ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); |
| FreeExecutorState(arg->estate); |
| } |
| } |
| |
| /* |
| * Routines specialized for IndexTuple case |
| * |
| * The btree and hash cases require separate comparison functions, but the |
| * IndexTuple representation is the same so the copy/write/read support |
| * functions can be shared. |
| */ |
| |
| static void |
| removeabbrev_index(Tuplesortstate *state, SortTuple *stups, int count) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg; |
| int i; |
| |
| for (i = 0; i < count; i++) |
| { |
| IndexTuple tuple; |
| |
| tuple = stups[i].tuple; |
| stups[i].datum1 = index_getattr(tuple, |
| 1, |
| RelationGetDescr(arg->indexRel), |
| &stups[i].isnull1); |
| } |
| } |
| |
| static int |
| comparetup_index_btree(const SortTuple *a, const SortTuple *b, |
| Tuplesortstate *state) |
| { |
| /* |
| * This is similar to comparetup_heap(), but expects index tuples. There |
| * is also special handling for enforcing uniqueness, and special |
| * treatment for equal keys at the end. |
| */ |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| TuplesortIndexBTreeArg *arg = (TuplesortIndexBTreeArg *) base->arg; |
| SortSupport sortKey = base->sortKeys; |
| IndexTuple tuple1; |
| IndexTuple tuple2; |
| int keysz; |
| TupleDesc tupDes; |
| bool equal_hasnull = false; |
| int nkey; |
| int32 compare; |
| Datum datum1, |
| datum2; |
| bool isnull1, |
| isnull2; |
| |
| |
| /* Compare the leading sort key */ |
| compare = ApplySortComparator(a->datum1, a->isnull1, |
| b->datum1, b->isnull1, |
| sortKey); |
| if (compare != 0) |
| return compare; |
| |
| /* Compare additional sort keys */ |
| tuple1 = (IndexTuple) a->tuple; |
| tuple2 = (IndexTuple) b->tuple; |
| keysz = base->nKeys; |
| tupDes = RelationGetDescr(arg->index.indexRel); |
| |
| if (sortKey->abbrev_converter) |
| { |
| datum1 = index_getattr(tuple1, 1, tupDes, &isnull1); |
| datum2 = index_getattr(tuple2, 1, tupDes, &isnull2); |
| |
| compare = ApplySortAbbrevFullComparator(datum1, isnull1, |
| datum2, isnull2, |
| sortKey); |
| if (compare != 0) |
| return compare; |
| } |
| |
| /* they are equal, so we only need to examine one null flag */ |
| if (a->isnull1) |
| equal_hasnull = true; |
| |
| sortKey++; |
| for (nkey = 2; nkey <= keysz; nkey++, sortKey++) |
| { |
| datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1); |
| datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2); |
| |
| compare = ApplySortComparator(datum1, isnull1, |
| datum2, isnull2, |
| sortKey); |
| if (compare != 0) |
| return compare; /* done when we find unequal attributes */ |
| |
| /* they are equal, so we only need to examine one null flag */ |
| if (isnull1) |
| equal_hasnull = true; |
| } |
| |
| /* |
| * If btree has asked us to enforce uniqueness, complain if two equal |
| * tuples are detected (unless there was at least one NULL field and NULLS |
| * NOT DISTINCT was not set). |
| * |
| * It is sufficient to make the test here, because if two tuples are equal |
| * they *must* get compared at some stage of the sort --- otherwise the |
| * sort algorithm wouldn't have checked whether one must appear before the |
| * other. |
| */ |
| if (arg->enforceUnique && !(!arg->uniqueNullsNotDistinct && equal_hasnull)) |
| { |
| Datum values[INDEX_MAX_KEYS]; |
| bool isnull[INDEX_MAX_KEYS]; |
| char *key_desc; |
| |
| /* |
| * Some rather brain-dead implementations of qsort (such as the one in |
| * QNX 4) will sometimes call the comparison routine to compare a |
| * value to itself, but we always use our own implementation, which |
| * does not. |
| */ |
| Assert(tuple1 != tuple2); |
| |
| index_deform_tuple(tuple1, tupDes, values, isnull); |
| |
| key_desc = BuildIndexValueDescription(arg->index.indexRel, values, isnull); |
| |
| ereport(ERROR, |
| (errcode(ERRCODE_UNIQUE_VIOLATION), |
| errmsg("could not create unique index \"%s\"", |
| RelationGetRelationName(arg->index.indexRel)), |
| key_desc ? errdetail("Key %s is duplicated.", key_desc) : |
| errdetail("Duplicate keys exist."), |
| errtableconstraint(arg->index.heapRel, |
| RelationGetRelationName(arg->index.indexRel)))); |
| } |
| |
| /* |
| * If key values are equal, we sort on ItemPointer. This is required for |
| * btree indexes, since heap TID is treated as an implicit last key |
| * attribute in order to ensure that all keys in the index are physically |
| * unique. |
| */ |
| { |
| BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); |
| BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); |
| |
| if (blk1 != blk2) |
| return (blk1 < blk2) ? -1 : 1; |
| } |
| { |
| OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); |
| OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); |
| |
| if (pos1 != pos2) |
| return (pos1 < pos2) ? -1 : 1; |
| } |
| |
| /* ItemPointer values should never be equal */ |
| Assert(false); |
| |
| return 0; |
| } |
| |
| static int |
| comparetup_index_hash(const SortTuple *a, const SortTuple *b, |
| Tuplesortstate *state) |
| { |
| Bucket bucket1; |
| Bucket bucket2; |
| uint32 hash1; |
| uint32 hash2; |
| IndexTuple tuple1; |
| IndexTuple tuple2; |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| TuplesortIndexHashArg *arg = (TuplesortIndexHashArg *) base->arg; |
| |
| /* |
| * Fetch hash keys and mask off bits we don't want to sort by, so that the |
| * initial sort is just on the bucket number. We know that the first |
| * column of the index tuple is the hash key. |
| */ |
| Assert(!a->isnull1); |
| bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1), |
| arg->max_buckets, arg->high_mask, |
| arg->low_mask); |
| Assert(!b->isnull1); |
| bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1), |
| arg->max_buckets, arg->high_mask, |
| arg->low_mask); |
| if (bucket1 > bucket2) |
| return 1; |
| else if (bucket1 < bucket2) |
| return -1; |
| |
| /* |
| * If bucket values are equal, sort by hash values. This allows us to |
| * insert directly onto bucket/overflow pages, where the index tuples are |
| * stored in hash order to allow fast binary search within each page. |
| */ |
| hash1 = DatumGetUInt32(a->datum1); |
| hash2 = DatumGetUInt32(b->datum1); |
| if (hash1 > hash2) |
| return 1; |
| else if (hash1 < hash2) |
| return -1; |
| |
| /* |
| * If hash values are equal, we sort on ItemPointer. This does not affect |
| * validity of the finished index, but it may be useful to have index |
| * scans in physical order. |
| */ |
| tuple1 = (IndexTuple) a->tuple; |
| tuple2 = (IndexTuple) b->tuple; |
| |
| { |
| BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); |
| BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); |
| |
| if (blk1 != blk2) |
| return (blk1 < blk2) ? -1 : 1; |
| } |
| { |
| OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); |
| OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); |
| |
| if (pos1 != pos2) |
| return (pos1 < pos2) ? -1 : 1; |
| } |
| |
| /* ItemPointer values should never be equal */ |
| Assert(false); |
| |
| return 0; |
| } |
| |
| static void |
| writetup_index(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| IndexTuple tuple = (IndexTuple) stup->tuple; |
| unsigned int tuplen; |
| |
| tuplen = IndexTupleSize(tuple) + sizeof(tuplen); |
| LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); |
| LogicalTapeWrite(tape, tuple, IndexTupleSize(tuple)); |
| if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ |
| LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); |
| } |
| |
| static void |
| readtup_index(Tuplesortstate *state, SortTuple *stup, |
| LogicalTape *tape, unsigned int len) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg; |
| unsigned int tuplen = len - sizeof(unsigned int); |
| IndexTuple tuple = (IndexTuple) tuplesort_readtup_alloc(state, tuplen); |
| |
| LogicalTapeReadExact(tape, tuple, tuplen); |
| if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ |
| LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); |
| stup->tuple = (void *) tuple; |
| /* set up first-column key value */ |
| stup->datum1 = index_getattr(tuple, |
| 1, |
| RelationGetDescr(arg->indexRel), |
| &stup->isnull1); |
| } |
| |
| /* |
| * Routines specialized for DatumTuple case |
| */ |
| |
| static void |
| removeabbrev_datum(Tuplesortstate *state, SortTuple *stups, int count) |
| { |
| int i; |
| |
| for (i = 0; i < count; i++) |
| stups[i].datum1 = PointerGetDatum(stups[i].tuple); |
| } |
| |
| static int |
| comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| int compare; |
| |
| compare = ApplySortComparator(a->datum1, a->isnull1, |
| b->datum1, b->isnull1, |
| base->sortKeys); |
| if (compare != 0) |
| return compare; |
| |
| /* if we have abbreviations, then "tuple" has the original value */ |
| |
| if (base->sortKeys->abbrev_converter) |
| compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1, |
| PointerGetDatum(b->tuple), b->isnull1, |
| base->sortKeys); |
| |
| return compare; |
| } |
| |
| static void |
| writetup_datum(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg; |
| void *waddr; |
| unsigned int tuplen; |
| unsigned int writtenlen; |
| |
| if (stup->isnull1) |
| { |
| waddr = NULL; |
| tuplen = 0; |
| } |
| else if (!base->tuples) |
| { |
| waddr = &stup->datum1; |
| tuplen = sizeof(Datum); |
| } |
| else |
| { |
| waddr = stup->tuple; |
| tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, arg->datumTypeLen); |
| Assert(tuplen != 0); |
| } |
| |
| writtenlen = tuplen + sizeof(unsigned int); |
| |
| LogicalTapeWrite(tape, &writtenlen, sizeof(writtenlen)); |
| LogicalTapeWrite(tape, waddr, tuplen); |
| if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ |
| LogicalTapeWrite(tape, &writtenlen, sizeof(writtenlen)); |
| } |
| |
| static void |
| readtup_datum(Tuplesortstate *state, SortTuple *stup, |
| LogicalTape *tape, unsigned int len) |
| { |
| TuplesortPublic *base = TuplesortstateGetPublic(state); |
| unsigned int tuplen = len - sizeof(unsigned int); |
| |
| if (tuplen == 0) |
| { |
| /* it's NULL */ |
| stup->datum1 = (Datum) 0; |
| stup->isnull1 = true; |
| stup->tuple = NULL; |
| } |
| else if (!base->tuples) |
| { |
| Assert(tuplen == sizeof(Datum)); |
| LogicalTapeReadExact(tape, &stup->datum1, tuplen); |
| stup->isnull1 = false; |
| stup->tuple = NULL; |
| } |
| else |
| { |
| void *raddr = tuplesort_readtup_alloc(state, tuplen); |
| |
| LogicalTapeReadExact(tape, raddr, tuplen); |
| stup->datum1 = PointerGetDatum(raddr); |
| stup->isnull1 = false; |
| stup->tuple = raddr; |
| } |
| |
| if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ |
| LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); |
| } |