| /*------------------------------------------------------------------------- |
| * |
| * uuid.c |
| * Functions for the built-in type "uuid". |
| * |
| * Copyright (c) 2007-2021, PostgreSQL Global Development Group |
| * |
| * IDENTIFICATION |
| * src/backend/utils/adt/uuid.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "common/hashfn.h" |
| #include "lib/hyperloglog.h" |
| #include "libpq/pqformat.h" |
| #include "port/pg_bswap.h" |
| #include "utils/builtins.h" |
| #include "utils/guc.h" |
| #include "utils/sortsupport.h" |
| #include "utils/uuid.h" |
| |
| /* sortsupport for uuid */ |
| typedef struct |
| { |
| int64 input_count; /* number of non-null values seen */ |
| bool estimating; /* true if estimating cardinality */ |
| |
| hyperLogLogState abbr_card; /* cardinality estimator */ |
| } uuid_sortsupport_state; |
| |
| static void string_to_uuid(const char *source, pg_uuid_t *uuid); |
| static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2); |
| static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup); |
| static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup); |
| static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup); |
| static Datum uuid_abbrev_convert(Datum original, SortSupport ssup); |
| |
| Datum |
| uuid_in(PG_FUNCTION_ARGS) |
| { |
| char *uuid_str = PG_GETARG_CSTRING(0); |
| pg_uuid_t *uuid; |
| |
| uuid = (pg_uuid_t *) palloc(sizeof(*uuid)); |
| string_to_uuid(uuid_str, uuid); |
| PG_RETURN_UUID_P(uuid); |
| } |
| |
| Datum |
| uuid_out(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *uuid = PG_GETARG_UUID_P(0); |
| static const char hex_chars[] = "0123456789abcdef"; |
| StringInfoData buf; |
| int i; |
| |
| initStringInfo(&buf); |
| for (i = 0; i < UUID_LEN; i++) |
| { |
| int hi; |
| int lo; |
| |
| /* |
| * We print uuid values as a string of 8, 4, 4, 4, and then 12 |
| * hexadecimal characters, with each group is separated by a hyphen |
| * ("-"). Therefore, add the hyphens at the appropriate places here. |
| */ |
| if (i == 4 || i == 6 || i == 8 || i == 10) |
| appendStringInfoChar(&buf, '-'); |
| |
| hi = uuid->data[i] >> 4; |
| lo = uuid->data[i] & 0x0F; |
| |
| appendStringInfoChar(&buf, hex_chars[hi]); |
| appendStringInfoChar(&buf, hex_chars[lo]); |
| } |
| |
| PG_RETURN_CSTRING(buf.data); |
| } |
| |
| /* |
| * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash |
| * after each group of 4 hexadecimal digits, and optionally surrounded by {}. |
| * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal |
| * digits, is the only one used for output.) |
| */ |
| static void |
| string_to_uuid(const char *source, pg_uuid_t *uuid) |
| { |
| const char *src = source; |
| bool braces = false; |
| int i; |
| |
| if (src[0] == '{') |
| { |
| src++; |
| braces = true; |
| } |
| |
| for (i = 0; i < UUID_LEN; i++) |
| { |
| char str_buf[3]; |
| |
| if (src[0] == '\0' || src[1] == '\0') |
| goto syntax_error; |
| memcpy(str_buf, src, 2); |
| if (!isxdigit((unsigned char) str_buf[0]) || |
| !isxdigit((unsigned char) str_buf[1])) |
| goto syntax_error; |
| |
| str_buf[2] = '\0'; |
| uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16); |
| src += 2; |
| if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1) |
| src++; |
| } |
| |
| if (braces) |
| { |
| if (*src != '}') |
| goto syntax_error; |
| src++; |
| } |
| |
| if (*src != '\0') |
| goto syntax_error; |
| |
| return; |
| |
| syntax_error: |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
| errmsg("invalid input syntax for type %s: \"%s\"", |
| "uuid", source))); |
| } |
| |
| Datum |
| uuid_recv(PG_FUNCTION_ARGS) |
| { |
| StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0); |
| pg_uuid_t *uuid; |
| |
| uuid = (pg_uuid_t *) palloc(UUID_LEN); |
| memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN); |
| PG_RETURN_POINTER(uuid); |
| } |
| |
| Datum |
| uuid_send(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *uuid = PG_GETARG_UUID_P(0); |
| StringInfoData buffer; |
| |
| pq_begintypsend(&buffer); |
| pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN); |
| PG_RETURN_BYTEA_P(pq_endtypsend(&buffer)); |
| } |
| |
| /* internal uuid compare function */ |
| static int |
| uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2) |
| { |
| return memcmp(arg1->data, arg2->data, UUID_LEN); |
| } |
| |
| Datum |
| uuid_lt(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| |
| PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0); |
| } |
| |
| Datum |
| uuid_le(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| |
| PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0); |
| } |
| |
| Datum |
| uuid_eq(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| |
| PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0); |
| } |
| |
| Datum |
| uuid_ge(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| |
| PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0); |
| } |
| |
| Datum |
| uuid_gt(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| |
| PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0); |
| } |
| |
| Datum |
| uuid_ne(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| |
| PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0); |
| } |
| |
| /* handler for btree index operator */ |
| Datum |
| uuid_cmp(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| |
| PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2)); |
| } |
| |
| /* |
| * Sort support strategy routine |
| */ |
| Datum |
| uuid_sortsupport(PG_FUNCTION_ARGS) |
| { |
| SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); |
| |
| ssup->comparator = uuid_fast_cmp; |
| ssup->ssup_extra = NULL; |
| |
| if (ssup->abbreviate) |
| { |
| uuid_sortsupport_state *uss; |
| MemoryContext oldcontext; |
| |
| oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); |
| |
| uss = palloc(sizeof(uuid_sortsupport_state)); |
| uss->input_count = 0; |
| uss->estimating = true; |
| initHyperLogLog(&uss->abbr_card, 10); |
| |
| ssup->ssup_extra = uss; |
| |
| ssup->comparator = uuid_cmp_abbrev; |
| ssup->abbrev_converter = uuid_abbrev_convert; |
| ssup->abbrev_abort = uuid_abbrev_abort; |
| ssup->abbrev_full_comparator = uuid_fast_cmp; |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| PG_RETURN_VOID(); |
| } |
| |
| /* |
| * SortSupport comparison func |
| */ |
| static int |
| uuid_fast_cmp(Datum x, Datum y, SortSupport ssup) |
| { |
| pg_uuid_t *arg1 = DatumGetUUIDP(x); |
| pg_uuid_t *arg2 = DatumGetUUIDP(y); |
| |
| return uuid_internal_cmp(arg1, arg2); |
| } |
| |
| /* |
| * Abbreviated key comparison func |
| */ |
| static int |
| uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup) |
| { |
| if (x > y) |
| return 1; |
| else if (x == y) |
| return 0; |
| else |
| return -1; |
| } |
| |
| /* |
| * Callback for estimating effectiveness of abbreviated key optimization. |
| * |
| * We pay no attention to the cardinality of the non-abbreviated data, because |
| * there is no equality fast-path within authoritative uuid comparator. |
| */ |
| static bool |
| uuid_abbrev_abort(int memtupcount, SortSupport ssup) |
| { |
| uuid_sortsupport_state *uss = ssup->ssup_extra; |
| double abbr_card; |
| |
| if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) |
| return false; |
| |
| abbr_card = estimateHyperLogLog(&uss->abbr_card); |
| |
| /* |
| * If we have >100k distinct values, then even if we were sorting many |
| * billion rows we'd likely still break even, and the penalty of undoing |
| * that many rows of abbrevs would probably not be worth it. Stop even |
| * counting at that point. |
| */ |
| if (abbr_card > 100000.0) |
| { |
| #ifdef TRACE_SORT |
| if (trace_sort) |
| elog(LOG, |
| "uuid_abbrev: estimation ends at cardinality %f" |
| " after " INT64_FORMAT " values (%d rows)", |
| abbr_card, uss->input_count, memtupcount); |
| #endif |
| uss->estimating = false; |
| return false; |
| } |
| |
| /* |
| * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row |
| * fudge factor allows us to abort earlier on genuinely pathological data |
| * where we've had exactly one abbreviated value in the first 2k |
| * (non-null) rows. |
| */ |
| if (abbr_card < uss->input_count / 2000.0 + 0.5) |
| { |
| #ifdef TRACE_SORT |
| if (trace_sort) |
| elog(LOG, |
| "uuid_abbrev: aborting abbreviation at cardinality %f" |
| " below threshold %f after " INT64_FORMAT " values (%d rows)", |
| abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, |
| memtupcount); |
| #endif |
| return true; |
| } |
| |
| #ifdef TRACE_SORT |
| if (trace_sort) |
| elog(LOG, |
| "uuid_abbrev: cardinality %f after " INT64_FORMAT |
| " values (%d rows)", abbr_card, uss->input_count, memtupcount); |
| #endif |
| |
| return false; |
| } |
| |
| /* |
| * Conversion routine for sortsupport. Converts original uuid representation |
| * to abbreviated key representation. Our encoding strategy is simple -- pack |
| * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian |
| * machines, the bytes are stored in reverse order), and treat it as an |
| * unsigned integer. |
| */ |
| static Datum |
| uuid_abbrev_convert(Datum original, SortSupport ssup) |
| { |
| uuid_sortsupport_state *uss = ssup->ssup_extra; |
| pg_uuid_t *authoritative = DatumGetUUIDP(original); |
| Datum res; |
| |
| memcpy(&res, authoritative->data, sizeof(Datum)); |
| uss->input_count += 1; |
| |
| if (uss->estimating) |
| { |
| uint32 tmp; |
| |
| #if SIZEOF_DATUM == 8 |
| tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); |
| #else /* SIZEOF_DATUM != 8 */ |
| tmp = (uint32) res; |
| #endif |
| |
| addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); |
| } |
| |
| /* |
| * Byteswap on little-endian machines. |
| * |
| * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way |
| * comparator) works correctly on all platforms. If we didn't do this, |
| * the comparator would have to call memcmp() with a pair of pointers to |
| * the first byte of each abbreviated key, which is slower. |
| */ |
| res = DatumBigEndianToNative(res); |
| |
| return res; |
| } |
| |
| /* hash index support */ |
| Datum |
| uuid_hash(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *key = PG_GETARG_UUID_P(0); |
| |
| return hash_any(key->data, UUID_LEN); |
| } |
| |
| Datum |
| uuid_hash_extended(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *key = PG_GETARG_UUID_P(0); |
| |
| return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1)); |
| } |
| |
| Datum |
| gen_random_uuid(PG_FUNCTION_ARGS) |
| { |
| pg_uuid_t *uuid = palloc(UUID_LEN); |
| |
| if (!pg_strong_random(uuid, UUID_LEN)) |
| ereport(ERROR, |
| (errcode(ERRCODE_INTERNAL_ERROR), |
| errmsg("could not generate random values"))); |
| |
| /* |
| * Set magic numbers for a "version 4" (pseudorandom) UUID, see |
| * http://tools.ietf.org/html/rfc4122#section-4.4 |
| */ |
| uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */ |
| uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */ |
| |
| PG_RETURN_UUID_P(uuid); |
| } |