| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| /*------------------------------------------------------------------------- |
| * |
| * jsonb_gin.c |
| * GIN support functions for jsonb |
| * |
| * Copyright (c) 2014-2015, PostgreSQL Global Development Group |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/utils/adt/jsonb_gin.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include "access/gin.h" |
| #include "access/hash.h" |
| #include "catalog/pg_type.h" |
| #include "utils/builtins.h" |
| #include "utils/jsonb.h" |
| |
| typedef struct PathHashStack |
| { |
| uint32 hash; |
| struct PathHashStack *parent; |
| } PathHashStack; |
| |
| static Datum make_text_key(char flag, const char *str, int len); |
| static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key); |
| |
| /* |
| * |
| * jsonb_ops GIN opclass support functions |
| * |
| */ |
| |
| Datum |
| gin_compare_jsonb(PG_FUNCTION_ARGS) |
| { |
| text *arg1 = PG_GETARG_TEXT_PP(0); |
| text *arg2 = PG_GETARG_TEXT_PP(1); |
| int32 result; |
| char *a1p, |
| *a2p; |
| int len1, |
| len2; |
| |
| a1p = VARDATA_ANY(arg1); |
| a2p = VARDATA_ANY(arg2); |
| |
| len1 = VARSIZE_ANY_EXHDR(arg1); |
| len2 = VARSIZE_ANY_EXHDR(arg2); |
| |
| /* Compare text as bttextcmp does, but always using C collation */ |
| result = varstr_cmp(a1p, len1, a2p, len2); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_INT32(result); |
| } |
| |
| Datum |
| gin_extract_jsonb(PG_FUNCTION_ARGS) |
| { |
| Jsonb *jb = (Jsonb *) PG_GETARG_JSONB(0); |
| int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| int total = 2 * JB_ROOT_COUNT(jb); |
| JsonbIterator *it; |
| JsonbValue v; |
| JsonbIteratorToken r; |
| int i = 0; |
| Datum *entries; |
| |
| /* If the root level is empty, we certainly have no keys */ |
| if (total == 0) |
| { |
| *nentries = 0; |
| PG_RETURN_POINTER(NULL); |
| } |
| |
| /* Otherwise, use 2 * root count as initial estimate of result size */ |
| entries = (Datum *) palloc(sizeof(Datum) * total); |
| |
| it = JsonbIteratorInit(&jb->root); |
| |
| while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) |
| { |
| /* Since we recurse into the object, we might need more space */ |
| if (i >= total) |
| { |
| total *= 2; |
| entries = (Datum *) repalloc(entries, sizeof(Datum) * total); |
| } |
| |
| switch (r) |
| { |
| case WJB_KEY: |
| entries[i++] = make_scalar_key(&v, true); |
| break; |
| case WJB_ELEM: |
| /* Pretend string array elements are keys, see jsonb.h */ |
| entries[i++] = make_scalar_key(&v, (v.type == jbvString)); |
| break; |
| case WJB_VALUE: |
| entries[i++] = make_scalar_key(&v, false); |
| break; |
| default: |
| /* we can ignore structural items */ |
| break; |
| } |
| } |
| |
| *nentries = i; |
| |
| PG_RETURN_POINTER(entries); |
| } |
| |
| Datum |
| gin_extract_jsonb_query(PG_FUNCTION_ARGS) |
| { |
| int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| StrategyNumber strategy = PG_GETARG_UINT16(2); |
| int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); |
| Datum *entries; |
| |
| if (strategy == JsonbContainsStrategyNumber) |
| { |
| /* Query is a jsonb, so just apply gin_extract_jsonb... */ |
| entries = (Datum *) |
| DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb, |
| PG_GETARG_DATUM(0), |
| PointerGetDatum(nentries))); |
| /* ...although "contains {}" requires a full index scan */ |
| if (*nentries == 0) |
| *searchMode = GIN_SEARCH_MODE_ALL; |
| } |
| else if (strategy == JsonbExistsStrategyNumber) |
| { |
| /* Query is a text string, which we treat as a key */ |
| text *query = PG_GETARG_TEXT_PP(0); |
| |
| *nentries = 1; |
| entries = (Datum *) palloc(sizeof(Datum)); |
| entries[0] = make_text_key(JGINFLAG_KEY, |
| VARDATA_ANY(query), |
| VARSIZE_ANY_EXHDR(query)); |
| } |
| else if (strategy == JsonbExistsAnyStrategyNumber || |
| strategy == JsonbExistsAllStrategyNumber) |
| { |
| /* Query is a text array; each element is treated as a key */ |
| ArrayType *query = PG_GETARG_ARRAYTYPE_P(0); |
| Datum *key_datums; |
| bool *key_nulls; |
| int key_count; |
| int i, |
| j; |
| |
| deconstruct_array(query, |
| TEXTOID, -1, false, 'i', |
| &key_datums, &key_nulls, &key_count); |
| |
| entries = (Datum *) palloc(sizeof(Datum) * key_count); |
| |
| for (i = 0, j = 0; i < key_count; i++) |
| { |
| /* Nulls in the array are ignored */ |
| if (key_nulls[i]) |
| continue; |
| entries[j++] = make_text_key(JGINFLAG_KEY, |
| VARDATA_ANY(key_datums[i]), |
| VARSIZE_ANY_EXHDR(key_datums[i])); |
| } |
| |
| *nentries = j; |
| /* ExistsAll with no keys should match everything */ |
| if (j == 0 && strategy == JsonbExistsAllStrategyNumber) |
| *searchMode = GIN_SEARCH_MODE_ALL; |
| } |
| else |
| { |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| entries = NULL; /* keep compiler quiet */ |
| } |
| |
| PG_RETURN_POINTER(entries); |
| } |
| |
| Datum |
| gin_consistent_jsonb(PG_FUNCTION_ARGS) |
| { |
| bool *check = (bool *) PG_GETARG_POINTER(0); |
| StrategyNumber strategy = PG_GETARG_UINT16(1); |
| |
| /* Jsonb *query = PG_GETARG_JSONB(2); */ |
| int32 nkeys = PG_GETARG_INT32(3); |
| |
| /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ |
| bool *recheck = (bool *) PG_GETARG_POINTER(5); |
| bool res = true; |
| int32 i; |
| |
| if (strategy == JsonbContainsStrategyNumber) |
| { |
| /* |
| * We must always recheck, since we can't tell from the index whether |
| * the positions of the matched items match the structure of the query |
| * object. (Even if we could, we'd also have to worry about hashed |
| * keys and the index's failure to distinguish keys from string array |
| * elements.) However, the tuple certainly doesn't match unless it |
| * contains all the query keys. |
| */ |
| *recheck = true; |
| for (i = 0; i < nkeys; i++) |
| { |
| if (!check[i]) |
| { |
| res = false; |
| break; |
| } |
| } |
| } |
| else if (strategy == JsonbExistsStrategyNumber) |
| { |
| /* |
| * Although the key is certainly present in the index, we must recheck |
| * because (1) the key might be hashed, and (2) the index match might |
| * be for a key that's not at top level of the JSON object. For (1), |
| * we could look at the query key to see if it's hashed and not |
| * recheck if not, but the index lacks enough info to tell about (2). |
| */ |
| *recheck = true; |
| res = true; |
| } |
| else if (strategy == JsonbExistsAnyStrategyNumber) |
| { |
| /* As for plain exists, we must recheck */ |
| *recheck = true; |
| res = true; |
| } |
| else if (strategy == JsonbExistsAllStrategyNumber) |
| { |
| /* As for plain exists, we must recheck */ |
| *recheck = true; |
| /* ... but unless all the keys are present, we can say "false" */ |
| for (i = 0; i < nkeys; i++) |
| { |
| if (!check[i]) |
| { |
| res = false; |
| break; |
| } |
| } |
| } |
| else |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| |
| PG_RETURN_BOOL(res); |
| } |
| |
| Datum |
| gin_triconsistent_jsonb(PG_FUNCTION_ARGS) |
| { |
| GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); |
| StrategyNumber strategy = PG_GETARG_UINT16(1); |
| |
| /* Jsonb *query = PG_GETARG_JSONB(2); */ |
| int32 nkeys = PG_GETARG_INT32(3); |
| |
| /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ |
| GinTernaryValue res = GIN_MAYBE; |
| int32 i; |
| |
| /* |
| * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this |
| * corresponds to always forcing recheck in the regular consistent |
| * function, for the reasons listed there. |
| */ |
| if (strategy == JsonbContainsStrategyNumber || |
| strategy == JsonbExistsAllStrategyNumber) |
| { |
| /* All extracted keys must be present */ |
| for (i = 0; i < nkeys; i++) |
| { |
| if (check[i] == GIN_FALSE) |
| { |
| res = GIN_FALSE; |
| break; |
| } |
| } |
| } |
| else if (strategy == JsonbExistsStrategyNumber || |
| strategy == JsonbExistsAnyStrategyNumber) |
| { |
| /* At least one extracted key must be present */ |
| res = GIN_FALSE; |
| for (i = 0; i < nkeys; i++) |
| { |
| if (check[i] == GIN_TRUE || |
| check[i] == GIN_MAYBE) |
| { |
| res = GIN_MAYBE; |
| break; |
| } |
| } |
| } |
| else |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| |
| PG_RETURN_GIN_TERNARY_VALUE(res); |
| } |
| |
| /* |
| * |
| * jsonb_path_ops GIN opclass support functions |
| * |
| * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON |
| * value; but the JSON key(s) leading to each value are also included in its |
| * hash computation. This means we can only support containment queries, |
| * but the index can distinguish, for example, {"foo": 42} from {"bar": 42} |
| * since different hashes will be generated. |
| * |
| */ |
| |
| Datum |
| gin_extract_jsonb_path(PG_FUNCTION_ARGS) |
| { |
| Jsonb *jb = PG_GETARG_JSONB(0); |
| int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| int total = 2 * JB_ROOT_COUNT(jb); |
| JsonbIterator *it; |
| JsonbValue v; |
| JsonbIteratorToken r; |
| PathHashStack tail; |
| PathHashStack *stack; |
| int i = 0; |
| Datum *entries; |
| |
| /* If the root level is empty, we certainly have no keys */ |
| if (total == 0) |
| { |
| *nentries = 0; |
| PG_RETURN_POINTER(NULL); |
| } |
| |
| /* Otherwise, use 2 * root count as initial estimate of result size */ |
| entries = (Datum *) palloc(sizeof(Datum) * total); |
| |
| /* We keep a stack of partial hashes corresponding to parent key levels */ |
| tail.parent = NULL; |
| tail.hash = 0; |
| stack = &tail; |
| |
| it = JsonbIteratorInit(&jb->root); |
| |
| while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) |
| { |
| PathHashStack *parent; |
| |
| /* Since we recurse into the object, we might need more space */ |
| if (i >= total) |
| { |
| total *= 2; |
| entries = (Datum *) repalloc(entries, sizeof(Datum) * total); |
| } |
| |
| switch (r) |
| { |
| case WJB_BEGIN_ARRAY: |
| case WJB_BEGIN_OBJECT: |
| /* Push a stack level for this object */ |
| parent = stack; |
| stack = (PathHashStack *) palloc(sizeof(PathHashStack)); |
| |
| if (parent->parent) |
| { |
| /* |
| * We pass forward hashes from previous container nesting |
| * levels so that nested arrays with an outermost nested |
| * object will have element hashes mixed with the |
| * outermost key. It's also somewhat useful to have |
| * nested objects' innermost values have hashes that are a |
| * function of not just their own key, but outer keys too. |
| * |
| * Nesting an array within another array will not alter |
| * innermost scalar element hash values, but that seems |
| * inconsequential. |
| */ |
| stack->hash = parent->hash; |
| } |
| else |
| { |
| /* |
| * At the outermost level, initialize hash with container |
| * type proxy value. Note that this makes JB_FARRAY and |
| * JB_FOBJECT part of the on-disk representation, but they |
| * are that in the base jsonb object storage already. |
| */ |
| stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT; |
| } |
| stack->parent = parent; |
| break; |
| case WJB_KEY: |
| /* initialize hash from parent */ |
| stack->hash = stack->parent->hash; |
| /* and mix in this key */ |
| JsonbHashScalarValue(&v, &stack->hash); |
| /* hash is now ready to incorporate the value */ |
| break; |
| case WJB_ELEM: |
| /* array elements use parent hash mixed with element's hash */ |
| stack->hash = stack->parent->hash; |
| /* FALL THRU */ |
| case WJB_VALUE: |
| /* mix the element or value's hash into the prepared hash */ |
| JsonbHashScalarValue(&v, &stack->hash); |
| /* and emit an index entry */ |
| entries[i++] = UInt32GetDatum(stack->hash); |
| /* Note: we assume we'll see KEY before another VALUE */ |
| break; |
| case WJB_END_ARRAY: |
| case WJB_END_OBJECT: |
| /* Pop the stack */ |
| parent = stack->parent; |
| pfree(stack); |
| stack = parent; |
| break; |
| default: |
| elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r); |
| } |
| } |
| |
| *nentries = i; |
| |
| PG_RETURN_POINTER(entries); |
| } |
| |
| Datum |
| gin_extract_jsonb_query_path(PG_FUNCTION_ARGS) |
| { |
| int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| StrategyNumber strategy = PG_GETARG_UINT16(2); |
| int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); |
| Datum *entries; |
| |
| if (strategy != JsonbContainsStrategyNumber) |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| |
| /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */ |
| entries = (Datum *) |
| DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path, |
| PG_GETARG_DATUM(0), |
| PointerGetDatum(nentries))); |
| |
| /* ... although "contains {}" requires a full index scan */ |
| if (*nentries == 0) |
| *searchMode = GIN_SEARCH_MODE_ALL; |
| |
| PG_RETURN_POINTER(entries); |
| } |
| |
| Datum |
| gin_consistent_jsonb_path(PG_FUNCTION_ARGS) |
| { |
| bool *check = (bool *) PG_GETARG_POINTER(0); |
| StrategyNumber strategy = PG_GETARG_UINT16(1); |
| |
| /* Jsonb *query = PG_GETARG_JSONB(2); */ |
| int32 nkeys = PG_GETARG_INT32(3); |
| |
| /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ |
| bool *recheck = (bool *) PG_GETARG_POINTER(5); |
| bool res = true; |
| int32 i; |
| |
| if (strategy != JsonbContainsStrategyNumber) |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| |
| /* |
| * jsonb_path_ops is necessarily lossy, not only because of hash |
| * collisions but also because it doesn't preserve complete information |
| * about the structure of the JSON object. Besides, there are some |
| * special rules around the containment of raw scalars in arrays that are |
| * not handled here. So we must always recheck a match. However, if not |
| * all of the keys are present, the tuple certainly doesn't match. |
| */ |
| *recheck = true; |
| for (i = 0; i < nkeys; i++) |
| { |
| if (!check[i]) |
| { |
| res = false; |
| break; |
| } |
| } |
| |
| PG_RETURN_BOOL(res); |
| } |
| |
| Datum |
| gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS) |
| { |
| GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); |
| StrategyNumber strategy = PG_GETARG_UINT16(1); |
| |
| /* Jsonb *query = PG_GETARG_JSONB(2); */ |
| int32 nkeys = PG_GETARG_INT32(3); |
| |
| /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ |
| GinTernaryValue res = GIN_MAYBE; |
| int32 i; |
| |
| if (strategy != JsonbContainsStrategyNumber) |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| |
| /* |
| * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this |
| * corresponds to always forcing recheck in the regular consistent |
| * function, for the reasons listed there. |
| */ |
| for (i = 0; i < nkeys; i++) |
| { |
| if (check[i] == GIN_FALSE) |
| { |
| res = GIN_FALSE; |
| break; |
| } |
| } |
| |
| PG_RETURN_GIN_TERNARY_VALUE(res); |
| } |
| |
| /* |
| * Construct a jsonb_ops GIN key from a flag byte and a textual representation |
| * (which need not be null-terminated). This function is responsible |
| * for hashing overlength text representations; it will add the |
| * JGINFLAG_HASHED bit to the flag value if it does that. |
| */ |
| static Datum |
| make_text_key(char flag, const char *str, int len) |
| { |
| text *item; |
| char hashbuf[10]; |
| |
| if (len > JGIN_MAXLENGTH) |
| { |
| uint32 hashval; |
| |
| hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len)); |
| snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval); |
| str = hashbuf; |
| len = 8; |
| flag |= JGINFLAG_HASHED; |
| } |
| |
| /* |
| * Now build the text Datum. For simplicity we build a 4-byte-header |
| * varlena text Datum here, but we expect it will get converted to short |
| * header format when stored in the index. |
| */ |
| item = (text *) palloc(VARHDRSZ + len + 1); |
| SET_VARSIZE(item, VARHDRSZ + len + 1); |
| |
| *VARDATA(item) = flag; |
| |
| memcpy(VARDATA(item) + 1, str, len); |
| |
| return PointerGetDatum(item); |
| } |
| |
| /* |
| * Create a textual representation of a JsonbValue that will serve as a GIN |
| * key in a jsonb_ops index. is_key is true if the JsonbValue is a key, |
| * or if it is a string array element (since we pretend those are keys, |
| * see jsonb.h). |
| */ |
| static Datum |
| make_scalar_key(const JsonbValue *scalarVal, bool is_key) |
| { |
| Datum item; |
| char *cstr; |
| |
| switch (scalarVal->type) |
| { |
| case jbvNull: |
| Assert(!is_key); |
| item = make_text_key(JGINFLAG_NULL, "", 0); |
| break; |
| case jbvBool: |
| Assert(!is_key); |
| item = make_text_key(JGINFLAG_BOOL, |
| scalarVal->val.boolean ? "t" : "f", 1); |
| break; |
| case jbvNumeric: |
| Assert(!is_key); |
| |
| /* |
| * A normalized textual representation, free of trailing zeroes, |
| * is required so that numerically equal values will produce equal |
| * strings. |
| * |
| * It isn't ideal that numerics are stored in a relatively bulky |
| * textual format. However, it's a notationally convenient way of |
| * storing a "union" type in the GIN B-Tree, and indexing Jsonb |
| * strings takes precedence. |
| */ |
| cstr = numeric_normalize(scalarVal->val.numeric); |
| item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr)); |
| pfree(cstr); |
| break; |
| case jbvString: |
| item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR, |
| scalarVal->val.string.val, |
| scalarVal->val.string.len); |
| break; |
| default: |
| elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type); |
| item = 0; /* keep compiler quiet */ |
| break; |
| } |
| |
| return item; |
| } |