| /* |
| * For PostgreSQL Database Management System: |
| * (formerly known as Postgres, then as Postgres95) |
| * |
| * Portions Copyright (c) 1996-2010, The PostgreSQL Global Development Group |
| * |
| * Portions Copyright (c) 1994, The Regents of the University of California |
| * |
| * Permission to use, copy, modify, and distribute this software and its |
| * documentation for any purpose, without fee, and without a written agreement |
| * is hereby granted, provided that the above copyright notice and this |
| * paragraph and the following two paragraphs appear in all copies. |
| * |
| * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR |
| * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING |
| * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, |
| * EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * |
| * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, |
| * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| * FITNESS FOR A PARTICULAR PURPOSE. |
| * |
| * THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF |
| * CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, |
| * ENHANCEMENTS, OR MODIFICATIONS. |
| */ |
| |
| #include "postgres.h" |
| #include "varatt.h" |
| |
| #include "access/gin.h" |
| #include "access/hash.h" |
| #include "catalog/pg_collation.h" |
| #include "utils/agtype.h" |
| #include "utils/float.h" |
| #include "utils/builtins.h" |
| #include "utils/varlena.h" |
| |
| typedef struct PathHashStack |
| { |
| uint32 hash; |
| struct PathHashStack *parent; |
| } PathHashStack; |
| |
| static Datum make_text_key(char flag, const char *str, int len); |
| static Datum make_scalar_key(const agtype_value *scalar_val, bool is_key); |
| |
| |
| /* |
| * |
| * agtype_ops GIN opclass support functions |
| * |
| */ |
| /* |
| * Compares two keys (not indexed items!) and returns an integer less than zero, |
| * zero, or greater than zero, indicating whether the first key is less than, |
| * equal to, or greater than the second. NULL keys are never passed to this |
| * function. |
| */ |
| PG_FUNCTION_INFO_V1(gin_compare_agtype); |
| Datum gin_compare_agtype(PG_FUNCTION_ARGS) |
| { |
| text *arg1, *arg2; |
| int32 result; |
| char *a1p, *a2p; |
| int len1, len2; |
| |
| if (PG_ARGISNULL(0) || PG_ARGISNULL(1)) |
| { |
| PG_RETURN_NULL(); |
| } |
| |
| arg1 = PG_GETARG_TEXT_PP(0); |
| arg2 = PG_GETARG_TEXT_PP(1); |
| |
| a1p = VARDATA_ANY(arg1); |
| a2p = VARDATA_ANY(arg2); |
| |
| len1 = VARSIZE_ANY_EXHDR(arg1); |
| len2 = VARSIZE_ANY_EXHDR(arg2); |
| |
| /* Compare text as bttextcmp does, but always using C collation */ |
| result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_INT32(result); |
| } |
| |
| /* |
| * Returns a palloc'd array of keys given an item to be indexed. The number of |
| * returned keys must be stored into *nkeys. The return value can be NULL if the |
| * item contains no keys. |
| */ |
| PG_FUNCTION_INFO_V1(gin_extract_agtype); |
| Datum gin_extract_agtype(PG_FUNCTION_ARGS) |
| { |
| agtype *agt; |
| int32 *nentries; |
| int total; |
| agtype_iterator *it; |
| agtype_value v; |
| agtype_iterator_token r; |
| int i = 0; |
| Datum *entries; |
| |
| if (PG_ARGISNULL(0) || PG_ARGISNULL(1)) |
| { |
| PG_RETURN_POINTER(NULL); |
| } |
| |
| agt = (agtype *) AG_GET_ARG_AGTYPE_P(0); |
| nentries = (int32 *) PG_GETARG_POINTER(1); |
| total = 2 * AGT_ROOT_COUNT(agt); |
| |
| /* If the root level is empty, we certainly have no keys */ |
| if (total == 0) |
| { |
| *nentries = 0; |
| PG_RETURN_POINTER(NULL); |
| } |
| |
| /* Otherwise, use 2 * root count as initial estimate of result size */ |
| entries = (Datum *) palloc(sizeof(Datum) * total); |
| |
| it = agtype_iterator_init(&agt->root); |
| |
| while ((r = agtype_iterator_next(&it, &v, false)) != WAGT_DONE) |
| { |
| /* Since we recurse into the object, we might need more space */ |
| if (i >= total) |
| { |
| total *= 2; |
| entries = (Datum *) repalloc(entries, sizeof(Datum) * total); |
| } |
| |
| switch (r) |
| { |
| case WAGT_KEY: |
| entries[i++] = make_scalar_key(&v, true); |
| break; |
| case WAGT_ELEM: |
| /* Pretend string array elements are keys */ |
| entries[i++] = make_scalar_key(&v, (v.type == AGTV_STRING)); |
| break; |
| case WAGT_VALUE: |
| entries[i++] = make_scalar_key(&v, false); |
| break; |
| default: |
| /* we can ignore structural items */ |
| break; |
| } |
| } |
| |
| *nentries = i; |
| |
| PG_RETURN_POINTER(entries); |
| } |
| |
| /* |
| * Returns a palloc'd array of keys given a value to be queried; that is, query |
| * is the value on the right-hand side of an indexable operator whose left-hand |
| * side is the indexed column. The number of returned keys must be stored into |
| * *nkeys. If any of the keys can be null, also palloc an array of *nkeys bool |
| * fields, store its address at *nullFlags, and set these null flags as needed. |
| * *nullFlags can be left NULL (its initial value) if all keys are non-null. |
| * The return value can be NULL if the query contains no keys. |
| * |
| * searchMode is an output argument that allows extractQuery to specify details |
| * about how the search will be done. If *searchMode is set to |
| * GIN_SEARCH_MODE_DEFAULT (which is the value it is initialized to before |
| * call), only items that match at least one of the returned keys are considered |
| * candidate matches. If *searchMode is set to GIN_SEARCH_MODE_ALL, then all |
| * non-null items in the index are considered candidate matches, whether they |
| * match any of the returned keys or not. This is only done when the contains |
| * or exists all strategy are used and the passed map is empty. |
| */ |
| PG_FUNCTION_INFO_V1(gin_extract_agtype_query); |
| Datum gin_extract_agtype_query(PG_FUNCTION_ARGS) |
| { |
| int32 *nentries; |
| StrategyNumber strategy; |
| int32 *searchMode; |
| Datum *entries; |
| |
| if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || |
| PG_ARGISNULL(2) || PG_ARGISNULL(6)) |
| { |
| PG_RETURN_NULL(); |
| } |
| |
| nentries = (int32 *) PG_GETARG_POINTER(1); |
| strategy = PG_GETARG_UINT16(2); |
| searchMode = (int32 *) PG_GETARG_POINTER(6); |
| |
| if (strategy == AGTYPE_CONTAINS_STRATEGY_NUMBER || |
| strategy == AGTYPE_CONTAINS_TOP_LEVEL_STRATEGY_NUMBER) |
| { |
| /* Query is a agtype, so just apply gin_extract_agtype... */ |
| entries = (Datum *) |
| DatumGetPointer(DirectFunctionCall2(gin_extract_agtype, |
| PG_GETARG_DATUM(0), |
| PointerGetDatum(nentries))); |
| /* ...although "contains {}" requires a full index scan */ |
| if (*nentries == 0) |
| { |
| *searchMode = GIN_SEARCH_MODE_ALL; |
| } |
| } |
| else if (strategy == AGTYPE_EXISTS_STRATEGY_NUMBER) |
| { |
| /* Query is a text string, which we treat as a key */ |
| text *query = PG_GETARG_TEXT_PP(0); |
| |
| *nentries = 1; |
| entries = (Datum *)palloc(sizeof(Datum)); |
| entries[0] = make_text_key(AGT_GIN_FLAG_KEY, VARDATA_ANY(query), |
| VARSIZE_ANY_EXHDR(query)); |
| } |
| else if (strategy == AGTYPE_EXISTS_ANY_STRATEGY_NUMBER || |
| strategy == AGTYPE_EXISTS_ALL_STRATEGY_NUMBER) |
| { |
| /* Query is a text array; each element is treated as a key */ |
| agtype *agt = AG_GET_ARG_AGTYPE_P(0); |
| agtype_iterator *it = NULL; |
| agtype_value elem; |
| agtype_iterator_token itok; |
| int key_count = AGTYPE_CONTAINER_SIZE(&agt->root); |
| int index = 0; |
| |
| if (AGTYPE_CONTAINER_IS_SCALAR(&agt->root) || |
| !AGTYPE_CONTAINER_IS_ARRAY(&agt->root)) |
| { |
| elog(ERROR, "GIN query requires an agtype array"); |
| } |
| |
| entries = (Datum *) palloc(sizeof(Datum) * key_count); |
| it = agtype_iterator_init(&agt->root); |
| |
| /* it should be WAGT_BEGIN_ARRAY */ |
| itok = agtype_iterator_next(&it, &elem, true); |
| if (itok != WAGT_BEGIN_ARRAY) |
| { |
| elog(ERROR, "unexpected iterator token: %d", itok); |
| } |
| |
| while (WAGT_END_ARRAY != agtype_iterator_next(&it, &elem, true)) |
| { |
| if (elem.type != AGTV_STRING) |
| { |
| elog(ERROR, "unsupport agtype for GIN lookup: %d", elem.type); |
| } |
| |
| entries[index++] = make_text_key(AGT_GIN_FLAG_KEY, |
| elem.val.string.val, |
| elem.val.string.len); |
| } |
| |
| *nentries = index; |
| |
| /* ExistsAll with no keys should match everything */ |
| if (index == 0 && strategy == AGTYPE_EXISTS_ALL_STRATEGY_NUMBER) |
| { |
| *searchMode = GIN_SEARCH_MODE_ALL; |
| } |
| } |
| else |
| { |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| entries = NULL; /* keep compiler quiet */ |
| } |
| |
| PG_RETURN_POINTER(entries); |
| } |
| |
| /* |
| * Returns true if an indexed item satisfies the query operator for the given |
| * strategy (or might satisfy it, if the recheck indication is returned). This |
| * function does not have direct access to the indexed item's value, since GIN |
| * does not store items explicitly. Rather, what is available is knowledge about |
| * which key values extracted from the query appear in a given indexed item. The |
| * check array has length nkeys, which is the same as the number of keys |
| * previously returned by gin_extract_agtype_query for this query datum. Each |
| * element of the check array is true if the indexed item contains the |
| * corresponding query key, i.e., if (check[i] == true) the i-th key of the |
| * gin_extract_agtype_query result array is present in the indexed item. The |
| * original query datum is passed in case the consistent method needs to consult |
| * it, and so are the queryKeys[] and nullFlags[] arrays previously returned by |
| * gin_extract_agtype_query. |
| * |
| * When extractQuery returns a null key in queryKeys[], the corresponding |
| * check[] element is true if the indexed item contains a null key; that is, the |
| * semantics of check[] are like IS NOT DISTINCT FROM. The consistent function |
| * can examine the corresponding nullFlags[] element if it needs to tell the |
| * difference between a regular value match and a null match. |
| * |
| * On success, *recheck should be set to true if the heap tuple needs to be |
| * rechecked against the query operator, or false if the index test is exact. |
| * That is, a false return value guarantees that the heap tuple does not match |
| * the query; a true return value with *recheck set to false guarantees that the |
| * heap tuple does match the query; and a true return value with *recheck set to |
| * true means that the heap tuple might match the query, so it needs to be |
| * fetched and rechecked by evaluating the query operator directly against the |
| * originally indexed item. |
| */ |
| PG_FUNCTION_INFO_V1(gin_consistent_agtype); |
| Datum gin_consistent_agtype(PG_FUNCTION_ARGS) |
| { |
| bool *check; |
| StrategyNumber strategy; |
| int32 nkeys; |
| bool *recheck; |
| bool res = true; |
| int32 i; |
| |
| if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || |
| PG_ARGISNULL(3) || PG_ARGISNULL(5)) |
| { |
| PG_RETURN_NULL(); |
| } |
| |
| check = (bool *) PG_GETARG_POINTER(0); |
| strategy = PG_GETARG_UINT16(1); |
| nkeys = PG_GETARG_INT32(3); |
| recheck = (bool *) PG_GETARG_POINTER(5); |
| |
| if (strategy == AGTYPE_CONTAINS_STRATEGY_NUMBER || |
| strategy == AGTYPE_CONTAINS_TOP_LEVEL_STRATEGY_NUMBER) |
| { |
| /* |
| * We must always recheck, since we can't tell from the index whether |
| * the positions of the matched items match the structure of the query |
| * object. (Even if we could, we'd also have to worry about hashed |
| * keys and the index's failure to distinguish keys from string array |
| * elements.) However, the tuple certainly doesn't match unless it |
| * contains all the query keys. |
| */ |
| *recheck = true; |
| for (i = 0; i < nkeys; i++) |
| { |
| if (!check[i]) |
| { |
| res = false; |
| break; |
| } |
| } |
| } |
| else if (strategy == AGTYPE_EXISTS_STRATEGY_NUMBER) |
| { |
| /* |
| * Although the key is certainly present in the index, we must recheck |
| * because (1) the key might be hashed, and (2) the index match might |
| * be for a key that's not at top level of the JSON object. For (1), |
| * we could look at the query key to see if it's hashed and not |
| * recheck if not, but the index lacks enough info to tell about (2). |
| */ |
| *recheck = true; |
| res = true; |
| } |
| else if (strategy == AGTYPE_EXISTS_ANY_STRATEGY_NUMBER) |
| { |
| /* As for plain exists, we must recheck */ |
| *recheck = true; |
| res = true; |
| } |
| else if (strategy == AGTYPE_EXISTS_ALL_STRATEGY_NUMBER) |
| { |
| /* As for plain exists, we must recheck */ |
| *recheck = true; |
| /* ... but unless all the keys are present, we can say "false" */ |
| for (i = 0; i < nkeys; i++) |
| { |
| if (!check[i]) |
| { |
| res = false; |
| break; |
| } |
| } |
| } |
| else |
| { |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| } |
| |
| PG_RETURN_BOOL(res); |
| } |
| |
| /* |
| * gin_triconsistent_agtype is similar to gin_consistent_agtype, but instead of |
| * booleans in the check vector, there are three possible values for each key: |
| * GIN_TRUE, GIN_FALSE and GIN_MAYBE. GIN_FALSE and GIN_TRUE have the same |
| * meaning as regular boolean values, while GIN_MAYBE means that the presence of |
| * that key is not known. When GIN_MAYBE values are present, the function should |
| * only return GIN_TRUE if the item certainly matches whether or not the index |
| * item contains the corresponding query keys. Likewise, the function must |
| * return GIN_FALSE only if the item certainly does not match, whether or not it |
| * contains the GIN_MAYBE keys. If the result depends on the GIN_MAYBE entries, |
| * i.e., the match cannot be confirmed or refuted based on the known query keys, |
| * the function must return GIN_MAYBE. |
| * |
| * When there are no GIN_MAYBE values in the check vector, a GIN_MAYBE return |
| * value is the equivalent of setting the recheck flag in the boolean consistent |
| * function. |
| */ |
| PG_FUNCTION_INFO_V1(gin_triconsistent_agtype); |
| Datum gin_triconsistent_agtype(PG_FUNCTION_ARGS) |
| { |
| GinTernaryValue *check; |
| StrategyNumber strategy; |
| int32 nkeys; |
| GinTernaryValue res = GIN_MAYBE; |
| int32 i; |
| |
| if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(3)) |
| { |
| PG_RETURN_NULL(); |
| } |
| |
| check = (GinTernaryValue *)PG_GETARG_POINTER(0); |
| strategy = PG_GETARG_UINT16(1); |
| nkeys = PG_GETARG_INT32(3); |
| |
| /* |
| * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this |
| * corresponds to always forcing recheck in the regular consistent |
| * function, for the reasons listed there. |
| */ |
| if (strategy == AGTYPE_CONTAINS_STRATEGY_NUMBER || |
| strategy == AGTYPE_CONTAINS_TOP_LEVEL_STRATEGY_NUMBER || |
| strategy == AGTYPE_EXISTS_ALL_STRATEGY_NUMBER) |
| { |
| /* All extracted keys must be present */ |
| for (i = 0; i < nkeys; i++) |
| { |
| if (check[i] == GIN_FALSE) |
| { |
| res = GIN_FALSE; |
| break; |
| } |
| } |
| } |
| else if (strategy == AGTYPE_EXISTS_STRATEGY_NUMBER || |
| strategy == AGTYPE_EXISTS_ANY_STRATEGY_NUMBER) |
| { |
| /* At least one extracted key must be present */ |
| res = GIN_FALSE; |
| for (i = 0; i < nkeys; i++) |
| { |
| if (check[i] == GIN_TRUE || check[i] == GIN_MAYBE) |
| { |
| res = GIN_MAYBE; |
| break; |
| } |
| } |
| } |
| else |
| { |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| } |
| |
| PG_RETURN_GIN_TERNARY_VALUE(res); |
| } |
| |
| /* |
| * Construct a agtype_ops GIN key from a flag byte and a textual representation |
| * (which need not be null-terminated). This function is responsible |
| * for hashing overlength text representations; it will add the |
| * AGT_GIN_FLAG_HASHED bit to the flag value if it does that. |
| */ |
| static Datum make_text_key(char flag, const char *str, int len) |
| { |
| text *item; |
| char hashbuf[10]; |
| |
| if (len > AGT_GIN_MAX_LENGTH) |
| { |
| uint32 hashval; |
| |
| hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len)); |
| snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval); |
| str = hashbuf; |
| len = 8; |
| flag |= AGT_GIN_FLAG_HASHED; |
| } |
| |
| /* |
| * Now build the text Datum. For simplicity we build a 4-byte-header |
| * varlena text Datum here, but we expect it will get converted to short |
| * header format when stored in the index. |
| */ |
| item = (text *)palloc(VARHDRSZ + len + 1); |
| SET_VARSIZE(item, VARHDRSZ + len + 1); |
| |
| *VARDATA(item) = flag; |
| |
| memcpy(VARDATA(item) + 1, str, len); |
| |
| return PointerGetDatum(item); |
| } |
| |
| /* |
| * Create a textual representation of a agtype_value that will serve as a GIN |
| * key in a agtype_ops index. is_key is true if the JsonbValue is a key, |
| * or if it is a string array element (since we pretend those are keys, |
| * see jsonb.h). |
| */ |
| static Datum make_scalar_key(const agtype_value *scalarVal, bool is_key) |
| { |
| Datum item = 0; |
| char *cstr = NULL; |
| char buf[MAXINT8LEN + 1]; |
| switch (scalarVal->type) |
| { |
| case AGTV_NULL: |
| Assert(!is_key); |
| item = make_text_key(AGT_GIN_FLAG_NULL, "", 0); |
| break; |
| case AGTV_INTEGER: |
| { |
| char *result; |
| |
| Assert(!is_key); |
| |
| pg_lltoa(scalarVal->val.int_value, buf); |
| |
| result = pstrdup(buf); |
| item = make_text_key(AGT_GIN_FLAG_NUM, result, strlen(result)); |
| break; |
| } |
| case AGTV_FLOAT: |
| Assert(!is_key); |
| cstr = float8out_internal(scalarVal->val.float_value); |
| item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr)); |
| break; |
| case AGTV_BOOL: |
| Assert(!is_key); |
| item = make_text_key(AGT_GIN_FLAG_BOOL, |
| scalarVal->val.boolean ? "t" : "f", 1); |
| break; |
| case AGTV_NUMERIC: |
| Assert(!is_key); |
| /* |
| * A normalized textual representation, free of trailing zeroes, |
| * is required so that numerically equal values will produce equal |
| * strings. |
| * |
| * It isn't ideal that numerics are stored in a relatively bulky |
| * textual format. However, it's a notationally convenient way of |
| * storing a "union" type in the GIN B-Tree, and indexing Jsonb |
| * strings takes precedence. |
| */ |
| cstr = numeric_normalize(scalarVal->val.numeric); |
| item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr)); |
| pfree(cstr); |
| break; |
| case AGTV_STRING: |
| item = make_text_key(is_key ? AGT_GIN_FLAG_KEY : AGT_GIN_FLAG_STR, |
| scalarVal->val.string.val, |
| scalarVal->val.string.len); |
| break; |
| case AGTV_VERTEX: |
| case AGTV_EDGE: |
| case AGTV_PATH: |
| elog(ERROR, "agtype type: %d is not a scalar", scalarVal->type); |
| break; |
| default: |
| elog(ERROR, "unrecognized agtype type: %d", scalarVal->type); |
| break; |
| } |
| |
| return item; |
| } |