blob: 246dbcab6f11f7fe89ec5aac10a45b91ea861b80 [file] [log] [blame]
/*
* For PostgreSQL Database Management System:
* (formerly known as Postgres, then as Postgres95)
*
* Portions Copyright (c) 1996-2010, The PostgreSQL Global Development Group
*
* Portions Copyright (c) 1994, The Regents of the University of California
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without a written agreement
* is hereby granted, provided that the above copyright notice and this
* paragraph and the following two paragraphs appear in all copies.
*
* IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
* LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION,
* EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE.
*
* THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
* CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
* ENHANCEMENTS, OR MODIFICATIONS.
*/
#include "postgres.h"
#include "varatt.h"
#include "access/gin.h"
#include "access/hash.h"
#include "catalog/pg_collation.h"
#include "utils/agtype.h"
#include "utils/float.h"
#include "utils/builtins.h"
#include "utils/varlena.h"
typedef struct PathHashStack
{
uint32 hash;
struct PathHashStack *parent;
} PathHashStack;
static Datum make_text_key(char flag, const char *str, int len);
static Datum make_scalar_key(const agtype_value *scalar_val, bool is_key);
/*
*
* agtype_ops GIN opclass support functions
*
*/
/*
* Compares two keys (not indexed items!) and returns an integer less than zero,
* zero, or greater than zero, indicating whether the first key is less than,
* equal to, or greater than the second. NULL keys are never passed to this
* function.
*/
PG_FUNCTION_INFO_V1(gin_compare_agtype);
Datum gin_compare_agtype(PG_FUNCTION_ARGS)
{
text *arg1, *arg2;
int32 result;
char *a1p, *a2p;
int len1, len2;
if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
{
PG_RETURN_NULL();
}
arg1 = PG_GETARG_TEXT_PP(0);
arg2 = PG_GETARG_TEXT_PP(1);
a1p = VARDATA_ANY(arg1);
a2p = VARDATA_ANY(arg2);
len1 = VARSIZE_ANY_EXHDR(arg1);
len2 = VARSIZE_ANY_EXHDR(arg2);
/* Compare text as bttextcmp does, but always using C collation */
result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
PG_RETURN_INT32(result);
}
/*
* Returns a palloc'd array of keys given an item to be indexed. The number of
* returned keys must be stored into *nkeys. The return value can be NULL if the
* item contains no keys.
*/
PG_FUNCTION_INFO_V1(gin_extract_agtype);
Datum gin_extract_agtype(PG_FUNCTION_ARGS)
{
agtype *agt;
int32 *nentries;
int total;
agtype_iterator *it;
agtype_value v;
agtype_iterator_token r;
int i = 0;
Datum *entries;
if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
{
PG_RETURN_POINTER(NULL);
}
agt = (agtype *) AG_GET_ARG_AGTYPE_P(0);
nentries = (int32 *) PG_GETARG_POINTER(1);
total = 2 * AGT_ROOT_COUNT(agt);
/* If the root level is empty, we certainly have no keys */
if (total == 0)
{
*nentries = 0;
PG_RETURN_POINTER(NULL);
}
/* Otherwise, use 2 * root count as initial estimate of result size */
entries = (Datum *) palloc(sizeof(Datum) * total);
it = agtype_iterator_init(&agt->root);
while ((r = agtype_iterator_next(&it, &v, false)) != WAGT_DONE)
{
/* Since we recurse into the object, we might need more space */
if (i >= total)
{
total *= 2;
entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
}
switch (r)
{
case WAGT_KEY:
entries[i++] = make_scalar_key(&v, true);
break;
case WAGT_ELEM:
/* Pretend string array elements are keys */
entries[i++] = make_scalar_key(&v, (v.type == AGTV_STRING));
break;
case WAGT_VALUE:
entries[i++] = make_scalar_key(&v, false);
break;
default:
/* we can ignore structural items */
break;
}
}
*nentries = i;
PG_RETURN_POINTER(entries);
}
/*
* Returns a palloc'd array of keys given a value to be queried; that is, query
* is the value on the right-hand side of an indexable operator whose left-hand
* side is the indexed column. The number of returned keys must be stored into
* *nkeys. If any of the keys can be null, also palloc an array of *nkeys bool
* fields, store its address at *nullFlags, and set these null flags as needed.
* *nullFlags can be left NULL (its initial value) if all keys are non-null.
* The return value can be NULL if the query contains no keys.
*
* searchMode is an output argument that allows extractQuery to specify details
* about how the search will be done. If *searchMode is set to
* GIN_SEARCH_MODE_DEFAULT (which is the value it is initialized to before
* call), only items that match at least one of the returned keys are considered
* candidate matches. If *searchMode is set to GIN_SEARCH_MODE_ALL, then all
* non-null items in the index are considered candidate matches, whether they
* match any of the returned keys or not. This is only done when the contains
* or exists all strategy are used and the passed map is empty.
*/
PG_FUNCTION_INFO_V1(gin_extract_agtype_query);
Datum gin_extract_agtype_query(PG_FUNCTION_ARGS)
{
int32 *nentries;
StrategyNumber strategy;
int32 *searchMode;
Datum *entries;
if (PG_ARGISNULL(0) || PG_ARGISNULL(1) ||
PG_ARGISNULL(2) || PG_ARGISNULL(6))
{
PG_RETURN_NULL();
}
nentries = (int32 *) PG_GETARG_POINTER(1);
strategy = PG_GETARG_UINT16(2);
searchMode = (int32 *) PG_GETARG_POINTER(6);
if (strategy == AGTYPE_CONTAINS_STRATEGY_NUMBER ||
strategy == AGTYPE_CONTAINS_TOP_LEVEL_STRATEGY_NUMBER)
{
/* Query is a agtype, so just apply gin_extract_agtype... */
entries = (Datum *)
DatumGetPointer(DirectFunctionCall2(gin_extract_agtype,
PG_GETARG_DATUM(0),
PointerGetDatum(nentries)));
/* ...although "contains {}" requires a full index scan */
if (*nentries == 0)
{
*searchMode = GIN_SEARCH_MODE_ALL;
}
}
else if (strategy == AGTYPE_EXISTS_STRATEGY_NUMBER)
{
/* Query is a text string, which we treat as a key */
text *query = PG_GETARG_TEXT_PP(0);
*nentries = 1;
entries = (Datum *)palloc(sizeof(Datum));
entries[0] = make_text_key(AGT_GIN_FLAG_KEY, VARDATA_ANY(query),
VARSIZE_ANY_EXHDR(query));
}
else if (strategy == AGTYPE_EXISTS_ANY_STRATEGY_NUMBER ||
strategy == AGTYPE_EXISTS_ALL_STRATEGY_NUMBER)
{
/* Query is a text array; each element is treated as a key */
agtype *agt = AG_GET_ARG_AGTYPE_P(0);
agtype_iterator *it = NULL;
agtype_value elem;
agtype_iterator_token itok;
int key_count = AGTYPE_CONTAINER_SIZE(&agt->root);
int index = 0;
if (AGTYPE_CONTAINER_IS_SCALAR(&agt->root) ||
!AGTYPE_CONTAINER_IS_ARRAY(&agt->root))
{
elog(ERROR, "GIN query requires an agtype array");
}
entries = (Datum *) palloc(sizeof(Datum) * key_count);
it = agtype_iterator_init(&agt->root);
/* it should be WAGT_BEGIN_ARRAY */
itok = agtype_iterator_next(&it, &elem, true);
if (itok != WAGT_BEGIN_ARRAY)
{
elog(ERROR, "unexpected iterator token: %d", itok);
}
while (WAGT_END_ARRAY != agtype_iterator_next(&it, &elem, true))
{
if (elem.type != AGTV_STRING)
{
elog(ERROR, "unsupport agtype for GIN lookup: %d", elem.type);
}
entries[index++] = make_text_key(AGT_GIN_FLAG_KEY,
elem.val.string.val,
elem.val.string.len);
}
*nentries = index;
/* ExistsAll with no keys should match everything */
if (index == 0 && strategy == AGTYPE_EXISTS_ALL_STRATEGY_NUMBER)
{
*searchMode = GIN_SEARCH_MODE_ALL;
}
}
else
{
elog(ERROR, "unrecognized strategy number: %d", strategy);
entries = NULL; /* keep compiler quiet */
}
PG_RETURN_POINTER(entries);
}
/*
* Returns true if an indexed item satisfies the query operator for the given
* strategy (or might satisfy it, if the recheck indication is returned). This
* function does not have direct access to the indexed item's value, since GIN
* does not store items explicitly. Rather, what is available is knowledge about
* which key values extracted from the query appear in a given indexed item. The
* check array has length nkeys, which is the same as the number of keys
* previously returned by gin_extract_agtype_query for this query datum. Each
* element of the check array is true if the indexed item contains the
* corresponding query key, i.e., if (check[i] == true) the i-th key of the
* gin_extract_agtype_query result array is present in the indexed item. The
* original query datum is passed in case the consistent method needs to consult
* it, and so are the queryKeys[] and nullFlags[] arrays previously returned by
* gin_extract_agtype_query.
*
* When extractQuery returns a null key in queryKeys[], the corresponding
* check[] element is true if the indexed item contains a null key; that is, the
* semantics of check[] are like IS NOT DISTINCT FROM. The consistent function
* can examine the corresponding nullFlags[] element if it needs to tell the
* difference between a regular value match and a null match.
*
* On success, *recheck should be set to true if the heap tuple needs to be
* rechecked against the query operator, or false if the index test is exact.
* That is, a false return value guarantees that the heap tuple does not match
* the query; a true return value with *recheck set to false guarantees that the
* heap tuple does match the query; and a true return value with *recheck set to
* true means that the heap tuple might match the query, so it needs to be
* fetched and rechecked by evaluating the query operator directly against the
* originally indexed item.
*/
PG_FUNCTION_INFO_V1(gin_consistent_agtype);
Datum gin_consistent_agtype(PG_FUNCTION_ARGS)
{
bool *check;
StrategyNumber strategy;
int32 nkeys;
bool *recheck;
bool res = true;
int32 i;
if (PG_ARGISNULL(0) || PG_ARGISNULL(1) ||
PG_ARGISNULL(3) || PG_ARGISNULL(5))
{
PG_RETURN_NULL();
}
check = (bool *) PG_GETARG_POINTER(0);
strategy = PG_GETARG_UINT16(1);
nkeys = PG_GETARG_INT32(3);
recheck = (bool *) PG_GETARG_POINTER(5);
if (strategy == AGTYPE_CONTAINS_STRATEGY_NUMBER ||
strategy == AGTYPE_CONTAINS_TOP_LEVEL_STRATEGY_NUMBER)
{
/*
* We must always recheck, since we can't tell from the index whether
* the positions of the matched items match the structure of the query
* object. (Even if we could, we'd also have to worry about hashed
* keys and the index's failure to distinguish keys from string array
* elements.) However, the tuple certainly doesn't match unless it
* contains all the query keys.
*/
*recheck = true;
for (i = 0; i < nkeys; i++)
{
if (!check[i])
{
res = false;
break;
}
}
}
else if (strategy == AGTYPE_EXISTS_STRATEGY_NUMBER)
{
/*
* Although the key is certainly present in the index, we must recheck
* because (1) the key might be hashed, and (2) the index match might
* be for a key that's not at top level of the JSON object. For (1),
* we could look at the query key to see if it's hashed and not
* recheck if not, but the index lacks enough info to tell about (2).
*/
*recheck = true;
res = true;
}
else if (strategy == AGTYPE_EXISTS_ANY_STRATEGY_NUMBER)
{
/* As for plain exists, we must recheck */
*recheck = true;
res = true;
}
else if (strategy == AGTYPE_EXISTS_ALL_STRATEGY_NUMBER)
{
/* As for plain exists, we must recheck */
*recheck = true;
/* ... but unless all the keys are present, we can say "false" */
for (i = 0; i < nkeys; i++)
{
if (!check[i])
{
res = false;
break;
}
}
}
else
{
elog(ERROR, "unrecognized strategy number: %d", strategy);
}
PG_RETURN_BOOL(res);
}
/*
* gin_triconsistent_agtype is similar to gin_consistent_agtype, but instead of
* booleans in the check vector, there are three possible values for each key:
* GIN_TRUE, GIN_FALSE and GIN_MAYBE. GIN_FALSE and GIN_TRUE have the same
* meaning as regular boolean values, while GIN_MAYBE means that the presence of
* that key is not known. When GIN_MAYBE values are present, the function should
* only return GIN_TRUE if the item certainly matches whether or not the index
* item contains the corresponding query keys. Likewise, the function must
* return GIN_FALSE only if the item certainly does not match, whether or not it
* contains the GIN_MAYBE keys. If the result depends on the GIN_MAYBE entries,
* i.e., the match cannot be confirmed or refuted based on the known query keys,
* the function must return GIN_MAYBE.
*
* When there are no GIN_MAYBE values in the check vector, a GIN_MAYBE return
* value is the equivalent of setting the recheck flag in the boolean consistent
* function.
*/
PG_FUNCTION_INFO_V1(gin_triconsistent_agtype);
Datum gin_triconsistent_agtype(PG_FUNCTION_ARGS)
{
GinTernaryValue *check;
StrategyNumber strategy;
int32 nkeys;
GinTernaryValue res = GIN_MAYBE;
int32 i;
if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(3))
{
PG_RETURN_NULL();
}
check = (GinTernaryValue *)PG_GETARG_POINTER(0);
strategy = PG_GETARG_UINT16(1);
nkeys = PG_GETARG_INT32(3);
/*
* Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
* corresponds to always forcing recheck in the regular consistent
* function, for the reasons listed there.
*/
if (strategy == AGTYPE_CONTAINS_STRATEGY_NUMBER ||
strategy == AGTYPE_CONTAINS_TOP_LEVEL_STRATEGY_NUMBER ||
strategy == AGTYPE_EXISTS_ALL_STRATEGY_NUMBER)
{
/* All extracted keys must be present */
for (i = 0; i < nkeys; i++)
{
if (check[i] == GIN_FALSE)
{
res = GIN_FALSE;
break;
}
}
}
else if (strategy == AGTYPE_EXISTS_STRATEGY_NUMBER ||
strategy == AGTYPE_EXISTS_ANY_STRATEGY_NUMBER)
{
/* At least one extracted key must be present */
res = GIN_FALSE;
for (i = 0; i < nkeys; i++)
{
if (check[i] == GIN_TRUE || check[i] == GIN_MAYBE)
{
res = GIN_MAYBE;
break;
}
}
}
else
{
elog(ERROR, "unrecognized strategy number: %d", strategy);
}
PG_RETURN_GIN_TERNARY_VALUE(res);
}
/*
* Construct a agtype_ops GIN key from a flag byte and a textual representation
* (which need not be null-terminated). This function is responsible
* for hashing overlength text representations; it will add the
* AGT_GIN_FLAG_HASHED bit to the flag value if it does that.
*/
static Datum make_text_key(char flag, const char *str, int len)
{
text *item;
char hashbuf[10];
if (len > AGT_GIN_MAX_LENGTH)
{
uint32 hashval;
hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len));
snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval);
str = hashbuf;
len = 8;
flag |= AGT_GIN_FLAG_HASHED;
}
/*
* Now build the text Datum. For simplicity we build a 4-byte-header
* varlena text Datum here, but we expect it will get converted to short
* header format when stored in the index.
*/
item = (text *)palloc(VARHDRSZ + len + 1);
SET_VARSIZE(item, VARHDRSZ + len + 1);
*VARDATA(item) = flag;
memcpy(VARDATA(item) + 1, str, len);
return PointerGetDatum(item);
}
/*
* Create a textual representation of a agtype_value that will serve as a GIN
* key in a agtype_ops index. is_key is true if the JsonbValue is a key,
* or if it is a string array element (since we pretend those are keys,
* see jsonb.h).
*/
static Datum make_scalar_key(const agtype_value *scalarVal, bool is_key)
{
Datum item = 0;
char *cstr = NULL;
char buf[MAXINT8LEN + 1];
switch (scalarVal->type)
{
case AGTV_NULL:
Assert(!is_key);
item = make_text_key(AGT_GIN_FLAG_NULL, "", 0);
break;
case AGTV_INTEGER:
{
char *result;
Assert(!is_key);
pg_lltoa(scalarVal->val.int_value, buf);
result = pstrdup(buf);
item = make_text_key(AGT_GIN_FLAG_NUM, result, strlen(result));
break;
}
case AGTV_FLOAT:
Assert(!is_key);
cstr = float8out_internal(scalarVal->val.float_value);
item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
break;
case AGTV_BOOL:
Assert(!is_key);
item = make_text_key(AGT_GIN_FLAG_BOOL,
scalarVal->val.boolean ? "t" : "f", 1);
break;
case AGTV_NUMERIC:
Assert(!is_key);
/*
* A normalized textual representation, free of trailing zeroes,
* is required so that numerically equal values will produce equal
* strings.
*
* It isn't ideal that numerics are stored in a relatively bulky
* textual format. However, it's a notationally convenient way of
* storing a "union" type in the GIN B-Tree, and indexing Jsonb
* strings takes precedence.
*/
cstr = numeric_normalize(scalarVal->val.numeric);
item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
pfree(cstr);
break;
case AGTV_STRING:
item = make_text_key(is_key ? AGT_GIN_FLAG_KEY : AGT_GIN_FLAG_STR,
scalarVal->val.string.val,
scalarVal->val.string.len);
break;
case AGTV_VERTEX:
case AGTV_EDGE:
case AGTV_PATH:
elog(ERROR, "agtype type: %d is not a scalar", scalarVal->type);
break;
default:
elog(ERROR, "unrecognized agtype type: %d", scalarVal->type);
break;
}
return item;
}