| /*------------------------------------------------------------------------- |
| * |
| * jsonb_gin.c |
| * GIN support functions for jsonb |
| * |
| * Copyright (c) 2014-2023, PostgreSQL Global Development Group |
| * |
| * We provide two opclasses for jsonb indexing: jsonb_ops and jsonb_path_ops. |
| * For their description see json.sgml and comments in jsonb.h. |
| * |
| * The operators support, among the others, "jsonb @? jsonpath" and |
| * "jsonb @@ jsonpath". Expressions containing these operators are easily |
| * expressed through each other. |
| * |
| * jb @? 'path' <=> jb @@ 'EXISTS(path)' |
| * jb @@ 'expr' <=> jb @? '$ ? (expr)' |
| * |
| * Thus, we're going to consider only @@ operator, while regarding @? operator |
| * the same is true for jb @@ 'EXISTS(path)'. |
| * |
| * Result of jsonpath query extraction is a tree, which leaf nodes are index |
| * entries and non-leaf nodes are AND/OR logical expressions. Basically we |
| * extract following statements out of jsonpath: |
| * |
| * 1) "accessors_chain = const", |
| * 2) "EXISTS(accessors_chain)". |
| * |
| * Accessors chain may consist of .key, [*] and [index] accessors. jsonb_ops |
| * additionally supports .* and .**. |
| * |
| * For now, both jsonb_ops and jsonb_path_ops supports only statements of |
| * the 1st find. jsonb_ops might also support statements of the 2nd kind, |
| * but given we have no statistics keys extracted from accessors chain |
| * are likely non-selective. Therefore, we choose to not confuse optimizer |
| * and skip statements of the 2nd kind altogether. In future versions that |
| * might be changed. |
| * |
| * In jsonb_ops statement of the 1st kind is split into expression of AND'ed |
| * keys and const. Sometimes const might be interpreted as both value or key |
| * in jsonb_ops. Then statement of 1st kind is decomposed into the expression |
| * below. |
| * |
| * key1 AND key2 AND ... AND keyN AND (const_as_value OR const_as_key) |
| * |
| * jsonb_path_ops transforms each statement of the 1st kind into single hash |
| * entry below. |
| * |
| * HASH(key1, key2, ... , keyN, const) |
| * |
| * Despite statements of the 2nd kind are not supported by both jsonb_ops and |
| * jsonb_path_ops, EXISTS(path) expressions might be still supported, |
| * when statements of 1st kind could be extracted out of their filters. |
| * |
| * IDENTIFICATION |
| * src/backend/utils/adt/jsonb_gin.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "access/gin.h" |
| #include "access/stratnum.h" |
| #include "catalog/pg_collation.h" |
| #include "catalog/pg_type.h" |
| #include "common/hashfn.h" |
| #include "miscadmin.h" |
| #include "utils/builtins.h" |
| #include "utils/jsonb.h" |
| #include "utils/jsonpath.h" |
| #include "utils/varlena.h" |
| |
| typedef struct PathHashStack |
| { |
| uint32 hash; |
| struct PathHashStack *parent; |
| } PathHashStack; |
| |
| /* Buffer for GIN entries */ |
| typedef struct GinEntries |
| { |
| Datum *buf; |
| int count; |
| int allocated; |
| } GinEntries; |
| |
| typedef enum JsonPathGinNodeType |
| { |
| JSP_GIN_OR, |
| JSP_GIN_AND, |
| JSP_GIN_ENTRY |
| } JsonPathGinNodeType; |
| |
| typedef struct JsonPathGinNode JsonPathGinNode; |
| |
| /* Node in jsonpath expression tree */ |
| struct JsonPathGinNode |
| { |
| JsonPathGinNodeType type; |
| union |
| { |
| int nargs; /* valid for OR and AND nodes */ |
| int entryIndex; /* index in GinEntries array, valid for ENTRY |
| * nodes after entries output */ |
| Datum entryDatum; /* path hash or key name/scalar, valid for |
| * ENTRY nodes before entries output */ |
| } val; |
| JsonPathGinNode *args[FLEXIBLE_ARRAY_MEMBER]; /* valid for OR and AND |
| * nodes */ |
| }; |
| |
| /* |
| * jsonb_ops entry extracted from jsonpath item. Corresponding path item |
| * may be: '.key', '.*', '.**', '[index]' or '[*]'. |
| * Entry type is stored in 'type' field. |
| */ |
| typedef struct JsonPathGinPathItem |
| { |
| struct JsonPathGinPathItem *parent; |
| Datum keyName; /* key name (for '.key' path item) or NULL */ |
| JsonPathItemType type; /* type of jsonpath item */ |
| } JsonPathGinPathItem; |
| |
| /* GIN representation of the extracted json path */ |
| typedef union JsonPathGinPath |
| { |
| JsonPathGinPathItem *items; /* list of path items (jsonb_ops) */ |
| uint32 hash; /* hash of the path (jsonb_path_ops) */ |
| } JsonPathGinPath; |
| |
| typedef struct JsonPathGinContext JsonPathGinContext; |
| |
| /* Callback, which stores information about path item into JsonPathGinPath */ |
| typedef bool (*JsonPathGinAddPathItemFunc) (JsonPathGinPath *path, |
| JsonPathItem *jsp); |
| |
| /* |
| * Callback, which extracts set of nodes from statement of 1st kind |
| * (scalar != NULL) or statement of 2nd kind (scalar == NULL). |
| */ |
| typedef List *(*JsonPathGinExtractNodesFunc) (JsonPathGinContext *cxt, |
| JsonPathGinPath path, |
| JsonbValue *scalar, |
| List *nodes); |
| |
| /* Context for jsonpath entries extraction */ |
| struct JsonPathGinContext |
| { |
| JsonPathGinAddPathItemFunc add_path_item; |
| JsonPathGinExtractNodesFunc extract_nodes; |
| bool lax; |
| }; |
| |
| static Datum make_text_key(char flag, const char *str, int len); |
| static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key); |
| |
| static JsonPathGinNode *extract_jsp_bool_expr(JsonPathGinContext *cxt, |
| JsonPathGinPath path, JsonPathItem *jsp, bool not); |
| |
| |
| /* Initialize GinEntries struct */ |
| static void |
| init_gin_entries(GinEntries *entries, int preallocated) |
| { |
| entries->allocated = preallocated; |
| entries->buf = preallocated ? palloc(sizeof(Datum) * preallocated) : NULL; |
| entries->count = 0; |
| } |
| |
| /* Add new entry to GinEntries */ |
| static int |
| add_gin_entry(GinEntries *entries, Datum entry) |
| { |
| int id = entries->count; |
| |
| if (entries->count >= entries->allocated) |
| { |
| if (entries->allocated) |
| { |
| entries->allocated *= 2; |
| entries->buf = repalloc(entries->buf, |
| sizeof(Datum) * entries->allocated); |
| } |
| else |
| { |
| entries->allocated = 8; |
| entries->buf = palloc(sizeof(Datum) * entries->allocated); |
| } |
| } |
| |
| entries->buf[entries->count++] = entry; |
| |
| return id; |
| } |
| |
| /* |
| * |
| * jsonb_ops GIN opclass support functions |
| * |
| */ |
| |
| Datum |
| gin_compare_jsonb(PG_FUNCTION_ARGS) |
| { |
| text *arg1 = PG_GETARG_TEXT_PP(0); |
| text *arg2 = PG_GETARG_TEXT_PP(1); |
| int32 result; |
| char *a1p, |
| *a2p; |
| int len1, |
| len2; |
| |
| a1p = VARDATA_ANY(arg1); |
| a2p = VARDATA_ANY(arg2); |
| |
| len1 = VARSIZE_ANY_EXHDR(arg1); |
| len2 = VARSIZE_ANY_EXHDR(arg2); |
| |
| /* Compare text as bttextcmp does, but always using C collation */ |
| result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_INT32(result); |
| } |
| |
| Datum |
| gin_extract_jsonb(PG_FUNCTION_ARGS) |
| { |
| Jsonb *jb = (Jsonb *) PG_GETARG_JSONB_P(0); |
| int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| int total = JB_ROOT_COUNT(jb); |
| JsonbIterator *it; |
| JsonbValue v; |
| JsonbIteratorToken r; |
| GinEntries entries; |
| |
| /* If the root level is empty, we certainly have no keys */ |
| if (total == 0) |
| { |
| *nentries = 0; |
| PG_RETURN_POINTER(NULL); |
| } |
| |
| /* Otherwise, use 2 * root count as initial estimate of result size */ |
| init_gin_entries(&entries, 2 * total); |
| |
| it = JsonbIteratorInit(&jb->root); |
| |
| while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) |
| { |
| switch (r) |
| { |
| case WJB_KEY: |
| add_gin_entry(&entries, make_scalar_key(&v, true)); |
| break; |
| case WJB_ELEM: |
| /* Pretend string array elements are keys, see jsonb.h */ |
| add_gin_entry(&entries, make_scalar_key(&v, v.type == jbvString)); |
| break; |
| case WJB_VALUE: |
| add_gin_entry(&entries, make_scalar_key(&v, false)); |
| break; |
| default: |
| /* we can ignore structural items */ |
| break; |
| } |
| } |
| |
| *nentries = entries.count; |
| |
| PG_RETURN_POINTER(entries.buf); |
| } |
| |
| /* Append JsonPathGinPathItem to JsonPathGinPath (jsonb_ops) */ |
| static bool |
| jsonb_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp) |
| { |
| JsonPathGinPathItem *pentry; |
| Datum keyName; |
| |
| switch (jsp->type) |
| { |
| case jpiRoot: |
| path->items = NULL; /* reset path */ |
| return true; |
| |
| case jpiKey: |
| { |
| int len; |
| char *key = jspGetString(jsp, &len); |
| |
| keyName = make_text_key(JGINFLAG_KEY, key, len); |
| break; |
| } |
| |
| case jpiAny: |
| case jpiAnyKey: |
| case jpiAnyArray: |
| case jpiIndexArray: |
| keyName = PointerGetDatum(NULL); |
| break; |
| |
| default: |
| /* other path items like item methods are not supported */ |
| return false; |
| } |
| |
| pentry = palloc(sizeof(*pentry)); |
| |
| pentry->type = jsp->type; |
| pentry->keyName = keyName; |
| pentry->parent = path->items; |
| |
| path->items = pentry; |
| |
| return true; |
| } |
| |
| /* Combine existing path hash with next key hash (jsonb_path_ops) */ |
| static bool |
| jsonb_path_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp) |
| { |
| switch (jsp->type) |
| { |
| case jpiRoot: |
| path->hash = 0; /* reset path hash */ |
| return true; |
| |
| case jpiKey: |
| { |
| JsonbValue jbv; |
| |
| jbv.type = jbvString; |
| jbv.val.string.val = jspGetString(jsp, &jbv.val.string.len); |
| |
| JsonbHashScalarValue(&jbv, &path->hash); |
| return true; |
| } |
| |
| case jpiIndexArray: |
| case jpiAnyArray: |
| return true; /* path hash is unchanged */ |
| |
| default: |
| /* other items (wildcard paths, item methods) are not supported */ |
| return false; |
| } |
| } |
| |
| static JsonPathGinNode * |
| make_jsp_entry_node(Datum entry) |
| { |
| JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args)); |
| |
| node->type = JSP_GIN_ENTRY; |
| node->val.entryDatum = entry; |
| |
| return node; |
| } |
| |
| static JsonPathGinNode * |
| make_jsp_entry_node_scalar(JsonbValue *scalar, bool iskey) |
| { |
| return make_jsp_entry_node(make_scalar_key(scalar, iskey)); |
| } |
| |
| static JsonPathGinNode * |
| make_jsp_expr_node(JsonPathGinNodeType type, int nargs) |
| { |
| JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args) + |
| sizeof(node->args[0]) * nargs); |
| |
| node->type = type; |
| node->val.nargs = nargs; |
| |
| return node; |
| } |
| |
| static JsonPathGinNode * |
| make_jsp_expr_node_args(JsonPathGinNodeType type, List *args) |
| { |
| JsonPathGinNode *node = make_jsp_expr_node(type, list_length(args)); |
| ListCell *lc; |
| int i = 0; |
| |
| foreach(lc, args) |
| node->args[i++] = lfirst(lc); |
| |
| return node; |
| } |
| |
| static JsonPathGinNode * |
| make_jsp_expr_node_binary(JsonPathGinNodeType type, |
| JsonPathGinNode *arg1, JsonPathGinNode *arg2) |
| { |
| JsonPathGinNode *node = make_jsp_expr_node(type, 2); |
| |
| node->args[0] = arg1; |
| node->args[1] = arg2; |
| |
| return node; |
| } |
| |
| /* Append a list of nodes from the jsonpath (jsonb_ops). */ |
| static List * |
| jsonb_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path, |
| JsonbValue *scalar, List *nodes) |
| { |
| JsonPathGinPathItem *pentry; |
| |
| if (scalar) |
| { |
| JsonPathGinNode *node; |
| |
| /* |
| * Append path entry nodes only if scalar is provided. See header |
| * comment for details. |
| */ |
| for (pentry = path.items; pentry; pentry = pentry->parent) |
| { |
| if (pentry->type == jpiKey) /* only keys are indexed */ |
| nodes = lappend(nodes, make_jsp_entry_node(pentry->keyName)); |
| } |
| |
| /* Append scalar node for equality queries. */ |
| if (scalar->type == jbvString) |
| { |
| JsonPathGinPathItem *last = path.items; |
| GinTernaryValue key_entry; |
| |
| /* |
| * Assuming that jsonb_ops interprets string array elements as |
| * keys, we may extract key or non-key entry or even both. In the |
| * latter case we create OR-node. It is possible in lax mode |
| * where arrays are automatically unwrapped, or in strict mode for |
| * jpiAny items. |
| */ |
| |
| if (cxt->lax) |
| key_entry = GIN_MAYBE; |
| else if (!last) /* root ($) */ |
| key_entry = GIN_FALSE; |
| else if (last->type == jpiAnyArray || last->type == jpiIndexArray) |
| key_entry = GIN_TRUE; |
| else if (last->type == jpiAny) |
| key_entry = GIN_MAYBE; |
| else |
| key_entry = GIN_FALSE; |
| |
| if (key_entry == GIN_MAYBE) |
| { |
| JsonPathGinNode *n1 = make_jsp_entry_node_scalar(scalar, true); |
| JsonPathGinNode *n2 = make_jsp_entry_node_scalar(scalar, false); |
| |
| node = make_jsp_expr_node_binary(JSP_GIN_OR, n1, n2); |
| } |
| else |
| { |
| node = make_jsp_entry_node_scalar(scalar, |
| key_entry == GIN_TRUE); |
| } |
| } |
| else |
| { |
| node = make_jsp_entry_node_scalar(scalar, false); |
| } |
| |
| nodes = lappend(nodes, node); |
| } |
| |
| return nodes; |
| } |
| |
| /* Append a list of nodes from the jsonpath (jsonb_path_ops). */ |
| static List * |
| jsonb_path_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path, |
| JsonbValue *scalar, List *nodes) |
| { |
| if (scalar) |
| { |
| /* append path hash node for equality queries */ |
| uint32 hash = path.hash; |
| |
| JsonbHashScalarValue(scalar, &hash); |
| |
| return lappend(nodes, |
| make_jsp_entry_node(UInt32GetDatum(hash))); |
| } |
| else |
| { |
| /* jsonb_path_ops doesn't support EXISTS queries => nothing to append */ |
| return nodes; |
| } |
| } |
| |
| /* |
| * Extract a list of expression nodes that need to be AND-ed by the caller. |
| * Extracted expression is 'path == scalar' if 'scalar' is non-NULL, and |
| * 'EXISTS(path)' otherwise. |
| */ |
| static List * |
| extract_jsp_path_expr_nodes(JsonPathGinContext *cxt, JsonPathGinPath path, |
| JsonPathItem *jsp, JsonbValue *scalar) |
| { |
| JsonPathItem next; |
| List *nodes = NIL; |
| |
| for (;;) |
| { |
| switch (jsp->type) |
| { |
| case jpiCurrent: |
| break; |
| |
| case jpiFilter: |
| { |
| JsonPathItem arg; |
| JsonPathGinNode *filter; |
| |
| jspGetArg(jsp, &arg); |
| |
| filter = extract_jsp_bool_expr(cxt, path, &arg, false); |
| |
| if (filter) |
| nodes = lappend(nodes, filter); |
| |
| break; |
| } |
| |
| default: |
| if (!cxt->add_path_item(&path, jsp)) |
| |
| /* |
| * Path is not supported by the index opclass, return only |
| * the extracted filter nodes. |
| */ |
| return nodes; |
| break; |
| } |
| |
| if (!jspGetNext(jsp, &next)) |
| break; |
| |
| jsp = &next; |
| } |
| |
| /* |
| * Append nodes from the path expression itself to the already extracted |
| * list of filter nodes. |
| */ |
| return cxt->extract_nodes(cxt, path, scalar, nodes); |
| } |
| |
| /* |
| * Extract an expression node from one of following jsonpath path expressions: |
| * EXISTS(jsp) (when 'scalar' is NULL) |
| * jsp == scalar (when 'scalar' is not NULL). |
| * |
| * The current path (@) is passed in 'path'. |
| */ |
| static JsonPathGinNode * |
| extract_jsp_path_expr(JsonPathGinContext *cxt, JsonPathGinPath path, |
| JsonPathItem *jsp, JsonbValue *scalar) |
| { |
| /* extract a list of nodes to be AND-ed */ |
| List *nodes = extract_jsp_path_expr_nodes(cxt, path, jsp, scalar); |
| |
| if (nodes == NIL) |
| /* no nodes were extracted => full scan is needed for this path */ |
| return NULL; |
| |
| if (list_length(nodes) == 1) |
| return linitial(nodes); /* avoid extra AND-node */ |
| |
| /* construct AND-node for path with filters */ |
| return make_jsp_expr_node_args(JSP_GIN_AND, nodes); |
| } |
| |
| /* Recursively extract nodes from the boolean jsonpath expression. */ |
| static JsonPathGinNode * |
| extract_jsp_bool_expr(JsonPathGinContext *cxt, JsonPathGinPath path, |
| JsonPathItem *jsp, bool not) |
| { |
| check_stack_depth(); |
| |
| switch (jsp->type) |
| { |
| case jpiAnd: /* expr && expr */ |
| case jpiOr: /* expr || expr */ |
| { |
| JsonPathItem arg; |
| JsonPathGinNode *larg; |
| JsonPathGinNode *rarg; |
| JsonPathGinNodeType type; |
| |
| jspGetLeftArg(jsp, &arg); |
| larg = extract_jsp_bool_expr(cxt, path, &arg, not); |
| |
| jspGetRightArg(jsp, &arg); |
| rarg = extract_jsp_bool_expr(cxt, path, &arg, not); |
| |
| if (!larg || !rarg) |
| { |
| if (jsp->type == jpiOr) |
| return NULL; |
| |
| return larg ? larg : rarg; |
| } |
| |
| type = not ^ (jsp->type == jpiAnd) ? JSP_GIN_AND : JSP_GIN_OR; |
| |
| return make_jsp_expr_node_binary(type, larg, rarg); |
| } |
| |
| case jpiNot: /* !expr */ |
| { |
| JsonPathItem arg; |
| |
| jspGetArg(jsp, &arg); |
| |
| /* extract child expression inverting 'not' flag */ |
| return extract_jsp_bool_expr(cxt, path, &arg, !not); |
| } |
| |
| case jpiExists: /* EXISTS(path) */ |
| { |
| JsonPathItem arg; |
| |
| if (not) |
| return NULL; /* NOT EXISTS is not supported */ |
| |
| jspGetArg(jsp, &arg); |
| |
| return extract_jsp_path_expr(cxt, path, &arg, NULL); |
| } |
| |
| case jpiNotEqual: |
| |
| /* |
| * 'not' == true case is not supported here because '!(path != |
| * scalar)' is not equivalent to 'path == scalar' in the general |
| * case because of sequence comparison semantics: 'path == scalar' |
| * === 'EXISTS (path, @ == scalar)', '!(path != scalar)' === |
| * 'FOR_ALL(path, @ == scalar)'. So, we should translate '!(path |
| * != scalar)' into GIN query 'path == scalar || EMPTY(path)', but |
| * 'EMPTY(path)' queries are not supported by the both jsonb |
| * opclasses. However in strict mode we could omit 'EMPTY(path)' |
| * part if the path can return exactly one item (it does not |
| * contain wildcard accessors or item methods like .keyvalue() |
| * etc.). |
| */ |
| return NULL; |
| |
| case jpiEqual: /* path == scalar */ |
| { |
| JsonPathItem left_item; |
| JsonPathItem right_item; |
| JsonPathItem *path_item; |
| JsonPathItem *scalar_item; |
| JsonbValue scalar; |
| |
| if (not) |
| return NULL; |
| |
| jspGetLeftArg(jsp, &left_item); |
| jspGetRightArg(jsp, &right_item); |
| |
| if (jspIsScalar(left_item.type)) |
| { |
| scalar_item = &left_item; |
| path_item = &right_item; |
| } |
| else if (jspIsScalar(right_item.type)) |
| { |
| scalar_item = &right_item; |
| path_item = &left_item; |
| } |
| else |
| return NULL; /* at least one operand should be a scalar */ |
| |
| switch (scalar_item->type) |
| { |
| case jpiNull: |
| scalar.type = jbvNull; |
| break; |
| case jpiBool: |
| scalar.type = jbvBool; |
| scalar.val.boolean = !!*scalar_item->content.value.data; |
| break; |
| case jpiNumeric: |
| scalar.type = jbvNumeric; |
| scalar.val.numeric = |
| (Numeric) scalar_item->content.value.data; |
| break; |
| case jpiString: |
| scalar.type = jbvString; |
| scalar.val.string.val = scalar_item->content.value.data; |
| scalar.val.string.len = |
| scalar_item->content.value.datalen; |
| break; |
| default: |
| elog(ERROR, "invalid scalar jsonpath item type: %d", |
| scalar_item->type); |
| return NULL; |
| } |
| |
| return extract_jsp_path_expr(cxt, path, path_item, &scalar); |
| } |
| |
| default: |
| return NULL; /* not a boolean expression */ |
| } |
| } |
| |
| /* Recursively emit all GIN entries found in the node tree */ |
| static void |
| emit_jsp_gin_entries(JsonPathGinNode *node, GinEntries *entries) |
| { |
| check_stack_depth(); |
| |
| switch (node->type) |
| { |
| case JSP_GIN_ENTRY: |
| /* replace datum with its index in the array */ |
| node->val.entryIndex = add_gin_entry(entries, node->val.entryDatum); |
| break; |
| |
| case JSP_GIN_OR: |
| case JSP_GIN_AND: |
| { |
| int i; |
| |
| for (i = 0; i < node->val.nargs; i++) |
| emit_jsp_gin_entries(node->args[i], entries); |
| |
| break; |
| } |
| } |
| } |
| |
| /* |
| * Recursively extract GIN entries from jsonpath query. |
| * Root expression node is put into (*extra_data)[0]. |
| */ |
| static Datum * |
| extract_jsp_query(JsonPath *jp, StrategyNumber strat, bool pathOps, |
| int32 *nentries, Pointer **extra_data) |
| { |
| JsonPathGinContext cxt; |
| JsonPathItem root; |
| JsonPathGinNode *node; |
| JsonPathGinPath path = {0}; |
| GinEntries entries = {0}; |
| |
| cxt.lax = (jp->header & JSONPATH_LAX) != 0; |
| |
| if (pathOps) |
| { |
| cxt.add_path_item = jsonb_path_ops__add_path_item; |
| cxt.extract_nodes = jsonb_path_ops__extract_nodes; |
| } |
| else |
| { |
| cxt.add_path_item = jsonb_ops__add_path_item; |
| cxt.extract_nodes = jsonb_ops__extract_nodes; |
| } |
| |
| jspInit(&root, jp); |
| |
| node = strat == JsonbJsonpathExistsStrategyNumber |
| ? extract_jsp_path_expr(&cxt, path, &root, NULL) |
| : extract_jsp_bool_expr(&cxt, path, &root, false); |
| |
| if (!node) |
| { |
| *nentries = 0; |
| return NULL; |
| } |
| |
| emit_jsp_gin_entries(node, &entries); |
| |
| *nentries = entries.count; |
| if (!*nentries) |
| return NULL; |
| |
| *extra_data = palloc0(sizeof(**extra_data) * entries.count); |
| **extra_data = (Pointer) node; |
| |
| return entries.buf; |
| } |
| |
| /* |
| * Recursively execute jsonpath expression. |
| * 'check' is a bool[] or a GinTernaryValue[] depending on 'ternary' flag. |
| */ |
| static GinTernaryValue |
| execute_jsp_gin_node(JsonPathGinNode *node, void *check, bool ternary) |
| { |
| GinTernaryValue res; |
| GinTernaryValue v; |
| int i; |
| |
| switch (node->type) |
| { |
| case JSP_GIN_AND: |
| res = GIN_TRUE; |
| for (i = 0; i < node->val.nargs; i++) |
| { |
| v = execute_jsp_gin_node(node->args[i], check, ternary); |
| if (v == GIN_FALSE) |
| return GIN_FALSE; |
| else if (v == GIN_MAYBE) |
| res = GIN_MAYBE; |
| } |
| return res; |
| |
| case JSP_GIN_OR: |
| res = GIN_FALSE; |
| for (i = 0; i < node->val.nargs; i++) |
| { |
| v = execute_jsp_gin_node(node->args[i], check, ternary); |
| if (v == GIN_TRUE) |
| return GIN_TRUE; |
| else if (v == GIN_MAYBE) |
| res = GIN_MAYBE; |
| } |
| return res; |
| |
| case JSP_GIN_ENTRY: |
| { |
| int index = node->val.entryIndex; |
| |
| if (ternary) |
| return ((GinTernaryValue *) check)[index]; |
| else |
| return ((bool *) check)[index] ? GIN_TRUE : GIN_FALSE; |
| } |
| |
| default: |
| elog(ERROR, "invalid jsonpath gin node type: %d", node->type); |
| return GIN_FALSE; /* keep compiler quiet */ |
| } |
| } |
| |
| Datum |
| gin_extract_jsonb_query(PG_FUNCTION_ARGS) |
| { |
| int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| StrategyNumber strategy = PG_GETARG_UINT16(2); |
| int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); |
| Datum *entries; |
| |
| if (strategy == JsonbContainsStrategyNumber) |
| { |
| /* Query is a jsonb, so just apply gin_extract_jsonb... */ |
| entries = (Datum *) |
| DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb, |
| PG_GETARG_DATUM(0), |
| PointerGetDatum(nentries))); |
| /* ...although "contains {}" requires a full index scan */ |
| if (*nentries == 0) |
| *searchMode = GIN_SEARCH_MODE_ALL; |
| } |
| else if (strategy == JsonbExistsStrategyNumber) |
| { |
| /* Query is a text string, which we treat as a key */ |
| text *query = PG_GETARG_TEXT_PP(0); |
| |
| *nentries = 1; |
| entries = (Datum *) palloc(sizeof(Datum)); |
| entries[0] = make_text_key(JGINFLAG_KEY, |
| VARDATA_ANY(query), |
| VARSIZE_ANY_EXHDR(query)); |
| } |
| else if (strategy == JsonbExistsAnyStrategyNumber || |
| strategy == JsonbExistsAllStrategyNumber) |
| { |
| /* Query is a text array; each element is treated as a key */ |
| ArrayType *query = PG_GETARG_ARRAYTYPE_P(0); |
| Datum *key_datums; |
| bool *key_nulls; |
| int key_count; |
| int i, |
| j; |
| |
| deconstruct_array_builtin(query, TEXTOID, &key_datums, &key_nulls, &key_count); |
| |
| entries = (Datum *) palloc(sizeof(Datum) * key_count); |
| |
| for (i = 0, j = 0; i < key_count; i++) |
| { |
| /* Nulls in the array are ignored */ |
| if (key_nulls[i]) |
| continue; |
| /* We rely on the array elements not being toasted */ |
| entries[j++] = make_text_key(JGINFLAG_KEY, |
| VARDATA_ANY(key_datums[i]), |
| VARSIZE_ANY_EXHDR(key_datums[i])); |
| } |
| |
| *nentries = j; |
| /* ExistsAll with no keys should match everything */ |
| if (j == 0 && strategy == JsonbExistsAllStrategyNumber) |
| *searchMode = GIN_SEARCH_MODE_ALL; |
| } |
| else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| strategy == JsonbJsonpathExistsStrategyNumber) |
| { |
| JsonPath *jp = PG_GETARG_JSONPATH_P(0); |
| Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); |
| |
| entries = extract_jsp_query(jp, strategy, false, nentries, extra_data); |
| |
| if (!entries) |
| *searchMode = GIN_SEARCH_MODE_ALL; |
| } |
| else |
| { |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| entries = NULL; /* keep compiler quiet */ |
| } |
| |
| PG_RETURN_POINTER(entries); |
| } |
| |
| Datum |
| gin_consistent_jsonb(PG_FUNCTION_ARGS) |
| { |
| bool *check = (bool *) PG_GETARG_POINTER(0); |
| StrategyNumber strategy = PG_GETARG_UINT16(1); |
| |
| /* Jsonb *query = PG_GETARG_JSONB_P(2); */ |
| int32 nkeys = PG_GETARG_INT32(3); |
| |
| Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); |
| bool *recheck = (bool *) PG_GETARG_POINTER(5); |
| bool res = true; |
| int32 i; |
| |
| if (strategy == JsonbContainsStrategyNumber) |
| { |
| /* |
| * We must always recheck, since we can't tell from the index whether |
| * the positions of the matched items match the structure of the query |
| * object. (Even if we could, we'd also have to worry about hashed |
| * keys and the index's failure to distinguish keys from string array |
| * elements.) However, the tuple certainly doesn't match unless it |
| * contains all the query keys. |
| */ |
| *recheck = true; |
| for (i = 0; i < nkeys; i++) |
| { |
| if (!check[i]) |
| { |
| res = false; |
| break; |
| } |
| } |
| } |
| else if (strategy == JsonbExistsStrategyNumber) |
| { |
| /* |
| * Although the key is certainly present in the index, we must recheck |
| * because (1) the key might be hashed, and (2) the index match might |
| * be for a key that's not at top level of the JSON object. For (1), |
| * we could look at the query key to see if it's hashed and not |
| * recheck if not, but the index lacks enough info to tell about (2). |
| */ |
| *recheck = true; |
| res = true; |
| } |
| else if (strategy == JsonbExistsAnyStrategyNumber) |
| { |
| /* As for plain exists, we must recheck */ |
| *recheck = true; |
| res = true; |
| } |
| else if (strategy == JsonbExistsAllStrategyNumber) |
| { |
| /* As for plain exists, we must recheck */ |
| *recheck = true; |
| /* ... but unless all the keys are present, we can say "false" */ |
| for (i = 0; i < nkeys; i++) |
| { |
| if (!check[i]) |
| { |
| res = false; |
| break; |
| } |
| } |
| } |
| else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| strategy == JsonbJsonpathExistsStrategyNumber) |
| { |
| *recheck = true; |
| |
| if (nkeys > 0) |
| { |
| Assert(extra_data && extra_data[0]); |
| res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, |
| false) != GIN_FALSE; |
| } |
| } |
| else |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| |
| PG_RETURN_BOOL(res); |
| } |
| |
| Datum |
| gin_triconsistent_jsonb(PG_FUNCTION_ARGS) |
| { |
| GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); |
| StrategyNumber strategy = PG_GETARG_UINT16(1); |
| |
| /* Jsonb *query = PG_GETARG_JSONB_P(2); */ |
| int32 nkeys = PG_GETARG_INT32(3); |
| Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); |
| GinTernaryValue res = GIN_MAYBE; |
| int32 i; |
| |
| /* |
| * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this |
| * corresponds to always forcing recheck in the regular consistent |
| * function, for the reasons listed there. |
| */ |
| if (strategy == JsonbContainsStrategyNumber || |
| strategy == JsonbExistsAllStrategyNumber) |
| { |
| /* All extracted keys must be present */ |
| for (i = 0; i < nkeys; i++) |
| { |
| if (check[i] == GIN_FALSE) |
| { |
| res = GIN_FALSE; |
| break; |
| } |
| } |
| } |
| else if (strategy == JsonbExistsStrategyNumber || |
| strategy == JsonbExistsAnyStrategyNumber) |
| { |
| /* At least one extracted key must be present */ |
| res = GIN_FALSE; |
| for (i = 0; i < nkeys; i++) |
| { |
| if (check[i] == GIN_TRUE || |
| check[i] == GIN_MAYBE) |
| { |
| res = GIN_MAYBE; |
| break; |
| } |
| } |
| } |
| else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| strategy == JsonbJsonpathExistsStrategyNumber) |
| { |
| if (nkeys > 0) |
| { |
| Assert(extra_data && extra_data[0]); |
| res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, |
| true); |
| |
| /* Should always recheck the result */ |
| if (res == GIN_TRUE) |
| res = GIN_MAYBE; |
| } |
| } |
| else |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| |
| PG_RETURN_GIN_TERNARY_VALUE(res); |
| } |
| |
| /* |
| * |
| * jsonb_path_ops GIN opclass support functions |
| * |
| * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON |
| * value; but the JSON key(s) leading to each value are also included in its |
| * hash computation. This means we can only support containment queries, |
| * but the index can distinguish, for example, {"foo": 42} from {"bar": 42} |
| * since different hashes will be generated. |
| * |
| */ |
| |
| Datum |
| gin_extract_jsonb_path(PG_FUNCTION_ARGS) |
| { |
| Jsonb *jb = PG_GETARG_JSONB_P(0); |
| int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| int total = JB_ROOT_COUNT(jb); |
| JsonbIterator *it; |
| JsonbValue v; |
| JsonbIteratorToken r; |
| PathHashStack tail; |
| PathHashStack *stack; |
| GinEntries entries; |
| |
| /* If the root level is empty, we certainly have no keys */ |
| if (total == 0) |
| { |
| *nentries = 0; |
| PG_RETURN_POINTER(NULL); |
| } |
| |
| /* Otherwise, use 2 * root count as initial estimate of result size */ |
| init_gin_entries(&entries, 2 * total); |
| |
| /* We keep a stack of partial hashes corresponding to parent key levels */ |
| tail.parent = NULL; |
| tail.hash = 0; |
| stack = &tail; |
| |
| it = JsonbIteratorInit(&jb->root); |
| |
| while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) |
| { |
| PathHashStack *parent; |
| |
| switch (r) |
| { |
| case WJB_BEGIN_ARRAY: |
| case WJB_BEGIN_OBJECT: |
| /* Push a stack level for this object */ |
| parent = stack; |
| stack = (PathHashStack *) palloc(sizeof(PathHashStack)); |
| |
| /* |
| * We pass forward hashes from outer nesting levels so that |
| * the hashes for nested values will include outer keys as |
| * well as their own keys. |
| * |
| * Nesting an array within another array will not alter |
| * innermost scalar element hash values, but that seems |
| * inconsequential. |
| */ |
| stack->hash = parent->hash; |
| stack->parent = parent; |
| break; |
| case WJB_KEY: |
| /* mix this key into the current outer hash */ |
| JsonbHashScalarValue(&v, &stack->hash); |
| /* hash is now ready to incorporate the value */ |
| break; |
| case WJB_ELEM: |
| case WJB_VALUE: |
| /* mix the element or value's hash into the prepared hash */ |
| JsonbHashScalarValue(&v, &stack->hash); |
| /* and emit an index entry */ |
| add_gin_entry(&entries, UInt32GetDatum(stack->hash)); |
| /* reset hash for next key, value, or sub-object */ |
| stack->hash = stack->parent->hash; |
| break; |
| case WJB_END_ARRAY: |
| case WJB_END_OBJECT: |
| /* Pop the stack */ |
| parent = stack->parent; |
| pfree(stack); |
| stack = parent; |
| /* reset hash for next key, value, or sub-object */ |
| if (stack->parent) |
| stack->hash = stack->parent->hash; |
| else |
| stack->hash = 0; |
| break; |
| default: |
| elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r); |
| } |
| } |
| |
| *nentries = entries.count; |
| |
| PG_RETURN_POINTER(entries.buf); |
| } |
| |
| Datum |
| gin_extract_jsonb_query_path(PG_FUNCTION_ARGS) |
| { |
| int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| StrategyNumber strategy = PG_GETARG_UINT16(2); |
| int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); |
| Datum *entries; |
| |
| if (strategy == JsonbContainsStrategyNumber) |
| { |
| /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */ |
| entries = (Datum *) |
| DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path, |
| PG_GETARG_DATUM(0), |
| PointerGetDatum(nentries))); |
| |
| /* ... although "contains {}" requires a full index scan */ |
| if (*nentries == 0) |
| *searchMode = GIN_SEARCH_MODE_ALL; |
| } |
| else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| strategy == JsonbJsonpathExistsStrategyNumber) |
| { |
| JsonPath *jp = PG_GETARG_JSONPATH_P(0); |
| Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); |
| |
| entries = extract_jsp_query(jp, strategy, true, nentries, extra_data); |
| |
| if (!entries) |
| *searchMode = GIN_SEARCH_MODE_ALL; |
| } |
| else |
| { |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| entries = NULL; |
| } |
| |
| PG_RETURN_POINTER(entries); |
| } |
| |
| Datum |
| gin_consistent_jsonb_path(PG_FUNCTION_ARGS) |
| { |
| bool *check = (bool *) PG_GETARG_POINTER(0); |
| StrategyNumber strategy = PG_GETARG_UINT16(1); |
| |
| /* Jsonb *query = PG_GETARG_JSONB_P(2); */ |
| int32 nkeys = PG_GETARG_INT32(3); |
| Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); |
| bool *recheck = (bool *) PG_GETARG_POINTER(5); |
| bool res = true; |
| int32 i; |
| |
| if (strategy == JsonbContainsStrategyNumber) |
| { |
| /* |
| * jsonb_path_ops is necessarily lossy, not only because of hash |
| * collisions but also because it doesn't preserve complete |
| * information about the structure of the JSON object. Besides, there |
| * are some special rules around the containment of raw scalars in |
| * arrays that are not handled here. So we must always recheck a |
| * match. However, if not all of the keys are present, the tuple |
| * certainly doesn't match. |
| */ |
| *recheck = true; |
| for (i = 0; i < nkeys; i++) |
| { |
| if (!check[i]) |
| { |
| res = false; |
| break; |
| } |
| } |
| } |
| else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| strategy == JsonbJsonpathExistsStrategyNumber) |
| { |
| *recheck = true; |
| |
| if (nkeys > 0) |
| { |
| Assert(extra_data && extra_data[0]); |
| res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, |
| false) != GIN_FALSE; |
| } |
| } |
| else |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| |
| PG_RETURN_BOOL(res); |
| } |
| |
| Datum |
| gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS) |
| { |
| GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); |
| StrategyNumber strategy = PG_GETARG_UINT16(1); |
| |
| /* Jsonb *query = PG_GETARG_JSONB_P(2); */ |
| int32 nkeys = PG_GETARG_INT32(3); |
| Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); |
| GinTernaryValue res = GIN_MAYBE; |
| int32 i; |
| |
| if (strategy == JsonbContainsStrategyNumber) |
| { |
| /* |
| * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; |
| * this corresponds to always forcing recheck in the regular |
| * consistent function, for the reasons listed there. |
| */ |
| for (i = 0; i < nkeys; i++) |
| { |
| if (check[i] == GIN_FALSE) |
| { |
| res = GIN_FALSE; |
| break; |
| } |
| } |
| } |
| else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| strategy == JsonbJsonpathExistsStrategyNumber) |
| { |
| if (nkeys > 0) |
| { |
| Assert(extra_data && extra_data[0]); |
| res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, |
| true); |
| |
| /* Should always recheck the result */ |
| if (res == GIN_TRUE) |
| res = GIN_MAYBE; |
| } |
| } |
| else |
| elog(ERROR, "unrecognized strategy number: %d", strategy); |
| |
| PG_RETURN_GIN_TERNARY_VALUE(res); |
| } |
| |
| /* |
| * Construct a jsonb_ops GIN key from a flag byte and a textual representation |
| * (which need not be null-terminated). This function is responsible |
| * for hashing overlength text representations; it will add the |
| * JGINFLAG_HASHED bit to the flag value if it does that. |
| */ |
| static Datum |
| make_text_key(char flag, const char *str, int len) |
| { |
| text *item; |
| char hashbuf[10]; |
| |
| if (len > JGIN_MAXLENGTH) |
| { |
| uint32 hashval; |
| |
| hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len)); |
| snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval); |
| str = hashbuf; |
| len = 8; |
| flag |= JGINFLAG_HASHED; |
| } |
| |
| /* |
| * Now build the text Datum. For simplicity we build a 4-byte-header |
| * varlena text Datum here, but we expect it will get converted to short |
| * header format when stored in the index. |
| */ |
| item = (text *) palloc(VARHDRSZ + len + 1); |
| SET_VARSIZE(item, VARHDRSZ + len + 1); |
| |
| *VARDATA(item) = flag; |
| |
| memcpy(VARDATA(item) + 1, str, len); |
| |
| return PointerGetDatum(item); |
| } |
| |
| /* |
| * Create a textual representation of a JsonbValue that will serve as a GIN |
| * key in a jsonb_ops index. is_key is true if the JsonbValue is a key, |
| * or if it is a string array element (since we pretend those are keys, |
| * see jsonb.h). |
| */ |
| static Datum |
| make_scalar_key(const JsonbValue *scalarVal, bool is_key) |
| { |
| Datum item; |
| char *cstr; |
| |
| switch (scalarVal->type) |
| { |
| case jbvNull: |
| Assert(!is_key); |
| item = make_text_key(JGINFLAG_NULL, "", 0); |
| break; |
| case jbvBool: |
| Assert(!is_key); |
| item = make_text_key(JGINFLAG_BOOL, |
| scalarVal->val.boolean ? "t" : "f", 1); |
| break; |
| case jbvNumeric: |
| Assert(!is_key); |
| |
| /* |
| * A normalized textual representation, free of trailing zeroes, |
| * is required so that numerically equal values will produce equal |
| * strings. |
| * |
| * It isn't ideal that numerics are stored in a relatively bulky |
| * textual format. However, it's a notationally convenient way of |
| * storing a "union" type in the GIN B-Tree, and indexing Jsonb |
| * strings takes precedence. |
| */ |
| cstr = numeric_normalize(scalarVal->val.numeric); |
| item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr)); |
| pfree(cstr); |
| break; |
| case jbvString: |
| item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR, |
| scalarVal->val.string.val, |
| scalarVal->val.string.len); |
| break; |
| default: |
| elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type); |
| item = 0; /* keep compiler quiet */ |
| break; |
| } |
| |
| return item; |
| } |