| /*------------------------------------------------------------------------- |
| * |
| * queryjumblefuncs.c |
| * Query normalization and fingerprinting. |
| * |
| * Normalization is a process whereby similar queries, typically differing only |
| * in their constants (though the exact rules are somewhat more subtle than |
| * that) are recognized as equivalent, and are tracked as a single entry. This |
| * is particularly useful for non-prepared queries. |
| * |
| * Normalization is implemented by fingerprinting queries, selectively |
| * serializing those fields of each query tree's nodes that are judged to be |
| * essential to the query. This is referred to as a query jumble. This is |
| * distinct from a regular serialization in that various extraneous |
| * information is ignored as irrelevant or not essential to the query, such |
| * as the collations of Vars and, most notably, the values of constants. |
| * |
| * This jumble is acquired at the end of parse analysis of each query, and |
| * a 64-bit hash of it is stored into the query's Query.queryId field. |
| * The server then copies this value around, making it available in plan |
| * tree(s) generated from the query. The executor can then use this value |
| * to blame query costs on the proper queryId. |
| * |
| * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/nodes/queryjumblefuncs.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include "common/hashfn.h" |
| #include "miscadmin.h" |
| #include "nodes/queryjumble.h" |
| #include "parser/scansup.h" |
| |
| #define JUMBLE_SIZE 1024 /* query serialization buffer size */ |
| |
| /* GUC parameters */ |
| int compute_query_id = COMPUTE_QUERY_ID_AUTO; |
| |
| /* True when compute_query_id is ON, or AUTO and a module requests them */ |
| bool query_id_enabled = false; |
| |
| static void AppendJumble(JumbleState *jstate, |
| const unsigned char *item, Size size); |
| static void RecordConstLocation(JumbleState *jstate, int location); |
| static void _jumbleNode(JumbleState *jstate, Node *node); |
| static void _jumbleA_Const(JumbleState *jstate, Node *node); |
| static void _jumbleList(JumbleState *jstate, Node *node); |
| static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node); |
| |
| /* |
| * Given a possibly multi-statement source string, confine our attention to the |
| * relevant part of the string. |
| */ |
| const char * |
| CleanQuerytext(const char *query, int *location, int *len) |
| { |
| int query_location = *location; |
| int query_len = *len; |
| |
| /* First apply starting offset, unless it's -1 (unknown). */ |
| if (query_location >= 0) |
| { |
| Assert(query_location <= strlen(query)); |
| query += query_location; |
| /* Length of 0 (or -1) means "rest of string" */ |
| if (query_len <= 0) |
| query_len = strlen(query); |
| else |
| Assert(query_len <= strlen(query)); |
| } |
| else |
| { |
| /* If query location is unknown, distrust query_len as well */ |
| query_location = 0; |
| query_len = strlen(query); |
| } |
| |
| /* |
| * Discard leading and trailing whitespace, too. Use scanner_isspace() |
| * not libc's isspace(), because we want to match the lexer's behavior. |
| */ |
| while (query_len > 0 && scanner_isspace(query[0])) |
| query++, query_location++, query_len--; |
| while (query_len > 0 && scanner_isspace(query[query_len - 1])) |
| query_len--; |
| |
| *location = query_location; |
| *len = query_len; |
| |
| return query; |
| } |
| |
| JumbleState * |
| JumbleQuery(Query *query) |
| { |
| JumbleState *jstate = NULL; |
| |
| Assert(IsQueryIdEnabled()); |
| |
| jstate = (JumbleState *) palloc(sizeof(JumbleState)); |
| |
| /* Set up workspace for query jumbling */ |
| jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); |
| jstate->jumble_len = 0; |
| jstate->clocations_buf_size = 32; |
| jstate->clocations = (LocationLen *) |
| palloc(jstate->clocations_buf_size * sizeof(LocationLen)); |
| jstate->clocations_count = 0; |
| jstate->highest_extern_param_id = 0; |
| |
| /* Compute query ID and mark the Query node with it */ |
| _jumbleNode(jstate, (Node *) query); |
| query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, |
| jstate->jumble_len, |
| 0)); |
| |
| /* |
| * If we are unlucky enough to get a hash of zero, use 1 instead for |
| * normal statements and 2 for utility queries. |
| */ |
| if (query->queryId == UINT64CONST(0)) |
| { |
| if (query->utilityStmt) |
| query->queryId = UINT64CONST(2); |
| else |
| query->queryId = UINT64CONST(1); |
| } |
| |
| return jstate; |
| } |
| |
| /* |
| * Enables query identifier computation. |
| * |
| * Third-party plugins can use this function to inform core that they require |
| * a query identifier to be computed. |
| */ |
| void |
| EnableQueryId(void) |
| { |
| if (compute_query_id != COMPUTE_QUERY_ID_OFF) |
| query_id_enabled = true; |
| } |
| |
| /* |
| * AppendJumble: Append a value that is substantive in a given query to |
| * the current jumble. |
| */ |
| static void |
| AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) |
| { |
| unsigned char *jumble = jstate->jumble; |
| Size jumble_len = jstate->jumble_len; |
| |
| /* |
| * Whenever the jumble buffer is full, we hash the current contents and |
| * reset the buffer to contain just that hash value, thus relying on the |
| * hash to summarize everything so far. |
| */ |
| while (size > 0) |
| { |
| Size part_size; |
| |
| if (jumble_len >= JUMBLE_SIZE) |
| { |
| uint64 start_hash; |
| |
| start_hash = DatumGetUInt64(hash_any_extended(jumble, |
| JUMBLE_SIZE, 0)); |
| memcpy(jumble, &start_hash, sizeof(start_hash)); |
| jumble_len = sizeof(start_hash); |
| } |
| part_size = Min(size, JUMBLE_SIZE - jumble_len); |
| memcpy(jumble + jumble_len, item, part_size); |
| jumble_len += part_size; |
| item += part_size; |
| size -= part_size; |
| } |
| jstate->jumble_len = jumble_len; |
| } |
| |
| /* |
| * Record location of constant within query string of query tree |
| * that is currently being walked. |
| */ |
| static void |
| RecordConstLocation(JumbleState *jstate, int location) |
| { |
| /* -1 indicates unknown or undefined location */ |
| if (location >= 0) |
| { |
| /* enlarge array if needed */ |
| if (jstate->clocations_count >= jstate->clocations_buf_size) |
| { |
| jstate->clocations_buf_size *= 2; |
| jstate->clocations = (LocationLen *) |
| repalloc(jstate->clocations, |
| jstate->clocations_buf_size * |
| sizeof(LocationLen)); |
| } |
| jstate->clocations[jstate->clocations_count].location = location; |
| /* initialize lengths to -1 to simplify third-party module usage */ |
| jstate->clocations[jstate->clocations_count].length = -1; |
| jstate->clocations_count++; |
| } |
| } |
| |
| #define JUMBLE_NODE(item) \ |
| _jumbleNode(jstate, (Node *) expr->item) |
| #define JUMBLE_LOCATION(location) \ |
| RecordConstLocation(jstate, expr->location) |
| #define JUMBLE_FIELD(item) \ |
| AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)) |
| #define JUMBLE_FIELD_SINGLE(item) \ |
| AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item)) |
| #define JUMBLE_STRING(str) \ |
| do { \ |
| if (expr->str) \ |
| AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \ |
| } while(0) |
| |
| #include "queryjumblefuncs.funcs.c" |
| |
| static void |
| _jumbleNode(JumbleState *jstate, Node *node) |
| { |
| Node *expr = node; |
| |
| if (expr == NULL) |
| return; |
| |
| /* Guard against stack overflow due to overly complex expressions */ |
| check_stack_depth(); |
| |
| /* |
| * We always emit the node's NodeTag, then any additional fields that are |
| * considered significant, and then we recurse to any child nodes. |
| */ |
| JUMBLE_FIELD(type); |
| |
| switch (nodeTag(expr)) |
| { |
| #include "queryjumblefuncs.switch.c" |
| |
| case T_List: |
| case T_IntList: |
| case T_OidList: |
| case T_XidList: |
| _jumbleList(jstate, expr); |
| break; |
| |
| default: |
| /* Only a warning, since we can stumble along anyway */ |
| elog(WARNING, "unrecognized node type: %d", |
| (int) nodeTag(expr)); |
| break; |
| } |
| |
| /* Special cases to handle outside the automated code */ |
| switch (nodeTag(expr)) |
| { |
| case T_Param: |
| { |
| Param *p = (Param *) node; |
| |
| /* |
| * Update the highest Param id seen, in order to start |
| * normalization correctly. |
| */ |
| if (p->paramkind == PARAM_EXTERN && |
| p->paramid > jstate->highest_extern_param_id) |
| jstate->highest_extern_param_id = p->paramid; |
| } |
| break; |
| default: |
| break; |
| } |
| } |
| |
| static void |
| _jumbleList(JumbleState *jstate, Node *node) |
| { |
| List *expr = (List *) node; |
| ListCell *l; |
| |
| switch (expr->type) |
| { |
| case T_List: |
| foreach(l, expr) |
| _jumbleNode(jstate, lfirst(l)); |
| break; |
| case T_IntList: |
| foreach(l, expr) |
| JUMBLE_FIELD_SINGLE(lfirst_int(l)); |
| break; |
| case T_OidList: |
| foreach(l, expr) |
| JUMBLE_FIELD_SINGLE(lfirst_oid(l)); |
| break; |
| case T_XidList: |
| foreach(l, expr) |
| JUMBLE_FIELD_SINGLE(lfirst_xid(l)); |
| break; |
| default: |
| elog(ERROR, "unrecognized list node type: %d", |
| (int) expr->type); |
| return; |
| } |
| } |
| |
| static void |
| _jumbleA_Const(JumbleState *jstate, Node *node) |
| { |
| A_Const *expr = (A_Const *) node; |
| |
| JUMBLE_FIELD(isnull); |
| if (!expr->isnull) |
| { |
| JUMBLE_FIELD(val.node.type); |
| switch (nodeTag(&expr->val)) |
| { |
| case T_Integer: |
| JUMBLE_FIELD(val.ival.ival); |
| break; |
| case T_Float: |
| JUMBLE_STRING(val.fval.fval); |
| break; |
| case T_Boolean: |
| JUMBLE_FIELD(val.boolval.boolval); |
| break; |
| case T_String: |
| JUMBLE_STRING(val.sval.sval); |
| break; |
| case T_BitString: |
| JUMBLE_STRING(val.bsval.bsval); |
| break; |
| default: |
| elog(ERROR, "unrecognized node type: %d", |
| (int) nodeTag(&expr->val)); |
| break; |
| } |
| } |
| } |
| |
| static void |
| _jumbleRangeTblEntry(JumbleState *jstate, Node *node) |
| { |
| RangeTblEntry *expr = (RangeTblEntry *) node; |
| |
| JUMBLE_FIELD(rtekind); |
| switch (expr->rtekind) |
| { |
| case RTE_RELATION: |
| JUMBLE_FIELD(relid); |
| JUMBLE_NODE(tablesample); |
| JUMBLE_FIELD(inh); |
| break; |
| case RTE_SUBQUERY: |
| JUMBLE_NODE(subquery); |
| break; |
| case RTE_JOIN: |
| JUMBLE_FIELD(jointype); |
| break; |
| case RTE_FUNCTION: |
| JUMBLE_NODE(functions); |
| break; |
| case RTE_TABLEFUNC: |
| JUMBLE_NODE(tablefunc); |
| break; |
| case RTE_VALUES: |
| JUMBLE_NODE(values_lists); |
| break; |
| case RTE_CTE: |
| |
| /* |
| * Depending on the CTE name here isn't ideal, but it's the only |
| * info we have to identify the referenced WITH item. |
| */ |
| JUMBLE_STRING(ctename); |
| JUMBLE_FIELD(ctelevelsup); |
| break; |
| case RTE_NAMEDTUPLESTORE: |
| JUMBLE_STRING(enrname); |
| break; |
| case RTE_RESULT: |
| break; |
| default: |
| elog(ERROR, "unrecognized RTE kind: %d", (int) expr->rtekind); |
| break; |
| } |
| } |