blob: 28f13475b098baa7917f59b250375826eae44892 [file] [log] [blame]
/*!
* \file countmin.h
*
* \brief header file for CM and MFV sketches
*/
#ifndef _COUNTMIN_H_
#define _COUNTMIN_H_
#include <limits.h> /* CHAR_BIT */
#define INT64BITS (sizeof(int64)*CHAR_BIT)
#define RANGES INT64BITS
#define DEPTH 8 /* magic tuning value: number of hash functions */
/* #define NUMCOUNTERS 65535 */
#define NUMCOUNTERS 1024 /* another magic tuning value: modulus of hash functions */
#ifdef INT64_IS_BUSTED
#define MAX_INT64 (INT64CONST(0x7FFFFFFF))
#define MAX_UINT64 (UINT64CONST(0xFFFFFFFF))
#else
#define MAX_INT64 (INT64CONST(0x7FFFFFFFFFFFFFFF))
#define MAX_UINT64 (UINT64CONST(0xFFFFFFFFFFFFFFFF))
#endif /* INT64_IS_BUSTED */
#define MID_INT64 (0)
#define MIN_INT64 (~MAX_INT64)
#define MID_UINT64 (MAX_UINT64 >> 1)
#define MIN_UINT64 (0)
/*!
* \brief the CountMin sketch array
*
* a CountMin sketch is a set of DEPTH arrays of NUMCOUNTERS each.
* It's like a "counting Bloom Filter" where instead of just hashing to
* DEPTH bitmaps, we count up hash-collisions in DEPTH counter arrays
*/
typedef uint64 countmin[DEPTH][NUMCOUNTERS];
#define MAXARGS 3
/*!
* \internal
* \brief the transition value struct for CM sketches
*
* Holds the sketch counters
* and a cache of handy metadata that we'll reuse across calls
* \endinternal
*/
typedef struct {
int64 args[MAXARGS]; /*! carry along additional args for finalizer */
int nargs; /*! number of args being carried for finalizer */
countmin sketches[RANGES];
} cmtransval;
/*! base size of a cmtransval */
#define CM_TRANSVAL_SZ (VARHDRSZ + sizeof(cmtransval))
#define CM_TRANSVAL_INITIALIZED(t) (VARSIZE(t) >= CM_TRANSVAL_SZ)
/*!
* \internal
* \brief array of ranges
*
* a data structure to hold the constituent dyadic (power-of-two) ranges
* corresponding to an arbitrary range.
* E.g. 14-48 becomes [[14-15], [16-31], [32-47], [48-48]]
* \endinternal
*/
typedef struct {
int64 spans[2*INT64BITS][2]; /*! the ranges */
uint32 emptyoffset; /*! offset of next empty span */
} rangelist;
#define ADVANCE_OFFSET(r) \
if ((r).emptyoffset == 2*INT64BITS) { \
uint32 i; \
for (i = 0; i < 2*INT64BITS; i++) \
elog(NOTICE, \
"[" INT64_FORMAT ", " INT64_FORMAT "]", \
(r).spans[i][0], \
(r).spans[i][1]); \
elog(ERROR, "countmin error: rangelist overflow"); \
} \
else ((r).emptyoffset++);
/*!
* \internal
* \brief offset/count pairs for MFV sketches
* \endinternal
*/
typedef struct {
unsigned offset; /*! memory offset to the value */
uint64 cnt; /*! counter */
} offsetcnt;
/*!
* \internal
* \brief the transition value struct for MFV sketches.
*
* Holds a single
* countmin sketch (no dyadic ranges) and an array of Most Frequent Values.
* We are flexible with the number of mfvs, as well as the type.
* Hence at the end of this struct is an array mfv[max_mfvs] of offsetcnt entries,
* followed by an array of Postgres text objects with the output formats of
* the mfvs.
* Each mfv entry contains an offset from the top of the structure where
* we can find a Postgres text object holding the output format of a
* frequent value.
* \endinternal
*/
typedef struct {
unsigned max_mfvs; /*! number of frequent values */
unsigned next_mfv; /*! index of next mfv to insert into */
unsigned next_offset; /*! next memory offset to insert into */
Oid typOid; /*! Oid of the type being counted */
int typLen; /*! Length of the data type */
bool typByVal; /*! Whether type is by value or by reference */
Oid outFuncOid; /*! Oid of the outfunc for this type */
countmin sketch; /*! a single countmin sketch */
/*!
* type-independent collection of Most Frequent Values
* Holds an array of (counter,offset) pairs, which by
* convention is followed by the values themselves as text Datums,
* accessible via the offsets
*/
offsetcnt mfvs[];
} mfvtransval;
/*! base size of an MFV transval */
#define MFV_TRANSVAL_SZ(i) (VARHDRSZ + sizeof(mfvtransval) + i*sizeof(offsetcnt))
/*! free space remaining for text values */
#define MFV_TRANSVAL_CAPACITY(transblob) (VARSIZE(transblob) - VARHDRSZ - \
((mfvtransval *)VARDATA(transblob))-> \
next_offset)
/* countmin aggregate protos */
Datum countmin_trans_c(countmin, Datum, Oid, Oid);
bytea *cmsketch_check_transval(PG_FUNCTION_ARGS, bool);
bytea *cmsketch_init_transval();
void countmin_dyadic_trans_c(cmtransval *, Datum);
/* countmin scalar function protos */
int64 cmsketch_count_c(countmin, Datum, Oid, Oid);
int64 cmsketch_count_md5_datum(countmin, bytea *, Oid);
/* hash_counters_iterate and its lambdas */
int64 hash_counters_iterate(bytea *, countmin, int64, int64 (*lambdaptr)(
uint32,
uint32,
countmin,
int64));
int64 increment_counter(uint32, uint32, countmin, int64);
int64 min_counter(uint32, uint32, countmin, int64);
/* MFV protos */
bytea *mfv_transval_append(bytea *, Datum);
int mfv_find(bytea *, Datum);
bytea *mfv_transval_replace(bytea *, Datum, int);
bytea *mfv_transval_insert_at(bytea *, Datum, uint32);
void *mfv_transval_getval(bytea *, uint32);
bytea *mfv_init_transval(int, Oid);
bytea *mfvsketch_merge_c(bytea *, bytea *);
void mfv_copy_datum(bytea *, int, Datum);
int cnt_cmp_desc(const void *i, const void *j);
/* UDF protos */
Datum __cmsketch_int8_trans(PG_FUNCTION_ARGS);
Datum cmsketch_width_histogram(PG_FUNCTION_ARGS);
Datum cmsketch_dhistogram(PG_FUNCTION_ARGS);
Datum __cmsketch_base64_final(PG_FUNCTION_ARGS);
Datum __cmsketch_merge(PG_FUNCTION_ARGS);
Datum cmsketch_dump(PG_FUNCTION_ARGS);
Datum __cmsketch_count_final(PG_FUNCTION_ARGS);
Datum __cmsketch_rangecount_final(PG_FUNCTION_ARGS);
Datum __cmsketch_centile_final(PG_FUNCTION_ARGS);
Datum __cmsketch_median_final(PG_FUNCTION_ARGS);
Datum __cmsketch_dhist_final(PG_FUNCTION_ARGS);
Datum __mfvsketch_trans(PG_FUNCTION_ARGS);
Datum __mfvsketch_final(PG_FUNCTION_ARGS);
Datum __mfvsketch_merge(PG_FUNCTION_ARGS);
#endif /* _COUNTMIN_H_ */