/* blob: 1d7012c3dcd6a2d7ea57fa5a316924dc5b8bf0fa [file] [log] [blame] */
#include <postgres.h>

#include <limits.h>
#include <stdint.h>

#if PG_VERSION_NUM >= 90100
#include "catalog/pg_collation.h"
#endif
#include "utils/array.h"
#include "utils/lsyscache.h"
#include "../../../svec/src/pg_gp/sparse_vector.h"
PG_FUNCTION_INFO_V1(generate_sparse_vector);
Datum generate_sparse_vector(PG_FUNCTION_ARGS)
{
SvecType *output_sfv;
int16_t typlen;
bool typbyval;
char typalign;
bool *nulls;
if (PG_NARGS() != 3)
elog(ERROR, "Invalid number of arguments.");
ArrayType *term_index = PG_GETARG_ARRAYTYPE_P(0);
ArrayType *term_count = PG_GETARG_ARRAYTYPE_P(1);
int64_t dict_size = PG_GETARG_INT64(2);
/* Check if arrays have null entries */
if (ARR_HASNULL(term_index) || ARR_HASNULL(term_count))
elog(ERROR, "One or both of the argument arrays has one or more null entries.");
if (dict_size <= 0)
elog(ERROR, "Dictionary size cannot be zero or negative.");
/* Check if any of the argument arrays is empty */
if ((ARR_NDIM(term_index) == 0) || (ARR_NDIM(term_count) == 0))
elog(ERROR, "One or more argument arrays is empty.");
int term_index_nelems = ARR_DIMS(term_index)[0];
int term_count_nelems = ARR_DIMS(term_count)[0];
/* If no. of elements in the arrays are not equal, throw an error */
if (term_index_nelems != term_count_nelems)
elog(ERROR, "No. of elements in the argument arrays are not equal.");
Datum *term_index_data;
Datum *term_count_data;
/* Deconstruct the arrays */
get_typlenbyvalalign(INT8OID, &typlen, &typbyval, &typalign);
deconstruct_array(term_index, INT8OID, typlen, typbyval, typalign,
&term_index_data, &nulls, &term_index_nelems);
get_typlenbyvalalign(FLOAT8OID, &typlen, &typbyval, &typalign);
deconstruct_array(term_count, FLOAT8OID, typlen, typbyval, typalign,
&term_count_data, &nulls, &term_count_nelems);
/* Check if term index array has indexes in proper order or not */
for(int i = 0; i < term_index_nelems; i++)
{
if (DatumGetInt64(term_index_data[i]) < 0 ||
DatumGetInt64(term_index_data[i]) >= dict_size)
elog(ERROR, "Term indexes must range from 0 to total number of elements in the dictonary - 1.");
}
float8 *histogram = (float8 *)palloc0(sizeof(float8) * dict_size);
for (int k = 0; k < dict_size; k++)
{
histogram[k] = 0;
}
for (int i = 0; i < term_index_nelems; i++)
{
uint64_t idx = DatumGetInt64(term_index_data[i]);
histogram[idx] += DatumGetFloat8(term_count_data[i]);
}
output_sfv = svec_from_float8arr(histogram, dict_size);
pfree(histogram);
PG_RETURN_POINTER(output_sfv);
}