blob: 59e7432196facc153b9c41ac7d41cb901c3cbec1 [file] [log] [blame]
/**
* @file
* \brief This file implements different operations on the SparseData structure.
*
* Most functions on sparse vectors are first defined on SparseData, and
* then packaged up as functions on sparse vectors using wrappers.
*
* This is the general procedure for adding functionalities to sparse vectors:
* 1. Write the function for SparseData.
* 2. Wrap it up for SvecType in operators.c or sparse_vector.c.
* 3. Make the function available in gp_svec.sql.
*/
#include <postgres.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include "SparseData.h"
#include "utils/builtins.h"
#if PG_VERSION_NUM >= 100000
#include "utils/varlena.h"
#endif
#include "utils/syscache.h"
#include "parser/parse_func.h"
#include "access/htup.h"
#include "catalog/pg_proc.h"
#if PG_VERSION_NUM >= 90300
#include "access/htup_details.h"
#endif
void* array_pos_ref = NULL;
/* -----------------------------------------------------------------------------
*
* The following functions used to be defined in SparseData.h. It is unclear to
* me why (Florian Schoppmann, June 4, 2012).
*
* -------------------------------------------------------------------------- */
/* BEGIN Previously in SparseData.h */
/* Returns the size of each basic type
*/
size_t
size_of_type(Oid type)
{
switch (type)
{
case FLOAT4OID: return(4);
case FLOAT8OID: return(8);
case CHAROID: return(1);
case INT2OID: return(2);
case INT4OID: return(4);
case INT8OID: return(8);
}
return(1);
}
/* Appends a count to the count array
* The function appendBinaryStringInfo always make sure to attach a trailing '\0'
* to the data array of the index StringInfo.
*/
void append_to_rle_index(StringInfo index, int64 run_len)
{
char bytes[]={0,0,0,0,0,0,0,0,0}; /* 9 bytes into which the compressed
int8 value is written */
int8_to_compword(run_len,bytes); /* create compressed version of
int8 value */
appendBinaryStringInfo(index,bytes,int8compstoragesize(bytes));
}
/* Adds a new block to a SparseData
* The function appendBinaryStringInfo always make sure to attach a trailing '\0'
* to the data array of the vals StringInfo.
*/
void add_run_to_sdata(char *run_val, int64 run_len, size_t width,
SparseData sdata)
{
StringInfo index = sdata->index;
StringInfo vals = sdata->vals;
appendBinaryStringInfo(vals,run_val,width);
append_to_rle_index(index, run_len);
sdata->unique_value_count++;
sdata->total_value_count+=run_len;
}
/*------------------------------------------------------------------------------
* Each integer count in the RLE index is stored in a number of bytes determined
* by its size. The larger the integer count, the larger the size of storage.
* Following is the map of count maximums to storage size:
* Range Storage
* --------- -----------------------------------------
* 0 - 127 signed char stores the negative count
*
* All higher than 127 have two parts, the description byte
* and the count word
*
* description byte signed char stores the number of bytes in the
* count word one of 1,2,4 or 8
*
* 128 - 32767 count word is 2 bytes, signed int16_t
* 32768 - 2147483648 count word is 4 bytes, signed int32_t
* 2147483648 - max count word is 8 bytes, signed int64
*------------------------------------------------------------------------------
*/
/* Transforms an int64 value to an RLE entry. The entry is placed in the
* provided entry[] array and will have a variable size depending on the value.
*/
void int8_to_compword(int64 num, char entry[9])
{
if (num < 128) {
/* The reason this is negative is because entry[0] is
used to record sizes in the other cases. */
entry[0] = -(char)num;
return;
}
entry[1] = (char)(num & 0xFF);
entry[2] = (char)((num & 0xFF00) >> 8);
if (num < 32768) { entry[0] = 2; return; }
entry[3] = (char)((num & 0xFF0000L) >> 16);
entry[4] = (char)((num & 0xFF000000L) >> 24);
if (num < 2147483648LL) { entry[0] = 4; return; }
entry[5] = (char)((num & 0xFF00000000LL) >> 32);
entry[6] = (char)((num & 0xFF0000000000LL) >> 40);
entry[7] = (char)((num & 0xFF000000000000LL) >> 48);
entry[8] = (char)((num & 0xFF00000000000000LL) >> 56);
entry[0] = 8;
}
/* Transforms a count entry into an int64 value when provided with a pointer
* to an entry.
*/
int64 compword_to_int8(const char *entry)
{
char size = int8compstoragesize(entry);
int16_t num_2;
char *numptr2 = (char *)(&num_2);
int32_t num_4;
char *numptr4 = (char *)(&num_4);
int64 num = 0;
char *numptr8 = (char *)(&num);
switch(size) {
case 0: /* entry == NULL represents an array of ones; see
* comment after definition of SparseDataStruct above
*/
return 1;
case 1:
num = -(entry[0]);
break;
case 3:
numptr2[0] = entry[1];
numptr2[1] = entry[2];
num = num_2;
break;
case 5:
numptr4[0] = entry[1];
numptr4[1] = entry[2];
numptr4[2] = entry[3];
numptr4[3] = entry[4];
num = num_4;
break;
case 9:
numptr8[0] = entry[1];
numptr8[1] = entry[2];
numptr8[2] = entry[3];
numptr8[3] = entry[4];
numptr8[4] = entry[5];
numptr8[5] = entry[6];
numptr8[6] = entry[7];
numptr8[7] = entry[8];
break;
}
return num;
}
void printout_double(double *vals, int num_values, int stop)
{
(void) stop; /* avoid warning about unused parameter */
char *output_str = (char *)palloc(sizeof(char)*(num_values*(6+18+2))+1);
char *str = output_str;
int numout;
for (int i=0; i<num_values; i++) {
numout = snprintf(str,26,"%6.2f,%#llX,",vals[i],
*((long long unsigned int *)(&(vals[i]))));
str += numout-1;
}
*str = '\0';
elog(NOTICE,"doubles:%s",output_str);
}
void printout_index(char *ix, int num_values, int stop)
{
(void) stop; /* avoid warning about unused parameter */
char *output_str = (char *)palloc(sizeof(char)*((num_values*7)+1));
char *str = output_str;
int numout;
elog(NOTICE,"num_values=%d",num_values);
for (int i=0; i<num_values; i++,ix+=int8compstoragesize(ix)) {
numout=snprintf(str,7,"%lld,",(long long int)compword_to_int8(ix));
str+=numout;
}
*str = '\0';
elog(NOTICE,"index:%s",output_str);
}
void printout_sdata(SparseData sdata, char *msg, int stop)
{
elog(NOTICE,"%s ==> unvct,tvct,ilen,dlen,datatype=%d,%d,%d,%d,%d",
msg,
sdata->unique_value_count,sdata->total_value_count,
sdata->index->len,sdata->vals->len,
sdata->type_of_data);
{
char *ix=sdata->index->data;
double *ar=(double *)(sdata->vals->data);
printout_double(ar,sdata->unique_value_count,0);
printout_index(ix,sdata->unique_value_count,0);
}
if (stop)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("LAL STOP")));
}
/*------------------------------------------------------------------------------
* Multiplication, Addition, Division by scalars
*------------------------------------------------------------------------------
*/
#define typref(type,ptr) (*((type *)(ptr)))
#define valref(type,val,i) (((type *)(val)->vals->data)[(i)])
#define valsquare(val) (val*val)
#define valcube(val) (val*valsquare(val))
#define valquad(val) (valsquare(valsquare(val)))
#define apply_const_to_sdata(sdata,i,op,scalar) \
switch ((sdata)->type_of_data) \
{ \
case FLOAT4OID: \
valref(float,sdata,i) op typref(float,scalar); \
break; \
case FLOAT8OID: \
valref(float8,sdata,i) op typref(float8,scalar); \
break; \
case CHAROID: \
valref(char,sdata,i) op typref(char,scalar); \
break; \
case INT2OID: \
valref(int16,sdata,i) op typref(int16,scalar); \
break; \
case INT4OID: \
valref(int32,sdata,i) op typref(int32,scalar); \
break; \
case INT8OID: \
valref(int64,sdata,i) op typref(int64,scalar); \
break; \
}
#define apply_scalar_left_to_sdata(sdata,i,op,scalar) \
switch ((sdata)->type_of_data) \
{ \
case FLOAT4OID: \
valref(float,sdata,i) = \
typref(float,scalar) op valref(float,sdata,i); \
break; \
case FLOAT8OID: \
valref(float8,sdata,i) = \
typref(float8,scalar) op valref(float8,sdata,i); \
break; \
case CHAROID: \
valref(char,sdata,i) = \
typref(char,scalar) op valref(char,sdata,i); \
break; \
case INT2OID: \
valref(int16,sdata,i) = \
typref(int16,scalar) op valref(int16_t,sdata,i); \
break; \
case INT4OID: \
valref(int32,sdata,i) = \
typref(int32,scalar) op valref(int32_t,sdata,i); \
break; \
case INT8OID: \
valref(int64,sdata,i) = \
typref(int64,scalar) op valref(int64,sdata,i); \
break; \
}
#define accum_sdata_result(result,left,i,op,right,j) \
switch ((left)->type_of_data) \
{ \
case FLOAT4OID: \
typref(float,result) = \
valref(float,left,i) op \
valref(float,right,j); \
break; \
case FLOAT8OID: \
typref(float8,result) = \
valref(float8,left,i) op \
valref(float8,right,j); \
break; \
case CHAROID: \
typref(char,result) = \
valref(char,left,i) op \
valref(char,right,j); \
break; \
case INT2OID: \
typref(int16,result) = \
valref(int16,left,i) op \
valref(int16,right,j); \
break; \
case INT4OID: \
typref(int32,result) = \
valref(int32,left,i) op \
valref(int32,right,j); \
break; \
case INT8OID: \
typref(int64,result) = \
valref(int64,left,i) op \
valref(int64,right,j); \
break; \
}
#define apply_function_sdata_scalar(result,func,left,i,scalar) \
switch ((left)->type_of_data) \
{ \
case FLOAT4OID: \
valref(float,result,i) =\
(float) func(valref(float,left,i),typref(float,scalar)); \
break; \
case FLOAT8OID: \
valref(float8,result,i) =\
func(valref(float8,left,i),typref(float8,scalar)); \
break; \
case CHAROID: \
valref(char,result,i) =\
(char) func(valref(char,left,i),typref(char,scalar)); \
break; \
case INT2OID: \
valref(int16,result,i) =\
(int16) func(valref(int16,left,i),typref(int16,scalar)); \
break; \
case INT4OID: \
valref(int32,result,i) =\
(int32) func(valref(int32,left,i),typref(int32,scalar)); \
break; \
case INT8OID: \
valref(int64,result,i) =\
(int64) func(valref(int64,left,i),typref(int64,scalar)); \
break; \
}
#define apply_square_sdata(result,left,i) \
switch ((left)->type_of_data) \
{ \
case FLOAT4OID: \
valref(float,result,i) = \
valsquare(valref(float,left,i)); \
break; \
case FLOAT8OID: \
valref(float8,result,i) = \
valsquare(valref(float8,left,i));\
break; \
case CHAROID: \
valref(char,result,i) = \
valsquare(valref(char,left,i));\
break; \
case INT2OID: \
valref(int16_t,result,i) = \
valsquare(valref(int16_t,left,i));\
break; \
case INT4OID: \
valref(int32_t,result,i) = \
valsquare(valref(int32_t,left,i));\
break; \
case INT8OID: \
valref(int64,result,i) = \
valsquare(valref(int64,left,i));\
break; \
}
#define apply_cube_sdata(result,left,i) \
switch ((left)->type_of_data) \
{ \
case FLOAT4OID: \
valref(float,result,i) = \
valcube(valref(float,left,i)); \
break; \
case FLOAT8OID: \
valref(float8,result,i) = \
valcube(valref(float8,left,i));\
break; \
case CHAROID: \
valref(char,result,i) = \
valcube(valref(char,left,i));\
break; \
case INT2OID: \
valref(int16_t,result,i) = \
valcube(valref(int16_t,left,i));\
break; \
case INT4OID: \
valref(int32_t,result,i) = \
valcube(valref(int32_t,left,i));\
break; \
case INT8OID: \
valref(int64,result,i) = \
valcube(valref(int64,left,i));\
break; \
}
#define apply_quad_sdata(result,left,i) \
switch ((left)->type_of_data) \
{ \
case FLOAT4OID: \
valref(float,result,i) = \
valquad(valref(float,left,i)); \
break; \
case FLOAT8OID: \
valref(float8,result,i) = \
valquad(valref(float8,left,i));\
break; \
case CHAROID: \
valref(char,result,i) = \
valquad(valref(char,left,i));\
break; \
case INT2OID: \
valref(int16_t,result,i) = \
valquad(valref(int16_t,left,i));\
break; \
case INT4OID: \
valref(int32_t,result,i) = \
valquad(valref(int32_t,left,i));\
break; \
case INT8OID: \
valref(int64,result,i) = \
valquad(valref(int64,left,i));\
break; \
}
/* Checks that two SparseData have the same dimension
*/
void
check_sdata_dimensions(SparseData left, SparseData right)
{
if (left->total_value_count != right->total_value_count)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("dimensions of vectors must be the same")));
}
}
/* Do one of subtract, add, multiply, or divide depending on
* the value of operation.
*/
void op_sdata_by_scalar_inplace(enum operation_t operation,
char *scalar, SparseData sdata, bool scalar_is_right)
{
if (scalar_is_right) //scalar is on the right
{
for(int i=0; i<sdata->unique_value_count; i++)
{
switch(operation)
{
case subtract:
apply_const_to_sdata(sdata,i,-=,scalar)
break;
case add:
apply_const_to_sdata(sdata,i,+=,scalar)
break;
case multiply:
apply_const_to_sdata(sdata,i,*=,scalar)
break;
case divide:
apply_const_to_sdata(sdata,i,/=,scalar)
break;
}
}
} else { //scalar is on the left
for(int i=0; i<sdata->unique_value_count; i++)
{
switch(operation)
{
case subtract:
apply_scalar_left_to_sdata(sdata,i,-,scalar)
break;
case add:
apply_scalar_left_to_sdata(sdata,i,+,scalar)
break;
case multiply:
apply_scalar_left_to_sdata(sdata,i,*,scalar)
break;
case divide:
apply_scalar_left_to_sdata(sdata,i,/,scalar)
break;
}
}
}
}
SparseData op_sdata_by_scalar_copy(enum operation_t operation,
char *scalar, SparseData source_sdata, bool scalar_is_right)
{
SparseData sdata = makeSparseDataCopy(source_sdata);
op_sdata_by_scalar_inplace(operation,scalar,sdata,scalar_is_right);
return sdata;
}
/* Exponentiates an sdata left argument with a right scalar
*/
SparseData pow_sdata_by_scalar(SparseData sdata,
char *scalar)
{
SparseData result = makeSparseDataCopy(sdata);
for(int i=0; i<sdata->unique_value_count; i++)
apply_function_sdata_scalar(result,pow,sdata,i,scalar)
return(result);
}
SparseData square_sdata(SparseData sdata)
{
SparseData result = makeSparseDataCopy(sdata);
for(int i=0; i<sdata->unique_value_count; i++)
apply_square_sdata(result,sdata,i)
return(result);
}
SparseData cube_sdata(SparseData sdata)
{
SparseData result = makeSparseDataCopy(sdata);
for(int i=0; i<sdata->unique_value_count; i++)
apply_cube_sdata(result,sdata,i)
return(result);
}
SparseData quad_sdata(SparseData sdata)
{
SparseData result = makeSparseDataCopy(sdata);
for(int i=0; i<sdata->unique_value_count; i++)
apply_quad_sdata(result,sdata,i)
return(result);
}
/* Compare two SparseData.
* We can't assume that two SparseData are in canonical form.
*
* The algorithm is simple: we traverse the left SparseData element by
* element, and for each such element x, we traverse all the elements of
* the right SparseData that overlaps with x and check that they are equal.
*
* Note: This function only works on SparseData of float8s at present.
*/
int sparsedata_cmp(SparseData left, SparseData right)
{
char * ix = left->index->data;
double * vals = (double *)left->vals->data;
char * rix = right->index->data;
double * rvals = (double *)right->vals->data;
int read = 0, rread = 0;
int rvid = 0;
int rrun_length, i;
for (i=0; i<left->unique_value_count; i++,ix+=int8compstoragesize(ix)) {
read += compword_to_int8(ix);
while (true) {
/*
* Note: IEEE754 specifies that NaN should not compare equal to
* any other floating-point value (including NaN). In order to
* allow floating-point values to be sorted and used in tree-based
* indexes, PostgreSQL treats NaN values as equal, and greater
* than all non-NaN values. NULLs are represented as NVPs here.
*
* NULL (NVP) > NaN > INF
*/
if (!IS_NVP(vals[i]) || !IS_NVP(rvals[rvid])) {
if (IS_NVP(vals[i])) { return 1; }
else if (IS_NVP(rvals[rvid])) { return -1; }
else if (!isnan(vals[i]) || !isnan(rvals[rvid])) {
if (isnan(vals[i])) { return 1; }
else if (isnan(rvals[rvid])) { return -1; }
else if (vals[i] > rvals[rvid]) { return 1; }
else if (vals[i] < rvals[rvid]) { return -1; }
}
// else NaN == NaN is true
}
// else NVP == NVP is true
/*
* We never move the right element pointer beyond
* the current left element
*/
rrun_length = compword_to_int8(rix);
if (rread + rrun_length > read) break;
/*
* Increase counters if there are more elements in
* the right SparseData that overlaps with current
* left element
*/
rread += rrun_length;
if (rvid < right->unique_value_count) {
rix += int8compstoragesize(rix);
rvid++;
}
if (rvid == right->unique_value_count) {
Assert(left->total_value_count >= right->total_value_count);
if (left->total_value_count == right->total_value_count) {
Assert(rread == read);
return 0;
} else { return 1; }
}
if (rread == read) break;
}
}
return -1;
}
/* Compare two SparseData, and return true if the left one is less.
* We can't assume that two SparseData are in canonical form.
*
* The algorithm is simple: we traverse the left SparseData element by
* element, and for each such element x, we traverse all the elements of
* the right SparseData that overlaps with x and check that they are equal.
*
* Note: This function only works on SparseData of float8s at present.
*/
bool sparsedata_lt(SparseData left, SparseData right)
{
char * ix = left->index->data;
double * vals = (double *)left->vals->data;
char * rix = right->index->data;
double * rvals = (double *)right->vals->data;
int read = 0, rread = 0;
int rvid = 0;
int rrun_length, i;
for (i=0; i<left->unique_value_count; i++,ix+=int8compstoragesize(ix)) {
read += compword_to_int8(ix);
while (true) {
/*
* Note: IEEE754 specifies that NaN should not compare equal to
* any other floating-point value (including NaN). In order to
* allow floating-point values to be sorted and used in tree-based
* indexes, PostgreSQL treats NaN values as equal, and greater
* than all non-NaN values. NULLs are represented as NVPs here.
*
* NULL (NVP) > NaN > INF
*/
if (!IS_NVP(vals[i]) || !IS_NVP(rvals[rvid])) {
if (IS_NVP(vals[i])) { return false; }
else if (IS_NVP(rvals[rvid])) { return true; }
else if (!isnan(vals[i]) || !isnan(rvals[rvid])) {
if (isnan(vals[i])) { return false; }
else if (isnan(rvals[rvid])) { return true; }
else if (vals[i] > rvals[rvid]) { return false; }
else if (vals[i] < rvals[rvid]) { return true; }
}
// else NaN == NaN is true
}
// else NVP == NVP is true
/*
* We never move the right element pointer beyond
* the current left element
*/
rrun_length = compword_to_int8(rix);
if (rread + rrun_length > read) break;
/*
* Increase counters if there are more elements in
* the right SparseData that overlaps with current
* left element
*/
rread += rrun_length;
if (rvid < right->unique_value_count) {
rix += int8compstoragesize(rix);
rvid++;
}
if (rvid == right->unique_value_count) {
Assert(left->total_value_count >= right->total_value_count);
if (left->total_value_count == right->total_value_count) {
Assert(rread == read);
return false;
} else { return false; }
}
if (rread == read) break;
}
}
return true;
}
/* Compare two SparseData, and return true if the left one is greater.
* We can't assume that two SparseData are in canonical form.
*
* The algorithm is simple: we traverse the left SparseData element by
* element, and for each such element x, we traverse all the elements of
* the right SparseData that overlaps with x and check that they are equal.
*
* Note: This function only works on SparseData of float8s at present.
*/
bool sparsedata_gt(SparseData left, SparseData right)
{
char * ix = left->index->data;
double * vals = (double *)left->vals->data;
char * rix = right->index->data;
double * rvals = (double *)right->vals->data;
int read = 0, rread = 0;
int rvid = 0;
int rrun_length, i;
for (i=0; i<left->unique_value_count; i++,ix+=int8compstoragesize(ix)) {
read += compword_to_int8(ix);
while (true) {
/*
* Note: IEEE754 specifies that NaN should not compare equal to
* any other floating-point value (including NaN). In order to
* allow floating-point values to be sorted and used in tree-based
* indexes, PostgreSQL treats NaN values as equal, and greater
* than all non-NaN values. NULLs are represented as NVPs here.
*
* NULL (NVP) > NaN > INF
*/
if (!IS_NVP(vals[i]) || !IS_NVP(rvals[rvid])) {
if (IS_NVP(vals[i])) { return true; }
else if (IS_NVP(rvals[rvid])) { return false; }
else if (!isnan(vals[i]) || !isnan(rvals[rvid])) {
if (isnan(vals[i])) { return true; }
else if (isnan(rvals[rvid])) { return false; }
else if (vals[i] > rvals[rvid]) { return true; }
else if (vals[i] < rvals[rvid]) { return false; }
}
// else NaN == NaN is true
}
// else NVP == NVP is true
/*
* We never move the right element pointer beyond
* the current left element
*/
rrun_length = compword_to_int8(rix);
if (rread + rrun_length > read) break;
/*
* Increase counters if there are more elements in
* the right SparseData that overlaps with current
* left element
*/
rread += rrun_length;
if (rvid < right->unique_value_count) {
rix += int8compstoragesize(rix);
rvid++;
}
if (rvid == right->unique_value_count) {
Assert(left->total_value_count >= right->total_value_count);
if (left->total_value_count == right->total_value_count) {
Assert(rread == read);
return false;
} else { return true; }
}
if (rread == read) break;
}
}
return false;
}
/* Checks the equality of two SparseData. We can't assume that two
* SparseData are in canonical form.
*
* The algorithm is simple: we traverse the left SparseData element by
* element, and for each such element x, we traverse all the elements of
* the right SparseData that overlaps with x and check that they are equal.
*
* Note: This function only works on SparseData of float8s at present.
*/
bool sparsedata_eq(SparseData left, SparseData right)
{
if (left->total_value_count != right->total_value_count)
return false;
char * ix = left->index->data;
double * vals = (double *)left->vals->data;
char * rix = right->index->data;
double * rvals = (double *)right->vals->data;
int read = 0, rread = 0;
int rvid = 0;
int rrun_length, i;
for (i=0; i<left->unique_value_count; i++,ix+=int8compstoragesize(ix)) {
read += compword_to_int8(ix);
while (true) {
/*
* Note: IEEE754 specifies that NaN should not compare equal to
* any other floating-point value (including NaN). In order to
* allow floating-point values to be sorted and used in tree-based
* indexes, PostgreSQL treats NaN values as equal, and greater
* than all non-NaN values.
*
* We use memcmp to handle NULLs and NaNs properly.
*/
if (memcmp(&(vals[i]),&(rvals[rvid]),sizeof(float8))!=0)
return false;
/*
* We never move the right element pointer beyond
* the current left element
*/
rrun_length = compword_to_int8(rix);
if (rread + rrun_length > read) break;
/*
* Increase counters if there are more elements in
* the right SparseData that overlaps with current
* left element
*/
rread += rrun_length;
if (rvid < right->unique_value_count) {
rix += int8compstoragesize(rix);
rvid++;
}
if (rread == read) break;
}
}
Assert(rread == read);
return true;
}
/* Checks the equality of two SparseData. We can't assume that two
* SparseData are in canonical form.
*
* The algorithm is simple: we traverse the left SparseData element by
* element, and for each such element x, we traverse all the elements of
* the right SparseData that overlaps with x and check that they are equal.
*
* Unlike sparsedata_eq, this function assumes that any zero represents a
* missing data and hence still implies equality
*
* Note: This function only works on SparseData of float8s at present.
*/
bool sparsedata_eq_zero_is_equal(SparseData left, SparseData right)
{
char * ix = left->index->data;
double* vals = (double *)left->vals->data;
char * rix = right->index->data;
double* rvals = (double *)right->vals->data;
int read = 0, rread = 0;
int i=-1, j=-1, minimum = 0;
minimum = (left->total_value_count > right->total_value_count) ?
right->total_value_count : left->total_value_count;
for (;(read < minimum)||(rread < minimum);) {
if (read < rread) {
read += (int)compword_to_int8(ix);
ix +=int8compstoragesize(ix);
i++;
if ((memcmp(&(vals[i]),&(rvals[j]),sizeof(float8))!=0) &&
(vals[i]!=0.0)&&(rvals[j]!=0.0)) {
return false;
}
} else if (read > rread){
rread += (int)compword_to_int8(rix);
rix+=int8compstoragesize(rix);
j++;
if ((memcmp(&(vals[i]),&(rvals[j]),sizeof(float8))!=0) &&
(vals[i]!=0.0)&&(rvals[j]!=0.0)) {
return false;
}
} else {
read += (int)compword_to_int8(ix);
rread += (int)compword_to_int8(rix);
ix +=int8compstoragesize(ix);
rix+=int8compstoragesize(rix);
i++;
j++;
if ((memcmp(&(vals[i]),&(rvals[j]),sizeof(float8))!=0) &&
(vals[i]!=0.0)&&(rvals[j]!=0.0)) {
return false;
}
}
}
/*sprintf(result, "result after %d %f", j, rvals[j]);
ereport(NOTICE,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg(result)));*/
return true;
}
/* Checks if one SparseData object contained in another
*
* First vector is said to contain second if all non-zero elements
* of the second data object equal those of the first one
*
* Note: This function only works on SparseData of float8s at present.
*/
bool sparsedata_contains(SparseData left, SparseData right)
{
char * ix = left->index->data;
double* vals = (double *)left->vals->data;
char * rix = right->index->data;
double* rvals = (double *)right->vals->data;
int read = 0, rread = 0;
int i=-1, j=-1, minimum = 0;
int lsize, rsize;
lsize = left->total_value_count;
rsize = right->total_value_count;
if((rsize > lsize)&&(rvals[right->unique_value_count-1]!=0.0)){
return false;
}
minimum = (lsize > rsize)?rsize:lsize;
for (;(read < minimum)||(rread < minimum);) {
if(read < rread){
read += (int)compword_to_int8(ix);
ix +=int8compstoragesize(ix);
i++;
if ((memcmp(&(vals[i]),&(rvals[j]),sizeof(float8))!=0)&&(rvals[j]!=0.0)){
return false;
}
}else if(read > rread){
rread += (int)compword_to_int8(rix);
rix+=int8compstoragesize(rix);
j++;
if ((memcmp(&(vals[i]),&(rvals[j]),sizeof(float8))!=0)&&(rvals[j]!=0.0)){
return false;
}
}else{
read += (int)compword_to_int8(ix);
rread += (int)compword_to_int8(rix);
ix +=int8compstoragesize(ix);
rix+=int8compstoragesize(rix);
i++;
j++;
if ((memcmp(&(vals[i]),&(rvals[j]),sizeof(float8))!=0)&&(rvals[j]!=0.0)){
return false;
}
}
}
return true;
}
static inline double id(double x) { return x; }
static inline double square(double x) { return x*x; }
static inline double myabs(double x) { return (x < 0) ? -(x) : x ; }
/* This function is introduced to capture a common routine for
* traversing a SparseData, transforming each element as we go along and
* summing up the transformed elements. The method is non-destructive to
* the input SparseData.
*/
double
accum_sdata_values_double(SparseData sdata, double (*func)(double))
{
double accum=0.;
char *ix = sdata->index->data;
double *vals = (double *)sdata->vals->data;
int64 run_length;
for (int i=0;i<sdata->unique_value_count;i++)
{
run_length = compword_to_int8(ix);
accum += func(vals[i])*run_length;
ix+=int8compstoragesize(ix);
}
return (accum);
}
/* Computes the running sum of the elements of a SparseData */
double sum_sdata_values_double(SparseData sdata) {
return accum_sdata_values_double(sdata, id);
}
/* Computes the l2 norm of a SparseData */
double l2norm_sdata_values_double(SparseData sdata) {
return sqrt(accum_sdata_values_double(sdata, square));
}
/* Computes the l1 norm of a SparseData */
double l1norm_sdata_values_double(SparseData sdata) {
return accum_sdata_values_double(sdata, myabs);
}
/*
* Addition, Scalar Product, Division between SparseData arrays
*
* There are a few factors to consider:
* - The dimension of the left and right arguments must be the same
* - We employ an algorithm that does the computation on the compressed contents
* which creates a new SparseData array
*------------------------------------------------------------------------------
*/
SparseData op_sdata_by_sdata(enum operation_t operation,
SparseData left, SparseData right)
{
SparseData sdata = makeSparseData();
/*
* Loop over the contents of the left array, operating on elements
* of the right array and append a new value to the sdata when a
* new unique value is generated.
*
* We will manage two cursors, one for each of left and right arrays
*/
char *liptr=left->index->data;
char *riptr=right->index->data;
int left_run_length, right_run_length;
char *new_value,*last_new_value;
int tot_run_length=-1;
left_run_length = compword_to_int8(liptr);
right_run_length = compword_to_int8(riptr);
int left_lst=0,right_lst=0;
int left_nxt=left_run_length,right_nxt=right_run_length;
int nextpos = Min(left_nxt,right_nxt),lastpos=0;
int i=0,j=0;
size_t width = size_of_type(left->type_of_data);
check_sdata_dimensions(left,right);
new_value = (char *)palloc(width);
last_new_value = (char *)palloc(width);
while (1)
{
switch (operation)
{
case subtract:
accum_sdata_result(new_value,left,i,-,right,j)
break;
case add:
default:
accum_sdata_result(new_value,left,i,+,right,j)
break;
case multiply:
accum_sdata_result(new_value,left,i,*,right,j)
break;
case divide:
accum_sdata_result(new_value,left,i,/,right,j)
break;
}
/*
* Potentially add a new run, depending on whether this is a
* different value from the previous calculation. It may be
* that this calculation has produced an identical value to
* the previous, in which case we store it up, waiting for a
* new value to happen.
*/
if (tot_run_length==-1)
{
memcpy(last_new_value,new_value,width);
tot_run_length=0;
}
if (memcmp(new_value,last_new_value,width))
{
add_run_to_sdata(last_new_value,tot_run_length,sizeof(float8),sdata);
tot_run_length = 0;
memcpy(last_new_value,new_value,width);
}
tot_run_length += (nextpos-lastpos);
if ((left_nxt==right_nxt)&&(left_nxt==(left->total_value_count))) {
break;
} else if (left_nxt==right_nxt) {
i++;j++;
left_lst=left_nxt;right_lst=right_nxt;
liptr+=int8compstoragesize(liptr);
riptr+=int8compstoragesize(riptr);
} else if (nextpos==left_nxt) {
i++;
left_lst=left_nxt;
liptr+=int8compstoragesize(liptr);
} else if (nextpos==right_nxt) {
j++;
right_lst=right_nxt;
riptr+=int8compstoragesize(riptr);
}
left_run_length = compword_to_int8(liptr);
right_run_length = compword_to_int8(riptr);
left_nxt=left_run_length+left_lst;
right_nxt=right_run_length+right_lst;
lastpos=nextpos;
nextpos = Min(left_nxt,right_nxt);
}
/*
* Add the last run if we ended with a repeating value
*/
if (tot_run_length!=0)
add_run_to_sdata(new_value,tot_run_length,sizeof(float8),sdata);
/*
* Set the return data type
*/
sdata->type_of_data = left->type_of_data;
pfree(new_value);
pfree(last_new_value);
return sdata;
}
/* END Previously in SparseData.h */
/**
* @return A SparseData structure with allocated empty dynamic
* StringInfo of unknown initial sizes.
*/
SparseData makeSparseData(void) {
/* Allocate the struct */
SparseData sdata = (SparseData)palloc(sizeof(SparseDataStruct));
/* Allocate the included elements */
sdata->vals = makeStringInfo();
sdata->index = makeStringInfo();
sdata->unique_value_count=0;
sdata->total_value_count=0;
sdata->type_of_data = FLOAT8OID;
return sdata;
// makeStringInfo ensures vals and index each has a trailing '\0'
}
/**
* @return A SparseData with zero storage in its StringInfos.
*/
SparseData makeEmptySparseData(void) {
/* Allocate the struct */
SparseData sdata = (SparseData)palloc(sizeof(SparseDataStruct));
/* Set up the data area */
sdata->vals = (StringInfo)palloc(sizeof(StringInfoData));
sdata->vals->data = (char *)palloc(1);
sdata->vals->maxlen = 1;
sdata->vals->data[0] = '\0';
sdata->vals->len = 0;
sdata->vals->cursor = 0;
sdata->index = (StringInfo)palloc(sizeof(StringInfoData));
sdata->index->data = (char *)palloc(1);
sdata->index->maxlen = 1;
sdata->index->data[0] = '\0';
sdata->index->len = 0;
sdata->index->cursor = 0;
sdata->unique_value_count = 0;
sdata->total_value_count = 0;
sdata->type_of_data = FLOAT8OID;
return sdata;
}
/**
* @param vals An array of data values
* @param index An array of run-lengths
* @param datasize The length of the vals array
* @param indexsize The length of the index array
* @param datatype The object ID of the elements represented in the vals array
* @param unique_value_count The number of RLE blocks in the vals array
* @param total_value_count The total number of individual elements represented in the vals and index arrays
* @return A SparseData created in place using pointers to the vals and index data
*/
SparseData makeInplaceSparseData(char *vals, char *index,
int datasize, int indexsize, Oid datatype,
int unique_value_count, int total_value_count) {
SparseData sdata = makeEmptySparseData();
sdata->unique_value_count = unique_value_count;
sdata->total_value_count = total_value_count;
/*
* To be safe, we obey the constraint demanded in lib/stringinfo.h that
* the data field of StringInfoData is always terminated with a null.
*/
if (vals != NULL && vals[datasize] != '\0') {
char * vals_temp = (char *)palloc(datasize+1);
memcpy(vals_temp, vals, datasize);
vals_temp[datasize] = '\0';
vals = vals_temp;
}
if (index != NULL && index[indexsize] != '\0') {
char * index_temp = (char *)palloc(indexsize+1);
memcpy(index_temp, index, indexsize);
index_temp[indexsize] = '\0';
index = index_temp;
}
sdata->vals->data = vals;
sdata->vals->len = datasize;
sdata->vals->maxlen = datasize+1;
sdata->index->data = index;
sdata->index->len = indexsize;
sdata->index->maxlen = index == NULL ? 0 : indexsize+1;
// here we are taking care of the special case of null index, which
// represents uncompressed sparsedata; see SparseDataStruct defn
sdata->type_of_data = datatype;
return sdata;
}
/**
* @return A copy of an existing SparseData.
*/
SparseData makeSparseDataCopy(SparseData source_sdata) {
/* Allocate the struct */
SparseData sdata = (SparseData)palloc(sizeof(SparseDataStruct));
/* Allocate the included elements */
sdata->vals = copyStringInfo(source_sdata->vals);
sdata->index = copyStringInfo(source_sdata->index);
sdata->type_of_data = source_sdata->type_of_data;
sdata->unique_value_count = source_sdata->unique_value_count;
sdata->total_value_count = source_sdata->total_value_count;
return sdata;
}
/**
* @param constant The value of an RLE block
* @param dimension The size of an RLE block
* @return A SparseData with a single RLE block of size dimension having value constant
*/
SparseData makeSparseDataFromDouble(double constant,int64 dimension) {
char *bytestore=(char *)palloc(sizeof(char)*9);
SparseData sdata = float8arr_to_sdata(&constant,1);
int8_to_compword(dimension,bytestore); /* create compressed version of
int8 value */
int newlen = int8compstoragesize(bytestore);
sdata->index->len = 0; // write over existing value
appendBinaryStringInfo(sdata->index, bytestore, newlen);
sdata->total_value_count = dimension;
return sdata;
}
/**
* Frees up the memory occupied by sdata
*/
void freeSparseData(SparseData sdata) {
pfree(sdata->vals);
pfree(sdata->index);
pfree(sdata);
}
/**
* Frees up the memory occupied by sdata, including the data elements of vals and index.
*/
void freeSparseDataAndData(SparseData sdata) {
pfree(sdata->vals->data);
pfree(sdata->index->data);
freeSparseData(sdata);
}
/**
* @param sinfo The StringInfo structure to be copied
* @return A copy of sinfo
*/
StringInfo copyStringInfo(StringInfo sinfo) {
StringInfo result;
char *data;
if (sinfo->data == NULL) {
data = NULL;
} else {
data = (char *)palloc(sizeof(char)*(sinfo->len+1));
memcpy(data,sinfo->data,sinfo->len);
data[sinfo->len] = '\0';
}
result = makeStringInfoFromData(data,sinfo->len);
return result;
}
/**
* @param data Pointer to an array of elements
* @param len The size of the data array
* @return A StringInfo from a data pointer and length
*/
StringInfo makeStringInfoFromData(char *data,int len) {
StringInfo sinfo;
sinfo = (StringInfo)palloc(sizeof(StringInfoData));
if (data != NULL && data[len] != '\0') {
char * data_temp = (char *)palloc(len+1);
memcpy(data_temp, data, len);
data_temp[len] = '\0';
data = data_temp;
}
sinfo->data = data;
sinfo->len = len;
sinfo->maxlen = len+1;
sinfo->cursor = 0;
return sinfo;
}
/**
* @param array The array of doubles to be converted to a SparseData
* @param count The size of array
* @return A SparseData representation of an input array of doubles
*/
SparseData float8arr_to_sdata(double *array, int count) {
return arr_to_sdata((char *)array, sizeof(float8), FLOAT8OID, count);
}
/**
* @param array The array of elements to be converted to a SparseData
* @param width The size of the elements in array
* @param type_of_data The object ID of the elements in array
* @param count The size of array
* @return A SparseData representation of an input array
*/
SparseData arr_to_sdata(char *array, size_t width, Oid type_of_data, int count){
char *run_val=array;
int64 run_len=1;
SparseData sdata = makeSparseData();
sdata->type_of_data=type_of_data;
for (int i=1; i<count; i++) {
char *curr_val=array+ (i*size_of_type(type_of_data));
/*
* Note that special double values like denormalized numbers and exceptions
* like NaN are treated like any other value - if there are duplicates, the
* value of the special number is preserved and they are counted.
*/
if (memcmp(curr_val,run_val,width))
{ /*run is interrupted, initiate new run */
/* package up the finished run */
add_run_to_sdata(run_val,run_len,width,sdata);
/* mark beginning of new run */
run_val=curr_val;
run_len=1;
} else
{ /* we're still in the same run */
run_len++;
}
}
add_run_to_sdata(run_val, run_len, width, sdata); /* package up the last run */
/* Add the final tallies */
sdata->unique_value_count = sdata->vals->len/width;
sdata->total_value_count = count;
return sdata;
}
int compar(const void *i, const void *j){
return (int)((int64*)array_pos_ref)[*(int*)i] - ((int64*)array_pos_ref)[*(int*)j];
}
/**
* @param array_val The array of values to be converted to values in SparseData
* @param array_pos The array of positions to be converted to runs in SparseData
* @param type_of_data type of the value element
* @param count The (common) size of array and array_pos
* @param end The size of the desired SparseData
* @param default_val The default value for positions unspecified in array_pos
* @return A SparseData representation of an input array of doubles
*/
SparseData position_to_sdata(double *array_val, int64 *array_pos,
Oid type_of_data,
int count, int64 end, double default_val) {
char * array = (char *)array_val;
char * base_val = (char *)&default_val;
size_t width = size_of_type(type_of_data);
char *run_val=array;
int64 run_len;
SparseData sdata = makeSparseData();
int *index = (int*)palloc(count*sizeof(int));
for(int i = 0; i < count; ++i)
index[i] = i;
/* this is a temp solution that will be in place only until qsort_r
* is a standard library function */
array_pos_ref = array_pos;
qsort(index, count, sizeof(int), compar);
sdata->type_of_data=type_of_data;
if(array_pos[index[0]] > 1){
run_len = array_pos[index[0]]-1;
add_run_to_sdata(base_val,run_len,width,sdata);
}
for (int i = 0; i<count; i++) {
run_len=1;
/*
* Note that special double values like denormalized numbers and exceptions
* like NaN are treated like any other value - if there are duplicates, the
* value of the special number is preserved and they are counted.
*/
while ((i < count-1) &&
((array_pos[index[i+1]] - array_pos[index[i]])==1) &&
(memcmp((array+index[i]*size_of_type(type_of_data)),
(array+index[i+1]*size_of_type(type_of_data)),width)==0)) {
run_len++;
i++;
}
while ((i < count-1)&&((array_pos[index[i+1]] - array_pos[index[i]])==0)) {
if ((memcmp((array+index[i]*size_of_type(type_of_data)),
(array+index[i+1]*size_of_type(type_of_data)),width)==0)) {
i++;
} else {
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("posit_to_sdata conflicting values for the same position")));
}
}
run_val = array+index[i]*size_of_type(type_of_data);
add_run_to_sdata(run_val,run_len,width,sdata);
if(i < count-1){
run_len = array_pos[index[i+1]]-array_pos[index[i]]-1;
if (run_len > 0){
add_run_to_sdata(base_val,run_len,width,sdata);
}
}else if(array_pos[index[i]] < end){
run_len = end-array_pos[index[i]];
if (run_len > 0){
add_run_to_sdata(base_val,run_len,width,sdata);
}
}
}
/* Add the final tallies */
sdata->unique_value_count = sdata->vals->len/width;
sdata->total_value_count = end;
pfree(index);
return sdata;
}
/**
* @param sdata The SparseData to be converted to an array of float8s
* @return A float8[] representation of a SparseData
*/
double *sdata_to_float8arr(SparseData sdata) {
double *array;
int j, aptr;
char *iptr;
if (sdata->type_of_data != FLOAT8OID) {
ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("Data type of SparseData is not FLOAT64\n")));
}
if ((array=(double *)palloc(sizeof(double)*(sdata->total_value_count)))
== NULL)
{
ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("Error allocating memory for array\n")));
}
iptr = sdata->index->data;
aptr = 0;
for (int i=0; i<sdata->unique_value_count; i++) {
for (j=0;j<compword_to_int8(iptr);j++,aptr++) {
array[aptr] = ((double *)(sdata->vals->data))[i];
}
iptr+=int8compstoragesize(iptr);
}
if ((aptr) != sdata->total_value_count)
{
ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("Array size is incorrect, is: %d and should be %d\n",
aptr,sdata->total_value_count)));
pfree(array);
return NULL;
}
return array;
}
/**
* @return An array of integers given the (compressed) count array of a SparseData
*/
int64 *sdata_index_to_int64arr(SparseData sdata) {
char *iptr;
int64 *array_ix = (int64 *)palloc(
sizeof(int64)*(sdata->unique_value_count));
iptr = sdata->index->data;
for (int i=0; i<sdata->unique_value_count; i++,
iptr+=int8compstoragesize(iptr)) {
array_ix[i] = compword_to_int8(iptr);
}
return(array_ix);
}
/**
* @param target The memory area to store the serialised SparseData
* @para source The SparseData to be serialised
* @return The serialisation of a SparseData structure
*/
void serializeSparseData(char *target, SparseData source)
{
/* SparseDataStruct header */
memcpy(target,source,SIZEOF_SPARSEDATAHDR);
/* Two StringInfo structures describing the data and index */
memcpy(SDATA_DATA_SINFO(target), source->vals,sizeof(StringInfoData));
memcpy(SDATA_INDEX_SINFO(target),source->index,sizeof(StringInfoData));
/* The unique data values */
memcpy(SDATA_VALS_PTR(target),source->vals->data,source->vals->maxlen);
/* The index values */
memcpy(SDATA_INDEX_PTR(target),source->index->data,source->index->maxlen);
/*
* Set pointers to the data areas of the serialized structure
* First the two StringInfo structures contained in the SparseData,
* then the data areas inside each of the two StringInfos.
*/
((SparseData)target)->vals = (StringInfo)SDATA_DATA_SINFO(target);
((SparseData)target)->index = (StringInfo)SDATA_INDEX_SINFO(target);
((StringInfo)(SDATA_DATA_SINFO(target)))->data = SDATA_VALS_PTR(target);
if (source->index->data != NULL)
{
((StringInfo)(SDATA_INDEX_SINFO(target)))->data = SDATA_INDEX_PTR(target);
} else {
((StringInfo)(SDATA_INDEX_SINFO(target)))->data = NULL;
}
}
/**
* Prints a SparseData
*/
void printSparseData(SparseData sdata) {
int value_count = sdata->unique_value_count;
{
char *indexdata = sdata->index->data;
double *values = (double *)(sdata->vals->data);
for (int i=0; i<value_count; i++) {
printf("run_length[%d] = %lld, ",i,(long long int)compword_to_int8(indexdata));
printf("value[%d] = %f\n",i,values[i]);
indexdata+=int8compstoragesize(indexdata);
}
}
}
/**
* @param sdata The SparseData to be projected on
* @param idx The index to be projected
* @return The element of a SparseData at location idx.
*/
double sd_proj(SparseData sdata, int idx) {
char * ix = sdata->index->data;
double * vals = (double *)sdata->vals->data;
int read, i;
/* error checking */
if (0 >= idx || idx > sdata->total_value_count)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("Index out of bounds.")));
/* find desired block; as is normal in SQL, we start counting from one */
read = compword_to_int8(ix);
i = 0;
while (read < idx) {
ix += int8compstoragesize(ix);
read += compword_to_int8(ix);
i++;
}
return vals[i];
}
/**
* @param sdata The SparseData from which to extract a subarray
* @param start The start index of the desired subarray
* @param end The end index of the desired subarray
* @return The sub-array, indexed by start and end, of a SparseData.
*/
SparseData subarr(SparseData sdata, int start, int end) {
char * ix = sdata->index->data;
double * vals = (double *)sdata->vals->data;
SparseData ret = makeSparseData();
size_t wf8 = sizeof(float8);
if (start > end)
return reverse(subarr(sdata,end,start));
/* error checking */
if (0 >= start || start > end || end > sdata->total_value_count)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("Array index out of bounds.")));
/* find start block */
int read = compword_to_int8(ix);
int i = 0;
while (read < start) {
ix += int8compstoragesize(ix);
read += compword_to_int8(ix);
i++;
}
if (end <= read) {
/* the whole subarray is in the first block, we are done */
add_run_to_sdata((char *)(&vals[i]), end-start+1, wf8, ret);
return ret;
}
/* else start building subarray */
add_run_to_sdata((char *)(&vals[i]), read-start+1, wf8, ret);
for (int j=i+1; j<sdata->unique_value_count; j++) {
ix += int8compstoragesize(ix);
int esize = compword_to_int8(ix);
if (read + esize > end) {
add_run_to_sdata((char *)(&vals[j]), end-read, wf8,ret);
break;
}
add_run_to_sdata((char *)(&vals[j]), esize, wf8, ret);
read += esize;
if (read == end) break;
}
return ret;
}
/**
* @param sdata The SparseData to be reversed
* @return A copy of the input SparseData, with the order of the elements reversed.
*/
SparseData reverse(SparseData sdata) {
char * ix = sdata->index->data;
double * vals = (double *)sdata->vals->data;
SparseData ret = makeSparseData();
size_t w = sizeof(float8);
/* move to the last count array element */
for (int j=0; j<sdata->unique_value_count-1; j++)
ix += int8compstoragesize(ix);
/* copy from right to left */
for (int j=sdata->unique_value_count-1; j!=-1; j--) {
add_run_to_sdata((char *)(&vals[j]),compword_to_int8(ix),w,ret);
ix -= int8compstoragesize(ix);
}
return ret;
}
/**
* @param left The SparseData that comes first in the resulting concatenation
* @param right The SparseData that comes second in the resulting concatenation
* @return The concatenation of two input SparseData.
*/
SparseData concat(SparseData left, SparseData right) {
if (left == NULL && right == NULL) {
return NULL;
} else if (left == NULL && right != NULL) {
return makeSparseDataCopy(right);
} else if (left != NULL && right == NULL) {
return makeSparseDataCopy(left);
}
SparseData sdata = makeEmptySparseData();
char *vals,*index;
int l_val_len = left->vals->len;
int r_val_len = right->vals->len;
int l_ind_len = left->index->len;
int r_ind_len = right->index->len;
int val_len = l_val_len + r_val_len;
int ind_len = l_ind_len + r_ind_len;
vals = (char *)palloc(sizeof(char)*val_len + 1);
index = (char *)palloc(sizeof(char)*ind_len + 1);
memcpy(vals, left->vals->data,l_val_len);
memcpy(vals+l_val_len,right->vals->data,r_val_len);
vals[val_len] = '\0';
memcpy(index, left->index->data,l_ind_len);
memcpy(index+l_ind_len,right->index->data,r_ind_len);
index[ind_len] = '\0';
sdata->vals = makeStringInfoFromData(vals,val_len);
sdata->index = makeStringInfoFromData(index,ind_len);
sdata->type_of_data = left->type_of_data;
sdata->unique_value_count = left->unique_value_count +
right->unique_value_count;
sdata->total_value_count = left->total_value_count +
right->total_value_count;
return sdata;
}
/**
* @param rep The SparseData to be replicated
* @param multiplier The number of times to replicate rep
* @return The input rep SparseData replicated multiplier times.
*/
SparseData concat_replicate(SparseData rep, int multiplier) {
if (rep == NULL) return NULL;
SparseData sdata = makeEmptySparseData();
char *vals,*index;
int l_val_len = rep->vals->len;
int l_ind_len = rep->index->len;
int val_len = l_val_len*multiplier;
int ind_len = l_ind_len*multiplier;
vals = (char *)palloc(sizeof(char)*val_len + 1);
index = (char *)palloc(sizeof(char)*ind_len + 1);
for (int i=0;i<multiplier;i++) {
memcpy(vals+i*l_val_len,rep->vals->data,l_val_len);
memcpy(index+i*l_ind_len,rep->index->data,l_ind_len);
}
vals[val_len] = '\0';
index[ind_len] = '\0';
sdata->vals = makeStringInfoFromData(vals,val_len);
sdata->index = makeStringInfoFromData(index,ind_len);
sdata->type_of_data = rep->type_of_data;
sdata->unique_value_count = multiplier * rep->unique_value_count;
sdata->total_value_count = multiplier * rep->total_value_count;
return sdata;
}
static bool lapply_error_checking(Oid foid, List * funcname);
/**
* This function applies an input function on all elements of a sparse data.
* The function is modelled after the corresponding function in R.
*
* @param func The name of the function to apply
* @param sdata The input sparse data to be modified
* @return A SparseData with the same dimension as sdata but with each element sdata[i] replaced by func(sdata[i])
*/
SparseData lapply(text * func, SparseData sdata) {
Oid argtypes[1] = { FLOAT8OID };
List * funcname = textToQualifiedNameList(func);
SparseData result = makeSparseDataCopy(sdata);
Oid foid = LookupFuncName(funcname, 1, argtypes, false);
lapply_error_checking(foid, funcname);
for (int i=0; i<sdata->unique_value_count; i++)
valref(float8,result,i) =
DatumGetFloat8(
OidFunctionCall1(foid,
Float8GetDatum(valref(float8,sdata,i))));
return result;
}
/* This function checks for error conditions in lapply() function calls.
*/
static bool lapply_error_checking(Oid foid, List * func) {
/* foid != InvalidOid; otherwise LookupFuncName would raise error.
Here we check that the return type of foid is float8. */
HeapTuple ftup = SearchSysCache(PROCOID,
ObjectIdGetDatum(foid), 0, 0, 0);
Form_pg_proc pform = (Form_pg_proc) GETSTRUCT(ftup);
if (pform->prorettype != FLOAT8OID)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("return type of %s is not double",
NameListToString(func))));
// check volatility
ReleaseSysCache(ftup);
return true;
}