methods/svec/src/pg_gp/SparseData.h - madlib - Git at Google

 /**
  * @file
  * \brief This file defines the SparseData structure and some basic
  * functions on SparseData.
  *
  * SparseData provides array storage for repetitive data as commonly found
  * in numerical analysis of sparse arrays and matrices.
  * A general run-length encoding scheme is adopted.
  * Sequential duplicate values in the array are represented in an index
  * structure that stores the count of the number of times a given value
  * is duplicated.
  * All storage is allocated with palloc().
  *
  * NOTE:
  * The SparseData structure is an in-memory structure and so must be
  * serialized into a persisted structure like a VARLENA when leaving
  * a GP / Postgres function.  This implies a COPY from the SparseData
  * to the VARLENA.
  */

 #ifndef SPARSEDATA_H
 #define SPARSEDATA_H

 #include <math.h>
 #include <string.h>
 #include "postgres.h"
 #include "lib/stringinfo.h"
 #include "utils/array.h"
 #include "catalog/pg_type.h"

 /*!
  * \internal
  * SparseData holds information about a sparse array of values
  * \endinternal
  */
 typedef struct
 {
 	Oid type_of_data; 	/**< The native type of the data entries */
 	int unique_value_count; /**< The number of unique values in the data array */
 	int total_value_count;  /**< The total number of values, including duplicates */
 	StringInfo vals;        /**< The unique number values are stored here as a stream of bytes */
 	StringInfo index; 	/**< A count of each value is stored in the index */
 } SparseDataStruct;

 /*
  * Sometimes we wish to store an uncompressed array inside a SparseDataStruct;
  * instead of storing an array of ones [1,1,..,1,1] in the index field, which
  * is wasteful, we choose to use index->data == NULL to represent this special
  * case.
  */

 /**
  * Pointer to a SparseDataStruct
  */
 typedef SparseDataStruct *SparseData;

 /*------------------------------------------------------------------------------
  * Serialized SparseData
  *------------------------------------------------------------------------------
  * SparseDataStruct Contents
  * StringInfoData Contents for "vals"
  * StringInfoData Contents for "index"
  * data contents for "vals" (size is vals->maxlen)
  * data contents for "index" (size is index->maxlen)
  *
  * 	The vals and index fields are serialized as StringInfoData, then the
  * 	data contents are serialized at the end.
  *
  * 	Since two StringInfoData structs together are 64-bit aligned, there's
  * 	no need for padding.
  *
  * 	For reference, here is the format of the StringInfoData:
  * 		char * dataptr;
  * 			-> a placeholder in the serialized version, is filled
  * 			   when the serialized version is used in-place
  * 		int len;
  * 		int maxlen;
  * 		int cursor;
  */
 /**
  * @return The size of a serialized SparseData based on the actual consumed
  * length of the StringInfo data and StringInfoData structures.
  */
 #define SIZEOF_SPARSEDATAHDR	MAXALIGN(sizeof(SparseDataStruct))
 /**
  * @param x a SparseData
  * @return The size of x minus the dynamic variables, plus two
  * integers describing the length of the data area and index
  */
 #define SIZEOF_SPARSEDATASERIAL(x) (SIZEOF_SPARSEDATAHDR + \
 		(2*sizeof(StringInfoData)) + \
 		(x)->vals->maxlen + (x)->index->maxlen)

 /*
  * The following take a serialized SparseData as an argument and return
  * pointers to locations inside.
  */
 #define SDATA_DATA_SINFO(x)	((char *)(x)+SIZEOF_SPARSEDATAHDR)
 #define SDATA_INDEX_SINFO(x)	(SDATA_DATA_SINFO(x)+sizeof(StringInfoData))
 #define SDATA_DATA_SIZE(x)	(((StringInfo)SDATA_DATA_SINFO(x))->maxlen)
 #define SDATA_INDEX_SIZE(x)	(((StringInfo)SDATA_INDEX_SINFO(x))->maxlen)
 #define SDATA_VALS_PTR(x)       (SDATA_INDEX_SINFO(x)+sizeof(StringInfoData))
 #define SDATA_INDEX_PTR(x) 	(SDATA_VALS_PTR(x)+SDATA_DATA_SIZE(x))

 #define SDATA_UNIQUE_VALCNT(x)	(((SparseData)(x))->unique_value_count)
 #define SDATA_TOTAL_VALCNT(x)	(((SparseData)(x))->total_value_count)

 /**
  * @param x a SparseData
  * @return True if x is a scalar */
 #define SDATA_IS_SCALAR(x)	(((((x)->unique_value_count)==((x)->total_value_count))&&((x)->total_value_count==1)) ? 1 : 0)

 /**
  * @param ptr Pointer to the start of the count entry of a SparseData
  * @return The size of the integer count in an RLE index pointed to by ptr
  */
 #define	int8compstoragesize(ptr) \
  (((ptr) == NULL) ? 0 : (((*((char *)(ptr)) < 0) ? 1 : (1 + *((char *)(ptr))))))
 /* The size of a compressed int8 is stored in the first element of the ptr
  * array; see the explanation at the int8_to_compword() function below.
  *
  * Note that if the ptr is NULL, a zero size is returned
  */

 static inline void int8_to_compword(int64 num, char entry[9]);

 /** Serialization function */
 void serializeSparseData(char *target, SparseData source);

 /* Constructors and destructors */
 SparseData makeEmptySparseData(void);
 SparseData makeInplaceSparseData(char *vals, char *index,
 		int datasize, int indexsize, Oid datatype,
 		int unique_value_count, int total_value_count);

 SparseData makeSparseDataCopy(SparseData source_sdata);
 SparseData makeSparseDataFromDouble(double scalar,int64 dimension);

 SparseData makeSparseData(void);

 void freeSparseData(SparseData sdata);
 void freeSparseDataAndData(SparseData sdata);

 StringInfo copyStringInfo(StringInfo source_sinfo);
 StringInfo makeStringInfoFromData(char *data,int len);

 /* Conversion to and from arrays */
 double *sdata_to_float8arr(SparseData sdata);
 int64 *sdata_index_to_int64arr(SparseData sdata);
 SparseData float8arr_to_sdata(double *array, int count);
 SparseData arr_to_sdata(char *array, size_t width, Oid type_of_data, int count);

 /* Some functions for accessing and changing elements of a SparseData */
 SparseData lapply(text * func, SparseData sdata);
 double sd_proj(SparseData sdata, int idx);
 SparseData subarr(SparseData sdata, int start, int end);
 SparseData reverse(SparseData sdata);
 SparseData concat(SparseData left, SparseData right);

 /* Returns the size of each basic type
  */
 static inline size_t
 size_of_type(Oid type)
 {
 	switch (type)
 	{
 		case FLOAT4OID: return(4);
 		case FLOAT8OID: return(8);
 		case CHAROID: return(1);
 		case INT2OID: return(2);
 		case INT4OID: return(4);
 	        case INT8OID: return(8);
 	}
 	return(1);
 }

 /* Appends a count to the count array
  */
 static inline void append_to_rle_index(StringInfo index, int64 run_len)
 {
 	char bytes[]={0,0,0,0,0,0,0,0,0}; /* 9 bytes into which the compressed
 					     int8 value is written */
 	int8_to_compword(run_len,bytes); /* create compressed version of
 					    int8 value */
 	appendBinaryStringInfo(index,bytes,int8compstoragesize(bytes));
 }

 /* Adds a new block to a SparseData
  */
 static inline void add_run_to_sdata(char *run_val, int64 run_len, size_t width,
 		SparseData sdata)
 {
 	StringInfo index = sdata->index;
 	StringInfo vals  = sdata->vals;

 	appendBinaryStringInfo(vals,run_val,width);
 	append_to_rle_index(index, run_len);
 	sdata->unique_value_count++;
 	sdata->total_value_count+=run_len;
 }
 /*------------------------------------------------------------------------------
  * Each integer count in the RLE index is stored in a number of bytes determined
  * by its size.  The larger the integer count, the larger the size of storage.
  * Following is the map of count maximums to storage size:
  * 	Range			Storage
  * 	---------		-----------------------------------------
  * 	0     - 127		signed char stores the negative count
  *
  * 		All higher than 127 have two parts, the description byte
  * 		and the count word
  *
  * 	description byte	signed char stores the number of bytes in the
  * 				count word one of 1,2,4 or 8
  *
  * 	128   - 32767		count word is 2 bytes, signed int16_t
  * 	32768 - 2147483648	count word is 4 bytes, signed int32_t
  * 	2147483648 - max	count word is 8 bytes, signed int64
  *------------------------------------------------------------------------------
  */
 /* Transforms an int64 value to an RLE entry.  The entry is placed in the
  * provided entry[] array and will have a variable size depending on the value.
  */
 static inline void int8_to_compword(int64 num, char entry[9])
 {
 	if (num < 128) {
 		/* The reason this is negative is because entry[0] is
 	           used to record sizes in the other cases. */
 		entry[0] = -(char)num;
 		return;
 	}

 	entry[1] = (char)(num & 0xFF);
 	entry[2] = (char)((num & 0xFF00) >> 8);

 	if (num < 32768) { entry[0] = 2; return; }

 	entry[3] = (char)((num & 0xFF0000) >> 16);
 	entry[4] = (char)((num & 0xFF000000) >> 24);

 	if (num < 2147483648) { entry[0] = 4; return; }

 	entry[5] = (char)((num & 0xFF00000000) >> 32);
 	entry[6] = (char)((num & 0xFF0000000000) >> 40);
 	entry[7] = (char)((num & 0xFF000000000000) >> 48);
 	entry[8] = (char)((num & 0xFF00000000000000) >> 56);

 	entry[0] = 8;
 }

 /* Transforms a count entry into an int64 value when provided with a pointer
  * to an entry.
  */
 static inline int64 compword_to_int8(const char *entry)
 {
 	char size = int8compstoragesize(entry);
 	int16_t num_2;
 	char *numptr2 = (char *)(&num_2);
 	int32_t num_4;
 	char *numptr4 = (char *)(&num_4);
 	int64 num;
 	char *numptr8 = (char *)(&num);

 	switch(size) {
 	        case 0: /* entry == NULL represents an array of ones; see
 			 * comment after definition of SparseDataStruct above
 			 */
 			return 1; break;
 		case 1:
 			num = -(entry[0]);
 			break;
 		case 3:
 			numptr2[0] = entry[1];
 			numptr2[1] = entry[2];
 			num = num_2;
 			break;
 		case 5:
 			numptr4[0] = entry[1];
 			numptr4[1] = entry[2];
 			numptr4[2] = entry[3];
 			numptr4[3] = entry[4];
 			num = num_4;
 			break;
 		case 9:
 			numptr8[0] = entry[1];
 			numptr8[1] = entry[2];
 			numptr8[2] = entry[3];
 			numptr8[3] = entry[4];
 			numptr8[4] = entry[5];
 			numptr8[5] = entry[6];
 			numptr8[6] = entry[7];
 			numptr8[7] = entry[8];
 			break;
 	}
 	return num;
 }

 static inline void printout_double(double *vals, int num_values, int stop);
 static inline void printout_double(double *vals, int num_values, int stop)
 {
 	char *output_str = (char *)palloc(sizeof(char)*(num_values*(6+18+2))+1);
 	char *str = output_str;
 	int numout;
 	for (int i=0; i<num_values; i++) {
 		numout = snprintf(str,26,"%6.2f,%#llX,",vals[i],
 				  *((long long unsigned int *)(&(vals[i]))));
 		str += numout-1;
 	}
 	*str = '\0';
 	elog(NOTICE,"doubles:%s",output_str);
 }
 static inline void printout_index(char *ix, int num_values, int stop);
 static inline void printout_index(char *ix, int num_values, int stop)
 {
 	char *output_str = (char *)palloc(sizeof(char)*((num_values*7)+1));
 	char *str = output_str;
 	int numout;
 	elog(NOTICE,"num_values=%d",num_values);
 	for (int i=0; i<num_values; i++,ix+=int8compstoragesize(ix)) {
 		numout=snprintf(str,7,"%lld,",(long long int)compword_to_int8(ix));
 		str+=numout;
 	}
 	*str = '\0';
 	elog(NOTICE,"index:%s",output_str);
 }
 static inline void printout_sdata(SparseData sdata, char *msg, int stop);
 static inline void printout_sdata(SparseData sdata, char *msg, int stop)
 {
 	elog(NOTICE,"%s ==> unvct,tvct,ilen,dlen,datatype=%d,%d,%d,%d,%d",
 			msg,
 			sdata->unique_value_count,sdata->total_value_count,
 			sdata->index->len,sdata->vals->len,
 			sdata->type_of_data);
 	{
 		char *ix=sdata->index->data;
 		double *ar=(double *)(sdata->vals->data);
 		printout_double(ar,sdata->unique_value_count,0);
 		printout_index(ix,sdata->unique_value_count,0);
 	}

 	if (stop)
 	  ereport(ERROR,
 			  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 			   errmsg("LAL STOP")));
 }

 /*------------------------------------------------------------------------------
  * Multiplication, Addition, Division by scalars
  *------------------------------------------------------------------------------
  */
 #define typref(type,ptr) (*((type *)(ptr)))
 #define valref(type,val,i) (((type *)(val)->vals->data)[(i)])

 #define valsquare(val)	(val*val)
 #define valcube(val)	(val*valsquare(val))
 #define valquad(val)	(valsquare(valsquare(val)))

 #define apply_const_to_sdata(sdata,i,op,scalar) \
 	switch ((sdata)->type_of_data) \
 { \
 	case FLOAT4OID: \
 		valref(float,sdata,i) op typref(float,scalar); \
 		break; \
 	case FLOAT8OID: \
 		valref(float8,sdata,i) op typref(float8,scalar); \
 		break; \
 	case CHAROID: \
 		valref(char,sdata,i) op typref(char,scalar); \
 		break; \
 	case INT2OID: \
 		valref(int16,sdata,i) op typref(int16,scalar); \
 		break; \
 	case INT4OID: \
 		valref(int32,sdata,i) op typref(int32,scalar); \
 		break; \
 	case INT8OID: \
 		valref(int64,sdata,i) op typref(int64,scalar); \
 		break; \
 }

 #define apply_scalar_left_to_sdata(sdata,i,op,scalar) \
 	switch ((sdata)->type_of_data) \
 { \
 	case FLOAT4OID: \
 		valref(float,sdata,i) = \
 		typref(float,scalar) op valref(float,sdata,i); \
 		break; \
 	case FLOAT8OID: \
 		valref(float8,sdata,i) = \
 		typref(float8,scalar) op valref(float8,sdata,i); \
 		break; \
 	case CHAROID: \
 		valref(char,sdata,i) = \
 		typref(char,scalar) op valref(char,sdata,i); \
 		break; \
 	case INT2OID: \
 		valref(int16,sdata,i) = \
 		typref(int16,scalar) op valref(int16_t,sdata,i); \
 		break; \
 	case INT4OID: \
 		valref(int32,sdata,i) = \
 		typref(int32,scalar) op valref(int32_t,sdata,i); \
 		break; \
 	case INT8OID: \
 		valref(int64,sdata,i) = \
 		typref(int64,scalar) op valref(int64,sdata,i); \
 		break; \
 }

 #define accum_sdata_result(result,left,i,op,right,j) \
 	switch ((left)->type_of_data) \
 { \
 	case FLOAT4OID: \
 		typref(float,result) = \
 		valref(float,left,i) op \
 		valref(float,right,j); \
 		break; \
 	case FLOAT8OID: \
 		typref(float8,result) = \
 		valref(float8,left,i) op \
 		valref(float8,right,j); \
 		break; \
 	case CHAROID: \
 		typref(char,result) = \
 		valref(char,left,i) op \
 		valref(char,right,j); \
 		break; \
 	case INT2OID: \
 		typref(int16,result) = \
 		valref(int16,left,i) op \
 		valref(int16,right,j); \
 		break; \
 	case INT4OID: \
 		typref(int32,result) = \
 		valref(int32,left,i) op \
 		valref(int32,right,j); \
 		break; \
 	case INT8OID: \
 		typref(int64,result) = \
 		valref(int64,left,i) op \
 		valref(int64,right,j); \
 		break; \
 }

 #define apply_function_sdata_scalar(result,func,left,i,scalar) \
 	switch ((left)->type_of_data) \
 { \
 	case FLOAT4OID: \
 		valref(float,result,i) =\
 		func(valref(float,left,i),typref(float,scalar)); \
 		break; \
 	case FLOAT8OID: \
 		valref(float8,result,i) =\
 		func(valref(float8,left,i),typref(float8,scalar)); \
 		break; \
 	case CHAROID: \
 		valref(char,result,i) =\
 		func(valref(char,left,i),typref(char,scalar)); \
 		break; \
 	case INT2OID: \
 		valref(int16,result,i) =\
 		func(valref(int16,left,i),typref(int16,scalar)); \
 		break; \
 	case INT4OID: \
 		valref(int32,result,i) =\
 		func(valref(int32,left,i),typref(int32,scalar)); \
 		break; \
 	case INT8OID: \
 		valref(int64,result,i) =\
 		func(valref(int64,left,i),typref(int64,scalar)); \
 		break; \
 }

 #define apply_square_sdata(result,left,i) \
 	switch ((left)->type_of_data) \
 { \
 	case FLOAT4OID: \
 		valref(float,result,i) = \
 		valsquare(valref(float,left,i)); \
 		break; \
 	case FLOAT8OID: \
 		valref(float8,result,i) = \
 		valsquare(valref(float8,left,i));\
 		break; \
 	case CHAROID: \
 		valref(char,result,i) = \
 		valsquare(valref(char,left,i));\
 		break; \
 	case INT2OID: \
 		valref(int16_t,result,i) = \
 		valsquare(valref(int16_t,left,i));\
 		break; \
 	case INT4OID: \
 		valref(int32_t,result,i) = \
 		valsquare(valref(int32_t,left,i));\
 		break; \
 	case INT8OID: \
 		valref(int64,result,i) = \
 		valsquare(valref(int64,left,i));\
 		break; \
 }

 #define apply_cube_sdata(result,left,i) \
 	switch ((left)->type_of_data) \
 { \
 	case FLOAT4OID: \
 		valref(float,result,i) = \
 		valcube(valref(float,left,i)); \
 		break; \
 	case FLOAT8OID: \
 		valref(float8,result,i) = \
 		valcube(valref(float8,left,i));\
 		break; \
 	case CHAROID: \
 		valref(char,result,i) = \
 		valcube(valref(char,left,i));\
 		break; \
 	case INT2OID: \
 		valref(int16_t,result,i) = \
 		valcube(valref(int16_t,left,i));\
 		break; \
 	case INT4OID: \
 		valref(int32_t,result,i) = \
 		valcube(valref(int32_t,left,i));\
 		break; \
 	case INT8OID: \
 		valref(int64,result,i) = \
 		valcube(valref(int64,left,i));\
 		break; \
 }
 #define apply_quad_sdata(result,left,i) \
 	switch ((left)->type_of_data) \
 { \
 	case FLOAT4OID: \
 		valref(float,result,i) = \
 		valquad(valref(float,left,i)); \
 		break; \
 	case FLOAT8OID: \
 		valref(float8,result,i) = \
 		valquad(valref(float8,left,i));\
 		break; \
 	case CHAROID: \
 		valref(char,result,i) = \
 		valquad(valref(char,left,i));\
 		break; \
 	case INT2OID: \
 		valref(int16_t,result,i) = \
 		valquad(valref(int16_t,left,i));\
 		break; \
 	case INT4OID: \
 		valref(int32_t,result,i) = \
 		valquad(valref(int32_t,left,i));\
 		break; \
 	case INT8OID: \
 		valref(int64,result,i) = \
 		valquad(valref(int64,left,i));\
 		break; \
 }

 /* Checks that two SparseData have the same dimension
  */
 static inline void
 check_sdata_dimensions(SparseData left, SparseData right)
 {
 	if (left->total_value_count != right->total_value_count)
 	{
 		ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 			 errmsg("dimensions of vectors must be the same")));
 	}
 }

 enum operation_t { subtract, add, multiply, divide };

 /* Do one of subtract, add, multiply, or divide depending on
  * the value of operation.
  */
 static inline void op_sdata_by_scalar_inplace(enum operation_t operation,
 		char *scalar, SparseData sdata, bool scalar_is_right)
 {
 	if (scalar_is_right) //scalar is on the right
 	{
 		for(int i=0; i<sdata->unique_value_count; i++)
 		{
 			switch(operation)
 			{
 			case subtract:
 				apply_const_to_sdata(sdata,i,-=,scalar)
 				break;
 			case add:
 				apply_const_to_sdata(sdata,i,+=,scalar)
 				break;
 			case multiply:
 				apply_const_to_sdata(sdata,i,*=,scalar)
 				break;
 			case divide:
 				apply_const_to_sdata(sdata,i,/=,scalar)
 				break;
 			}
 		}
 	} else { //scalar is on the left
 		for(int i=0; i<sdata->unique_value_count; i++)
 		{
 			switch(operation)
 			{
 			case subtract:
 				apply_scalar_left_to_sdata(sdata,i,-,scalar)
 				break;
 			case add:
 				apply_scalar_left_to_sdata(sdata,i,+,scalar)
 				break;
 			case multiply:
 				apply_scalar_left_to_sdata(sdata,i,*,scalar)
 				break;
 			case divide:
 				apply_scalar_left_to_sdata(sdata,i,/,scalar)
 				break;
 			}
 		}
 	}

 }
 static inline SparseData op_sdata_by_scalar_copy(enum operation_t operation,
 		char *scalar, SparseData source_sdata, bool scalar_is_right)
 {
 	SparseData sdata = makeSparseDataCopy(source_sdata);
 	op_sdata_by_scalar_inplace(operation,scalar,sdata,scalar_is_right);
 	return sdata;
 }

 /* Exponentiates an sdata left argument with a right scalar
  */
 static inline SparseData pow_sdata_by_scalar(SparseData sdata,
 		char *scalar)
 {
 	SparseData result = makeSparseDataCopy(sdata);
 	for(int i=0; i<sdata->unique_value_count; i++)
 		apply_function_sdata_scalar(result,pow,sdata,i,scalar)

 	return(result);
 }
 static inline SparseData square_sdata(SparseData sdata)
 {
 	SparseData result = makeSparseDataCopy(sdata);
 	for(int i=0; i<sdata->unique_value_count; i++)
 		apply_square_sdata(result,sdata,i)

 	return(result);
 }
 static inline SparseData cube_sdata(SparseData sdata)
 {
 	SparseData result = makeSparseDataCopy(sdata);
 	for(int i=0; i<sdata->unique_value_count; i++)
 		apply_cube_sdata(result,sdata,i)

 	return(result);
 }
 static inline SparseData quad_sdata(SparseData sdata)
 {
 	SparseData result = makeSparseDataCopy(sdata);
 	for(int i=0; i<sdata->unique_value_count; i++)
 		apply_quad_sdata(result,sdata,i)

 	return(result);
 }

 /* Checks the equality of two SparseData. We can't assume that two
  * SparseData are in canonical form.
  *
  * The algorithm is simple: we traverse the left SparseData element by
  * element, and for each such element x, we traverse all the elements of
  * the right SparseData that overlaps with x and check that they are equal.
  *
  * Note: This function only works on SparseData of float8s at present.
  */
 static inline bool sparsedata_eq(SparseData left, SparseData right)
 {
 	if (left->total_value_count != right->total_value_count)
 		return false;

 	char * ix = left->index->data;
 	double * vals = (double *)left->vals->data;

 	char * rix = right->index->data;
 	double * rvals = (double *)right->vals->data;

 	int read = 0, rread = 0;
 	int rvid = 0;
 	int rrun_length, i;

 	for (i=0; i<left->unique_value_count; i++,ix+=int8compstoragesize(ix)) {
 		read += compword_to_int8(ix);

 		while (true) {
 			/*
 			 * We need to use memcmp to handle NULLs (represented
 			 * as NaNs) properly
 			 */
 			if (memcmp(&(vals[i]),&(rvals[rvid]),sizeof(float8))!=0)
 				return false;

 			/*
 			 * We never move the right element pointer beyond
 			 * the current left element
 			 */
 			rrun_length = compword_to_int8(rix);
 			if (rread + rrun_length > read) break;

 			/*
 			 * Increase counters if there are more elements in
 			 * the right SparseData that overlaps with current
 			 * left element
 			 */
 			rread += rrun_length;
 			if (rvid < right->unique_value_count) {
 				rix += int8compstoragesize(rix);
 				rvid++;
 			}
 			if (rread == read) break;
 		}
 	}
 	Assert(rread == read);
 	return true;
 }

 static inline double id(double x) { return x; }
 static inline double square(double x) { return x*x; }
 static inline double myabs(double x) { return (x < 0) ? -(x) : x ; }

 /* This function is introduced to capture a common routine for
  * traversing a SparseData, transforming each element as we go along and
  * summing up the transformed elements. The method is non-destructive to
  * the input SparseData.
  */
 static inline double
 accum_sdata_values_double(SparseData sdata, double (*func)(double))
 {
 	double accum=0.;
 	char *ix = sdata->index->data;
 	double *vals = (double *)sdata->vals->data;
 	int64 run_length;

 	for (int i=0;i<sdata->unique_value_count;i++)
 	{
 		run_length = compword_to_int8(ix);
 		accum += func(vals[i])*run_length;
 		ix+=int8compstoragesize(ix);
 	}
 	return (accum);
 }

 /* Computes the running sum of the elements of a SparseData */
 static inline double sum_sdata_values_double(SparseData sdata) {
 	return accum_sdata_values_double(sdata, id);
 }

 /* Computes the l2 norm of a SparseData */
 static inline double l2norm_sdata_values_double(SparseData sdata) {
 	return sqrt(accum_sdata_values_double(sdata, square));
 }

 /* Computes the l1 norm of a SparseData */
 static inline double l1norm_sdata_values_double(SparseData sdata) {
 	return accum_sdata_values_double(sdata, myabs);
 }

 /*
  * Addition, Scalar Product, Division between SparseData arrays
  *
  * There are a few factors to consider:
  * - The dimension of the left and right arguments must be the same
  * - We employ an algorithm that does the computation on the compressed contents
  *   which creates a new SparseData array
  *------------------------------------------------------------------------------
  */
 static inline SparseData op_sdata_by_sdata(enum operation_t operation,
 					   SparseData left, SparseData right)
 {
 	SparseData sdata = makeSparseData();

 	/*
 	 * Loop over the contents of the left array, operating on elements
 	 * of the right array and append a new value to the sdata when a
 	 * new unique value is generated.
 	 *
 	 * We will manage two cursors, one for each of left and right arrays
 	 */
 	char *liptr=left->index->data;
 	char *riptr=right->index->data;
 	int left_run_length, right_run_length;
 	char *new_value,*last_new_value;
 	int tot_run_length=-1;
 	left_run_length = compword_to_int8(liptr);
 	right_run_length = compword_to_int8(riptr);
 	int left_lst=0,right_lst=0;
 	int left_nxt=left_run_length,right_nxt=right_run_length;
 	int nextpos = Min(left_nxt,right_nxt),lastpos=0;
 	int i=0,j=0;
 	size_t width = size_of_type(left->type_of_data);

 	check_sdata_dimensions(left,right);

 	new_value      = (char *)palloc(width);
 	last_new_value = (char *)palloc(width);

 	while (1)
 	{
 		switch (operation)
 		{
 			case subtract:
 				accum_sdata_result(new_value,left,i,-,right,j)
 				break;
 			case add:
 			default:
 				accum_sdata_result(new_value,left,i,+,right,j)
 				break;
 			case multiply:
 				accum_sdata_result(new_value,left,i,*,right,j)
 				break;
 			case divide:
 				accum_sdata_result(new_value,left,i,/,right,j)
 				break;
 		}
 		/*
 		 * Potentially add a new run, depending on whether this is a
 		 * different value from the previous calculation. It may be
 		 * that this calculation has produced an identical value to
 		 * the previous, in which case we store it up, waiting for a
 		 * new value to happen.
 		 */
 		if (tot_run_length==-1)
 		{
 			memcpy(last_new_value,new_value,width);
 			tot_run_length=0;
 		}
 		if (memcmp(new_value,last_new_value,width))
 		{
 			add_run_to_sdata(last_new_value,tot_run_length,sizeof(float8),sdata);
 			tot_run_length = 0;
 			memcpy(last_new_value,new_value,width);
 		}
 		tot_run_length += (nextpos-lastpos);

 		if ((left_nxt==right_nxt)&&(left_nxt==(left->total_value_count))) {
 			break;
 		} else if (left_nxt==right_nxt) {
 			i++;j++;
 			left_lst=left_nxt;right_lst=right_nxt;
 			liptr+=int8compstoragesize(liptr);
 			riptr+=int8compstoragesize(riptr);
 		} else if (nextpos==left_nxt) {
 			i++;
 			left_lst=left_nxt;
 			liptr+=int8compstoragesize(liptr);
 		} else if (nextpos==right_nxt) {
 			j++;
 			right_lst=right_nxt;
 			riptr+=int8compstoragesize(riptr);
 		}
 		left_run_length = compword_to_int8(liptr);
 		right_run_length = compword_to_int8(riptr);
 		left_nxt=left_run_length+left_lst;
 		right_nxt=right_run_length+right_lst;
 		lastpos=nextpos;
 		nextpos = Min(left_nxt,right_nxt);
 	}

 	/*
 	 * Add the last run if we ended with a repeating value
 	 */
 	if (tot_run_length!=0)
 		add_run_to_sdata(new_value,tot_run_length,sizeof(float8),sdata);

 	/*
 	 * Set the return data type
 	 */
 	sdata->type_of_data = left->type_of_data;

 	pfree(new_value);
 	pfree(last_new_value);

 	return sdata;
 }

 /*------------------------------------------------------------------------------
  * macros that will test whether a given double value is in the normal
  * range or is in the special range (denormals, exceptions).
  *------------------------------------------------------------------------------
  */
 /* Anything between LOW and HIGH is a denormal or exception */
 #define SPEC_MASK_HIGH 0xFFF0000000000000
 #define SPEC_MASK_LOW  0x7FF0000000000000
 #define MASKTEST(x,y)	(((x)&(y))==x) /* MASKTEST checks for the presence of
 					  the bits in x in the input y */

 /* The input to MASKTEST_double should be an int64 mask and a (double *) to be
  * tested
  */
 #define MASKTEST_double(x,y)	MASKTEST((x),*((int64 *)(&(y))))

 #define DBL_IS_A_SPECIAL(x) \
   (MASKTEST_double(SPEC_MASK_HIGH,(x)) || MASKTEST_double(SPEC_MASK_LOW,(x)) \
    || ((x) == 0.))

 #endif	/* SPARSEDATA_H */