methods/svec/src/pg_gp/SparseData.c - madlib - Git at Google

 /**
  * @file
  * \brief This file implements different operations on the SparseData structure.
  *
  * Most functions on sparse vectors are first defined on SparseData, and
  * then packaged up as functions on sparse vectors using wrappers.
  *
  * This is the general procedure for adding functionalities to sparse vectors:
  *  1. Write the function for SparseData.
  *  2. Wrap it up for SvecType in operators.c or sparse_vector.c.
  *  3. Make the function available in gp_svec.sql.
  */

 #include <postgres.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <float.h>
 #include "SparseData.h"
 #include "utils/builtins.h"
 #include "utils/syscache.h"
 #include "parser/parse_func.h"
 #include "access/htup.h"
 #include "catalog/pg_proc.h"

 void* array_pos_ref = NULL;

 /**
  * @return A SparseData structure with allocated empty dynamic
  * StringInfo of unknown initial sizes.
  */
 SparseData makeSparseData(void) {
 	/* Allocate the struct */
 	SparseData sdata = (SparseData)palloc(sizeof(SparseDataStruct));

 	/* Allocate the included elements */
 	sdata->vals  = makeStringInfo();
 	sdata->index = makeStringInfo();
 	sdata->vals->len = 0;
 	sdata->index->len = 0;
 	sdata->vals->cursor = 0;
 	sdata->index->cursor = 0;
 	sdata->unique_value_count=0;
 	sdata->total_value_count=0;
 	sdata->type_of_data = FLOAT8OID;
 	return sdata;
 }

 /**
  * @return A SparseData with zero storage in its StringInfos.
  */
 SparseData makeEmptySparseData(void) {
 	/* Allocate the struct */
 	SparseData sdata = makeSparseData();

 	/* Free the data area */
 	pfree(sdata->vals->data);
 	pfree(sdata->index->data);
 	sdata->vals->data  = palloc(1);
 	sdata->index->data = palloc(1);
 	sdata->vals->maxlen=0;
 	sdata->index->maxlen=0;
 	return sdata;
 }

 /**
  * @param vals An array of data values
  * @param index An array of run-lengths
  * @param datasize The length of the vals array
  * @param indexsize The length of the index array
  * @param datatype The object ID of the elements represented in the vals array
  * @param unique_value_count The number of RLE blocks in the vals array
  * @param total_value_count The total number of individual elements represented in the vals and index arrays
  * @return A SparseData created in place using pointers to the vals and index data
  */
 SparseData makeInplaceSparseData(char *vals, char *index,
 		int datasize, int indexsize, Oid datatype,
 		int unique_value_count, int total_value_count) {
 	SparseData sdata = makeEmptySparseData();
 	sdata->unique_value_count = unique_value_count;
 	sdata->total_value_count  = total_value_count;

 	/*
 	 * Note: We are disobeying the constraints demanded in lib/stringinfo.h:
 	 * We do not use terminating NULL characters for the data field of
 	 * struct StringInfoData, i.e., sparse-vector code will not rely on
 	 * data[len] == '\0'
 	 */
 	sdata->vals->data = vals;
 	sdata->vals->len = datasize;
 	sdata->vals->maxlen = sdata->vals->len;
 	sdata->index->data = index;
 	sdata->index->len = indexsize;
 	sdata->index->maxlen = sdata->index->len;
 	sdata->type_of_data = datatype;
 	return sdata;
 }

 /**
  * @return A copy of an existing SparseData.
  */
 SparseData makeSparseDataCopy(SparseData source_sdata) {
 	/* Allocate the struct */
 	SparseData sdata = (SparseData)palloc(sizeof(SparseDataStruct));
 	/* Allocate the included elements */
 	sdata->vals = copyStringInfo(source_sdata->vals);
 	sdata->index = copyStringInfo(source_sdata->index);
 	sdata->type_of_data       = source_sdata->type_of_data;
 	sdata->unique_value_count = source_sdata->unique_value_count;
 	sdata->total_value_count  = source_sdata->total_value_count;
 	return sdata;
 }
 /**
  * @param constant The value of an RLE block
  * @param dimension The size of an RLE block
  * @return A SparseData with a single RLE block of size dimension having value constant
  */
 SparseData makeSparseDataFromDouble(double constant,int64 dimension) {
 	char *bytestore=(char *)palloc(sizeof(char)*9);
 	SparseData sdata = float8arr_to_sdata(&constant,1);
 	int8_to_compword(dimension,bytestore); /* create compressed version of
 					          int8 value */
 	pfree(sdata->index->data);
 	sdata->index->data = bytestore;
 	sdata->index->len = int8compstoragesize(bytestore);
 	sdata->total_value_count=dimension;
 	if (sdata->index->maxlen < int8compstoragesize(bytestore)) {
 		ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 			       errmsg("Internal error")));
 	}
 	return sdata;
 }

 /**
  * Frees up the memory occupied by sdata
  */
 void freeSparseData(SparseData sdata) {
 	pfree(sdata->vals);
 	pfree(sdata->index);
 	pfree(sdata);
 }

 /**
  * Frees up the memory occupied by sdata, including the data elements of vals and index.
  */
 void freeSparseDataAndData(SparseData sdata) {
 	pfree(sdata->vals->data);
 	pfree(sdata->index->data);
 	freeSparseData(sdata);
 }

 /**
  * @param sinfo The StringInfo structure to be copied
  * @return A copy of sinfo
  */
 StringInfo copyStringInfo(StringInfo sinfo) {
 	StringInfo result;
 	char *data;
 	if (sinfo->data == NULL) {
 		data = NULL;
 	} else {
 		data   = (char *)palloc(sizeof(char)*(sinfo->len+1));
 		memcpy(data,sinfo->data,sinfo->len);
 		data[sinfo->len] = '\0';
 	}
 	result = makeStringInfoFromData(data,sinfo->len);
 	return result;
 }

 /**
  * @param data Pointer to an array of elements
  * @param len The size of the data array
  * @return A StringInfo from a data pointer and length
  */
 StringInfo makeStringInfoFromData(char *data,int len) {
 	StringInfo sinfo;
 	sinfo = (StringInfo)palloc(sizeof(StringInfoData));
 	sinfo->data   = data;
 	sinfo->len    = len;
 	sinfo->maxlen = len;
 	sinfo->cursor = 0;
 	return sinfo;
 }

 /**
  * @param array The array of doubles to be converted to a SparseData
  * @param count The size of array
  * @return A SparseData representation of an input array of doubles
  */
 SparseData float8arr_to_sdata(double *array, int count) {
 	return arr_to_sdata((char *)array, sizeof(float8), FLOAT8OID, count);
 }

 /**
  * @param array The array of values to be converted to values in SparseData
  * @param array_pos The array of positions to be converted to runs in SparseData
  * @param count The size of array
  * @return A SparseData representation of an input array of doubles
  */
 SparseData position_to_sdata(double *array,int64 *array_pos, int count, int64 end, double base_val) {
 	return posit_to_sdata((char *)array, array_pos, sizeof(float8), FLOAT8OID, count, end, (char *)&base_val);
 }

 /**
  * @param array The array of elements to be converted to a SparseData
  * @param width The size of the elements in array
  * @param type_of_data The object ID of the elements in array
  * @param count The size of array
  * @return A SparseData representation of an input array
  */
 SparseData arr_to_sdata(char *array, size_t width, Oid type_of_data, int count){
 	char *run_val=array;
 	int64 run_len=1;
 	SparseData sdata = makeSparseData();

 	sdata->type_of_data=type_of_data;

 	for (int i=1; i<count; i++) {
 		char *curr_val=array+ (i*size_of_type(type_of_data));

 		/*
 		 * Note that special double values like denormalized numbers and exceptions
 		 * like NaN are treated like any other value - if there are duplicates, the
 		 * value of the special number is preserved and they are counted.
 		 */
 		if (memcmp(curr_val,run_val,width))
 		{       /*run is interrupted, initiate new run */
 			/* package up the finished run */
 			add_run_to_sdata(run_val,run_len,width,sdata);
 			/* mark beginning of new run */
 			run_val=curr_val;
 			run_len=1;
 		} else
 		{ /* we're still in the same run */
 			run_len++;
 		}
 	}
 	add_run_to_sdata(run_val, run_len, width, sdata); /* package up the last run */

 	/* Add the final tallies */
 	sdata->unique_value_count = sdata->vals->len/width;
 	sdata->total_value_count = count;

 	return sdata;
 }

 int compar(const void *i, const void *j){
 	return (int)((int64*)array_pos_ref)[*(int*)i] - ((int64*)array_pos_ref)[*(int*)j];
 }

 /**
  * @param array The values of the positions
  * @param array_pos The position for the values
  * @param width The size of the element in the array
  * @param type_of_data tyep of the value element
  * @param count Number of element in the array
  * @param end Target size of the element
  * @return A SparseData representation of an input array
  */

 SparseData posit_to_sdata(char *array, int64* array_pos, size_t width, Oid type_of_data, int count, int64 end, char *base_val){
 	char *run_val=array;
 	int64 run_len;
 	SparseData sdata = makeSparseData();

 	int *index = (int*)palloc(count*sizeof(int));
 	for(int i = 0; i < count; ++i){
 		index[i] = i;
 	}

 	/* this is a temp solutoin that will be in place only until qsort_r is a standard llibrary */
 	array_pos_ref = array_pos;
 	qsort(index, count, sizeof(int), compar);

 	sdata->type_of_data=type_of_data;
 	if(array_pos[index[0]] > 1){
 		run_len = array_pos[index[0]]-1;
 		add_run_to_sdata(base_val,run_len,width,sdata);
 	}

 	for (int i = 0; i<count; i++) {
 		run_len=1;
 		/*
 		 * Note that special double values like denormalized numbers and exceptions
 		 * like NaN are treated like any other value - if there are duplicates, the
 		 * value of the special number is preserved and they are counted.
 		 */
 		while((i < count-1)&&((array_pos[index[i+1]] - array_pos[index[i]])==1)&&(memcmp((array+index[i]*size_of_type(type_of_data)),(array+index[i+1]*size_of_type(type_of_data)),width)==0)){
 			run_len++;
 			i++;
 		}
 		while((i < count-1)&&((array_pos[index[i+1]] - array_pos[index[i]])==0)){
 			if((memcmp((array+index[i]*size_of_type(type_of_data)),(array+index[i+1]*size_of_type(type_of_data)),width)==0)){
 				i++;
 			}else{
 				ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("posit_to_sdata conflicting values for the same position")));
 			}
 		}
 		run_val = array+index[i]*size_of_type(type_of_data);
 		add_run_to_sdata(run_val,run_len,width,sdata);

 		if(i < count-1){
 			run_len = array_pos[index[i+1]]-array_pos[index[i]]-1;
 			if(run_len > 0){
 				add_run_to_sdata(base_val,run_len,width,sdata);
 			}
 		}else if(array_pos[index[i]] < end){
 			run_len = end-array_pos[index[i]];
 			if(run_len > 0){
 				add_run_to_sdata(base_val,run_len,width,sdata);
 			}
 		}
 	}

 	/* Add the final tallies */
 	sdata->unique_value_count = sdata->vals->len/width;
 	sdata->total_value_count = count;
 	pfree(index);
 	return sdata;
 }


 /**
  * @param sdata The SparseData to be converted to an array of float8s
  * @return A float8[] representation of a SparseData
  */
 double *sdata_to_float8arr(SparseData sdata) {
 	double *array;
 	int j, aptr;
 	char *iptr;

 	if (sdata->type_of_data != FLOAT8OID) {
 		ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 			errmsg("Data type of SparseData is not FLOAT64\n")));
 	}

 	if ((array=(double *)palloc(sizeof(double)*(sdata->total_value_count)))
 	      == NULL)
 	{
 		ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 			       errmsg("Error allocating memory for array\n")));
 	}

 	iptr = sdata->index->data;
 	aptr = 0;
 	for (int i=0; i<sdata->unique_value_count; i++) {
 		for (j=0;j<compword_to_int8(iptr);j++,aptr++) {
 			array[aptr] = ((double *)(sdata->vals->data))[i];
 		}
 		iptr+=int8compstoragesize(iptr);
 	}

 	if ((aptr) != sdata->total_value_count)
 	{
 		ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 		  errmsg("Array size is incorrect, is: %d and should be %d\n",
 				aptr,sdata->total_value_count)));

 		pfree(array);
 		return NULL;
 	}

 	return array;
 }

 /**
  * @return An array of integers given the (compressed) count array of a SparseData
  */
 int64 *sdata_index_to_int64arr(SparseData sdata) {
 	char *iptr;
 	int64 *array_ix = (int64 *)palloc(
 			sizeof(int64)*(sdata->unique_value_count));

 	iptr = sdata->index->data;
 	for (int i=0; i<sdata->unique_value_count; i++,
 			iptr+=int8compstoragesize(iptr)) {
 		array_ix[i] = compword_to_int8(iptr);
 	}
 	return(array_ix);
 }

 /**
  * @param target The memory area to store the serialised SparseData
  * @para source The SparseData to be serialised
  * @return The serialisation of a SparseData structure
  */
 void serializeSparseData(char *target, SparseData source)
 {
 	/* SparseDataStruct header */
 	memcpy(target,source,SIZEOF_SPARSEDATAHDR);
 	/* Two StringInfo structures describing the data and index */
 	memcpy(SDATA_DATA_SINFO(target), source->vals,sizeof(StringInfoData));
 	memcpy(SDATA_INDEX_SINFO(target),source->index,sizeof(StringInfoData));
 	/* The unique data values */
 	memcpy(SDATA_VALS_PTR(target),source->vals->data,source->vals->maxlen);
 	/* The index values */
 	memcpy(SDATA_INDEX_PTR(target),source->index->data,source->index->maxlen);

 	/*
 	 * Set pointers to the data areas of the serialized structure
 	 * First the two StringInfo structures contained in the SparseData,
 	 * then the data areas inside each of the two StringInfos.
 	 */
 	((SparseData)target)->vals = (StringInfo)SDATA_DATA_SINFO(target);
 	((SparseData)target)->index = (StringInfo)SDATA_INDEX_SINFO(target);
 	((StringInfo)(SDATA_DATA_SINFO(target)))->data = SDATA_VALS_PTR(target);
 	if (source->index->data != NULL)
 	{
 		((StringInfo)(SDATA_INDEX_SINFO(target)))->data = SDATA_INDEX_PTR(target);
 	} else {
 		((StringInfo)(SDATA_INDEX_SINFO(target)))->data = NULL;
 	}
 }

 /**
  * Prints a SparseData
  */
 void printSparseData(SparseData sdata) {
 	int value_count = sdata->unique_value_count;
 	{
 		char *indexdata = sdata->index->data;
 		double *values  = (double *)(sdata->vals->data);
 		for (int i=0; i<value_count; i++) {
 			printf("run_length[%d] = %lld, ",i,(long long int)compword_to_int8(indexdata));
 			printf("value[%d] = %f\n",i,values[i]);
 			indexdata+=int8compstoragesize(indexdata);
 		}
 	}
 }

 /**
  * @param sdata The SparseData to be projected on
  * @param idx The index to be projected
  * @return The element of a SparseData at location idx.
  */
 double sd_proj(SparseData sdata, int idx) {
 	char * ix = sdata->index->data;
 	double * vals = (double *)sdata->vals->data;
 	int read, i;

 	/* error checking */
 	if (0 >= idx || idx > sdata->total_value_count)
 		ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 			 errmsg("Index out of bounds.")));

 	/* find desired block; as is normal in SQL, we start counting from one */
 	read = compword_to_int8(ix);
 	i = 0;
 	while (read < idx) {
 		ix += int8compstoragesize(ix);
 		read += compword_to_int8(ix);
 		i++;
 	}
 	return vals[i];
 }

 /**
  * @param sdata The SparseData from which to extract a subarray
  * @param start The start index of the desired subarray
  * @param end The end index of the desired subarray
  * @return The sub-array, indexed by start and end, of a SparseData.
  */
 SparseData subarr(SparseData sdata, int start, int end) {
 	char * ix = sdata->index->data;
 	double * vals = (double *)sdata->vals->data;
 	SparseData ret = makeSparseData();
 	size_t wf8 = sizeof(float8);

 	if (start > end)
 		return reverse(subarr(sdata,end,start));

 	/* error checking */
 	if (0 >= start || start > end || end > sdata->total_value_count)
 		ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 			 errmsg("Array index out of bounds.")));

 	/* find start block */
 	int read = compword_to_int8(ix);
 	int i = 0;
 	while (read < start) {
 		ix += int8compstoragesize(ix);
 		read += compword_to_int8(ix);
 		i++;
 	}
 	if (end <= read) {
 		/* the whole subarray is in the first block, we are done */
 		add_run_to_sdata((char *)(&vals[i]), end-start+1, wf8, ret);
 		return ret;
 	}
 	/* else start building subarray */
 	add_run_to_sdata((char *)(&vals[i]), read-start+1, wf8, ret);

 	for (int j=i+1; j<sdata->unique_value_count; j++) {
 		ix += int8compstoragesize(ix);
 		int esize = compword_to_int8(ix);
 		if (read + esize > end) {
 			add_run_to_sdata((char *)(&vals[j]), end-read, wf8,ret);
 			break;
 		}
 		add_run_to_sdata((char *)(&vals[j]), esize, wf8, ret);
 		read += esize;
 		if (read == end) break;
 	}
 	return ret;
 }

 /**
  * @param sdata The SparseData to be reversed
  * @return A copy of the input SparseData, with the order of the elements reversed.
  */
 SparseData reverse(SparseData sdata) {
 	char * ix = sdata->index->data;
 	double * vals = (double *)sdata->vals->data;
 	SparseData ret = makeSparseData();
 	size_t w = sizeof(float8);

 	/* move to the last count array element */
 	for (int j=0; j<sdata->unique_value_count-1; j++)
 		ix += int8compstoragesize(ix);

 	/* copy from right to left */
 	for (int j=sdata->unique_value_count-1; j!=-1; j--) {
 		add_run_to_sdata((char *)(&vals[j]),compword_to_int8(ix),w,ret);
 		ix -= int8compstoragesize(ix);
 	}
 	return ret;
 }

 /**
  * @param left The SparseData that comes first in the resulting concatenation
  * @param right The SparseData that comes second in the resulting concatenation
  * @return The concatenation of two input SparseData.
  */
 SparseData concat(SparseData left, SparseData right) {
 	if (left == NULL && right == NULL) {
 		return NULL;
 	} else if (left == NULL && right != NULL) {
 		return makeSparseDataCopy(right);
 	} else if (left != NULL && right == NULL) {
 		return makeSparseDataCopy(left);
 	}
 	SparseData sdata = makeEmptySparseData();
 	char *vals,*index;
 	int l_val_len = left->vals->len;
 	int r_val_len = right->vals->len;
 	int l_ind_len = left->index->len;
 	int r_ind_len = right->index->len;
 	int val_len=l_val_len+r_val_len;
 	int ind_len=l_ind_len+r_ind_len;

 	vals = (char *)palloc(sizeof(char)*val_len);
 	index = (char *)palloc(sizeof(char)*ind_len);

 	memcpy(vals          ,left->vals->data,l_val_len);
 	memcpy(vals+l_val_len,right->vals->data,r_val_len);
 	memcpy(index,          left->index->data,l_ind_len);
 	memcpy(index+l_ind_len,right->index->data,r_ind_len);

 	sdata->vals  = makeStringInfoFromData(vals,val_len);
 	sdata->index = makeStringInfoFromData(index,ind_len);
 	sdata->type_of_data = left->type_of_data;
 	sdata->unique_value_count = left->unique_value_count+
 		right->unique_value_count;
 	sdata->total_value_count  = left->total_value_count+
 		right->total_value_count;
 	return sdata;
 }

 static bool lapply_error_checking(Oid foid, List * funcname);

 /**
  * This function applies an input function on all elements of a sparse data.
  * The function is modelled after the corresponding function in R.
  *
  * @param func The name of the function to apply
  * @param sdata The input sparse data to be modified
  * @return A SparseData with the same dimension as sdata but with each element sdata[i] replaced by func(sdata[i])
  */
 SparseData lapply(text * func, SparseData sdata) {
 	Oid argtypes[1] = { FLOAT8OID };
 	List * funcname = textToQualifiedNameList(func);
 	SparseData result = makeSparseDataCopy(sdata);
 	Oid foid = LookupFuncName(funcname, 1, argtypes, false);

 	lapply_error_checking(foid, funcname);

 	for (int i=0; i<sdata->unique_value_count; i++)
 		valref(float8,result,i) =
 		    DatumGetFloat8(
 		      OidFunctionCall1(foid,
 				    Float8GetDatum(valref(float8,sdata,i))));
 	return result;
 }

 /* This function checks for error conditions in lapply() function calls.
  */
 static bool lapply_error_checking(Oid foid, List * func) {
 	/* foid != InvalidOid; otherwise LookupFuncName would raise error.
 	   Here we check that the return type of foid is float8. */
 	HeapTuple ftup = SearchSysCache(PROCOID,
 					ObjectIdGetDatum(foid), 0, 0, 0);
 	Form_pg_proc pform = (Form_pg_proc) GETSTRUCT(ftup);

 	if (pform->prorettype != FLOAT8OID)
 		ereport(ERROR,
 			(errcode(ERRCODE_DATATYPE_MISMATCH),
 			 errmsg("return type of %s is not double",
 				NameListToString(func))));

 	// check volatility

 	ReleaseSysCache(ftup);
 	return true;
 }
	/**
	* @file
	* \brief This file implements different operations on the SparseData structure.
	*
	* Most functions on sparse vectors are first defined on SparseData, and
	* then packaged up as functions on sparse vectors using wrappers.
	*
	* This is the general procedure for adding functionalities to sparse vectors:
	* 1. Write the function for SparseData.
	* 2. Wrap it up for SvecType in operators.c or sparse_vector.c.
	* 3. Make the function available in gp_svec.sql.
	*/

	#include <postgres.h>
	#include <stdio.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>
	#include <float.h>
	#include "SparseData.h"
	#include "utils/builtins.h"
	#include "utils/syscache.h"
	#include "parser/parse_func.h"
	#include "access/htup.h"
	#include "catalog/pg_proc.h"

	void* array_pos_ref = NULL;

	/**
	* @return A SparseData structure with allocated empty dynamic
	* StringInfo of unknown initial sizes.
	*/
	SparseData makeSparseData(void) {
	/* Allocate the struct */
	SparseData sdata = (SparseData)palloc(sizeof(SparseDataStruct));

	/* Allocate the included elements */
	sdata->vals = makeStringInfo();
	sdata->index = makeStringInfo();
	sdata->vals->len = 0;
	sdata->index->len = 0;
	sdata->vals->cursor = 0;
	sdata->index->cursor = 0;
	sdata->unique_value_count=0;
	sdata->total_value_count=0;
	sdata->type_of_data = FLOAT8OID;
	return sdata;
	}

	/**
	* @return A SparseData with zero storage in its StringInfos.
	*/
	SparseData makeEmptySparseData(void) {
	/* Allocate the struct */
	SparseData sdata = makeSparseData();

	/* Free the data area */
	pfree(sdata->vals->data);
	pfree(sdata->index->data);
	sdata->vals->data = palloc(1);
	sdata->index->data = palloc(1);
	sdata->vals->maxlen=0;
	sdata->index->maxlen=0;
	return sdata;
	}

	/**
	* @param vals An array of data values
	* @param index An array of run-lengths
	* @param datasize The length of the vals array
	* @param indexsize The length of the index array
	* @param datatype The object ID of the elements represented in the vals array
	* @param unique_value_count The number of RLE blocks in the vals array
	* @param total_value_count The total number of individual elements represented in the vals and index arrays
	* @return A SparseData created in place using pointers to the vals and index data
	*/
	SparseData makeInplaceSparseData(char vals, char index,
	int datasize, int indexsize, Oid datatype,
	int unique_value_count, int total_value_count) {
	SparseData sdata = makeEmptySparseData();
	sdata->unique_value_count = unique_value_count;
	sdata->total_value_count = total_value_count;

	/*
	* Note: We are disobeying the constraints demanded in lib/stringinfo.h:
	* We do not use terminating NULL characters for the data field of
	* struct StringInfoData, i.e., sparse-vector code will not rely on
	* data[len] == '\0'
	*/
	sdata->vals->data = vals;
	sdata->vals->len = datasize;
	sdata->vals->maxlen = sdata->vals->len;
	sdata->index->data = index;
	sdata->index->len = indexsize;
	sdata->index->maxlen = sdata->index->len;
	sdata->type_of_data = datatype;
	return sdata;
	}

	/**
	* @return A copy of an existing SparseData.
	*/
	SparseData makeSparseDataCopy(SparseData source_sdata) {
	/* Allocate the struct */
	SparseData sdata = (SparseData)palloc(sizeof(SparseDataStruct));
	/* Allocate the included elements */
	sdata->vals = copyStringInfo(source_sdata->vals);
	sdata->index = copyStringInfo(source_sdata->index);
	sdata->type_of_data = source_sdata->type_of_data;
	sdata->unique_value_count = source_sdata->unique_value_count;
	sdata->total_value_count = source_sdata->total_value_count;
	return sdata;
	}
	/**
	* @param constant The value of an RLE block
	* @param dimension The size of an RLE block
	* @return A SparseData with a single RLE block of size dimension having value constant
	*/
	SparseData makeSparseDataFromDouble(double constant,int64 dimension) {
	char bytestore=(char )palloc(sizeof(char)*9);
	SparseData sdata = float8arr_to_sdata(&constant,1);
	int8_to_compword(dimension,bytestore); /* create compressed version of
	int8 value */
	pfree(sdata->index->data);
	sdata->index->data = bytestore;
	sdata->index->len = int8compstoragesize(bytestore);
	sdata->total_value_count=dimension;
	if (sdata->index->maxlen < int8compstoragesize(bytestore)) {
	ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
	errmsg("Internal error")));
	}
	return sdata;
	}

	/**
	* Frees up the memory occupied by sdata
	*/
	void freeSparseData(SparseData sdata) {
	pfree(sdata->vals);
	pfree(sdata->index);
	pfree(sdata);
	}

	/**
	* Frees up the memory occupied by sdata, including the data elements of vals and index.
	*/
	void freeSparseDataAndData(SparseData sdata) {
	pfree(sdata->vals->data);
	pfree(sdata->index->data);
	freeSparseData(sdata);
	}

	/**
	* @param sinfo The StringInfo structure to be copied
	* @return A copy of sinfo
	*/
	StringInfo copyStringInfo(StringInfo sinfo) {
	StringInfo result;
	char *data;
	if (sinfo->data == NULL) {
	data = NULL;
	} else {
	data = (char )palloc(sizeof(char)(sinfo->len+1));
	memcpy(data,sinfo->data,sinfo->len);
	data[sinfo->len] = '\0';
	}
	result = makeStringInfoFromData(data,sinfo->len);
	return result;
	}

	/**
	* @param data Pointer to an array of elements
	* @param len The size of the data array
	* @return A StringInfo from a data pointer and length
	*/
	StringInfo makeStringInfoFromData(char *data,int len) {
	StringInfo sinfo;
	sinfo = (StringInfo)palloc(sizeof(StringInfoData));
	sinfo->data = data;
	sinfo->len = len;
	sinfo->maxlen = len;
	sinfo->cursor = 0;
	return sinfo;
	}

	/**
	* @param array The array of doubles to be converted to a SparseData
	* @param count The size of array
	* @return A SparseData representation of an input array of doubles
	*/
	SparseData float8arr_to_sdata(double *array, int count) {
	return arr_to_sdata((char *)array, sizeof(float8), FLOAT8OID, count);
	}

	/**
	* @param array The array of values to be converted to values in SparseData
	* @param array_pos The array of positions to be converted to runs in SparseData
	* @param count The size of array
	* @return A SparseData representation of an input array of doubles
	*/
	SparseData position_to_sdata(double array,int64 array_pos, int count, int64 end, double base_val) {
	return posit_to_sdata((char )array, array_pos, sizeof(float8), FLOAT8OID, count, end, (char )&base_val);
	}

	/**
	* @param array The array of elements to be converted to a SparseData
	* @param width The size of the elements in array
	* @param type_of_data The object ID of the elements in array
	* @param count The size of array
	* @return A SparseData representation of an input array
	*/
	SparseData arr_to_sdata(char *array, size_t width, Oid type_of_data, int count){
	char *run_val=array;
	int64 run_len=1;
	SparseData sdata = makeSparseData();

	sdata->type_of_data=type_of_data;

	for (int i=1; i<count; i++) {
	char curr_val=array+ (isize_of_type(type_of_data));

	/*
	* Note that special double values like denormalized numbers and exceptions
	* like NaN are treated like any other value - if there are duplicates, the
	* value of the special number is preserved and they are counted.
	*/
	if (memcmp(curr_val,run_val,width))
	{ /run is interrupted, initiate new run /
	/* package up the finished run */
	add_run_to_sdata(run_val,run_len,width,sdata);
	/* mark beginning of new run */
	run_val=curr_val;
	run_len=1;
	} else
	{ /* we're still in the same run */
	run_len++;
	}
	}
	add_run_to_sdata(run_val, run_len, width, sdata); /* package up the last run */

	/* Add the final tallies */
	sdata->unique_value_count = sdata->vals->len/width;
	sdata->total_value_count = count;

	return sdata;
	}

	int compar(const void i, const void j){
	return (int)((int64)array_pos_ref)[(int)i] - ((int64)array_pos_ref)[(int)j];
	}

	/**
	* @param array The values of the positions
	* @param array_pos The position for the values
	* @param width The size of the element in the array
	* @param type_of_data tyep of the value element
	* @param count Number of element in the array
	* @param end Target size of the element
	* @return A SparseData representation of an input array
	*/

	SparseData posit_to_sdata(char array, int64 array_pos, size_t width, Oid type_of_data, int count, int64 end, char *base_val){
	char *run_val=array;
	int64 run_len;
	SparseData sdata = makeSparseData();

	int index = (int)palloc(count*sizeof(int));
	for(int i = 0; i < count; ++i){
	index[i] = i;
	}

	/* this is a temp solutoin that will be in place only until qsort_r is a standard llibrary */
	array_pos_ref = array_pos;
	qsort(index, count, sizeof(int), compar);

	sdata->type_of_data=type_of_data;
	if(array_pos[index[0]] > 1){
	run_len = array_pos[index[0]]-1;
	add_run_to_sdata(base_val,run_len,width,sdata);
	}

	for (int i = 0; i<count; i++) {
	run_len=1;
	/*
	* Note that special double values like denormalized numbers and exceptions
	* like NaN are treated like any other value - if there are duplicates, the
	* value of the special number is preserved and they are counted.
	*/
	while((i < count-1)&&((array_pos[index[i+1]] - array_pos[index[i]])==1)&&(memcmp((array+index[i]size_of_type(type_of_data)),(array+index[i+1]size_of_type(type_of_data)),width)==0)){
	run_len++;
	i++;
	}
	while((i < count-1)&&((array_pos[index[i+1]] - array_pos[index[i]])==0)){
	if((memcmp((array+index[i]size_of_type(type_of_data)),(array+index[i+1]size_of_type(type_of_data)),width)==0)){
	i++;
	}else{
	ereport(ERROR,
	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
	errmsg("posit_to_sdata conflicting values for the same position")));
	}
	}
	run_val = array+index[i]*size_of_type(type_of_data);
	add_run_to_sdata(run_val,run_len,width,sdata);

	if(i < count-1){
	run_len = array_pos[index[i+1]]-array_pos[index[i]]-1;
	if(run_len > 0){
	add_run_to_sdata(base_val,run_len,width,sdata);
	}
	}else if(array_pos[index[i]] < end){
	run_len = end-array_pos[index[i]];
	if(run_len > 0){
	add_run_to_sdata(base_val,run_len,width,sdata);
	}
	}
	}

	/* Add the final tallies */
	sdata->unique_value_count = sdata->vals->len/width;
	sdata->total_value_count = count;
	pfree(index);
	return sdata;
	}


	/**
	* @param sdata The SparseData to be converted to an array of float8s
	* @return A float8[] representation of a SparseData
	*/
	double *sdata_to_float8arr(SparseData sdata) {
	double *array;
	int j, aptr;
	char *iptr;

	if (sdata->type_of_data != FLOAT8OID) {
	ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
	errmsg("Data type of SparseData is not FLOAT64\n")));
	}

	if ((array=(double )palloc(sizeof(double)(sdata->total_value_count)))
	== NULL)
	{
	ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
	errmsg("Error allocating memory for array\n")));
	}

	iptr = sdata->index->data;
	aptr = 0;
	for (int i=0; i<sdata->unique_value_count; i++) {
	for (j=0;j<compword_to_int8(iptr);j++,aptr++) {
	array[aptr] = ((double *)(sdata->vals->data))[i];
	}
	iptr+=int8compstoragesize(iptr);
	}

	if ((aptr) != sdata->total_value_count)
	{
	ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
	errmsg("Array size is incorrect, is: %d and should be %d\n",
	aptr,sdata->total_value_count)));

	pfree(array);
	return NULL;
	}

	return array;
	}

	/**
	* @return An array of integers given the (compressed) count array of a SparseData
	*/
	int64 *sdata_index_to_int64arr(SparseData sdata) {
	char *iptr;
	int64 array_ix = (int64 )palloc(
	sizeof(int64)*(sdata->unique_value_count));

	iptr = sdata->index->data;
	for (int i=0; i<sdata->unique_value_count; i++,
	iptr+=int8compstoragesize(iptr)) {
	array_ix[i] = compword_to_int8(iptr);
	}
	return(array_ix);
	}

	/**
	* @param target The memory area to store the serialised SparseData
	* @para source The SparseData to be serialised
	* @return The serialisation of a SparseData structure
	*/
	void serializeSparseData(char *target, SparseData source)
	{
	/* SparseDataStruct header */
	memcpy(target,source,SIZEOF_SPARSEDATAHDR);
	/* Two StringInfo structures describing the data and index */
	memcpy(SDATA_DATA_SINFO(target), source->vals,sizeof(StringInfoData));
	memcpy(SDATA_INDEX_SINFO(target),source->index,sizeof(StringInfoData));
	/* The unique data values */
	memcpy(SDATA_VALS_PTR(target),source->vals->data,source->vals->maxlen);
	/* The index values */
	memcpy(SDATA_INDEX_PTR(target),source->index->data,source->index->maxlen);

	/*
	* Set pointers to the data areas of the serialized structure
	* First the two StringInfo structures contained in the SparseData,
	* then the data areas inside each of the two StringInfos.
	*/
	((SparseData)target)->vals = (StringInfo)SDATA_DATA_SINFO(target);
	((SparseData)target)->index = (StringInfo)SDATA_INDEX_SINFO(target);
	((StringInfo)(SDATA_DATA_SINFO(target)))->data = SDATA_VALS_PTR(target);
	if (source->index->data != NULL)
	{
	((StringInfo)(SDATA_INDEX_SINFO(target)))->data = SDATA_INDEX_PTR(target);
	} else {
	((StringInfo)(SDATA_INDEX_SINFO(target)))->data = NULL;
	}
	}

	/**
	* Prints a SparseData
	*/
	void printSparseData(SparseData sdata) {
	int value_count = sdata->unique_value_count;
	{
	char *indexdata = sdata->index->data;
	double values = (double )(sdata->vals->data);
	for (int i=0; i<value_count; i++) {
	printf("run_length[%d] = %lld, ",i,(long long int)compword_to_int8(indexdata));
	printf("value[%d] = %f\n",i,values[i]);
	indexdata+=int8compstoragesize(indexdata);
	}
	}
	}

	/**
	* @param sdata The SparseData to be projected on
	* @param idx The index to be projected
	* @return The element of a SparseData at location idx.
	*/
	double sd_proj(SparseData sdata, int idx) {
	char * ix = sdata->index->data;
	double * vals = (double *)sdata->vals->data;
	int read, i;

	/* error checking */
	if (0 >= idx \|\| idx > sdata->total_value_count)
	ereport(ERROR,
	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
	errmsg("Index out of bounds.")));

	/* find desired block; as is normal in SQL, we start counting from one */
	read = compword_to_int8(ix);
	i = 0;
	while (read < idx) {
	ix += int8compstoragesize(ix);
	read += compword_to_int8(ix);
	i++;
	}
	return vals[i];
	}

	/**
	* @param sdata The SparseData from which to extract a subarray
	* @param start The start index of the desired subarray
	* @param end The end index of the desired subarray
	* @return The sub-array, indexed by start and end, of a SparseData.
	*/
	SparseData subarr(SparseData sdata, int start, int end) {
	char * ix = sdata->index->data;
	double * vals = (double *)sdata->vals->data;
	SparseData ret = makeSparseData();
	size_t wf8 = sizeof(float8);

	if (start > end)
	return reverse(subarr(sdata,end,start));

	/* error checking */
	if (0 >= start \|\| start > end \|\| end > sdata->total_value_count)
	ereport(ERROR,
	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
	errmsg("Array index out of bounds.")));

	/* find start block */
	int read = compword_to_int8(ix);
	int i = 0;
	while (read < start) {
	ix += int8compstoragesize(ix);
	read += compword_to_int8(ix);
	i++;
	}
	if (end <= read) {
	/* the whole subarray is in the first block, we are done */
	add_run_to_sdata((char *)(&vals[i]), end-start+1, wf8, ret);
	return ret;
	}
	/* else start building subarray */
	add_run_to_sdata((char *)(&vals[i]), read-start+1, wf8, ret);

	for (int j=i+1; j<sdata->unique_value_count; j++) {
	ix += int8compstoragesize(ix);
	int esize = compword_to_int8(ix);
	if (read + esize > end) {
	add_run_to_sdata((char *)(&vals[j]), end-read, wf8,ret);
	break;
	}
	add_run_to_sdata((char *)(&vals[j]), esize, wf8, ret);
	read += esize;
	if (read == end) break;
	}
	return ret;
	}

	/**
	* @param sdata The SparseData to be reversed
	* @return A copy of the input SparseData, with the order of the elements reversed.
	*/
	SparseData reverse(SparseData sdata) {
	char * ix = sdata->index->data;
	double * vals = (double *)sdata->vals->data;
	SparseData ret = makeSparseData();
	size_t w = sizeof(float8);

	/* move to the last count array element */
	for (int j=0; j<sdata->unique_value_count-1; j++)
	ix += int8compstoragesize(ix);

	/* copy from right to left */
	for (int j=sdata->unique_value_count-1; j!=-1; j--) {
	add_run_to_sdata((char *)(&vals[j]),compword_to_int8(ix),w,ret);
	ix -= int8compstoragesize(ix);
	}
	return ret;
	}

	/**
	* @param left The SparseData that comes first in the resulting concatenation
	* @param right The SparseData that comes second in the resulting concatenation
	* @return The concatenation of two input SparseData.
	*/
	SparseData concat(SparseData left, SparseData right) {
	if (left == NULL && right == NULL) {
	return NULL;
	} else if (left == NULL && right != NULL) {
	return makeSparseDataCopy(right);
	} else if (left != NULL && right == NULL) {
	return makeSparseDataCopy(left);
	}
	SparseData sdata = makeEmptySparseData();
	char vals,index;
	int l_val_len = left->vals->len;
	int r_val_len = right->vals->len;
	int l_ind_len = left->index->len;
	int r_ind_len = right->index->len;
	int val_len=l_val_len+r_val_len;
	int ind_len=l_ind_len+r_ind_len;

	vals = (char )palloc(sizeof(char)val_len);
	index = (char )palloc(sizeof(char)ind_len);

	memcpy(vals ,left->vals->data,l_val_len);
	memcpy(vals+l_val_len,right->vals->data,r_val_len);
	memcpy(index, left->index->data,l_ind_len);
	memcpy(index+l_ind_len,right->index->data,r_ind_len);

	sdata->vals = makeStringInfoFromData(vals,val_len);
	sdata->index = makeStringInfoFromData(index,ind_len);
	sdata->type_of_data = left->type_of_data;
	sdata->unique_value_count = left->unique_value_count+
	right->unique_value_count;
	sdata->total_value_count = left->total_value_count+
	right->total_value_count;
	return sdata;
	}

	static bool lapply_error_checking(Oid foid, List * funcname);

	/**
	* This function applies an input function on all elements of a sparse data.
	* The function is modelled after the corresponding function in R.
	*
	* @param func The name of the function to apply
	* @param sdata The input sparse data to be modified
	* @return A SparseData with the same dimension as sdata but with each element sdata[i] replaced by func(sdata[i])
	*/
	SparseData lapply(text * func, SparseData sdata) {
	Oid argtypes[1] = { FLOAT8OID };
	List * funcname = textToQualifiedNameList(func);
	SparseData result = makeSparseDataCopy(sdata);
	Oid foid = LookupFuncName(funcname, 1, argtypes, false);

	lapply_error_checking(foid, funcname);

	for (int i=0; i<sdata->unique_value_count; i++)
	valref(float8,result,i) =
	DatumGetFloat8(
	OidFunctionCall1(foid,
	Float8GetDatum(valref(float8,sdata,i))));
	return result;
	}

	/* This function checks for error conditions in lapply() function calls.
	*/
	static bool lapply_error_checking(Oid foid, List * func) {
	/* foid != InvalidOid; otherwise LookupFuncName would raise error.
	Here we check that the return type of foid is float8. */
	HeapTuple ftup = SearchSysCache(PROCOID,
	ObjectIdGetDatum(foid), 0, 0, 0);
	Form_pg_proc pform = (Form_pg_proc) GETSTRUCT(ftup);

	if (pform->prorettype != FLOAT8OID)
	ereport(ERROR,
	(errcode(ERRCODE_DATATYPE_MISMATCH),
	errmsg("return type of %s is not double",
	NameListToString(func))));

	// check volatility

	ReleaseSysCache(ftup);
	return true;
	}