blob: 75c3bc54f0301b523b9b515855a6c34b40ee0994 [file] [log] [blame]
#include "postgres.h"
#include "fmgr.h"
#include "utils/array.h"
#include "math.h"
#include "catalog/pg_type.h"
#ifndef NO_PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
Datum hash_array( PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(hash_array);
Datum hash_array( PG_FUNCTION_ARGS)
{
ArrayType *state = PG_GETARG_ARRAYTYPE_P(0);
int dimstate = ARR_NDIM(state);
int *dimsstate = ARR_DIMS(state);
int numstate = ArrayGetNItems(dimstate,dimsstate);
int32 *vals_state=(int32 *)ARR_DATA_PTR(state);
unsigned long hash = 65599;
unsigned short c;
int i = 0;
for (;i<numstate;i++)
{
c = vals_state[i];
hash = c + (hash << 7) + (hash << 16) - hash;
}
PG_RETURN_INT32(hash);
}
/*
 * ChiSquareStatistic - sum of (observed - expected)^2 / expected over a
 * window of `size` cells.
 *
 * For cell k (0 <= k < size):
 *   observed = values[from + k]
 *   expected = values[1 + k] * (fract / total)
 * Note that the expected counts are always derived from values[1..size],
 * independent of `from` (presumably the overall per-class totals; verify
 * against the caller's array layout).
 *
 * Cells whose expected count is not strictly positive are skipped.
 * Returns 0 when either fract or total is zero.
 */
static float8 ChiSquareStatistic(float8* values, int from, int size, float8 fract, float8 total){
    float8 scale;
    float8 chi = 0;
    int k;

    if (fract == 0.0 || total == 0) {
        return 0;
    }

    scale = fract / total;
    for (k = 0; k < size; ++k) {
        float8 expected = ((float8)values[1 + k]) * scale;

        if (expected > 0) {
            float8 delta = (float8)values[from + k] - expected;

            chi += delta * delta / expected;
        }
    }
    return chi;
}
static float entropyWeighted(int* values, int size, float fract, float total){
int i = 0;
float mult = 0;
float result = 0;
if((fract == 0.0)||(total == 0)){
return 0;
}
mult = fract/total;
for(; i < size; ++i){
if(values[i] > 0){
result -= (values[i]/fract)*log(values[i]/fract);
}
}
return result*mult;
}
/*
 * entropyWeightedFloat - entropy of a window of float8 counts, scaled by a
 * weight.
 *
 * Looks at values[from .. from + size - 1].  For every entry that is
 * strictly positive, p = entry / fract contributes -p * ln(p) to the sum.
 * The sum is multiplied by fract / total before it is returned.
 *
 * Returns 0 when either fract or total is zero.
 */
static float8 entropyWeightedFloat(float8* values, int from, int size, float8 fract, float8 total){
    float8 weight;
    float8 entropy = 0;
    int offset;

    if (fract == 0.0 || total == 0) {
        return 0;
    }

    weight = fract / total;
    for (offset = 0; offset < size; ++offset) {
        float8 share;

        /* Non-positive counts contribute nothing to the sum. */
        if (!(values[from + offset] > 0)) {
            continue;
        }
        share = values[from + offset] / fract;
        entropy -= share * log(share);
    }
    return entropy * weight;
}
Datum aggr_InfoGain(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(aggr_InfoGain);
Datum aggr_InfoGain(PG_FUNCTION_ARGS) {
ArrayType *state = PG_GETARG_ARRAYTYPE_P(0);
int dimstate = ARR_NDIM(state);
int *dimsstate = ARR_DIMS(state);
int numstate = ArrayGetNItems(dimstate,dimsstate);
float8 *vals_state=(float8 *)ARR_DATA_PTR(state);
float8 truevalue = PG_GETARG_FLOAT8(1);
float8 trueweight = PG_GETARG_FLOAT8(2);
int32 posclasses = PG_GETARG_INT32(3);
int32 trueclass = PG_GETARG_INT32(5);
ArrayType *pgarray;
vals_state[0] += trueweight;
vals_state[trueclass] += trueweight;
vals_state[(int)(truevalue*(posclasses+1))] += trueweight;
vals_state[(int)(truevalue*(posclasses+1) + trueclass)] += trueweight;
pgarray = construct_array((Datum *)vals_state,
numstate,FLOAT8OID,
sizeof(float8),true,'d');
PG_RETURN_ARRAYTYPE_P(pgarray);
}
Datum compute_InfoGain(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(compute_InfoGain);
Datum compute_InfoGain(PG_FUNCTION_ARGS) {
ArrayType *state = PG_GETARG_ARRAYTYPE_P(0);
float8 *vals_state=(float8 *)ARR_DATA_PTR(state);
int32 posclasses = PG_GETARG_INT32(1);
int32 posvalues = PG_GETARG_INT32(2);
float8 *result = palloc(sizeof(float8)*4);
int i = 1;
int max = 1;
float8 tot = entropyWeightedFloat(vals_state, 1, posclasses, vals_state[0], vals_state[0]);
ArrayType *pgarray;
/*ereport(NOTICE,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg(">>>>>>>R1: %f %f %f %f %f %f %f %f %f", vals_state[0], vals_state[1], vals_state[2], vals_state[3], vals_state[4], vals_state[5], vals_state[6], vals_state[7], vals_state[8])));*/
for(; i < (posvalues+1); ++i){
tot -= entropyWeightedFloat(vals_state, (i*(posclasses+1)+1), posclasses, vals_state[i*(posclasses+1)], vals_state[0]);
}
result[0] = tot;
i = 1;
tot = 0;
for(; i < (posvalues+1); ++i){
tot += ChiSquareStatistic(vals_state, (i*(posclasses+1)+1), posclasses, vals_state[i*(posclasses+1)], vals_state[0]);
}
result[1] = tot;
i = 1;
for(; i < (posclasses+1); ++i){
if(vals_state[max] < vals_state[i]){
max = i;
}
}
if(vals_state[0] == 0){
result[2] = 0;
}else{
result[2] = vals_state[max]/vals_state[0];
}
result[3] = max;
/*ereport(NOTICE,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg(">>>>>>>R2: %f %f %f %f %f %f %f %f", result[0], result[1], result[2] ,result[3], vals_state[0], vals_state[1], vals_state[2], vals_state[3])));*/
pgarray = construct_array((Datum *)result,
4,FLOAT8OID,
sizeof(float8),true,'d');
PG_RETURN_ARRAYTYPE_P(pgarray);
}
Datum mallocset(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(mallocset);
Datum mallocset(PG_FUNCTION_ARGS) {
int size = PG_GETARG_INT32(0);
int value = PG_GETARG_INT32(1);
float8 *result = (float8*)palloc(sizeof(float8)*size);
ArrayType *pgarray;
memset(result, value, sizeof(float8)*size);
pgarray = construct_array((Datum *)result,
size, FLOAT8OID,
sizeof(float8),true,'d');
PG_RETURN_ARRAYTYPE_P(pgarray);
}
Datum WeightedNoReplacement(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(WeightedNoReplacement);

/*
 * WeightedNoReplacement - pick distinct positions from 1..value2 at random.
 *
 * Arguments read:
 *   0: value1 int4, how many positions to pick
 *   1: value2 int4, size of the range to pick from
 *
 * Positions 1..value2 are visited in order.  Position i + 1 is accepted when
 * rand() % 100 is below 100 * remaining / (value2 - i), where `remaining` is
 * the number of picks still outstanding.  Once remaining equals the number
 * of positions left, the ratio reaches 1 and every further position is
 * accepted, so exactly value1 positions are returned whenever
 * value1 <= value2.
 *
 * Fixes over the previous version:
 *   - the outstanding count is now decremented on every pick; before, it
 *     never changed, so more than value1 positions could be accepted and
 *     written past the end of the value1-element buffer;
 *   - the returned int8 array holds only the positions actually picked, so
 *     no uninitialised slots are exposed when value1 > value2.
 *
 * NOTE(review): the acceptance test has 1% granularity because of
 * rand() % 100; kept as is.
 */
Datum WeightedNoReplacement(PG_FUNCTION_ARGS) {
    int value1 = PG_GETARG_INT32(0);
    int value2 = PG_GETARG_INT32(1);
    int64 *result = (int64*)palloc(sizeof(int64)*value1);
    int32 remaining = value1;
    int32 picked = 0;
    int32 i;
    ArrayType *pgarray;

    for (i = 0; i < value2 && remaining > 0; i++) {
        if (rand() % 100 < ((float)remaining / (value2 - i)) * 100) {
            /* remaining > 0 guarantees picked < value1, so this is in bounds. */
            result[picked] = i + 1;
            picked++;
            remaining--;
        }
    }

    /*
     * NOTE(review): the int64 buffer is passed as a Datum array with
     * elmbyval = true; this assumes a Datum is as wide as an int64 and that
     * int8 is a pass-by-value type in this build.
     */
    pgarray = construct_array((Datum *)result,
                              picked, INT8OID,
                              sizeof(int64), true, 'd');
    PG_RETURN_ARRAYTYPE_P(pgarray);
}
Datum findentropy(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(findentropy);
Datum findentropy(PG_FUNCTION_ARGS) {
ArrayType *values = PG_GETARG_ARRAYTYPE_P(0);
ArrayType *classes = PG_GETARG_ARRAYTYPE_P(1);
int posvalues = PG_GETARG_INT32(2);
int posclasses = PG_GETARG_INT32(3);
int dimvalues = ARR_NDIM(values);
int *dimsvalues = ARR_DIMS(values);
int numvalues = ArrayGetNItems(dimvalues,dimsvalues);
int32 *vals_values=(int32 *)ARR_DATA_PTR(values);
int32 *vals_classes=(int32 *)ARR_DATA_PTR(classes);
int *pre_entropy = (int*)palloc(sizeof(int)*posclasses);
int i;
int j;
int sum = 0;
float8 result = 0;
for (i=0; i<posvalues; ++i){
memset(pre_entropy, 0, sizeof(int)*posclasses);
for(j=0, sum=0; j<numvalues; ++j){
if(vals_values[j] == (i+1)){
pre_entropy[vals_classes[j]-1]++;
sum++;
}
}
result += entropyWeighted(pre_entropy, posclasses, (float)sum, (float)numvalues);
}
free(pre_entropy);
PG_RETURN_FLOAT8((float8)result);
}