// blob: 7ffeae6f9290d3133aaa2d9f8f751b066a0e828f [file] [log] [blame]
/**
Functions that compute array average and standard deviation
*/
#include "dbconnector/dbconnector.hpp"
#include "modules/shared/HandleTraits.hpp"
#include "utils_regularization.hpp"
#include <float.h>
#include <stdexcept>
namespace madlib {
namespace modules {
namespace convex {
using namespace madlib::dbal::eigen_integration;
/**
 * @brief View over a flat array of doubles that stores per-dimension
 *        statistics for the scaling aggregates in this file.
 *
 * Storage layout, as fixed by rebind() and arraySize():
 *   - element 0:                                dimension
 *   - element 1:                                numRows
 *   - elements [2, 2 + dimension):              mean
 *   - elements [2 + dimension, 2 + 2*dimension): std
 *
 * The public members dimension, numRows, mean and std are bound to
 * addresses inside mStorage by rebind(); they are not separate copies.
 * The transition function in this file accumulates sums into mean and std,
 * and the final functions convert them in place.
 *
 * @tparam Handle Array handle type the state is read from / stored in
 *                (this file uses ArrayHandle<double> and
 *                MutableArrayHandle<double>).
 */
template <class Handle>
class ScalesState
{
// Grants other instantiations access to mStorage (needed by operator=).
template <class OtherHandle> friend class ScalesState;
public:
/**
 * @brief Wrap the array carried by inArray and bind the member views to it.
 *
 * NOTE(review): rebind() reads elements 0 and 1 and derives the vector
 * extents from element 0, so the incoming array is presumably expected to
 * hold at least two elements -- confirm against the SQL initial condition.
 */
ScalesState (const AnyType& inArray):
mStorage(inArray.getAs<Handle>())
{
rebind();
}
/**
 * @brief Convert the state back to an AnyType by handing out its storage.
 */
inline operator AnyType () const
{
return mStorage;
}
/**
 * @brief Replace the storage with a newly allocated array sized for
 *        inDimension and re-bind all member views.
 *
 * The array is requested with the dbal::AggregateContext, dbal::DoZero and
 * dbal::ThrowBadAlloc policies (presumably: allocated in the aggregate
 * memory context, zero-filled, throwing on failure -- see the allocator
 * for the exact semantics).
 *
 * @param inAllocator Allocator used to obtain the array.
 * @param inDimension Number of dimensions the state must hold.
 */
inline void allocate (const Allocator& inAllocator,
uint32_t inDimension)
{
mStorage = inAllocator.allocateArray<double,
dbal::AggregateContext,
dbal::DoZero,
dbal::ThrowBadAlloc>(
arraySize(inDimension));
// dimension has to be bound and written first: rebind() reads it to
// compute where mean and std live inside the new array.
dimension.rebind(&mStorage[0]);
dimension = inDimension;
rebind();
}
/**
 * @brief Copy the contents of another state element by element.
 *
 * Exactly mStorage.size() elements are copied; the size of the other
 * state's storage is not checked, and the member views are not re-bound
 * afterwards.
 * NOTE(review): this assumes both states have the same dimension --
 * confirm at the call sites.
 */
template <class OtherHandle>
ScalesState& operator= (const ScalesState<OtherHandle>& inOtherState)
{
for (size_t i = 0; i < mStorage.size(); i++)
mStorage[i] = inOtherState.mStorage[i];
return *this;
}
/**
 * @brief Number of array elements needed for a state of inDimension
 *        dimensions: two scalar slots plus two vectors of inDimension.
 */
static inline uint32_t arraySize (const uint32_t inDimension)
{
return 2 + 2 * inDimension;
}
protected:
/**
 * @brief Point the member views at their slots inside mStorage.
 *
 * The order matters: dimension is bound first because the extents and the
 * offset of mean/std are computed from its current value.
 */
void rebind ()
{
dimension.rebind(&mStorage[0]);
numRows.rebind(&mStorage[1]);
mean.rebind(&mStorage[2], dimension);
std.rebind(&mStorage[2 + dimension], dimension);
}
// Backing array; every public member below is a view into it.
Handle mStorage;
public:
// Number of dimensions (element 0 of the storage).
typename HandleTraits<Handle>::ReferenceToUInt32 dimension;
// Number of rows aggregated so far (element 1 of the storage).
typename HandleTraits<Handle>::ReferenceToUInt64 numRows;
// Per-dimension mean slot (running sum until a final function runs).
typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap mean;
// Per-dimension std slot (running sum of squares until a final function runs).
typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap std;
};
// ------------------------------------------------------------------------
/**
 * @brief Report whether a value is numerically indistinguishable from zero.
 *
 * The magnitude is compared against the absolute tolerance DBL_EPSILON.
 * A tolerance scaled by the value itself (|x| <= eps * |x|) cannot be used
 * when the reference value is zero: it is satisfied only by an exact 0.0,
 * so tiny non-zero round-off results would never be detected.
 *
 * @param number Value to test.
 * @return true if |number| <= DBL_EPSILON; false otherwise (NaN yields false).
 */
bool close_to_zero(double number) {
    return fabs(number) <= DBL_EPSILON;
}
/**
 * @brief Transition step: fold one input row into the running sums.
 *
 * Arguments:
 *   - args[0]: aggregate state (ScalesState over a mutable double array)
 *   - args[1]: input array x for the current row
 *   - args[2]: dimension, read only while the state holds no rows
 *
 * For every dimension i, x(i) is added to state.mean(i) and x(i)^2 to
 * state.std(i); numRows is incremented. Rows whose array contains NULL
 * values are skipped and the state is returned unchanged.
 *
 * @throws std::invalid_argument if the requested dimension is negative or
 *         the input array has fewer elements than the state's dimension
 *         (which would otherwise be read out of bounds).
 */
AnyType utils_var_scales_transition::run (AnyType& args)
{
    ScalesState<MutableArrayHandle<double> > state = args[0];

    // x is built in place so that the NULL-array exception raised by the
    // conversion can be intercepted right here.
    MappedColumnVector x(NULL);
    try {
        new (&x) MappedColumnVector(args[1].getAs<MappedColumnVector>());
    } catch(const ArrayWithNullException &e) {
        // warning("Input array contains NULL values, skipping it for mean and std computations");
        return state;
    }

    if (state.numRows == 0)
    {
        const int requested = args[2].getAs<int>();
        // A negative value would wrap around when converted to uint32_t and
        // could make arraySize() overflow to a tiny allocation.
        if (requested < 0)
            throw std::invalid_argument(
                "utils_var_scales: dimension must be non-negative");

        const uint32_t numDims = static_cast<uint32_t>(requested);
        state.allocate(*this, numDims);
        state.dimension = numDims;
        state.numRows = 0;
        state.mean.setZero();
        state.std.setZero();
    }

    const uint32_t width = state.dimension;
    // The loop below indexes x up to width - 1; refuse rows that are too
    // short instead of reading past the end of the array.
    if (static_cast<uint64_t>(x.size()) < static_cast<uint64_t>(width))
        throw std::invalid_argument(
            "utils_var_scales: input array has fewer elements than the "
            "state dimension");

    for (uint32_t i = 0; i < width; i++)
    {
        state.mean(i) += x(i);
        state.std(i) += x(i) * x(i);
    }
    state.numRows++;
    return state;
}
// ------------------------------------------------------------------------
/**
 * @brief Merge step: combine two partial aggregate states.
 *
 * args[0] and args[1] are the two states. If either one holds no rows the
 * other is returned as is; otherwise the sums and row count of the second
 * state are added into the first, which is returned.
 */
AnyType utils_var_scales_merge::run (AnyType& args)
{
    ScalesState<MutableArrayHandle<double> > lhs = args[0];
    ScalesState<MutableArrayHandle<double> > rhs = args[1];

    // Nothing accumulated on the left: the right state is the result.
    if (lhs.numRows == 0) {
        return rhs;
    }

    // Only fold the right state in when it actually carries rows.
    if (rhs.numRows != 0) {
        lhs.mean += rhs.mean;
        lhs.std += rhs.std;
        lhs.numRows += rhs.numRows;
    }
    return lhs;
}
// ------------------------------------------------------------------------
/**
 * @brief Final step: turn the accumulated sums into mean and standard
 *        deviation, in place.
 *
 * args[0] is the aggregate state. Returns Null() when no rows were
 * aggregated. Otherwise mean(i) becomes sum(x_i) / numRows and std(i)
 * becomes sqrt(sum(x_i^2) / numRows - mean(i)^2).
 */
AnyType utils_var_scales_final::run (AnyType& args)
{
    ScalesState<MutableArrayHandle<double> > state = args[0];
    if (state.numRows == 0) return Null();

    const double rowCount = static_cast<double>(state.numRows);
    state.mean /= rowCount;
    state.std /= rowCount;

    for (uint32_t i = 0; i < state.dimension; i++) {
        double variance = state.std(i) - state.mean(i) * state.mean(i);
        // For (nearly) constant columns round-off can push the difference
        // slightly below zero, and sqrt of a negative value is NaN. Clamp
        // to zero; a NaN variance is left alone so bad input stays visible.
        if (variance < 0.0)
            variance = 0.0;
        state.std(i) = sqrt(variance);
    }
    return state;
}
// ------------------------------------------------------------------------
/**
 * @brief Final step that never reports a standard deviation of zero.
 *
 * This final function is used when a standard deviation of 1.0 is wanted
 * instead of 0.0 for any dimension. It is most likely to be used by neural
 * networks: in other modules such as elastic_net a std value of 0.0 results
 * in the corresponding coefficient being set to 0.0 as well, whereas neural
 * networks end up using a std value of 1.0 instead.
 *
 * args[0] is the aggregate state. Returns Null() when no rows were
 * aggregated. With exactly one row, every dimension gets mean 0.0 and
 * std 1.0. Otherwise mean and std are computed as in
 * utils_var_scales_final, and any std that close_to_zero() accepts is
 * replaced by 1.0.
 */
AnyType utils_var_scales_non_zero_std_final::run (AnyType& args)
{
    ScalesState<MutableArrayHandle<double> > state = args[0];
    if (state.numRows == 0) return Null();

    const double rowCount = static_cast<double>(state.numRows);
    state.mean /= rowCount;
    state.std /= rowCount;

    const bool singleRow = (state.numRows == 1);
    for (uint32_t i = 0; i < state.dimension; i++) {
        if (singleRow) {
            state.mean(i) = 0.0;
            state.std(i) = 1.0;
        }
        else {
            double variance = state.std(i) - state.mean(i) * state.mean(i);
            // Round-off can make the variance of a constant column slightly
            // negative; sqrt would then return NaN, which is not "close to
            // zero" and would escape the replacement below.
            if (variance < 0.0)
                variance = 0.0;
            state.std(i) = sqrt(variance);
            if(close_to_zero(state.std(i)))
                state.std(i) = 1.0;
        }
    }
    return state;
}
// ------------------------------------------------------------------------
/**
 * @brief Expose an aggregate state as a composite value.
 *
 * args[0] is the (read-only) state; the result is a tuple made of its mean
 * vector followed by its std vector.
 */
AnyType __utils_var_scales_result::run (AnyType& args)
{
    ScalesState<ArrayHandle<double> > scales = args[0];

    AnyType result;
    result << scales.mean;
    result << scales.std;
    return result;
}
// ------------------------------------------------------------------------
/**
 * @brief Centre and scale one data array with the given mean and std.
 *
 * Arguments:
 *   - args[0]: data array x
 *   - args[1]: per-dimension mean
 *   - args[2]: per-dimension standard deviation
 *
 * Each element becomes (x(i) - mean(i)) / std(i); when std(i) is accepted
 * by close_to_zero() the element is only centred, to avoid dividing by
 * zero. Returns Null() if x contains NULL values.
 *
 * @throws std::invalid_argument if mean or std has fewer elements than x
 *         (they would otherwise be read out of bounds).
 */
AnyType utils_normalize_data::run (AnyType& args)
{
    // x is built in place so that the NULL-array exception raised by the
    // conversion can be intercepted right here.
    MutableMappedColumnVector x(NULL);
    try {
        new (&x) MutableMappedColumnVector(args[0].getAs<MutableMappedColumnVector>());
    } catch(const ArrayWithNullException &e) {
        // warning("Input array contains NULL values, skipping this array normalization");
        return Null();
    }

    MappedColumnVector meanVec = args[1].getAs<MappedColumnVector>();
    MappedColumnVector stdVec = args[2].getAs<MappedColumnVector>();

    // The loop indexes meanVec and stdVec with every index valid for x.
    if (meanVec.size() < x.size() || stdVec.size() < x.size())
        throw std::invalid_argument(
            "utils_normalize_data: mean and std arrays must have at least "
            "as many elements as the data array");

    for (int i = 0; i < x.size(); i++)
    {
        const double centred = x(i) - meanVec(i);
        if(close_to_zero(stdVec(i)))
            x(i) = centred;
        else
            x(i) = centred / stdVec(i);
    }
    return x;
}
}
}
}