blob: 3608ed60da09caaff5017c620581b339599ea6dc [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file average.cpp
*
* @brief Compute the average of vectors
*
*//* ----------------------------------------------------------------------- */
#include <dbconnector/dbconnector.hpp>
#include <modules/shared/HandleTraits.hpp>
#include "average.hpp"
namespace madlib {
// Use Eigen
using namespace dbal::eigen_integration;
namespace modules {
namespace linalg {
/**
 * @brief Transition state for computing average of vectors
 *
 * Note: We assume that the DOUBLE PRECISION array is initialized by the
 * database with length 3, and all elements are 0. Handle::operator[] will
 * perform bounds checking.
 *
 * The state is a typed view over one flat array: the public members
 * numRows, numDimensions and sumOfVectors are references bound into
 * mStorage (see rebind() for the layout).
 */
template <class Handle>
class AvgVectorState {
    // Grant every other instantiation of this template access to the
    // private members of this one.
    template <class OtherHandle>
    friend class AvgVectorState;

public:
    /**
     * @brief Wrap an existing state array
     *
     * @param inArray Value holding the state array. Element 1 is read as the
     *     number of dimensions and used to bind the member references.
     */
    AvgVectorState(const AnyType &inArray)
      : mStorage(inArray.getAs<Handle>()) {

        // NOTE(review): element 1 is converted to uint32_t without a range
        // check here -- confirm only well-formed state arrays reach this
        // constructor.
        rebind(static_cast<uint32_t>(mStorage[1]));
    }

    /**
     * @brief Convert to AnyType by handing out the underlying storage handle
     */
    inline operator AnyType() const {
        return mStorage;
    }

    /**
     * @brief Replace the storage by a freshly allocated array
     *
     * @param inAllocator Allocator used to obtain the new array (requested
     *     with the dbal::AggregateContext, dbal::DoZero and
     *     dbal::ThrowBadAlloc policies)
     * @param inNumDimensions Number of dimensions of the vectors to average
     */
    inline void initialize(const Allocator &inAllocator,
        uint32_t inNumDimensions) {

        mStorage = inAllocator.allocateArray<double, dbal::AggregateContext,
            dbal::DoZero, dbal::ThrowBadAlloc>(arraySize(inNumDimensions));
        rebind(inNumDimensions);
        // numDimensions now refers to element 1 of the new array, so this
        // records the dimension inside the state itself.
        numDimensions = inNumDimensions;
    }

    /**
     * @brief Add the row count and vector sum of another state to this one
     *
     * No dimension check is performed here; callers are expected to have
     * verified that both states have the same number of dimensions.
     *
     * @param inOtherState State whose contents are added
     * @return Reference to this state
     */
    template <class OtherHandle>
    AvgVectorState &operator+=(
        const AvgVectorState<OtherHandle> &inOtherState) {

        numRows += inOtherState.numRows;
        sumOfVectors += inOtherState.sumOfVectors;
        return *this;
    }

private:
    /**
     * @brief Number of array elements needed for a given dimension
     *
     * Two header elements (numRows, numDimensions) plus one element per
     * dimension.
     */
    static inline size_t arraySize(uint32_t inNumDimensions) {
        // Widen before adding. "2 + inNumDimensions" would be evaluated in
        // 32-bit unsigned arithmetic and wrap around for dimensions close to
        // UINT32_MAX, which would let the size check in rebind() pass for an
        // array that is far too short. Where size_t is wider than 32 bits,
        // the sum below cannot wrap.
        return static_cast<size_t>(inNumDimensions) + 2;
    }

    /**
     * @brief Rebind to a new storage array
     *
     * @param inNumDimensions The number of dimensions
     *
     * Array layout:
     * - 0: numRows (number of rows accumulated so far)
     * - 1: numDimensions (dimension of the vectors being averaged)
     * - 2: sumOfVectors (vector with \c numDimensions rows)
     */
    void rebind(uint32_t inNumDimensions) {
        numRows.rebind(&mStorage[0]);
        numDimensions.rebind(&mStorage[1]);
        sumOfVectors.rebind(&mStorage[2], inNumDimensions);
        // The storage must be large enough for the header plus the vector.
        madlib_assert(mStorage.size() >= arraySize(inNumDimensions),
            std::runtime_error("Out-of-bounds array access detected."));
    }

    // Declared before the references below, which are bound into it.
    Handle mStorage;

public:
    // Number of rows accumulated so far (array element 0).
    typename HandleTraits<Handle>::ReferenceToUInt64 numRows;
    // Dimension of the vectors being averaged (array element 1).
    typename HandleTraits<Handle>::ReferenceToUInt32 numDimensions;
    // Component-wise sum of all accumulated vectors (array elements 2...).
    typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap
        sumOfVectors;
};
/**
 * @brief Transition step: fold one input vector into the running state
 *
 * args[0] is the current state and args[1] the input array. A NULL input, or
 * an array containing NULL elements, leaves the state unchanged.
 *
 * @throws std::invalid_argument if the input's size differs from the
 *     dimension recorded in the state
 */
AnyType
avg_vector_transition::run(AnyType& args) {
    AvgVectorState<MutableArrayHandle<double> > state = args[0];

    // NULL rows do not contribute to the average.
    if (args[1].isNull()) {
        return args[0];
    }

    MappedColumnVector vec;
    try {
        MappedColumnVector mapped = args[1].getAs<MappedColumnVector>();
        vec.rebind(mapped.memoryHandle(), mapped.size());
    } catch (const ArrayWithNullException &) {
        // Arrays with NULL elements are skipped as well.
        return args[0];
    }

    if (state.numRows == 0) {
        // First contributing row: size the state after this vector.
        state.initialize(*this, static_cast<uint32_t>(vec.size()));
    } else {
        const bool inputSizeDiffers =
            vec.size() != state.sumOfVectors.size();
        const bool stateInconsistent = state.numDimensions !=
            static_cast<uint32_t>(state.sumOfVectors.size());

        if (inputSizeDiffers || stateInconsistent) {
            throw std::invalid_argument("Invalid arguments: Dimensions of points "
                "not consistent.");
        }
    }

    ++state.numRows;
    state.sumOfVectors += vec;
    return state;
}
/**
 * @brief Merge step: combine two transition states into one
 *
 * args[0] and args[1] are the two states. If either has seen no rows, the
 * other one is returned as is.
 *
 * @throws std::invalid_argument if both states are non-empty but record
 *     different numbers of dimensions
 */
AnyType
avg_vector_merge::run(AnyType& args) {
    AvgVectorState<MutableArrayHandle<double> > accumulated = args[0];
    AvgVectorState<ArrayHandle<double> > incoming = args[1];

    // Trivial cases: one side is still the initial (empty) state.
    if (accumulated.numRows == 0) {
        return incoming;
    }
    if (incoming.numRows == 0) {
        return accumulated;
    }

    if (accumulated.numDimensions != incoming.numDimensions) {
        throw std::invalid_argument("Invalid arguments: Dimensions of points "
            "not consistent.");
    }

    accumulated += incoming;
    return accumulated;
}
/**
 * @brief Final step: turn the accumulated sum into the average vector
 *
 * args[0] is the transition state. The result is sumOfVectors divided
 * component-wise by numRows.
 *
 * NOTE(review): there is no guard for numRows == 0 here -- confirm what the
 * caller expects for an aggregate over zero rows.
 */
AnyType
avg_vector_final::run(AnyType& args) {
    AvgVectorState<ArrayHandle<double> > state = args[0];

    MutableNativeColumnVector mean(
        allocateArray<double>(state.sumOfVectors.size()));
    const double rowCount = static_cast<double>(state.numRows);

    mean = state.sumOfVectors / rowCount;
    return mean;
}
/**
 * @brief Transition step: fold the normalized input vector into the state
 *
 * args[0] is the current state and args[1] the input array. A NULL input, or
 * an array containing NULL elements, leaves the state unchanged. Otherwise
 * the input is normalized before being added to the running sum.
 *
 * NOTE(review): the behaviour of normalized() for a zero-norm input depends
 * on the Eigen version in use -- confirm it cannot inject non-finite values.
 *
 * @throws std::invalid_argument if the input's size differs from the
 *     dimension recorded in the state
 */
AnyType
normalized_avg_vector_transition::run(AnyType& args) {
    AvgVectorState<MutableArrayHandle<double> > state = args[0];

    // NULL rows do not contribute to the average.
    if (args[1].isNull()) {
        return args[0];
    }

    MappedColumnVector vec;
    try {
        MappedColumnVector mapped = args[1].getAs<MappedColumnVector>();
        vec.rebind(mapped.memoryHandle(), mapped.size());
    } catch (const ArrayWithNullException &) {
        // Arrays with NULL elements are skipped as well.
        return args[0];
    }

    if (state.numRows == 0) {
        // First contributing row: size the state after this vector.
        state.initialize(*this, static_cast<uint32_t>(vec.size()));
    } else {
        const bool inputSizeDiffers =
            vec.size() != state.sumOfVectors.size();
        const bool stateInconsistent = state.numDimensions !=
            static_cast<uint32_t>(state.sumOfVectors.size());

        if (inputSizeDiffers || stateInconsistent) {
            throw std::invalid_argument("Invalid arguments: Dimensions of points "
                "not consistent.");
        }
    }

    ++state.numRows;
    state.sumOfVectors += vec.normalized();
    return state;
}
/**
 * @brief Final step: average of the accumulated vectors, normalized
 *
 * args[0] is the transition state. The result is sumOfVectors divided by
 * numRows, with normalized() applied to that quotient.
 *
 * NOTE(review): there is no guard for numRows == 0 here -- confirm what the
 * caller expects for an aggregate over zero rows.
 */
AnyType
normalized_avg_vector_final::run(AnyType& args) {
    AvgVectorState<ArrayHandle<double> > state = args[0];

    MutableNativeColumnVector unitMean(
        allocateArray<double>(state.sumOfVectors.size()));
    const double rowCount = static_cast<double>(state.numRows);

    unitMean = (state.sumOfVectors / rowCount).normalized();
    return unitMean;
}
} // namespace linalg
} // namespace modules
} // namespace madlib