// blob: 039052db00b28d2d68ab8b714c9a9b821d216830 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file lmf_igd.cpp
*
* @brief Low-rank Matrix Factorization functions
*
*//* ----------------------------------------------------------------------- */
#include <dbconnector/dbconnector.hpp>
#include <modules/shared/HandleTraits.hpp>
#include "lmf_igd.hpp"
#include "task/lmf.hpp"
#include "algo/igd.hpp"
#include "algo/loss.hpp"
#include "type/tuple.hpp"
#include "type/model.hpp"
#include "type/state.hpp"
namespace madlib {
namespace modules {
namespace convex {
// These two typedefs name the class-template instantiations whose public
// static methods are called by the aggregate functions in this file:
// LMFIGDAlgorithm::transition/merge/final and LMFLossAlgorithm::transition/merge.
// Both are instantiated with LMFIGDState over MutableArrayHandle<double>,
// LMFIGDState over ArrayHandle<double>, and the LMF task built from
// LMFModel<MutableArrayHandle<double> > and LMFTuple.
typedef IGD<LMFIGDState<MutableArrayHandle<double> >, LMFIGDState<ArrayHandle<double> >,
LMF<LMFModel<MutableArrayHandle<double> >, LMFTuple > > LMFIGDAlgorithm;
typedef Loss<LMFIGDState<MutableArrayHandle<double> >, LMFIGDState<ArrayHandle<double> >,
LMF<LMFModel<MutableArrayHandle<double> >, LMFTuple > > LMFLossAlgorithm;
/**
 * @brief Perform the low-rank matrix factorization transition step
 *
 * Called for each tuple.
 *
 * Layout of @c args as read by this function:
 * - args[0]: transition state (numRows == 0 marks the first tuple)
 * - args[1]: row index of the entry, 1-based (int32)
 * - args[2]: column index of the entry, 1-based (int32)
 * - args[3]: value of the entry (double)
 * - args[4]: state of the previous iteration, or NULL
 * - args[5]: row_dim (int32), args[6]: column_dim (int32),
 *   args[7]: max_rank (int32), args[8]: stepsize (double),
 *   args[9]: scale_factor (double); only read when args[4] is NULL
 *
 * @return The updated transition state
 * @throws std::runtime_error if a configuration parameter is invalid or if
 *     the row/column index of the tuple is not within [1, row_dim] x
 *     [1, column_dim]
 */
AnyType
lmf_igd_transition::run(AnyType &args) {
    // The real state.
    // For the first tuple: args[0] is nothing more than a marker that
    // indicates that we should do some initial operations.
    // For other tuples: args[0] holds the computation state until last tuple
    LMFIGDState<MutableArrayHandle<double> > state = args[0];

    // initialize the state if first tuple
    if (state.algo.numRows == 0) {
        if (!args[4].isNull()) {
            // Continue from the state produced by the previous iteration
            LMFIGDState<ArrayHandle<double> > previousState = args[4];
            state.allocate(*this, previousState.task.rowDim,
                previousState.task.colDim, previousState.task.maxRank);
            state = previousState;
        } else {
            // configuration parameters
            int32_t rowDim = args[5].getAs<int32_t>();
            if (rowDim <= 0) {
                throw std::runtime_error("Invalid parameter: row_dim <= 0");
            }
            int32_t columnDim = args[6].getAs<int32_t>();
            if (columnDim <= 0) {
                throw std::runtime_error("Invalid parameter: column_dim <= 0");
            }
            int32_t maxRank = args[7].getAs<int32_t>();
            if (maxRank <= 0) {
                throw std::runtime_error("Invalid parameter: max_rank <= 0");
            }
            if (maxRank >= rowDim || maxRank >= columnDim) {
                throw std::runtime_error("Invalid parameter: "
                    "max_rank >= row_dim || max_rank >= column_dim");
            }
            double stepsize = args[8].getAs<double>();
            if (stepsize <= 0.) {
                throw std::runtime_error("Invalid parameter: stepsize <= 0.0");
            }
            double scaleFactor = args[9].getAs<double>();
            if (scaleFactor <= 0.) {
                throw std::runtime_error("Invalid parameter: "
                    "scale_factor <= 0.0");
            }

            state.allocate(*this, rowDim, columnDim, maxRank);
            state.task.stepsize = stepsize;
            state.task.scaleFactor = scaleFactor;
            state.task.model.initialize(scaleFactor);
        }
        // resetting in either case
        state.reset();
    }

    // tuple: validate the 1-based indices before they are turned into
    // 0-based positions, so that no out-of-range position reaches the model
    const int32_t rowIndex = args[1].getAs<int32_t>();
    const int32_t columnIndex = args[2].getAs<int32_t>();
    if (rowIndex == 0 || columnIndex == 0) {
        throw std::runtime_error("Invalid parameter: [col_row] = 0 or "
            "[col_column] = 0 in table [rel_source]");
    }
    if (rowIndex < 0 || columnIndex < 0) {
        throw std::runtime_error("Invalid parameter: [col_row] < 0 or "
            "[col_column] < 0 in table [rel_source]");
    }
    if (rowIndex > static_cast<int32_t>(state.task.rowDim)
        || columnIndex > static_cast<int32_t>(state.task.colDim)) {
        throw std::runtime_error("Invalid parameter: [col_row] > row_dim or "
            "[col_column] > column_dim in table [rel_source]");
    }

    LMFTuple tuple;
    // database starts from 1, while C++ starts from 0
    tuple.indVar.i = rowIndex - 1;
    tuple.indVar.j = columnIndex - 1;
    tuple.depVar = args[3].getAs<double>();

    // Now do the transition step
    LMFIGDAlgorithm::transition(state, tuple);
    LMFLossAlgorithm::transition(state, tuple);
    state.algo.numRows ++;

    return state;
}
/**
 * @brief Perform the preliminary aggregation function: Merge transition states
 *
 * args[0] is taken as the mutable state that receives the merge, args[1] as
 * the state merged into it.
 *
 * @return The non-empty state if either side has numRows == 0, otherwise the
 *     merged args[0] state
 */
AnyType
lmf_igd_merge::run(AnyType &args) {
    LMFIGDState<MutableArrayHandle<double> > accumulated = args[0];
    LMFIGDState<ArrayHandle<double> > incoming = args[1];

    // Trivial cases: one side is still the initial state, so the other side
    // is already the answer.
    if (accumulated.algo.numRows == 0) {
        return incoming;
    }
    if (incoming.algo.numRows == 0) {
        return accumulated;
    }

    // Both sides carry data: combine them.
    LMFIGDAlgorithm::merge(accumulated, incoming);
    LMFLossAlgorithm::merge(accumulated, incoming);

    // The row count is combined last on purpose: per the original author's
    // note, the model averaging depends on the pre-merge values of numRows.
    accumulated.algo.numRows += incoming.algo.numRows;

    return accumulated;
}
/**
 * @brief Perform the low-rank matrix factorization final step
 *
 * @return Null if the aggregate has seen no rows (numRows == 0), otherwise
 *     the state after LMFIGDAlgorithm::final and computeRMSE have been applied
 */
AnyType
lmf_igd_final::run(AnyType &args) {
    // A mutable handle is requested here; depending on the backend, obtaining
    // it might involve a deep copy.
    LMFIGDState<MutableArrayHandle<double> > finalState = args[0];

    // No data was aggregated: the result is Null.
    if (finalState.algo.numRows == 0) {
        return Null();
    }

    // Finalize. LMFLossAlgorithm::final is deliberately not called: it is an
    // empty function and calling it causes a compiler warning.
    LMFIGDAlgorithm::final(finalState);
    finalState.computeRMSE();

    return finalState;
}
/**
 * @brief Return the difference in RMSE between two states
 *
 * @return |RMSE(args[0]) - RMSE(args[1])|
 */
AnyType
internal_lmf_igd_distance::run(AnyType &args) {
    LMFIGDState<ArrayHandle<double> > first = args[0];
    LMFIGDState<ArrayHandle<double> > second = args[1];

    const double firstRMSE = first.task.RMSE;
    const double secondRMSE = second.task.RMSE;

    return std::abs(firstRMSE - secondRMSE);
}
/**
 * @brief Return the coefficients and diagnostic statistics of the state
 *
 * @return A composite value holding, in this order, the transposed matrixU,
 *     the transposed matrixV, and the RMSE stored in the state
 */
AnyType
internal_lmf_igd_result::run(AnyType &args) {
    LMFIGDState<ArrayHandle<double> > finishedState = args[0];

    // Both factor matrices are emitted transposed relative to how the model
    // stores them.
    Matrix transposedU = trans(finishedState.task.model.matrixU);
    Matrix transposedV = trans(finishedState.task.model.matrixV);
    double rmse = finishedState.task.RMSE;

    // Append the three result fields one at a time, in output order.
    AnyType result;
    result << transposedU;
    result << transposedV;
    result << rmse;

    return result;
}
} // namespace convex
} // namespace modules
} // namespace madlib