src/modules/regress/multilogistic.cpp - madlib - Git at Google

 /* ----------------------------------------------------------------------- *//**
  *
  * @file multilogistic.cpp
  *
  * @brief Multinomial Logistic-Regression functions
  *
  * We implement the iteratively-reweighted-least-squares method.
  *
  *//* ----------------------------------------------------------------------- */

 #include <dbconnector/dbconnector.hpp>

 #include <modules/shared/HandleTraits.hpp>
 #include <modules/prob/boost.hpp>
 #include <boost/math/distributions.hpp>
 #include <modules/prob/student.hpp>
 #include "multilogistic.hpp"

 #include <vector>
 #include <fstream>

 namespace madlib {

 // Use Eigen
 using namespace dbal::eigen_integration;

 namespace modules {

 // Import names from other MADlib modules
 using dbal::NoSolutionFoundException;

 namespace regress {

 /**
  * @brief Logistic function
  */
 inline double sigma(double x) {
     return 1. / (1. + std::exp(-x));
 }


 /**
  * @brief Inter- and intra-iteration state for iteratively-reweighted-least-
  *        squares method for logistic regression
  *
  * TransitionState encapsualtes the transition state during the
  * logistic-regression aggregate function. To the database, the state is
  * exposed as a single DOUBLE PRECISION array, to the C++ code it is a proper
  * object containing scalars, a vector, and a matrix.
  *
  * Note: We assume that the DOUBLE PRECISION array is initialized by the
  * database with length at least 4, and all elemenets are 0.
  */
 template <class Handle>
 class MLogRegrIRLSTransitionState {

     // By §14.5.3/9: "Friend declarations shall not declare partial
     // specializations." We do access protected members in operator+=().
     template <class OtherHandle>
     friend class MLogRegrIRLSTransitionState;

 public:


     MLogRegrIRLSTransitionState(const AnyType &inArray)
         : mStorage(inArray.getAs<Handle>()) {
         rebind(static_cast<uint16_t>(mStorage[0]),static_cast<uint16_t>(mStorage[1]));
     }

     /**
      * @brief Convert to backend representation
      *
      * We define this function so that we can use State in the
      * argument list and as a return type.
      */
     inline operator AnyType() const {
         return mStorage;
     }

     /**
      * @brief Initialize the iteratively-reweighted-least-squares state.
      *
      * This function is only called for the first iteration, for the first row.
      */
     inline void initialize(
         const Allocator &inAllocator,
         uint16_t inWidthOfX,
         uint16_t inNumCategories, uint16_t inRefCategory) {

         size_t state_size = arraySize(inWidthOfX, inNumCategories);
         // GPDB limits the single array size to be 1GB, which means that the size
         // of a double array cannot be large than 134217727 because
         // (134217727 * 8) / (1024 * 1024) = 1023. And solve
         // state_size = x^2 + 2^x + 6 <= 134217727 will give x <= 11584.
         if(state_size > 134217727)
             throw std::domain_error(
                 "The product of number of independent variables and number of "
                  "categories cannot be larger than 11584.");

         mStorage = inAllocator.allocateArray<double, dbal::AggregateContext,
             dbal::DoZero, dbal::ThrowBadAlloc>(state_size);
         rebind(inWidthOfX, inNumCategories);
         widthOfX = inWidthOfX;
         numCategories = inNumCategories;
         ref_category = inRefCategory;
     }

     /**
      * @brief We need to support assigning the previous state
      */
     template <class OtherHandle> MLogRegrIRLSTransitionState &operator=(
         const MLogRegrIRLSTransitionState<OtherHandle> &inOtherState) {

         for (size_t i = 0; i < mStorage.size(); i++)
             mStorage[i] = inOtherState.mStorage[i];
         return *this;
     }

     /**
      * @brief Merge with another State object by copying the intra-iteration
      *     fields
      */
     template <class OtherHandle> MLogRegrIRLSTransitionState &operator+=(
         const MLogRegrIRLSTransitionState<OtherHandle> &inOtherState) {
         if (mStorage.size() != inOtherState.mStorage.size() ||
             widthOfX != inOtherState.widthOfX)
             throw std::logic_error("Internal error: Incompatible transition "
                 "states");

         numRows += inOtherState.numRows;
         gradient += inOtherState.gradient;
         X_transp_AX += inOtherState.X_transp_AX;
         logLikelihood += inOtherState.logLikelihood;
         return *this;
     }

     /**
      * @brief Reset the inter-iteration fields.
      */
     inline void reset() {
         numRows = 0;
         gradient.fill(0);
         X_transp_AX.fill(0);
         logLikelihood = 0;
     }

 private:
     static inline uint32_t arraySize(const uint16_t inWidthOfX,
         const uint16_t inNumCategories) {
         return 6 + inWidthOfX * inWidthOfX * inNumCategories * inNumCategories
                                  + 2 * inWidthOfX * inNumCategories;
     }

     void rebind(uint16_t inWidthOfX = 0, uint16_t inNumCategories = 0) {
         widthOfX.rebind(&mStorage[0]);
         numCategories.rebind(&mStorage[1]);
         conditionNo.rebind(&mStorage[2]);

         coef.rebind(&mStorage[3], inWidthOfX*inNumCategories);

         numRows.rebind(&mStorage[3 + inWidthOfX*inNumCategories]);

         gradient.rebind(&mStorage[4 + inWidthOfX*inNumCategories],inWidthOfX*inNumCategories);
         X_transp_AX.rebind(&mStorage[4 + 2 * inWidthOfX*inNumCategories],
             inNumCategories*inWidthOfX, inWidthOfX*inNumCategories);
         logLikelihood.rebind(&mStorage[4 +
              inNumCategories*inNumCategories*inWidthOfX*inWidthOfX
              + 2 * inWidthOfX*inNumCategories]);
         ref_category.rebind(&mStorage[5 +
              inNumCategories*inNumCategories*inWidthOfX*inWidthOfX
              + 2 * inWidthOfX*inNumCategories]);
     }

     Handle mStorage;

 public:
     typename HandleTraits<Handle>::ReferenceToUInt16 widthOfX;
     typename HandleTraits<Handle>::ReferenceToUInt16 numCategories;
     typename HandleTraits<Handle>::ReferenceToDouble conditionNo;

     typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap coef;
     typename HandleTraits<Handle>::ReferenceToUInt64 numRows;
     typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap gradient;
     typename HandleTraits<Handle>::MatrixTransparentHandleMap X_transp_AX;
     typename HandleTraits<Handle>::ReferenceToDouble logLikelihood;
     typename HandleTraits<Handle>::ReferenceToUInt16 ref_category;
 };


 /**
  * @brief Inter- and intra-iteration state for robust variance calculations
  *
  * TransitionState encapsualtes the transition state during the
  * logistic-regression robust variance calculation. To the database, the state is
  * exposed as a single DOUBLE PRECISION array, to the C++ code it is a proper
  * object containing scalars, a vector, and a matrix.
  *
  * Note: We assume that the DOUBLE PRECISION array is initialized by the
  * database with length at least 4, and all elemenets are 0.
  */
 template <class Handle>
 class MLogRegrRobustTransitionState {

     // By §14.5.3/9: "Friend declarations shall not declare partial
     // specializations." We do access protected members in operator+=().
     template <class OtherHandle>
     friend class MLogRegrRobustTransitionState;

 public:
     MLogRegrRobustTransitionState(const AnyType &inArray)
         : mStorage(inArray.getAs<Handle>()) {
         rebind(static_cast<uint16_t>(mStorage[0]),static_cast<uint16_t>(mStorage[1]));
     }

     /**
      * @brief Convert to backend representation
      *
      * We define this function so that we can use State in the
      * argument list and as a return type.
      */
     inline operator AnyType() const {
         return mStorage;
     }

     /**
      * @brief Initialize the iteratively-reweighted-least-squares state.
      *
      * This function is only called for the first iteration, for the first row.
      */
     inline void initialize(const Allocator &inAllocator,
         uint16_t inWidthOfX, uint16_t inNumCategories, uint16_t inRefCategory) {
         mStorage = inAllocator.allocateArray<double, dbal::AggregateContext,
             dbal::DoZero, dbal::ThrowBadAlloc>(arraySize(inWidthOfX, inNumCategories));
         rebind(inWidthOfX, inNumCategories);
         widthOfX = inWidthOfX;
         numCategories = inNumCategories;
         ref_category = inRefCategory;
     }

     /**
      * @brief We need to support assigning the previous state
      */
     template <class OtherHandle> MLogRegrRobustTransitionState &operator=(
         const MLogRegrRobustTransitionState<OtherHandle> &inOtherState) {
         for (size_t i = 0; i < mStorage.size(); i++)
             mStorage[i] = inOtherState.mStorage[i];
         return *this;
     }

     /**
      * @brief Merge with another State object by copying the intra-iteration
      *     fields
      */
     template <class OtherHandle> MLogRegrRobustTransitionState &operator+=(
         const MLogRegrRobustTransitionState<OtherHandle> &inOtherState) {
         if (mStorage.size() != inOtherState.mStorage.size() ||
             widthOfX != inOtherState.widthOfX)
             throw std::logic_error("Internal error: Incompatible transition "
                 "states");

         numRows += inOtherState.numRows;
         X_transp_AX += inOtherState.X_transp_AX;
         meat += inOtherState.meat;
         return *this;
     }

     /**
      * @brief Reset the inter-iteration fields.
      */
     inline void reset() {
         numRows = 0;
         meat.fill(0);
         X_transp_AX.fill(0);
     }

 private:
     static inline uint32_t arraySize(const uint16_t inWidthOfX,
         const uint16_t inNumCategories) {
         return 4 + 2*inWidthOfX * inWidthOfX * inNumCategories * inNumCategories
                                  + inWidthOfX * inNumCategories;
     }

     /**
      * @brief Rebind to a new storage array
      *
      * @param inWidthOfX             The number of independent variables.
      * @param inNumCategories The number of categories of the dependant var


      * Array layout (iteration refers to one aggregate-function call):
      * Inter-iteration components (updated in final function):
      * - 0: widthOfX (number of independant variables)
      * - 1: numCategories (number of categories)
      * - 2: ref_category
      * - 3: coef (vector of coefficients)
      *
      * Intra-iteration components (updated in transition step):
      * - 3 + widthOfX*numCategories: numRows (number of rows already processed in this iteration)
      * - 4 + widthOfX * inNumCategories: X_transp_AX (X^T A X).
      * - 4 + widthOfX^2*numCategories^2: meat  (The meat matrix)
      */
     void rebind(uint16_t inWidthOfX = 0, uint16_t inNumCategories = 0) {
         widthOfX.rebind(&mStorage[0]);
         numCategories.rebind(&mStorage[1]);
         ref_category.rebind(&mStorage[2]);
         coef.rebind(&mStorage[3], inWidthOfX * inNumCategories);
         numRows.rebind(&mStorage[3 + inWidthOfX * inNumCategories]);
         X_transp_AX.rebind(&mStorage[4 + inWidthOfX * inNumCategories],
             inNumCategories * inWidthOfX, inWidthOfX * inNumCategories);
         meat.rebind(&mStorage[4 +
              inNumCategories * inNumCategories * inWidthOfX * inWidthOfX
              + inWidthOfX * inNumCategories], inWidthOfX * inNumCategories, inWidthOfX * inNumCategories);
     }

     Handle mStorage;

 public:
     typename HandleTraits<Handle>::ReferenceToUInt16 widthOfX;
     typename HandleTraits<Handle>::ReferenceToUInt16 numCategories;
     typename HandleTraits<Handle>::ReferenceToUInt16 ref_category;

     typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap coef;
     typename HandleTraits<Handle>::ReferenceToUInt64 numRows;
     typename HandleTraits<Handle>::MatrixTransparentHandleMap X_transp_AX;
     typename HandleTraits<Handle>::MatrixTransparentHandleMap meat;
 };

 /**
  * @brief IRLS Transition
  * @param args
  *
  * Arguments (Matched with PSQL wrapped)
  * - 0: Current State
  * - 1: y value (Integer)
  * - 2: numCategories (Integer)
  * - 3: ref_category (Integer)
  * - 4: X value (Column Vector)
  * - 5: Previous State

  */
 AnyType
 __mlogregr_irls_step_transition::run(AnyType &args) {
     MLogRegrIRLSTransitionState<MutableArrayHandle<double> > state = args[0];

     if (args[1].isNull() || args[2].isNull() || args[3].isNull() ||
             args[4].isNull()) {
         return args[0];
     }

     // Get x as a vector of double
     MappedColumnVector x;
     try{
         // an exception is raised in the backend if args[2] contains nulls
         MappedColumnVector xx = args[4].getAs<MappedColumnVector>();
         // x is a const reference, we can only rebind to change its pointer
         x.rebind(xx.memoryHandle(), xx.size());
     } catch (const ArrayWithNullException &e) {
         return args[0];
     }

     // Get the category & numCategories as integer
     int32_t category = args[1].getAs<int32_t>();
     // Number of categories after pivoting (we pivot around the first category)
     int32_t numCategories = (args[2].getAs<int32_t>() - 1);
     int32_t ref_category = args[3].getAs<int32_t>();

     // The following check was added with MADLIB-138.
     if (!x.is_finite())
             throw std::domain_error("Design matrix is not finite.");

     if (state.numRows == 0) {
         if (x.size() > std::numeric_limits<uint16_t>::max())
                 throw std::domain_error("Number of independent variables cannot be "
                         "larger than 65535.");

         if (numCategories < 1)
                 throw std::domain_error("Number of cateogires must be at least 2");

         // Init the state (requires x.size() and category.size())
         state.initialize(*this,
             static_cast<uint16_t>(x.size()) ,
             static_cast<uint16_t>(numCategories),
             static_cast<uint16_t>(ref_category));

         if (!args[5].isNull()) {
                 MLogRegrIRLSTransitionState<ArrayHandle<double> >
                         previousState = args[5];
                 state = previousState;
                 state.reset();
         }
     }


     /*
      * This check should be done for each iteration. Only checking the first
      * run is not enough.
      */
     if (category > numCategories || category < 0)
         throw std::domain_error("Invalid category. Categories must be integer "
             "values between 0 and (number of categories - 1).");

     if (ref_category > numCategories || ref_category < 0)
         throw std::domain_error("Invalid reference category. Reference category must be integer "
             "value between 0 and (number of categories - 1).");

     // Now do the transition step
     state.numRows++;
     /*     Get y: Convert to 1/0 boolean vector
             Example: Category 4 : 0 0 0 1 0 0
             Storing it in this forms helps us get a nice closed form expression
     */

     //To pivot around the specified reference category
     ColumnVector y(numCategories);
     y.fill(0);
     if (category > ref_category) {
         y(category - 1) = 1;
     } else if (category < ref_category) {
         y(category) = 1;
     }

     /*
     Compute the parameter vector (the 'pi' vector in the documentation)
     for the data point being processed.
     Casting the coefficients into a matrix makes the calculation simple.
     */
     Matrix coef = state.coef;
     coef.resize(numCategories, state.widthOfX);

     //Store the intermediate calculations because we'll reuse them in the LLH
     ColumnVector t1 = x; //t1 is vector of size state.widthOfX
     t1 = coef*x;
     /* Note: The above 2 lines could have been written as:
         ColumnVector t1 = -coef*x;

         but this creates warnings. These warnings are somehow related to the factor
         that x is of an immutable type. The following alternative could resolve
         the warnings (although not necessary the best alternative):
     */

     ColumnVector t2 = t1.array().exp();
     double t3 = 1 + t2.sum();
     ColumnVector pi = t2/t3;

     //The gradient matrix has numCategories rows and widthOfX columns
     Matrix grad = -y*x.transpose() + pi*x.transpose();
     //We cast the gradient into a vector to make the Newton step calculations much easier.
     grad.resize(numCategories*state.widthOfX,1);

     /*
          a is a matrix of size JxJ where J is the number of categories
          a_j1j2 = -pi(j1)*(1-pi(j2))if j1 == j2
          a_j1j2 =  pi(j1)*pi(j2) if j1 != j2
     */
     Matrix a(numCategories,numCategories);
     // Compute the 'a' matrix.
     Matrix piDiag = pi.asDiagonal();
     a = pi * pi.transpose() - piDiag;
     state.gradient.noalias() += grad;

     //Start the Hessian calculations
     Matrix X_transp_AX(numCategories * state.widthOfX, numCategories * state.widthOfX);

     /*
         Again: The following 3 lines could have been written as
         Matrix XXTrans = x * x.transpose();
         but it creates warnings related to the type of x. Here is an easy fix
     */
     Matrix cv_x = x;
     Matrix XXTrans = trans(cv_x);
     XXTrans = cv_x * XXTrans;

     //Eigen doesn't supported outer-products for matrices, so we have to do our own.
     //This operation is also known as a tensor-product.
     for (int i1 = 0; i1 < state.widthOfX; i1++){
          for (int i2 = 0; i2 <state.widthOfX; i2++){
             int rowOffset = numCategories * i1;
             int colOffset = numCategories * i2;

             X_transp_AX.block(rowOffset, colOffset, numCategories,  numCategories) = XXTrans(i1,i2)*a;
         }
     }

     triangularView<Lower>(state.X_transp_AX) += X_transp_AX;

     state.logLikelihood += y.transpose()*t1 - log(t3);

     return state;

 }

 /**
  * @brief Perform the perliminary aggregation function: Merge transition states
      This merge step is the same as that of logistic regression.
  */
 AnyType
 __mlogregr_irls_step_merge_states::run(AnyType &args) {
     MLogRegrIRLSTransitionState<MutableArrayHandle<double> > stateLeft = args[0];
     MLogRegrIRLSTransitionState<ArrayHandle<double> > stateRight = args[1];

     // We first handle the trivial case where this function is called with one
     // of the states being the initial state
     if (stateLeft.numRows == 0)
         return stateRight;
     else if (stateRight.numRows == 0)
         return stateLeft;

     // Merge states together and return
     stateLeft += stateRight;
     return stateLeft;
 }

 /**
  * @brief Perform the logistic-regression final step
  */
 AnyType
 __mlogregr_irls_step_final::run(AnyType &args) {
     // We request a mutable object.
     // Depending on the backend, this might perform a deep copy.
     MLogRegrIRLSTransitionState<MutableArrayHandle<double> > state = args[0];

     // Aggregates that haven't seen any data just return Null.
     if (state.numRows == 0)
         return Null();

     // See MADLIB-138. At least on certain platforms and with certain versions,
     // LAPACK will run into an infinite loop if pinv() is called for non-finite
     // matrices. We extend the check also to the dependent variables.
     if (!state.X_transp_AX.is_finite() || !state.gradient.is_finite())
         throw NoSolutionFoundException("Over- or underflow in intermediate "
             "calculation. Input data is likely of poor numerical condition.");

     SymmetricPositiveDefiniteEigenDecomposition<Matrix> decomposition(
         -1 * state.X_transp_AX, EigenvaluesOnly, ComputePseudoInverse);

     // Precompute (X^T * A * X)^-1
     Matrix hessianInv = -1 * decomposition.pseudoInverse();

     state.coef.noalias() += hessianInv * state.gradient;

     if(!state.coef.is_finite())
         throw NoSolutionFoundException("Over- or underflow in Newton step, "
             "while updating coefficients. Input data is likely of poor "
             "numerical condition.");

     // We use the intra-iteration field gradient for storing the diagonal
     // of X^T A X, so that we don't have to recompute it in the result function.
     // Likewise, we store the condition number.
     // FIXME: This feels a bit like a hack.
     state.conditionNo = decomposition.conditionNo();
     state.X_transp_AX = -1 * hessianInv;

     return state;
 }

 /**
  * @brief Compute the diagnostic statistics
  *
  * This function wraps the common parts of mlogregr state into the result.
  * (the result data type is defined in multilogistic.sql_in)
  */
 AnyType mLogstateToResult(
     const Allocator &inAllocator,
     MLogRegrIRLSTransitionState<ArrayHandle<double> > state) {

     int ref_category = state.ref_category;
     const HandleMap<const ColumnVector, TransparentHandle<double> > &inCoef = state.coef;
     double logLikelihood = state.logLikelihood;
     uint64_t num_processed = state.numRows;

     // Per the hack at the end of the final function we place the inverse
     // of the X_tranp_AX into the state.X_transp_AX
     const Matrix & X_transp_AX_inverse = state.X_transp_AX;
     const ColumnVector &diagonal_of_hessian = X_transp_AX_inverse.diagonal();

     MutableNativeColumnVector stdErr(
         inAllocator.allocateArray<double>(inCoef.size()));
     MutableNativeColumnVector waldZStats(
         inAllocator.allocateArray<double>(inCoef.size()));
     MutableNativeColumnVector waldPValues(
         inAllocator.allocateArray<double>(inCoef.size()));
     MutableNativeColumnVector oddsRatios(
         inAllocator.allocateArray<double>(inCoef.size()));

     for (Index i = 0; i < inCoef.size(); ++i) {
         stdErr(i) = std::sqrt(diagonal_of_hessian(i));
         waldZStats(i) = inCoef(i) / stdErr(i);
         waldPValues(i) = 2. * prob::cdf( prob::normal(),
             -std::abs(waldZStats(i)));
         oddsRatios(i) = std::exp( inCoef(i) );
     }
     int num_iterations = 0;
     // Return all coefficients, standard errors, etc. in a tuple
     AnyType tuple;
     tuple << ref_category << inCoef << logLikelihood << stdErr
           << waldZStats << waldPValues << oddsRatios
           << static_cast<double>(state.conditionNo) << num_iterations << num_processed;
     return tuple;
 }


 /**
  * @brief Return the difference in log-likelihood between two states
  */
 AnyType
 __internal_mlogregr_irls_step_distance::run(AnyType &args) {
     MLogRegrIRLSTransitionState<ArrayHandle<double> > stateLeft = args[0];
     MLogRegrIRLSTransitionState<ArrayHandle<double> > stateRight = args[1];

     return std::abs(stateLeft.logLikelihood - stateRight.logLikelihood);
 }

 /**
  * @brief Return the coefficients and diagnostic statistics of the state
  */
 AnyType
 __internal_mlogregr_irls_result::run(AnyType &args) {
     MLogRegrIRLSTransitionState<ArrayHandle<double> > state = args[0];

     return mLogstateToResult(*this, state);
     // state.ref_category, state.coef,
     // state.gradient, state.logLikelihood, state.X_transp_AX(0,0));
 }

 /**
  * @brief Return the coefficients and diagnostic statistics of the state
  */
 AnyType
 __internal_mlogregr_summary_results::run(AnyType &args) {
     MLogRegrIRLSTransitionState<ArrayHandle<double> > state = args[0];
     const Matrix & X_transp_AX_inverse = state.X_transp_AX;
     // X_transp_AX is actually it's inverse - this is a hack added at the end of
     // the final function
     AnyType tuple;
     Matrix coef = state.coef;
     coef.resize(state.numCategories, state.widthOfX);
     coef.transposeInPlace();
     tuple << coef << X_transp_AX_inverse;
     return tuple;
 }
 // ----------------- End of Multinomial Logistic Regression --------------------

 // ---------------------------------------------------------------------------
 //             Robust Variance Multi-Logistic
 // ---------------------------------------------------------------------------

 AnyType
 mlogregr_robust_step_transition::run(AnyType &args) {
     using std::endl;

 	MLogRegrRobustTransitionState<MutableArrayHandle<double> > state = args[0];

    if (args[1].isNull() || args[2].isNull() || args[3].isNull() ||
             args[4].isNull() || args[5].isNull()) {
         return args[0];
     }

     // Get x as a vector of double
     MappedColumnVector x;
     try{
         // an exception is raised in the backend if args[2] contains nulls
         MappedColumnVector xx = args[4].getAs<MappedColumnVector>();
         // x is a const reference, we can only rebind to change its pointer
         x.rebind(xx.memoryHandle(), xx.size());
     } catch (const ArrayWithNullException &e) {
         return args[0];
     }

     // Get the category & numCategories as integer
     int16_t category = static_cast<int16_t>(args[1].getAs<int>());
     // Number of categories after pivoting (We pivot around the first category)
     int16_t numCategories = static_cast<int16_t>(args[2].getAs<int>() - 1);
     int32_t ref_category = args[3].getAs<int32_t>();
 	MappedMatrix coefMat = args[5].getAs<MappedMatrix>();

     // The following check was added with MADLIB-138.
     if (!x.is_finite())
             throw std::domain_error("Design matrix is not finite.");

     if (state.numRows == 0) {
         if (x.size() > std::numeric_limits<uint16_t>::max())
                 throw std::domain_error("Number of independent variables cannot be "
                         "larger than 65535.");
         if (numCategories < 1)
                 throw std::domain_error("Number of cateogires must be at least 2");
         if (category > numCategories)
                 throw std::domain_error("You have entered a category > numCategories"
                     "Categories must be of values {0,1... numCategories-1}");

         // Init the state (requires x.size() and category.size())
         state.initialize(*this,
             static_cast<uint16_t>(x.size()) ,
             static_cast<uint16_t>(numCategories),
             static_cast<uint16_t>(ref_category));

         Matrix mat = coefMat;
         mat.transposeInPlace();
         mat.resize(coefMat.size(), 1);
         state.coef = mat;
     }

     // Now do the transition step
     state.numRows++;
     /*
         Get y: Convert to 1/0 boolean vector
         Example: Category 4 : 0 0 0 1 0 0
         Storing it in this forms helps us get a nice closed form expression
     */

     //To pivot around the specified reference category
     ColumnVector y(numCategories);
     y.fill(0);
     if (category > ref_category) {
         y(category - 1) = 1;
     } else if (category < ref_category) {
         y(category) = 1;
     }

     /*
     Compute the parameter vector (the 'pi' vector in the documentation)
     for the data point being processed.
     Casting the coefficients into a matrix makes the calculation simple.
     */

     Matrix coef = state.coef;
     coef.resize(numCategories, state.widthOfX);

     //Store the intermediate calculations because we'll reuse them in the LLH
     ColumnVector t1 = x; //t1 is vector of size state.widthOfX
     t1 = coef*x;
     /*
         Note: The above 2 lines could have been written as:
         ColumnVector t1 = -coef*x;

         but this creates warnings. These warnings are somehow related to the factor
         that x is an immutable type.
     */

     ColumnVector t2 = t1.array().exp();
     double t3 = 1 + t2.sum();
     ColumnVector pi = t2/t3;

 	//The gradient matrix has numCategories rows and widthOfX columns
     Matrix grad = -y * x.transpose() + pi * x.transpose();
     //We cast the gradient into a vector to make the math easier.
     grad.resize(numCategories * state.widthOfX, 1);

     Matrix GradGradTranspose;
     GradGradTranspose = grad * grad.transpose();
 	state.meat += GradGradTranspose;

     /*
          a is a matrix of size JxJ where J is the number of categories
          a_j1j2 = -pi(j1)*(1-pi(j2))if j1 == j2
          a_j1j2 =  pi(j1)*pi(j2) if j1 != j2
     */
     // Compute the 'a' matrix.
     Matrix a(numCategories,numCategories);
     Matrix piDiag = pi.asDiagonal();
     a = pi * pi.transpose() - piDiag;

     //Start the Hessian calculations
     Matrix X_transp_AX(numCategories * state.widthOfX, numCategories * state.widthOfX);

     /*
         Again: The following 3 lines could have been written as
         Matrix XXTrans = x * x.transpose();
         but it creates warnings related to the type of x. Here is an easy fix
     */
     Matrix cv_x = x;
     Matrix XXTrans = trans(cv_x);
     XXTrans = cv_x * XXTrans;

     //Eigen doesn't supported outer-products for matrices, so we have to do our own.
     //This operation is also known as a tensor-product.
     for (int i1 = 0; i1 < state.widthOfX; i1++){
          for (int i2 = 0; i2 <state.widthOfX; i2++){
             int rowOffset = numCategories * i1;
             int colOffset = numCategories * i2;

             X_transp_AX.block(rowOffset, colOffset, numCategories,  numCategories) = XXTrans(i1, i2) * a;
         }
     }

     triangularView<Lower>(state.X_transp_AX) += X_transp_AX;

     return state;

 }

 /**
  * @brief Perform the perliminary aggregation function: Merge transition states
      This merge step is the same as that of logistic regression.
  */
 AnyType
 mlogregr_robust_step_merge_states::run(AnyType &args) {
     MLogRegrRobustTransitionState<MutableArrayHandle<double> > stateLeft = args[0];
     MLogRegrRobustTransitionState<ArrayHandle<double> > stateRight = args[1];

     // We first handle the trivial case where this function is called with one
     // of the states being the initial state
     if (stateLeft.numRows == 0)
         return stateRight;
     else if (stateRight.numRows == 0)
         return stateLeft;

     // Merge states together and return
     stateLeft += stateRight;
     return stateLeft;
 }

 AnyType MLrobuststateToResult(
     const Allocator &inAllocator,
     int ref_category,
     const HandleMap<const ColumnVector, TransparentHandle<double> >& inCoef,
     const ColumnVector &diagonal_of_varianceMat) {

 	MutableNativeColumnVector variance(
         inAllocator.allocateArray<double>(inCoef.size()));
     MutableNativeColumnVector stdErr(
         inAllocator.allocateArray<double>(inCoef.size()));
     MutableNativeColumnVector waldZStats(
         inAllocator.allocateArray<double>(inCoef.size()));
     MutableNativeColumnVector waldPValues(
         inAllocator.allocateArray<double>(inCoef.size()));

     for (Index i = 0; i < inCoef.size(); ++i) {
         variance(i) = diagonal_of_varianceMat(i);
         stdErr(i) = std::sqrt(diagonal_of_varianceMat(i));
         waldZStats(i) = inCoef(i) / stdErr(i);
         waldPValues(i) = 2. * prob::cdf(
             prob::normal(), -std::abs(waldZStats(i)));
     }

     // Return all coefficients, standard errors, etc. in a tuple
     AnyType tuple;
     tuple <<  ref_category << inCoef << stdErr << waldZStats << waldPValues;
     return tuple;
 }


 /**
  * @brief Perform the logistic-regression final step
  */
 AnyType
 mlogregr_robust_step_final::run(AnyType &args) {
     // We request a mutable object. Depending on the backend, this might perform
     // a deep copy.
     MLogRegrRobustTransitionState<ArrayHandle<double> > state = args[0];
     // Aggregates that haven't seen any data just return Null.
     if (state.numRows == 0)
         return Null();

     // See MADLIB-138. At least on certain platforms and with certain versions,
     // LAPACK will run into an infinite loop if pinv() is called for non-finite
     // matrices. We extend the check also to the dependent variables.
     if (!state.X_transp_AX.is_finite())
         throw NoSolutionFoundException("Over- or underflow in intermediate "
             "calculation. Input data is likely of poor numerical condition.");


     SymmetricPositiveDefiniteEigenDecomposition<Matrix> decomposition(
         -1 * state.X_transp_AX, EigenvaluesOnly, ComputePseudoInverse);

     // Precompute (X^T * A * X)^-1
     Matrix bread = decomposition.pseudoInverse();
 	Matrix varianceMat;
     varianceMat = bread * state.meat * bread;

     if(!state.coef.is_finite())
         throw NoSolutionFoundException("Over- or underflow in Newton step, "
             "while updating coefficients. Input data is likely of poor "
             "numerical condition.");

     return MLrobuststateToResult(*this, state.ref_category, state.coef, varianceMat.diagonal());
 }
 // ------------------------ End of Robust Variance -----------------------------

 AnyType __sub_array::run(AnyType &args) {
     if (args[0].isNull() || args[1].isNull())
         return Null();

     ArrayHandle<double> value = args[0].getAs<ArrayHandle<double> >();
     ArrayHandle<int32> index = args[1].getAs<ArrayHandle<int32> >();

     for (size_t i = 0; i < index.size(); i++) {
         if (index[i] < 1 || index[i] > static_cast<int>(value.size()))
             throw std::domain_error("Invalid indices - out of bound");
     }

     MutableArrayHandle<double> res =
         allocateArray<double, dbal::AggregateContext, dbal::DoZero,
             dbal::ThrowBadAlloc>(index.size());

     for (size_t i = 0; i < index.size(); i++)
         res[i] = value[index[i] - 1];

     return res;
 }

 // ----------------------------------------------------------------------------
 typedef struct __sr_ctx{
     const double * inarray;
     int32_t maxcall;
     int32_t num_feature;
     int32_t num_category;
     int32_t ref_category;
     int32_t curcall;
 } sr_ctx;

 void *
 __mlogregr_format::SRF_init(AnyType &args) {
     sr_ctx *ctx = new sr_ctx;
     ctx->curcall = 0;

     MutableArrayHandle<double> inarray = NULL;
     try{
         inarray = args[0].getAs<MutableArrayHandle<double> >();
     } catch (const ArrayWithNullException &e) {
         ctx->maxcall = 0;
         return ctx;
     }

     int32_t num_feature = args[1].getAs<int32_t>();
     int32_t num_category = args[2].getAs<int32_t>();
     int32_t ref_category = args[3].getAs<int32_t>();

     ctx->inarray = inarray.ptr();
     ctx->maxcall = num_category - 1;
     ctx->num_category = num_category - 1;
     ctx->num_feature = num_feature;
     ctx->ref_category = ref_category;

     if (num_feature * (num_category - 1) !=
             static_cast<int32_t>(inarray.size())) {
         throw std::runtime_error("num_feature * (num_category - 1) != "
                 "inarray.size()");
     }

     if (ref_category >= num_category){
         throw std::runtime_error("ref_category >= num_category");
     }

     return ctx;
 }

 AnyType
 __mlogregr_format::SRF_next(void * user_fctx, bool * is_last_call) {
     sr_ctx * ctx = (sr_ctx *) user_fctx;
     if (ctx->curcall >= ctx->maxcall) {
         *is_last_call = true;
         return Null();
     }

     MutableArrayHandle<double> outarray =
         allocateArray<double, dbal::FunctionContext,
             dbal::DoZero, dbal::ThrowBadAlloc>(ctx->num_feature);
     for(int i = 0; i < ctx->num_feature; i++) {
         outarray[i] = ctx->inarray[i * ctx->num_category + ctx->curcall];
     }

     AnyType tuple;
     tuple << (ctx->curcall < ctx->ref_category
             ? ctx->curcall
             : ctx->curcall + 1)
           << outarray;

     ctx->curcall++;

     return tuple;
 }
 /* ------------------------------------------------------------ */


 AnyType mlogregr_predict_prob::run(AnyType &args)
 {
     // dimension: N x (L - 1), where L is the number of categories
     MappedMatrix coef = args[0].getAs<MappedMatrix>();

     int ref_category = args[1].getAs<int>();
     MappedColumnVector x;
     try {
         MappedColumnVector xx = args[2].getAs<MappedColumnVector>();
         x.rebind(xx.memoryHandle(), xx.size());
     } catch (const ArrayWithNullException &e) {
         return Null();
     }

     int L = static_cast<int>(coef.cols()) + 1; // number of categories

     ColumnVector res(L);
     int cat_index = 0;
     for (int i = 0; i < L; i++) {
         if (i == ref_category) {
             res(i) = 1;
         } else {
             res(i) = exp(coef.col(cat_index).dot(x));
             cat_index++;
         }
     }
     res /= res.sum();
     return res;
 }
 /* ------------------------------------------------------------ */

 AnyType mlogregr_predict_response::run(AnyType &args)
 {
     // dimension: N x (L - 1), where L is the number of categories
     MappedMatrix coef = args[0].getAs<MappedMatrix>();

     int ref_category = args[1].getAs<int>();
     MappedColumnVector x;
     try {
         MappedColumnVector xx = args[2].getAs<MappedColumnVector>();
         x.rebind(xx.memoryHandle(), xx.size());
     } catch (const ArrayWithNullException &e) {
         return Null();
     }

     ColumnVector linear_predictors(x.transpose() * coef);
     Index max_cat = 0;
     linear_predictors.maxCoeff(&max_cat);
     double max_lp = linear_predictors(max_cat);

     if (exp(max_lp) < 1){
         // no category has high enough probability as reference category
         return ref_category;
     } else if (max_cat < ref_category){
         return static_cast<uint>(max_cat);
     }
     else{
         // since ref_category is not present in the coef matrix, index of
         // categories after ref_category is 1 less than the actual index
         return static_cast<uint>(max_cat + 1u);
     }
 }

 } // namespace regress

 } // namespace modules

 } // namespace madlib