examples/hello_world/iterative/simple_logistic.cpp - madlib - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 /* ------------------------------------------------------
  *
  * @file logistic.cpp
  *
  * @brief Logistic-Regression functions
  *
  * We implement the conjugate-gradient method and the iteratively-reweighted-
  * least-squares method.
  *
  *//* ----------------------------------------------------------------------- */
 #include <limits>
 #include <dbconnector/dbconnector.hpp>
 #include <modules/shared/HandleTraits.hpp>
 #include <modules/prob/boost.hpp>
 #include <boost/math/distributions.hpp>
 #include <modules/prob/student.hpp>
 #include "simple_logistic.hpp"

 namespace madlib {

 // Use Eigen
 using namespace dbal::eigen_integration;

 namespace modules {

 // Import names from other MADlib modules
 using dbal::NoSolutionFoundException;

 namespace hello_world {

 // FIXME this enum should be accessed by all modules that may need grouping
 // valid status values
 enum { IN_PROCESS, COMPLETED, TERMINATED, NULL_EMPTY };

 // Internal functions
 AnyType stateToResult(const Allocator &inAllocator,
                       const HandleMap<const ColumnVector, TransparentHandle<double> >& inCoef,
                       const Matrix & hessian,
                       const double &logLikelihood,
                       int status,
                       const uint64_t &numRows);


 // ---------------------------------------------------------------------------
 //              Logistic Regression States
 // ---------------------------------------------------------------------------

 /**
  * @brief Inter- and intra-iteration state for conjugate-gradient method for
  *        logistic regression
  *
  * TransitionState encapsualtes the transition state during the
  * logistic-regression aggregate function. To the database, the state is
  * exposed as a single DOUBLE PRECISION array, to the C++ code it is a proper
  * object containing scalars and vectors.
  *
  * Note: We assume that the DOUBLE PRECISION array is initialized by the
  * database with length at least 5, and all elemenets are 0.
  *
  */
 template <class Handle>
 class LogRegrSPTransitionState {
     template <class OtherHandle>
     friend class LogRegrSPTransitionState;

   public:
     LogRegrSPTransitionState(const AnyType &inArray)
         : mStorage(inArray.getAs<Handle>()) {

         rebind(static_cast<uint16_t>(mStorage[1]));
     }

     /**
      * @brief Convert to backend representation
      *
      * We define this function so that we can use State in the
      * argument list and as a return type.
      */
     inline operator AnyType() const {
         return mStorage;
     }

     /**
      * @brief Initialize the conjugate-gradient state.
      *
      * This function is only called for the first iteration, for the first row.
      */
     inline void initialize(const Allocator &inAllocator, uint16_t inWidthOfX) {
         mStorage = inAllocator.allocateArray<double, dbal::AggregateContext,
                                              dbal::DoZero, dbal::ThrowBadAlloc>(arraySize(inWidthOfX));
         rebind(inWidthOfX);
         widthOfX = inWidthOfX;
     }

     /**
      * @brief We need to support assigning the previous state
      */
     template <class OtherHandle>
     LogRegrSPTransitionState &operator=(
         const LogRegrSPTransitionState<OtherHandle> &inOtherState) {

         for (size_t i = 0; i < mStorage.size(); i++)
             mStorage[i] = inOtherState.mStorage[i];
         return *this;
     }

     /**
      * @brief Merge with another State object by copying the intra-iteration
      *     fields
      */
     template <class OtherHandle>
     LogRegrSPTransitionState &operator+=(
         const LogRegrSPTransitionState<OtherHandle> &inOtherState) {

         if (mStorage.size() != inOtherState.mStorage.size() ||
             widthOfX != inOtherState.widthOfX)
             throw std::logic_error("Internal error: Incompatible transition "
                                    "states");

         numRows += inOtherState.numRows;
         gradNew += inOtherState.gradNew;
         X_transp_AX += inOtherState.X_transp_AX;
         logLikelihood += inOtherState.logLikelihood;
         // merged state should have the higher status
         // (see top of file for more on 'status' )
         status = (inOtherState.status > status) ? inOtherState.status : status;
         return *this;
     }

     /**
      * @brief Reset the inter-iteration fields.
      */
     inline void reset() {
         numRows = 0;
         X_transp_AX.fill(0);
         gradNew.fill(0);
         logLikelihood = 0;
         status = IN_PROCESS;
     }

   private:
     static inline size_t arraySize(const uint16_t inWidthOfX) {
         return 6 + inWidthOfX * inWidthOfX + 4 * inWidthOfX;
     }

     /**
      * @brief Rebind to a new storage array
      *
      * @param inWidthOfX The number of independent variables.
      *
      * Array layout (iteration refers to one aggregate-function call):
      * Inter-iteration components (updated in final function):
      * - 0: iteration (current iteration)
      * - 1: widthOfX (number of coefficients)
      * - 2: coef (vector of coefficients)
      * - 2 + widthOfX: dir (direction)
      * - 2 + 2 * widthOfX: grad (gradient)
      * - 2 + 3 * widthOfX: beta (scale factor)
      *
      * Intra-iteration components (updated in transition step):
      * - 3 + 3 * widthOfX: numRows (number of rows already processed in this iteration)
      * - 4 + 3 * widthOfX: gradNew (intermediate value for gradient)
      * - 4 + 4 * widthOfX: X_transp_AX (X^T A X)
      * - 4 + widthOfX * widthOfX + 4 * widthOfX: logLikelihood ( ln(l(c)) )
      */
     void rebind(uint16_t inWidthOfX) {
         iteration.rebind(&mStorage[0]);
         widthOfX.rebind(&mStorage[1]);
         coef.rebind(&mStorage[2], inWidthOfX);
         dir.rebind(&mStorage[2 + inWidthOfX], inWidthOfX);
         grad.rebind(&mStorage[2 + 2 * inWidthOfX], inWidthOfX);
         beta.rebind(&mStorage[2 + 3 * inWidthOfX]);
         numRows.rebind(&mStorage[3 + 3 * inWidthOfX]);
         gradNew.rebind(&mStorage[4 + 3 * inWidthOfX], inWidthOfX);
         X_transp_AX.rebind(&mStorage[4 + 4 * inWidthOfX], inWidthOfX, inWidthOfX);
         logLikelihood.rebind(&mStorage[4 + inWidthOfX * inWidthOfX + 4 * inWidthOfX]);
         status.rebind(&mStorage[5 + inWidthOfX * inWidthOfX + 4 * inWidthOfX]);
     }

     Handle mStorage;

   public:
     typename HandleTraits<Handle>::ReferenceToUInt32 iteration;
     typename HandleTraits<Handle>::ReferenceToUInt16 widthOfX;
     typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap coef;
     typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap dir;
     typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap grad;
     typename HandleTraits<Handle>::ReferenceToDouble beta;

     typename HandleTraits<Handle>::ReferenceToUInt64 numRows;
     typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap gradNew;
     typename HandleTraits<Handle>::MatrixTransparentHandleMap X_transp_AX;
     typename HandleTraits<Handle>::ReferenceToDouble logLikelihood;
     typename HandleTraits<Handle>::ReferenceToUInt16 status;
 };


 /**
  * @brief Logistic function
  */
 inline double sigma(double x) {
     return 1. / (1. + std::exp(-x));
 }

 /**
  * @brief Perform the logistic-regression transition step
  */
 AnyType
 logregr_simple_step_transition::run(AnyType &args) {
     LogRegrSPTransitionState<MutableArrayHandle<double> > state = args[0];
     if (args[1].isNull() || args[2].isNull()) { return args[0]; }
     double y = args[1].getAs<bool>() ? 1. : -1.;
     MappedColumnVector x;
     try {
         // an exception is raised in the backend if args[2] contains nulls
         MappedColumnVector xx = args[2].getAs<MappedColumnVector>();
         // x is a const reference, we can only rebind to change its pointer
         x.rebind(xx.memoryHandle(), xx.size());
     } catch (const ArrayWithNullException &e) {
         return args[0];
     }

     // The following check was added with MADLIB-138.
     if (!dbal::eigen_integration::isfinite(x)) {
         warning("Design matrix is not finite.");
         state.status = TERMINATED;
         return state;
     }

     if (state.numRows == 0) {
         if (x.size() > std::numeric_limits<uint16_t>::max()){
             warning("Number of independent variables cannot be larger than 65535.");
             state.status = TERMINATED;
             return state;
         }


         state.initialize(*this, static_cast<uint16_t>(x.size()));
         if (!args[3].isNull()) {
             LogRegrSPTransitionState<ArrayHandle<double> > previousState = args[3];

             state = previousState;
             state.reset();
         }
     }
     // Now do the transition step
     state.numRows++;
     double xc = dot(x, state.coef);
     state.gradNew.noalias() += sigma(-y * xc) * y * trans(x);

     // Note: sigma(-x) = 1 - sigma(x).
     double a = sigma(xc) * sigma(-xc);
     state.X_transp_AX += x * trans(x) * a;

     state.logLikelihood -= std::log( 1. + std::exp(-y * xc) );

     return state;
 }


 /**
  * @brief Perform the perliminary aggregation function: Merge transition states
  */
 AnyType
 logregr_simple_step_merge_states::run(AnyType &args) {
     LogRegrSPTransitionState<MutableArrayHandle<double> > stateLeft = args[0];
     LogRegrSPTransitionState<ArrayHandle<double> > stateRight = args[1];

     // We first handle the trivial case where this function is called with one
     // of the states being the initial state
     if (stateLeft.numRows == 0)
         return stateRight;
     else if (stateRight.numRows == 0)
         return stateLeft;

     // Merge states together and return
     stateLeft += stateRight;
     return stateLeft;
 }


 /**
  * @brief Perform the logistic-regression final step
  */
 AnyType
 logregr_simple_step_final::run(AnyType &args) {
     // We request a mutable object. Depending on the backend, this might perform
     // a deep copy.
     LogRegrSPTransitionState<MutableArrayHandle<double> > state = args[0];

     // Aggregates that haven't seen any data just return Null.
     if (state.numRows == 0){
         state.status = NULL_EMPTY;
         return state;
     }

     // Note: k = state.iteration
     if (state.iteration == 0) {
         // Iteration computes the gradient

         state.dir = state.gradNew;
         state.grad = state.gradNew;
     } else {
         // We use the Hestenes-Stiefel update formula:
         //
         //            g_k^T (g_k - g_{k-1})
         // beta_k = -------------------------
         //          d_{k-1}^T (g_k - g_{k-1})
         ColumnVector gradNewMinusGrad = state.gradNew - state.grad;
         state.beta
             = dot(state.gradNew, gradNewMinusGrad)
             / dot(state.dir, gradNewMinusGrad);

         // Do a direction restart (Powell restart)
         // Note: This is testing whether state.beta < 0 if state.beta were
         // assigned according to Polak-Ribière
         if (dot(state.gradNew, gradNewMinusGrad)
             / dot(state.grad, state.grad) <= std::numeric_limits<double>::denorm_min()) state.beta = 0;

         // d_k = g_k - beta_k * d_{k-1}
         state.dir = state.gradNew - state.beta * state.dir;
         state.grad = state.gradNew;
     }

     // H_k = - X^T A_k X
     // where A_k = diag(a_1, ..., a_n) and a_i = sigma(x_i c_{k-1}) sigma(-x_i c_{k-1})
     //
     //             g_k^T d_k
     // alpha_k = -------------
     //           d_k^T H_k d_k
     //
     // c_k = c_{k-1} - alpha_k * d_k
     state.coef += dot(state.grad, state.dir) /
         as_scalar(trans(state.dir) * state.X_transp_AX * state.dir)
         * state.dir;

     if(!state.coef.is_finite()){
         // we don't throw an error - instead set the state status to allow
         // other states (possibly trained as part of group by) to continue
         // training
         warning("Over- or underflow in conjugate-gradient step, while updating "
               "coefficients. Input data is likely of poor numerical condition.");
         state.status = TERMINATED;
         return state;
     }

     state.iteration++;
     return state;
 }

 /**
  * @brief Return the difference in log-likelihood between two states
  */
 AnyType
 internal_logregr_simple_step_distance::run(AnyType &args) {
     LogRegrSPTransitionState<ArrayHandle<double> > stateLeft = args[0];
     LogRegrSPTransitionState<ArrayHandle<double> > stateRight = args[1];

     if(stateLeft.status == NULL_EMPTY || stateRight.status == NULL_EMPTY){
         return 0.0;
     }
     return std::abs(stateLeft.logLikelihood - stateRight.logLikelihood);
 }

 /**
  * @brief Return the coefficients and diagnostic statistics of the state
  */
 AnyType
 internal_logregr_simple_result::run(AnyType &args) {
     LogRegrSPTransitionState<ArrayHandle<double> > state = args[0];
     if (state.status == NULL_EMPTY)
         return Null();

     SymmetricPositiveDefiniteEigenDecomposition<Matrix> decomposition(
         state.X_transp_AX, EigenvaluesOnly, ComputePseudoInverse);

     return stateToResult(*this, state.coef,
                          state.X_transp_AX, state.logLikelihood,
                          state.status, state.numRows);
 }


 /**
  * @brief Compute the diagnostic statistics
  *
  * This function wraps the common parts of computing the results for both the
  * CG and the IRLS method.
  */
 AnyType stateToResult(
     const Allocator &inAllocator,
     const HandleMap<const ColumnVector, TransparentHandle<double> > &inCoef,
     const Matrix & hessian,
     const double &logLikelihood,
     int status,
     const uint64_t &numRows) {

     SymmetricPositiveDefiniteEigenDecomposition<Matrix> decomposition(
         hessian, EigenvaluesOnly, ComputePseudoInverse);

     const Matrix &inverse_of_X_transp_AX = decomposition.pseudoInverse();
     const ColumnVector &diagonal_of_X_transp_AX = inverse_of_X_transp_AX.diagonal();

     MutableNativeColumnVector stdErr(
         inAllocator.allocateArray<double>(inCoef.size()));
     MutableNativeColumnVector waldZStats(
         inAllocator.allocateArray<double>(inCoef.size()));
     MutableNativeColumnVector waldPValues(
         inAllocator.allocateArray<double>(inCoef.size()));
     MutableNativeColumnVector oddsRatios(
         inAllocator.allocateArray<double>(inCoef.size()));

     for (Index i = 0; i < inCoef.size(); ++i) {
         stdErr(i) = std::sqrt(diagonal_of_X_transp_AX(i));
         waldZStats(i) = inCoef(i) / stdErr(i);
         waldPValues(i) = 2. * prob::cdf( prob::normal(),
                                          -std::abs(waldZStats(i)));
         oddsRatios(i) = std::exp( inCoef(i) );
     }

     // Return all coefficients, standard errors, etc. in a tuple
     AnyType tuple;
     tuple << inCoef << logLikelihood << stdErr << waldZStats << waldPValues
           << oddsRatios << inverse_of_X_transp_AX
           << sqrt(decomposition.conditionNo()) << status << numRows;
     return tuple;
 }

 } // namespace hello_world
 } // namespace modules
 } // namespace madlib
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	/* ------------------------------------------------------
	*
	* @file logistic.cpp
	*
	* @brief Logistic-Regression functions
	*
	* We implement the conjugate-gradient method and the iteratively-reweighted-
	* least-squares method.
	*
	// ----------------------------------------------------------------------- */
	#include <limits>
	#include <dbconnector/dbconnector.hpp>
	#include <modules/shared/HandleTraits.hpp>
	#include <modules/prob/boost.hpp>
	#include <boost/math/distributions.hpp>
	#include <modules/prob/student.hpp>
	#include "simple_logistic.hpp"

	namespace madlib {

	// Use Eigen
	using namespace dbal::eigen_integration;

	namespace modules {

	// Import names from other MADlib modules
	using dbal::NoSolutionFoundException;

	namespace hello_world {

	// FIXME this enum should be accessed by all modules that may need grouping
	// valid status values
	enum { IN_PROCESS, COMPLETED, TERMINATED, NULL_EMPTY };

	// Internal functions
	AnyType stateToResult(const Allocator &inAllocator,
	const HandleMap<const ColumnVector, TransparentHandle<double> >& inCoef,
	const Matrix & hessian,
	const double &logLikelihood,
	int status,
	const uint64_t &numRows);


	// ---------------------------------------------------------------------------
	// Logistic Regression States
	// ---------------------------------------------------------------------------

	/**
	* @brief Inter- and intra-iteration state for conjugate-gradient method for
	* logistic regression
	*
	* TransitionState encapsualtes the transition state during the
	* logistic-regression aggregate function. To the database, the state is
	* exposed as a single DOUBLE PRECISION array, to the C++ code it is a proper
	* object containing scalars and vectors.
	*
	* Note: We assume that the DOUBLE PRECISION array is initialized by the
	* database with length at least 5, and all elemenets are 0.
	*
	*/
	template <class Handle>
	class LogRegrSPTransitionState {
	template <class OtherHandle>
	friend class LogRegrSPTransitionState;

	public:
	LogRegrSPTransitionState(const AnyType &inArray)
	: mStorage(inArray.getAs<Handle>()) {

	rebind(static_cast<uint16_t>(mStorage[1]));
	}

	/**
	* @brief Convert to backend representation
	*
	* We define this function so that we can use State in the
	* argument list and as a return type.
	*/
	inline operator AnyType() const {
	return mStorage;
	}

	/**
	* @brief Initialize the conjugate-gradient state.
	*
	* This function is only called for the first iteration, for the first row.
	*/
	inline void initialize(const Allocator &inAllocator, uint16_t inWidthOfX) {
	mStorage = inAllocator.allocateArray<double, dbal::AggregateContext,
	dbal::DoZero, dbal::ThrowBadAlloc>(arraySize(inWidthOfX));
	rebind(inWidthOfX);
	widthOfX = inWidthOfX;
	}

	/**
	* @brief We need to support assigning the previous state
	*/
	template <class OtherHandle>
	LogRegrSPTransitionState &operator=(
	const LogRegrSPTransitionState<OtherHandle> &inOtherState) {

	for (size_t i = 0; i < mStorage.size(); i++)
	mStorage[i] = inOtherState.mStorage[i];
	return *this;
	}

	/**
	* @brief Merge with another State object by copying the intra-iteration
	* fields
	*/
	template <class OtherHandle>
	LogRegrSPTransitionState &operator+=(
	const LogRegrSPTransitionState<OtherHandle> &inOtherState) {

	if (mStorage.size() != inOtherState.mStorage.size() \|\|
	widthOfX != inOtherState.widthOfX)
	throw std::logic_error("Internal error: Incompatible transition "
	"states");

	numRows += inOtherState.numRows;
	gradNew += inOtherState.gradNew;
	X_transp_AX += inOtherState.X_transp_AX;
	logLikelihood += inOtherState.logLikelihood;
	// merged state should have the higher status
	// (see top of file for more on 'status' )
	status = (inOtherState.status > status) ? inOtherState.status : status;
	return *this;
	}

	/**
	* @brief Reset the inter-iteration fields.
	*/
	inline void reset() {
	numRows = 0;
	X_transp_AX.fill(0);
	gradNew.fill(0);
	logLikelihood = 0;
	status = IN_PROCESS;
	}

	private:
	static inline size_t arraySize(const uint16_t inWidthOfX) {
	return 6 + inWidthOfX * inWidthOfX + 4 * inWidthOfX;
	}

	/**
	* @brief Rebind to a new storage array
	*
	* @param inWidthOfX The number of independent variables.
	*
	* Array layout (iteration refers to one aggregate-function call):
	* Inter-iteration components (updated in final function):
	* - 0: iteration (current iteration)
	* - 1: widthOfX (number of coefficients)
	* - 2: coef (vector of coefficients)
	* - 2 + widthOfX: dir (direction)
	* - 2 + 2 * widthOfX: grad (gradient)
	* - 2 + 3 * widthOfX: beta (scale factor)
	*
	* Intra-iteration components (updated in transition step):
	* - 3 + 3 * widthOfX: numRows (number of rows already processed in this iteration)
	* - 4 + 3 * widthOfX: gradNew (intermediate value for gradient)
	* - 4 + 4 * widthOfX: X_transp_AX (X^T A X)
	* - 4 + widthOfX * widthOfX + 4 * widthOfX: logLikelihood ( ln(l(c)) )
	*/
	void rebind(uint16_t inWidthOfX) {
	iteration.rebind(&mStorage[0]);
	widthOfX.rebind(&mStorage[1]);
	coef.rebind(&mStorage[2], inWidthOfX);
	dir.rebind(&mStorage[2 + inWidthOfX], inWidthOfX);
	grad.rebind(&mStorage[2 + 2 * inWidthOfX], inWidthOfX);
	beta.rebind(&mStorage[2 + 3 * inWidthOfX]);
	numRows.rebind(&mStorage[3 + 3 * inWidthOfX]);
	gradNew.rebind(&mStorage[4 + 3 * inWidthOfX], inWidthOfX);
	X_transp_AX.rebind(&mStorage[4 + 4 * inWidthOfX], inWidthOfX, inWidthOfX);
	logLikelihood.rebind(&mStorage[4 + inWidthOfX * inWidthOfX + 4 * inWidthOfX]);
	status.rebind(&mStorage[5 + inWidthOfX * inWidthOfX + 4 * inWidthOfX]);
	}

	Handle mStorage;

	public:
	typename HandleTraits<Handle>::ReferenceToUInt32 iteration;
	typename HandleTraits<Handle>::ReferenceToUInt16 widthOfX;
	typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap coef;
	typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap dir;
	typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap grad;
	typename HandleTraits<Handle>::ReferenceToDouble beta;

	typename HandleTraits<Handle>::ReferenceToUInt64 numRows;
	typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap gradNew;
	typename HandleTraits<Handle>::MatrixTransparentHandleMap X_transp_AX;
	typename HandleTraits<Handle>::ReferenceToDouble logLikelihood;
	typename HandleTraits<Handle>::ReferenceToUInt16 status;
	};




	/**
	* @brief Logistic function
	*/
	inline double sigma(double x) {
	return 1. / (1. + std::exp(-x));
	}

	/**
	* @brief Perform the logistic-regression transition step
	*/
	AnyType
	logregr_simple_step_transition::run(AnyType &args) {
	LogRegrSPTransitionState<MutableArrayHandle<double> > state = args[0];
	if (args[1].isNull() \|\| args[2].isNull()) { return args[0]; }
	double y = args[1].getAs<bool>() ? 1. : -1.;
	MappedColumnVector x;
	try {
	// an exception is raised in the backend if args[2] contains nulls
	MappedColumnVector xx = args[2].getAs<MappedColumnVector>();
	// x is a const reference, we can only rebind to change its pointer
	x.rebind(xx.memoryHandle(), xx.size());
	} catch (const ArrayWithNullException &e) {
	return args[0];
	}

	// The following check was added with MADLIB-138.
	if (!dbal::eigen_integration::isfinite(x)) {
	warning("Design matrix is not finite.");
	state.status = TERMINATED;
	return state;
	}

	if (state.numRows == 0) {
	if (x.size() > std::numeric_limits<uint16_t>::max()){
	warning("Number of independent variables cannot be larger than 65535.");
	state.status = TERMINATED;
	return state;
	}


	state.initialize(*this, static_cast<uint16_t>(x.size()));
	if (!args[3].isNull()) {
	LogRegrSPTransitionState<ArrayHandle<double> > previousState = args[3];

	state = previousState;
	state.reset();
	}
	}
	// Now do the transition step
	state.numRows++;
	double xc = dot(x, state.coef);
	state.gradNew.noalias() += sigma(-y * xc) * y * trans(x);

	// Note: sigma(-x) = 1 - sigma(x).
	double a = sigma(xc) * sigma(-xc);
	state.X_transp_AX += x * trans(x) * a;

	state.logLikelihood -= std::log( 1. + std::exp(-y * xc) );

	return state;
	}


	/**
	* @brief Perform the perliminary aggregation function: Merge transition states
	*/
	AnyType
	logregr_simple_step_merge_states::run(AnyType &args) {
	LogRegrSPTransitionState<MutableArrayHandle<double> > stateLeft = args[0];
	LogRegrSPTransitionState<ArrayHandle<double> > stateRight = args[1];

	// We first handle the trivial case where this function is called with one
	// of the states being the initial state
	if (stateLeft.numRows == 0)
	return stateRight;
	else if (stateRight.numRows == 0)
	return stateLeft;

	// Merge states together and return
	stateLeft += stateRight;
	return stateLeft;
	}



	/**
	* @brief Perform the logistic-regression final step
	*/
	AnyType
	logregr_simple_step_final::run(AnyType &args) {
	// We request a mutable object. Depending on the backend, this might perform
	// a deep copy.
	LogRegrSPTransitionState<MutableArrayHandle<double> > state = args[0];

	// Aggregates that haven't seen any data just return Null.
	if (state.numRows == 0){
	state.status = NULL_EMPTY;
	return state;
	}

	// Note: k = state.iteration
	if (state.iteration == 0) {
	// Iteration computes the gradient

	state.dir = state.gradNew;
	state.grad = state.gradNew;
	} else {
	// We use the Hestenes-Stiefel update formula:
	//
	// g_k^T (g_k - g_{k-1})
	// beta_k = -------------------------
	// d_{k-1}^T (g_k - g_{k-1})
	ColumnVector gradNewMinusGrad = state.gradNew - state.grad;
	state.beta
	= dot(state.gradNew, gradNewMinusGrad)
	/ dot(state.dir, gradNewMinusGrad);

	// Do a direction restart (Powell restart)
	// Note: This is testing whether state.beta < 0 if state.beta were
	// assigned according to Polak-Ribière
	if (dot(state.gradNew, gradNewMinusGrad)
	/ dot(state.grad, state.grad) <= std::numeric_limits<double>::denorm_min()) state.beta = 0;

	// d_k = g_k - beta_k * d_{k-1}
	state.dir = state.gradNew - state.beta * state.dir;
	state.grad = state.gradNew;
	}

	// H_k = - X^T A_k X
	// where A_k = diag(a_1, ..., a_n) and a_i = sigma(x_i c_{k-1}) sigma(-x_i c_{k-1})
	//
	// g_k^T d_k
	// alpha_k = -------------
	// d_k^T H_k d_k
	//
	// c_k = c_{k-1} - alpha_k * d_k
	state.coef += dot(state.grad, state.dir) /
	as_scalar(trans(state.dir) * state.X_transp_AX * state.dir)
	* state.dir;

	if(!state.coef.is_finite()){
	// we don't throw an error - instead set the state status to allow
	// other states (possibly trained as part of group by) to continue
	// training
	warning("Over- or underflow in conjugate-gradient step, while updating "
	"coefficients. Input data is likely of poor numerical condition.");
	state.status = TERMINATED;
	return state;
	}

	state.iteration++;
	return state;
	}

	/**
	* @brief Return the difference in log-likelihood between two states
	*/
	AnyType
	internal_logregr_simple_step_distance::run(AnyType &args) {
	LogRegrSPTransitionState<ArrayHandle<double> > stateLeft = args[0];
	LogRegrSPTransitionState<ArrayHandle<double> > stateRight = args[1];

	if(stateLeft.status == NULL_EMPTY \|\| stateRight.status == NULL_EMPTY){
	return 0.0;
	}
	return std::abs(stateLeft.logLikelihood - stateRight.logLikelihood);
	}

	/**
	* @brief Return the coefficients and diagnostic statistics of the state
	*/
	AnyType
	internal_logregr_simple_result::run(AnyType &args) {
	LogRegrSPTransitionState<ArrayHandle<double> > state = args[0];
	if (state.status == NULL_EMPTY)
	return Null();

	SymmetricPositiveDefiniteEigenDecomposition<Matrix> decomposition(
	state.X_transp_AX, EigenvaluesOnly, ComputePseudoInverse);

	return stateToResult(*this, state.coef,
	state.X_transp_AX, state.logLikelihood,
	state.status, state.numRows);
	}


	/**
	* @brief Compute the diagnostic statistics
	*
	* This function wraps the common parts of computing the results for both the
	* CG and the IRLS method.
	*/
	AnyType stateToResult(
	const Allocator &inAllocator,
	const HandleMap<const ColumnVector, TransparentHandle<double> > &inCoef,
	const Matrix & hessian,
	const double &logLikelihood,
	int status,
	const uint64_t &numRows) {

	SymmetricPositiveDefiniteEigenDecomposition<Matrix> decomposition(
	hessian, EigenvaluesOnly, ComputePseudoInverse);

	const Matrix &inverse_of_X_transp_AX = decomposition.pseudoInverse();
	const ColumnVector &diagonal_of_X_transp_AX = inverse_of_X_transp_AX.diagonal();

	MutableNativeColumnVector stdErr(
	inAllocator.allocateArray<double>(inCoef.size()));
	MutableNativeColumnVector waldZStats(
	inAllocator.allocateArray<double>(inCoef.size()));
	MutableNativeColumnVector waldPValues(
	inAllocator.allocateArray<double>(inCoef.size()));
	MutableNativeColumnVector oddsRatios(
	inAllocator.allocateArray<double>(inCoef.size()));

	for (Index i = 0; i < inCoef.size(); ++i) {
	stdErr(i) = std::sqrt(diagonal_of_X_transp_AX(i));
	waldZStats(i) = inCoef(i) / stdErr(i);
	waldPValues(i) = 2. * prob::cdf( prob::normal(),
	-std::abs(waldZStats(i)));
	oddsRatios(i) = std::exp( inCoef(i) );
	}

	// Return all coefficients, standard errors, etc. in a tuple
	AnyType tuple;
	tuple << inCoef << logLikelihood << stdErr << waldZStats << waldPValues
	<< oddsRatios << inverse_of_X_transp_AX
	<< sqrt(decomposition.conditionNo()) << status << numRows;
	return tuple;
	}

	} // namespace hello_world
	} // namespace modules
	} // namespace madlib