blob: 6efcf617f8027d6497a2ab3373a80a990bfb5f2b [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file linear_crf.cpp
*
* @brief Linear-chain Conditional Random Field functions
*
* We implement limited-memory BFGS method.
*
*//* ----------------------------------------------------------------------- */
#include <iostream>
#include <dbconnector/dbconnector.hpp>
#include <modules/shared/HandleTraits.hpp>
#include "linear_crf.hpp"
namespace madlib {
// Use Eigen
using namespace dbal::eigen_integration;
namespace modules {
// Import names from other MADlib modules
using dbal::NoSolutionFoundException;
namespace crf {
// Internal functions
AnyType stateToResult(const Allocator &inAllocator,
const HandleMap<const ColumnVector,
TransparentHandle<double> > &incoef,
double loglikelihood, const uint32_t &iterations);
/**
* @brief Inter- and intra-iteration state for lbfgs method for
* linear-chain conditional random field
*
* TransitionState encapsualtes the transition state during the
* linear-chain crf aggregate function. To the database, the state is
* exposed as a single DOUBLE PRECISION array, to the C++ code it is a proper
* object containing scalars and vectors.
*
* Note: We assume that the DOUBLE PRECISION array is initialized by the
* database with length at least 58, and all elemenets are 0.
*/
template <class Handle>
class LinCrfLBFGSTransitionState {
template <class OtherHandle>
friend class LinCrfLBFGSTransitionState;
public:
LinCrfLBFGSTransitionState(const AnyType &inArray)
: mStorage(inArray.getAs<Handle>()) {
rebind(static_cast<uint32_t>(mStorage[1]));
}
/**
* @brief Convert to backend representation
*
* We define this function so that we can use State in the
* argument list and as a return type.
*/
inline operator AnyType() const {
return mStorage;
}
/**
* @brief Initialize the lbfgs state.
*
* This function is only called for the first row.
*/
inline void initialize(const Allocator &inAllocator, uint32_t inWidthOfX,
uint32_t tagSize) {
mStorage = inAllocator.allocateArray<double, dbal::AggregateContext,
dbal::DoZero, dbal::ThrowBadAlloc>(arraySize(inWidthOfX));
rebind(inWidthOfX);
num_features = inWidthOfX;
num_labels = tagSize;
if(iteration == 0)
diag.fill(1);
}
/**
* @brief We need to support assigning the previous state
*/
template <class OtherHandle>
LinCrfLBFGSTransitionState &operator=(
const LinCrfLBFGSTransitionState<OtherHandle> &inOtherState) {
for (size_t i = 0; i < mStorage.size(); i++)
mStorage[i] = inOtherState.mStorage[i];
return *this;
}
/**
* @brief Merge with another State object by copying the intra-iteration
* fields
*/
template <class OtherHandle>
LinCrfLBFGSTransitionState &operator+=(
const LinCrfLBFGSTransitionState<OtherHandle> &inOtherState) {
if (mStorage.size() != inOtherState.mStorage.size())
throw std::logic_error("Internal error: Incompatible transition "
"states");
numRows += inOtherState.numRows;
grad += inOtherState.grad;
loglikelihood += inOtherState.loglikelihood;
return *this;
}
/**
* @brief Reset the inter-iteration fields.
*/
inline void reset() {
numRows = 0;
grad.fill(0);
loglikelihood = 0;
}
static const int m=7;// The number of corrections used in the LBFGS update.
private:
static inline uint32_t arraySize(const uint32_t num_features) {
return 52 + 3 * num_features + num_features*(2*m+1)+2*m;
}
void rebind(uint32_t inWidthOfFeature) {
iteration.rebind(&mStorage[0]);
num_features.rebind(&mStorage[1]);
num_labels.rebind(&mStorage[2]);
coef.rebind(&mStorage[3], inWidthOfFeature);
diag.rebind(&mStorage[3 + inWidthOfFeature], inWidthOfFeature);
grad.rebind(&mStorage[3 + 2 * inWidthOfFeature], inWidthOfFeature);
ws.rebind(&mStorage[3 + 3 * inWidthOfFeature],
inWidthOfFeature*(2*m+1)+2*m);
numRows.rebind(&mStorage[3 + 3 * inWidthOfFeature +
inWidthOfFeature*(2*m+1)+2*m]);
loglikelihood.rebind(&mStorage[4 + 3 * inWidthOfFeature +
inWidthOfFeature*(2*m+1)+2*m]);
lbfgs_state.rebind(&mStorage[5 + 3 * inWidthOfFeature +
inWidthOfFeature*(2*m+1)+2*m], 21);
mcsrch_state.rebind(&mStorage[26 + 3 * inWidthOfFeature +
inWidthOfFeature*(2*m+1)+2*m], 25);
}
Handle mStorage;
public:
typename HandleTraits<Handle>::ReferenceToUInt32 iteration;
typename HandleTraits<Handle>::ReferenceToUInt32 num_features;
typename HandleTraits<Handle>::ReferenceToUInt32 num_labels;
typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap coef;
typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap diag;
typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap grad;
typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap ws;
typename HandleTraits<Handle>::ReferenceToUInt64 numRows;
typename HandleTraits<Handle>::ReferenceToDouble loglikelihood;
typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap lbfgs_state;
typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap mcsrch_state;
};
/** This class contains code for the limited-memory Broyden-Fletcher-Goldfarb-Shanno
* (LBFGS) algorithm for large-scale multidimensional unconstrained minimization problems.
* This following class is a translation of Fortran code written by Jorge Nocedal.
*/
class LBFGS {
public:
// shared variable in lbfgs
double stp1, ftol, stp, sq, yr, beta;
// iflag A return with <code>iflag &lt; 0</code> indicates an error,
// and <code>iflag = 0</code> indicates that the routine has
// terminated without detecting errors. On a return with
// <code>iflag = 1</code>, the user must evaluate the function
// <code>f</code> and gradient <code>g</code>. On a return with
// iflag is negative , lbfgs failed
int iflag, iter, nfun, point, ispt, iypt, maxfev, info, bound, npt, cp, nfev, inmc, iycn, iscn;
// shared varibles in mcscrch
int infoc;
double dg, dgm, dginit, dgtest, dgx, dgxm, dgy, dgym, finit, ftest1, fm, fx, fxm, fy, fym, p5, p66, stx, sty, stmin, stmax, width, width1, xtrapf;
bool brackt, stage1, finish;
ColumnVector w;//
ColumnVector x;// solution vector
ColumnVector diag;
LBFGS(LinCrfLBFGSTransitionState<MutableArrayHandle<double> >&);
void save_state(LinCrfLBFGSTransitionState<MutableArrayHandle<double> > &state);
void mcstep (double&, double& , double&, double&, double& , double&, double&, double, double, bool&, double, double, int&);
void mcsrch (int, Eigen::VectorXd&, double, Eigen::VectorXd&, const Eigen::VectorXd&, double&, double, double, int, int&, int&, Eigen::VectorXd&);
void lbfgs(int, int, double, Eigen::VectorXd, double, double);
};
/**
*@brief initialize state of current lbfgs iteration with the state of last iteration
*/
LBFGS::LBFGS(LinCrfLBFGSTransitionState<MutableArrayHandle<double> >& state) {
w = state.ws;
diag = state.diag;
x = state.coef;
stp1 = state.lbfgs_state(0);
ftol = state.lbfgs_state(1);
stp = state.lbfgs_state(2);
sq = state.lbfgs_state(3);
yr = state.lbfgs_state(4);
beta = state.lbfgs_state(5);
iflag = static_cast<int>(state.lbfgs_state(6));
iter = static_cast<int>(state.lbfgs_state(7));
nfun = static_cast<int>(state.lbfgs_state(8));
point = static_cast<int>(state.lbfgs_state(9));
ispt = static_cast<int>(state.lbfgs_state(10));
iypt = static_cast<int>(state.lbfgs_state(11));
maxfev = static_cast<int>(state.lbfgs_state(12));
info = static_cast<int>(state.lbfgs_state(13));
bound = static_cast<int>(state.lbfgs_state(14));
npt = static_cast<int>(state.lbfgs_state(15));
cp = static_cast<int>(state.lbfgs_state(16));
nfev = static_cast<int>(state.lbfgs_state(17));
inmc = static_cast<int>(state.lbfgs_state(18));
iycn = static_cast<int>(state.lbfgs_state(19));
iscn = static_cast<int>(state.lbfgs_state(20));
infoc = static_cast<int>(state.mcsrch_state(0));
dg = state.mcsrch_state(1);
dgm = state.mcsrch_state(2);
dginit = state.mcsrch_state(3);
dgtest = state.mcsrch_state(4);
dgx = state.mcsrch_state(5);
dgxm = state.mcsrch_state(6);
dgy = state.mcsrch_state(7);
dgym = state.mcsrch_state(8);
finit = state.mcsrch_state(9);
ftest1 = state.mcsrch_state(10);
fm = state.mcsrch_state(11);
fx = state.mcsrch_state(12);
fxm = state.mcsrch_state(13);
fy = state.mcsrch_state(14);
fym = state.mcsrch_state(15);
stx = state.mcsrch_state(16);
sty = state.mcsrch_state(17);
stmin = state.mcsrch_state(18);
stmax = state.mcsrch_state(19);
width = state.mcsrch_state(20);
width1 = state.mcsrch_state(21);
brackt = (state.mcsrch_state(22) == 1.0 ? true: false);
stage1 = (state.mcsrch_state(23) == 1.0 ? true: false);
finish = (state.mcsrch_state(24) == 1.0 ? true: false);
}
/**
*@brief save current lbfgs state for the next lbfgs iteration
*/
void LBFGS::save_state(LinCrfLBFGSTransitionState<MutableArrayHandle<double> > &state) {
state.ws = w ;
state.diag = diag ;
state.coef = x ;
state.lbfgs_state(0) = stp1;
state.lbfgs_state(1) = ftol;
state.lbfgs_state(2) = stp;
state.lbfgs_state(3) = sq;
state.lbfgs_state(4) = yr;
state.lbfgs_state(5) = beta;
state.lbfgs_state(6) = iflag;
state.lbfgs_state(7) = iter;
state.lbfgs_state(8) = nfun;
state.lbfgs_state(9) = point;
state.lbfgs_state(10) = ispt;
state.lbfgs_state(11) = iypt;
state.lbfgs_state(12) = maxfev;
state.lbfgs_state(13) = info;
state.lbfgs_state(14) = bound;
state.lbfgs_state(15) = npt;
state.lbfgs_state(16) = cp;
state.lbfgs_state(17) = nfev;
state.lbfgs_state(18) = inmc;
state.lbfgs_state(19) = iycn;
state.lbfgs_state(20) = iscn;
state.mcsrch_state(0) = infoc;
state.mcsrch_state(1) = dg;
state.mcsrch_state(2) = dgm;
state.mcsrch_state(3) = dginit;
state.mcsrch_state(4) = dgtest;
state.mcsrch_state(5) = dgx;
state.mcsrch_state(6) = dgxm;
state.mcsrch_state(7) = dgy;
state.mcsrch_state(8) = dgym;
state.mcsrch_state(9) = finit;
state.mcsrch_state(10) = ftest1;
state.mcsrch_state(11) = fm;
state.mcsrch_state(12) = fx;
state.mcsrch_state(13) = fxm;
state.mcsrch_state(14) = fy;
state.mcsrch_state(15) = fym;
state.mcsrch_state(16) = stx;
state.mcsrch_state(17) = sty;
state.mcsrch_state(18) = stmin;
state.mcsrch_state(19) = stmax;
state.mcsrch_state(20) = width;
state.mcsrch_state(21) = width1;
state.mcsrch_state(22) = (brackt == true ? 1.0 : 0.0);
state.mcsrch_state(23) = (stage1 == true ? 1.0 : 0.0);
state.mcsrch_state(24) = (finish == true ? 1.0 : 0.0);
}
void LBFGS::mcstep(double& stx, double& fx, double& dx,
double& sty, double& fy, double& dy,
double& stp, double fp, double dp, bool& brackt,
double stmin, double stmax, int& info)
{
bool bound;
double gamma, p, q, r, sgnd, stpc, stpf, stpq, theta, s;
info = 0;
if ((brackt && ((stp <= std::min(stx, sty)) || (stp >= std::max(stx, sty)))) ||
(dx * (stp - stx) >= 0) || (stmax < stmin)) {
return;
}
sgnd = dp*(dx/fabs(dx));
if (fp > fx) {
info = 1;
bound = true;
theta = 3.0 * (fx - fp) / (stp - stx) + dx + dp;
s = std::max(fabs(theta), std::max(fabs(dx), fabs(dp)));
gamma = s * sqrt((theta / s) * (theta / s) - (dx / s) * (dp / s));
if (stp < stx) {
gamma = -gamma;
}
p = gamma - dx + theta;
q = gamma - dx + gamma + dp;
r = p / q;
stpc = stx + r * (stp - stx);
stpq = stx + dx/((fx - fp)/(stp - stx) + dx)/2 * (stp - stx);
if (fabs(stpc - stx) < fabs(stpq - stx)) {
stpf = stpc;
} else {
stpf = stpc + (stpq - stpc)/2;
}
brackt = true;
} else if (sgnd < 0.0) {
info = 2;
bound = false;
theta = 3.0 * (fx - fp) / (stp - stx) + dx + dp;
s = std::max(fabs(theta), std::max(fabs(dx), fabs(dp)));
gamma = s * sqrt((theta / s) * (theta / s) - (dx / s) * (dp / s));
if (stp > stx) {
gamma = -gamma;
}
p = gamma - dp + theta;
q = gamma - dp + gamma + dx;
r = p / q;
stpc = stp + r * (stx - stp);
stpq = stp + dp / (dp - dx) * (stx - stp);
stpf = (fabs(stpc - stp) > fabs(stpq - stp)) ? stpc : stpq;
brackt = true;
} else if (fabs(dp) < fabs(dx)) {
info = 3;
bound = true;
theta = 3.0 * (fx - fp) / (stp - stx) + dx + dp;
s = std::max(fabs(theta), std::max(fabs(dx), fabs(dp)));
gamma = s * sqrt(std::max(0.0, (theta/s)*(theta/s) - (dx/s)*(dp/s)));
if (stp > stx) {
gamma = -gamma;
}
p = gamma - dp + theta;
q = gamma + (dx - dp) + gamma;
r = p / q;
if ((r < 0.0) && (gamma != 0.0)) {
stpc = stp + r * (stx - stp);
} else {
stpc = (stp > stx) ? stmax : stmin;
}
stpq = stp + dp / (dp - dx) * (stx - stp);
if (brackt) {
stpf = (fabs(stp - stpc) < fabs(stp - stpq)) ? stpc : stpq;
} else {
stpf = (fabs(stp - stpc) > fabs(stp - stpq)) ? stpc : stpq;
}
} else {
info = 4;
bound = false;
if (brackt) {
theta = 3.0 * (fp - fy) / (sty - stp) + dy + dp;
s = std::max(fabs(theta), std::max(fabs(dy), fabs(dp)));
gamma = s * sqrt((theta/s)*(theta/s) - (dy/s)*(dp/s));
if (stp > sty) {
gamma = -gamma;
}
p = gamma - dp + theta;
q = gamma - dp + gamma + dy;
r = p / q;
stpc = stp + r * (sty - stp);
stpf = stpc;
} else {
stpf = (stp > stx) ? stmax : stmin;
}
}
if (fp > fx) {
sty = stp;
fy = fp;
dy = dp;
} else {
if (sgnd < 0.0) {
sty = stx;
fy = fx;
dy = dx;
}
stx = stp;
fx = fp;
dx = dp;
}
stp = std::max(stmin, std::min(stmax, stpf));
if (brackt && bound) {
if (sty > stx) {
stp = std::min(stx + 0.66*(sty - stx), stp);
} else {
stp = std::max(stx + 0.66*(sty - stx), stp);
}
}
return;
}
void LBFGS::mcsrch(int n, Eigen::VectorXd& x, double f, Eigen::VectorXd& g,
const Eigen::VectorXd& s, double& stp, double ftol,
double xtol, int maxfev, int& info, int& nfev,
Eigen::VectorXd& wa)
{
double stpmin = 1e-20;
double stpmax = 1e20;
double p5 = 0.5;
double p66 = 0.66;
double xtrapf = 4.0;
double gtol = 0.9;
if(info != -1) {
infoc = 1;
if (n <= 0 || stp <= 0 || ftol < 0 || gtol < 0 || xtol < 0 ||
stpmin < 0 || stpmax < stpmin || maxfev <= 0 )
return;
dginit = g.dot(s);
if (dginit >= 0.0) {
std::cout << "The search direction is not a descent direction."
<< std::endl;
return;
}
brackt = false;
stage1 = true;
nfev = 0;
finit = f;
dgtest = ftol * dginit;
width = stpmax - stpmin;
width1 = width/p5;
wa = x;
stx = 0.0;
fx = finit;
dgx = dginit;
sty = 0.0;
fy = finit;
dgy = dginit;
}
while(true)
{
if(info != -1)
{
if (brackt) {
if (stx < sty) {
stmin = stx;
stmax = sty;
} else {
stmin = sty;
stmax = stx;
}
} else {
stmin = stx;
stmax = stp + xtrapf * (stp - stx);
}
stp = std::max(stpmin, std::min(stpmax, stp));
if ((brackt && ((stp <= stmin) || (stp >= stmax))) ||
(nfev == maxfev - 1) ||
(!infoc) || (brackt && ((stmax - stmin) <= xtol * stmax))) {
stp = stx;
}
x = wa + stp * s;
info = -1;
return;
}
info = 0;
nfev= nfev + 1;
dg = g.dot(s);
ftest1 = finit + stp * dgtest;
if ((brackt && ((stp <= stmin) || (stp >= stmax))) || (!infoc)) {
info = 6;
}
if ((stp == stpmax) && (f <= ftest1) && (dg <= dgtest)) {
info = 5;
}
if ((stp == stpmin) && ((f >= ftest1) || (dg >= dgtest))) {
info = 4;
}
if (nfev >= maxfev) {
info = 3;
}
if (brackt && (stmax - stmin <= xtol * stmax)) {
info = 2;
}
if ((f <= ftest1) && (fabs(dg) <= -gtol * dginit)) {
info = 1;
}
if (info !=0 )
return ;
if ( stage1 && f <= ftest1 && dg >= std::min(ftol , gtol) * dginit )
stage1 = false;
if (stage1 && f <= fx && f > ftest1) {
fm = f - stp * dgtest;
fxm = fx - stx * dgtest;
fym = fy - sty * dgtest;
dgm = dg - dgtest;
dgxm = dgx - dgtest;
dgym = dgy - dgtest;
mcstep(stx, fxm, dgxm, sty, fym, dgym, stp, fm, dgm, brackt,
stmin, stmax, infoc);
fx = fxm + stx * dgtest;
fy = fym + sty * dgtest;
dgx = dgxm + dgtest;
dgy = dgym + dgtest;
} else {
mcstep(stx, fx, dgx, sty, fy, dgy, stp, f, dg, brackt, stmin,
stmax, infoc);
}
if (brackt) {
if (fabs(sty - stx) >= p66 * width1) {
stp = stx + p5 * (sty - stx);
}
width1 = width;
width = fabs(sty - stx);
}
}
}
void LBFGS::lbfgs(int n, int m, double f, Eigen::VectorXd g, double eps,
double xtol)
{
bool execute_entire_while_loop = false;
if(iflag == 0) {
iter=0;
if ( n <= 0 || m <= 0 )
{
iflag= -3;
}
nfun= 1;
point = 0;
finish = false;
ispt = n + 2*m;
iypt = ispt + n*m;
npt = 0;
w.segment(ispt, n) = (-g).cwiseProduct(diag);
stp1 = 1.0 / g.norm();
ftol= 0.0001;
maxfev= 20;
execute_entire_while_loop = true;
}
while(true) {
if(execute_entire_while_loop) {
iter++;
info = 0;
bound=iter-1;
if (iter!=1) {
if (iter > m) bound = m;
double ys = w.segment(iypt + npt, n).dot(w.segment(ispt + npt, n));
double yy = w.segment(iypt + npt, n).squaredNorm();
diag.setConstant(ys / yy);
cp = point;
if (point ==0 ) cp =m;
w[n + cp-1] = 1.0 / ys;
w.head(n) = -g;
cp = point;
for (int i = 0; i < bound; i++) {
cp -= 1;
if (cp == -1) {
cp = m - 1;
}
sq = w.segment(ispt + cp *n,n).dot(w.head(n));
inmc = n + m + cp;
iycn = iypt + cp * n;
w[inmc] = sq * w[n + cp];
w.head(n) -= w[inmc] * w.segment(iycn, n);
}
w.head(n)=w.head(n).cwiseProduct(diag);
for (int i = 0; i < bound; i++) {
yr = w.segment(iypt + cp * n, n).dot(w.head(n));
inmc = n + m + cp;
beta = w[inmc] - w[n + cp] * yr;
iscn = ispt + cp * n;
w.head(n) += beta * w.segment(iscn, n);
cp += 1;
if (cp == m) {
cp = 0;
}
}
w.segment(ispt + point * n, n) = w.head(n);
}
nfev = 0;
stp = (iter == 1) ? stp1 : 1.0;
w.head(n) = g;
}
mcsrch(n, x, f, g, w.segment(ispt + point * n, n), stp, ftol,
xtol, maxfev, info, nfev, diag);
if(info == -1) {
iflag = 1;
return;
} else {
iflag = -1;
}
nfun = nfun + nfev;
npt = point * n;
w.segment(ispt + npt,n) *=stp;
w.segment(iypt + npt,n) =g - w.head(n);
point = point + 1;
if (point == m) {
point = 0;
}
if(g.norm()/std::max(1.0,x.norm())<=eps) {
finish = true;
}
if (finish) {
iflag = 0;
return;
}
execute_entire_while_loop = true;
}
}
/**
*@brief compute exponential of Mi and Vi
*/
void compute_exp_Mi(int num_labels, Eigen::MatrixXd &Mi, Eigen::VectorXd &Vi) {
// take exponential operator
for (int m = 0; m < num_labels; m++) {
Vi(m) = std::exp(Vi(m));
for (int n = 0; n < num_labels; n++) {
Mi(m, n) = std::exp(Mi(m, n));
}
}
}
Eigen::VectorXd mult(Eigen::MatrixXd Mi, Eigen::VectorXd Vi, bool trans,
int num_label)
{
int i=0,j=0,r=0,c=0;
Eigen::VectorXd z(num_label);
z.fill(0);
for (i = 0; i < num_label; i++ )
{
for (j=0; j< num_label; j++) {
r= i;
c=j;
if(trans) {
r= j;
c=i;
}
z(r)+=Mi(i,j)*Vi(c);
}
}
return z;
}
/**
*@brief compute loglikelihood and gradient using forward-backward algorithm
*/
void validate_label(int label_id, int num_labels)
{
if ((label_id < 0) || (label_id >= num_labels))
throw std::runtime_error("Out of bound label ids found in feature table.");
}
void compute_logli_gradient(LinCrfLBFGSTransitionState<MutableArrayHandle<double> >& state,
MappedColumnVector& sparse_r,
MappedColumnVector& dense_m,
MappedColumnVector& sparse_m) {
int r_size = static_cast<int>(sparse_r.size());
int sparse_m_size = static_cast<int>(sparse_m.size());
int seq_len = static_cast<int>(sparse_r(r_size-2)) + 1;
Eigen::MatrixXd betas(static_cast<uint32_t>(state.num_labels), seq_len);
Eigen::VectorXd scale(seq_len);
Eigen::MatrixXd Mi(static_cast<uint32_t>(state.num_labels),
static_cast<uint32_t>(state.num_labels));
Eigen::VectorXd Vi(state.num_labels);
Eigen::VectorXd alpha(state.num_labels);
Eigen::VectorXd next_alpha(state.num_labels);
Eigen::VectorXd temp(state.num_labels);
Eigen::VectorXd ExpF(state.num_features);
betas.fill(0);
scale.fill(0);
alpha.fill(1);
next_alpha.fill(0);
temp.fill(0);
ExpF.fill(0);
// compute beta values in a backward fashion
// also scale beta-values to 1 to avoid numerical problems
scale(seq_len - 1) = state.num_labels;
betas.col(seq_len - 1).fill(1.0 / scale(seq_len - 1));
int index = r_size-1;
for (int i = seq_len - 1; i > 0; i--) {
Mi.fill(0);
Vi.fill(0);
// examine all features at position "pos"
//(prev_labe, curr_label, f_index, start_pos, exist)
while (index-4>=0 && sparse_r(index-1) == i) {
int curr_label = static_cast<int>(sparse_r(index-3));
validate_label(curr_label, state.num_labels);
int f_index = static_cast<int>(sparse_r(index-2));
Vi(curr_label) += state.coef(f_index);
index-=5;
}
//(f_index, prev_label, curr_label)
for(int n=0; n+2<sparse_m_size ; n+=3) {
int prev_label = static_cast<int>(sparse_m(n+1));
int curr_label = static_cast<int>(sparse_m(n+2));
validate_label(prev_label, state.num_labels);
validate_label(curr_label, state.num_labels);
Mi(prev_label, curr_label) += state.coef(static_cast<int>(sparse_m(n)));
}
compute_exp_Mi(state.num_labels, Mi, Vi);
temp=betas.col(i);
temp=temp.cwiseProduct(Vi);
betas.col(i -1) = mult(Mi,temp,false,state.num_labels);
// scale for the next (backward) beta values
scale(i - 1)=betas.col(i-1).sum();
betas.col(i - 1)*=(1.0 / scale(i - 1));
} // end of beta values computation
index = 0;
// start to compute the log-likelihood of the current sequence
for (int j = 0; j < seq_len; j++) {
Mi.fill(0);
Vi.fill(0);
// examine all features at position "pos"
int ori_index = index;
while (((index+4) <= (r_size-1)) && sparse_r(index+3) == j) {
int curr_label = static_cast<int>(sparse_r(index+1));
int f_index = static_cast<int>(sparse_r(index+2));
Vi(curr_label) += state.coef(f_index);
index+=5;
}
if(j>=1) {
for(int n=0; n+2<sparse_m_size ; n+=3) {
int prev_label = static_cast<int>(sparse_m(n+1));
int curr_label = static_cast<int>(sparse_m(n+2));
Mi(prev_label, curr_label) += state.coef(static_cast<int>(sparse_m(n)));
}
}
compute_exp_Mi(state.num_labels, Mi, Vi);
if(j>0) {
temp = alpha;
next_alpha=mult(Mi,temp,true,state.num_labels);
next_alpha=next_alpha.cwiseProduct(Vi);
} else {
next_alpha=Vi;
}
index = ori_index;
while (((index+4) <= (r_size-1)) && sparse_r(index+3) == j) {
int curr_label = (int)sparse_r(index+1);
int f_index = (int)sparse_r(index+2);
int exist = (int)sparse_r(index+4);
if (exist == 1) {
state.grad(f_index) += 1;
state.loglikelihood += state.coef(f_index);
}
ExpF(f_index) += next_alpha(curr_label) * betas(curr_label,j);
index+=5;
}
// Edge feature
if(j>=1) {
int f_index = (int)dense_m((j-1)*5+2);
state.grad(f_index) += 1;
state.loglikelihood += state.coef(f_index);
//(f_index, prev_label, curr_label)
for(int n=0; n+2<sparse_m_size ; n+=3) {
int f_index = (int)sparse_m(n);
int prev_label = static_cast<int>(sparse_m(n+1));
int curr_label = static_cast<int>(sparse_m(n+2));
ExpF(f_index) += alpha[prev_label] * Vi(curr_label) * Mi(prev_label,curr_label) * betas(curr_label, j);
}
}
alpha = next_alpha;
alpha*=(1.0 / scale(j));
}
// Zx = sum(alpha_i_n) where i = 1..num_labels, n = seq_len
double Zx = alpha.sum();
state.loglikelihood -= std::log(Zx);
// re-correct the value of seq_logli because Zx was computed from
// scaled alpha values
for (int k = 0; k < seq_len; k++) {
state.loglikelihood -= std::log(scale(k));
}
// update the gradient vector
for (size_t k = 0; k < state.num_features; k++) {
state.grad(k) -= ExpF(k) / Zx;
}
}
/**
* @brief Compute the log likelihood and gradient vector for each tuple
*/
AnyType
lincrf_lbfgs_step_transition::run(AnyType &args) {
LinCrfLBFGSTransitionState<MutableArrayHandle<double> > state = args[0];
MappedColumnVector sparse_r = args[1].getAs<MappedColumnVector>();
MappedColumnVector dense_m = args[2].getAs<MappedColumnVector>();
MappedColumnVector sparse_m = args[3].getAs<MappedColumnVector>();
if (state.numRows == 0) {
state.initialize(*this, static_cast<uint32_t>(args[4].getAs<double>()), static_cast<uint32_t>(args[5].getAs<double>()));
if (!args[6].isNull()) {
LinCrfLBFGSTransitionState<ArrayHandle<double> > previousState = args[6];
state = previousState;
state.reset();
}
}
state.numRows++;
compute_logli_gradient(state, sparse_r, dense_m, sparse_m);
return state;
}
/**
* @brief Perform the perliminary aggregation function: Merge transition states
*/
AnyType
lincrf_lbfgs_step_merge_states::run(AnyType &args) {
LinCrfLBFGSTransitionState<MutableArrayHandle<double> > stateLeft = args[0];
LinCrfLBFGSTransitionState<ArrayHandle<double> > stateRight = args[1];
// We first handle the trivial case where this function is called with one
// of the states being the initial state
if (stateLeft.numRows == 0)
return stateRight;
else if (stateRight.numRows == 0)
return stateLeft;
// Merge states together and return
stateLeft += stateRight;
return stateLeft;
}
/**
* @brief Perform the licrf_lbfgs final step
*/
AnyType
lincrf_lbfgs_step_final::run(AnyType &args) {
// We request a mutable object. Depending on the backend, this might perform
// a deep copy.
LinCrfLBFGSTransitionState<MutableArrayHandle<double> > state = args[0];
// Aggregates that haven't seen any data just return Null.
if (state.numRows == 0)
return Null();
// To avoid overfitting, penalize the likelihood with a spherical Gaussian
// weight prior
double sigma_square = 100;
state.loglikelihood -= (state.coef.dot(state.coef)/(2 * sigma_square));
state.grad -= state.coef/sigma_square;
// the lbfgs minimize function, we want to maximize loglikelihood
state.loglikelihood = state.loglikelihood * -1;
state.grad = -state.grad;
double eps = 0.001; //accuracy of the solution to be found
double xtol = 1.0e-16; //an estimate of the machine precision
assert((state.m > 0) && (state.m <= state.num_features) && (eps >= 0.0));
LBFGS instance(state);// initialize the lbfgs with state of last iteration
instance.lbfgs(state.num_features, state.m, state.loglikelihood, state.grad, eps, xtol);// lbfgs optimization
instance.save_state(state);//save current state for the next iteration of lbfgs
switch(instance.iflag)
{
case -1:
throw std::logic_error("The line search rountine mcsch failed");
break;
case -2:
throw std::logic_error("The i-th diagonal element of the diagonal inverse Hessian"
"approximation, given in DIAG, is not positive");
break;
case -3:
throw std::logic_error("Improper input parameters for LBFGS n or m are not positive");
break;
}
if(!state.coef.is_finite())
throw NoSolutionFoundException("Over- or underflow in "
"L-BFGS step, while updating coefficients. Input data "
"is likely of poor numerical condition.");
state.iteration++;
return state;
}
/**
* @brief Return iflag which indicates whether L-BFGS converge or not
*/
AnyType
internal_lincrf_lbfgs_converge::run(AnyType &args) {
LinCrfLBFGSTransitionState<ArrayHandle<double> > state = args[0];
return state.lbfgs_state(6);
}
/**
* @brief Return the coefficients and diagnostic statistics of the state
*/
AnyType
internal_lincrf_lbfgs_result::run(AnyType &args) {
LinCrfLBFGSTransitionState<ArrayHandle<double> > state = args[0];
return stateToResult(*this, state.coef, state.loglikelihood, state.iteration);
}
/**
* @brief Return the results for the L-BFGS method.
*/
AnyType stateToResult(
const Allocator &inAllocator,
const HandleMap<const ColumnVector, TransparentHandle<double> > &incoef,
double loglikelihood, const uint32_t &iterations) {
// FIXME: We currently need to copy the coefficient to a native array
// This should be transparent to user code
MutableNativeColumnVector coef(
inAllocator.allocateArray<double>(incoef.size()));
coef = incoef;
// Return all coefficients, loglikelihood in a tuple
AnyType tuple;
tuple << coef << loglikelihood << iterations;
return tuple;
}
} // namespace crf
} // namespace modules
} // namespace madlib