Fix loss averaging, remove usage of numBuffers
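
Previously getLossAndUpdateModel() divided each batch's loss by
n_points_with_positive_dist, the transition step summed those per-batch
averages, and the final step divided the sum by numBuffers. An average
of averages weights every buffer equally no matter how many rows it
holds, so the reported loss was skewed whenever buffer sizes differed.
getLossAndUpdateModel() now returns the total loss of the batch and the
final step divides the accumulated total by numRows. The merge step's
model averaging is likewise weighted by plain row counts, numBuffers is
dropped from the minibatch state, and task.model moves down one slot in
the state storage.

A simplified, hypothetical sketch of why the two schemes disagree (not
code from this patch; for brevity it assumes every point violates the
margin, so n_points_with_positive_dist equals the buffer size):

    #include <cstdio>

    int main() {
        double bufLoss[] = {5.0, 100.0};   // total hinge loss per buffer
        double bufRows[] = {10.0, 1000.0}; // rows per buffer
        double avgOfAvgs = 0.0, totalLoss = 0.0, totalRows = 0.0;
        for (int i = 0; i < 2; i++) {
            avgOfAvgs += bufLoss[i] / bufRows[i]; // old: per-buffer average
            totalLoss += bufLoss[i];              // new: plain sum
            totalRows += bufRows[i];
        }
        std::printf("old: %.4f new: %.4f\n",
                    avgOfAvgs / 2.0,        // old final: loss / numBuffers
                    totalLoss / totalRows); // new final: loss / numRows
        return 0;
    }

The 10-row buffer pulls the old estimate up to 0.30 even though 1000 of
the 1010 rows average a loss of 0.10; dividing once by the total row
count reports 0.104 instead.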
diff --git a/src/modules/convex/algo/igd.hpp b/src/modules/convex/algo/igd.hpp
index 45565d5..3ae4c13 100644
--- a/src/modules/convex/algo/igd.hpp
+++ b/src/modules/convex/algo/igd.hpp
@@ -35,7 +35,6 @@
static void transition(state_type &state, const tuple_type &tuple);
static void transitionInMiniBatch(state_type &state, const tuple_type &tuple);
- static void transitionInMiniBatch2(state_type &state, const tuple_type &tuple);
static void merge(state_type &state, const_state_type &otherState);
static void mergeInPlace(state_type &state, const_state_type &otherState);
static void final(state_type &state);
@@ -107,8 +106,10 @@
state.task.model, X_batch, y_batch, state.task.stepsize);
}
- // The first epoch will most likely have the most loss.
- // So being pessimistic, we return average loss only for the first epoch.
+ // The first epoch will most likely have the highest loss.
+ // Being pessimistic, we accumulate the total loss from the first epoch only.
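+ // (Later epochs revisit the same buffer while numRows counts each row
+ // once, so summing losses across all epochs would overstate the average.)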
if (curr_epoch==0) state.algo.loss += loss;
}
return;
@@ -156,8 +157,10 @@
}
// model averaging, weighted by rows seen
- double leftRows = static_cast<double>(state.algo.numRows + state.algo.numBuffers);
- double rightRows = static_cast<double>(otherState.algo.numRows + otherState.algo.numBuffers);
+ double leftRows = static_cast<double>(state.algo.numRows);
+ double rightRows = static_cast<double>(otherState.algo.numRows);
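+ // Weighted average computed in place:
+ // model = (model * leftRows + otherModel * rightRows) / totalNumRows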
double totalNumRows = leftRows + rightRows;
state.task.model *= leftRows / rightRows;
state.task.model += otherState.task.model;
diff --git a/src/modules/convex/linear_svm_igd.cpp b/src/modules/convex/linear_svm_igd.cpp
index 4512efd..90882a3 100644
--- a/src/modules/convex/linear_svm_igd.cpp
+++ b/src/modules/convex/linear_svm_igd.cpp
@@ -212,8 +212,6 @@
L1<GLMModel>::clipping(state.task.model, state.task.stepsize);
state.algo.numRows += x.cols();
- state.algo.numBuffers ++;
-
return state;
}
@@ -263,7 +261,6 @@
// averaging depends on their original values
stateLeft.algo.numRows += stateRight.algo.numRows;
stateLeft.algo.loss += stateRight.algo.loss;
- stateLeft.algo.numBuffers += stateRight.algo.numBuffers;
return stateLeft;
}
@@ -304,7 +301,7 @@
SVMMinibatchState<MutableArrayHandle<double> > state = args[0];
// Aggregates that haven't seen any data just return Null.
if (state.algo.numRows == 0) { return Null(); }
- state.algo.loss = state.algo.loss/state.algo.numBuffers;
+ state.algo.loss = state.algo.loss / state.algo.numRows;
return state;
}
diff --git a/src/modules/convex/task/linear_svm.hpp b/src/modules/convex/task/linear_svm.hpp
index 892bf2a..7146432 100644
--- a/src/modules/convex/task/linear_svm.hpp
+++ b/src/modules/convex/task/linear_svm.hpp
@@ -119,7 +119,7 @@
* @param x Batch of independent variables
* @param y Batch of dependent variables
* @param stepsize Learning rate for model update
-* @return Average loss in the batch
+* @return Total loss in the batch
*/
template <class Model, class Tuple>
double
@@ -133,33 +133,38 @@
// the model for each batch. x and y in the function signature are defined
// as generic variables to ensure a consistent interface across all modules.
- // Assumption: 'gradient' will always be of the same type as the coefficients
- // With SVM, the model is just the coefficients, but can be more complex with
- // other modules like MLP.
+ // ASSUMPTION: 'gradient' will always be of the same type as the
+ // coefficients. For SVM the model is just the coefficients, but it
+ // can be more complex for other modules such as MLP.
coefficient_type gradient = model;
gradient.setZero();
coefficient_type w_transpose_x = x * model;
double loss = 0.0;
int batch_size = x.rows();
- double dist_from_hyperplane = 0.;
- double c = 0.;
- int n_points_with_positive_dist=0;
- for (int i=0; i<batch_size; i++) {
+ double dist_from_hyperplane = 0.0;
+ double c = 0.0;
+ int n_points_with_positive_dist = 0;
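+ // For classification (is_svc) this is the hinge loss
+ // max(0, 1 - y * w^T x); for regression it is the epsilon-insensitive
+ // loss max(0, |w^T x - y| - epsilon). Only points at a positive
+ // distance contribute to the loss and to the (sub)gradient.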
+ for (int i = 0; i < batch_size; i++) {
if (is_svc) {
c = -y(i); // minus for "-loglik"
- dist_from_hyperplane = 1. - w_transpose_x(i) * y(i);
+ dist_from_hyperplane = 1.0 - w_transpose_x(i) * y(i);
} else {
double wx_y = w_transpose_x(i) - y(i);
- c = wx_y > 0 ? 1. : -1.;
+ c = wx_y > 0 ? 1.0 : -1.0;
dist_from_hyperplane = c * wx_y - epsilon;
}
- if ( dist_from_hyperplane > 0.) {
+ if (dist_from_hyperplane > 0.0) {
gradient += c * x.row(i);
loss += dist_from_hyperplane;
n_points_with_positive_dist++;
}
}
- loss /= n_points_with_positive_dist;
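+ // The gradient is still averaged over the contributing points, but the
+ // loss is now returned as a total and averaged once, over numRows.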
gradient.array() /= n_points_with_positive_dist;
model -= stepsize * gradient;
return loss;
diff --git a/src/modules/convex/type/state.hpp b/src/modules/convex/type/state.hpp
index c2478be..f846e8f 100644
--- a/src/modules/convex/type/state.hpp
+++ b/src/modules/convex/type/state.hpp
@@ -352,7 +352,6 @@
task.reg.rebind(&mStorage[4]);
algo.batchSize.rebind(&mStorage[5]);
algo.nEpochs.rebind(&mStorage[6]);
- algo.numBuffers.rebind(&mStorage[7]);
- task.model.rebind(&mStorage[8], task.nFeatures);
+ task.model.rebind(&mStorage[7], task.nFeatures);
}
@@ -368,7 +367,6 @@
struct AlgoState {
typename HandleTraits<Handle>::ReferenceToUInt64 numRows;
- typename HandleTraits<Handle>::ReferenceToUInt64 numBuffers;
typename HandleTraits<Handle>::ReferenceToDouble loss;
typename HandleTraits<Handle>::ReferenceToUInt32 batchSize;
typename HandleTraits<Handle>::ReferenceToUInt32 nEpochs;