Fix loss averaging, remove usage of numBuffers

minibatchGradient now returns the total hinge loss of a batch instead of
an average over the margin-violating points. Since the transition step
already accumulates loss only during the first epoch, the final step can
divide the accumulated total by numRows and report the first epoch's
average loss per row. The old code divided a sum of per-buffer averages
by numBuffers, which gave every buffer equal weight regardless of its
size. With numBuffers no longer needed for loss averaging or for the
row-weighted model merge, the field is removed from the minibatch state;
the transitionInMiniBatch2 declaration is also removed.
diff --git a/src/modules/convex/algo/igd.hpp b/src/modules/convex/algo/igd.hpp
index 45565d5..3ae4c13 100644
--- a/src/modules/convex/algo/igd.hpp
+++ b/src/modules/convex/algo/igd.hpp
@@ -35,7 +35,6 @@
 
     static void transition(state_type &state, const tuple_type &tuple);
     static void transitionInMiniBatch(state_type &state, const tuple_type &tuple);
-    static void transitionInMiniBatch2(state_type &state, const tuple_type &tuple);
     static void merge(state_type &state, const_state_type &otherState);
     static void mergeInPlace(state_type &state, const_state_type &otherState);
     static void final(state_type &state);
@@ -107,8 +106,8 @@
                state.task.model, X_batch, y_batch, state.task.stepsize);
         }
 
-        // The first epoch will most likely have the most loss.
-        // So being pessimistic, we return average loss only for the first epoch.
+        // The first epoch will most likely have the highest loss.
+        // To be pessimistic, we accumulate the total loss from the first epoch only.
         if (curr_epoch==0) state.algo.loss += loss;
     }
     return;
@@ -156,8 +155,8 @@
     }
 
     // model averaging, weighted by rows seen
-    double leftRows = static_cast<double>(state.algo.numRows + state.algo.numBuffers);
-    double rightRows = static_cast<double>(otherState.algo.numRows + otherState.algo.numBuffers);
+    double leftRows = static_cast<double>(state.algo.numRows);
+    double rightRows = static_cast<double>(otherState.algo.numRows);
     double totalNumRows = leftRows + rightRows;
     state.task.model *= leftRows / rightRows;
     state.task.model += otherState.task.model;
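
The merge above is a row-weighted average of the two segment models; with
numBuffers gone, the weights are simply the per-segment row counts. A
minimal standalone sketch of that computation, using plain Eigen types and
a hypothetical mergeModels helper rather than the MADlib state handles:

    #include <Eigen/Dense>

    // Row-weighted average of two partial models that were trained on
    // nLeft and nRight rows respectively.
    Eigen::VectorXd mergeModels(const Eigen::VectorXd &left,  double nLeft,
                                const Eigen::VectorXd &right, double nRight) {
        return (nLeft * left + nRight * right) / (nLeft + nRight);
    }

mergeInPlace presumably arrives at the same weighted mean, just written to
update the left model in place; this commit only changes the weights it uses.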
diff --git a/src/modules/convex/linear_svm_igd.cpp b/src/modules/convex/linear_svm_igd.cpp
index 4512efd..90882a3 100644
--- a/src/modules/convex/linear_svm_igd.cpp
+++ b/src/modules/convex/linear_svm_igd.cpp
@@ -212,8 +212,6 @@
     L1<GLMModel>::clipping(state.task.model, state.task.stepsize);
 
     state.algo.numRows += x.cols();
-    state.algo.numBuffers ++;
-
     return state;
 }
 
@@ -263,7 +261,6 @@
     // averaging depends on their original values
     stateLeft.algo.numRows += stateRight.algo.numRows;
     stateLeft.algo.loss += stateRight.algo.loss;
-    stateLeft.algo.numBuffers += stateRight.algo.numBuffers;
 
     return stateLeft;
 }
@@ -304,7 +301,7 @@
     SVMMinibatchState<MutableArrayHandle<double> > state = args[0];
     // Aggregates that haven't seen any data just return Null.
     if (state.algo.numRows == 0) { return Null(); }
-    state.algo.loss = state.algo.loss/state.algo.numBuffers;
+    state.algo.loss = state.algo.loss / state.algo.numRows;
     return state;
 }
 
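With numBuffers gone, the three aggregate stages in this file follow one
convention: the transition accumulates the first epoch's total loss plus
the row count, the merge sums both, and the final step divides once. A
minimal standalone sketch of that bookkeeping, with hypothetical names
(LossState, finalLoss) in place of the MADlib state types:

    #include <cstdint>

    struct LossState {
        double   loss    = 0.0;  // total loss accumulated in the first epoch
        uint64_t numRows = 0;    // rows seen by this segment
    };

    // Transition: a mini-batch contributes its *total* loss and its row count.
    void transition(LossState &s, double batchTotalLoss, uint64_t batchRows) {
        s.loss    += batchTotalLoss;
        s.numRows += batchRows;
    }

    // Merge: both quantities are plain sums, so no buffer counter is needed.
    void merge(LossState &left, const LossState &right) {
        left.loss    += right.loss;
        left.numRows += right.numRows;
    }

    // Final: average loss per row; the guard mirrors the NULL return that
    // the real aggregate produces when no rows were seen.
    double finalLoss(const LossState &s) {
        return s.numRows == 0 ? 0.0 : s.loss / s.numRows;
    }

The previous loss / numBuffers was a mean of per-buffer averages, which
gave a short final buffer the same weight as a full one; loss / numRows is
the first epoch's mean loss per row.
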
diff --git a/src/modules/convex/task/linear_svm.hpp b/src/modules/convex/task/linear_svm.hpp
index 892bf2a..7146432 100644
--- a/src/modules/convex/task/linear_svm.hpp
+++ b/src/modules/convex/task/linear_svm.hpp
@@ -119,7 +119,7 @@
 * @param x Batch of independent variables
 * @param y Batch of dependent variables
 * @param stepsize Learning rate for model update
-* @return Average loss in the batch
+* @return Total loss in the batch
 */
 template <class Model, class Tuple>
 double
@@ -133,33 +133,32 @@
     // the model for each batch. x and y in the function signature are defined
     // as generic variables to ensure a consistent interface across all modules.
 
-    // Assumption: 'gradient' will always be of the same type as the coefficients
-    // With SVM, the model is just the coefficients, but can be more complex with
-    // other modules like MLP.
+    // ASSUMPTION: 'gradient' will always be of the same type as the
+    // coefficients. In SVM the model is just the coefficients, but it can
+    // be more complex in other modules, such as MLP.
     coefficient_type gradient = model;
     gradient.setZero();
     coefficient_type w_transpose_x = x * model;
     double loss = 0.0;
     int batch_size = x.rows();
-    double dist_from_hyperplane = 0.;
-    double c = 0.;
-    int n_points_with_positive_dist=0;
-    for (int i=0; i<batch_size; i++) {
+    double dist_from_hyperplane = 0.0;
+    double c = 0.0;
+    int n_points_with_positive_dist = 0;
+    for (int i = 0; i < batch_size; i++) {
         if (is_svc) {
             c = -y(i);   // minus for "-loglik"
-            dist_from_hyperplane = 1. - w_transpose_x(i) * y(i);
+            dist_from_hyperplane = 1.0 - w_transpose_x(i) * y(i);
         } else {
             double wx_y = w_transpose_x(i) - y(i);
-            c = wx_y > 0 ? 1. : -1.;
+            c = wx_y > 0 ? 1.0 : -1.0;
             dist_from_hyperplane = c * wx_y - epsilon;
         }
-        if ( dist_from_hyperplane > 0.) {
+        if (dist_from_hyperplane > 0.0) {
             gradient += c * x.row(i);
             loss += dist_from_hyperplane;
             n_points_with_positive_dist++;
         }
     }
-    loss /= n_points_with_positive_dist;
     gradient.array() /= n_points_with_positive_dist;
     model -= stepsize * gradient;
     return loss;
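
To make the new contract concrete, here is a stripped-down version of the
SVC branch of minibatchGradient written against raw Eigen types: the hinge
loss comes back as a batch total, while the sub-gradient is still averaged
over the margin-violating points before the model update. The name
minibatchStep, the Eigen types, and the guard for an all-correct batch are
illustrative additions, not the MADlib code.

    #include <Eigen/Dense>

    // One mini-batch step of linear SVM classification (hinge loss).
    // X is batch_size x n_features, y holds labels in {-1, +1}, w is the model.
    // Returns the batch's *total* hinge loss; w is updated in place.
    double minibatchStep(Eigen::VectorXd &w, const Eigen::MatrixXd &X,
                         const Eigen::VectorXd &y, double stepsize) {
        Eigen::VectorXd gradient = Eigen::VectorXd::Zero(w.size());
        Eigen::VectorXd wx = X * w;
        double loss = 0.0;
        int n_violating = 0;
        for (int i = 0; i < X.rows(); i++) {
            double dist = 1.0 - wx(i) * y(i);   // > 0 means the margin is violated
            if (dist > 0.0) {
                gradient += -y(i) * X.row(i).transpose();
                loss += dist;
                n_violating++;
            }
        }
        if (n_violating > 0) {
            gradient /= n_violating;            // average over violating points
            w -= stepsize * gradient;
        }
        return loss;                            // total, not an average
    }

Returning the total lets the aggregate divide by numRows exactly once in
the final step instead of averaging averages.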
diff --git a/src/modules/convex/type/state.hpp b/src/modules/convex/type/state.hpp
index c2478be..f846e8f 100644
--- a/src/modules/convex/type/state.hpp
+++ b/src/modules/convex/type/state.hpp
@@ -352,7 +352,6 @@
         task.reg.rebind(&mStorage[4]);
         algo.batchSize.rebind(&mStorage[5]);
         algo.nEpochs.rebind(&mStorage[6]);
-        algo.numBuffers.rebind(&mStorage[7]);
         task.model.rebind(&mStorage[8], task.nFeatures);
     }
 
@@ -368,7 +367,6 @@
 
     struct AlgoState {
         typename HandleTraits<Handle>::ReferenceToUInt64 numRows;
-        typename HandleTraits<Handle>::ReferenceToUInt64 numBuffers;
         typename HandleTraits<Handle>::ReferenceToDouble loss;
         typename HandleTraits<Handle>::ReferenceToUInt32 batchSize;
         typename HandleTraits<Handle>::ReferenceToUInt32 nEpochs;