[SYSTEMDS-411] Fix bugs in cache eviction.
This patch fixes bugs in handling of multi-level cache duplicates,
eviction and reading from disk. This also adds a new test, l2svm.
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/DataGenCPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/DataGenCPInstruction.java
index 8d688b8..464884c 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/DataGenCPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/DataGenCPInstruction.java
@@ -409,7 +409,7 @@
}
case SEQ: {
//replace output variable name with a placeholder
- //tmpInstStr = InstructionUtils.replaceOperandName(tmpInstStr);
+ tmpInstStr = InstructionUtils.replaceOperandName(tmpInstStr);
tmpInstStr = replaceNonLiteral(tmpInstStr, seq_from, 5, ec);
tmpInstStr = replaceNonLiteral(tmpInstStr, seq_to, 6, ec);
tmpInstStr = replaceNonLiteral(tmpInstStr, seq_incr, 7, ec);
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
index 1aa717c..1e875a4 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
@@ -380,14 +380,14 @@
else
e.setValue(oe.getSOValue(), computetime);
e._origItem = probeItem;
- // Add the SB/func entry to the end of the list of items pointing to the same data.
+ // Add itself as original item to navigate the list.
+ oe._origItem = probeItem;
+
+ // Add the SB/func entry to the list of items pointing to the same data.
// No cache size update is necessary.
- LineageCacheEntry tmp = oe;
// Maintain _origItem as head.
- while (tmp._nextEntry != null)
- tmp = tmp._nextEntry;
- // FIXME: No need add at the end; add just after head.
- tmp._nextEntry = e;
+ e._nextEntry = oe._nextEntry;
+ oe._nextEntry = e;
//maintain order for eviction
LineageCacheEviction.addEntry(e);
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
index f100bfd..48fd186 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
@@ -38,7 +38,7 @@
"rightIndex", "leftIndex", "groupedagg", "r'", "solve", "spoof",
"uamean", "max", "min", "ifelse", "-", "sqrt", ">", "uak+", "<=",
"^", "uamax", "uark+", "uacmean", "eigen", "ctableexpand", "replace",
- "^2", "uack+"
+ "^2", "uack+", "tak+*"
//TODO: Reuse everything.
};
private static String[] REUSE_OPCODES = new String[] {};
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheEviction.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheEviction.java
index 4071c1e..d6dc36d 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheEviction.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheEviction.java
@@ -101,8 +101,12 @@
private static void removeOrSpillEntry(Map<LineageItem, LineageCacheEntry> cache, LineageCacheEntry e, boolean spill) {
if (e._origItem == null) {
// Single entry. Remove or spill.
- if (spill)
- spillToLocalFS(cache, e);
+ if (spill) {
+ updateSize(e.getSize(), false); //Release memory
+ spillToLocalFS(cache, e); //Spill to disk
+ e.setNullValues(); //Set null
+ e.setCacheStatus(LineageCacheStatus.SPILLED); //Set status to spilled
+ }
else
removeEntry(cache, e);
return;
@@ -129,7 +133,9 @@
if (write) {
// Spill to disk if at least one entry has status TOSPILL.
spillToLocalFS(cache, cache.get(e._origItem));
- LineageCacheEntry h = cache.get(e._origItem);
+ // Reduce cachesize once for all the entries.
+ updateSize(e.getSize(), false);
+ LineageCacheEntry h = cache.get(e._origItem); //head
while (h != null) {
// Set values to null for all the entries.
h.setNullValues();
@@ -137,8 +143,6 @@
h.setCacheStatus(LineageCacheStatus.SPILLED);
h = h._nextEntry;
}
- // Reduce cachesize once for all the entries.
- updateSize(e.getSize(), false);
// Keep them in cache.
return;
}
@@ -359,10 +363,11 @@
h.setValue(mb);
h = h._nextEntry;
}
- // Increase cachesize once for all the entries.
- updateSize(e.getSize(), true);
}
+ // Increase cachesize once for all the entries.
+ updateSize(e.getSize(), true);
+
// Adjust disk reading speed
adjustReadWriteSpeed(e, ((double)(t1-t0))/1000000000, true);
// TODO: set cache status as RELOADED for this entry
diff --git a/src/test/java/org/apache/sysds/test/functions/lineage/LineageReuseAlg.java b/src/test/java/org/apache/sysds/test/functions/lineage/LineageReuseAlg.java
index 9f97b96..74b055e 100644
--- a/src/test/java/org/apache/sysds/test/functions/lineage/LineageReuseAlg.java
+++ b/src/test/java/org/apache/sysds/test/functions/lineage/LineageReuseAlg.java
@@ -39,7 +39,7 @@
protected static final String TEST_DIR = "functions/lineage/";
protected static final String TEST_NAME = "LineageReuseAlg";
- protected static final int TEST_VARIANTS = 4;
+ protected static final int TEST_VARIANTS = 5;
protected String TEST_CLASS_DIR = TEST_DIR + LineageReuseAlg.class.getSimpleName() + "/";
@Override
@@ -68,6 +68,11 @@
public void testPCAHybrid() {
testLineageTrace(TEST_NAME+"4", ReuseCacheType.REUSE_HYBRID);
}
+
+ @Test
+ public void testGridSearchL2svmHybrid() {
+ testLineageTrace(TEST_NAME+"5", ReuseCacheType.REUSE_HYBRID);
+ }
@Test
public void testStepLMFull() {
@@ -106,7 +111,6 @@
// Without lineage-based reuse enabled
List<String> proArgs = new ArrayList<>();
proArgs.add("-stats");
- proArgs.add("-lineage");
proArgs.add("-args");
proArgs.add(output("X"));
programArgs = proArgs.toArray(new String[proArgs.size()]);
diff --git a/src/test/scripts/functions/lineage/LineageReuseAlg5.dml b/src/test/scripts/functions/lineage/LineageReuseAlg5.dml
new file mode 100644
index 0000000..57af575
--- /dev/null
+++ b/src/test/scripts/functions/lineage/LineageReuseAlg5.dml
@@ -0,0 +1,63 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#hyperparameter(lambda, intercept) grid search for l2svm
+
+l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B, Boolean icpt)
+return (Matrix[Double] loss) {
+ if (icpt)
+ X = cbind(X, matrix(1, nrow(X), 1));
+ loss = as.matrix(sum((y - X%*%B)^2));
+}
+
+N = 1000;
+no_lamda = 10;
+stp = (0.1 - 0.0001)/no_lamda;
+lamda = 0.0001;
+Rbeta = matrix(0, rows=N+1, cols=no_lamda*2);
+Rloss = matrix(0, rows=no_lamda*2, cols=1);
+i = 1;
+
+X = rand(rows=1000, cols=N, sparsity=1.0, seed=42);
+y = rand(rows=1000, cols=1, min=0, max=2, seed=42);
+y = ceil(y);
+
+for (l in 1:no_lamda)
+{
+ beta = l2svm(X=X, Y=y, intercept=FALSE, epsilon=1e-12,
+ lambda = lamda, verbose=FALSE);
+ Rbeta[1:nrow(beta),i] = beta;
+ Rloss[i,] = l2norm(X, y, beta, FALSE);
+ i = i + 1;
+
+ beta = l2svm(X=X, Y=y, intercept=TRUE, epsilon=1e-12,
+ lambda = lamda, verbose=FALSE);
+ Rbeta[1:nrow(beta),i] = beta;
+ Rloss[i,] = l2norm(X, y, beta, TRUE);
+ i = i + 1;
+
+ lamda = lamda + stp;
+}
+
+leastLoss = rowIndexMin(t(Rloss));
+bestModel = Rbeta[,as.scalar(leastLoss)];
+
+write(bestModel, $1, format="text");