[MINOR] Extend reusable instructions and bug fixes
This patch
- extends reusable opcodes, which primarily improves multilevel (statementblock) cache hits,
- removes most of the System.nanoTime calls from cache logic,
- replaces operand names with placeholders in datagen lineage items,
(Note: this fix is temporarily commented due to a bug in parfor-lineage)
- fixes a bug in lineage item creation for multilevel caching,
- update grid search lineage test with a loss function.
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/InstructionUtils.java b/src/main/java/org/apache/sysds/runtime/instructions/InstructionUtils.java
index 1401bfa..4377918 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/InstructionUtils.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/InstructionUtils.java
@@ -959,6 +959,16 @@
parts[operand] = newValue;
return concatOperands(parts);
}
+
+ public static String replaceOperandName(String instStr) {
+ String[] parts = instStr.split(Lop.OPERAND_DELIMITOR);
+ String oldName = parts[parts.length-1];
+ String[] Nameparts = oldName.split(Instruction.VALUETYPE_PREFIX);
+ Nameparts[0] = "xxx";
+ String newName = concatOperandParts(Nameparts);
+ parts[parts.length-1] = newName;
+ return concatOperands(parts);
+ }
public static String concatOperands(String... inputs) {
return concatOperandsWithDelim(Lop.OPERAND_DELIMITOR, inputs);
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/DataGenCPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/DataGenCPInstruction.java
index 4aa9660..baacca6 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/DataGenCPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/DataGenCPInstruction.java
@@ -401,16 +401,23 @@
tmpInstStr = position != 0 ? InstructionUtils.replaceOperand(
tmpInstStr, position, String.valueOf(runtimeSeed)) : tmpInstStr;
}
+ //replace output variable name with a placeholder
+ //tmpInstStr = InstructionUtils.replaceOperandName(tmpInstStr);
tmpInstStr = replaceNonLiteral(tmpInstStr, rows, 2, ec);
tmpInstStr = replaceNonLiteral(tmpInstStr, cols, 3, ec);
break;
}
case SEQ: {
+ //replace output variable name with a placeholder
+ //tmpInstStr = InstructionUtils.replaceOperandName(tmpInstStr);
tmpInstStr = replaceNonLiteral(tmpInstStr, seq_from, 5, ec);
tmpInstStr = replaceNonLiteral(tmpInstStr, seq_to, 6, ec);
tmpInstStr = replaceNonLiteral(tmpInstStr, seq_incr, 7, ec);
break;
}
+ case TIME:
+ //only opcode (time) is sufficient to compute from lineage.
+ break;
default:
throw new DMLRuntimeException("Unsupported datagen op: "+method);
}
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
index cb2d13b..5c23928 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
@@ -128,7 +128,7 @@
HashMap<String, LineageItem> funcLIs = new HashMap<>();
for (int i=0; i<numOutputs; i++) {
String opcode = name + String.valueOf(i+1);
- LineageItem li = new LineageItem(outNames.get(i), opcode, liInputs);
+ LineageItem li = new LineageItem(opcode, liInputs);
LineageCacheEntry e = null;
synchronized(_cache) {
if (LineageCache.probe(li)) {
@@ -251,7 +251,7 @@
boolean AllOutputsCacheable = true;
for (int i=0; i<outputs.size(); i++) {
String opcode = name + String.valueOf(i+1);
- LineageItem li = new LineageItem(outputs.get(i).getName(), opcode, liInputs);
+ LineageItem li = new LineageItem(opcode, liInputs);
String boundVarName = outputs.get(i).getName();
LineageItem boundLI = ec.getLineage().get(boundVarName);
if (boundLI != null)
@@ -506,10 +506,6 @@
throw new DMLRuntimeException("Lineage Cache: unsupported instruction: "+inst.getOpcode());
}
- if (DMLScript.STATISTICS) {
- long t1 = System.nanoTime();
- LineageCacheStatistics.incrementCostingTime(t1-t0);
- }
return nflops / (2L * 1024 * 1024 * 1024);
}
}
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
index c4caed9..48a512a 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
@@ -34,7 +34,9 @@
private static final String[] OPCODES = new String[] {
"tsmm", "ba+*", "*", "/", "+", "||", "nrow", "ncol", "round", "exp", "log",
- "rightIndex", "leftIndex", "groupedagg", "r'", "solve", "spoof"
+ "rightIndex", "leftIndex", "groupedagg", "r'", "solve", "spoof",
+ "uamean", "max", "min", "ifelse", "-", "sqrt", ">", "uak+", "<=",
+ "^", "uamax", "uark+"
//TODO: Reuse everything.
};
private static String[] REUSE_OPCODES = new String[] {};
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheEviction.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheEviction.java
index 30f930f..4071c1e 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheEviction.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheEviction.java
@@ -246,15 +246,9 @@
if (!e.isMatrixValue() || e.isNullVal())
return 0;
// This includes sum of writing to and reading from disk
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
double size = getDiskSizeEstimate(e);
double loadtime = isSparse(e) ? size/LineageCacheConfig.FSREAD_SPARSE : size/LineageCacheConfig.FSREAD_DENSE;
double writetime = isSparse(e) ? size/LineageCacheConfig.FSWRITE_SPARSE : size/LineageCacheConfig.FSWRITE_DENSE;
-
- //double loadtime = CostEstimatorStaticRuntime.getFSReadTime(r, c, s);
- //double writetime = CostEstimatorStaticRuntime.getFSWriteTime(r, c, s);
- if (DMLScript.STATISTICS)
- LineageCacheStatistics.incrementCostingTime(System.nanoTime() - t0);
return loadtime + writetime;
}
@@ -276,7 +270,6 @@
// Scalar or too small
return;
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
double newIOSpeed = size / IOtime; // MB per second
// Adjust the read/write speed taking into account the last read/write.
// These constants will eventually converge to the real speed.
@@ -292,8 +285,6 @@
else
LineageCacheConfig.FSWRITE_DENSE= (LineageCacheConfig.FSWRITE_DENSE+ newIOSpeed) / 2;
}
- if (DMLScript.STATISTICS)
- LineageCacheStatistics.incrementCostingTime(System.nanoTime() - t0);
}
private static boolean isSparse(LineageCacheEntry e) {
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheStatistics.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheStatistics.java
index 143fdfc..5b54592 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheStatistics.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheStatistics.java
@@ -38,9 +38,6 @@
private static final LongAdder _numRewrites = new LongAdder();
private static final LongAdder _ctimeFSRead = new LongAdder(); //in nano sec
private static final LongAdder _ctimeFSWrite = new LongAdder(); //in nano sec
- private static final LongAdder _ctimeCosting = new LongAdder(); //in nano sec
- private static final LongAdder _ctimeRewrite = new LongAdder(); //in nano sec
- private static final LongAdder _ctimeRewriteEx = new LongAdder(); //in nano sec
public static void reset() {
_numHitsMem.reset();
@@ -55,9 +52,6 @@
_numRewrites.reset();
_ctimeFSRead.reset();
_ctimeFSWrite.reset();
- _ctimeCosting.reset();
- _ctimeRewrite.reset();
- _ctimeRewriteEx.reset();
}
public static void incrementMemHits() {
@@ -128,16 +122,6 @@
_ctimeFSWrite.add(delta);
}
- public static void incrementCostingTime(long delta) {
- // Total time spent estimating computation and disk spill costs.
- _ctimeCosting.add(delta);
- }
-
- public static void incrementPRewriteTime(long delta) {
- // Total time spent compiling lineage rewrites.
- _ctimeRewrite.add(delta);
- }
-
public static long getMultiLevelFnHits() {
return _numHitsFunc.longValue();
}
@@ -146,11 +130,6 @@
return _numHitsSB.longValue();
}
- public static void incrementPRwExecTime(long delta) {
- // Total time spent executing lineage rewrites.
- _ctimeRewriteEx.add(delta);
- }
-
public static String displayHits() {
StringBuilder sb = new StringBuilder();
sb.append(_numHitsMem.longValue());
@@ -194,18 +173,4 @@
sb.append(String.format("%.3f", ((double)_ctimeFSWrite.longValue())/1000000000)); //in sec
return sb.toString();
}
-
- public static String displayCostingTime() {
- StringBuilder sb = new StringBuilder();
- sb.append(String.format("%.3f", ((double)_ctimeCosting.longValue())/1000000000)); //in sec
- return sb.toString();
- }
-
- public static String displayRewriteTime() {
- StringBuilder sb = new StringBuilder();
- sb.append(String.format("%.3f", ((double)_ctimeRewrite.longValue())/1000000000)); //in sec
- sb.append("/");
- sb.append(String.format("%.3f", ((double)_ctimeRewriteEx.longValue())/1000000000)); //in sec
- return sb.toString();
- }
}
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageRewriteReuse.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageRewriteReuse.java
index 7c67423..9bd0855 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageRewriteReuse.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageRewriteReuse.java
@@ -123,8 +123,6 @@
//put the result into the cache
LineageCache.putMatrix(curr, ec, t1-t0);
- if (DMLScript.STATISTICS)
- LineageCacheStatistics.incrementPRwExecTime(t1-t0);
DMLScript.EXPLAIN = et; //TODO can't change this here
//cleanup execution context
@@ -142,7 +140,6 @@
if(!isTsmmCbind(curr, ec, inCache))
return null;
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
// Create a transient read op over the cached tsmm result
MatrixBlock cachedEntry = inCache.get("lastMatrix");
lrwec.setVariable("cachedEntry", convMBtoMO(cachedEntry));
@@ -186,10 +183,8 @@
LOG.debug("LINEAGE REWRITE rewriteTsmmCbind APPLIED");
ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
- if (DMLScript.STATISTICS) {
- LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
+ if (DMLScript.STATISTICS)
LineageCacheStatistics.incrementPRewrites();
- }
return inst;
}
@@ -202,7 +197,6 @@
if(!isTsmmCbindOnes(curr, ec, inCache))
return null;
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
// Create a transient read op over the cached tsmm result
MatrixBlock cachedEntry = inCache.get("lastMatrix");
lrwec.setVariable("cachedEntry", convMBtoMO(cachedEntry));
@@ -228,10 +222,8 @@
LOG.debug("LINEAGE REWRITE rewriteTsmmCbindOnes APPLIED");
ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
- if (DMLScript.STATISTICS) {
- LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
+ if (DMLScript.STATISTICS)
LineageCacheStatistics.incrementPRewrites();
- }
return inst;
}
@@ -242,7 +234,6 @@
if (!isTsmmRbind(curr, ec, inCache))
return null;
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
// Create a transient read op over the last tsmm result
MatrixBlock cachedEntry = inCache.get("lastMatrix");
lrwec.setVariable("cachedEntry", convMBtoMO(cachedEntry));
@@ -272,10 +263,8 @@
LOG.debug("LINEAGE REWRITE rewriteTsmmRbind APPLIED");
ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
- if (DMLScript.STATISTICS) {
- LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
+ if (DMLScript.STATISTICS)
LineageCacheStatistics.incrementPRewrites();
- }
return inst;
}
@@ -286,7 +275,6 @@
if (!isTsmm2Cbind(curr, ec, inCache))
return null;
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
// Create a transient read op over the last tsmm result
MatrixBlock cachedEntry = inCache.get("lastMatrix");
MatrixObject newmo = convMBtoMO(cachedEntry);
@@ -335,10 +323,8 @@
LOG.debug("LINEAGE REWRITE rewriteTsmm2Cbind APPLIED");
ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
- if (DMLScript.STATISTICS) {
- LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
+ if (DMLScript.STATISTICS)
LineageCacheStatistics.incrementPRewrites();
- }
return inst;
}
@@ -349,7 +335,6 @@
if (!isMatMulRbindLeft(curr, ec, inCache))
return null;
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
// Create a transient read op over the last ba+* result
MatrixBlock cachedEntry = inCache.get("lastMatrix");
lrwec.setVariable("cachedEntry", convMBtoMO(cachedEntry));
@@ -380,10 +365,8 @@
LOG.debug("LINEAGE REWRITE rewriteMetMulRbindLeft APPLIED");
ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
- if (DMLScript.STATISTICS) {
- LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
+ if (DMLScript.STATISTICS)
LineageCacheStatistics.incrementPRewrites();
- }
return inst;
}
@@ -394,7 +377,6 @@
if (!isMatMulCbindRight(curr, ec, inCache))
return null;
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
// Create a transient read op over the last ba+* result
MatrixBlock cachedEntry = inCache.get("lastMatrix");
lrwec.setVariable("cachedEntry", convMBtoMO(cachedEntry));
@@ -425,10 +407,8 @@
LOG.debug("LINEAGE REWRITE rewriteMatMulCbindRight APPLIED");
ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
- if (DMLScript.STATISTICS) {
- LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
+ if (DMLScript.STATISTICS)
LineageCacheStatistics.incrementPRewrites();
- }
return inst;
}
@@ -441,7 +421,6 @@
if (!isMatMulCbindRightOnes(curr, ec, inCache))
return null;
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
// Create a transient read op over the last ba+* result
MatrixBlock cachedEntry = inCache.get("lastMatrix");
lrwec.setVariable("cachedEntry", convMBtoMO(cachedEntry));
@@ -459,10 +438,8 @@
LOG.debug("LINEAGE REWRITE rewriteMatMulCbindRightOnes APPLIED");
ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
- if (DMLScript.STATISTICS) {
- LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
+ if (DMLScript.STATISTICS)
LineageCacheStatistics.incrementPRewrites();
- }
return inst;
}
@@ -473,7 +450,6 @@
if (!isElementMulRbind(curr, ec, inCache))
return null;
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
// Create a transient read op over the last * result
MatrixBlock cachedEntry = inCache.get("lastMatrix");
lrwec.setVariable("cachedEntry", convMBtoMO(cachedEntry));
@@ -515,10 +491,8 @@
LOG.debug("LINEAGE REWRITE rewriteElementMulRbind APPLIED");
ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
- if (DMLScript.STATISTICS) {
- LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
+ if (DMLScript.STATISTICS)
LineageCacheStatistics.incrementPRewrites();
- }
return inst;
}
@@ -529,7 +503,6 @@
if (!isElementMulCbind(curr, ec, inCache))
return null;
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
// Create a transient read op over the last * result
MatrixBlock cachedEntry = inCache.get("lastMatrix");
lrwec.setVariable("cachedEntry", convMBtoMO(cachedEntry));
@@ -571,10 +544,8 @@
LOG.debug("LINEAGE REWRITE rewriteElementMulCbind APPLIED");
ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
- if (DMLScript.STATISTICS) {
- LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
+ if (DMLScript.STATISTICS)
LineageCacheStatistics.incrementPRewrites();
- }
return inst;
}
@@ -585,7 +556,6 @@
if (!isAggCbind (curr, ec, inCache))
return null;
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
// Create a transient read op over the last * result
MatrixBlock cachedEntry = inCache.get("lastMatrix");
lrwec.setVariable("cachedEntry", convMBtoMO(cachedEntry));
@@ -627,10 +597,8 @@
LOG.debug("LINEAGE REWRITE rewriteElementMulCbind APPLIED");
ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
- if (DMLScript.STATISTICS) {
- LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
+ if (DMLScript.STATISTICS)
LineageCacheStatistics.incrementPRewrites();
- }
return inst;
}
@@ -908,10 +876,7 @@
/*----------------------INSTRUCTIONS GENERATION & EXECUTION-----------------------*/
private static ArrayList<Instruction> genInst(Hop hops, ExecutionContext ec) {
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
ArrayList<Instruction> newInst = Recompiler.recompileHopsDag(hops, ec.getVariables(), null, true, true, 0);
- if (DMLScript.STATISTICS)
- LineageCacheStatistics.incrementPRwExecTime(System.nanoTime()-t0);
if (LOG.isDebugEnabled()) {
LOG.debug("COMPENSATION PLAN: ");
LOG.debug("EXPLAIN LINEAGE REWRITE (HOP) \n" + Explain.explain(hops,1));
diff --git a/src/main/java/org/apache/sysds/utils/Statistics.java b/src/main/java/org/apache/sysds/utils/Statistics.java
index eb94f83..6ad3e7a 100644
--- a/src/main/java/org/apache/sysds/utils/Statistics.java
+++ b/src/main/java/org/apache/sysds/utils/Statistics.java
@@ -948,9 +948,7 @@
sb.append("LinCache MultiLevel (Ins/SB/Fn):" + LineageCacheStatistics.displayMultiLevelHits() + ".\n");
sb.append("LinCache writes (Mem/FS/Del): \t" + LineageCacheStatistics.displayWtrites() + ".\n");
sb.append("LinCache FStimes (Rd/Wr): \t" + LineageCacheStatistics.displayTime() + " sec.\n");
- sb.append("LinCache costing time: \t" + LineageCacheStatistics.displayCostingTime() + " sec.\n");
sb.append("LinCache Rewrites: \t\t" + LineageCacheStatistics.displayRewrites() + ".\n");
- sb.append("LinCache RWtime (Com/Ex): \t" + LineageCacheStatistics.displayRewriteTime() + " sec.\n");
}
if( ConfigurationManager.isCodegenEnabled() ) {
sb.append("Codegen compile (DAG,CP,JC):\t" + getCodegenDAGCompile() + "/"
diff --git a/src/test/scripts/functions/lineage/LineageReuseAlg2.dml b/src/test/scripts/functions/lineage/LineageReuseAlg2.dml
index dcc69cd..d838e5b 100644
--- a/src/test/scripts/functions/lineage/LineageReuseAlg2.dml
+++ b/src/test/scripts/functions/lineage/LineageReuseAlg2.dml
@@ -19,7 +19,10 @@
#
#-------------------------------------------------------------
-l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B) return (Matrix[Double] loss) {
+l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B, Integer icpt)
+return (Matrix[Double] loss) {
+ if (icpt > 0)
+ X = cbind(X, matrix(1, nrow(X), 1));
loss = as.matrix(sum((y - X%*%B)^2));
}
@@ -32,6 +35,7 @@
y = rand(rows=100, cols=1, sparsity=1.0, seed=1);
Rbeta = matrix(0, rows=525, cols=ncol(X)); #nrows = 5*5*3*7 = 525
+Rloss = matrix(0, rows=525, cols=1);
k = 1;
for (i in 1:5)
{
@@ -47,6 +51,7 @@
tol = 10^h3;
beta = lm(X=Xi, y=y, icpt=icpt, reg=reg, tol=tol, maxi=0, verbose=FALSE);
Rbeta[k, 1:nrow(beta)] = t(beta);
+ Rloss[k,] = l2norm(Xi, y, beta, icpt);
k = k + 1;
}
}
@@ -54,5 +59,7 @@
}
while(FALSE) {}
-write(Rbeta, $1, format="text");
+leastLoss = rowIndexMin(t(Rloss));
+bestModel = Rbeta[as.scalar(leastLoss),];
+write(bestModel, $1, format="text");
diff --git a/src/test/scripts/functions/lineage/RewriteTest3.dml b/src/test/scripts/functions/lineage/RewriteTest3.dml
index a3da294..0e41a6e 100644
--- a/src/test/scripts/functions/lineage/RewriteTest3.dml
+++ b/src/test/scripts/functions/lineage/RewriteTest3.dml
@@ -25,10 +25,10 @@
tmp = X[,1];
tmp1 = matrix(0, rows=nrow(X), cols=0);
R = matrix(0, 1, ncol(X));
-ones_n = matrix(1, rows=nrow(X), cols=1);
for (i in 1:ncol(X)) {
tmp = cbind(tmp, X[,i]);
+ ones_n = matrix(1, rows=nrow(X), cols=1);
tmp1 = cbind(tmp, ones_n);
Res1 = t(tmp1) %*% tmp1;
while(FALSE) {};