[SYSTEMDS-418] Performance lineage tracing and reuse probing

This patch makes the following performance improvements in the context
of basic lineage tracing and lineage-based reuse probing:

1) Avoid string handling: Materialize the flag if a createvar
instruction has a persistent-read prefix in the name, which avoid
unnecessary string comparisons for ALL createvar instructions, so almost
30% of all instructions.

2) Apply the existing constant folding rewrite not just during static
rewrites but now also as a cleanup rewrite in order to remove remaining
constant expressions (introduced by rewrites) inside loops. This has
especially large impact in lineage because constructing the lineage item
is more expensive than the entire scalar operation.

3) Leverage the materialized hash code in lineage items as early-out
condition in the recursive equals check of lineage DAGs. This is
especially useful where all lineage DAGs have the same repreated
structure (e.g., from unrolled iterations) but a different input. The
equals would go all the way to the first differences, while the
comparison of hash codes (aggregates over all inputs) very likely differ
earlier.

On a mini-batch scenario of 250,000 iterations batch size 8, and 40
operations per iteration, the runtime w/o lineage was 65.6s, and the
changes (1) and (2) improved the runtime with lineage tracing from 76.5s
to 72.6s. Furthermore, we also seen some improvements for reuse probing
in this scenario, but this requires but work too.
diff --git a/dev/Tasks.txt b/dev/Tasks.txt
index c06bd46..1b2e94c 100644
--- a/dev/Tasks.txt
+++ b/dev/Tasks.txt
@@ -344,6 +344,7 @@
  * 415 MLContext lineage support (tracing and reuse correctness)      OK
  * 416 Lineage deduplication while, nested if, loop sequences         OK
  * 417 New rewrite for partial reuse in StepLM                        OK
+ * 418 Performance lineage tracing and reuse probing small data       OK
 
 SYSTEMDS-420 Compiler Improvements
  * 421 Fix invalid IPA scalar propagation into functions              OK
diff --git a/src/main/java/org/apache/sysds/hops/rewrite/ProgramRewriter.java b/src/main/java/org/apache/sysds/hops/rewrite/ProgramRewriter.java
index 2e6138c..29daa63 100644
--- a/src/main/java/org/apache/sysds/hops/rewrite/ProgramRewriter.java
+++ b/src/main/java/org/apache/sysds/hops/rewrite/ProgramRewriter.java
@@ -144,6 +144,8 @@
 			_dagRuleSet.add( new RewriteRemoveUnnecessaryCasts()             );
 		if( OptimizerUtils.ALLOW_COMMON_SUBEXPRESSION_ELIMINATION )
 			_dagRuleSet.add( new RewriteCommonSubexpressionElimination(true) );
+		if( OptimizerUtils.ALLOW_CONSTANT_FOLDING )
+			_dagRuleSet.add( new RewriteConstantFolding()                    ); //dependency: cse
 		_sbRuleSet.add(  new RewriteRemoveEmptyBasicBlocks()                 );
 	}
 	
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/CPOperand.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/CPOperand.java
index f349022..97c6e1e 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/CPOperand.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/CPOperand.java
@@ -164,7 +164,7 @@
 	public String getLineageLiteral() {
 		return InstructionUtils.concatOperandParts(
 			getName(), getDataType().name(),
-			getValueType().name(),  String.valueOf(isLiteral()));
+			getValueType().name(), String.valueOf(isLiteral()));
 	}
 	
 	public String getLineageLiteral(ScalarObject so) {
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
index 2ed0d8d..528ac4d 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
@@ -116,6 +116,7 @@
 	private final CPOperand output;
 	private final MetaData metadata;
 	private final UpdateType _updateType;
+	private final boolean _containsPreadPrefix;
 	
 	// Frame related members
 	private final String _schema;
@@ -136,6 +137,8 @@
 		_formatProperties = fprops;
 		_schema = schema;
 		_updateType = utype;
+		_containsPreadPrefix = in1 != null && in1.getName()
+			.contains(org.apache.sysds.lops.Data.PREAD_PREFIX);
 	}
 	
 	private VariableCPInstruction(VariableOperationCode op, CPOperand in1, CPOperand in2, CPOperand in3, CPOperand out,
@@ -1259,7 +1262,7 @@
 		LineageItem li = null;
 		switch (getVariableOpcode()) {
 			case CreateVariable:
-				if (!getInput1().getName().contains(org.apache.sysds.lops.Data.PREAD_PREFIX))
+				if (!_containsPreadPrefix)
 					break; //otherwise fall through
 			
 			case Read: {
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
index 5e56d84..97f7d05 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
@@ -95,7 +95,7 @@
 			//atomic try reuse full/partial and set placeholder, without
 			//obtaining value to avoid blocking in critical section
 			LineageCacheEntry e = null;
-			Boolean reuseAll = true;
+			boolean reuseAll = true;
 			synchronized( _cache ) {
 				//try to reuse full or partial intermediates
 				for (LineageItem item : liMap.keySet()) {
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageItem.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageItem.java
index b936948..7a55902 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageItem.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageItem.java
@@ -147,8 +147,8 @@
 		
 		boolean ret = _opcode.equals(that._opcode);
 		ret &= _data.equals(that._data);
-		
-		if (_inputs != null && ret && (_inputs.length == that._inputs.length))
+		ret &= (hashCode() == that.hashCode());
+		if( ret && _inputs != null && _inputs.length == that._inputs.length )
 			for (int i = 0; i < _inputs.length; i++)
 				ret &= _inputs[i].equalsLI(that._inputs[i]);
 		
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
index 4216a93..b0a1453 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
@@ -57,12 +57,12 @@
 			return; // no need for lineage tracing
 		if (!(inst instanceof LineageTraceable))
 			throw new DMLRuntimeException("Unknown Instruction (" + inst.getOpcode() + ") traced.");
-		
-		if( ((LineageTraceable) inst).hasSingleLineage() ) {
-			trace(inst, ec, ((LineageTraceable) inst).getLineageItem(ec));
+		LineageTraceable linst = (LineageTraceable) inst;
+		if( linst.hasSingleLineage() ) {
+			trace(inst, ec, linst.getLineageItem(ec));
 		}
 		else {
-			Pair<String, LineageItem>[] items = ((LineageTraceable) inst).getLineageItems(ec);
+			Pair<String, LineageItem>[] items = linst.getLineageItems(ec);
 			if (items == null || items.length < 1)
 				trace(inst, ec, null);
 			else {