[SYSTEMDS-415] Initial mlcontext lineage support (tracing, reuse)

This patch adds basic lineage support to the MLContext API. Since
in-memory objects are directly bound to the symbol table, lineage
tracing views these objects as literals and incorrectly reused
intermediates even if different in-memory objects where used in
subsequent mlcontext invocations.
diff --git a/dev/Tasks.txt b/dev/Tasks.txt
index 673c737..a97e1ba 100644
--- a/dev/Tasks.txt
+++ b/dev/Tasks.txt
@@ -340,6 +340,7 @@
  * 412 Robust lineage tracing (non-recursive, parfor)                 OK
  * 413 Cache and reuse MultiReturnBuiltin instructions                OK
  * 414 New rewrite for PCA --> lmDS pipeline                          OK
+ * 415 MLContext lineage support (tracing and reuse correctness)      OK
 
 SYSTEMDS-420 Compiler Improvements
  * 421 Fix invalid IPA scalar propagation into functions              OK
diff --git a/src/main/java/org/apache/sysds/api/mlcontext/MLContext.java b/src/main/java/org/apache/sysds/api/mlcontext/MLContext.java
index 10a1aaa..6a39fda 100644
--- a/src/main/java/org/apache/sysds/api/mlcontext/MLContext.java
+++ b/src/main/java/org/apache/sysds/api/mlcontext/MLContext.java
@@ -41,6 +41,8 @@
 import org.apache.sysds.runtime.controlprogram.context.SparkExecutionContext;
 import org.apache.sysds.runtime.instructions.cp.Data;
 import org.apache.sysds.runtime.instructions.cp.ScalarObject;
+import org.apache.sysds.runtime.lineage.LineageCacheConfig;
+import org.apache.sysds.runtime.lineage.LineageCacheConfig.ReuseCacheType;
 import org.apache.sysds.runtime.meta.MetaDataFormat;
 import org.apache.sysds.utils.MLContextProxy;
 import org.apache.sysds.utils.Explain.ExplainType;
@@ -386,14 +388,35 @@
 	 * to standard output.
 	 *
 	 * @param explain
-	 *            {@code true} if explanation should be output, {@code false}
-	 *            otherwise
+	 *     {@code true} if explanation should be output, {@code false} otherwise
 	 */
 	public void setExplain(boolean explain) {
 		this.explain = explain;
 	}
 
 	/**
+	 * Set whether or not lineage should be traced
+	 * 
+	 * @param lineage
+	 *     {@code true} if lineage should be traced, {@code false} otherwise
+	 */
+	public void setLineage(boolean lineage) {
+		DMLScript.LINEAGE = lineage;
+	}
+	
+	/**
+	 *  Set type of lineage-based reuse caching and enable lineage tracing
+	 * 
+	 * @param reuse
+	 *     reuse cache type to use
+	 */
+	public void setLineage(ReuseCacheType reuse) {
+		DMLScript.LINEAGE_REUSE = reuse;
+		setLineage(true);
+		LineageCacheConfig.setConfig(reuse);
+	}
+
+	/**
 	 * Obtain whether or not all values should be maintained in the symbol table
 	 * after execution.
 	 *
diff --git a/src/main/java/org/apache/sysds/api/mlcontext/ScriptExecutor.java b/src/main/java/org/apache/sysds/api/mlcontext/ScriptExecutor.java
index cfc96cb..bf9c4ee 100644
--- a/src/main/java/org/apache/sysds/api/mlcontext/ScriptExecutor.java
+++ b/src/main/java/org/apache/sysds/api/mlcontext/ScriptExecutor.java
@@ -50,6 +50,7 @@
 import org.apache.sysds.runtime.controlprogram.Program;
 import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysds.runtime.controlprogram.context.ExecutionContextFactory;
+import org.apache.sysds.runtime.lineage.LineageItemUtils;
 import org.apache.sysds.utils.Explain;
 import org.apache.sysds.utils.Statistics;
 import org.apache.sysds.utils.Explain.ExplainCounts;
@@ -214,8 +215,11 @@
 	protected void createAndInitializeExecutionContext() {
 		executionContext = ExecutionContextFactory.createContext(runtimeProgram);
 		LocalVariableMap symbolTable = script.getSymbolTable();
-		if (symbolTable != null)
+		if (symbolTable != null) {
 			executionContext.setVariables(symbolTable);
+			if( DMLScript.LINEAGE )
+				LineageItemUtils.addAllDataLineage(executionContext);
+		}
 		//attach registered outputs (for dynamic recompile)
 		executionContext.getVariables().setRegisteredOutputs(
 			new HashSet<>(script.getOutputVariables()));
diff --git a/src/main/java/org/apache/sysds/lops/compile/Dag.java b/src/main/java/org/apache/sysds/lops/compile/Dag.java
index ca4353b..3e6f646 100644
--- a/src/main/java/org/apache/sysds/lops/compile/Dag.java
+++ b/src/main/java/org/apache/sysds/lops/compile/Dag.java
@@ -775,7 +775,7 @@
 				//String createInst = prepareVariableInstruction("createvar", node);
 				//out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
 				int blen = (int) oparams.getBlocksize();
-				Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(
+				Instruction createvarInst = VariableCPInstruction.prepCreatevarInstruction(
 					oparams.getLabel(), oparams.getFile_name(), true, node.getDataType(),
 					getOutputFileFormat(node, false).toString(),
 					new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), blen, oparams.getNnz()),
@@ -803,7 +803,7 @@
 					for( Lop fnOut: fcall.getFunctionOutputs()) {
 						OutputParameters fnOutParams = fnOut.getOutputParameters();
 						//OutputInfo oinfo = getOutputInfo((N)fnOut, false);
-						Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(
+						Instruction createvarInst = VariableCPInstruction.prepCreatevarInstruction(
 							fnOutParams.getLabel(), getFilePath() + fnOutParams.getLabel(), 
 							true, fnOut.getDataType(), getOutputFileFormat(fnOut, false).toString(),
 							new MatrixCharacteristics(fnOutParams.getNumRows(), fnOutParams.getNumCols(), (int)fnOutParams.getBlocksize(), fnOutParams.getNnz()),
@@ -913,7 +913,7 @@
 						String tempFileName = getNextUniqueFilename();
 						
 						int blen = (int) oparams.getBlocksize();
-						Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(
+						Instruction createvarInst = VariableCPInstruction.prepCreatevarInstruction(
 							tempVarName, tempFileName, true, node.getDataType(), out.getOutInfo().toString(), 
 							new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), blen, oparams.getNnz()),
 							oparams.getUpdateType());
@@ -946,7 +946,7 @@
 						
 						// Generate a single mvvar instruction (e.g., mvvar tempA A) 
 						//    instead of two instructions "cpvar tempA A" and "rmvar tempA"
-						Instruction currInstr = VariableCPInstruction.prepareMoveInstruction(tempVarName, constVarName);
+						Instruction currInstr = VariableCPInstruction.prepMoveInstruction(tempVarName, constVarName);
 						
 						currInstr.setLocation(node);
 						
@@ -1010,7 +1010,7 @@
 					&& ((VariableCPInstruction)inst2).isRemoveVariableNoFile()
 					&& inst1.getInput1().getName().equals(
 						((VariableCPInstruction)inst2).getInput1().getName()) ) {
-					ret.add(VariableCPInstruction.prepareMoveInstruction(
+					ret.add(VariableCPInstruction.prepMoveInstruction(
 						inst1.getInput1().getName(), inst1.getInput2().getName()));
 				}
 				else {
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
index 79fefc5..8accfea 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
@@ -215,7 +215,7 @@
 	 */
 	protected CacheableData(DataType dt, ValueType vt) {
 		super (dt, vt);
-		_uniqueID = isCachingActive() ? _seq.getNextID() : -1;
+		_uniqueID = _seq.getNextID();
 		_cacheStatus = CacheStatus.EMPTY;
 		_numReadThreads = 0;
 		_gpuObjects = DMLScript.USE_ACCELERATOR ? new HashMap<>() : null;
@@ -270,6 +270,10 @@
 	public String getFileName() {
 		return _hdfsFileName;
 	}
+	
+	public long getUniqueID() {
+		return _uniqueID;
+	}
 
 	public synchronized void setFileName( String file ) {
 		if( _hdfsFileName!=null && !_hdfsFileName.equals(file) )
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
index 15e9ccb..2ed0d8d 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
@@ -1126,7 +1126,7 @@
 		return parseInstruction(sb.toString());
 	}
 	
-	public static Instruction prepareMoveInstruction(String srcVar, String destFileName, String format) {
+	public static Instruction prepMoveInstruction(String srcVar, String destFileName, String format) {
 		StringBuilder sb = new StringBuilder();
 		sb.append("CP");
 		sb.append(Lop.OPERAND_DELIMITOR);
@@ -1141,7 +1141,7 @@
 		return parseInstruction(str);
 	}
 	
-	public static Instruction prepareMoveInstruction(String srcVar, String destVar) {
+	public static Instruction prepMoveInstruction(String srcVar, String destVar) {
 		// example: mvvar tempA A 
 		StringBuilder sb = new StringBuilder();
 		sb.append("CP");
@@ -1155,7 +1155,7 @@
 		return parseInstruction(str);
 	}
 	
-	private static String getBasicCreateVarString(String varName, String fileName, boolean fNameOverride, DataType dt, String format) {
+	private static String getBasicCreatevarString(String varName, String fileName, boolean fNameOverride, DataType dt, String format) {
 		//note: the filename override property leads to concatenation of unique ids in order to 
 		//ensure conflicting filenames for objects that originate from the same instruction
 		boolean lfNameOverride = fNameOverride && !ConfigurationManager
@@ -1179,13 +1179,13 @@
 		return sb.toString();
 	}
 	
-	public static Instruction prepareCreateMatrixVariableInstruction(String varName, String fileName, boolean fNameOverride, String format) {
-		return parseInstruction(getBasicCreateVarString(varName, fileName, fNameOverride, DataType.MATRIX, format));
+	public static Instruction prepCreatevarInstruction(String varName, String fileName, boolean fNameOverride, String format) {
+		return parseInstruction(getBasicCreatevarString(varName, fileName, fNameOverride, DataType.MATRIX, format));
 	}
 
-	public static Instruction prepareCreateVariableInstruction(String varName, String fileName, boolean fNameOverride, DataType dt, String format, DataCharacteristics mc, UpdateType update) {
+	public static Instruction prepCreatevarInstruction(String varName, String fileName, boolean fNameOverride, DataType dt, String format, DataCharacteristics mc, UpdateType update) {
 		StringBuilder sb = new StringBuilder();
-		sb.append(getBasicCreateVarString(varName, fileName, fNameOverride, dt, format));
+		sb.append(getBasicCreatevarString(varName, fileName, fNameOverride, dt, format));
 		
 		sb.append(Lop.OPERAND_DELIMITOR);
 		sb.append(mc.getRows());
@@ -1203,9 +1203,9 @@
 		return parseInstruction(str);
 	}
 	
-	public static Instruction prepareCreateVariableInstruction(String varName, String fileName, boolean fNameOverride, DataType dt, String format, DataCharacteristics mc, UpdateType update, boolean hasHeader, String delim, boolean sparse) {
+	public static Instruction prepCreatevarInstruction(String varName, String fileName, boolean fNameOverride, DataType dt, String format, DataCharacteristics mc, UpdateType update, boolean hasHeader, String delim, boolean sparse) {
 		StringBuilder sb = new StringBuilder();
-		sb.append(getBasicCreateVarString(varName, fileName, fNameOverride, dt, format));
+		sb.append(getBasicCreatevarString(varName, fileName, fNameOverride, dt, format));
 		
 		sb.append(Lop.OPERAND_DELIMITOR);
 		sb.append(mc.getRows());
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageItemUtils.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageItemUtils.java
index e659025..1693627 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageItemUtils.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageItemUtils.java
@@ -64,6 +64,7 @@
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.controlprogram.BasicProgramBlock;
 import org.apache.sysds.runtime.controlprogram.Program;
+import org.apache.sysds.runtime.controlprogram.caching.CacheableData;
 import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysds.runtime.controlprogram.context.ExecutionContextFactory;
 import org.apache.sysds.runtime.instructions.Instruction;
@@ -86,6 +87,7 @@
 import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 import java.util.Stack;
 import java.util.stream.Collectors;
@@ -825,4 +827,18 @@
 		return(CPOpInputs != null ? LineageItemUtils.getLineage(ec, 
 			CPOpInputs.toArray(new CPOperand[CPOpInputs.size()])) : null);
 	}
+	
+	public static void addAllDataLineage(ExecutionContext ec) {
+		for( Entry<String, Data> e : ec.getVariables().entrySet() ) {
+			if( e.getValue() instanceof CacheableData<?> ) {
+				CacheableData<?> cdata = (CacheableData<?>) e.getValue();
+				//only createvar instruction with pREAD prefix added to lineage
+				String fromVar = org.apache.sysds.lops.Data.PREAD_PREFIX+e.getKey();
+				ec.traceLineage(VariableCPInstruction.prepCreatevarInstruction(
+					fromVar, "CacheableData::"+cdata.getUniqueID(), false, "binary"));
+				//move from pREADx to x
+				ec.traceLineage(VariableCPInstruction.prepMoveInstruction(fromVar, e.getKey()));
+			}
+		}
+	}
 }
diff --git a/src/test/java/org/apache/sysds/test/functions/lineage/LineageMLContextTest.java b/src/test/java/org/apache/sysds/test/functions/lineage/LineageMLContextTest.java
new file mode 100644
index 0000000..2e3203f
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/lineage/LineageMLContextTest.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.lineage;
+
+import static org.apache.sysds.api.mlcontext.ScriptFactory.dml;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.junit.Test;
+import org.apache.sysds.api.mlcontext.MatrixFormat;
+import org.apache.sysds.api.mlcontext.MatrixMetadata;
+import org.apache.sysds.api.mlcontext.Script;
+import org.apache.sysds.runtime.lineage.LineageCacheConfig.ReuseCacheType;
+import org.apache.sysds.test.functions.mlcontext.MLContextTestBase;
+
+public class LineageMLContextTest extends MLContextTestBase {
+
+	@Test
+	public void testPrintLineage() {
+		System.out.println("LineageMLContextTest - JavaRDD<String> IJV sum DML");
+
+		List<String> list = new ArrayList<>();
+		list.add("1 1 5");
+		list.add("2 2 5");
+		list.add("3 3 5");
+		JavaRDD<String> javaRDD = sc.parallelize(list);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.IJV, 3, 3);
+
+		Script script = dml(
+			 "print('sum: '+sum(M+M));"
+			+"print(lineage(M+M));"
+			).in("M", javaRDD, mm);
+		setExpectedStdOut("sum: 30.0");
+		
+		ml.setLineage(ReuseCacheType.NONE);
+		ml.execute(script);
+	}
+	
+	@Test
+	public void testReuseSameRDD() {
+		System.out.println("LineageMLContextTest - JavaRDD<String> IJV sum DML");
+
+		List<String> list = new ArrayList<>();
+		list.add("1 1 5");
+		list.add("2 2 5");
+		list.add("3 3 5");
+		JavaRDD<String> javaRDD = sc.parallelize(list);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.IJV, 3, 3);
+		
+		Script script = dml(
+			 "print('sum: '+sum(M+M));"
+			+"print(lineage(M+M));"
+			).in("M", javaRDD, mm);
+		setExpectedStdOut("sum: 30.0");
+		
+		ml.setLineage(ReuseCacheType.REUSE_FULL);
+		ml.execute(script);
+		ml.execute(script); //w/ reuse
+	}
+	
+	@Test
+	public void testNoReuseDifferentRDD() {
+		System.out.println("LineageMLContextTest - JavaRDD<String> IJV sum DML");
+
+		List<String> list = new ArrayList<>();
+		list.add("1 1 5");
+		list.add("2 2 5");
+		list.add("3 3 5");
+		JavaRDD<String> javaRDD = sc.parallelize(list);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.IJV, 3, 3);
+		
+		Script script = dml(
+			 "print('sum: '+sum(M+M));"
+			+"print(lineage(M+M));"
+			).in("M", javaRDD, mm);
+		
+		ml.setLineage(ReuseCacheType.REUSE_FULL);
+		
+		setExpectedStdOut("sum: 30.0");
+		ml.execute(script);
+		
+		list.add("4 4 5");
+		JavaRDD<String> javaRDD2 = sc.parallelize(list);
+		MatrixMetadata mm2 = new MatrixMetadata(MatrixFormat.IJV, 4, 4);
+		script.in("M", javaRDD2, mm2);
+		
+		setExpectedStdOut("sum: 40.0");
+		ml.execute(script); //w/o reuse
+	}
+}
diff --git a/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java b/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java
index efd0397..2eb6ccf 100644
--- a/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java
@@ -30,27 +30,38 @@
 
 public class IPAConstantPropagationFunTest extends AutomatedTestBase 
 {
-	private final static String TEST_NAME1 = "IPAFunctionArgs";
+	private final static String TEST_NAME1 = "IPAFunctionArgsFor";
+	private final static String TEST_NAME2 = "IPAFunctionArgsParfor";
+	
 	private final static String TEST_DIR = "functions/recompile/";
 	private final static String TEST_CLASS_DIR = TEST_DIR + IPAConstantPropagationFunTest.class.getSimpleName() + "/";
 	
 	@Override
 	public void setUp() {
 		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[]{"R"}));
+		addTestConfiguration(TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[]{"R"}));
 	}
 
 	@Test
-	public void runIPAConstantPropagationTest()
-	{
+	public void runIPAConstantPropagationForTest() {
+		runIPAConstantPropagationTest(TEST_NAME1);
+	}
+	
+	@Test
+	public void runIPAConstantPropagationParForTest() {
+		runIPAConstantPropagationTest(TEST_NAME2);
+	}
+	
+	private void runIPAConstantPropagationTest(String testname) {
 		boolean oldFlagIPA = OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS;
 		
 		try
 		{
-			TestConfiguration config = getTestConfiguration(TEST_NAME1);
+			TestConfiguration config = getTestConfiguration(testname);
 			loadTestConfiguration(config);
 			
 			String HOME = SCRIPT_DIR + TEST_DIR;
-			fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
+			fullDMLScriptName = HOME + testname + ".dml";
 			programArgs = new String[]{"-args", output("R") };
 
 			OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = true;
diff --git a/src/test/scripts/functions/recompile/IPAFunctionArgs.dml b/src/test/scripts/functions/recompile/IPAFunctionArgsFor.dml
similarity index 100%
rename from src/test/scripts/functions/recompile/IPAFunctionArgs.dml
rename to src/test/scripts/functions/recompile/IPAFunctionArgsFor.dml
diff --git a/src/test/scripts/functions/recompile/IPAFunctionArgsParfor.dml b/src/test/scripts/functions/recompile/IPAFunctionArgsParfor.dml
new file mode 100644
index 0000000..f7de41f
--- /dev/null
+++ b/src/test/scripts/functions/recompile/IPAFunctionArgsParfor.dml
@@ -0,0 +1,109 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+checkR2 = function(Matrix[double] X, Matrix[double] y, Matrix[double] y_p,
+          Matrix[double] beta, Integer icpt) return (Double R2_ad)
+{
+  n = nrow(X);
+  m = ncol(X);
+  m_ext = m;
+  if (icpt == 1|icpt == 2)
+      m_ext = m+1; #due to extra column ones
+  avg_tot = sum(y)/n;
+  ss_tot = sum(y^2);
+  ss_avg_tot = ss_tot - n*avg_tot^2;
+  y_res = y - y_p;
+  avg_res = sum(y - y_p)/n;
+  ss_res = sum((y - y_p)^2);
+  R2 = 1 - ss_res/ss_avg_tot;
+  dispersion = ifelse(n>m_ext, ss_res/(n-m_ext), NaN);
+  R2_ad = ifelse(n>m_ext, 1-dispersion/(ss_avg_tot/(n-1)), NaN);
+}
+
+
+PCA = function(Matrix[Double] A, Integer K = ncol(A), Integer center = 1, Integer scale = 1,
+    Integer projectData = 1) return(Matrix[Double] newA)
+{
+  evec_dominant = matrix(0,cols=1,rows=1);
+
+  N = nrow(A);
+  D = ncol(A);
+  print("K = "+K);
+
+  # perform z-scoring (centering and scaling)
+  A = scale(A, center==1, scale==1);
+
+  # co-variance matrix
+  mu = colSums(A)/N;
+  C = (t(A) %*% A)/(N-1) - (N/(N-1))*t(mu) %*% mu;
+
+  # compute eigen vectors and values
+  [evalues, evectors] = eigen(C);
+
+  decreasing_Idx = order(target=evalues,by=1,decreasing=TRUE,index.return=TRUE);
+  diagmat = table(seq(1,D),decreasing_Idx);
+  # sorts eigenvalues by decreasing order
+  evalues = diagmat %*% evalues;
+  # sorts eigenvectors column-wise in the order of decreasing eigenvalues
+  evectors = evectors %*% diagmat;
+
+
+  # select K dominant eigen vectors
+  nvec = ncol(evectors);
+
+  eval_dominant = evalues[1:K, 1];
+  evec_dominant = evectors[,1:K];
+
+  # the square root of eigenvalues
+  eval_stdev_dominant = sqrt(eval_dominant);
+
+  if (projectData == 1){
+    # Construct new data set by treating computed dominant eigenvectors as the basis vectors
+    newA = A %*% evec_dominant;
+  }
+}
+
+# Get the dataset
+M = 1000;
+A = rand(rows=M, cols=100, seed=1);
+y = rand(rows=M, cols=1, seed=2);
+R = matrix(0, rows=1, cols=20);
+
+Kc = floor(ncol(A) * 0.8);
+
+for (i in 1:10) {
+  newA1 = PCA(A=A, K=Kc+i);
+  beta1 = lm(X=newA1, y=y, icpt=1, reg=0.0001, verbose=FALSE);
+  y_predict1 = lmpredict(X=newA1, w=beta1, icpt=1);
+  R2_ad1 = checkR2(newA1, y, y_predict1, beta1, 1);
+  R[,i] = R2_ad1;
+}
+
+parfor (i in 1:10) {
+  newA3 = PCA(A=A, K=Kc+5);
+  beta3 = lm(X=newA3, y=y, icpt=1, reg=0.001*i, verbose=FALSE);
+  y_predict3 = lmpredict(X=newA3, w=beta3, icpt=1);
+  R2_ad3 = checkR2(newA3, y, y_predict3, beta3, 1);
+  R[,10+i] = R2_ad3;
+}
+
+
+write(R, $1);