[SYSTEMDS-415] Initial mlcontext lineage support (tracing, reuse)
This patch adds basic lineage support to the MLContext API. Since
in-memory objects are directly bound to the symbol table, lineage
tracing views these objects as literals and incorrectly reused
intermediates even if different in-memory objects where used in
subsequent mlcontext invocations.
diff --git a/dev/Tasks.txt b/dev/Tasks.txt
index 673c737..a97e1ba 100644
--- a/dev/Tasks.txt
+++ b/dev/Tasks.txt
@@ -340,6 +340,7 @@
* 412 Robust lineage tracing (non-recursive, parfor) OK
* 413 Cache and reuse MultiReturnBuiltin instructions OK
* 414 New rewrite for PCA --> lmDS pipeline OK
+ * 415 MLContext lineage support (tracing and reuse correctness) OK
SYSTEMDS-420 Compiler Improvements
* 421 Fix invalid IPA scalar propagation into functions OK
diff --git a/src/main/java/org/apache/sysds/api/mlcontext/MLContext.java b/src/main/java/org/apache/sysds/api/mlcontext/MLContext.java
index 10a1aaa..6a39fda 100644
--- a/src/main/java/org/apache/sysds/api/mlcontext/MLContext.java
+++ b/src/main/java/org/apache/sysds/api/mlcontext/MLContext.java
@@ -41,6 +41,8 @@
import org.apache.sysds.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysds.runtime.instructions.cp.Data;
import org.apache.sysds.runtime.instructions.cp.ScalarObject;
+import org.apache.sysds.runtime.lineage.LineageCacheConfig;
+import org.apache.sysds.runtime.lineage.LineageCacheConfig.ReuseCacheType;
import org.apache.sysds.runtime.meta.MetaDataFormat;
import org.apache.sysds.utils.MLContextProxy;
import org.apache.sysds.utils.Explain.ExplainType;
@@ -386,14 +388,35 @@
* to standard output.
*
* @param explain
- * {@code true} if explanation should be output, {@code false}
- * otherwise
+ * {@code true} if explanation should be output, {@code false} otherwise
*/
public void setExplain(boolean explain) {
this.explain = explain;
}
/**
+ * Set whether or not lineage should be traced
+ *
+ * @param lineage
+ * {@code true} if lineage should be traced, {@code false} otherwise
+ */
+ public void setLineage(boolean lineage) {
+ DMLScript.LINEAGE = lineage;
+ }
+
+ /**
+ * Set type of lineage-based reuse caching and enable lineage tracing
+ *
+ * @param reuse
+ * reuse cache type to use
+ */
+ public void setLineage(ReuseCacheType reuse) {
+ DMLScript.LINEAGE_REUSE = reuse;
+ setLineage(true);
+ LineageCacheConfig.setConfig(reuse);
+ }
+
+ /**
* Obtain whether or not all values should be maintained in the symbol table
* after execution.
*
diff --git a/src/main/java/org/apache/sysds/api/mlcontext/ScriptExecutor.java b/src/main/java/org/apache/sysds/api/mlcontext/ScriptExecutor.java
index cfc96cb..bf9c4ee 100644
--- a/src/main/java/org/apache/sysds/api/mlcontext/ScriptExecutor.java
+++ b/src/main/java/org/apache/sysds/api/mlcontext/ScriptExecutor.java
@@ -50,6 +50,7 @@
import org.apache.sysds.runtime.controlprogram.Program;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContextFactory;
+import org.apache.sysds.runtime.lineage.LineageItemUtils;
import org.apache.sysds.utils.Explain;
import org.apache.sysds.utils.Statistics;
import org.apache.sysds.utils.Explain.ExplainCounts;
@@ -214,8 +215,11 @@
protected void createAndInitializeExecutionContext() {
executionContext = ExecutionContextFactory.createContext(runtimeProgram);
LocalVariableMap symbolTable = script.getSymbolTable();
- if (symbolTable != null)
+ if (symbolTable != null) {
executionContext.setVariables(symbolTable);
+ if( DMLScript.LINEAGE )
+ LineageItemUtils.addAllDataLineage(executionContext);
+ }
//attach registered outputs (for dynamic recompile)
executionContext.getVariables().setRegisteredOutputs(
new HashSet<>(script.getOutputVariables()));
diff --git a/src/main/java/org/apache/sysds/lops/compile/Dag.java b/src/main/java/org/apache/sysds/lops/compile/Dag.java
index ca4353b..3e6f646 100644
--- a/src/main/java/org/apache/sysds/lops/compile/Dag.java
+++ b/src/main/java/org/apache/sysds/lops/compile/Dag.java
@@ -775,7 +775,7 @@
//String createInst = prepareVariableInstruction("createvar", node);
//out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
int blen = (int) oparams.getBlocksize();
- Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(
+ Instruction createvarInst = VariableCPInstruction.prepCreatevarInstruction(
oparams.getLabel(), oparams.getFile_name(), true, node.getDataType(),
getOutputFileFormat(node, false).toString(),
new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), blen, oparams.getNnz()),
@@ -803,7 +803,7 @@
for( Lop fnOut: fcall.getFunctionOutputs()) {
OutputParameters fnOutParams = fnOut.getOutputParameters();
//OutputInfo oinfo = getOutputInfo((N)fnOut, false);
- Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(
+ Instruction createvarInst = VariableCPInstruction.prepCreatevarInstruction(
fnOutParams.getLabel(), getFilePath() + fnOutParams.getLabel(),
true, fnOut.getDataType(), getOutputFileFormat(fnOut, false).toString(),
new MatrixCharacteristics(fnOutParams.getNumRows(), fnOutParams.getNumCols(), (int)fnOutParams.getBlocksize(), fnOutParams.getNnz()),
@@ -913,7 +913,7 @@
String tempFileName = getNextUniqueFilename();
int blen = (int) oparams.getBlocksize();
- Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(
+ Instruction createvarInst = VariableCPInstruction.prepCreatevarInstruction(
tempVarName, tempFileName, true, node.getDataType(), out.getOutInfo().toString(),
new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), blen, oparams.getNnz()),
oparams.getUpdateType());
@@ -946,7 +946,7 @@
// Generate a single mvvar instruction (e.g., mvvar tempA A)
// instead of two instructions "cpvar tempA A" and "rmvar tempA"
- Instruction currInstr = VariableCPInstruction.prepareMoveInstruction(tempVarName, constVarName);
+ Instruction currInstr = VariableCPInstruction.prepMoveInstruction(tempVarName, constVarName);
currInstr.setLocation(node);
@@ -1010,7 +1010,7 @@
&& ((VariableCPInstruction)inst2).isRemoveVariableNoFile()
&& inst1.getInput1().getName().equals(
((VariableCPInstruction)inst2).getInput1().getName()) ) {
- ret.add(VariableCPInstruction.prepareMoveInstruction(
+ ret.add(VariableCPInstruction.prepMoveInstruction(
inst1.getInput1().getName(), inst1.getInput2().getName()));
}
else {
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
index 79fefc5..8accfea 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
@@ -215,7 +215,7 @@
*/
protected CacheableData(DataType dt, ValueType vt) {
super (dt, vt);
- _uniqueID = isCachingActive() ? _seq.getNextID() : -1;
+ _uniqueID = _seq.getNextID();
_cacheStatus = CacheStatus.EMPTY;
_numReadThreads = 0;
_gpuObjects = DMLScript.USE_ACCELERATOR ? new HashMap<>() : null;
@@ -270,6 +270,10 @@
public String getFileName() {
return _hdfsFileName;
}
+
+ public long getUniqueID() {
+ return _uniqueID;
+ }
public synchronized void setFileName( String file ) {
if( _hdfsFileName!=null && !_hdfsFileName.equals(file) )
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
index 15e9ccb..2ed0d8d 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
@@ -1126,7 +1126,7 @@
return parseInstruction(sb.toString());
}
- public static Instruction prepareMoveInstruction(String srcVar, String destFileName, String format) {
+ public static Instruction prepMoveInstruction(String srcVar, String destFileName, String format) {
StringBuilder sb = new StringBuilder();
sb.append("CP");
sb.append(Lop.OPERAND_DELIMITOR);
@@ -1141,7 +1141,7 @@
return parseInstruction(str);
}
- public static Instruction prepareMoveInstruction(String srcVar, String destVar) {
+ public static Instruction prepMoveInstruction(String srcVar, String destVar) {
// example: mvvar tempA A
StringBuilder sb = new StringBuilder();
sb.append("CP");
@@ -1155,7 +1155,7 @@
return parseInstruction(str);
}
- private static String getBasicCreateVarString(String varName, String fileName, boolean fNameOverride, DataType dt, String format) {
+ private static String getBasicCreatevarString(String varName, String fileName, boolean fNameOverride, DataType dt, String format) {
//note: the filename override property leads to concatenation of unique ids in order to
//ensure conflicting filenames for objects that originate from the same instruction
boolean lfNameOverride = fNameOverride && !ConfigurationManager
@@ -1179,13 +1179,13 @@
return sb.toString();
}
- public static Instruction prepareCreateMatrixVariableInstruction(String varName, String fileName, boolean fNameOverride, String format) {
- return parseInstruction(getBasicCreateVarString(varName, fileName, fNameOverride, DataType.MATRIX, format));
+ public static Instruction prepCreatevarInstruction(String varName, String fileName, boolean fNameOverride, String format) {
+ return parseInstruction(getBasicCreatevarString(varName, fileName, fNameOverride, DataType.MATRIX, format));
}
- public static Instruction prepareCreateVariableInstruction(String varName, String fileName, boolean fNameOverride, DataType dt, String format, DataCharacteristics mc, UpdateType update) {
+ public static Instruction prepCreatevarInstruction(String varName, String fileName, boolean fNameOverride, DataType dt, String format, DataCharacteristics mc, UpdateType update) {
StringBuilder sb = new StringBuilder();
- sb.append(getBasicCreateVarString(varName, fileName, fNameOverride, dt, format));
+ sb.append(getBasicCreatevarString(varName, fileName, fNameOverride, dt, format));
sb.append(Lop.OPERAND_DELIMITOR);
sb.append(mc.getRows());
@@ -1203,9 +1203,9 @@
return parseInstruction(str);
}
- public static Instruction prepareCreateVariableInstruction(String varName, String fileName, boolean fNameOverride, DataType dt, String format, DataCharacteristics mc, UpdateType update, boolean hasHeader, String delim, boolean sparse) {
+ public static Instruction prepCreatevarInstruction(String varName, String fileName, boolean fNameOverride, DataType dt, String format, DataCharacteristics mc, UpdateType update, boolean hasHeader, String delim, boolean sparse) {
StringBuilder sb = new StringBuilder();
- sb.append(getBasicCreateVarString(varName, fileName, fNameOverride, dt, format));
+ sb.append(getBasicCreatevarString(varName, fileName, fNameOverride, dt, format));
sb.append(Lop.OPERAND_DELIMITOR);
sb.append(mc.getRows());
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageItemUtils.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageItemUtils.java
index e659025..1693627 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageItemUtils.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageItemUtils.java
@@ -64,6 +64,7 @@
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.controlprogram.BasicProgramBlock;
import org.apache.sysds.runtime.controlprogram.Program;
+import org.apache.sysds.runtime.controlprogram.caching.CacheableData;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContextFactory;
import org.apache.sysds.runtime.instructions.Instruction;
@@ -86,6 +87,7 @@
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
import java.util.stream.Collectors;
@@ -825,4 +827,18 @@
return(CPOpInputs != null ? LineageItemUtils.getLineage(ec,
CPOpInputs.toArray(new CPOperand[CPOpInputs.size()])) : null);
}
+
+ public static void addAllDataLineage(ExecutionContext ec) {
+ for( Entry<String, Data> e : ec.getVariables().entrySet() ) {
+ if( e.getValue() instanceof CacheableData<?> ) {
+ CacheableData<?> cdata = (CacheableData<?>) e.getValue();
+ //only createvar instruction with pREAD prefix added to lineage
+ String fromVar = org.apache.sysds.lops.Data.PREAD_PREFIX+e.getKey();
+ ec.traceLineage(VariableCPInstruction.prepCreatevarInstruction(
+ fromVar, "CacheableData::"+cdata.getUniqueID(), false, "binary"));
+ //move from pREADx to x
+ ec.traceLineage(VariableCPInstruction.prepMoveInstruction(fromVar, e.getKey()));
+ }
+ }
+ }
}
diff --git a/src/test/java/org/apache/sysds/test/functions/lineage/LineageMLContextTest.java b/src/test/java/org/apache/sysds/test/functions/lineage/LineageMLContextTest.java
new file mode 100644
index 0000000..2e3203f
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/lineage/LineageMLContextTest.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.lineage;
+
+import static org.apache.sysds.api.mlcontext.ScriptFactory.dml;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.junit.Test;
+import org.apache.sysds.api.mlcontext.MatrixFormat;
+import org.apache.sysds.api.mlcontext.MatrixMetadata;
+import org.apache.sysds.api.mlcontext.Script;
+import org.apache.sysds.runtime.lineage.LineageCacheConfig.ReuseCacheType;
+import org.apache.sysds.test.functions.mlcontext.MLContextTestBase;
+
+public class LineageMLContextTest extends MLContextTestBase {
+
+ @Test
+ public void testPrintLineage() {
+ System.out.println("LineageMLContextTest - JavaRDD<String> IJV sum DML");
+
+ List<String> list = new ArrayList<>();
+ list.add("1 1 5");
+ list.add("2 2 5");
+ list.add("3 3 5");
+ JavaRDD<String> javaRDD = sc.parallelize(list);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.IJV, 3, 3);
+
+ Script script = dml(
+ "print('sum: '+sum(M+M));"
+ +"print(lineage(M+M));"
+ ).in("M", javaRDD, mm);
+ setExpectedStdOut("sum: 30.0");
+
+ ml.setLineage(ReuseCacheType.NONE);
+ ml.execute(script);
+ }
+
+ @Test
+ public void testReuseSameRDD() {
+ System.out.println("LineageMLContextTest - JavaRDD<String> IJV sum DML");
+
+ List<String> list = new ArrayList<>();
+ list.add("1 1 5");
+ list.add("2 2 5");
+ list.add("3 3 5");
+ JavaRDD<String> javaRDD = sc.parallelize(list);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.IJV, 3, 3);
+
+ Script script = dml(
+ "print('sum: '+sum(M+M));"
+ +"print(lineage(M+M));"
+ ).in("M", javaRDD, mm);
+ setExpectedStdOut("sum: 30.0");
+
+ ml.setLineage(ReuseCacheType.REUSE_FULL);
+ ml.execute(script);
+ ml.execute(script); //w/ reuse
+ }
+
+ @Test
+ public void testNoReuseDifferentRDD() {
+ System.out.println("LineageMLContextTest - JavaRDD<String> IJV sum DML");
+
+ List<String> list = new ArrayList<>();
+ list.add("1 1 5");
+ list.add("2 2 5");
+ list.add("3 3 5");
+ JavaRDD<String> javaRDD = sc.parallelize(list);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.IJV, 3, 3);
+
+ Script script = dml(
+ "print('sum: '+sum(M+M));"
+ +"print(lineage(M+M));"
+ ).in("M", javaRDD, mm);
+
+ ml.setLineage(ReuseCacheType.REUSE_FULL);
+
+ setExpectedStdOut("sum: 30.0");
+ ml.execute(script);
+
+ list.add("4 4 5");
+ JavaRDD<String> javaRDD2 = sc.parallelize(list);
+ MatrixMetadata mm2 = new MatrixMetadata(MatrixFormat.IJV, 4, 4);
+ script.in("M", javaRDD2, mm2);
+
+ setExpectedStdOut("sum: 40.0");
+ ml.execute(script); //w/o reuse
+ }
+}
diff --git a/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java b/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java
index efd0397..2eb6ccf 100644
--- a/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/recompile/IPAConstantPropagationFunTest.java
@@ -30,27 +30,38 @@
public class IPAConstantPropagationFunTest extends AutomatedTestBase
{
- private final static String TEST_NAME1 = "IPAFunctionArgs";
+ private final static String TEST_NAME1 = "IPAFunctionArgsFor";
+ private final static String TEST_NAME2 = "IPAFunctionArgsParfor";
+
private final static String TEST_DIR = "functions/recompile/";
private final static String TEST_CLASS_DIR = TEST_DIR + IPAConstantPropagationFunTest.class.getSimpleName() + "/";
@Override
public void setUp() {
addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[]{"R"}));
+ addTestConfiguration(TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[]{"R"}));
}
@Test
- public void runIPAConstantPropagationTest()
- {
+ public void runIPAConstantPropagationForTest() {
+ runIPAConstantPropagationTest(TEST_NAME1);
+ }
+
+ @Test
+ public void runIPAConstantPropagationParForTest() {
+ runIPAConstantPropagationTest(TEST_NAME2);
+ }
+
+ private void runIPAConstantPropagationTest(String testname) {
boolean oldFlagIPA = OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS;
try
{
- TestConfiguration config = getTestConfiguration(TEST_NAME1);
+ TestConfiguration config = getTestConfiguration(testname);
loadTestConfiguration(config);
String HOME = SCRIPT_DIR + TEST_DIR;
- fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
+ fullDMLScriptName = HOME + testname + ".dml";
programArgs = new String[]{"-args", output("R") };
OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = true;
diff --git a/src/test/scripts/functions/recompile/IPAFunctionArgs.dml b/src/test/scripts/functions/recompile/IPAFunctionArgsFor.dml
similarity index 100%
rename from src/test/scripts/functions/recompile/IPAFunctionArgs.dml
rename to src/test/scripts/functions/recompile/IPAFunctionArgsFor.dml
diff --git a/src/test/scripts/functions/recompile/IPAFunctionArgsParfor.dml b/src/test/scripts/functions/recompile/IPAFunctionArgsParfor.dml
new file mode 100644
index 0000000..f7de41f
--- /dev/null
+++ b/src/test/scripts/functions/recompile/IPAFunctionArgsParfor.dml
@@ -0,0 +1,109 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+checkR2 = function(Matrix[double] X, Matrix[double] y, Matrix[double] y_p,
+ Matrix[double] beta, Integer icpt) return (Double R2_ad)
+{
+ n = nrow(X);
+ m = ncol(X);
+ m_ext = m;
+ if (icpt == 1|icpt == 2)
+ m_ext = m+1; #due to extra column ones
+ avg_tot = sum(y)/n;
+ ss_tot = sum(y^2);
+ ss_avg_tot = ss_tot - n*avg_tot^2;
+ y_res = y - y_p;
+ avg_res = sum(y - y_p)/n;
+ ss_res = sum((y - y_p)^2);
+ R2 = 1 - ss_res/ss_avg_tot;
+ dispersion = ifelse(n>m_ext, ss_res/(n-m_ext), NaN);
+ R2_ad = ifelse(n>m_ext, 1-dispersion/(ss_avg_tot/(n-1)), NaN);
+}
+
+
+PCA = function(Matrix[Double] A, Integer K = ncol(A), Integer center = 1, Integer scale = 1,
+ Integer projectData = 1) return(Matrix[Double] newA)
+{
+ evec_dominant = matrix(0,cols=1,rows=1);
+
+ N = nrow(A);
+ D = ncol(A);
+ print("K = "+K);
+
+ # perform z-scoring (centering and scaling)
+ A = scale(A, center==1, scale==1);
+
+ # co-variance matrix
+ mu = colSums(A)/N;
+ C = (t(A) %*% A)/(N-1) - (N/(N-1))*t(mu) %*% mu;
+
+ # compute eigen vectors and values
+ [evalues, evectors] = eigen(C);
+
+ decreasing_Idx = order(target=evalues,by=1,decreasing=TRUE,index.return=TRUE);
+ diagmat = table(seq(1,D),decreasing_Idx);
+ # sorts eigenvalues by decreasing order
+ evalues = diagmat %*% evalues;
+ # sorts eigenvectors column-wise in the order of decreasing eigenvalues
+ evectors = evectors %*% diagmat;
+
+
+ # select K dominant eigen vectors
+ nvec = ncol(evectors);
+
+ eval_dominant = evalues[1:K, 1];
+ evec_dominant = evectors[,1:K];
+
+ # the square root of eigenvalues
+ eval_stdev_dominant = sqrt(eval_dominant);
+
+ if (projectData == 1){
+ # Construct new data set by treating computed dominant eigenvectors as the basis vectors
+ newA = A %*% evec_dominant;
+ }
+}
+
+# Get the dataset
+M = 1000;
+A = rand(rows=M, cols=100, seed=1);
+y = rand(rows=M, cols=1, seed=2);
+R = matrix(0, rows=1, cols=20);
+
+Kc = floor(ncol(A) * 0.8);
+
+for (i in 1:10) {
+ newA1 = PCA(A=A, K=Kc+i);
+ beta1 = lm(X=newA1, y=y, icpt=1, reg=0.0001, verbose=FALSE);
+ y_predict1 = lmpredict(X=newA1, w=beta1, icpt=1);
+ R2_ad1 = checkR2(newA1, y, y_predict1, beta1, 1);
+ R[,i] = R2_ad1;
+}
+
+parfor (i in 1:10) {
+ newA3 = PCA(A=A, K=Kc+5);
+ beta3 = lm(X=newA3, y=y, icpt=1, reg=0.001*i, verbose=FALSE);
+ y_predict3 = lmpredict(X=newA3, w=beta3, icpt=1);
+ R2_ad3 = checkR2(newA3, y, y_predict3, beta3, 1);
+ R[,10+i] = R2_ad3;
+}
+
+
+write(R, $1);