// blob: 9fbd61c836930c4797e841983860538e50901db7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.test.functions.recompile;
import java.util.HashMap;
import org.junit.Test;
import org.apache.sysds.hops.OptimizerUtils;
import org.apache.sysds.common.Types.FileFormat;
import org.apache.sysds.common.Types.ValueType;
import org.apache.sysds.runtime.io.FileFormatPropertiesCSV;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
import org.apache.sysds.runtime.meta.MatrixCharacteristics;
import org.apache.sysds.runtime.util.DataConverter;
import org.apache.sysds.runtime.util.HDFSTool;
import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
/**
 * Recompilation test for reading a CSV matrix whose metadata file carries
 * matrix characteristics that are all set to {@code -1}.
 *
 * <p>Each test case writes a small random matrix as CSV, overwrites its
 * characteristics with {@code -1} before writing the {@code .mtd} file, runs
 * the {@code csv_read_unknown} DML script, and then verifies that
 * <ul>
 *   <li>the script output {@code R} equals the written matrix cell by cell, and</li>
 *   <li>the number of compiled and executed Spark instructions matches the
 *       expectation for the chosen optimizer flags.</li>
 * </ul>
 * The four test cases cover every combination of
 * {@link OptimizerUtils#ALLOW_SPLIT_HOP_DAGS} and
 * {@link OptimizerUtils#ALLOW_ALGEBRAIC_SIMPLIFICATION}.
 */
public class CSVReadUnknownSizeTest extends AutomatedTestBase {
    private static final String TEST_NAME = "csv_read_unknown";
    private static final String TEST_DIR = "functions/recompile/";
    private static final String TEST_CLASS_DIR =
        TEST_DIR + CSVReadUnknownSizeTest.class.getSimpleName() + "/";

    /** Number of rows of the generated input matrix. */
    private static final int rows = 10;
    /** Number of columns of the generated input matrix. */
    private static final int cols = 15;

    /**
     * Main method for running one test at a time from Eclipse.
     *
     * <p>Runs {@link #testCSVReadUnknownSizeSplitRewrites()} and prints the
     * elapsed wall-clock time to standard error on success.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        long startMsec = System.currentTimeMillis();

        CSVReadUnknownSizeTest t = new CSVReadUnknownSizeTest();
        t.setUpBase();
        t.setUp();
        try {
            t.testCSVReadUnknownSizeSplitRewrites();
        }
        finally {
            // Always tear down, even when the test throws, so that a failed
            // run does not skip the cleanup step.
            t.tearDown();
        }

        long elapsedMsec = System.currentTimeMillis() - startMsec;
        System.err.printf("Finished in %1.3f sec\n", elapsedMsec / 1000.0);
    }

    /**
     * Clears previously recorded assertion information and registers the test
     * configuration for {@code csv_read_unknown}.
     */
    @Override
    public void setUp() {
        TestUtils.clearAssertionInformation();
        addTestConfiguration(TEST_NAME,
            new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "X" }));
    }

    /** DAG splitting disabled, algebraic simplification disabled. */
    @Test
    public void testCSVReadUnknownSizeNoSplitNoRewrites() {
        runCSVReadUnknownSizeTest(false, false);
    }

    /** DAG splitting disabled, algebraic simplification enabled. */
    @Test
    public void testCSVReadUnknownSizeNoSplitRewrites() {
        runCSVReadUnknownSizeTest(false, true);
    }

    /** DAG splitting enabled, algebraic simplification disabled. */
    @Test
    public void testCSVReadUnknownSizeSplitNoRewrites() {
        runCSVReadUnknownSizeTest(true, false);
    }

    /** DAG splitting enabled, algebraic simplification enabled. */
    @Test
    public void testCSVReadUnknownSizeSplitRewrites() {
        runCSVReadUnknownSizeTest(true, true);
    }

    /**
     * Shared test driver.
     *
     * <p>Temporarily overrides the two static optimizer flags, runs the DML
     * script against a freshly written CSV input, validates the result and the
     * Spark instruction counts, and finally restores the original flag values.
     *
     * @param splitDags value assigned to {@link OptimizerUtils#ALLOW_SPLIT_HOP_DAGS}
     *                  for the duration of the run
     * @param rewrites  value assigned to
     *                  {@link OptimizerUtils#ALLOW_ALGEBRAIC_SIMPLIFICATION}
     *                  for the duration of the run
     * @throws RuntimeException wrapping any exception raised while preparing,
     *                          running or validating the test (including a
     *                          cell mismatch between input and output)
     */
    private void runCSVReadUnknownSizeTest(boolean splitDags, boolean rewrites) {
        // Remember the global flags so they can be restored in the finally block.
        boolean oldFlagSplit = OptimizerUtils.ALLOW_SPLIT_HOP_DAGS;
        boolean oldFlagRewrites = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;

        try {
            getAndLoadTestConfiguration(TEST_NAME);

            /* This is for running the junit test the new way, i.e., construct the arguments directly */
            String HOME = SCRIPT_DIR + TEST_DIR;
            fullDMLScriptName = HOME + TEST_NAME + ".dml";
            programArgs = new String[] {
                // "-explain",
                "-stats",
                "-args", input("X"), output("R") };

            // The R command is prepared here, but this method does not invoke
            // the R script; the output is compared against the in-memory input.
            fullRScriptName = HOME + TEST_NAME + ".R";
            rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();

            OptimizerUtils.ALLOW_SPLIT_HOP_DAGS = splitDags;
            OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;

            // Generate the input (arguments -1, 1, 1.0d, 7 are presumably
            // min, max, sparsity and seed -- TODO confirm) and write it as CSV.
            double[][] X = getRandomMatrix(rows, cols, -1, 1, 1.0d, 7);
            MatrixBlock mb = DataConverter.convertToMatrixBlock(X);
            MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, 1000, 1000);
            FileFormatPropertiesCSV fprop = new FileFormatPropertiesCSV();
            DataConverter.writeMatrixToHDFS(mb, input("X"), FileFormat.CSV, mc, -1, fprop);

            // Reset all characteristics to -1 before writing the metadata file,
            // so the .mtd written below does not carry the real dimensions.
            mc.set(-1, -1, -1, -1);
            HDFSTool.writeMetaDataFile(input("X.mtd"), ValueType.FP64, mc, FileFormat.CSV, fprop);

            runTest(true, false, null, -1);

            // Compare matrices: the output map uses one-based cell indices and
            // a missing entry is treated as 0.0.
            HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromOutputDir("R");
            for (int i = 0; i < rows; i++) {
                for (int j = 0; j < cols; j++) {
                    Double tmp = dmlfile.get(new CellIndex(i + 1, j + 1));
                    double expectedValue = mb.quickGetValue(i, j);
                    double actualValue = (tmp == null) ? 0.0 : tmp;

                    if (expectedValue != actualValue) {
                        throw new Exception(String.format("Value of cell (%d,%d) "
                            + "(zero-based indices) in output file %s is %f, "
                            + "but original value was %f",
                            i, j, baseDirectory + OUTPUT_DIR + "R",
                            actualValue, expectedValue));
                    }
                }
            }

            // Check the expected number of compiled and executed Spark instructions.
            // The counts are currently identical with and without algebraic
            // rewrites; only DAG splitting changes the executed count.
            // NOTE(review): the original comment here read "with algebraic rewrites -
            // unary op in reducer prevents job-level recompile" and referred to MR
            // jobs; it looks like a leftover from an earlier backend -- confirm.
            int expectedNumCompiled = 5;
            int expectedNumExecuted = splitDags ? 0 : 3;

            checkNumCompiledSparkInst(expectedNumCompiled);
            checkNumExecutedSparkInst(expectedNumExecuted);
        }
        catch (Exception ex) {
            throw new RuntimeException(ex);
        }
        finally {
            OptimizerUtils.ALLOW_SPLIT_HOP_DAGS = oldFlagSplit;
            OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlagRewrites;
        }
    }
}