blob: fb82660828d6e455c9aea26700f97843a8f77316 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.test.applications;
import java.util.HashMap;
import org.junit.Test;
import org.apache.sysds.hops.Hop;
import org.apache.sysds.lops.LopProperties.ExecType;
import org.apache.sysds.runtime.controlprogram.ParForProgramBlock.PExecMode;
import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
/**
 * Application test that runs one of the {@code parfor_bivariate<N>.dml} scripts
 * and compares its outputs against the results of the {@code parfor_bivariate.R}
 * reference script.
 *
 * The script variant is picked from the requested outer/inner parfor execution
 * modes; inputs (data matrix, two attribute sets and their kinds) are generated
 * with fixed arguments to the random matrix generator.
 */
public class ParForBivariateStatsTest extends AutomatedTestBase
{
	private final static String TEST_NAME = "parfor_bivariate";
	private final static String TEST_DIR = "applications/parfor/";
	private final static String TEST_CLASS_DIR = TEST_DIR + ParForBivariateStatsTest.class.getSimpleName() + "/";

	// comparison tolerance; also subtracted from the upper bound of generated
	// attribute indexes so that flooring never yields cols+1
	private final static double eps = 1e-10;

	private final static int rows1 = 1000; // row count used when the instruction type is CP
	private final static int rows2 = (int) (Hop.CPThreshold+1); // row count used for any non-CP instruction type
	private final static int cols = 30; // number of columns of the data matrix D
	private final static int cols2 = 10; // number of attributes in each of the sets S1 and S2
	private final static double minVal=1; // lower bound of generated values in D
	private final static double maxVal=5; // upper bound of generated values in D

	// names of the result matrices compared between the DML and the R run
	private final static String[] COMPARED_OUTPUTS =
		{ "bivar.stats", "category.counts", "category.means", "category.variances" };

	@Override
	public void setUp()
	{
		TestConfiguration conf = new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "Rout" });
		addTestConfiguration(TEST_NAME, conf);
	}

	/** Outer parfor LOCAL, inner parfor REMOTE_SPARK, CP-sized input. */
	@Test
	public void testParForBivariateStatsLocalRemoteCP() {
		runParForBivariateStatsTest(true, PExecMode.LOCAL, PExecMode.REMOTE_SPARK, ExecType.CP);
	}

	/** Outer parfor REMOTE_SPARK, inner parfor LOCAL, CP-sized input. */
	@Test
	public void testParForBivariateStatsRemoteLocalCP() {
		runParForBivariateStatsTest(true, PExecMode.REMOTE_SPARK, PExecMode.LOCAL, ExecType.CP);
	}

	/**
	 * Maps the requested configuration to the numeric suffix of the DML script.
	 *
	 * @param parallel whether a parfor variant is requested at all
	 * @param outer execution mode of the outer parfor
	 * @param inner execution mode of the inner parfor
	 * @return 0 if not parallel; otherwise 2 for a REMOTE_SPARK inner loop,
	 *         3 for a REMOTE_SPARK outer loop, 1 for a LOCAL outer loop, else 4
	 */
	private static int selectScriptNum( boolean parallel, PExecMode outer, PExecMode inner )
	{
		if( !parallel )
			return 0;
		if( inner == PExecMode.REMOTE_SPARK )
			return 2;
		if( outer == PExecMode.REMOTE_SPARK )
			return 3;
		return (outer == PExecMode.LOCAL) ? 1 : 4;
	}

	/**
	 * Generates the inputs, runs the selected DML script and the R script, and
	 * compares every matrix listed in {@link #COMPARED_OUTPUTS}.
	 *
	 * @param parallel whether to run a parfor script variant (script 0 otherwise)
	 * @param outer execution mode of the outer parfor
	 * @param inner execution mode of the inner parfor
	 * @param instType instruction type; only decides the number of generated rows
	 */
	private void runParForBivariateStatsTest( boolean parallel, PExecMode outer, PExecMode inner, ExecType instType )
	{
		//the instruction type is influenced only through the input size
		final int numRows = (instType == ExecType.CP) ? rows1 : rows2;
		final int scriptNum = selectScriptNum(parallel, outer, inner);

		loadTestConfiguration(getTestConfiguration(TEST_NAME));

		//script names and arguments
		final String scriptBase = SCRIPT_DIR + TEST_DIR + TEST_NAME;
		final String maxValArg = String.valueOf((int)maxVal);
		fullDMLScriptName = scriptBase + scriptNum + ".dml";
		programArgs = new String[]{
			"-args",
			input("D"), input("S1"), input("S2"), input("K1"), input("K2"),
			output("bivarstats"),
			String.valueOf(numRows), String.valueOf(cols), String.valueOf(cols2),
			String.valueOf(cols2*cols2), maxValArg };
		fullRScriptName = scriptBase + ".R";
		rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + maxValArg + " " + expectedDir();

		//data matrix: column kinds cycle through 1,2,3; columns of kind 2 and 3
		//are floored (ordinal and categorical variables)
		double[][] data = getRandomMatrix(numRows, cols, minVal, maxVal, 1, 7777);
		double[] colKind = new double[cols];
		for( int c=0; c<cols; c++ )
		{
			int kind = (c%3)+1;
			colKind[c] = kind;
			if( kind != 1 )
				TestUtils.floor(data, c);
		}
		writeInputMatrix("D", data, true);

		//attribute sets: floored values, i.e., 1-based column indexes into D
		double[][] attrs1 = getRandomMatrix(1, cols2, 1, cols+1-eps, 1, 1112);
		double[][] attrs2 = getRandomMatrix(1, cols2, 1, cols+1-eps, 1, 1113);
		TestUtils.floor(attrs1);
		TestUtils.floor(attrs2);
		writeInputMatrix("S1", attrs1, true);
		writeInputMatrix("S2", attrs2, true);

		//kinds (1,2,3) of the selected attributes, looked up by column index
		double[][] kinds1 = new double[1][cols2];
		double[][] kinds2 = new double[1][cols2];
		for( int a=0; a<cols2; a++ )
		{
			int col1 = (int)attrs1[0][a]-1;
			int col2 = (int)attrs2[0][a]-1;
			kinds1[0][a] = colKind[col1];
			kinds2[0][a] = colKind[col2];
		}
		writeInputMatrix("K1", kinds1, true);
		writeInputMatrix("K2", kinds2, true);

		//run DML (no exception expected) and the R reference
		//NOTE(review): 92 is presumably the expected job/instruction count - confirm against AutomatedTestBase.runTest
		runTest(true, false, null, 92);
		runRScript(true);

		//compare every result matrix of the DML run with its R counterpart
		for( String name : COMPARED_OUTPUTS )
		{
			HashMap<CellIndex, Double> fromDml = readDMLMatrixFromOutputDir("bivarstats/"+name);
			HashMap<CellIndex, Double> fromR = readRMatrixFromExpectedDir(name);
			TestUtils.compareMatrices(fromDml, fromR, eps, "Stat-DML", "Stat-R");
		}
	}
}