/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.test.applications;
import java.util.HashMap;
import org.apache.sysds.common.Types.ExecMode;
import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
/**
* Shared methods and fields used by the different univariate statistics test
* harness classes.
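*
* <p>A minimal sketch of how a concrete test class might use this base class;
* the subclass and method names below are illustrative, not actual tests from
* the suite:
*
* <pre>{@code
* public class UnivariateScaleDenseTest extends UnivariateStatsBase {
*     // annotated with @Test in a real subclass
*     public void testScaleDensePosCP() {
*         testScaleWithR(SIZE.DIV4, RANGE.POS, SPARSITY.DENSE, ExecMode.SINGLE_NODE);
*     }
* }
* }</pre>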
*/
public abstract class UnivariateStatsBase extends AutomatedTestBase {
/**
* Hard-coded path to the directory where this test keeps all of its files,
* generated and static
*/
protected final static String TEST_DIR = "applications/descriptivestats/";
protected String TEST_CLASS_DIR = TEST_DIR + UnivariateStatsBase.class.getSimpleName() + "/";
/** Fudge factor for comparisons against R's output */
protected final static double epsilon = 0.000000001;
/**
* Number of rows in the primary input to each aggregate ("V" in the test
* script, "X" in the SystemDS documentation)
*/
protected final static int rows1 = 2000;
/**
* Number of rows in the secondary input to aggregates that take two input
* vectors ("P" in the test script and the documentation)
*/
protected final static int rows2 = 5;
/**
* Sizes of inputs used in tests, measured as the number of rows in the
* primary input vector. DIV4 = divisible by 4; DIV4Px = divisible by 4, plus x
*/
protected enum SIZE {
DIV4(2000), DIV4P1(2001), DIV4P2(2002), DIV4P3(2003);
int size = -1;
SIZE(int s) {
size = s;
}
}
/** Ranges of values passed to aggregates in different tests. */
protected enum RANGE {
NEG(-255, -2), MIXED(-200, 200), POS(2, 255);
double min, max;
RANGE(double mn, double mx) {
min = mn;
max = mx;
}
}
/**
* Actual sparsity values used in the "dense" and "sparse" variants of the
* tests.
*/
protected enum SPARSITY {
SPARSE(0.3), DENSE(0.8);
double sparsity;
SPARSITY(double sp) {
sparsity = sp;
}
}
/** Shared setup code for test harness configurations */
@Override
public void setUp() {
TestUtils.clearAssertionInformation();
addTestConfiguration("Scale", new TestConfiguration(TEST_CLASS_DIR, "Scale",
new String[] { "mean" + ".scalar", "std" + ".scalar",
"se" + ".scalar", "var" + ".scalar", "cv" + ".scalar",
/* "har", "geom", */
"min" + ".scalar", "max" + ".scalar",
"rng" + ".scalar", "g1" + ".scalar",
"se_g1" + ".scalar", "g2" + ".scalar",
"se_g2" + ".scalar", "out_minus", "out_plus",
"median" + ".scalar", "quantile", "iqm" + ".scalar" }));
addTestConfiguration("WeightedScaleTest", new TestConfiguration(
TEST_CLASS_DIR, "WeightedScaleTest", new String[] {
"mean" + ".scalar", "std" + ".scalar",
"se" + ".scalar", "var" + ".scalar", "cv" + ".scalar",
/* "har", "geom", */
"min" + ".scalar", "max" + ".scalar",
"rng" + ".scalar", "g1" + ".scalar",
"se_g1" + ".scalar", "g2" + ".scalar",
"se_g2" + ".scalar", "out_minus", "out_plus",
"median" + ".scalar", "quantile", "iqm" + ".scalar" }));
addTestConfiguration("Categorical", new TestConfiguration(TEST_CLASS_DIR,
"Categorical", new String[] { "Nc", "R" + ".scalar", "Pc", "C",
"Mode" })); // Indicate some file is scalar
addTestConfiguration("WeightedCategoricalTest", new TestConfiguration(
TEST_CLASS_DIR, "WeightedCategoricalTest", new String[] { "Nc",
"R" + ".scalar", "Pc", "C", "Mode" }));
}
/**
* Shared test driver for tests of univariate statistics over continuous,
* scaled, but not weighted, data
*
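* <p>Illustrative call, assuming a concrete subclass of this base class; the
* enum values and execution mode shown are an arbitrary example:
*
* <pre>{@code
* testScaleWithR(SIZE.DIV4P1, RANGE.MIXED, SPARSITY.SPARSE, ExecMode.SINGLE_NODE);
* }</pre>
*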
* @param sz
* size of primary input vector
* @param rng
* range of randomly-generated values to use
* @param sp
* sparsity of generated data
* @param rt
* backend platform to test
*/
protected void testScaleWithR(SIZE sz, RANGE rng, SPARSITY sp, ExecMode rt) {
ExecMode oldrt = rtplatform;
rtplatform = rt;
try {
TestConfiguration config = getTestConfiguration("Scale");
config.addVariable("rows1", sz.size);
config.addVariable("rows2", rows2);
loadTestConfiguration(config);
// Run the JUnit test the new way, i.e., by constructing the DML script arguments directly
String S_HOME = SCRIPT_DIR + TEST_DIR;
fullDMLScriptName = S_HOME + "Scale" + ".dml";
programArgs = new String[] { "-args",
input("vector"), Integer.toString(sz.size),
input("prob"), Integer.toString(rows2),
output("mean"), output("std"),
output("se"), output("var"),
output("cv"), output("min"),
output("max"), output("rng"),
output("g1"), output("se_g1"),
output("g2"), output("se_g2"),
output("median"),
output("iqm"),
output("out_minus"),
output("out_plus"),
output("quantile") };
fullRScriptName = S_HOME + "Scale" + ".R";
rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();
double[][] vector = getRandomMatrix(sz.size, 1, rng.min, rng.max, sp.sparsity, 20160124165501L);
double[][] prob = getRandomMatrix(rows2, 1, 0, 1, 1, 20160124165502L);
writeInputMatrix("vector", vector, true);
writeInputMatrix("prob", prob, true);
runTest(true, false, null, -1);
runRScript(true);
for (String file : config.getOutputFiles()) {
// Note that some output files do not contain a matrix, but just a single
// scalar value
HashMap<CellIndex, Double> dmlfile;
HashMap<CellIndex, Double> rfile;
if (file.endsWith(".scalar")) {
file = file.replace(".scalar", "");
dmlfile = readDMLScalarFromOutputDir(file);
rfile = readRScalarFromExpectedDir(file);
} else {
dmlfile = readDMLMatrixFromOutputDir(file);
rfile = readRMatrixFromExpectedDir(file);
}
TestUtils.compareMatrices(dmlfile, rfile, epsilon, file
+ "-DML", file + "-R");
}
} finally {
rtplatform = oldrt;
}
}
/**
* Shared test driver for tests of univariate statistics over continuous,
* scaled, and weighted data
*
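* <p>Illustrative call (the values shown are an arbitrary example); unlike the
* unweighted driver, this variant also generates, rounds, and writes an integer
* "weight" vector before running the DML and R scripts:
*
* <pre>{@code
* testWeightedScaleWithR(SIZE.DIV4P3, RANGE.NEG, SPARSITY.DENSE, ExecMode.HYBRID);
* }</pre>
*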
* @param sz
* size of primary input vector
* @param rng
* range of randomly-generated values to use
* @param sp
* sparsity of generated data
* @param rt
* backend platform to test
*/
protected void testWeightedScaleWithR(SIZE sz, RANGE rng, SPARSITY sp,
ExecMode rt) {
ExecMode oldrt = rtplatform;
rtplatform = rt;
try {
TestConfiguration config = getTestConfiguration("WeightedScaleTest");
config.addVariable("rows1", sz.size);
config.addVariable("rows2", rows2);
loadTestConfiguration(config);
// Run the JUnit test the new way, i.e., by constructing the DML script arguments directly
String S_HOME = SCRIPT_DIR + TEST_DIR;
fullDMLScriptName = S_HOME + "WeightedScaleTest" + ".dml";
programArgs = new String[] { "-args",
input("vector"), Integer.toString(sz.size),
input("weight"), input("prob"),
Integer.toString(rows2), output("mean"),
output("std"), output("se"),
output("var"), output("cv"),
output("min"), output("max"),
output("rng"), output("g1"),
output("se_g1"), output("g2"),
output("se_g2"),
output("median"),
output("iqm"),
output("out_minus"),
output("out_plus"),
output("quantile") };
fullRScriptName = S_HOME + "WeightedScaleTest" + ".R";
rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();
createHelperMatrix();
double[][] vector = getRandomMatrix(sz.size, 1, rng.min, rng.max, sp.sparsity, 20160124165503L);
double[][] weight = getRandomMatrix(sz.size, 1, 1, 10, 1, 20160124165504L);
OrderStatisticsTest.round(weight);
double[][] prob = getRandomMatrix(rows2, 1, 0, 1, 1, 20160124165505L);
writeInputMatrix("vector", vector, true);
writeInputMatrix("weight", weight, true);
writeInputMatrix("prob", prob, true);
runTest(true, false, null, -1);
runRScript(true);
for (String file : config.getOutputFiles()) {
// Note that some output files do not contain a matrix, but just a single
// scalar value
HashMap<CellIndex, Double> dmlfile;
HashMap<CellIndex, Double> rfile;
if (file.endsWith(".scalar")) {
file = file.replace(".scalar", "");
dmlfile = readDMLScalarFromOutputDir(file);
rfile = readRScalarFromExpectedDir(file);
} else {
dmlfile = readDMLMatrixFromOutputDir(file);
rfile = readRMatrixFromExpectedDir(file);
}
TestUtils.compareMatrices(dmlfile, rfile, epsilon, file
+ "-DML", file + "-R");
}
} finally {
// reset runtime platform
rtplatform = oldrt;
}
}
}