blob: cd2ee719da3405ea39f99fb5d400ae2fd7791357 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.test.functions.frame;
import org.apache.sysds.api.DMLScript;
import org.apache.sysds.common.Types;
import org.apache.sysds.hops.OptimizerUtils;
import org.apache.sysds.lops.LopProperties;
import org.apache.sysds.runtime.io.FrameWriter;
import org.apache.sysds.runtime.io.FrameWriterFactory;
import org.apache.sysds.runtime.matrix.data.FrameBlock;
import org.apache.sysds.runtime.util.UtilFunctions;
import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.Collectors;
public class FrameDropInvalidLengthTest extends AutomatedTestBase {
private final static String TEST_NAME = "DropInvalidLength";
private final static String TEST_DIR = "functions/frame/";
private static final String TEST_CLASS_DIR = TEST_DIR + FrameDropInvalidLengthTest.class.getSimpleName() + "/";
private final static int rows = 800;
private final static int cols = 4;
private final static Types.ValueType[] schemaStrings = {Types.ValueType.FP64, Types.ValueType.STRING, Types.ValueType.STRING, Types.ValueType.INT64};
public static void init() {
TestUtils.clearDirectory(TEST_DATA_DIR + TEST_CLASS_DIR);
}
public static void cleanUp() {
if (TEST_CACHE_ENABLED) {
TestUtils.clearDirectory(TEST_DATA_DIR + TEST_CLASS_DIR);
}
}
@Override
public void setUp() {
TestUtils.clearAssertionInformation();
addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] {"B"}));
if (TEST_CACHE_ENABLED) {
setOutAndExpectedDeletionDisabled(true);
}
}
@Test
public void testTwoBadColCP() {
double[][] invalidLength = {{-1,30,20,-1}};
runDropInvalidLenTest( invalidLength,1, LopProperties.ExecType.CP);
}
@Test
public void testTwoBadColSP() {
double[][] invalidLength = {{-1,30,20,-1}};
runDropInvalidLenTest( invalidLength,1, LopProperties.ExecType.SPARK);
}
@Test
public void testOneBadColCP() {
double[][] invalidLength = {{-1,-1,20,-1}};
runDropInvalidLenTest( invalidLength,2, LopProperties.ExecType.CP);
}
@Test
public void testOneBadColSP() {
double[][] invalidLength = {{-1,-1,20,-1}};
runDropInvalidLenTest( invalidLength,2, LopProperties.ExecType.SPARK);
}
@Test
public void testAllBadColCP() {
double[][] invalidLength = {{2,2,2,1}};
runDropInvalidLenTest( invalidLength,3, LopProperties.ExecType.CP);
}
@Test
public void testAllBadColSP() {
double[][] invalidLength = {{2,2,2,1}};
runDropInvalidLenTest( invalidLength,3, LopProperties.ExecType.SPARK);
}
@Test
public void testNoneBadColCP() {
double[][] invalidLength = {{-1,20,20,-1}};
runDropInvalidLenTest( invalidLength,4, LopProperties.ExecType.CP);
}
@Test
public void testNoneBadColSP() {
double[][] invalidLength = {{-1,20,20,-1}};
runDropInvalidLenTest( invalidLength,4, LopProperties.ExecType.SPARK);
}
private void runDropInvalidLenTest(double[][] colInvalidLength, int test, LopProperties.ExecType et)
{
Types.ExecMode platformOld = setExecMode(et);
boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
try {
getAndLoadTestConfiguration(TEST_NAME);
String HOME = SCRIPT_DIR + TEST_DIR;
fullDMLScriptName = HOME + TEST_NAME + ".dml";
programArgs = new String[] {"-args", input("A"), input("M"),
String.valueOf(rows), Integer.toString(cols), output("B")};
FrameBlock frame1 = new FrameBlock(schemaStrings);
double[][] A = getRandomMatrix(rows, cols, 10, 100, 1, 2373);
initFrameDataString(frame1,A, schemaStrings); // initialize a frame with one column
FrameWriter writer = FrameWriterFactory.createFrameWriter(Types.FileFormat.CSV);
ArrayList<Integer> badIndex = getBadIndexes(rows/4);
int expected = 0;
switch (test) { //Double in String
case 1:
for (int i = 0; i < badIndex.size(); i++) {
frame1.set(badIndex.get(i),1,"This is a very long sentence that could" +
" count up to multiple characters");
}
expected += badIndex.size();
case 2:
for (int i = 0; i < badIndex.size(); i++) {
frame1.set(badIndex.get(i), 2, "This is out of length");
}
expected += badIndex.size();
break;
case 3:
expected += rows*cols;
break;
case 4:
expected += 0.0;
break;
}
// write data frame
writer.writeFrameToHDFS(
frame1.slice(0, rows - 1, 0, cols-1, new FrameBlock()),
input("A"), rows, schemaStrings.length);
// write expected feature length vector
writeInputMatrixWithMTD("M", colInvalidLength, true);
runTest(true, false, null, -1);
// compare output
FrameBlock frameOut = readDMLFrameFromHDFS("B", Types.FileFormat.BINARY);
//read output data and compare results
ArrayList<Object> data = new ArrayList<>();
for (int i = 0; i < frameOut.getNumRows(); i++)
for(int j=0; j < frameOut.getNumColumns(); j++ )
data.add(frameOut.get(i, j));
int nullNum = Math.toIntExact(data.stream().filter(s -> s == null).count());
Assert.assertEquals(expected, nullNum, 1e-5);
}
catch (Exception ex) {
throw new RuntimeException(ex);
}
finally {
rtplatform = platformOld;
DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag;
OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true;
OptimizerUtils.ALLOW_OPERATOR_FUSION = true;
}
}
private static ArrayList<Integer> getBadIndexes(int length) {
ArrayList<Integer> list = new ArrayList<>();
for(int i =0; i<length; i++)
{
int r = ThreadLocalRandom.current().nextInt(0, rows);
list.add(r);
}
return (ArrayList<Integer>) list.stream()
.distinct().collect(Collectors.toList());
}
public static void initFrameDataString(FrameBlock frame1, double[][] data, Types.ValueType[] lschema) {
for (int j = 0; j < lschema.length; j++) {
Types.ValueType vt = lschema[j];
switch (vt) {
case STRING:
String[] tmp1 = new String[rows];
for (int i = 0; i < rows; i++)
tmp1[i] = (String) UtilFunctions.doubleToObject(vt, data[i][j]);
frame1.appendColumn(tmp1);
break;
case INT64:
long[] tmp4 = new long[rows];
for (int i = 0; i < rows; i++)
data[i][j] = tmp4[i] = (Long) UtilFunctions.doubleToObject(Types.ValueType.INT64,
data[i][j], false);
frame1.appendColumn(tmp4);
break;
case FP64:
double[] tmp6 = new double[rows];
for (int i = 0; i < rows; i++)
tmp6[i] = (Double) UtilFunctions.doubleToObject(vt, data[i][j], false);
frame1.appendColumn(tmp6);
break;
default:
throw new RuntimeException("Unsupported value type: " + vt);
}
}
}
}