/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.test.functions.frame;
import org.apache.sysds.api.DMLScript;
import org.apache.sysds.common.Types;
import org.apache.sysds.common.Types.FileFormat;
import org.apache.sysds.common.Types.ValueType;
import org.apache.sysds.hops.OptimizerUtils;
import org.apache.sysds.lops.LopProperties;
import org.apache.sysds.runtime.io.FrameWriter;
import org.apache.sysds.runtime.io.FrameWriterFactory;
import org.apache.sysds.runtime.matrix.data.FrameBlock;
import org.apache.sysds.runtime.util.UtilFunctions;
import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Arrays;
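/**
 * Tests dropping of frame cells whose value does not match the declared
 * column type (companion script: DropInvalidType.dml under functions/frame/,
 * presumably built around the dropInvalidType frame builtin).
 *
 * Each scenario writes a two-column data frame as input A and a one-row
 * frame M whose string entries declare the intended value type per column,
 * then runs the script with $1=A, $2=M, $3=rows, $4=cols, $5=B and counts
 * the null cells in the second column of the result B. Scenarios marked as
 * "ignore" expect zero nulls (the injected values are compatible with the
 * declared type); all others expect exactly {@code badValues} nulls.
 */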
public class FrameDropInvalidTypeTest extends AutomatedTestBase
{
private final static String TEST_NAME = "DropInvalidType";
private final static String TEST_DIR = "functions/frame/";
private static final String TEST_CLASS_DIR = TEST_DIR + FrameDropInvalidTypeTest.class.getSimpleName() + "/";
private final static int rows = 20;
private final static ValueType[] schemaStrings = {ValueType.FP64, ValueType.STRING};
@BeforeClass
public static void init() {
TestUtils.clearDirectory(TEST_DATA_DIR + TEST_CLASS_DIR);
}
@AfterClass
public static void cleanUp() {
if (TEST_CACHE_ENABLED) {
TestUtils.clearDirectory(TEST_DATA_DIR + TEST_CLASS_DIR);
}
}
@Override
public void setUp() {
TestUtils.clearAssertionInformation();
addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] {"B"}));
if (TEST_CACHE_ENABLED) {
setOutAndExpectedDeletionDisabled(true);
}
}
@Test
public void testDoubleInStringCP() {
// Verifies that floating-point values in a STRING column are valid and not removed
runIsCorrectTest(schemaStrings, rows, schemaStrings.length, 3, 1, LopProperties.ExecType.CP, true);
}
@Test
public void testDoubleInStringSpark() {
// Verifies that floating-point values in a STRING column are valid and not removed
runIsCorrectTest(schemaStrings, rows, schemaStrings.length, 3, 1, LopProperties.ExecType.SPARK, true);
}
@Test
public void testStringInDouble() {
// Verifies that string values in an FP64 column are invalid and removed
runIsCorrectTest(schemaStrings, rows, schemaStrings.length, 3, 2, LopProperties.ExecType.CP);
}
@Test
public void testStringInDoubleSpark() {
// Verifies that string values in an FP64 column are invalid and removed
runIsCorrectTest(schemaStrings, rows, schemaStrings.length, 3, 2, LopProperties.ExecType.SPARK);
}
@Test
public void testDoubleInFloat() {
// Verifies that FP64-range values in a column declared FP32 are accepted (not removed)
runIsCorrectTest(schemaStrings, rows, schemaStrings.length, 5, 3, LopProperties.ExecType.CP,true);
}
@Test
public void testDoubleInFloatSpark() {
// Verifies that FP64-range values in a column declared FP32 are accepted (not removed)
runIsCorrectTest(schemaStrings, rows, schemaStrings.length, 5, 3, LopProperties.ExecType.SPARK, true);
}
@Test
public void testLongInInt() {
// Verifies that INT64-range values in a column declared INT32 are accepted (not removed)
runIsCorrectTest(schemaStrings, rows, schemaStrings.length, 5, 4, LopProperties.ExecType.CP, true);
}
@Test
public void testLongInIntSpark() {
// Verifies that INT64-range values in a column declared INT32 are accepted (not removed)
runIsCorrectTest(schemaStrings, rows, schemaStrings.length, 5, 4, LopProperties.ExecType.SPARK, true);
}
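/*
 * Shared test driver. Parameters (as used above):
 *   badValues - number of cells overwritten with values of a different type
 *   test      - scenario: 1 = double in string, 2 = string in double,
 *               3 = double in float, 4 = long in int
 *   et        - execution backend (CP or Spark)
 *   ignore    - true if the injected values are expected to be accepted,
 *               i.e., the resulting null count must be 0
 */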
private void runIsCorrectTest(ValueType[] schema, int rows, int cols,
int badValues, int test, LopProperties.ExecType et){
runIsCorrectTest(schema, rows, cols, badValues, test, et, false);
}
private void runIsCorrectTest(ValueType[] schema, int rows, int cols,
int badValues, int test, LopProperties.ExecType et, boolean ignore)
{
Types.ExecMode platformOld = setExecMode(et);
boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
try {
getAndLoadTestConfiguration(TEST_NAME);
String HOME = SCRIPT_DIR + TEST_DIR;
fullDMLScriptName = HOME + TEST_NAME + ".dml";
programArgs = new String[] {"-args", input("A"), input("M"),
String.valueOf(rows), Integer.toString(cols), output("B")};
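// frame1 collects the actual data (written as input A); frame2 is a one-row
// frame (written as input M) whose string entries declare the target value
// type of each column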
FrameBlock frame1 = new FrameBlock(schema);
FrameWriter writer = FrameWriterFactory.createFrameWriter(FileFormat.CSV);
FrameBlock frame2 = new FrameBlock(UtilFunctions.nCopies(cols, Types.ValueType.STRING));
String[] meta = new String[]{"FP64", "STRING"};
initFrameDataString(frame1); // initialize a frame with one column
switch (test) {
case 1: // Double in string
String[] S = new String[rows];
Arrays.fill(S, "string_value");
for (int i = 0; i < badValues; i++)
S[i] = "0.1345672225";
frame1.appendColumn(S);
break;
case 2: { // String in double
double[][] D = getRandomMatrix(rows, 1, 1, 10, 0.7, 2373);
String[] tmp1 = new String[rows];
for (int i = 0; i < rows; i++)
tmp1[i] = (String) UtilFunctions.doubleToObject(ValueType.STRING, D[i][0], false);
frame1.appendColumn(tmp1);
for (int i = 0; i < badValues; i++)
frame1.set(i, 1, "string_value");
meta[meta.length - 1] = "FP64";
break;
}
case 3: {//Double in float
double[][] D = getRandomMatrix(rows, 1, 1, 10, 0.7, 2373);
String[] tmp1 = new String[rows];
for (int i = 0; i < rows; i++)
tmp1[i] = (String) UtilFunctions.doubleToObject(ValueType.STRING, D[i][0], false);
frame1.appendColumn(tmp1);
for (int i = 0; i < badValues; i++)
frame1.set(i, 1, "1234567890123456768E40");
meta[meta.length - 1] = "FP32";
break;
}
case 4: { // long in int
String[] tmp1 = new String[rows];
for (int i = 0; i < rows; i++)
tmp1[i] = String.valueOf(i);
for (int i = 0; i < badValues; i++)
tmp1[i] = "12345678910111212";
frame1.appendColumn(tmp1);
meta[meta.length - 1] = "INT32";
break;
}
}
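// write the first two columns of the data frame as A and the one-row type
// frame as M, then execute the DML script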
writer.writeFrameToHDFS(
frame1.slice(0, rows - 1, 0, 1, new FrameBlock()),
input("A"), rows, schema.length);
frame2.appendRow(meta);
writer.writeFrameToHDFS(frame2, input("M"), 1, schema.length);
runTest(true, false, null, -1);
//read output data and compare results
FrameBlock frameout = readDMLFrameFromHDFS("B", FileFormat.BINARY);
ArrayList<Object> data = new ArrayList<>();
for (int i = 0; i < frameout.getNumRows(); i++)
data.add(frameout.get(i, 1));
int nullNum = Math.toIntExact(data.stream().filter(s -> s == null).count());
//verify the number of cells that were set to null
Assert.assertEquals("Wrong number of null values: " + nullNum + ".", ignore ? 0 : badValues, nullNum);
}
catch (Exception ex) {
throw new RuntimeException(ex);
}
finally {
rtplatform = platformOld;
DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag;
OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true;
OptimizerUtils.ALLOW_OPERATOR_FUSION = true;
}
}
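// Appends a column of random FP64 values between Float.MAX_VALUE and
// Double.MAX_VALUE (~70% non-zero) to the given frame.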
private void initFrameDataString(FrameBlock frame1) {
double[][] A = getRandomMatrix(rows, 1, Float.MAX_VALUE, Double.MAX_VALUE, 0.7, 2373);
double[] tmp6 = new double[rows];
for (int i = 0; i < rows; i++)
tmp6[i] = (Double) UtilFunctions.doubleToObject(ValueType.FP64, A[i][0], false);
frame1.appendColumn(tmp6);
}
}