blob: a97c7ea1d654316f31ddd98265ad9c2e7f1b8b8e [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ambari.view.hive2.resources.upload;
import org.apache.ambari.view.hive2.client.ColumnDescription;
import org.apache.ambari.view.hive2.client.ColumnDescriptionShort;
import org.apache.ambari.view.hive2.client.Row;
import org.apache.ambari.view.hive2.resources.uploads.ColumnDescriptionImpl;
import org.apache.ambari.view.hive2.resources.uploads.parsers.DataParser;
import org.apache.ambari.view.hive2.resources.uploads.parsers.ParseOptions;
import org.apache.ambari.view.hive2.resources.uploads.parsers.PreviewData;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.io.StringReader;
/**
 * Tests for the CSV preview produced by {@link DataParser#parsePreview()}: header extraction,
 * column datatype detection and the handling of rows with extra, missing or empty columns.
 */
public class DataParserCSVTest {

  /**
   * Creates parse options for CSV input without setting any header option.
   *
   * @return parse options with only the file type set to CSV
   */
  private static ParseOptions csvOptions() {
    ParseOptions parseOptions = new ParseOptions();
    parseOptions.setOption(ParseOptions.OPTIONS_FILE_TYPE, ParseOptions.InputFileType.CSV.toString());
    return parseOptions;
  }

  /**
   * Creates parse options for CSV input with the header option set to HEADER.FIRST_RECORD.
   *
   * @return parse options with the file type set to CSV and the header set to FIRST_RECORD
   */
  private static ParseOptions csvOptionsWithFirstRecordHeader() {
    ParseOptions parseOptions = csvOptions();
    parseOptions.setOption(ParseOptions.OPTIONS_HEADER, ParseOptions.HEADER.FIRST_RECORD.toString());
    return parseOptions;
  }

  /**
   * With HEADER.FIRST_RECORD the first record is expected to supply the column names and must not
   * show up among the preview rows.
   *
   * @throws Exception if parsing fails
   */
  @Test
  public void testParsePreviewCSV() throws Exception {
    String str = "1,a\n" +
                 "2,b\n" +
                 "3,c\n";

    try (
      StringReader sr = new StringReader(str);
      DataParser dp = new DataParser(sr, csvOptionsWithFirstRecordHeader())
    ) {
      PreviewData pd = dp.parsePreview();
      Assert.assertNotNull(pd.getPreviewRows());
      Assert.assertNotNull(pd.getHeader());
      // only 2 rows: the first record is the header and is not returned as a preview row
      Assert.assertEquals(2, pd.getPreviewRows().size());
      Assert.assertEquals(2, pd.getHeader().size());

      ColumnDescription[] cd = {
        new ColumnDescriptionImpl("1", ColumnDescriptionShort.DataTypes.INT.toString(), 0),
        new ColumnDescriptionImpl("a", ColumnDescriptionShort.DataTypes.CHAR.toString(), 1)};

      Row[] rows = {
        new Row(new Object[]{"2", "b"}),
        new Row(new Object[]{"3", "c"})};

      Assert.assertArrayEquals("Header Not Correct.", cd, pd.getHeader().toArray());
      Assert.assertArrayEquals("Rows Not Correct.", rows, pd.getPreviewRows().toArray());
    }
  }

  /**
   * A single preview value that does not fit a narrower datatype is expected to widen the datatype
   * detected for the whole column. When the first record is the header, its values are expected
   * not to be accounted for while detecting datatypes.
   *
   * @throws Exception if parsing fails
   */
  @Test
  public void testParsePreviewDataTypeDetectionCSV() throws Exception {
    String str = "1,a,10,k\n" +
                 "2,b,6,8\n" +
                 "2.2,b,7,9\n" +
                 "2,b,abc,1\n" +
                 "2,b,9,3\n" +
                 "2,b,8,5\n" +
                 "2,b,7,3\n" +
                 "2,b,6,3\n" +
                 "3,c,c,3\n";

    try (
      StringReader sr = new StringReader(str);
      DataParser dp = new DataParser(sr, csvOptionsWithFirstRecordHeader())
    ) {
      PreviewData pd = dp.parsePreview();
      Assert.assertNotNull(pd.getHeader());
      Assert.assertEquals(4, pd.getHeader().size());

      ColumnDescription[] cd = {
        // as row 3 contains 2.2
        new ColumnDescriptionImpl("1", ColumnDescriptionShort.DataTypes.DOUBLE.toString(), 0),
        // as all are chars
        new ColumnDescriptionImpl("a", ColumnDescriptionShort.DataTypes.CHAR.toString(), 1),
        // as row 4 contains abc
        new ColumnDescriptionImpl("10", ColumnDescriptionShort.DataTypes.STRING.toString(), 2),
        // row 1 contains k, but it is the header and is not counted when detecting the datatype
        new ColumnDescriptionImpl("k", ColumnDescriptionShort.DataTypes.INT.toString(), 3)};

      Assert.assertArrayEquals("Header Not Correct.", cd, pd.getHeader().toArray());
    }
  }

  /**
   * Same scenario as {@link #testParsePreviewDataTypeDetectionCSV()}, but with a column mixing
   * int, double and char values and a last column that has a char value in a data row (not only
   * in the header).
   *
   * @throws Exception if parsing fails
   */
  @Test
  public void testParsePreviewDataTypeDetection2CSV() throws Exception {
    String str = "1,a,10,k\n" +
                 "2,b,6,p\n" +
                 "2.2,b,7,9\n" +
                 "2,b,2.2,1\n" +
                 "2,b,9,3\n" +
                 "2,b,8,5\n" +
                 "2,b,7,3\n" +
                 "2,b,6,3\n" +
                 "3,c,c,3\n";

    try (
      StringReader sr = new StringReader(str);
      DataParser dp = new DataParser(sr, csvOptionsWithFirstRecordHeader())
    ) {
      PreviewData pd = dp.parsePreview();
      Assert.assertNotNull(pd.getHeader());
      Assert.assertEquals(4, pd.getHeader().size());

      ColumnDescription[] cd = {
        // as row 3 contains 2.2
        new ColumnDescriptionImpl("1", ColumnDescriptionShort.DataTypes.DOUBLE.toString(), 0),
        // as all are chars
        new ColumnDescriptionImpl("a", ColumnDescriptionShort.DataTypes.CHAR.toString(), 1),
        // some values are int, some char and some double: only 'string' satisfies all the rows
        new ColumnDescriptionImpl("10", ColumnDescriptionShort.DataTypes.STRING.toString(), 2),
        // row 1 contains k, but it is the header and is not counted when detecting the datatype;
        // row 2 however contains the char p, which is accounted for in datatype detection
        new ColumnDescriptionImpl("k", ColumnDescriptionShort.DataTypes.CHAR.toString(), 3)};

      Assert.assertArrayEquals("Header Not Correct.", cd, pd.getHeader().toArray());
    }
  }

  /**
   * A one-row CSV is expected to yield default column names ("column1", "column2") and that single
   * row in the preview when HEADER.PROVIDED_BY_USER is selected.
   *
   * @throws Exception if parsing fails
   */
  @Test
  public void testParsePreview1RowCSV() throws Exception {
    String str = "1,a\n";

    ParseOptions parseOptions = csvOptions();
    parseOptions.setOption(ParseOptions.OPTIONS_HEADER, ParseOptions.HEADER.PROVIDED_BY_USER.toString());

    try (
      StringReader sr = new StringReader(str);
      DataParser dp = new DataParser(sr, parseOptions)
    ) {
      PreviewData pd = dp.parsePreview();
      Assert.assertNotNull(pd.getPreviewRows());
      Assert.assertNotNull(pd.getHeader());
      Assert.assertEquals(1, pd.getPreviewRows().size());
      Assert.assertEquals(2, pd.getHeader().size());

      ColumnDescription[] cd = {
        new ColumnDescriptionImpl("column1", ColumnDescriptionShort.DataTypes.INT.toString(), 0),
        new ColumnDescriptionImpl("column2", ColumnDescriptionShort.DataTypes.CHAR.toString(), 1)};

      Row[] rows = {new Row(new Object[]{"1", "a"})};

      Assert.assertArrayEquals("Header Not Correct.", cd, pd.getHeader().toArray());
      Assert.assertArrayEquals("Rows Not Correct.", rows, pd.getPreviewRows().toArray());
    }
  }

  /**
   * A one-row CSV is expected to make the preview throw {@link java.util.NoSuchElementException}
   * when HEADER.FIRST_RECORD is selected.
   *
   * @throws Exception the expected {@link java.util.NoSuchElementException}, or any other parsing failure
   */
  @Test(expected = java.util.NoSuchElementException.class)
  public void testParsePreview1RowCSVFirstRowHeader() throws Exception {
    String str = "col1,col2\n";

    try (
      StringReader sr = new StringReader(str);
      DataParser dp = new DataParser(sr, csvOptionsWithFirstRecordHeader())
    ) {
      // the result is irrelevant: the call itself is expected to throw
      dp.parsePreview();
    }
  }

  /**
   * More columns in a row than in the first row: the extra columns are expected to be ignored.
   * Here the first row has 2 columns and the second row has 3, so the value 'x' is dropped.
   *
   * @throws Exception if parsing fails
   */
  @Test
  public void testParsePreviewCSVMoreColumns() throws Exception {
    String str = "1,a\n" +
                 "2,b,x\n" +  // contains 3 cols, more number of columns
                 "3,c\n";

    try (
      StringReader sr = new StringReader(str);
      DataParser dp = new DataParser(sr, csvOptionsWithFirstRecordHeader())
    ) {
      PreviewData pd = dp.parsePreview();
      Row row = new Row(new Object[]{"2", "b"});

      Assert.assertArrayEquals("Additional columns not properly handled.",
        row.getRow(), pd.getPreviewRows().get(0).getRow());
    }
  }

  /**
   * Fewer columns in a row than in the first row: the missing values are expected to be null.
   *
   * @throws Exception if parsing fails
   */
  @Test
  public void testParsePreviewCSVLessColumns() throws Exception {
    String str = "1,a\n" +
                 "2\n" +  // contains 1 col, less number of columns
                 "3,c\n";

    // NOTE(review): unlike the other tests no header option is set here, so whatever default the
    // parser applies is in effect - confirm that preview row index 1 is the short row under it.
    try (
      StringReader sr = new StringReader(str);
      DataParser dp = new DataParser(sr, csvOptions())
    ) {
      PreviewData pd = dp.parsePreview();

      Assert.assertNull("Missing value not detected as null.", pd.getPreviewRows().get(1).getRow()[1]);
    }
  }

  /**
   * An empty value in the middle of a row is expected to be parsed as an empty string.
   *
   * @throws Exception if parsing fails
   */
  @Test
  public void testEmptyColumn() throws Exception {
    String str = "1,a,x\n" +
                 "2,,y\n" +  // second column is empty
                 "3,c,z\n";

    try (
      StringReader sr = new StringReader(str);
      DataParser dp = new DataParser(sr, csvOptionsWithFirstRecordHeader())
    ) {
      PreviewData pd = dp.parsePreview();

      Assert.assertEquals("Empty column not detected properly.", "", pd.getPreviewRows().get(0).getRow()[1]);
    }
  }

  /**
   * Empty values are expected to be parsed as empty strings even when the last column of the row
   * is empty.
   *
   * @throws Exception if parsing fails
   */
  @Test
  public void testLastEmptyColumn() throws Exception {
    String str = "1,a,x\n" +
                 "2,,\n" +  // second and last columns are empty
                 "3,c,z\n";

    try (
      StringReader sr = new StringReader(str);
      DataParser dp = new DataParser(sr, csvOptionsWithFirstRecordHeader())
    ) {
      PreviewData pd = dp.parsePreview();
      Object[] firstPreviewRow = pd.getPreviewRows().get(0).getRow();

      Assert.assertEquals("Empty column not detected properly.", "", firstPreviewRow[1]);
      Assert.assertEquals("Empty column not detected properly.", "", firstPreviewRow[2]);
    }
  }
}