blob: 46f6cf999d14708772397f8f93677b7dddedfe23 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.spark.testsuite.badrecordloger
import java.io.File
import org.apache.spark.sql.Row
import org.apache.spark.sql.hive.HiveContext
import org.scalatest.BeforeAndAfterAll
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.spark.sql.test.util.QueryTest
import org.apache.carbondata.common.constants.LoggerAction
/**
* Test Class for detailed query on timestamp datatypes
*
*
*/
class BadRecordEmptyDataTest extends QueryTest with BeforeAndAfterAll {
var hiveContext: HiveContext = _
override def beforeAll {
try {
sql("drop table IF EXISTS emptyColumnValues")
sql("drop table IF EXISTS emptyColumnValues_false")
sql("drop table IF EXISTS empty_timestamp")
sql("drop table IF EXISTS empty_timestamp_false")
sql("drop table IF EXISTS dataloadOptionTests")
sql("drop table IF EXISTS bigtab")
CarbonProperties.getInstance().addProperty(
CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
var csvFilePath = ""
// 1. empty data for string data type - take empty value
// 2. empty data for non-string data type - Bad records/Null value based on configuration
//table should have only two records.
sql(
"""CREATE TABLE IF NOT EXISTS emptyColumnValues(ID BigInt, date Timestamp, country String,
actual_price Double, Quantity int, sold_price Decimal(19,2)) STORED AS carbondata
""")
csvFilePath = s"$resourcesPath/badrecords/emptyValues.csv"
sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE emptyColumnValues OPTIONS"
+
"('bad_records_logger_enable'='true','IS_EMPTY_DATA_BAD_RECORD'='false'," +
" 'bad_records_action'='ignore', " +
"'DELIMITER'= ',', 'QUOTECHAR'= '\"')");
// load with bad_records_logger_enable to false
sql(
"""CREATE TABLE IF NOT EXISTS emptyColumnValues_false(ID BigInt, date Timestamp, country
String,
actual_price Double, Quantity int, sold_price Decimal(19,2)) STORED AS carbondata
""")
csvFilePath = s"$resourcesPath/badrecords/emptyValues.csv"
sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE emptyColumnValues_false OPTIONS"
+ "('bad_records_logger_enable'='false', 'DELIMITER'= ',', 'QUOTECHAR'= '\"')");
// 4.1 Time stamp empty data - Bad records/Null value based on configuration
// 5. non-parsable data - Bad records/Null value based on configuration
// 6. empty line(check current one) - Bad records/Null value based on configuration
// only one value should be loadded.
sql(
"""CREATE TABLE IF NOT EXISTS empty_timestamp(ID BigInt, date Timestamp, country String,
actual_price Double, Quantity int, sold_price Decimal(19,2)) STORED AS carbondata
""")
csvFilePath = s"$resourcesPath/badrecords/emptyTimeStampValue.csv"
sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE empty_timestamp OPTIONS"
+
"('bad_records_logger_enable'='true','IS_EMPTY_DATA_BAD_RECORD'='false' ," +
"'bad_records_action'='ignore', " +
"'DELIMITER'= ',', 'QUOTECHAR'= '\"')");
// load with bad_records_logger_enable to false
sql(
"""CREATE TABLE IF NOT EXISTS empty_timestamp_false(ID BigInt, date Timestamp, country
String,
actual_price Double, Quantity int, sold_price Decimal(19,2)) STORED AS carbondata
""")
csvFilePath = s"$resourcesPath/badrecords/emptyTimeStampValue.csv"
sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE empty_timestamp_false OPTIONS"
+ "('bad_records_logger_enable'='false','IS_EMPTY_DATA_BAD_RECORD'='false'," +
" 'DELIMITER'= ',', 'QUOTECHAR'= '\"')");
} catch {
case x: Throwable => CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
}
}
test("select count(*) from empty_timestamp") {
checkAnswer(
sql("select count(*) from empty_timestamp"),
Seq(Row(2)
)
)
}
test("select count(*) from emptyColumnValues") {
checkAnswer(
sql("select count(*) from emptyColumnValues"),
Seq(Row(7)
)
)
}
test("select count(*) from emptyColumnValues_false") {
checkAnswer(
sql("select count(*) from emptyColumnValues_false"),
Seq(Row(7)
)
)
}
test("select count(*) from empty_timestamp_false") {
sql("select count(*) from empty_timestamp_false").show(100, false)
checkAnswer(
sql("select count(*) from empty_timestamp_false"),
Seq(Row(7)
)
)
}
test("test load ddl command") {
sql(
"""CREATE TABLE IF NOT EXISTS dataloadOptionTests(ID BigInt, date Timestamp, country
String,
actual_price Double, Quantity int, sold_price Decimal(19,2)) STORED AS carbondata
""")
val csvFilePath = s"$resourcesPath/badrecords/emptyTimeStampValue.csv"
try {
sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE dataloadOptionTests OPTIONS"
+ "('IS_EMPTY_DATA_BAD_RECORD'='xyz', 'DELIMITER'= ',', 'QUOTECHAR'= '\"')");
} catch {
case ex: Exception =>
assert("option IS_EMPTY_DATA_BAD_RECORD can have option either true or false"
.equals(ex.getMessage))
}
}
test("test load multiple loads- pne with valid record and one with invalid") {
sql("create table bigtab (val string, bal int) STORED AS carbondata")
intercept[Exception] {
sql(s"load data inpath '$resourcesPath/badrecords/bigtabbad.csv' into table bigtab " +
"options('DELIMITER'=',','QUOTECHAR'='\"','BAD_RECORDS_ACTION'='FAIL'," +
"'FILEHEADER'='val,bal')")
}
sql(s"load data inpath '$resourcesPath/badrecords/bigtab.csv' into table bigtab " +
"options('DELIMITER'=',','QUOTECHAR'='\"','BAD_RECORDS_ACTION'='FAIL'," +
"'FILEHEADER'='val,bal')")
checkAnswer(sql("select count(*) from bigtab"), Seq(Row(1)))
}
override def afterAll {
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
sql("drop table IF EXISTS emptyColumnValues")
sql("drop table IF EXISTS emptyColumnValues_false")
sql("drop table IF EXISTS empty_timestamp")
sql("drop table IF EXISTS empty_timestamp_false")
sql("drop table IF EXISTS dataloadOptionTests")
sql("drop table IF EXISTS bigtab")
}
}