/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.integration.spark.testsuite.dataload

import java.math.BigDecimal

import org.apache.spark.sql.Row
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterEach

import org.apache.carbondata.core.constants.{CarbonCommonConstants, CarbonLoadOptionConstants}
import org.apache.carbondata.core.datamap.Segment
import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.metadata.CarbonMetadata
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.core.util.path.CarbonTablePath
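
/**
 * Tests for basic LOAD DATA / INSERT behaviour: plain, compressed and
 * custom-delimiter CSV input, bad-record handling for measures, the 32000
 * character/byte string limit, and load-related CarbonProperties switches.
 */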
class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach {

  override def beforeEach {
    sql("DROP TABLE IF EXISTS loadtest")
    sql(
      """
        | CREATE TABLE loadtest(id int, name string, city string, age int)
        | STORED BY 'org.apache.carbondata.format'
      """.stripMargin)
  }
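
  // Looks up the segment by id through the table's metadata; used below to verify
  // that each load created a new segment.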
  private def checkSegmentExists(
      segmentId: String,
      databaseName: String,
      tableName: String): Boolean = {
    val carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName, tableName)
    val segment = Segment.getSegment(segmentId, carbonTable.getAbsoluteTableIdentifier.getTablePath)
    segment != null
  }
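
  // sample.csv contains 6 data rows; a plain load should ingest all of them.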
test("test data loading CSV file") {
val testData = s"$resourcesPath/sample.csv"
sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest")
checkAnswer(
sql("SELECT COUNT(*) FROM loadtest"),
Seq(Row(6))
)
}
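
  // The loader must not depend on a .csv extension to recognise CSV input.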
test("test data loading CSV file without extension name") {
val testData = s"$resourcesPath/sample"
sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest")
checkAnswer(
sql("SELECT COUNT(*) FROM loadtest"),
Seq(Row(4))
)
}
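
  // Gzip-compressed CSV should be decompressed transparently during load.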
test("test data loading GZIP compressed CSV file") {
val testData = s"$resourcesPath/sample.csv.gz"
sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest")
checkAnswer(
sql("SELECT COUNT(*) FROM loadtest"),
Seq(Row(4))
)
}
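
  // Likewise for bzip2-compressed input.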
test("test data loading BZIP2 compressed CSV file") {
val testData = s"$resourcesPath/sample.csv.bz2"
sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest")
checkAnswer(
sql("SELECT COUNT(*) FROM loadtest"),
Seq(Row(4))
)
}
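
  // A non-default field separator (the control character \017) can be passed
  // through the 'delimiter' load option.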
test("test data loading CSV file with delimiter char \\017") {
val testData = s"$resourcesPath/sample_withDelimiter017.csv"
sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest options ('delimiter'='\\017')")
checkAnswer(
sql("SELECT COUNT(*) FROM loadtest"),
Seq(Row(4))
)
}
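
  // Values that cannot be parsed as the declared measure type (double / decimal)
  // should load as null rather than fail the whole load.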
test("test data loading with invalid values for mesasures") {
val testData = s"$resourcesPath/invalidMeasures.csv"
sql("drop table if exists invalidMeasures")
sql("CREATE TABLE invalidMeasures (country String, salary double, age decimal(10,2)) STORED BY 'carbondata'")
sql(s"LOAD DATA LOCAL INPATH '$testData' into table invalidMeasures options('Fileheader'='country,salary,age')")
checkAnswer(
sql("SELECT * FROM invalidMeasures"),
Seq(Row("India",null,new BigDecimal("22.44")), Row("Russia",null,null), Row("USA",234.43,null))
)
}
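
  // An underscore in the table name must not confuse segment management:
  // two successive loads should produce segments 0 and 1.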
test("test data loading into table whose name has '_'") {
sql("DROP TABLE IF EXISTS load_test")
sql(""" CREATE TABLE load_test(id int, name string, city string, age int)
STORED BY 'org.apache.carbondata.format' """)
val testData = s"$resourcesPath/sample.csv"
try {
sql(s"LOAD DATA LOCAL INPATH '$testData' into table load_test")
sql(s"LOAD DATA LOCAL INPATH '$testData' into table load_test")
} catch {
case ex: Exception =>
assert(false)
}
assert(checkSegmentExists("0", "default", "load_test"))
assert(checkSegmentExists("1", "default", "load_test"))
sql("DROP TABLE load_test")
}
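
  // Forces tiny sort/batch sizes so the decimal load exercises the intermediate-file
  // merge path; the properties are restored right after the load (and again in afterEach).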
test("test load data with decimal type and sort intermediate files as 1") {
sql("drop table if exists carbon_table")
sql("drop table if exists carbonBigDecimalLoad")
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.SORT_INTERMEDIATE_FILES_LIMIT, "1")
.addProperty(CarbonCommonConstants.SORT_SIZE, "1")
.addProperty(CarbonCommonConstants.DATA_LOAD_BATCH_SIZE, "1")
sql("create table if not exists carbonBigDecimalLoad (ID Int, date Timestamp, country String, name String, phonetype String, serialname String, salary decimal(27, 10)) STORED BY 'org.apache.carbondata.format'")
sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/decimalBoundaryDataCarbon.csv' into table carbonBigDecimalLoad")
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.SORT_INTERMEDIATE_FILES_LIMIT,
CarbonCommonConstants.SORT_INTERMEDIATE_FILES_LIMIT_DEFAULT_VALUE)
.addProperty(CarbonCommonConstants.SORT_SIZE, CarbonCommonConstants.SORT_SIZE_DEFAULT_VAL)
.addProperty(CarbonCommonConstants.DATA_LOAD_BATCH_SIZE,
CarbonCommonConstants.DATA_LOAD_BATCH_SIZE_DEFAULT)
sql("drop table if exists carbon_table")
}
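
  // Strings longer than 32000 characters must be rejected on insert and on update.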
test("test insert / update with data more than 32000 characters") {
val testdata =s"$resourcesPath/32000char.csv"
sql("drop table if exists load32000chardata")
sql("drop table if exists load32000chardata_dup")
sql("CREATE TABLE load32000chardata(dim1 String, dim2 String, mes1 int) STORED BY 'org.apache.carbondata.format'")
sql("CREATE TABLE load32000chardata_dup(dim1 String, dim2 String, mes1 int) STORED BY 'org.apache.carbondata.format'")
sql(s"LOAD DATA LOCAL INPATH '$testdata' into table load32000chardata OPTIONS('FILEHEADER'='dim1,dim2,mes1')")
intercept[Exception] {
sql("insert into load32000chardata_dup select dim1,concat(load32000chardata.dim2,'aaaa'),mes1 from load32000chardata").show()
}
sql(s"LOAD DATA LOCAL INPATH '$testdata' into table load32000chardata_dup OPTIONS('FILEHEADER'='dim1,dim2,mes1')")
intercept[Exception] {
sql("update load32000chardata_dup set(load32000chardata_dup.dim2)=(select concat(load32000chardata.dim2,'aaaa') from load32000chardata)").show()
}
}
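
  // The same limit is enforced in bytes for multi-byte (unicode) data, on load,
  // insert and update alike.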
test("test load / insert / update with data more than 32000 bytes - dictionary_exclude") {
val testdata = s"$resourcesPath/unicodechar.csv"
sql("drop table if exists load32000bytes")
sql("create table load32000bytes(name string) stored by 'carbondata'")
sql("insert into table load32000bytes select 'aaa'")
assert(intercept[Exception] {
sql(s"load data local inpath '$testdata' into table load32000bytes OPTIONS ('FILEHEADER'='name')")
}.getMessage.contains("DataLoad failure: Dataload failed, String size cannot exceed 32000 bytes"))
val source = scala.io.Source.fromFile(testdata, CarbonCommonConstants.DEFAULT_CHARSET)
val data = source.mkString
intercept[Exception] {
sql(s"insert into load32000bytes values('$data')")
}
intercept[Exception] {
sql(s"update load32000bytes set(name)= ('$data')").show()
}
sql("drop table if exists load32000bytes")
}
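
  // Deleting the tablestatus file simulates a stale store; the next insert should
  // clean up the orphaned segment so only the newly inserted row is visible.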
test("test if stale folders are deleting on data load") {
sql("drop table if exists stale")
sql("create table stale(a string) stored by 'carbondata'")
sql("insert into stale values('k')")
val carbonTable = CarbonMetadata.getInstance().getCarbonTable("default", "stale")
val tableStatusFile = CarbonTablePath.getTableStatusFilePath(carbonTable.getTablePath)
FileFactory.getCarbonFile(tableStatusFile).delete()
sql("insert into stale values('k')")
checkAnswer(sql("select * from stale"), Row("k"))
}
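
  // With direct write enabled, fact data is written straight to the store path;
  // the load result must be identical. The original property value is saved and
  // restored around the test.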
test("test data loading with directly writing fact data to hdfs") {
val originStatus = CarbonProperties.getInstance().getProperty(
CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH,
CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH_DEFAULT)
CarbonProperties.getInstance().addProperty(
CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH, "true")
val testData = s"$resourcesPath/sample.csv"
sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest")
checkAnswer(
sql("SELECT COUNT(*) FROM loadtest"),
Seq(Row(6))
)
CarbonProperties.getInstance().addProperty(
CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH,
originStatus)
}
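
  // Lowers BLOCKLET_SIZE to 16000, below the 32000 page size, and verifies the load
  // still succeeds; the default is restored afterwards.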
test("test data loading with page size less than 32000") {
CarbonProperties.getInstance().addProperty(
CarbonCommonConstants.BLOCKLET_SIZE, "16000")
val testData = s"$resourcesPath/sample.csv"
sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest")
checkAnswer(
sql("SELECT COUNT(*) FROM loadtest"),
Seq(Row(6))
)
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.BLOCKLET_SIZE,
CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL)
}
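
  // Drop the test tables and reset every sort/batch property the tests may have
  // touched, so no state leaks into other suites.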
  override def afterEach {
    sql("DROP TABLE if exists loadtest")
    sql("drop table if exists invalidMeasures")
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.SORT_INTERMEDIATE_FILES_LIMIT,
        CarbonCommonConstants.SORT_INTERMEDIATE_FILES_LIMIT_DEFAULT_VALUE)
      .addProperty(CarbonCommonConstants.SORT_SIZE, CarbonCommonConstants.SORT_SIZE_DEFAULT_VAL)
      .addProperty(CarbonCommonConstants.DATA_LOAD_BATCH_SIZE,
        CarbonCommonConstants.DATA_LOAD_BATCH_SIZE_DEFAULT)
  }
}