blob: 899b38bc38c5e5b4cd7d7cccc5be929e78825b54 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.spark.testsuite.allqueries
import java.io.File
import org.scalatest.BeforeAndAfterAll
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.test.util.QueryTest
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties
/**
* Test Class for all query on multiple datatypes
*
*/
class MeasureOnlyTableTestCases extends QueryTest with BeforeAndAfterAll {
val rootPath = new File(this.getClass.getResource("/").getPath
+ "../../../..").getCanonicalPath
val path = s"$rootPath/examples/spark/src/main/resources/data.csv"
override def beforeAll {
clean
sql(s"""
| CREATE TABLE carbon_table(
| shortField SMALLINT,
| intField INT,
| bigintField BIGINT,
| doubleField DOUBLE,
| floatField FLOAT,
| decimalField DECIMAL(18,2)
| )
| STORED AS carbondata
""".stripMargin)
val path = s"$rootPath/examples/spark/src/main/resources/data.csv"
sql(
s"""
| LOAD DATA LOCAL INPATH '$path'
| INTO TABLE carbon_table
| OPTIONS('FILEHEADER'='shortField,intField,bigintField,doubleField,stringField,timestampField,decimalField,dateField,charField,floatField,complexData',
| 'COMPLEX_DELIMITER_LEVEL_1'='#')
""".stripMargin)
sql("create table if not exists carbon_table_hive (shortField SMALLINT,intField INT," +
"bigintField BIGINT,doubleField DOUBLE,stringField STRING,timestampField TIMESTAMP," +
"decimalField DECIMAL(18,2),dateField DATE,charField CHAR(5),floatField FLOAT,complexData ARRAY<STRING>)row format delimited fields terminated by ','")
sql(s"""LOAD DATA LOCAL INPATH '$path' INTO table carbon_table_hive""")
}
def clean {
sql("drop table if exists carbon_table")
sql("drop table if exists carbon_table_hive")
}
override def afterAll {
clean
}
test("SELECT sum(intField) FROM carbon_table where intField > 10") {
checkAnswer(
sql("SELECT sum(intField) FROM carbon_table where intField > 10"),
sql("SELECT sum(intField) FROM carbon_table_hive where intField > 10")
)
}
test("SELECT sum(intField),sum(doubleField) FROM carbon_table where intField > 10 OR doubleField > 10") {
checkAnswer(
sql("SELECT sum(intField),sum(doubleField) FROM carbon_table where intField > 10 OR " +
"doubleField > 10"),
sql("SELECT sum(intField),sum(doubleField) FROM carbon_table_hive where intField > 10 OR " +
"doubleField > 10")
)
}
test("SELECT sum(decimalField) FROM carbon_table") {
checkAnswer(
sql("SELECT sum(decimalField) FROM carbon_table"),
sql("SELECT sum(decimalField) FROM carbon_table_hive")
)
}
test("SELECT count(*), sum(intField) FROM carbon_table where intField > 10") {
checkAnswer(
sql("SELECT count(*), sum(intField) FROM carbon_table where intField > 10"),
sql("SELECT count(*), sum(intField) FROM carbon_table_hive where intField > 10")
)
}
test("SELECT count(*), sum(decimalField) b FROM carbon_table order by b") {
checkAnswer(
sql("SELECT count(*), sum(decimalField) b FROM carbon_table order by b"),
sql("SELECT count(*), sum(decimalField) b FROM carbon_table_hive order by b")
)
}
test("SELECT intField, sum(floatField) total FROM carbon_table group by intField order by " +
"total") {
checkAnswer(
sql("SELECT intField, sum(floatField) total FROM carbon_table group by intField order by " +
"total"),
sql("SELECT intField, sum(floatField) total FROM carbon_table_hive group by intField order " +
"by total")
)
}
test("select shortField, avg(intField+ 10) as a from carbon_table group by shortField") {
checkAnswer(
sql("select shortField, avg(intField+ 10) as a from carbon_table group by shortField"),
sql("select shortField, avg(intField+ 10) as a from carbon_table_hive group by shortField")
)
}
test("select shortField, avg(intField+ 10) as a from carbon_table group by shortField order by " +
"a") {
checkAnswer(
sql("select shortField, avg(intField+ 10) as a from carbon_table group by shortField order " +
"by a"),
sql("select shortField, avg(intField+ 10) as a from carbon_table_hive group by shortField " +
"order by a")
)
}
test("select shortField, avg(intField+ intField) as a from carbon_table group by shortField " +
"order by a") {
checkAnswer(
sql("select shortField, avg(intField+ intField) as a from carbon_table group by shortField order " +
"by a"),
sql("select shortField, avg(intField+ intField) as a from carbon_table_hive group by " +
"shortField order by a")
)
}
test("select shortField, count(intField+ 10) as a from carbon_table group by shortField order " +
"by a") {
checkAnswer(
sql("select shortField, count(intField+ 10) as a from carbon_table group by shortField " +
"order by a"),
sql("select shortField, count(intField+ 10) as a from carbon_table_hive group by shortField" +
" order by a")
)
}
test("select shortField, min(intField+ 10) as a from carbon_table group by shortField order " +
"by a") {
checkAnswer(
sql("select shortField, min(intField+ 10) as a from carbon_table group by shortField " +
"order by a"),
sql("select shortField, min(intField+ 10) as a from carbon_table_hive group by shortField " +
"order by a")
)
}
test("select shortField, max(intField+ 10) as a from carbon_table group by shortField order " +
"by a") {
checkAnswer(
sql("select shortField, count(intField+ 10) as a from carbon_table group by shortField " +
"order by a"),
sql("select shortField, count(intField+ 10) as a from carbon_table_hive group by shortField" +
" order by a")
)
}
test("select shortField, sum(distinct intField) + 10 as a from carbon_table group by shortField" +
"order by a") {
checkAnswer(
sql("select shortField, sum(distinct intField) + 10 as a from carbon_table group by " +
"shortField order by a"),
sql("select shortField, sum(distinct intField) + 10 as a from carbon_table_hive group by " +
"shortField order by a")
)
}
test("select sum(doubleField) + 7.28 as a, intField from carbon_table group by intField") {
checkAnswer(
sql("select sum(doubleField) + 7.28 as a, intField from carbon_table group by intField"),
sql("select sum(doubleField) + 7.28 as a, intField from carbon_table_hive group by intField")
)
}
test("select count(floatField) + 7.28 a, intField from carbon_table group by intField") {
checkAnswer(
sql("select count(floatField) + 7.28 a, intField from carbon_table group by intField"),
sql("select count(floatField) + 7.28 a, intField from carbon_table_hive group by intField")
)
}
test("select count(distinct floatField) + 7.28 a, intField from carbon_table group by " +
"intField") {
checkAnswer(
sql("select count(distinct floatField) + 7.28 a, intField from carbon_table group by intField"),
sql("select count(distinct floatField) + 7.28 a, intField from carbon_table_hive group" +
" by intField")
)
}
test("select count (if(doubleField>100,NULL,doubleField)) a from carbon_table") {
checkAnswer(
sql("select count (if(doubleField>100,NULL,doubleField)) a from carbon_table"),
sql("select count (if(doubleField>100,NULL,doubleField)) a from carbon_table_hive")
)
}
test("select count (if(decimalField>100,NULL,decimalField)) a from carbon_table") {
checkAnswer(
sql("select count (if(decimalField>100,NULL,decimalField)) a from carbon_table"),
sql("select count (if(decimalField>100,NULL,decimalField)) a from carbon_table_hive")
)
}
test("select avg (if(floatField>100,NULL,floatField)) a from carbon_table") {
checkAnswer(
sql("select avg (if(floatField>100,NULL,floatField)) a from carbon_table"),
sql("select avg (if(floatField>100,NULL,floatField)) a from carbon_table_hive")
)
}
test("select min (if(intField>100,NULL,intField)) a from carbon_table") {
checkAnswer(
sql("select min (if(intField>3,NULL,intField)) a from carbon_table"),
sql("select min (if(intField>3,NULL,intField)) a from carbon_table_hive")
)
}
test("select max (if(intField>5,NULL,intField)) a from carbon_table")({
checkAnswer(
sql("select max (if(intField>5,NULL,intField)) a from carbon_table"),
sql("select max (if(intField>5,NULL,intField)) a from carbon_table_hive")
)
})
test("select variance(doubleField) as a from carbon_table")({
checkAnswer(
sql("select variance(doubleField) as a from carbon_table"),
sql("select variance(doubleField) as a from carbon_table_hive")
)
})
test("select var_samp(doubleField) as a from carbon_table")({
checkAnswer(
sql("select var_samp(doubleField) as a from carbon_table"),
sql("select var_samp(doubleField) as a from carbon_table_hive")
)
})
test("select stddev_pop(doubleField) as a from carbon_table")({
checkAnswer(
sql("select stddev_pop(doubleField) as a from carbon_table"),
sql("select stddev_pop(doubleField) as a from carbon_table_hive")
)
})
//TC_106
test("select stddev_samp(doubleField) as a from carbon_table")({
checkAnswer(
sql("select stddev_samp(doubleField) as a from carbon_table"),
sql("select stddev_samp(doubleField) as a from carbon_table_hive")
)
})
test("select covar_pop(doubleField,doubleField) as a from carbon_table")({
checkAnswer(
sql("select covar_pop(doubleField,doubleField) as a from carbon_table"),
sql("select covar_pop(doubleField,doubleField) as a from carbon_table_hive")
)
})
test("select covar_samp(doubleField,doubleField) as a from carbon_table")({
checkAnswer(
sql("select covar_samp(doubleField,doubleField) as a from carbon_table"),
sql("select covar_samp(doubleField,doubleField) as a from carbon_table_hive")
)
})
test("select corr(doubleField,doubleField) as a from carbon_table")({
checkAnswer(
sql("select corr(doubleField,doubleField) as a from carbon_table"),
sql("select corr(doubleField,doubleField) as a from carbon_table_hive")
)
})
test("select percentile(bigintField,0.2) as a from carbon_table")({
checkAnswer(
sql("select percentile(bigintField,0.2) as a from carbon_table"),
sql("select percentile(bigintField,0.2) as a from carbon_table_hive"))
})
test("select last(doubleField) a from carbon_table")({
checkAnswer(
sql("select last(doubleField) a from carbon_table"),
sql("select last(doubleField) a from carbon_table_hive")
)
})
test("select intField from carbon_table where carbon_table.intField IN (3,2)")({
checkAnswer(
sql("select intField from carbon_table where carbon_table.intField IN (3,2)"),
sql("select intField from carbon_table_hive where carbon_table_hive.intField IN (3,2)")
)
})
test("select intField from carbon_table where carbon_table.intField NOT IN (3,2)")({
checkAnswer(
sql("select intField from carbon_table where carbon_table.intField NOT IN (3,2)"),
sql("select intField from carbon_table_hive where carbon_table_hive.intField NOT IN (3,2)")
)
})
test("select intField,sum(floatField) a from carbon_table group by intField order by a " +
"desc")({
checkAnswer(
sql("select intField,sum(floatField) a from carbon_table group by intField order by " +
"a desc"),
sql("select intField,sum(floatField) a from carbon_table_hive group by intField order by " +
"a desc")
)
})
test("select intField,sum(floatField) a from carbon_table group by intField order by a" +
" asc")({
checkAnswer(
sql("select intField,sum(floatField) a from carbon_table group by intField order by " +
"a asc"),
sql("select intField,sum(floatField) a from carbon_table_hive group by intField order by " +
"a asc")
)
})
test("select doubleField from carbon_table where doubleField NOT BETWEEN intField AND floatField")({
checkAnswer(
sql("select doubleField from carbon_table where doubleField NOT BETWEEN intField AND floatField"),
sql("select doubleField from carbon_table_hive where doubleField NOT BETWEEN intField AND " +
"floatField")
)
})
test("select cast(doubleField as int) as a from carbon_table limit 10")({
checkAnswer(
sql("select cast(doubleField as int) as a from carbon_table limit 10"),
sql("select cast(doubleField as int) as a from carbon_table_hive limit 10")
)
})
test("select percentile_approx(1, 0.5 ,5000) from carbon_table")({
checkAnswer(
sql("select percentile_approx(1, 0.5 ,5000) from carbon_table"),
sql("select percentile_approx(1, 0.5 ,5000) from carbon_table_hive")
)
})
test("CARBONDATA-60-union-defect")({
sql("drop table if exists carbonunion")
import sqlContext.implicits._
val df = sqlContext.sparkContext.parallelize(1 to 1000).map(x => (x, (x+100))).toDF("c1", "c2")
df.createOrReplaceTempView("sparkunion")
df.write
.format("carbondata")
.mode(SaveMode.Overwrite)
.option("tableName", "carbonunion")
.save()
checkAnswer(
sql("select c1,count(c1) from (select c1 as c1,c2 as c2 from carbonunion union all select c2 as c1,c1 as c2 from carbonunion)t where c1=200 group by c1"),
sql("select c1,count(c1) from (select c1 as c1,c2 as c2 from sparkunion union all select c2 as c1,c1 as c2 from sparkunion)t where c1=200 group by c1"))
sql("drop table if exists carbonunion")
})
test("select b.intField from carbon_table a join carbon_table b on a.intField=b.intField")({
checkAnswer(
sql("select b.intField from carbon_table a join carbon_table b on a.intField=b.intField"),
sql("select b.intField from carbon_table_hive a join carbon_table_hive b on a.intField=b.intField"))
})
}