// blob: baf2f616e4fe58421788a1584dfe4edab0219504 [file] [log] [blame]
package org.apache.carbondata.geo
import org.apache.spark.sql.Row
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.core.constants.CarbonCommonConstants
/**
 * Functional tests for carbondata tables created with the `SPATIAL_INDEX` table property.
 *
 * The suite covers:
 *  - rejection of malformed spatial index properties at CREATE TABLE time,
 *  - the sort scope / sort columns reported by `describe formatted` for a spatial table,
 *  - `IN_POLYGON` filter queries after load, after insert-select, and on a partitioned table.
 *
 * Every test creates the tables it needs; [[drop]] runs before the suite, after each test and
 * after the suite so that no table survives from one test to the next.
 */
class GeoTest extends QueryTest with BeforeAndAfterAll with BeforeAndAfterEach {
  val table1 = "geoTable1"
  val table2 = "geotable2"

  // Name used by the negative CREATE TABLE cases. It is part of the cleanup in drop() so that a
  // CREATE that unexpectedly succeeds does not leak a table into later tests or later runs.
  private val malformedTable = "malformed"

  // (longitude, latitude) rows every IN_POLYGON query in this suite is expected to return.
  val result = Seq(Row(116187332, 39979316),
    Row(116362699, 39942444),
    Row(116288955, 39999101),
    Row(116325378, 39963129),
    Row(116337069, 39951887),
    Row(116285807, 40084087))

  override def beforeAll(): Unit = {
    super.beforeAll()
    drop()
  }

  test("Invalid spatial index property") {
    // Index name must not match with table column name. Fails to create table.
    assertCreateTableFails(
      s"""
         | CREATE TABLE $malformedTable(timevalue BIGINT, longitude LONG, latitude LONG)
         | COMMENT "This is a malformed table"
         | STORED AS carbondata
         | TBLPROPERTIES ('SPATIAL_INDEX'='longitude')
      """.stripMargin,
      "index: longitude must not match with any other column name in the table")

    // Type property is not configured. Fails to create table.
    assertCreateTableFails(
      s"""
         | CREATE TABLE $malformedTable(timevalue BIGINT, longitude LONG, latitude LONG)
         | COMMENT "This is a malformed table"
         | STORED AS carbondata
         | TBLPROPERTIES ('SPATIAL_INDEX'='mygeohash')
      """.stripMargin,
      s"${CarbonCommonConstants.SPATIAL_INDEX}.mygeohash.type property must be specified")

    // Source columns are not configured. Fails to create table.
    assertCreateTableFails(
      s"""
         | CREATE TABLE $malformedTable(timevalue BIGINT, longitude LONG, latitude LONG)
         | COMMENT "This is a malformed table"
         | STORED AS carbondata
         | TBLPROPERTIES ('SPATIAL_INDEX'='mygeohash', 'SPATIAL_INDEX.mygeohash.type'='geohash')
      """.stripMargin,
      s"${CarbonCommonConstants.SPATIAL_INDEX}.mygeohash.sourcecolumns property must be " +
      s"specified.")

    // Source columns must be present in the table. Fails to create table.
    assertCreateTableFails(
      s"""
         | CREATE TABLE $malformedTable(timevalue BIGINT, longitude LONG, latitude LONG)
         | COMMENT "This is a malformed table"
         | STORED AS carbondata
         | TBLPROPERTIES ('SPATIAL_INDEX'='mygeohash', 'SPATIAL_INDEX.mygeohash.type'='geohash',
         | 'SPATIAL_INDEX.mygeohash.sourcecolumns'='unknown1, unknown2')
      """.stripMargin,
      s"Source column: unknown1 in property " +
      s"${CarbonCommonConstants.SPATIAL_INDEX}.mygeohash.sourcecolumns must be a column in the " +
      s"table.")
  }

  test("test geo table create and load and check describe formatted") {
    createTable()
    loadData()
    // Test if spatial index column is added as a sort column
    val descTable = sql(s"describe formatted $table1").collect
    assertDescribeContains(descTable, "Sort Scope", "LOCAL_SORT")
    assertDescribeContains(descTable, "Sort Columns", "mygeohash")
  }

  test("test polygon query") {
    createTable()
    loadData()
    checkAnswer(sql(polygonQuery(table1)), result)
  }

  test("test insert into table select from another table") {
    val sourceTable = table1
    val targetTable = table2
    createTable(sourceTable)
    loadData(sourceTable)
    createTable(targetTable)
    sql(s"insert into $targetTable select * from $sourceTable")
    checkAnswer(sql(polygonQuery(targetTable)), result)
  }

  test("test insert into table select from another table with target table sort scope as global") {
    val sourceTable = table1
    val targetTable = table2
    createTable(sourceTable)
    loadData(sourceTable)
    // The extra property needs its trailing comma: createTable places it directly in front of
    // the spatial index properties inside TBLPROPERTIES (...).
    createTable(targetTable, "'SORT_SCOPE'='GLOBAL_SORT',")
    sql(s"insert into $targetTable select * from $sourceTable")
    checkAnswer(sql(polygonQuery(targetTable)), result)
  }

  test("test block pruning for polygon query") {
    createTable()
    // Ten separate single-row inserts; six of these coordinates are the rows listed in `result`.
    Seq(
      (116285807, 40084087),
      (116372142, 40129503),
      (116187332, 39979316),
      (116337069, 39951887),
      (116359102, 40154684),
      (116736367, 39970323),
      (116362699, 39942444),
      (116325378, 39963129),
      (116302895, 39930753),
      (116288955, 39999101)
    ).foreach { case (longitude, latitude) =>
      sql(s"insert into $table1 select 1575428400000,$longitude,$latitude")
    }
    val df = sql(polygonQuery(table1))
    // NOTE(review): presumably each insert produces its own block, so only the six blocks that
    // hold matching rows should be scanned - confirm against the pruning implementation.
    assert(df.rdd.getNumPartitions == 6)
    checkAnswer(df, result)
  }

  test("test polygon query on table partitioned by timevalue column") {
    sql(s"""
           | CREATE TABLE $table1(
           | longitude LONG,
           | latitude LONG) COMMENT "This is a GeoTable" PARTITIONED BY (timevalue BIGINT)
           | STORED AS carbondata
           | TBLPROPERTIES ('SPATIAL_INDEX'='mygeohash',
           | 'SPATIAL_INDEX.mygeohash.type'='geohash',
           | 'SPATIAL_INDEX.mygeohash.sourcecolumns'='longitude, latitude',
           | 'SPATIAL_INDEX.mygeohash.originLatitude'='39.832277',
           | 'SPATIAL_INDEX.mygeohash.gridSize'='50',
           | 'SPATIAL_INDEX.mygeohash.minLongitude'='115.811865',
           | 'SPATIAL_INDEX.mygeohash.maxLongitude'='116.782233',
           | 'SPATIAL_INDEX.mygeohash.minLatitude'='39.832277',
           | 'SPATIAL_INDEX.mygeohash.maxLatitude'='40.225281',
           | 'SPATIAL_INDEX.mygeohash.conversionRatio'='1000000')
      """.stripMargin)
    loadData()
    checkAnswer(sql(polygonQuery(table1)), result)
  }

  override def afterEach(): Unit = {
    // Clean up first, but always give stacked traits their turn even if the cleanup throws.
    try drop() finally super.afterEach()
  }

  override def afterAll(): Unit = {
    try drop() finally super.afterAll()
  }

  /**
   * Drops every table this suite may have created. Safe to call when none of them exist.
   */
  def drop(): Unit = {
    sql(s"drop table if exists $table1")
    sql(s"drop table if exists $table2")
    // Only present if a negative CREATE TABLE case unexpectedly succeeded.
    sql(s"drop table if exists $malformedTable")
  }

  /**
   * Creates a non-partitioned carbondata table with a geohash spatial index named `mygeohash`
   * over the `longitude` and `latitude` columns.
   *
   * @param tableName        name of the table to create
   * @param customProperties extra TBLPROPERTIES text inserted in front of the spatial index
   *                         properties; when non-empty it must end with a comma
   */
  def createTable(tableName: String = table1, customProperties: String = ""): Unit = {
    sql(s"""
           | CREATE TABLE $tableName(
           | timevalue BIGINT,
           | longitude LONG,
           | latitude LONG) COMMENT "This is a GeoTable"
           | STORED AS carbondata
           | TBLPROPERTIES ($customProperties 'SPATIAL_INDEX'='mygeohash',
           | 'SPATIAL_INDEX.mygeohash.type'='geohash',
           | 'SPATIAL_INDEX.mygeohash.sourcecolumns'='longitude, latitude',
           | 'SPATIAL_INDEX.mygeohash.originLatitude'='39.832277',
           | 'SPATIAL_INDEX.mygeohash.gridSize'='50',
           | 'SPATIAL_INDEX.mygeohash.minLongitude'='115.811865',
           | 'SPATIAL_INDEX.mygeohash.maxLongitude'='116.782233',
           | 'SPATIAL_INDEX.mygeohash.minLatitude'='39.832277',
           | 'SPATIAL_INDEX.mygeohash.maxLatitude'='40.225281',
           | 'SPATIAL_INDEX.mygeohash.conversionRatio'='1000000')
      """.stripMargin)
  }

  /**
   * Loads `geodata.csv` from the test resources directory into the given table, using a comma
   * as the field delimiter.
   *
   * @param tableName table to load into
   */
  def loadData(tableName: String = table1): Unit = {
    sql(s"""LOAD DATA local inpath '$resourcesPath/geodata.csv' INTO TABLE $tableName OPTIONS
           |('DELIMITER'= ',')""".stripMargin)
  }

  /**
   * Builds the IN_POLYGON query shared by the tests: it selects longitude and latitude from
   * `tableName`, filtered by one fixed, closed polygon (first and last point are identical).
   */
  private def polygonQuery(tableName: String): String = {
    s"select longitude, latitude from $tableName where IN_POLYGON('116.321011 40.123503, " +
    s"116.137676 39.947911, 116.560993 39.935276, 116.321011 40.123503')"
  }

  /**
   * Runs `ddl` and asserts that it is rejected with a [[MalformedCarbonCommandException]]
   * whose message contains `expectedMessage`.
   */
  private def assertCreateTableFails(ddl: String, expectedMessage: String): Unit = {
    val exception = intercept[MalformedCarbonCommandException](sql(ddl))
    val actualMessage = exception.getMessage
    assert(actualMessage != null && actualMessage.contains(expectedMessage),
      s"expected the error message to contain '$expectedMessage' but it was '$actualMessage'")
  }

  /**
   * Asserts that `rows` has a row whose first column contains `property` and that the second
   * column of the first such row contains `expected`. Fails with a descriptive message when no
   * row matches.
   */
  private def assertDescribeContains(
      rows: Array[Row],
      property: String,
      expected: String): Unit = {
    // Option(...) guards against a null cell; a null first column simply does not match.
    rows.find(row => Option(row.get(0)).exists(_.toString.contains(property))) match {
      case Some(row) =>
        val actual = String.valueOf(row.get(1))
        assert(actual.contains(expected),
          s"'$property' is '$actual' but was expected to contain '$expected'")
      case None =>
        fail(s"no '$property' row found in the describe formatted output")
    }
  }
}