/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.cluster.sdv.generated
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.metadata.CarbonMetadata
import org.apache.carbondata.core.metadata.schema.table.CarbonTable
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.spark.sql.common.util._
import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
import org.scalatest.BeforeAndAfterAll
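/**
 * SDV tests for CarbonData table bucketing ('BUCKETNUMBER' / 'BUCKETCOLUMNS' table properties):
 * rejection of invalid bucket columns, bucketing metadata on the created table, and
 * shuffle elimination when bucketed tables are joined on their bucket columns.
 */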
class BucketingTestCase extends QueryTest with BeforeAndAfterAll {
var threshold: Int = _
val timeformat = CarbonProperties.getInstance()
.getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
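// Setup: use a fixed timestamp format for the CSV loads and disable broadcast joins
// (threshold -1) so the self-joins below are planned as shuffle-based joins, making the
// presence or absence of ShuffleExchangeExec a reliable signal of whether bucketing
// removed the exchange.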
override def beforeAll {
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
threshold = sqlContext.getConf("spark.sql.autoBroadcastJoinThreshold").toInt
sqlContext.setConf("spark.sql.autoBroadcastJoinThreshold", "-1")
sql("DROP TABLE IF EXISTS bucket_table")
}
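// 'ID' is an Int column and, without DICTIONARY_INCLUDE, a measure column in CarbonData;
// bucketing on a measure column should be rejected at table creation time.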
test("test exception if bucketcolumns be measure column") {
intercept[Exception] {
sql("DROP TABLE IF EXISTS bucket_table")
sql("CREATE TABLE bucket_table (ID Int, date Timestamp, country String, name String, phonetype String," +
"serialname String, salary Int) STORED BY 'carbondata' TBLPROPERTIES " +
"('BUCKETNUMBER'='4', 'BUCKETCOLUMNS'='ID')")
}
}
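// Complex data type columns (array, struct) cannot be bucket columns;
// table creation should fail.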
test("test exception if bucketcolumns be complex data type column") {
intercept[Exception] {
sql("DROP TABLE IF EXISTS bucket_table")
sql("CREATE TABLE bucket_table (Id int, number double, name string, " +
"gamePoint array<double>, mac struct<num:double>) STORED BY 'carbondata' TBLPROPERTIES" +
"('BUCKETNUMBER'='4', 'BUCKETCOLUMNS'='gamePoint')")
}
}
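// DICTIONARY_INCLUDE turns the Int column ID into a dimension, so bucketing on it is
// allowed; verify that the created table carries bucketing info in its metadata.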
test("test Int column as bucketcolumns through dictionary_include") {
sql("DROP TABLE IF EXISTS bucket_table")
sql("CREATE TABLE bucket_table (ID Int, date Timestamp, country String, name String, phonetype String," +
"serialname String, salary Int) STORED BY 'carbondata' TBLPROPERTIES " +
"('DICTIONARY_INCLUDE'='ID','BUCKETNUMBER'='4', 'BUCKETCOLUMNS'='ID')")
sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE bucket_table")
val table: CarbonTable = CarbonMetadata.getInstance().getCarbonTable("default_bucket_table")
assert(table != null && table.getBucketingInfo("bucket_table") != null,
"Bucketing info does not exist")
}
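// Bucketing on two columns (name, phonetype); verify the bucketing info is recorded.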
test("test multi columns as bucketcolumns") {
sql("DROP TABLE IF EXISTS bucket_table")
sql("CREATE TABLE bucket_table (ID Int, date Timestamp, country String, name String, phonetype String," +
"serialname String, salary Int) STORED BY 'carbondata' TBLPROPERTIES " +
"('BUCKETNUMBER'='4', 'BUCKETCOLUMNS'='name,phonetype')")
sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE bucket_table")
val table: CarbonTable = CarbonMetadata.getInstance().getCarbonTable("default_bucket_table")
assert(table != null && table.getBucketingInfo("bucket_table") != null,
"Bucketing info does not exist")
}
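// Self-join on exactly the bucket columns (country, name): both sides are already
// partitioned by the join keys, so the plan should contain no ShuffleExchangeExec.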
test("test multi columns as bucketcolumns with bucket join") {
sql("DROP TABLE IF EXISTS bucket_table")
sql("CREATE TABLE bucket_table (ID Int, date Timestamp, country String, name String, phonetype String," +
"serialname String, salary Int) STORED BY 'carbondata' TBLPROPERTIES " +
"('BUCKETNUMBER'='4', 'BUCKETCOLUMNS'='country,name')")
sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE bucket_table")
val plan = sql(
"""
|select t1.*, t2.*
|from bucket_table t1, bucket_table t2
|where t1.country = t2.country and t1.name = t2.name
""".stripMargin).queryExecution.executedPlan
var shuffleExists = false
plan.collect {
case s: ShuffleExchangeExec => shuffleExists = true
}
assert(!shuffleExists, "shuffle should not exist on bucket column join")
}
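// Join on 'name' while the table is bucketed on 'country': the bucketing does not match
// the join key, so a shuffle exchange is still expected.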
test("test non bucket column join") {
sql("DROP TABLE IF EXISTS bucket_table")
sql("CREATE TABLE bucket_table (ID Int, date Timestamp, country String, name String, phonetype String," +
"serialname String, salary Int) STORED BY 'carbondata' TBLPROPERTIES " +
"('BUCKETNUMBER'='4', 'BUCKETCOLUMNS'='country')")
sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE bucket_table")
val plan = sql(
"""
|select t1.*, t2.*
|from bucket_table t1, bucket_table t2
|where t1.name = t2.name
""".stripMargin).queryExecution.executedPlan
var shuffleExists = false
plan.collect {
case s: ShuffleExchangeExec => shuffleExists = true
}
assert(shuffleExists, "shuffle should exist on non-bucket column join")
}
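// Load the same file several times and run a major compaction; compacted segments should
// preserve the bucketing, so a self-join on the bucket column still avoids the shuffle.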
test("test bucketcolumns through multi data loading plus compaction") {
sql("DROP TABLE IF EXISTS bucket_table")
sql("CREATE TABLE bucket_table (ID Int, date Timestamp, country String, name String, phonetype String," +
"serialname String, salary Int) STORED BY 'carbondata' TBLPROPERTIES " +
"('BUCKETNUMBER'='4', 'BUCKETCOLUMNS'='name')")
val numOfLoad = 10
for (j <- 0 until numOfLoad) {
sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE bucket_table")
}
sql("ALTER TABLE bucket_table COMPACT 'MAJOR'")
val plan = sql(
"""
|select t1.*, t2.*
|from bucket_table t1, bucket_table t2
|where t1.name = t2.name
""".stripMargin).queryExecution.executedPlan
var shuffleExists = false
plan.collect {
case s: ShuffleExchangeExec => shuffleExists = true
}
assert(!shuffleExists, "shuffle should not exist on bucket tables")
}
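// Dropping columns that are not bucket columns must keep the bucketing intact, so a
// self-join on the remaining bucket column still avoids the shuffle.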
test("drop non-bucket column, test bucket column join") {
sql("DROP TABLE IF EXISTS bucket_table")
sql("CREATE TABLE bucket_table (ID Int, date Timestamp, country String, name String, phonetype String," +
"serialname String, salary Int) STORED BY 'carbondata' TBLPROPERTIES " +
"('BUCKETNUMBER'='4', 'BUCKETCOLUMNS'='name')")
sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE bucket_table")
sql("ALTER TABLE bucket_table DROP COLUMNS (ID,country)")
val plan = sql(
"""
|select t1.*, t2.*
|from bucket_table t1, bucket_table t2
|where t1.name = t2.name
""".stripMargin).queryExecution.executedPlan
var shuffleExists = false
plan.collect {
case s: ShuffleExchangeExec => shuffleExists = true
}
assert(!shuffleExists, "shuffle should not exist on bucket tables")
}
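// Teardown: drop the test table and restore the broadcast join threshold and
// timestamp format saved in beforeAll.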
override def afterAll {
sql("DROP TABLE IF EXISTS bucket_table")
sqlContext.setConf("spark.sql.autoBroadcastJoinThreshold", threshold.toString)
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, timeformat)
}
}