/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.spark.util

import java.util.concurrent.{Callable, Executors}

import scala.collection.mutable.ListBuffer

import org.apache.spark.sql.{CarbonEnv, CarbonRelation}
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll

import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.service.impl.PathFactory
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.core.util.path.CarbonStorePath
import org.apache.carbondata.processing.loading.model.{CarbonDataLoadSchema, CarbonLoadModel}
import org.apache.carbondata.processing.util.TableOptionConstant
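
// Verifies that several threads generating the global dictionary for the same
// dictionary-encoded column concurrently all succeed and leave consistent
// dictionary and sort index files on disk.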
class GlobalDictionaryUtilConcurrentTestCase extends QueryTest with BeforeAndAfterAll {

  private val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName)

  var sampleRelation: CarbonRelation = _
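
  // Builds a CarbonLoadModel for the given CSV file and header, using the
  // table and store path resolved from the relation.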
  def buildCarbonLoadModel(relation: CarbonRelation,
      filePath: String,
      header: String): CarbonLoadModel = {
    val carbonLoadModel = new CarbonLoadModel
    val table = relation.tableMeta.carbonTable
    val carbonSchema = new CarbonDataLoadSchema(table)
    carbonLoadModel.setDatabaseName(table.getDatabaseName)
    carbonLoadModel.setTableName(table.getFactTableName)
    carbonLoadModel.setCarbonDataLoadSchema(carbonSchema)
    carbonLoadModel.setFactFilePath(filePath)
    carbonLoadModel.setCsvHeader(header)
    carbonLoadModel.setCsvDelimiter(",")
    carbonLoadModel.setComplexDelimiterLevel1("\\$")
    carbonLoadModel.setComplexDelimiterLevel2("\\:")
    carbonLoadModel.setStorePath(relation.tableMeta.storePath)
    carbonLoadModel.setQuoteChar("\"")
    carbonLoadModel.setSerializationNullFormat(
      TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName + ",\\N")
    carbonLoadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
      CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT))
    carbonLoadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.CARBON_DATE_FORMAT,
      CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT))
    carbonLoadModel.setCsvHeaderColumns(CommonUtil.getCsvHeaderColumns(carbonLoadModel))
    carbonLoadModel.setMaxColumns("2000")
    carbonLoadModel
  }
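
  // Create the test table and resolve its relation once before the tests run.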
  override def beforeAll {
    buildTable()
    buildRelation()
  }
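
  // Creates a single-column table with the column dictionary-encoded.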
  def buildTable(): Unit = {
    try {
      sql(
        "CREATE TABLE IF NOT EXISTS employee (empid STRING) STORED BY " +
        "'org.apache.carbondata.format' tblproperties('dictionary_include'='empid')")
    } catch {
      case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString)
    }
  }
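
  // Looks up the CarbonRelation for the employee table from the metastore.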
  def buildRelation(): Unit = {
    val catalog = CarbonEnv.get.carbonMetastore
    sampleRelation = catalog.lookupRelation1(Option(CarbonCommonConstants.DATABASE_DEFAULT_NAME),
      "employee")(sqlContext)
      .asInstanceOf[CarbonRelation]
  }
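
  // Writes recCount integer records, one per line, to filePath.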
  def writeDummyData(filePath: String, recCount: Int): Unit = {
    val records = new StringBuilder
    for (a <- 0 until recCount) {
      records.append(a).append("\n")
    }
    val dis = FileFactory.getDataOutputStream(filePath, FileFactory.getFileType(filePath))
    dis.writeBytes(records.toString())
    dis.close()
  }
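
  // Generates the global dictionary for the same column from several CSV files
  // in parallel, then checks the dictionary artifacts on disk.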
test("concurrent dictionary generation") {
CarbonProperties.getInstance.addProperty(CarbonCommonConstants.MAX_QUERY_EXECUTION_TIME, "-1")
val noOfFiles = 5
val files = new ListBuffer[String]()
val loadModels = new ListBuffer[CarbonLoadModel]()
for (i <- 0 until noOfFiles) {
val filePath: String = s"${integrationPath}/spark/target/singlecolumn_${10 * (i + 1)}.csv"
files += filePath
loadModels += buildCarbonLoadModel(sampleRelation, filePath, "empid")
writedummydata(filePath, 10 * (i + 1))
}
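
    // run all generators concurrently and expect each to report success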
    try {
      val dictGenerators = new java.util.ArrayList[Callable[String]](noOfFiles)
      for (i <- 0 until noOfFiles) {
        dictGenerators.add(new DictGenerator(loadModels(i)))
      }
      val executorService = Executors.newFixedThreadPool(10)
      // invokeAll blocks until every task has completed
      val results = executorService.invokeAll(dictGenerators)
      executorService.shutdown()
      for (i <- 0 until noOfFiles) {
        val res = results.get(i).get
        assert("Pass".equals(res))
      }
    } catch {
      case ex: Exception =>
        ex.printStackTrace()
        assert(false)
    }
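
    // locate the dictionary and sort index files written for the empid column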
    val carbonTableIdentifier = sampleRelation.tableMeta.carbonTable.getCarbonTableIdentifier
    val columnIdentifier = sampleRelation.tableMeta.carbonTable
      .getDimensionByName("employee", "empid").getColumnIdentifier
    val dictionaryColumnUniqueIdentifier = new DictionaryColumnUniqueIdentifier(
      carbonTableIdentifier,
      columnIdentifier,
      columnIdentifier.getDataType,
      CarbonStorePath.getCarbonTablePath(storeLocation, carbonTableIdentifier))
    val carbonTablePath = PathFactory.getInstance()
      .getCarbonTablePath(sampleRelation.tableMeta.storePath, carbonTableIdentifier,
        dictionaryColumnUniqueIdentifier)
    val dictPath = carbonTablePath.getDictionaryFilePath(columnIdentifier.getColumnId)
    val dictFile = FileFactory.getCarbonFile(dictPath, FileFactory.getFileType(dictPath))
    val offSet = dictFile.getSize
    val sortIndexPath = carbonTablePath.getSortIndexFilePath(columnIdentifier.getColumnId, offSet)
    val sortIndexFile = FileFactory.getCarbonFile(sortIndexPath,
      FileFactory.getFileType(sortIndexPath))
    assert(sortIndexFile.exists())
    val sortIndexFiles = carbonTablePath.getSortIndexFiles(sortIndexFile.getParentFile,
      columnIdentifier.getColumnId)
    assert(sortIndexFiles.length >= 1)
    deleteFiles(files)
  }
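
  // Deletes the temporary CSV files created by the test.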
  def deleteFiles(files: ListBuffer[String]): Unit = {
    for (filePath <- files) {
      val file = FileFactory.getCarbonFile(filePath, FileFactory.getFileType(filePath))
      file.delete()
    }
  }
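
  // Drops the test table and restores the query-timeout property.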
  override def afterAll {
    sql("drop table if exists employee")
    CarbonProperties.getInstance.addProperty(CarbonCommonConstants.MAX_QUERY_EXECUTION_TIME,
      Integer.toString(CarbonCommonConstants.DEFAULT_MAX_QUERY_EXECUTION_TIME))
  }
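
  // Runs dictionary generation for one load model on its own thread;
  // returns "Pass" on success, otherwise the exception message.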
  class DictGenerator(loadModel: CarbonLoadModel) extends Callable[String] {
    override def call: String = {
      var result = "Pass"
      try {
        GlobalDictionaryUtil
          .generateGlobalDictionary(sqlContext,
            loadModel,
            sampleRelation.tableMeta.storePath)
      } catch {
        case ex: Exception =>
          result = ex.getMessage
          ex.printStackTrace()
      }
      result
    }
  }
}