/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.spark.testsuite.createTable

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.spark.sql.SparkSession

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.datastore.filesystem.CarbonFile
import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
import org.apache.carbondata.sdk.file.{CarbonWriter, Schema}
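/**
 * Stand-alone example that writes CarbonData files with the SDK
 * [[CarbonWriter]] and reads them back through Spark's `carbonfile`
 * data source, without creating a regular Carbon table.
 */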
object TestSparkCarbonFileFormatWithSparkSession {
  var writerPath = new File(this.getClass.getResource("/").getPath
    + "../../src/test/resources/SparkCarbonFileFormat/WriterOutput/")
    .getCanonicalPath
  // getCanonicalPath returns a path with backslash separators on Windows,
  // while the rest of the code expects forward slashes, so normalize them here.
  writerPath = writerPath.replace("\\", "/")

  val filePath = writerPath + "/Fact/Part0/Segment_null/"
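
  /**
   * Writes 100 sample rows of (name, age, height) to [[writerPath]] using the
   * SDK CarbonWriter. When `persistSchema` is true, the schema file is
   * persisted alongside the data files.
   */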
  def buildTestData(persistSchema: Boolean): Unit = {
    FileUtils.deleteDirectory(new File(writerPath))

    val schema = new StringBuilder()
      .append("[ \n")
      .append(" {\"name\":\"string\"},\n")
      .append(" {\"age\":\"int\"},\n")
      .append(" {\"height\":\"double\"}\n")
      .append("]")
      .toString()
    try {
      val builder = CarbonWriter.builder()
      val writer =
        if (persistSchema) {
          builder.persistSchemaFile(true)
          builder.withSchema(Schema.parseJson(schema))
            .outputPath(writerPath)
            .buildWriterForCSVInput()
        } else {
          builder.withSchema(Schema.parseJson(schema))
            .outputPath(writerPath)
            .buildWriterForCSVInput()
        }
      var i = 0
      while (i < 100) {
        writer.write(Array[String](
          "robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2)))
        i += 1
      }
      writer.close()
    } catch {
      // Print the failure instead of silently swallowing it, otherwise the
      // assert on the output directory in main() fails with no hint of the cause.
      case ex: Exception => ex.printStackTrace()
    }
  }
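
  /** Deletes the SDK writer output directory. */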
  def cleanTestData(): Unit = {
    FileUtils.deleteDirectory(new File(writerPath))
  }
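
  /**
   * Recursively deletes every file under `path` whose name ends with
   * `extension` (typically the carbonindex files of the writer output).
   */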
  def deleteIndexFile(path: String, extension: String): Unit = {
    val file: CarbonFile = FileFactory
      .getCarbonFile(path, FileFactory.getFileType(path))
    for (eachDir <- file.listFiles) {
      if (!eachDir.isDirectory) {
        if (eachDir.getName.endsWith(extension)) {
          CarbonUtil.deleteFoldersAndFilesSilent(eachDir)
        }
      } else {
        deleteIndexFile(eachDir.getPath, extension)
      }
    }
  }
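
  /**
   * Sets up a local SparkSession backed by a Derby metastore, generates the
   * SDK writer output, maps it as an external table and runs a set of
   * queries against it.
   */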
  def main(args: Array[String]): Unit = {
    val rootPath = new File(this.getClass.getResource("/").getPath
      + "../../../..").getCanonicalPath
    val storeLocation = s"$rootPath/examples/spark2/target/store"
    val warehouse = s"$rootPath/examples/spark2/target/warehouse"
    val metastoredb = s"$rootPath/examples/spark2/target/metastore_db"

    // clean data folders left over from previous runs
    val clean = (path: String) => FileUtils.deleteDirectory(new File(path))
    clean(storeLocation)
    clean(warehouse)
    clean(metastoredb)
    val spark = SparkSession
      .builder()
      .master("local")
      .appName("TestSparkCarbonFileFormatWithSparkSession")
      .enableHiveSupport()
      .config("spark.sql.warehouse.dir", warehouse)
      .config("javax.jdo.option.ConnectionURL",
        s"jdbc:derby:;databaseName=$metastoredb;create=true")
      .getOrCreate()
    CarbonProperties.getInstance()
      .addProperty("carbon.storelocation", storeLocation)
    spark.sparkContext.setLogLevel("WARN")
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd HH:mm:ss")
      .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd")
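
    // Generate the SDK writer output (without persisting the schema file)
    // and verify that the segment directory was created.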
    buildTestData(false)
    assert(new File(filePath).exists())
    // Map the SDK writer output as an external table; the `carbonfile` data
    // source syntax differs between Spark 2.1 (OPTIONS) and Spark 2.2 (LOCATION).
    if (spark.sparkContext.version.startsWith("2.1")) {
      spark.sql(s"""CREATE TABLE sdkOutputTable USING carbonfile OPTIONS (PATH '$filePath') """)
    } else if (spark.sparkContext.version.startsWith("2.2")) {
      spark.sql(
        s"""CREATE TABLE sdkOutputTable USING carbonfile LOCATION
           |'$filePath' """.stripMargin)
    } else {
      // TODO: other Spark versions are not handled by this example
    }
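
    // Exercise projections, filters, LIKE predicates and aggregations
    // over the external table.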
spark.sql("Describe formatted sdkOutputTable").show(false)
spark.sql("select * from sdkOutputTable").show(false)
spark.sql("select * from sdkOutputTable limit 3").show(false)
spark.sql("select name from sdkOutputTable").show(false)
spark.sql("select age from sdkOutputTable").show(false)
spark.sql("select * from sdkOutputTable where age > 2 and age < 8").show(200,false)
spark.sql("select * from sdkOutputTable where name = 'robot3'").show(200,false)
spark.sql("select * from sdkOutputTable where name like 'robo%' limit 5").show(200,false)
spark.sql("select * from sdkOutputTable where name like '%obot%' limit 2").show(200,false)
spark.sql("select sum(age) from sdkOutputTable where name like 'robot1%' ").show(200,false)
spark.sql("select count(*) from sdkOutputTable where name like 'robot%' ").show(200,false)
spark.sql("select count(*) from sdkOutputTable").show(200,false)
spark.sql("DROP TABLE sdkOutputTable")
// drop table should not delete the files
assert(new File(filePath).exists())
cleanTestData()
spark.stop()
}
}