blob: 120ab8521a689cde49b30172d5fa13974c338f0a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.examples
import java.io.File
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.SparkSession
import org.apache.carbondata.core.metadata.datatype.DataTypes
import org.apache.carbondata.examples.util.ExampleUtils
import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}
/**
* Running SQL on carbon files directly
* No need to create table first
* TODO: support more than one carbon file
*/
// scalastyle:off println
object DirectSQLExample {

  def main(args: Array[String]): Unit = {
    val carbonSession = ExampleUtils.createCarbonSession("DirectSQLExample")
    exampleBody(carbonSession)
    carbonSession.close()
  }

  /**
   * Writes a small sample data set with the SDK writer, then runs SQL
   * queries against the carbon files directly — no table creation needed.
   *
   * @param carbonSession active SparkSession with carbon support
   */
  def exampleBody(carbonSession: SparkSession): Unit = {
    val rootPath = new File(this.getClass.getResource("/").getPath
      + "../../../..").getCanonicalPath
    val path = s"$rootPath/examples/spark2/target/carbonFile/"
    import carbonSession._

    // 1. generate the data files via the SDK writer
    cleanTestData(path)
    val rows = 20
    buildTestData(path, rows)

    println("Running SQL on carbon files directly")
    try {
      // 2. run queries directly, no need to create table first
      sql(s"select * FROM carbon.`$path` limit 10").show()
      // 3. verify the row count matches what was written
      val counts = sql(s"select * FROM carbon.`$path`").count()
      assert(rows == counts)
    } finally {
      // 4. always delete the generated data files, even if a query failed
      cleanTestData(path)
    }
  }

  /**
   * Prepares SDK writer output: writes `num` rows of (name, age, height)
   * sample data as carbon files under `path`.
   *
   * @param path output directory for the carbon files
   * @param num  number of rows to generate (default 3)
   */
  def buildTestData(
      path: String,
      num: Int = 3): Unit = {
    // getCanonicalPath gives path with \, but the code expects /.
    val writerPath = path.replace("\\", "/")

    val fields = new Array[Field](3)
    fields(0) = new Field("name", DataTypes.STRING)
    fields(1) = new Field("age", DataTypes.INT)
    fields(2) = new Field("height", DataTypes.DOUBLE)

    val writer = CarbonWriter
      .builder()
      .outputPath(writerPath)
      .uniqueIdentifier(System.currentTimeMillis)
      .withBlockSize(2)
      .withCsvInput(new Schema(fields))
      .writtenBy("DirectSQLExample")
      .build()
    try {
      var i = 0
      while (i < num) {
        writer.write(Array[String]("robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2)))
        i += 1
      }
    } finally {
      // release writer resources even when a write fails (original leaked here)
      writer.close()
    }
  }

  /** Deletes the generated carbon files so each run starts from a clean directory. */
  def cleanTestData(path: String): Unit = {
    FileUtils.deleteDirectory(new File(path))
  }
}
// scalastyle:on println