/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.spark.util

import scala.collection.JavaConverters._
import scala.collection.mutable

import org.apache.hadoop.fs.s3a.Constants.{ACCESS_KEY, ENDPOINT, SECRET_KEY}
import org.apache.spark.sql.hive.{CarbonMetaData, CarbonRelation, DictionaryMap}

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.metadata.encoder.Encoding
import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, TableInfo}
import org.apache.carbondata.core.metadata.schema.table.column.{CarbonColumn, ColumnSchema}
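
/**
 * Pairs a transformed RDD with the Carbon metadata that describes it.
 */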
case class TransformHolder(rdd: Any, metaData: CarbonMetaData)

/**
 * Common helper methods for the Carbon Spark integration.
 */
object CarbonSparkUtil {
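  /**
   * Builds the Spark-side CarbonMetaData for a table: the visible dimension and
   * measure column names, plus a DictionaryMap flagging dimensions that use plain
   * (non-direct, non-complex) dictionary encoding.
   */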
def createSparkMeta(carbonTable: CarbonTable): CarbonMetaData = {
val dimensionsAttr = carbonTable.getVisibleDimensions.asScala.map(x => x.getColName)
val measureAttr = carbonTable.getVisibleMeasures.asScala.map(x => x.getColName)
val dictionary =
carbonTable.getVisibleDimensions.asScala.map { f =>
(f.getColName.toLowerCase,
f.hasEncoding(Encoding.DICTIONARY) && !f.hasEncoding(Encoding.DIRECT_DICTIONARY) &&
!f.getDataType.isComplexType)
}
CarbonMetaData(
dimensionsAttr,
measureAttr,
carbonTable,
DictionaryMap(dictionary.toMap))
}
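
  /**
   * Creates a CarbonRelation for the table described by the given TableInfo.
   *
   * @param tablePath the table's store location (not used by the current implementation)
   */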
def createCarbonRelation(tableInfo: TableInfo, tablePath: String): CarbonRelation = {
val table = CarbonTable.buildFromTableInfo(tableInfo)
CarbonRelation(
tableInfo.getDatabaseName,
tableInfo.getFactTable.getTableName,
CarbonSparkUtil.createSparkMeta(table),
table)
}
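
  // Usage sketch (hypothetical values): given a deserialized TableInfo and the table's
  // store location, e.g.
  //   val relation = CarbonSparkUtil.createCarbonRelation(tableInfo, "/store/mydb/mytable")
  // the relation carries both the rebuilt CarbonTable and its Spark metadata.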
  /**
   * Returns the formatted column comment if a comment is present, otherwise an empty string ("").
   *
   * @param carbonColumn the column of the carbonTable
   * @return the formatted comment string
   */
  def getColumnComment(carbonColumn: CarbonColumn): String = {
    val columnProperties = carbonColumn.getColumnProperties
    if (columnProperties != null) {
      val comment: String = columnProperties.get(CarbonCommonConstants.COLUMN_COMMENT)
      if (comment != null) {
        return " comment \"" + comment + "\""
      }
    }
    ""
  }
  /**
   * Returns the raw schema string for the given relation.
   *
   * @param carbonRelation logical plan for one carbon table
   * @return the comma-separated raw schema
   */
def getRawSchema(carbonRelation: CarbonRelation): String = {
val fields = new Array[String](
carbonRelation.dimensionsAttr.size + carbonRelation.measureAttr.size)
val carbonTable = carbonRelation.carbonTable
    val columnSchemas: mutable.Buffer[ColumnSchema] = carbonTable.getTableInfo.getFactTable
      .getListOfColumns.asScala
      .filter(cSchema => !cSchema.isInvisible && cSchema.getSchemaOrdinal != -1)
      .sortWith(_.getSchemaOrdinal < _.getSchemaOrdinal)
val columnList = columnSchemas.toList.asJava
    (carbonRelation.dimensionsAttr ++ carbonRelation.measureAttr).foreach { attr =>
      val carbonColumn = carbonTable.getColumnByName(attr.name)
      val columnComment = getColumnComment(carbonColumn)
      fields(columnList.indexOf(carbonColumn.getColumnSchema)) =
        '`' + attr.name + '`' + ' ' + attr.dataType.catalogString + columnComment
    }
fields.mkString(",")
}
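
  // Usage sketch: for a table defined as (id BIGINT, name STRING comment "customer name"),
  // getRawSchema yields the string: `id` bigint,`name` string comment "customer name"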
  /**
   * Adds an escape prefix to delimiters that are regex metacharacters.
   *
   * @param delimiter a sequence of one or more characters used to specify
   *                  the boundary between separate fields
   * @return the escaped delimiter
   */
def delimiterConverter4Udf(delimiter: String): String = delimiter match {
case "|" | "*" | "." | ":" | "^" | "\\" | "$" | "+" | "?" | "(" | ")" | "{" | "}" | "[" | "]" =>
"\\\\" + delimiter
case _ =>
delimiter
}
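
  // Usage sketch: metacharacters are double-escaped for use inside a UDF's split
  // pattern, so delimiterConverter4Udf("|") returns "\\\\|" (as a Scala literal,
  // i.e. two backslashes and the pipe), while delimiterConverter4Udf(",") is unchanged.

  /**
   * Resolves the Hadoop property names (access key, secret key, endpoint) for the
   * scheme of the given store path (s3a, s3n or s3), each prefixed with "spark.hadoop.".
   */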
def getKeyOnPrefix(path: String): (String, String, String) = {
val prefix = "spark.hadoop."
val endPoint = prefix + ENDPOINT
if (path.startsWith(CarbonCommonConstants.S3A_PREFIX)) {
(prefix + ACCESS_KEY, prefix + SECRET_KEY, endPoint)
} else if (path.startsWith(CarbonCommonConstants.S3N_PREFIX)) {
(prefix + CarbonCommonConstants.S3N_ACCESS_KEY,
prefix + CarbonCommonConstants.S3N_SECRET_KEY, endPoint)
} else if (path.startsWith(CarbonCommonConstants.S3_PREFIX)) {
(prefix + CarbonCommonConstants.S3_ACCESS_KEY,
prefix + CarbonCommonConstants.S3_SECRET_KEY, endPoint)
    } else {
      throw new IllegalArgumentException("Unsupported store path: " + path)
    }
}
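
  // Usage sketch (assumed call site): the returned property names are typically copied
  // into the Spark conf before reading from S3, e.g.
  //   val (accessKey, secretKey, endPoint) = CarbonSparkUtil.getKeyOnPrefix("s3a://bucket/store")
  //   sparkSession.conf.set(accessKey, "<your-access-key>")

  /**
   * Picks an optional S3 endpoint from the command-line arguments: args(3) is returned
   * only when it looks like a host name (contains ".com"), otherwise "".
   */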
def getS3EndPoint(args: Array[String]): String = {
if (args.length >= 4 && args(3).contains(".com")) args(3)
else ""
}
}