/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql

import java.net.URI

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd}
import org.apache.spark.sql.carbondata.execution.datasources.CarbonFileIndexReplaceRule
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, ExprId, Expression, ExpressionSet, NamedExpression, ScalaUDF, SubqueryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.ExprCode
import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, SubqueryAlias}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.command.ExplainCommand
import org.apache.spark.sql.hive.HiveExternalCatalog
import org.apache.spark.sql.optimizer.{CarbonIUDRule, CarbonUDFTransformRule, MVRewriteRule}
import org.apache.spark.sql.secondaryindex.optimizer.CarbonSITransformationRule
import org.apache.spark.sql.types.{DataType, Metadata}

import org.apache.carbondata.core.util.ThreadLocalSessionInfo
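
/**
 * Thin adapter over Spark APIs whose shape differs between Spark versions, so
 * that the rest of the CarbonData integration can code against one stable
 * interface.
 */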
object CarbonToSparkAdapter {

  def addSparkSessionListener(sparkSession: SparkSession): Unit = {
    SparkSqlAdapter.addSparkSessionListener(sparkSession)
  }
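
  // Registers a listener that clears the default SparkSession when the
  // application ends, so a stopped session is not accidentally reused.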
  def addSparkListener(sparkContext: SparkContext): Unit = {
    sparkContext.addSparkListener(new SparkListener {
      override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = {
        SparkSession.setDefaultSession(null)
      }
    })
  }
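
  // Builds an AttributeReference from the given pieces. The trailing `attrRef`
  // parameter is unused in this profile; it exists only to keep the signature
  // uniform across the per-Spark-version adapters.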
  def createAttributeReference(
      name: String,
      dataType: DataType,
      nullable: Boolean,
      metadata: Metadata,
      exprId: ExprId,
      qualifier: Option[String],
      attrRef: NamedExpression = null): AttributeReference = {
    AttributeReference(
      name,
      dataType,
      nullable,
      metadata)(exprId, qualifier)
  }
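
  // Re-creates the attribute with a new name and qualifier while preserving the
  // original ExprId, so references elsewhere in the plan still resolve to it.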
  def createAttributeReference(
      attr: AttributeReference,
      attrName: String,
      newSubsume: String): AttributeReference = {
    AttributeReference(attrName, attr.dataType)(
      exprId = attr.exprId,
      qualifier = Some(newSubsume))
  }
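
  // In this Spark profile an attribute's qualifier is an Option, so `head`
  // yields its value (and throws if the attribute is unqualified).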
  def getTheLastQualifier(attribute: Attribute): String = {
    attribute.qualifier.head
  }

  def getOutput(subQueryAlias: SubqueryAlias): Seq[Attribute] = {
    subQueryAlias.output
  }
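
  // Rebuilds the ScalaUDF so that the given attribute becomes its only child,
  // keeping the original function, return type and input types.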
  def createScalaUDF(s: ScalaUDF, reference: AttributeReference): ScalaUDF = {
    ScalaUDF(s.function, s.dataType, Seq(reference), s.inputTypes)
  }
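
  // `dataType` is unused here: in this profile ExprCode carries only the
  // generated code, the null flag and the value. The parameter keeps the
  // signature aligned with the other version adapters.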
  def createExprCode(
      code: String,
      isNull: String,
      value: String,
      dataType: DataType = null): ExprCode = {
    ExprCode(code, isNull, value)
  }
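
  // Only `child`, `name`, `exprId`, `qualifier` and `explicitMetadata` feed the
  // Alias; `namedExpr` is accepted solely for cross-version signature parity.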
  def createAliasRef(
      child: Expression,
      name: String,
      exprId: ExprId = NamedExpression.newExprId,
      qualifier: Option[String] = None,
      explicitMetadata: Option[Metadata] = None,
      namedExpr: Option[NamedExpression] = None): Alias = {
    Alias(child, name)(exprId, qualifier, explicitMetadata)
  }

  // Creates aliases from pairs of corresponding output expressions of two plans,
  // rebinding each new expression (o2) to the name and ExprId of the original
  // one (o1) so that downstream references continue to resolve.
  def createAliases(mappings: Seq[(NamedExpression, NamedExpression)]): Seq[NamedExpression] = {
    mappings.map { case (o1, o2) =>
      o2 match {
        // Same name but a new ExprId: re-alias the child under the original ExprId.
        case al: Alias if o1.name == o2.name && o1.exprId != o2.exprId =>
          Alias(al.child, o1.name)(exprId = o1.exprId)
        case other =>
          if (o1.name != o2.name || o1.exprId != o2.exprId) {
            // Name or ExprId differs: wrap o2 in an alias carrying o1's identity.
            Alias(o2, o1.name)(exprId = o1.exprId)
          } else {
            // Identical name and ExprId: reuse o2 unchanged.
            o2
          }
      }
    }
  }
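
  // ExplainCommand requires a logical plan; OneRowRelation() stands in as a
  // minimal placeholder plan.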
  def getExplainCommandObj(): ExplainCommand = {
    ExplainCommand(OneRowRelation())
  }

  /**
   * As part of SPARK-24085, Hive tables support scalar subqueries on partitioned
   * tables, so Carbon needs to support them as well: predicates containing a
   * subquery are filtered out, and only the remaining predicates that reference
   * partition columns exclusively are returned.
   *
   * @param partitionSet attributes of the partition columns
   * @param filterPredicates candidate filter predicates
   * @return the predicates usable for partition pruning
   */
  def getPartitionKeyFilter(
      partitionSet: AttributeSet,
      filterPredicates: Seq[Expression]): ExpressionSet = {
    ExpressionSet(
      ExpressionSet(filterPredicates)
        .filterNot(SubqueryExpression.hasSubquery)
        .filter(_.references.subsetOf(partitionSet)))
  }

  // As per SPARK-22520, the OptimizeCodegen rule was removed in Spark 2.3.1,
  // so there is no rule to return here.
  def getOptimizeCodegenRule(): Seq[Rule[LogicalPlan]] = {
    Seq.empty
  }
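
  // Copies the storage format, replacing its properties and pointing its
  // location at the given table path.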
  def getUpdatedStorageFormat(
      storageFormat: CatalogStorageFormat,
      map: Map[String, String],
      tablePath: String): CatalogStorageFormat = {
    storageFormat.copy(properties = map, locationUri = Some(new URI(tablePath)))
  }
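
  // Assumes the session runs with Hive support enabled; otherwise the cast to
  // HiveExternalCatalog fails at runtime.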
  def getHiveExternalCatalog(sparkSession: SparkSession): HiveExternalCatalog = {
    sparkSession.sessionState.catalog.externalCatalog.asInstanceOf[HiveExternalCatalog]
  }
}
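
/**
 * Carbon-specific Optimizer: runs the materialized view rewrite rules first,
 * then the batches of the wrapped Spark optimizer, and finally the IUD and
 * secondary index rules.
 */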
class CarbonOptimizer(
    session: SparkSession,
    catalog: SessionCatalog,
    optimizer: Optimizer) extends Optimizer(catalog) {

  private lazy val mvRules = Seq(Batch("Materialized View Optimizers", Once,
    Seq(new MVRewriteRule(session)): _*))

  private lazy val iudRule = Batch("IUD Optimizers", fixedPoint,
    Seq(new CarbonIUDRule(), new CarbonUDFTransformRule(), new CarbonFileIndexReplaceRule()): _*)

  private lazy val secondaryIndexRule = Batch("SI Optimizers", Once,
    Seq(new CarbonSITransformationRule(session)): _*)

  override def batches: Seq[Batch] = {
    mvRules ++ convertedBatch() :+ iudRule :+ secondaryIndexRule
  }
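
  // Re-creates each batch of the wrapped optimizer with this optimizer's own
  // Once/fixedPoint strategies: Strategy is a path-dependent inner type of
  // RuleExecutor, so the wrapped optimizer's instances cannot be reused directly.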
  def convertedBatch(): Seq[Batch] = {
    optimizer.batches.map { batch =>
      Batch(
        batch.name,
        batch.strategy match {
          case optimizer.Once =>
            Once
          case _: optimizer.FixedPoint =>
            fixedPoint
        },
        batch.rules: _*
      )
    }
  }
}