/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.griffin.measure.launch.batch

import java.util.Date

import scala.util.Try

import org.apache.spark.SparkConf
import org.apache.spark.sql.{SparkSession, SQLContext}

import org.apache.griffin.measure.configuration.dqdefinition._
import org.apache.griffin.measure.configuration.enums._
import org.apache.griffin.measure.context._
import org.apache.griffin.measure.datasource.DataSourceFactory
import org.apache.griffin.measure.job.builder.DQJobBuilder
import org.apache.griffin.measure.launch.DQApp
import org.apache.griffin.measure.step.builder.udf.GriffinUDFAgent
case class BatchDQApp(allParam: GriffinConfig) extends DQApp {

  val envParam: EnvConfig = allParam.getEnvConfig
  val dqParam: DQConfig = allParam.getDqConfig

  val sparkParam = envParam.getSparkParam
  val metricName = dqParam.getName
  val sinkParams = getSinkParams

  var sqlContext: SQLContext = _
  var dqContext: DQContext = _

  def retryable: Boolean = false

  def init: Try[_] = Try {
    // build spark 2.0+ application context
    val conf = new SparkConf().setAppName(metricName)
    conf.setAll(sparkParam.getConfig)
    conf.set("spark.sql.crossJoin.enabled", "true")
    sparkSession = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate()
    val logLevel = getGriffinLogLevel()
    sparkSession.sparkContext.setLogLevel(sparkParam.getLogLevel)
    griffinLogger.setLevel(logLevel)
    sqlContext = sparkSession.sqlContext

    // register udf
    GriffinUDFAgent.register(sqlContext)
  }
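
  /*
   * For reference, a minimal sketch of the env config that `sparkParam` is
   * read from. The field names below are assumed from how this class uses
   * them (getLogLevel, getConfig) and from Griffin's documented env.json
   * layout; treat this as an illustration, not the definitive schema:
   *
   *   {
   *     "spark": {
   *       "log.level": "WARN",
   *       "config": { "spark.master": "local[*]" }
   *     },
   *     "sinks": [ { "type": "CONSOLE" } ]
   *   }
   */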
  def run: Try[Boolean] = Try {
    // start time
    val startTime = new Date().getTime

    val measureTime = getMeasureTime
    val contextId = ContextId(measureTime)

    // get data sources (the StreamingContext argument is null: not needed in batch mode)
    val dataSources = DataSourceFactory.getDataSources(sparkSession, null, dqParam.getDataSources)
    dataSources.foreach(_.init)

    // create dq context
    dqContext = DQContext(
      contextId, metricName, dataSources, sinkParams, BatchProcessType
    )(sparkSession)

    // start the sink with the spark application id
    val applicationId = sparkSession.sparkContext.applicationId
    dqContext.getSink().start(applicationId)

    // build dq job
    val dqJob = DQJobBuilder.buildDQJob(dqContext, dqParam.getEvaluateRule)

    // execute dq job
    val result = dqJob.execute(dqContext)

    // end time
    val endTime = new Date().getTime
    dqContext.getSink().log(endTime, s"process using time: ${endTime - startTime} ms")

    // clean context
    dqContext.clean()

    // finish
    dqContext.getSink().finish()

    result
  }
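
  /*
   * For reference, a minimal sketch of the dq config consumed above
   * (name feeds metricName, data.sources feeds DataSourceFactory, and
   * evaluate.rule feeds DQJobBuilder). Field names follow Griffin's
   * documented dq.json layout but are an assumption here, not authoritative:
   *
   *   {
   *     "name": "batch_accuracy",
   *     "process.type": "batch",
   *     "data.sources": [ ... ],
   *     "evaluate.rule": {
   *       "rules": [
   *         { "dsl.type": "griffin-dsl", "dq.type": "ACCURACY", "rule": "src.id = tgt.id" }
   *       ]
   *     }
   *   }
   */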
  def close: Try[_] = Try {
    // SparkSession.close() delegates to stop(), so a single call is sufficient
    sparkSession.close()
  }

}
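
/*
 * Usage sketch (hypothetical driver; in Griffin the real entry point is the
 * measure Application object, which selects a DQApp by process type). The
 * lifecycle is init -> run -> close, each step wrapped in a Try:
 *
 *   val allParam: GriffinConfig = ...  // parsed from the env and dq JSON files
 *   val app = BatchDQApp(allParam)
 *   val outcome: Try[Boolean] = for {
 *     _ <- app.init
 *     result <- app.run
 *     _ <- app.close
 *   } yield result
 */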