blob: 59289a56896810fe9cd92d48dddd891f02f6c882 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.gearpump.streaming.examples.state
import java.util.Properties
import akka.actor.ActorSystem
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.apache.hadoop.conf.Configuration
import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import org.apache.gearpump.streaming.partitioner.HashPartitioner
import org.apache.gearpump.streaming.examples.state.processor.CountProcessor
import org.apache.gearpump.streaming.hadoop.HadoopCheckpointStoreFactory
import org.apache.gearpump.streaming.hadoop.lib.rotation.FileSizeRotation
import org.apache.gearpump.streaming.kafka.{KafkaStoreFactory, KafkaSink, KafkaSource}
import org.apache.gearpump.streaming.sink.DataSinkProcessor
import org.apache.gearpump.streaming.source.DataSourceProcessor
import org.apache.gearpump.streaming.state.impl.PersistentStateConfig
import org.apache.gearpump.streaming.{Processor, StreamApplication}
import org.apache.gearpump.util.Graph.Node
import org.apache.gearpump.util.{AkkaApp, Graph}
/**
 * Does exactly-once message count.
 *
 * Builds and submits a three-stage streaming application: a Kafka source, a
 * [[CountProcessor]] stage configured with state checkpointing, and a Kafka sink.
 * Every stage's parallelism, the Kafka topics/endpoints and the default file system
 * used for checkpoints are taken from the command line (see [[options]]).
 */
object MessageCountApp extends AkkaApp with ArgumentsParser {
  // Names of the command line options.
  val SOURCE_TASK = "sourceTask"
  val COUNT_TASK = "countTask"
  val SINK_TASK = "sinkTask"
  val SOURCE_TOPIC = "sourceTopic"
  val SINK_TOPIC = "sinkTopic"
  val ZOOKEEPER_CONNECT = "zookeeperConnect"
  val BROKER_LIST = "brokerList"
  val DEFAULT_FS = "defaultFS"

  /** Command line options; the three task counts are optional and default to 1. */
  override val options: Array[(String, CLIOption[Any])] = Array(
    SOURCE_TASK -> CLIOption[Int]("<how many kafka source tasks>", required = false,
      defaultValue = Some(1)),
    COUNT_TASK -> CLIOption[Int]("<how many count tasks>", required = false,
      defaultValue = Some(1)),
    SINK_TASK -> CLIOption[Int]("<how many kafka sink tasks>", required = false,
      defaultValue = Some(1)),
    SOURCE_TOPIC -> CLIOption[String]("<kafka source topic>", required = true),
    SINK_TOPIC -> CLIOption[String]("<kafka sink topic>", required = true),
    ZOOKEEPER_CONNECT -> CLIOption[String]("<Zookeeper connect string, e.g. localhost:2181/kafka>",
      required = true),
    BROKER_LIST -> CLIOption[String]("<Kafka broker list, e.g. localhost:9092>", required = true),
    DEFAULT_FS -> CLIOption[String]("<name of the default file system, e.g. hdfs://localhost:9000>",
      required = true)
  )

  /**
   * Builds the message count application from the parsed command line.
   *
   * @param config parsed command line arguments, see [[options]]
   * @param system actor system required by `UserConfig.withValue`
   * @return the application whose graph is source ~> count ~> sink, each edge hash partitioned
   */
  def application(config: ParseResult)(implicit system: ActorSystem): StreamApplication = {
    val appName = "MessageCount"

    // Checkpoint store for the count stage, located on the file system given by DEFAULT_FS.
    val hadoopConfig = new Configuration
    hadoopConfig.set("fs.defaultFS", config.getString(DEFAULT_FS))
    // Named after the application so the store name and the app name cannot drift apart.
    val checkpointStoreFactory = new HadoopCheckpointStoreFactory(appName, hadoopConfig,
      // Rotates on 1KB
      new FileSizeRotation(1000))

    // Enables state checkpointing with a 1000 ms interval for the count stage.
    val taskConfig = UserConfig.empty
      .withBoolean(PersistentStateConfig.STATE_CHECKPOINT_ENABLE, true)
      .withLong(PersistentStateConfig.STATE_CHECKPOINT_INTERVAL_MS, 1000L)
      .withValue(PersistentStateConfig.STATE_CHECKPOINT_STORE_FACTORY, checkpointStoreFactory)

    // Kafka properties shared by the store factory, the source and the sink.
    val properties = new Properties
    properties.put(KafkaConfig.ZOOKEEPER_CONNECT_CONFIG, config.getString(ZOOKEEPER_CONNECT))
    val brokerList = config.getString(BROKER_LIST)
    properties.put(KafkaConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
    properties.put(KafkaConfig.CHECKPOINT_STORE_NAME_PREFIX_CONFIG, appName)
    val kafkaStoreFactory = new KafkaStoreFactory(properties)

    val sourceTopic = config.getString(SOURCE_TOPIC)
    val kafkaSource = new KafkaSource(sourceTopic, properties)
    kafkaSource.setCheckpointStore(kafkaStoreFactory)
    val sourceProcessor = DataSourceProcessor(kafkaSource, config.getInt(SOURCE_TASK))

    val countProcessor = Processor[CountProcessor](config.getInt(COUNT_TASK), taskConf = taskConfig)

    val kafkaSink = new KafkaSink(config.getString(SINK_TOPIC), properties)
    val sinkProcessor = DataSinkProcessor(kafkaSink, config.getInt(SINK_TASK))

    val partitioner = new HashPartitioner()
    val graph = Graph(sourceProcessor ~ partitioner
      ~> countProcessor ~ partitioner ~> sinkProcessor)
    StreamApplication(appName, graph, UserConfig.empty)
  }

  /**
   * Parses the arguments, submits the application and closes the client context.
   *
   * @param akkaConf configuration used to create the [[ClientContext]]
   * @param args raw command line arguments, parsed according to [[options]]
   */
  def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    // Close the context even when building or submitting the application fails,
    // otherwise the client context would be leaked on the error path.
    try {
      implicit val system: ActorSystem = context.system
      context.submit(application(config))
    } finally {
      context.close()
    }
  }
}