blob: 37f14f1a28e5f86285be5f98ab73024ec467527b [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# topology definition
# name to be used when submitting
name: "kafka-topology"
# Components
# Components are analagous to Spring beans. They are meant to be used as constructor,
# property(setter), and builder arguments.
#
# for the time being, components must be declared in the order they are referenced
components:
- id: "onlyValueRecordTranslator"
className: "org.apache.storm.flux.examples.OnlyValueRecordTranslator"
- id: "spoutConfigBuilder"
className: "org.apache.storm.kafka.spout.KafkaSpoutConfig$Builder"
constructorArgs:
- "localhost:9092"
- ["myKafkaTopic"]
properties:
- name: "firstPollOffsetStrategy"
value: EARLIEST
- name: "recordTranslator"
ref: "onlyValueRecordTranslator"
configMethods:
- name: "setProp"
args:
- {
"key.deserializer" : "org.apache.kafka.common.serialization.StringDeserializer",
"value.deserializer": "org.apache.kafka.common.serialization.StringDeserializer"
}
- id: "spoutConfig"
className: "org.apache.storm.kafka.spout.KafkaSpoutConfig"
constructorArgs:
- ref: "spoutConfigBuilder"
# topology configuration
# this will be passed to the submitter as a map of config options
#
config:
topology.workers: 1
# ...
# spout definitions
spouts:
- id: "kafka-spout"
className: "org.apache.storm.kafka.spout.KafkaSpout"
constructorArgs:
- ref: "spoutConfig"
# bolt definitions
bolts:
- id: "splitsentence"
className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt"
constructorArgs:
# command line
- ["python", "splitsentence.py"]
# output fields
- ["word"]
parallelism: 1
- id: "log"
className: "org.apache.storm.flux.wrappers.bolts.LogInfoBolt"
parallelism: 1
# ...
- id: "count"
className: "org.apache.storm.testing.TestWordCounter"
parallelism: 1
#stream definitions
# stream definitions define connections between spouts and bolts.
# note that such connections can be cyclical
# custom stream groupings are also supported
streams:
- name: "kafka --> split" # name isn't used (placeholder for logging, UI, etc.)
from: "kafka-spout"
to: "splitsentence"
grouping:
type: SHUFFLE
- name: "split --> count"
from: "splitsentence"
to: "count"
grouping:
type: FIELDS
args: ["word"]
- name: "count --> log"
from: "count"
to: "log"
grouping:
type: SHUFFLE