# conf/defaults.yaml - storm - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.


 ########### These all have default values as shown
 ########### Additional configuration goes into storm.yaml

 # Colon-separated list of directories; presumably handed to the JVM as
 # java.library.path for native libraries -- confirm against the launcher.
 java.library.path: "/usr/local/lib:/opt/local/lib:/usr/lib:/usr/lib64"

 ### storm.* configs are general configurations
 # the local dir is where jars are kept
 storm.local.dir: "storm-local"
 storm.log4j2.conf.dir: "log4j2"
 # ZooKeeper connection settings.
 storm.zookeeper.servers:
     - "localhost"
 storm.zookeeper.port: 2181
 storm.zookeeper.root: "/storm"
 # The session timeout is 20s, so the value is expressed in milliseconds;
 # connection.timeout and retry.interval presumably use the same unit -- confirm.
 storm.zookeeper.session.timeout: 20000
 storm.zookeeper.connection.timeout: 15000
 storm.zookeeper.retry.times: 5
 storm.zookeeper.retry.interval: 1000
 storm.zookeeper.retry.intervalceiling.millis: 30000
 # No ZooKeeper credentials are set by default.
 storm.zookeeper.auth.user: null
 storm.zookeeper.auth.password: null
 storm.exhibitor.port: 8080
 storm.exhibitor.poll.uripath: "/exhibitor/v1/cluster/list"
 storm.cluster.mode: "distributed" # can be distributed or local
 storm.local.mode.zmq: false
 # Thrift transport and authentication-related plugin classes.
 storm.thrift.transport: "org.apache.storm.security.auth.SimpleTransportPlugin"
 storm.thrift.socket.timeout.ms: 600000
 storm.principal.tolocal: "org.apache.storm.security.auth.DefaultPrincipalToLocal"
 storm.group.mapping.service: "org.apache.storm.security.auth.ShellBasedGroupsMapping"
 storm.group.mapping.service.params: null
 storm.messaging.transport: "org.apache.storm.messaging.netty.Context"
 # Retry settings for reaching nimbus (interval values are in milliseconds, per the key names).
 storm.nimbus.retry.times: 5
 storm.nimbus.retry.interval.millis: 2000
 storm.nimbus.retry.intervalceiling.millis: 60000
 storm.nimbus.zookeeper.acls.check: true
 storm.nimbus.zookeeper.acls.fixup: true

 # Empty list by default.
 storm.auth.simple-white-list.users: []
 storm.cluster.state.store: "org.apache.storm.cluster.ZKStateStorageFactory"
 storm.meta.serialization.delegate: "org.apache.storm.serialization.GzipThriftSerializationDelegate"
 storm.codedistributor.class: "org.apache.storm.codedistributor.LocalFileSystemCodeDistributor"
 # Relative directory names; NOTE(review): presumably resolved against a Storm base dir -- confirm.
 storm.workers.artifacts.dir: "workers-artifacts"
 storm.health.check.dir: "healthchecks"
 storm.health.check.timeout.ms: 5000
 storm.disable.symlinks: false

 ### nimbus.* configs are for the master
 nimbus.seeds: ["localhost"]
 nimbus.thrift.port: 6627
 nimbus.thrift.threads: 64
 # 1048576 = 1024 * 1024
 nimbus.thrift.max_buffer_size: 1048576
 nimbus.childopts: "-Xmx1024m"
 # Timeouts and polling frequencies, in seconds (per the key names).
 nimbus.task.timeout.secs: 30
 nimbus.supervisor.timeout.secs: 60
 nimbus.monitor.freq.secs: 10
 nimbus.cleanup.inbox.freq.secs: 600
 nimbus.inbox.jar.expiration.secs: 3600
 nimbus.code.sync.freq.secs: 120
 nimbus.task.launch.secs: 120
 nimbus.file.copy.expiration.secs: 600
 nimbus.topology.validator: "org.apache.storm.nimbus.DefaultTopologyValidator"
 # NOTE: the topology.* and scheduler.* keys in this stanza do not carry the
 # nimbus. prefix but are grouped with the master settings in this file.
 topology.min.replication.count: 1
 topology.max.replication.wait.time.sec: 60
 nimbus.credential.renewers.freq.secs: 600
 nimbus.queue.size: 100000
 scheduler.display.resource: false
 nimbus.local.assignments.backend.class: "org.apache.storm.assignments.InMemoryAssignmentBackend"
 nimbus.assignments.service.threads: 10
 nimbus.assignments.service.thread.queue.size: 100
 nimbus.worker.heartbeats.recovery.strategy.class: "org.apache.storm.nimbus.TimeOutWorkerHeartbeatsRecoveryStrategy"
 # 300000 ms = 5 minutes
 nimbus.topology.blobstore.deletion.delay.ms: 300000

 ### ui.* configs are for the web UI daemon
 # Host and class-name strings are quoted for consistency with the rest of this file.
 ui.host: "0.0.0.0"
 ui.port: 8080
 ui.childopts: "-Xmx768m"
 ui.actions.enabled: true
 ui.filter: null
 ui.filter.params: null
 ui.users: null
 ui.header.buffer.bytes: 4096
 ui.http.creds.plugin: "org.apache.storm.security.auth.DefaultHttpCredentialsPlugin"
 ui.pagination: 20
 ui.disable.http.binding: true
 ui.disable.spout.lag.monitoring: true

 ### logviewer.* configs
 logviewer.port: 8000
 logviewer.childopts: "-Xmx128m"
 # 10080 minutes = 7 days
 logviewer.cleanup.age.mins: 10080
 logviewer.appender.name: "A1"
 # Log size limits in MB (per the key names): 4096 in total, 2048 per worker.
 logviewer.max.sum.worker.logs.size.mb: 4096
 logviewer.max.per.worker.logs.size.mb: 2048
 logviewer.disable.http.binding: true
 logviewer.filter: null
 logviewer.filter.params: null

 logs.users: null

 ### drpc.* configs
 drpc.port: 3772
 drpc.worker.threads: 64
 # 1048576 = 1024 * 1024
 drpc.max_buffer_size: 1048576
 drpc.queue.size: 128
 drpc.invocations.port: 3773
 drpc.invocations.threads: 64
 drpc.request.timeout.secs: 600
 drpc.childopts: "-Xmx768m"
 drpc.http.port: 3774
 # NOTE(review): -1 presumably means the HTTPS endpoint is disabled -- confirm.
 drpc.https.port: -1
 drpc.https.keystore.password: ""
 drpc.https.keystore.type: "JKS"
 # Class name quoted for consistency with the other class-name values in this file.
 drpc.http.creds.plugin: "org.apache.storm.security.auth.DefaultHttpCredentialsPlugin"
 drpc.authorizer.acl.filename: "drpc-auth-acl.yaml"
 drpc.authorizer.acl.strict: false
 drpc.disable.http.binding: true

 ### transactional.zookeeper.* configs
 transactional.zookeeper.root: "/transactional"
 # NOTE(review): servers/port are null by default; presumably the consumer then
 # falls back to storm.zookeeper.servers / storm.zookeeper.port -- confirm.
 transactional.zookeeper.servers: null
 transactional.zookeeper.port: null

 ## blobstore configs
 supervisor.blobstore.class: "org.apache.storm.blobstore.NimbusBlobStore"
 supervisor.blobstore.download.thread.count: 5
 supervisor.blobstore.download.max_retries: 3
 # 10240 MB = 10 GB
 supervisor.localizer.cache.target.size.mb: 10240
 # 30000 ms = 30 seconds
 supervisor.localizer.cleanup.interval.ms: 30000

 nimbus.blobstore.class: "org.apache.storm.blobstore.LocalFsBlobStore"
 nimbus.blobstore.expiration.secs: 600

 # 65536 bytes = 64 KiB
 storm.blobstore.inputstream.buffer.size.bytes: 65536
 # 1048576 bytes = 1 MiB
 storm.blobstore.dependency.jar.upload.chunk.size.bytes: 1048576
 client.blobstore.class: "org.apache.storm.blobstore.NimbusBlobStore"
 storm.blobstore.replication.factor: 3
 # For secure mode we would want to change this config to true
 storm.blobstore.acl.validation.enabled: false

 ### supervisor.* configs are for node supervisors
 # Define the number of workers that can be run on this machine. Each worker is assigned a port to use for communication
 supervisor.slots.ports:
     - 6700
     - 6701
     - 6702
     - 6703
 supervisor.childopts: "-Xmx256m"
 supervisor.run.worker.as.user: false
 # How long the supervisor will wait to ensure that a worker process is started
 supervisor.worker.start.timeout.secs: 120
 # How long between heartbeats until the supervisor considers that worker dead and tries to restart it
 supervisor.worker.timeout.secs: 30
 # How many seconds to allow for graceful worker shutdown when killing workers before resorting to force kill
 supervisor.worker.shutdown.sleep.secs: 3
 # How frequently the supervisor checks on the status of the processes it's monitoring and restarts if necessary
 supervisor.monitor.frequency.secs: 3
 # How frequently the supervisor heartbeats to the cluster state (for nimbus)
 supervisor.heartbeat.frequency.secs: 5
 # Max timeout for a node's worker heartbeats when the master gains leadership
 supervisor.worker.heartbeats.max.timeout.secs: 600
 # For topology-configurable heartbeat timeouts: the maximum allowed heartbeat timeout.
 worker.max.timeout.secs: 600
 supervisor.enable: true
 supervisor.supervisors: []
 supervisor.supervisors.commands: []
 supervisor.memory.capacity.mb: 4096.0
 # By convention 1 cpu core should be about 100, but this can be adjusted if needed.
 # Using 100 makes it simple to set the desired value to the capacity measurement
 # for single threaded bolts.
 # (Under that convention the default of 400.0 corresponds to 4 cores.)
 supervisor.cpu.capacity: 400.0

 # Supervisor thrift config
 supervisor.thrift.port: 6628
 supervisor.queue.size: 128
 supervisor.thrift.threads: 16
 # 1048576 = 1024 * 1024
 supervisor.thrift.max_buffer_size: 1048576
 supervisor.thrift.socket.timeout.ms: 5000

 ### worker.* configs are for task workers
 worker.heap.memory.mb: 768
 # NOTE(review): %HEAP-MEM% is a placeholder, presumably substituted with the worker
 # heap size in MB (cf. worker.heap.memory.mb) before the JVM is launched -- confirm.
 worker.childopts: "-Xmx%HEAP-MEM%m -XX:+PrintGCDetails -Xloggc:artifacts/gc.log -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=1M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=artifacts/heapdump"
 worker.gc.childopts: ""

 # Unlocking commercial features requires a special license from Oracle.
 # See http://www.oracle.com/technetwork/java/javase/terms/products/index.html
 # For this reason, profiler features are disabled by default.
 worker.profiler.enabled: false
 worker.profiler.childopts: "-XX:+UnlockCommercialFeatures -XX:+FlightRecorder"
 worker.profiler.command: "flight.bash"
 worker.heartbeat.frequency.secs: 1

 # How often to check whether dynamic log levels can be reset from DEBUG to INFO in workers
 worker.log.level.reset.poll.secs: 30

 # Controls how many worker receiver threads we need per worker
 topology.worker.receiver.thread.count: 1

 # Executor metrics reporting interval.
 # Because the UI only shows built-in metrics, keep this in sync with the built-in metrics interval
 # and with the metrics consumer's collecting interval.
 # See topology.builtin.metrics.bucket.size.secs and storm.cluster.metrics.consumer.publish.interval.secs.
 executor.metrics.frequency.secs: 60

 # Task-level heartbeat and polling intervals, in seconds (per the key names).
 task.heartbeat.frequency.secs: 3
 task.refresh.poll.secs: 10
 task.credentials.poll.secs: 30

 # Used by workers to communicate
 storm.messaging.netty.server_worker_threads: 1
 storm.messaging.netty.client_worker_threads: 1
 storm.messaging.netty.buffer_size: 5242880  # 5 MB buffer

 # The netty write buffer high watermark in bytes.
 # If the number of bytes queued in the netty's write buffer exceeds this value, the netty client will block
 # until the value falls below the low water mark.
 storm.messaging.netty.buffer.high.watermark: 16777216  # 16 MB
 # The netty write buffer low watermark in bytes.
 # Once the number of bytes queued in the write buffer exceeded the high water mark and then
 # dropped down below this value, any blocked clients will unblock and start processing further messages.
 storm.messaging.netty.buffer.low.watermark: 8388608  # 8 MB
 # Since nimbus.task.launch.secs and supervisor.worker.start.timeout.secs are 120, other workers should also
 # wait at least that long before giving up on connecting to the other worker. The reconnection period also
 # needs to be bigger than storm.zookeeper.session.timeout (default is 20s), so that we can abort the
 # reconnection when the target worker is dead.
 # NOTE(review): the values below are 1000 ms and 100 ms, far below the 120 s mentioned above; presumably
 # they bound the wait between individual reconnection attempts rather than the whole period -- confirm.
 storm.messaging.netty.max_wait_ms: 1000
 storm.messaging.netty.min_wait_ms: 100

 # If the Netty messaging layer is busy (netty internal buffer not writable), the Netty client will try to
 # batch as many messages as possible, up to storm.messaging.netty.transfer.batch.size bytes; otherwise it
 # will try to flush messages as soon as possible to reduce latency.
 storm.messaging.netty.transfer.batch.size: 262144
 # Sets the backlog value to specify when the channel binds to a local address
 storm.messaging.netty.socket.backlog: 500

 # By default, the Netty SASL authentication is set to false.  Users can override and set it true for a specific topology.
 storm.messaging.netty.authentication: false

 # Default plugin to use for automatic network topology discovery
 # (class name quoted for consistency with the other class-name values in this file)
 storm.network.topography.plugin: "org.apache.storm.networktopography.DefaultRackDNSToSwitchMapping"

 # Default number of seconds the group mapping service will cache user groups
 storm.group.mapping.service.cache.duration.secs: 120

 ### topology.* configs are for specific executing storms
 topology.enable.message.timeouts: true
 topology.debug: false
 topology.workers: 1
 topology.acker.executors: null
 topology.eventlogger.executors: 0
 topology.tasks: null
 # Maximum amount of time a message has to complete before it's considered failed
 topology.message.timeout.secs: 30
 topology.multilang.serializer: "org.apache.storm.multilang.JsonSerializer"
 topology.shellbolt.max.pending: 100
 topology.skip.missing.kryo.registrations: false
 topology.max.task.parallelism: null
 # Ideally should be larger than topology.producer.batch.size (esp. if topology.batch.flush.interval.millis=0)
 topology.max.spout.pending: null
 topology.state.synchronization.timeout.secs: 60
 topology.stats.sample.rate: 0.05
 # Should be kept in sync with executor.metrics.frequency.secs and
 # storm.cluster.metrics.consumer.publish.interval.secs (all 60 by default).
 topology.builtin.metrics.bucket.size.secs: 60
 topology.fall.back.on.java.serialization: true
 topology.worker.childopts: null
 topology.worker.logwriter.childopts: "-Xmx64m"
 topology.tick.tuple.freq.secs: null
 topology.worker.shared.thread.pool.size: 4

 # Spout Wait Strategy - employed when there is no data to produce
 topology.spout.wait.strategy: "org.apache.storm.policy.WaitStrategyProgressive"
 # Park time for org.apache.storm.policy.WaitStrategyPark. Busy spins if set to 0.
 topology.spout.wait.park.microsec: 100

 # WaitStrategyProgressive levels: level 1 = no sleep, level 2 = parkNanos(1), level 3 = sleep.
 # level1/level2 counts are the number of iterations spent in that level before progressing to the next;
 # level3.sleep.millis is the sleep duration for idling iterations in level 3.
 topology.spout.wait.progressive.level1.count: 0
 topology.spout.wait.progressive.level2.count: 0
 topology.spout.wait.progressive.level3.sleep.millis: 1

 # Bolt Wait Strategy - employed when there is no data in its receive buffer to process
 topology.bolt.wait.strategy: "org.apache.storm.policy.WaitStrategyProgressive"

 # Park time for org.apache.storm.policy.WaitStrategyPark. Busy spins if set to 0.
 topology.bolt.wait.park.microsec: 100

 # Same level semantics as for the spout wait strategy:
 # iterations in level 1 [no sleep], iterations in level 2 [parkNanos(1)], sleep duration in level 3.
 topology.bolt.wait.progressive.level1.count: 1
 topology.bolt.wait.progressive.level2.count: 1000
 topology.bolt.wait.progressive.level3.sleep.millis: 1

 # BackPressure Wait Strategy - for any producer (spout/bolt/transfer thread) when the downstream Q is full
 topology.backpressure.wait.strategy: "org.apache.storm.policy.WaitStrategyProgressive"

 # Park time for org.apache.storm.policy.WaitStrategyPark. Busy spins if set to 0.
 topology.backpressure.wait.park.microsec: 100

 # Iterations in level 1 [no sleep], iterations in level 2 [parkNanos(1)], sleep duration in level 3
 # of WaitStrategyProgressive.
 topology.backpressure.wait.progressive.level1.count: 1
 topology.backpressure.wait.progressive.level2.count: 1000
 topology.backpressure.wait.progressive.level3.sleep.millis: 1


 # How often to check if backpressure has been relieved on executors under BP, for informing other workers
 # to resume sending msgs to them. Must be > 0
 topology.backpressure.check.millis: 50
 # Max items in overflowQ of any bolt/spout. When exceeded, worker will drop incoming messages (from the workers)
 # destined to that overflowing spout/bolt. Set to 0 to disable overflow limiting.
 # Enabling this may degrade perf slightly.
 topology.executor.overflow.limit: 0

 topology.error.throttle.interval.secs: 10
 topology.max.error.report.per.interval: 5
 topology.kryo.factory: "org.apache.storm.serialization.DefaultKryoFactory"
 topology.tuple.serializer: "org.apache.storm.serialization.types.ListDelegateSerializer"
 topology.trident.batch.emit.interval.millis: 500
 topology.testing.always.try.serialize: false
 topology.classpath: null
 topology.environment: null

 # Size of recv queue for the transfer worker thread
 topology.transfer.buffer.size: 1000
 # Can be no larger than half of `topology.transfer.buffer.size`
 topology.transfer.batch.size: 1

 # Size of recv queue for spouts & bolts. Will be internally rounded up to the next power of 2
 # (if not already a power of 2)
 topology.executor.receive.buffer.size: 32768
 # Can be no larger than half of `topology.executor.receive.buffer.size`
 topology.producer.batch.size: 1

 # Flush tuples are disabled if this is set to 0 or if
 # (topology.producer.batch.size=1 and topology.transfer.batch.size=1).
 topology.batch.flush.interval.millis: 1
 # Check recvQ once every N invocations of Spout's nextTuple() [when ACKs disabled]
 topology.spout.recvq.skips: 3

 topology.disable.loadaware.messaging: false
 topology.state.checkpoint.interval.ms: 1000
 topology.localityaware.higher.bound: 0.8
 topology.localityaware.lower.bound: 0.2
 topology.serialized.message.size.metrics: false

 # Configs for Resource Aware Scheduler
 # Topology priority, in decreasing importance starting from 0 (i.e. 0 is the highest priority and the
 # importance decreases as the priority number increases).
 # Recommended range of 0-29 but no hard limit set.
 topology.priority: 29
 topology.component.resources.onheap.memory.mb: 128.0
 topology.component.resources.offheap.memory.mb: 0.0
 topology.component.cpu.pcore.percent: 10.0
 topology.worker.max.heap.size.mb: 768.0
 topology.scheduler.strategy: "org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy"
 resource.aware.scheduler.priority.strategy: "org.apache.storm.scheduler.resource.strategies.priority.DefaultSchedulingPriorityStrategy"
 # Integers are written without `_` digit separators: `10_000` is an int only under YAML 1.1 and is
 # read as a string by YAML 1.2 core-schema parsers. The numeric values are unchanged.
 # The maximum number of states that will be searched looking for a solution in the constraint solver strategy
 topology.ras.constraint.max.state.search: 10000
 # Daemon limit on the maximum number of states that will be searched looking for a solution
 # in the constraint solver strategy
 resource.aware.scheduler.constraint.max.state.search: 100000
 topology.ras.one.executor.per.worker: false
 topology.ras.one.component.per.worker: false

 ### blacklist.scheduler.* configs
 # 300 seconds = 5 minutes
 blacklist.scheduler.tolerance.time.secs: 300
 blacklist.scheduler.tolerance.count: 3
 # 1800 seconds = 30 minutes
 blacklist.scheduler.resume.time.secs: 1800
 blacklist.scheduler.reporter: "org.apache.storm.scheduler.blacklist.reporters.LogReporter"
 blacklist.scheduler.strategy: "org.apache.storm.scheduler.blacklist.strategies.DefaultBlacklistStrategy"
 blacklist.scheduler.assume.supervisor.bad.based.on.bad.slot: true

 dev.zookeeper.path: "/tmp/dev-storm-zookeeper"

 ### pacemaker.* configs
 # Empty list by default.
 pacemaker.servers: []
 pacemaker.port: 6699
 pacemaker.base.threads: 10
 pacemaker.max.threads: 50
 pacemaker.client.max.threads: 2
 # NOTE(review): unit is not stated in the key name -- confirm (seconds?).
 pacemaker.thread.timeout: 10
 pacemaker.childopts: "-Xmx1024m"
 pacemaker.auth.method: "NONE"
 pacemaker.kerberos.users: []
 # 10485760 = 10 * 1024 * 1024
 pacemaker.thrift.message.size.max: 10485760

 # Default storm daemon metrics reporter plugins
 # (sequence item indented like the other block sequences in this file)
 storm.daemon.metrics.reporter.plugins:
     - "org.apache.storm.daemon.metrics.reporters.JmxPreparableReporter"

 storm.metricstore.class: "org.apache.storm.metricstore.rocksdb.RocksDbStore"
 storm.metricprocessor.class: "org.apache.storm.metricstore.NimbusMetricProcessor"
 storm.metricstore.rocksdb.location: "storm_rocks"
 storm.metricstore.rocksdb.create_if_missing: true
 storm.metricstore.rocksdb.metadata_string_cache_capacity: 4000
 # 240 hours = 10 days
 storm.metricstore.rocksdb.retention_hours: 240

 # Configuration of the cluster metrics consumer
 storm.cluster.metrics.consumer.publish.interval.secs: 60

 storm.resource.isolation.plugin: "org.apache.storm.container.cgroup.CgroupManager"
 # Also determines whether the unit tests for cgroups run.
 # If storm.resource.isolation.plugin.enable is set to false the unit tests for cgroups will not run
 storm.resource.isolation.plugin.enable: false
 storm.cgroup.memory.enforcement.enable: false
 storm.cgroup.inherit.cpuset.configs: false

 # Configs for CGroup support
 storm.cgroup.hierarchy.dir: "/cgroup/storm_resources"
 storm.cgroup.resources:
     - "cpu"
     - "memory"
 storm.cgroup.hierarchy.name: "storm"
 storm.supervisor.cgroup.rootdir: "storm"
 storm.cgroup.cgexec.cmd: "/bin/cgexec"
 storm.cgroup.memory.limit.tolerance.margin.mb: 0.0
 storm.supervisor.memory.limit.tolerance.margin.mb: 128.0
 storm.supervisor.hard.memory.limit.multiplier: 2.0
 # NOTE(review): 2024 stands out next to the 1024 / 1536 thresholds below -- confirm 2048 was not intended.
 storm.supervisor.hard.memory.limit.overage.mb: 2024
 storm.supervisor.low.memory.threshold.mb: 1024
 storm.supervisor.medium.memory.threshold.mb: 1536
 # 30000 ms = 30 seconds
 storm.supervisor.medium.memory.grace.period.ms: 30000

 storm.worker.min.cpu.pcore.percent: 0.0

 storm.topology.classpath.beginning.enabled: false
 # Mapping of metric name to a class in org.apache.storm.metric.cgroup
 # (presumably the class that implements the metric -- confirm).
 worker.metrics:
     "CGroupMemory": "org.apache.storm.metric.cgroup.CGroupMemoryUsage"
     "CGroupMemoryLimit": "org.apache.storm.metric.cgroup.CGroupMemoryLimit"
     "CGroupCpu": "org.apache.storm.metric.cgroup.CGroupCpu"
     "CGroupCpuGuarantee": "org.apache.storm.metric.cgroup.CGroupCpuGuarantee"

 # The number of buckets for running statistics
 num.stat.buckets: 20

 # Metrics v2 configuration (optional)
 #storm.metrics.reporters:
 #  # Graphite Reporter
 #  - class: "org.apache.storm.metrics2.reporters.GraphiteStormReporter"
 #    daemons:
 #        - "supervisor"
 #        - "nimbus"
 #        - "worker"
 #    report.period: 60
 #    report.period.units: "SECONDS"
 #    graphite.host: "localhost"
 #    graphite.port: 2003
 #
 #  # Console Reporter
 #  - class: "org.apache.storm.metrics2.reporters.ConsoleStormReporter"
 #    daemons:
 #        - "worker"
 #    report.period: 10
 #    report.period.units: "SECONDS"
 #    filter:
 #        class: "org.apache.storm.metrics2.filters.RegexFilter"
 #        expression: ".*my_component.*emitted.*"