| <?xml version="1.0"?> |
| <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
| <!-- |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| --> |
| |
| <configuration> |
| <property> |
| <name>java.library.path</name> |
| <value>/usr/local/lib:/opt/local/lib:/usr/lib</value> |
<description>This value is passed to spawned JVMs (e.g., Nimbus, Supervisor, and Workers)
as java.library.path, which tells the JVM where to look for native libraries.
It is necessary to set this config correctly since Storm uses the ZeroMQ and
JZMQ native libs.</description>
| </property> |
| <property> |
| <name>storm.local.dir</name> |
| <value>/hadoop/storm</value> |
| <description>A directory on the local filesystem used by Storm for any local |
| filesystem usage it needs. The directory must exist and the Storm daemons must |
| have permission to read/write from this location.</description> |
| </property> |
| <property> |
| <name>storm.zookeeper.servers</name> |
| <value>['localhost']</value> |
<description>A list of the ZooKeeper server hosts used to manage the cluster.</description>
| </property> |
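<!--
  For a multi-node ZooKeeper ensemble, list every server host. A sketch with
  hypothetical hostnames:

    <property>
      <name>storm.zookeeper.servers</name>
      <value>['zk1.example.com', 'zk2.example.com', 'zk3.example.com']</value>
    </property>
-->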
| <property> |
| <name>storm.zookeeper.port</name> |
| <value>2181</value> |
| <description>The port Storm will use to connect to each of the ZooKeeper servers.</description> |
| </property> |
| <property> |
| <name>storm.zookeeper.root</name> |
| <value>/storm</value> |
| <description>The root location at which Storm stores data in ZooKeeper.</description> |
| </property> |
| <property> |
| <name>storm.zookeeper.session.timeout</name> |
| <value>20000</value> |
| <description>The session timeout for clients to ZooKeeper.</description> |
| </property> |
| <property> |
| <name>storm.zookeeper.connection.timeout</name> |
| <value>15000</value> |
| <description>The connection timeout for clients to ZooKeeper.</description> |
| </property> |
| <property> |
| <name>storm.zookeeper.retry.times</name> |
| <value>5</value> |
<description>The number of times to retry a ZooKeeper operation.</description>
| </property> |
| <property> |
| <name>storm.zookeeper.retry.interval</name> |
| <value>1000</value> |
<description>The interval between retries of a ZooKeeper operation.</description>
| </property> |
| <property> |
| <name>storm.zookeeper.retry.intervalceiling.millis</name> |
| <value>30000</value> |
<description>The ceiling of the interval between retries of a ZooKeeper operation.</description>
| </property> |
| <property> |
| <name>storm.cluster.mode</name> |
| <value>distributed</value> |
| <description>The mode this Storm cluster is running in. Either "distributed" or "local".</description> |
| </property> |
| <property> |
| <name>storm.local.mode.zmq</name> |
| <value>false</value> |
| <description>Whether or not to use ZeroMQ for messaging in local mode. If this is set |
| to false, then Storm will use a pure-Java messaging system. The purpose |
| of this flag is to make it easy to run Storm in local mode by eliminating |
| the need for native dependencies, which can be difficult to install. |
| </description> |
| </property> |
| <property> |
| <name>storm.thrift.transport</name> |
| <value>backtype.storm.security.auth.SimpleTransportPlugin</value> |
| <description>The transport plug-in for Thrift client/server communication.</description> |
| </property> |
| <property> |
| <name>storm.messaging.transport</name> |
| <value>backtype.storm.messaging.netty.Context</value> |
| <description>The transporter for communication among Storm tasks.</description> |
| </property> |
| <property> |
| <name>nimbus.host</name> |
| <value>localhost</value> |
| <description>The host that the master server is running on.</description> |
| </property> |
| <property> |
| <name>nimbus.thrift.port</name> |
| <value>6627</value> |
<description>The port on which the Thrift interface of Nimbus should run. Clients
connect to this port to upload jars and submit topologies.</description>
| </property> |
| <property> |
| <name>nimbus.thrift.max_buffer_size</name> |
| <value>1048576</value> |
<description>The maximum buffer size Thrift should use when reading messages.</description>
| </property> |
| <property> |
| <name>nimbus.childopts</name> |
| <value>-Xmx1024m</value> |
<description>This parameter is used by the storm-deploy project to configure the JVM options for the Nimbus daemon.</description>
| </property> |
| <property> |
| <name>nimbus.task.timeout.secs</name> |
| <value>30</value> |
<description>How long a task can go without heartbeating before Nimbus considers the task dead and reassigns it to another location.</description>
| </property> |
| <property> |
| <name>nimbus.supervisor.timeout.secs</name> |
| <value>60</value> |
<description>How long a supervisor can go without heartbeating before Nimbus considers it dead and stops assigning new work to it.</description>
| </property> |
| <property> |
| <name>nimbus.monitor.freq.secs</name> |
| <value>10</value> |
| <description> |
How often Nimbus should wake up to check heartbeats and do reassignments. Note
that if a machine ever goes down, Nimbus will immediately wake up and take action.
This parameter is for checking for failures when there's no explicit event like that occurring.
| </description> |
| </property> |
| <property> |
| <name>nimbus.cleanup.inbox.freq.secs</name> |
| <value>600</value> |
<description>How often Nimbus should wake up the cleanup thread to clean the inbox.</description>
| </property> |
| <property> |
| <name>nimbus.inbox.jar.expiration.secs</name> |
| <value>3600</value> |
| <description> |
The length of time a jar file lives in the inbox before being deleted by the cleanup thread.

Keep this value greater than or equal to nimbus.cleanup.inbox.freq.secs. Note that the
actual time before an inbox jar is deleted can be somewhat longer than this expiration,
depending on how often the cleanup thread runs (nimbus.cleanup.inbox.freq.secs).
| </description> |
| </property> |
| <property> |
| <name>nimbus.task.launch.secs</name> |
| <value>120</value> |
| <description>A special timeout used when a task is initially launched. During launch, this is the timeout |
| used until the first heartbeat, overriding nimbus.task.timeout.secs.</description> |
| </property> |
| <property> |
| <name>nimbus.reassign</name> |
| <value>true</value> |
<description>Whether or not Nimbus should reassign tasks if it detects that a task has gone down.
Defaults to true; changing this value is not recommended.</description>
| </property> |
| <property> |
| <name>nimbus.file.copy.expiration.secs</name> |
| <value>600</value> |
<description>During upload/download with the master, how long an upload or download connection
can be idle before Nimbus considers it dead and drops the connection.</description>
| </property> |
| <property> |
| <name>nimbus.topology.validator</name> |
| <value>backtype.storm.nimbus.DefaultTopologyValidator</value> |
| <description>A custom class that implements ITopologyValidator that is run whenever a |
| topology is submitted. Can be used to provide business-specific logic for |
| whether topologies are allowed to run or not.</description> |
| </property> |
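<!--
  A minimal validator sketch (illustrative, not shipped with Storm), assuming
  the 0.9-era backtype.storm.nimbus.ITopologyValidator interface; the naming
  rule below is a hypothetical business policy:

    import java.util.Map;
    import backtype.storm.generated.InvalidTopologyException;
    import backtype.storm.generated.StormTopology;
    import backtype.storm.nimbus.ITopologyValidator;

    public class NamePrefixValidator implements ITopologyValidator {
        public void prepare(Map stormConf) {}

        public void validate(String topologyName, Map topologyConf, StormTopology topology)
                throws InvalidTopologyException {
            // Hypothetical rule: only allow topologies with an approved name prefix.
            if (!topologyName.startsWith("prod.")) {
                throw new InvalidTopologyException("Topology name must start with 'prod.'");
            }
        }
    }

  Pointing nimbus.topology.validator at such a class (on Nimbus's classpath) enforces it.
-->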
| <property> |
| <name>ui.port</name> |
| <value>8744</value> |
| <description>Storm UI binds to this port.</description> |
| </property> |
| <property> |
| <name>ui.childopts</name> |
| <value>-Xmx768m</value> |
| <description>Childopts for Storm UI Java process.</description> |
| </property> |
| <property> |
| <name>logviewer.port</name> |
| <value>8000</value> |
| <description>HTTP UI port for log viewer.</description> |
| </property> |
| <property> |
| <name>logviewer.childopts</name> |
| <value>-Xmx128m</value> |
| <description>Childopts for log viewer java process.</description> |
| </property> |
| <property> |
| <name>logviewer.appender.name</name> |
| <value>A1</value> |
| <description>Appender name used by log viewer to determine log directory.</description> |
| </property> |
| <property> |
| <name>drpc.port</name> |
| <value>3772</value> |
<description>This port is used by Storm DRPC for receiving DRPC requests from clients.</description>
| </property> |
| <property> |
| <name>drpc.worker.threads</name> |
| <value>64</value> |
<description>DRPC Thrift server worker threads.</description>
| </property> |
| <property> |
| <name>drpc.queue.size</name> |
| <value>128</value> |
<description>DRPC Thrift server queue size.</description>
| </property> |
| <property> |
| <name>drpc.invocations.port</name> |
| <value>3773</value> |
| <description>This port on Storm DRPC is used by DRPC topologies to receive function invocations and send results back.</description> |
| </property> |
| <property> |
| <name>drpc.request.timeout.secs</name> |
| <value>600</value> |
| <description>The timeout on DRPC requests within the DRPC server. Defaults to 10 minutes. Note that requests can also |
| timeout based on the socket timeout on the DRPC client, and separately based on the topology message |
| timeout for the topology implementing the DRPC function.</description> |
| </property> |
| <property> |
| <name>drpc.childopts</name> |
| <value>-Xmx768m</value> |
| <description>Childopts for Storm DRPC Java process.</description> |
| </property> |
| <property> |
| <name>transactional.zookeeper.root</name> |
| <value>/transactional</value> |
| <description>The root directory in ZooKeeper for metadata about TransactionalSpouts.</description> |
| </property> |
| <property> |
| <name>transactional.zookeeper.servers</name> |
| <value>null</value> |
<description>The list of ZooKeeper servers in which to keep the transactional state. If null
(the default), Storm uses storm.zookeeper.servers.</description>
| </property> |
| <property> |
| <name>transactional.zookeeper.port</name> |
| <value>null</value> |
<description>The port to use to connect to the transactional ZooKeeper servers. If null
(the default), Storm uses storm.zookeeper.port.</description>
| </property> |
| <property> |
| <name>supervisor.slots.ports</name> |
| <value>[6700, 6701]</value> |
<description>A list of ports on which this supervisor can run workers. Each worker uses one port, and
the supervisor will only run one worker per port. Use this configuration to tune
how many workers run on each machine.</description>
| </property> |
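<!--
  Example: a machine that should run up to four workers could list four ports
  (the port numbers are illustrative):

    <value>[6700, 6701, 6702, 6703]</value>
-->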
| <property> |
| <name>supervisor.childopts</name> |
| <value>-Xmx256m</value> |
<description>This parameter is used by the storm-deploy project to configure the JVM options for the supervisor daemon.</description>
| </property> |
| <property> |
| <name>supervisor.worker.start.timeout.secs</name> |
| <value>120</value> |
<description>How long a worker can go without heartbeating during the initial launch before
the supervisor tries to restart the worker process. This value overrides
supervisor.worker.timeout.secs during launch because there is additional
overhead to starting and configuring the JVM on launch.</description>
| </property> |
| <property> |
| <name>supervisor.worker.timeout.secs</name> |
| <value>30</value> |
| <description>How long a worker can go without heartbeating before the supervisor tries to restart the worker process.</description> |
| </property> |
| <property> |
| <name>supervisor.monitor.frequency.secs</name> |
| <value>3</value> |
| <description>How often the supervisor checks the worker heartbeats to see if any of them need to be restarted.</description> |
| </property> |
| <property> |
| <name>supervisor.heartbeat.frequency.secs</name> |
| <value>5</value> |
| <description>How often the supervisor sends a heartbeat to the master.</description> |
| </property> |
| <property> |
| <name>worker.childopts</name> |
| <value>-Xmx768m</value> |
<description>The JVM opts provided to workers launched by this supervisor. All "%ID%" substrings are replaced with an identifier for this worker.</description>
| </property> |
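<!--
  Illustrative value (the extra flag is an example, not a default): each %ID%
  substring is replaced per worker, so per-worker log files do not collide:

    <value>-Xmx768m -Xloggc:/var/log/storm/gc.worker.%ID%.log</value>
-->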
| <property> |
| <name>worker.heartbeat.frequency.secs</name> |
| <value>1</value> |
| <description>How often this worker should heartbeat to the supervisor.</description> |
| </property> |
| <property> |
| <name>task.heartbeat.frequency.secs</name> |
| <value>3</value> |
| <description>How often a task should heartbeat its status to the master.</description> |
| </property> |
| <property> |
| <name>task.refresh.poll.secs</name> |
| <value>10</value> |
| <description>How often a task should sync its connections with other tasks (if a task is |
| reassigned, the other tasks sending messages to it need to refresh their connections). |
| In general though, when a reassignment happens other tasks will be notified |
| almost immediately. This configuration is here just in case that notification doesn't |
| come through.</description> |
| </property> |
| <property> |
| <name>zmq.threads</name> |
| <value>1</value> |
<description>The number of threads that should be used by the ZeroMQ context in each worker process.</description>
| </property> |
| <property> |
| <name>zmq.linger.millis</name> |
| <value>5000</value> |
| <description>How long a connection should retry sending messages to a target host when |
| the connection is closed. This is an advanced configuration and can almost |
| certainly be ignored.</description> |
| </property> |
| <property> |
| <name>zmq.hwm</name> |
| <value>0</value> |
<description>The high water mark for the ZeroMQ push sockets used for networking. Use this config to prevent buffer explosion
on the networking layer.</description>
| </property> |
| <property> |
| <name>storm.messaging.netty.server_worker_threads</name> |
| <value>1</value> |
<description>Netty-based messaging: the number of worker threads for the server.</description>
| </property> |
| <property> |
| <name>storm.messaging.netty.client_worker_threads</name> |
| <value>1</value> |
<description>Netty-based messaging: the number of worker threads for the client.</description>
| </property> |
| <property> |
| <name>storm.messaging.netty.buffer_size</name> |
| <value>5242880</value> |
<description>Netty-based messaging: the size of the send/receive buffers, in bytes.</description>
| </property> |
| <property> |
| <name>storm.messaging.netty.max_retries</name> |
| <value>30</value> |
<description>Netty-based messaging: the maximum number of retries a peer will perform when a remote is not accessible.</description>
| </property> |
| <property> |
| <name>storm.messaging.netty.max_wait_ms</name> |
| <value>1000</value> |
<description>Netty-based messaging: the maximum number of milliseconds a peer will wait between connection retries.</description>
| </property> |
| <property> |
| <name>storm.messaging.netty.min_wait_ms</name> |
| <value>100</value> |
<description>Netty-based messaging: the minimum number of milliseconds a peer will wait between connection retries.</description>
| </property> |
| <property> |
| <name>topology.enable.message.timeouts</name> |
| <value>true</value> |
<description>Whether Storm should time out messages. Defaults to true. Setting this to false is meant
for unit tests, to prevent tuples from being accidentally timed out during the test.</description>
| </property> |
| <property> |
| <name>topology.debug</name> |
| <value>false</value> |
| <description>When set to true, Storm will log every message that's emitted.</description> |
| </property> |
| <property> |
| <name>topology.optimize</name> |
| <value>true</value> |
| <description>Whether or not the master should optimize topologies by running multiple tasks in a single thread where appropriate.</description> |
| </property> |
| <property> |
| <name>topology.workers</name> |
| <value>1</value> |
<description>How many processes should be spawned around the cluster to execute this
topology. Each process will execute some number of tasks as threads within
it. This parameter should be used in conjunction with the parallelism hints
on each component in the topology to tune the performance of a topology.</description>
| </property> |
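<!--
  Sketch of tuning workers together with parallelism hints via Storm's Java
  API (MySpout, MyBolt, and the counts are hypothetical):

    import backtype.storm.Config;
    import backtype.storm.StormSubmitter;
    import backtype.storm.topology.TopologyBuilder;

    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout("words", new MySpout(), 2);        // 2 spout executors
    builder.setBolt("counter", new MyBolt(), 8)         // 8 bolt executors
           .shuffleGrouping("words");

    Config conf = new Config();
    conf.setNumWorkers(4);                              // topology.workers
    StormSubmitter.submitTopology("word-count", conf, builder.createTopology());

  The 10 executors are spread as threads across the 4 worker processes.
-->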
| <property> |
| <name>topology.acker.executors</name> |
| <value>null</value> |
| <description>How many executors to spawn for ackers. |
| |
| If this is set to 0, then Storm will immediately ack tuples as soon |
| as they come off the spout, effectively disabling reliability. |
| </description> |
| </property> |
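<!--
  Ackers can also be tuned per topology with the Java API; for example, a
  sketch that disables reliability entirely:

    Config conf = new Config();
    conf.setNumAckers(0);   // sets topology.acker.executors to 0
-->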
| <property> |
| <name>topology.message.timeout.secs</name> |
| <value>30</value> |
| <description>The maximum amount of time given to the topology to fully process a message |
| emitted by a spout. If the message is not acked within this time frame, Storm |
will fail the message on the spout. Some spout implementations will then replay
| the message at a later time.</description> |
| </property> |
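<!--
  Per-topology override sketch using the Java API (the 60-second value is
  illustrative):

    Config conf = new Config();
    conf.setMessageTimeoutSecs(60);   // sets topology.message.timeout.secs
-->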
| <property> |
| <name>topology.skip.missing.kryo.registrations</name> |
| <value>false</value> |
<description>Whether or not Storm should skip loading Kryo registrations for which it
does not know the class or does not have the serializer implementation. Otherwise, the task will
fail to load and will throw an error at runtime. The use case is declaring your
serializations in the cluster's storm.yaml files rather than with every single
topology submission: different applications may use different serializations, so
a single application may not have the code for the serializers used by other apps.
By setting this config to true, Storm will ignore the registrations it is missing
rather than throw an error.</description>
| </property> |
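<!--
  Registration sketch with the Java API (MyEvent, MyOther, and
  MyOtherSerializer are hypothetical application classes):

    Config conf = new Config();
    conf.registerSerialization(MyEvent.class);                    // Kryo's default serializer
    conf.registerSerialization(MyOther.class, MyOtherSerializer.class);
    conf.setSkipMissingKryoRegistrations(true);                   // tolerate registrations this app lacks
-->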
| <property> |
| <name>topology.max.task.parallelism</name> |
| <value>null</value> |
| <description>The maximum parallelism allowed for a component in this topology. This configuration is |
| typically used in testing to limit the number of threads spawned in local mode.</description> |
| </property> |
| <property> |
| <name>topology.max.spout.pending</name> |
| <value>null</value> |
| <description>The maximum number of tuples that can be pending on a spout task at any given time. |
| This config applies to individual tasks, not to spouts or topologies as a whole. |
| |
| A pending tuple is one that has been emitted from a spout but has not been acked or failed yet. |
| Note that this config parameter has no effect for unreliable spouts that don't tag |
| their tuples with a message id.</description> |
| </property> |
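<!--
  Sketch: cap in-flight tuples per spout task with the Java API (the limit is
  illustrative):

    Config conf = new Config();
    conf.setMaxSpoutPending(1000);   // sets topology.max.spout.pending
-->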
| <property> |
| <name>topology.state.synchronization.timeout.secs</name> |
| <value>60</value> |
| <description>The maximum amount of time a component gives a source of state to synchronize before it requests |
| synchronization again.</description> |
| </property> |
| <property> |
| <name>topology.stats.sample.rate</name> |
| <value>0.05</value> |
<description>The rate at which tuples are sampled to produce stats for a task (e.g., 0.05 samples 5% of tuples).</description>
| </property> |
| <property> |
| <name>topology.builtin.metrics.bucket.size.secs</name> |
| <value>60</value> |
<description>The time period that built-in metrics data is bucketed into.</description>
| </property> |
| <property> |
| <name>topology.fall.back.on.java.serialization</name> |
| <value>true</value> |
| <description>Whether or not to use Java serialization in a topology.</description> |
| </property> |
| <property> |
| <name>topology.worker.childopts</name> |
| <value>null</value> |
<description>Topology-specific options for the worker child process. This is used in addition to worker.childopts.</description>
| </property> |
| <property> |
| <name>topology.executor.receive.buffer.size</name> |
| <value>1024</value> |
| <description>The size of the Disruptor receive queue for each executor. Must be a power of 2.</description> |
| </property> |
| <property> |
| <name>topology.executor.send.buffer.size</name> |
| <value>1024</value> |
| <description>The size of the Disruptor send queue for each executor. Must be a power of 2.</description> |
| </property> |
| <property> |
| <name>topology.receiver.buffer.size</name> |
| <value>8</value> |
| <description>The maximum number of messages to batch from the thread receiving off the network to the |
| executor queues. Must be a power of 2.</description> |
| </property> |
| <property> |
| <name>topology.transfer.buffer.size</name> |
| <value>1024</value> |
| <description>The size of the Disruptor transfer queue for each worker.</description> |
| </property> |
| <property> |
| <name>topology.tick.tuple.freq.secs</name> |
| <value>null</value> |
| <description>How often a tick tuple from the "__system" component and "__tick" stream should be sent |
| to tasks. Meant to be used as a component-specific configuration.</description> |
| </property> |
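<!--
  Because this is a component-specific config, a bolt typically requests its
  own tick stream by overriding getComponentConfiguration(). A sketch
  (MyAggregatingBolt and the 60-second period are hypothetical):

    public class MyAggregatingBolt extends BaseRichBolt {
        @Override
        public Map<String, Object> getComponentConfiguration() {
            Map<String, Object> conf = new HashMap<String, Object>();
            conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 60);  // one tick per minute for this bolt
            return conf;
        }
        // execute() can detect ticks by checking the tuple's source against
        // Constants.SYSTEM_COMPONENT_ID and Constants.SYSTEM_TICK_STREAM_ID.
    }
-->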
| <property> |
| <name>topology.worker.shared.thread.pool.size</name> |
| <value>4</value> |
| <description>The size of the shared thread pool for worker tasks to make use of. The thread pool can be accessed |
| via the TopologyContext.</description> |
| </property> |
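<!--
  Access sketch, assuming the TopologyContext.getSharedExecutor() accessor
  from this Storm line; the background task is hypothetical:

    // inside a bolt's prepare(Map conf, TopologyContext context, OutputCollector collector)
    ExecutorService pool = context.getSharedExecutor();
    pool.submit(new Runnable() {
        public void run() { /* hypothetical background work */ }
    });
-->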
| <property> |
| <name>topology.disruptor.wait.strategy</name> |
| <value>com.lmax.disruptor.BlockingWaitStrategy</value> |
<description>Configure the wait strategy used for internal queuing. Can be used to trade off latency
vs. throughput.</description>
| </property> |
| <property> |
| <name>topology.spout.wait.strategy</name> |
| <value>backtype.storm.spout.SleepSpoutWaitStrategy</value> |
| <description>A class that implements a strategy for what to do when a spout needs to wait. Waiting is |
| triggered in one of two conditions: |
| |
| 1. nextTuple emits no tuples |
| 2. The spout has hit maxSpoutPending and can't emit any more tuples</description> |
| </property> |
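<!--
  A custom strategy sketch, assuming the 0.9-era
  backtype.storm.spout.ISpoutWaitStrategy interface (prepare/emptyEmit); the
  backoff policy is illustrative:

    import java.util.Map;
    import backtype.storm.spout.ISpoutWaitStrategy;

    public class BackoffWaitStrategy implements ISpoutWaitStrategy {
        public void prepare(Map conf) {}

        // streak counts consecutive empty emits; back off exponentially, capped at 100ms
        public void emptyEmit(long streak) {
            try {
                Thread.sleep(Math.min(100L, 1L << Math.min(streak, 6)));
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    }
-->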
| <property> |
| <name>topology.sleep.spout.wait.strategy.time.ms</name> |
| <value>1</value> |
<description>The number of milliseconds the SleepSpoutWaitStrategy should sleep for.</description>
| </property> |
| <property> |
| <name>topology.error.throttle.interval.secs</name> |
| <value>10</value> |
<description>The interval in seconds to use for determining whether to throttle errors reported to ZooKeeper. For example,
an interval of 10 seconds with topology.max.error.report.per.interval set to 5 will only allow 5 errors to be
reported to ZooKeeper per task for every 10 second interval of time.</description>
| </property> |
| <property> |
| <name>topology.max.error.report.per.interval</name> |
| <value>5</value> |
<description>The maximum number of errors per task that may be reported to ZooKeeper within each
topology.error.throttle.interval.secs window. For example, an interval of 10 seconds with this value
set to 5 will only allow 5 errors per task to be reported to ZooKeeper every 10 seconds.</description>
| </property> |
| <property> |
| <name>topology.kryo.factory</name> |
| <value>backtype.storm.serialization.DefaultKryoFactory</value> |
| <description>Class that specifies how to create a Kryo instance for serialization. Storm will then apply |
| topology.kryo.register and topology.kryo.decorators on top of this. The default implementation |
| implements topology.fall.back.on.java.serialization and turns references off.</description> |
| </property> |
| <property> |
| <name>topology.tuple.serializer</name> |
| <value>backtype.storm.serialization.types.ListDelegateSerializer</value> |
<description>The serializer class for ListDelegate (tuple payload).
The default serializer is ListDelegateSerializer.</description>
| </property> |
| <property> |
| <name>topology.trident.batch.emit.interval.millis</name> |
| <value>500</value> |
| <description>How often a batch can be emitted in a Trident topology.</description> |
| </property> |
| <property> |
| <name>dev.zookeeper.path</name> |
| <value>/tmp/dev-storm-zookeeper</value> |
<description>The path to use as the ZooKeeper dir when running a ZooKeeper server via
"storm dev-zookeeper". This ZooKeeper instance is only intended for development;
it is not a production-grade ZooKeeper setup.</description>
| </property> |
| </configuration> |