| # Cassandra storage config YAML |
| |
| # NOTE: |
| # See http://wiki.apache.org/cassandra/StorageConfiguration for |
| # full explanations of configuration directives |
| # /NOTE |
| |
| # The name of the cluster. This is mainly used to prevent machines in |
| # one logical cluster from joining another. |
| cluster_name: 'Test Cluster' |
| |
| # You should always specify initial_token when setting up a production |
| # cluster for the first time, and often when adding capacity later. |
| # The principle is that each node should be given an equal slice of |
| # the token ring; see http://wiki.apache.org/cassandra/Operations |
| # for more details. |
| # |
| # If blank, Cassandra will request a token bisecting the range of |
| # the heaviest-loaded existing node. If there is no load information |
| # available, such as is the case with a new cluster, it will pick |
| # a random token, which will lead to hot spots. |
| initial_token: |
| |
| # Set to true to make new [non-seed] nodes automatically migrate data |
| # to themselves from the pre-existing nodes in the cluster. Defaults |
| # to false because you can only bootstrap N machines at a time from |
| # an existing cluster of N, so if you are bringing up a cluster of |
| # 10 machines with 3 seeds you would have to do it in stages. Leaving |
| # this off for the initial start simplifies that. |
| auto_bootstrap: false |
| |
| # See http://wiki.apache.org/cassandra/HintedHandoff |
| hinted_handoff_enabled: true |
| |
| # authentication backend, implementing IAuthenticator; used to identify users |
| authenticator: org.apache.cassandra.auth.AllowAllAuthenticator |
| |
| # authorization backend, implementing IAuthority; used to limit access/provide permissions |
| authority: org.apache.cassandra.auth.AllowAllAuthority |
| |
| # Any IPartitioner may be used, including your own as long as it is on |
| # the classpath. Out of the box, Cassandra provides |
| # org.apache.cassandra.dht.RandomPartitioner, |
| # org.apache.cassandra.dht.ByteOrderedPartitioner, |
| # org.apache.cassandra.dht.OrderPreservingPartitioner, and |
| # org.apache.cassandra.dht.CollatingOrderPreservingPartitioner. |
| # (CollatingOPP collates according to EN,US rules, not naive byte |
| # ordering. Use this as an example if you need locale-aware collation.) |
| partitioner: org.apache.cassandra.dht.RandomPartitioner |
| |
| # directories where Cassandra should store data on disk. |
| data_file_directories: |
| - /var/lib/cassandra/data |
| |
| # commit log |
| commitlog_directory: /var/lib/cassandra/commitlog |
| |
| # saved caches |
| saved_caches_directory: /var/lib/cassandra/saved_caches |
| |
| # Size to allow commitlog to grow to before creating a new segment |
| commitlog_rotation_threshold_in_mb: 128 |
| |
| # commitlog_sync supports the following modes: |
| # |
| # batch: |
| # In batch mode, Cassandra won't ack writes until the commit log |
| # has been fsynced to disk. But fsyncing each write at once is |
| # performance-prohibitive, so instead Cassandra will wait up to |
| # commitlog_sync_batch_window_in_ms milliseconds for other writes, before |
| # syncing that "batch" at once. This causes a performance penalty |
| # of about 15% when the commitlog is on a separate device, and much more |
| # when it shares the same device as the data files. |
| # |
| # periodic: |
| # Writes may be acked immediately (without waiting for the commitlog |
| # append) and the CommitLog is simply synced every |
| # commitlog_sync_period_in_ms milliseconds. |
| # |
| # periodic_without_flush: |
| # Like periodic, but the commitlog write buffer is only flushed |
| # before the sync, so any interruption to the process can be |
| # expected to lose some writes. This is the old 0.6 periodic |
| # behavior and will be removed in future versions if testing |
| # continues to show no performance benefit over normal periodic. |
| commitlog_sync: periodic |
| commitlog_sync_period_in_ms: 10000 |
| # commitlog_sync: batch |
| # commitlog_sync_batch_window_in_ms: 10 |
| |
| # Addresses of hosts that are deemed contact points. |
| # Cassandra nodes use this list of hosts to find each other and learn |
| # the topology of the ring. You must change this if you are running |
| # multiple nodes! |
| seeds: |
| - 127.0.0.1 |
| |
| # Access mode. mmapped i/o is substantially faster, but only practical on |
| # a 64bit machine (which notably does not include EC2 "small" instances) |
| # or relatively small datasets. "auto", the safe choice, will enable |
| # mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only" |
| # (which may allow you to get part of the benefits of mmap on a 32bit |
| # machine by mmapping only index files) and "standard". |
| # (The buffer size settings that follow only apply to standard, |
| # non-mmapped i/o.) |
| disk_access_mode: auto |
| |
| # Unlike most systems, in Cassandra writes are faster than reads, so |
| # you can afford more of those in parallel. A good rule of thumb is 2 |
| # concurrent reads per processor core. Increase concurrent_writes to |
| # the number of clients writing at once if you enable CommitLogSync + |
| # CommitLogSyncDelay. |
| # NOTE(review): CommitLogSync / CommitLogSyncDelay are not keys in this |
| # file; presumably they refer to the commitlog_sync settings -- confirm. |
| concurrent_reads: 8 |
| concurrent_writes: 32 |
| |
| # This sets the number of memtable flush writer threads. These will |
| # be blocked by disk io, and each one will hold a memtable in memory |
| # while blocked. If you have a large heap and many data directories, |
| # you can increase this value for better flush performance. |
| # By default this will be set to the number of data directories defined. |
| #memtable_flush_writers: 1 |
| |
| # Buffer size to use when performing contiguous column slices. |
| # Increase this to the size of the column slices you typically perform |
| sliced_buffer_size_in_kb: 64 |
| |
| # TCP port, for commands and data |
| storage_port: 7000 |
| |
| # Address to bind to and tell other Cassandra nodes to connect to. You |
| # _must_ change this if you want multiple nodes to be able to |
| # communicate! |
| # |
| # Leaving it blank leaves it up to InetAddress.getLocalHost(). This |
| # will always do the Right Thing *if* the node is properly configured |
| # (hostname, name resolution, etc), and the Right Thing is to use the |
| # address associated with the hostname (it might not be). |
| # |
| # Setting this to 0.0.0.0 is always wrong. |
| listen_address: localhost |
| |
| # The address to bind the Thrift RPC service to -- clients connect |
| # here. Unlike listen_address above, you *can* specify 0.0.0.0 here if |
| # you want Thrift to listen on all interfaces. |
| # |
| # Leaving this blank has the same effect it does for listen_address |
| # (i.e. it will be based on the configured hostname of the node). |
| rpc_address: localhost |
| # port for Thrift to listen for clients on |
| rpc_port: 9160 |
| |
| # enable or disable keepalive on rpc connections |
| rpc_keepalive: true |
| |
| # uncomment to set socket buffer sizes on rpc connections |
| # rpc_send_buff_size_in_bytes: |
| # rpc_recv_buff_size_in_bytes: |
| |
| # Frame size for thrift (maximum field length). |
| # 0 disables TFramedTransport in favor of TSocket. This option |
| # is deprecated; we strongly recommend using Framed mode. |
| thrift_framed_transport_size_in_mb: 15 |
| |
| # The max length of a thrift message, including all fields and |
| # internal thrift overhead. |
| thrift_max_message_length_in_mb: 16 |
| |
| # Whether or not to take a snapshot before each compaction. Be |
| # careful using this option, since Cassandra won't clean up the |
| # snapshots for you. Mostly useful if you're paranoid when there |
| # is a data format change. |
| snapshot_before_compaction: false |
| |
| # change this to increase the compaction thread's priority. In java, 1 is the |
| # lowest priority and that is our default. |
| # compaction_thread_priority: 1 |
| |
| # The threshold size in megabytes the binary memtable must grow to, |
| # before it's submitted for flushing to disk. |
| binary_memtable_throughput_in_mb: 256 |
| |
| # Add column indexes to a row after its contents reach this size. |
| # Increase if your column values are large, or if you have a very large |
| # number of columns. The competing causes are, Cassandra has to |
| # deserialize this much of the row to read a single column, so you want |
| # it to be small - at least if you do many partial-row reads - but all |
| # the index data is read for each access, so you don't want to generate |
| # that wastefully either. |
| column_index_size_in_kb: 64 |
| |
| # Size limit for rows being compacted in memory. Larger rows will spill |
| # over to disk and use a slower two-pass compaction process. A message |
| # will be logged specifying the row key. |
| in_memory_compaction_limit_in_mb: 64 |
| |
| # Time to wait for a reply from other nodes before failing the command |
| rpc_timeout_in_ms: 10000 |
| |
| # phi value that must be reached for a host to be marked down. |
| # most users should never need to adjust this. |
| # phi_convict_threshold: 8 |
| |
| # endpoint_snitch -- Set this to a class that implements |
| # IEndpointSnitch, which will let Cassandra know enough |
| # about your network topology to route requests efficiently. |
| # Out of the box, Cassandra provides |
| # - org.apache.cassandra.locator.SimpleSnitch: |
| #     Treats Strategy order as proximity. This improves cache locality |
| #     when disabling read repair, which can further improve throughput. |
| # - org.apache.cassandra.locator.RackInferringSnitch: |
| #     Proximity is determined by rack and data center, which are |
| #     assumed to correspond to the 3rd and 2nd octet of each node's |
| #     IP address, respectively. |
| # - org.apache.cassandra.locator.PropertyFileSnitch: |
| #     Proximity is determined by rack and data center, which are |
| #     explicitly configured in cassandra-topology.properties. |
| endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch |
| |
| # dynamic_snitch -- This boolean controls whether the above snitch is |
| # wrapped with a dynamic snitch, which will monitor read latencies |
| # and avoid reading from hosts that have slowed (due to compaction, |
| # for instance) |
| dynamic_snitch: true |
| # controls how often to perform the more expensive part of host score |
| # calculation |
| dynamic_snitch_update_interval_in_ms: 100 |
| # controls how often to reset all host scores, allowing a bad host to |
| # possibly recover |
| dynamic_snitch_reset_interval_in_ms: 600000 |
| # if set greater than zero and read_repair_chance is < 1.0, this will allow |
| # 'pinning' of replicas to hosts in order to increase cache capacity. |
| # The badness threshold will control how much worse the pinned host has to be |
| # before the dynamic snitch will prefer other replicas over it. This is |
| # expressed as a double which represents a percentage. Thus, a value of |
| # 0.2 means Cassandra would continue to prefer the static snitch values |
| # until the pinned host was 20% worse than the fastest. |
| dynamic_snitch_badness_threshold: 0.0 |
| |
| # request_scheduler -- Set this to a class that implements |
| # RequestScheduler, which will schedule incoming client requests |
| # according to the specific policy. This is useful for multi-tenancy |
| # with a single Cassandra cluster. |
| # NOTE: This is specifically for requests from the client and does |
| # not affect inter node communication. |
| # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place |
| # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of |
| # client requests to a node with a separate queue for each |
| # request_scheduler_id. The scheduler is further customized by |
| # request_scheduler_options as described below. |
| request_scheduler: org.apache.cassandra.scheduler.NoScheduler |
| |
| # Scheduler Options vary based on the type of scheduler |
| # NoScheduler - Has no options |
| # RoundRobin |
| # - throttle_limit -- The throttle_limit is the number of in-flight |
| # requests per client. Requests beyond |
| # that limit are queued up until |
| # running requests can complete. |
| # The value of 80 here is twice the number of |
| # concurrent_reads + concurrent_writes. |
| # - default_weight -- default_weight is optional and allows for |
| # overriding the default which is 1. |
| # - weights -- Weights are optional and will default to 1 or the |
| # overridden default_weight. The weight translates into how |
| # many requests are handled during each turn of the |
| # RoundRobin, based on the scheduler id. |
| # |
| # request_scheduler_options: |
| #   throttle_limit: 80 |
| #   default_weight: 5 |
| #   weights: |
| #     Keyspace1: 1 |
| #     Keyspace2: 5 |
| |
| # request_scheduler_id -- An identifier based on which to perform |
| # the request scheduling. Currently the only valid option is keyspace. |
| # request_scheduler_id: keyspace |
| |
| # The Index Interval determines how large the sampling of row keys |
| # is for a given SSTable. The larger the sampling, the more effective |
| # the index is at the cost of space. |
| index_interval: 128 |
| |
| # Keyspaces have ColumnFamilies. (Usually 1 KS per application.) |
| # ColumnFamilies have Rows. (Dozens of CFs per KS.) |
| # Rows contain Columns. (Many per CF.) |
| # Columns contain name:value:timestamp. (Many per Row.) |
| # |
| # A KS is most similar to a schema, and a CF is most similar to a relational table. |
| # |
| # Keyspaces, ColumnFamilies, and Columns may carry additional |
| # metadata that change their behavior. These are as follows: |
| # |
| # Keyspace required parameters: |
| # - name: name of the keyspace; "system" is |
| # reserved for Cassandra Internals. |
| # - replica_placement_strategy: the class that determines how replicas |
| # are distributed among nodes. Contains both the class as well as |
| # configuration information. Must extend AbstractReplicationStrategy. |
| # Out of the box, Cassandra provides |
| # * org.apache.cassandra.locator.SimpleStrategy |
| # * org.apache.cassandra.locator.NetworkTopologyStrategy |
| # * org.apache.cassandra.locator.OldNetworkTopologyStrategy |
| # |
| # SimpleStrategy merely places the first |
| # replica at the node whose token is closest to the key (as determined |
| # by the Partitioner), and additional replicas on subsequent nodes |
| # along the ring in increasing Token order. |
| # |
| # With NetworkTopologyStrategy, |
| # for each datacenter, you can specify how many replicas you want |
| # on a per-keyspace basis. Replicas are placed on different racks |
| # within each DC, if possible. This strategy also requires rack aware |
| # snitch, such as RackInferringSnitch or PropertyFileSnitch. |
| # An example: |
| # - name: Keyspace1 |
| #   replica_placement_strategy: org.apache.cassandra.locator.NetworkTopologyStrategy |
| #   strategy_options: |
| #     DC1: 3 |
| #     DC2: 2 |
| #     DC3: 1 |
| # |
| # OldNetworkTopologyStrategy [formerly RackAwareStrategy] |
| # places one replica in each of two datacenters, and the third on a |
| # different rack in the first. Additional datacenters are not |
| # guaranteed to get a replica. Additional replicas after three are placed |
| # in ring order after the third without regard to rack or datacenter. |
| # - replication_factor: Number of replicas of each row |
| # Keyspace optional parameters: |
| # - strategy_options: Additional information for the replication strategy. |
| # - column_families: |
| # ColumnFamily required parameters: |
| # - name: name of the ColumnFamily. Must not contain the character "-". |
| # - compare_with: tells Cassandra how to sort the columns for slicing |
| # operations. The default is BytesType, which is a straightforward |
| # lexical comparison of the bytes in each column. Other options are |
| # AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType, LongType, |
| # and IntegerType (a generic variable-length integer type). |
| # You can also specify the fully-qualified class name to a class of |
| # your choice extending org.apache.cassandra.db.marshal.AbstractType. |
| # |
| # ColumnFamily optional parameters: |
| # - keys_cached: specifies the number of keys per sstable whose |
| # locations we keep in memory in "mostly LRU" order. (JUST the key |
| # locations, NOT any column values.) Specify a fraction (value less |
| # than 1) or an absolute number of keys to cache. Defaults to 200000 |
| # keys. |
| # - rows_cached: specifies the number of rows whose entire contents we |
| # cache in memory. Do not use this on ColumnFamilies with large rows, |
| # or ColumnFamilies with high write:read ratios. Specify a fraction |
| # (value less than 1) or an absolute number of rows to cache. |
| # Defaults to 0. (i.e. row caching is off by default) |
| # - comment: used to attach additional human-readable information about |
| # the column family to its definition. |
| # - read_repair_chance: specifies the probability with which read |
| # repairs should be invoked on non-quorum reads. must be between 0 |
| # and 1. defaults to 1.0 (always read repair). |
| # - gc_grace_seconds: specifies the time to wait before garbage |
| # collecting tombstones (deletion markers). defaults to 864000 (10 |
| # days). See http://wiki.apache.org/cassandra/DistributedDeletes |
| # - default_validation_class: specifies a validator class to use for |
| # validating all the column values in the CF. |
| # NOTE: |
| # min_compaction_threshold must be less than max_compaction_threshold! |
| # - min_compaction_threshold: the minimum number of SSTables needed |
| # to start a minor compaction. increasing this will cause minor |
| # compactions to start less frequently and be more intensive. setting |
| # this to 0 disables minor compactions. defaults to 4. |
| # - max_compaction_threshold: the maximum number of SSTables allowed |
| # before a minor compaction is forced. decreasing this will cause |
| # minor compactions to start more frequently and be less intensive. |
| # setting this to 0 disables minor compactions. defaults to 32. |
| # /NOTE |
| # - row_cache_save_period_in_seconds: number of seconds between saving |
| # row caches. The row caches can be saved periodically and if one |
| # exists on startup it will be loaded. |
| # - key_cache_save_period_in_seconds: number of seconds between saving |
| # key caches. The key caches can be saved periodically and if one |
| # exists on startup it will be loaded. |
| # - memtable_flush_after_mins: The maximum time to leave a dirty table |
| # unflushed. This should be large enough that it won't cause a flush |
| # storm of all memtables during periods of inactivity. |
| # - memtable_throughput_in_mb: The maximum size of the memtable before |
| # it is flushed. If undefined, 1/8 * heapsize will be used. |
| # - memtable_operations_in_millions: Number of operations in millions |
| # before the memtable is flushed. If undefined, throughput / 64 * 0.3 |
| # will be used. |
| # - column_metadata: |
| # Column required parameters: |
| # - name: binds a validator (and optionally an indexer) to columns |
| # with this name in any row of the enclosing column family. |
| # - validator_class: like cf.compare_with, an AbstractType that checks |
| # that the value of the column is well-defined. |
| # Column optional parameters: |
| # NOTE: |
| # index_name cannot be set if index_type is not also set! |
| # - index_name: User-friendly name for the index. |
| # - index_type: The type of index to be created. Currently only |
| # KEYS is supported. |
| # /NOTE |
| # |
| # NOTE: |
| # this keyspace definition is for demonstration purposes only. |
| # Cassandra will not load these definitions during startup. See |
| # http://wiki.apache.org/cassandra/FAQ#no_keyspaces for an explanation. |
| # /NOTE |
| # Sample schema: a single keyspace, Keyspace1, holding standard, super and |
| # indexed column families. Indentation is significant here: each |
| # column_families entry belongs to the keyspace it is nested under, and |
| # column_metadata belongs to the column family it is nested under. |
| keyspaces: |
|   - name: Keyspace1 |
|     replica_placement_strategy: org.apache.cassandra.locator.SimpleStrategy |
|     replication_factor: 1 |
|     column_families: |
|       # Standard column family that sets the cache and memtable options. |
|       - name: Standard1 |
|         compare_with: BytesType |
|         keys_cached: 10000 |
|         rows_cached: 1000 |
|         row_cache_save_period_in_seconds: 0 |
|         key_cache_save_period_in_seconds: 3600 |
|         memtable_flush_after_mins: 59 |
|         memtable_throughput_in_mb: 255 |
|         memtable_operations_in_millions: 0.29 |
| |
|       # Standard column family that overrides read repair, tombstone grace |
|       # period and the minor-compaction thresholds. |
|       - name: Standard2 |
|         compare_with: UTF8Type |
|         read_repair_chance: 0.1 |
|         keys_cached: 100 |
|         gc_grace_seconds: 0 |
|         min_compaction_threshold: 5 |
|         max_compaction_threshold: 31 |
| |
|       # Columns sorted as time-based UUIDs. |
|       - name: StandardByUUID1 |
|         compare_with: TimeUUIDType |
| |
|       # Super column family with explicit comparators at both levels. |
|       - name: Super1 |
|         column_type: Super |
|         compare_with: BytesType |
|         compare_subcolumns_with: BytesType |
| |
|       # NOTE(review): compare_with is not set here, so column names use the |
|       # default comparator (BytesType, per the ColumnFamily parameter notes), |
|       # while the comment string below describes them as UTF8 -- confirm |
|       # which is intended. |
|       - name: Super2 |
|         column_type: Super |
|         compare_subcolumns_with: UTF8Type |
|         rows_cached: 10000 |
|         keys_cached: 50 |
|         comment: 'A column family with supercolumns, whose column and subcolumn names are UTF8 strings' |
| |
|       - name: Super3 |
|         column_type: Super |
|         compare_with: LongType |
|         comment: 'A column family with supercolumns, whose column names are Longs (8 bytes)' |
| |
|       # Column family with a KEYS index on the birthdate column. |
|       - name: Indexed1 |
|         default_validation_class: LongType |
|         column_metadata: |
|           - name: birthdate |
|             validator_class: LongType |
|             index_name: birthdate_idx |
|             index_type: KEYS |