| # |
| # Copyright 2016 Yahoo Inc. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| ## Bookie settings |
| |
| # Port that the bookie server listens on |
| bookiePort=3181 |
| |
| # Set the network interface that the bookie should listen on. |
| # If not set, the bookie will listen on all interfaces. |
| #listeningInterface=eth0 |
| |
| # Whether the bookie is allowed to use a loopback interface as its primary |
| # interface (i.e. the interface it uses to establish its identity). |
| # By default, loopback interfaces are not allowed as the primary |
| # interface. |
| # Using a loopback interface as the primary interface usually indicates |
| # a configuration error. For example, it's fairly common in some VPS setups |
| # to not configure a hostname, or to have the hostname resolve to |
| # 127.0.0.1. If this is the case, then all bookies in the cluster will |
| # establish their identities as 127.0.0.1:3181, and only one will be able |
| # to join the cluster. For VPSs configured like this, you should explicitly |
| # set the listening interface. |
| #allowLoopback=false |
| |
| # Directory Bookkeeper outputs its write ahead log |
| journalDirectory=data/bookkeeper/journal |
| |
| # Directory where BookKeeper outputs ledger snapshots. |
| # Multiple directories can be defined to store snapshots, separated by ',' |
| # For example: |
| # ledgerDirectories=/tmp/bk1-data,/tmp/bk2-data |
| # |
| # Ideally ledger dirs and journal dir are each on a different device, |
| # which reduces the contention between random I/O and sequential writes. |
| # It is possible to run with a single disk, but performance will be significantly lower. |
| ledgerDirectories=data/bookkeeper/ledgers |
| # Directories to store index files. If not specified, will use ledgerDirectories to store. |
| # indexDirectories=data/bookkeeper/ledgers |
| |
| # Ledger Manager Class |
| # What kind of ledger manager is used to manage how ledgers are stored, managed |
| # and garbage collected. Try to read 'BookKeeper Internals' for detail info. |
| ledgerManagerType=hierarchical |
| |
| # Root zookeeper path to store ledger metadata |
| # This parameter is used by zookeeper-based ledger manager as a root znode to |
| # store all ledgers. |
| # zkLedgersRootPath=/ledgers |
| |
| # Ledger storage implementation class |
| ledgerStorageClass=org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage |
| |
| # Enable/Disable entry logger preallocation |
| # entryLogFilePreallocationEnabled=true |
| |
| # Max file size of entry logger, in bytes |
| # A new entry log file will be created when the old one reaches the file size limitation |
| # logSizeLimit=2147483648 |
| |
| # Threshold of minor compaction |
| # For those entry log files whose remaining size percentage reaches below |
| # this threshold will be compacted in a minor compaction. |
| # If it is set to less than zero, the minor compaction is disabled. |
| # minorCompactionThreshold=0.2 |
| |
| # Interval to run minor compaction, in seconds |
| # If it is set to less than zero, the minor compaction is disabled. |
| # minorCompactionInterval=3600 |
| |
| # Threshold of major compaction |
| # For those entry log files whose remaining size percentage reaches below |
| # this threshold will be compacted in a major compaction. |
| # Those entry log files whose remaining size percentage is still |
| # higher than the threshold will never be compacted. |
| # If it is set to less than zero, the major compaction is disabled. |
| majorCompactionThreshold=0.5 |
| |
| # Interval to run major compaction, in seconds |
| # If it is set to less than zero, the major compaction is disabled. |
| # majorCompactionInterval=86400 |
| |
| # Set the maximum number of entries which can be compacted without flushing. |
| # When compacting, the entries are written to the entrylog and the new offsets |
| # are cached in memory. Once the entrylog is flushed the index is updated with |
| # the new offsets. This parameter controls the number of entries added to the |
| # entrylog before a flush is forced. A higher value for this parameter means |
| # more memory will be used for offsets. Each offset consists of 3 longs. |
| # This parameter should _not_ be modified unless you know what you're doing. |
| # The default is 100,000. |
| #compactionMaxOutstandingRequests=100000 |
| |
| # Set the rate at which compaction will readd entries. The unit is adds per second. |
| #compactionRate=1000 |
| |
| # Throttle compaction by bytes or by entries. |
| #isThrottleByBytes=false |
| |
| # Set the rate at which compaction will readd entries. The unit is adds per second. |
| #compactionRateByEntries=1000 |
| |
| # Set the rate at which compaction will readd entries. The unit is bytes added per second. |
| #compactionRateByBytes=1000000 |
| |
| # Max file size of journal file, in mega bytes |
| # A new journal file will be created when the old one reaches the file size limitation |
| # |
| # journalMaxSizeMB=2048 |
| |
| # Max number of old journal files to keep |
| # Keeping a number of old journal files can help data recovery in special cases |
| # |
| # journalMaxBackups=5 |
| |
| # How much space should we pre-allocate at a time in the journal |
| # journalPreAllocSizeMB=16 |
| |
| # Size of the write buffers used for the journal |
| # journalWriteBufferSizeKB=64 |
| |
| # Should we remove pages from page cache after force write |
| journalRemoveFromPageCache=true |
| |
| # Should we group journal force writes, which optimize group commit |
| # for higher throughput |
| # journalAdaptiveGroupWrites=true |
| |
| # Maximum latency to impose on a journal write to achieve grouping |
| journalMaxGroupWaitMSec=1 |
| |
| # All the journal writes and commits should be aligned to given size |
| journalAlignmentSize=4096 |
| |
| # Maximum writes to buffer to achieve grouping |
| # journalBufferedWritesThreshold=524288 |
| |
| # If we should flush the journal when journal queue is empty |
| # journalFlushWhenQueueEmpty=false |
| |
| # The number of threads that should handle journal callbacks |
| numJournalCallbackThreads=8 |
| |
| # The number of max entries to keep in fragment for re-replication |
| rereplicationEntryBatchSize=5000 |
| |
| # Interval between garbage collection runs, in milliseconds |
| # Since garbage collection runs in the background, too-frequent gc |
| # will hurt performance. It is better to use a longer gc |
| # interval if there is enough disk capacity. |
| gcWaitTime=900000 |
| |
| # How long the interval to trigger next garbage collection of overreplicated |
| # ledgers, in milliseconds [Default: 1 day]. This should not be run very frequently since we read |
| # the metadata for all the ledgers on the bookie from zk |
| # gcOverreplicatedLedgerWaitTime=86400000 |
| |
| # How long the interval to flush ledger index pages to disk, in milliseconds |
| # Flushing index files will introduce much random disk I/O. |
| # If the journal dir and ledger dirs are each on different devices, |
| # flushing will not affect performance. But if the journal dir and |
| # ledger dirs are on the same device, performance degrades significantly |
| # with too-frequent flushing. Consider increasing the flush interval |
| # to get better performance, at the cost of a longer bookie server |
| # restart after a failure. |
| # |
| flushInterval=60000 |
| |
| # Interval to watch whether bookie is dead or not, in milliseconds |
| # |
| # bookieDeathWatchInterval=1000 |
| |
| ## zookeeper client settings |
| |
| # A list of one or more servers on which zookeeper is running. |
| # The server list can be comma separated values, for example: |
| # zkServers=zk1:2181,zk2:2181,zk3:2181 |
| zkServers=localhost:2181 |
| # ZooKeeper client session timeout in milliseconds |
| # The bookie server will exit if it receives SESSION_EXPIRED because it |
| # was partitioned off from ZooKeeper for longer than the session timeout. |
| # Long JVM garbage collection pauses or disk I/O stalls can also cause SESSION_EXPIRED. |
| # Increasing this value can help avoid this issue. |
| zkTimeout=30000 |
| |
| ## NIO Server settings |
| |
| # This setting is used to enable/disable Nagle's algorithm, which is a means of |
| # improving the efficiency of TCP/IP networks by reducing the number of packets |
| # that need to be sent over the network. |
| # If you are sending many small messages, such that more than one can fit in |
| # a single IP packet, setting serverTcpNoDelay to false to enable Nagle's algorithm |
| # can provide better performance. |
| # Default value is true. |
| # |
| # serverTcpNoDelay=true |
| |
| ## ledger cache settings |
| |
| # Max number of ledger index files that can be open in the bookie server |
| # If the number of ledger index files reaches this limit, the bookie |
| # server starts to swap some ledgers from memory to disk. |
| # Too-frequent swapping will affect performance. You can tune this number |
| # to gain performance according to your requirements. |
| openFileLimit=0 |
| |
| # Size of an index page in ledger cache, in bytes |
| # A larger index page can improve performance when writing pages to disk, |
| # which is efficient when you have a small number of ledgers and these |
| # ledgers have a similar number of entries. |
| # If you have large number of ledgers and each ledger has fewer entries, |
| # smaller index page would improve memory usage. |
| # pageSize=8192 |
| |
| # How many index pages are provided in the ledger cache |
| # If the number of index pages reaches this limit, the bookie server |
| # starts to swap some ledgers from memory to disk. You can increase |
| # this value when you find swapping becoming more frequent. But make sure |
| # pageLimit*pageSize is not more than the JVM max memory limit, |
| # otherwise you will get an OutOfMemoryError. |
| # In general, increasing pageLimit and using a smaller index page |
| # gives better performance when there is a large number of ledgers with few entries. |
| # If pageLimit is -1, bookie server will use 1/3 of JVM memory to compute |
| # the limitation of number of index pages. |
| pageLimit=0 |
| |
| #If all ledger directories configured are full, then support only read requests for clients. |
| #If "readOnlyModeEnabled=true" then on all ledger disks full, bookie will be converted |
| #to read-only mode and serve only read requests. Otherwise the bookie will be shutdown. |
| #By default this will be disabled. |
| readOnlyModeEnabled=true |
| |
| #For each ledger dir, maximum disk space which can be used. |
| #Default is 0.95f. i.e. 95% of disk can be used at most after which nothing will |
| #be written to that partition. If all ledger dir partitions are full, then bookie |
| #will turn to readonly mode if 'readOnlyModeEnabled=true' is set, else it will |
| #shutdown. |
| #Valid values should be in between 0 and 1 (exclusive). |
| #diskUsageThreshold=0.95 |
| |
| #Disk check interval in milli seconds, interval to check the ledger dirs usage. |
| #Default is 10000 |
| #diskCheckInterval=10000 |
| |
| # Interval at which the auditor will do a check of all ledgers in the cluster. |
| # By default this runs once a week. The interval is set in seconds. |
| # To disable the periodic check completely, set this to 0. |
| # Note that periodic checking will put extra load on the cluster, so it should |
| # not be run more frequently than once a day. |
| #auditorPeriodicCheckInterval=604800 |
| |
| # The interval between auditor bookie checks. |
| # The auditor bookie check, checks ledger metadata to see which bookies should |
| # contain entries for each ledger. If a bookie which should contain entries is |
| # unavailable, then the ledger containing that entry is marked for recovery. |
| # Setting this to 0 disables the periodic check. Bookie checks will still |
| # run when a bookie fails. |
| # The interval is specified in seconds. |
| #auditorPeriodicBookieCheckInterval=86400 |
| |
| # number of threads that should handle write requests. if zero, the writes would |
| # be handled by netty threads directly. |
| numAddWorkerThreads=0 |
| |
| # number of threads that should handle read requests. if zero, the reads would |
| # be handled by netty threads directly. |
| numReadWorkerThreads=8 |
| |
| # If read workers threads are enabled, limit the number of pending requests, to |
| # avoid the executor queue to grow indefinitely |
| maxPendingReadRequestsPerThread=2500 |
| |
| # The number of bytes we should use as capacity for BufferedReadChannel. Default is 512 bytes. |
| readBufferSizeBytes=4096 |
| |
| # The number of bytes used as capacity for the write buffer. Default is 64KB. |
| # writeBufferSizeBytes=65536 |
| |
| # Whether the bookie should use its hostname to register with the |
| # co-ordination service(eg: zookeeper service). |
| # When false, bookie will use its ipaddress for the registration. |
| # Defaults to false. |
| #useHostNameAsBookieID=false |
| |
| # Stats Provider Class |
| statsProviderClass=org.apache.bokkeeper.stats.datasketches.DataSketchesMetricsProvider |
| dataSketchesMetricsJsonFileReporter=data/bookie-stats.json |
| dataSketchesMetricsUpdateIntervalSeconds=60 |
| |
| |
| ## DB Ledger storage configuration |
| |
| # Size of Write Cache. Memory is allocated from JVM direct memory. |
| # Write cache is used to buffer entries before flushing into the entry log |
| # For good performance, it should be big enough to hold a substantial amount |
| # of entries in the flush interval. |
| dbStorage_writeCacheMaxSizeMb=512 |
| |
| # Size of Read cache. Memory is allocated from JVM direct memory. |
| # This read cache is pre-filled doing read-ahead whenever a cache miss happens |
| dbStorage_readAheadCacheMaxSizeMb=256 |
| |
| # How many entries to pre-fill in cache after a read cache miss |
| dbStorage_readAheadCacheBatchSize=1000 |
| |
| ## RocksDB specific configurations |
| ## DbLedgerStorage uses RocksDB to store the indexes from |
| ## (ledgerId, entryId) -> (entryLog, offset) |
| |
| # Size of RocksDB block-cache. For best performance, this cache |
| # should be big enough to hold a significant portion of the index |
| # database which can reach ~2GB in some cases |
| # The value is in bytes (268435456 = 256 MBytes). |
| # dbStorage_rocksDB_blockCacheSize=268435456 |
| |
| # dbStorage_rocksDB_writeBufferSizeMB=64 |
| # dbStorage_rocksDB_sstSizeInMB=64 |
| # dbStorage_rocksDB_blockSize=65536 |
| # dbStorage_rocksDB_bloomFilterBitsPerKey=10 |
| # dbStorage_rocksDB_numLevels=-1 |
| # dbStorage_rocksDB_numFilesInLevel0=4 |
| # dbStorage_rocksDB_maxSizeInLevel1MB=256 |