blob: 78b2ae44fd208c5f08f9930a6e5f0226071f4d23 [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
## Bookie settings
# Port that bookie server listen on
bookiePort=3181
# Set the network interface that the bookie should listen on.
# If not set, the bookie will listen on all interfaces.
#listeningInterface=eth0
# Whether the bookie is allowed to use a loopback interface as its primary
# interface (i.e. the interface it uses to establish its identity).
# By default, loopback interfaces are not allowed as the primary
# interface.
# Using a loopback interface as the primary interface usually indicates
# a configuration error. For example, it's fairly common in some VPS setups
# to not configure a hostname, or to have the hostname resolve to
# 127.0.0.1. If this is the case, then all bookies in the cluster will
# establish their identities as 127.0.0.1:3181, and only one will be able
# to join the cluster. For VPSs configured like this, you should explicitly
# set the listening interface.
allowLoopback=false
# Configure a specific hostname or IP address that the bookie should use to advertise itself to
# clients. If not set, the bookie will advertise its own IP address or hostname, depending on the
# listeningInterface and useHostNameAsBookieID settings.
advertisedAddress=
# Directory where BookKeeper writes its write-ahead log
journalDirectory=data/bookkeeper/journal
# Directory where BookKeeper writes ledger snapshots.
# Multiple directories can be defined to store snapshots, separated by ','
# For example:
# ledgerDirectories=/tmp/bk1-data,/tmp/bk2-data
#
# Ideally the ledger dirs and the journal dir are each on a different device,
# which reduces the contention between random I/O and sequential writes.
# It is possible to run with a single disk, but performance will be significantly lower.
ledgerDirectories=data/bookkeeper/ledgers
# Directories to store index files. If not specified, will use ledgerDirectories to store.
# indexDirectories=data/bookkeeper/ledgers
# Ledger Manager Class
# What kind of ledger manager is used to manage how ledgers are stored, managed
# and garbage collected. See 'BookKeeper Internals' for details.
ledgerManagerType=hierarchical
# Root zookeeper path to store ledger metadata
# This parameter is used by zookeeper-based ledger manager as a root znode to
# store all ledgers.
zkLedgersRootPath=/ledgers
# Ledger storage implementation class
ledgerStorageClass=org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage
# Enable/Disable entry logger preallocation
entryLogFilePreallocationEnabled=true
# Max file size of entry logger, in bytes
# A new entry log file will be created when the old one reaches the file size limitation
logSizeLimit=2147483648
# Threshold of minor compaction
# For those entry log files whose remaining size percentage reaches below
# this threshold will be compacted in a minor compaction.
# If it is set to less than zero, the minor compaction is disabled.
minorCompactionThreshold=0.2
# Interval to run minor compaction, in seconds
# If it is set to less than zero, the minor compaction is disabled.
minorCompactionInterval=3600
# Threshold of major compaction
# For those entry log files whose remaining size percentage reaches below
# this threshold will be compacted in a major compaction.
# Those entry log files whose remaining size percentage is still
# higher than the threshold will never be compacted.
# If it is set to less than zero, the major compaction is disabled.
majorCompactionThreshold=0.5
# Interval to run major compaction, in seconds
# If it is set to less than zero, the major compaction is disabled.
majorCompactionInterval=86400
# Set the maximum number of entries which can be compacted without flushing.
# When compacting, the entries are written to the entrylog and the new offsets
# are cached in memory. Once the entrylog is flushed the index is updated with
# the new offsets. This parameter controls the number of entries added to the
# entrylog before a flush is forced. A higher value for this parameter means
# more memory will be used for offsets. Each offset consists of 3 longs.
# This parameter should _not_ be modified unless you know what you're doing.
# The default is 100,000.
compactionMaxOutstandingRequests=100000
# Set the rate at which compaction will readd entries. The unit is adds per second.
compactionRate=1000
# Throttle compaction by bytes or by entries.
isThrottleByBytes=false
# Set the rate at which compaction will readd entries. The unit is adds per second.
compactionRateByEntries=1000
# Set the rate at which compaction will readd entries. The unit is bytes added per second.
compactionRateByBytes=1000000
# Max file size of journal file, in mega bytes
# A new journal file will be created when the old one reaches the file size limitation
#
journalMaxSizeMB=2048
# Max number of old journal files to keep
# Keeping a number of old journal files can help data recovery in special cases
#
journalMaxBackups=5
# How much space should we pre-allocate at a time in the journal
journalPreAllocSizeMB=16
# Size of the write buffers used for the journal
journalWriteBufferSizeKB=64
# Should we remove pages from page cache after force write
journalRemoveFromPageCache=true
# Should the data be fsynced on journal before acknowledgment.
# By default, data sync is enabled to guarantee durability of writes.
# Beware: while disabling data sync in the Bookie journal might improve the bookie write performance, it will also
# introduce the possibility of data loss. With no sync, the journal entries are written in the OS page cache but
# not flushed to disk. In case of power failure, the affected bookie might lose the unflushed data. If the ledger
# is replicated to multiple bookies, the chances of data loss are reduced though still present.
journalSyncData=true
# Should we group journal force writes, which optimize group commit
# for higher throughput
journalAdaptiveGroupWrites=true
# Maximum latency to impose on a journal write to achieve grouping
journalMaxGroupWaitMSec=1
# All the journal writes and commits should be aligned to given size
journalAlignmentSize=4096
# Maximum writes to buffer to achieve grouping
journalBufferedWritesThreshold=524288
# If we should flush the journal when journal queue is empty
journalFlushWhenQueueEmpty=false
# The number of threads that should handle journal callbacks
numJournalCallbackThreads=8
# The number of max entries to keep in fragment for re-replication
rereplicationEntryBatchSize=5000
# How long the interval to trigger next garbage collection, in milliseconds
# Since garbage collection is running in background, too frequent gc
# will hurt performance. It is better to give a higher number of gc
# interval if there is enough disk capacity.
gcWaitTime=900000
# How long the interval to trigger next garbage collection of overreplicated
# ledgers, in milliseconds [Default: 1 day]. This should not be run very frequently since we read
# the metadata for all the ledgers on the bookie from zk
gcOverreplicatedLedgerWaitTime=86400000
# How long the interval to flush ledger index pages to disk, in milliseconds
# Flushing index files will introduce much random disk I/O.
# If separating journal dir and ledger dirs each on different devices,
# flushing would not affect performance. But if putting journal dir
# and ledger dirs on the same device, performance degrades significantly
# on too frequent flushing. You can consider increasing the flush interval
# to get better performance, but you need to pay more time on bookie
# server restart after failure.
#
flushInterval=60000
# Interval to watch whether bookie is dead or not, in milliseconds
#
bookieDeathWatchInterval=1000
## zookeeper client settings
# A list of one or more servers on which zookeeper is running.
# The server list can be comma separated values, for example:
# zkServers=zk1:2181,zk2:2181,zk3:2181
zkServers=localhost:2181
# ZooKeeper client session timeout in milliseconds
# Bookie server will exit if it received SESSION_EXPIRED because it
# was partitioned off from ZooKeeper for more than the session timeout
# JVM garbage collection or disk I/O pauses can cause SESSION_EXPIRED.
# Increasing this value could help avoid this issue
zkTimeout=30000
## NIO Server settings
# This setting is used to enable/disable Nagle's algorithm, which is a means of
# improving the efficiency of TCP/IP networks by reducing the number of packets
# that need to be sent over the network.
# If you are sending many small messages, such that more than one can fit in
# a single IP packet, setting serverTcpNoDelay to false to enable Nagle's algorithm
# can provide better performance.
# Default value is true.
#
serverTcpNoDelay=true
## ledger cache settings
# Max number of ledger index files that can be open in the bookie server
# If the number of ledger index files reaches this limit, the bookie
# server starts to swap some ledgers from memory to disk.
# Too frequent swapping will affect performance. You can tune this number
# to gain performance according to your requirements.
openFileLimit=0
# Size of a index page in ledger cache, in bytes
# A larger index page can improve performance writing page to disk,
# which is efficient when you have a small number of ledgers and these
# ledgers have similar number of entries.
# If you have large number of ledgers and each ledger has fewer entries,
# smaller index page would improve memory usage.
# pageSize=8192
# How many index pages provided in ledger cache
# If number of index pages reaches this limitation, bookie server
# starts to swap some ledgers from memory to disk. You can increase
# this value when you find swapping becoming more frequent. But make sure
# pageLimit*pageSize is not more than the JVM max memory limit,
# otherwise you will get an OutOfMemoryException.
# In general, increasing pageLimit and using a smaller index page will
# give better performance when there is a large number of ledgers with few entries.
# If pageLimit is -1, bookie server will use 1/3 of JVM memory to compute
# the limitation of number of index pages.
pageLimit=0
#If all ledger directories configured are full, then support only read requests for clients.
#If "readOnlyModeEnabled=true" then on all ledger disks full, bookie will be converted
#to read-only mode and serve only read requests. Otherwise the bookie will be shutdown.
#By default this will be disabled.
readOnlyModeEnabled=true
#For each ledger dir, maximum disk space which can be used.
#Default is 0.95f. i.e. 95% of disk can be used at most after which nothing will
#be written to that partition. If all ledger dir partitions are full, then bookie
#will turn to readonly mode if 'readOnlyModeEnabled=true' is set, else it will
#shutdown.
#Valid values should be in between 0 and 1 (exclusive).
diskUsageThreshold=0.95
#Disk check interval in milliseconds, interval to check the ledger dirs usage.
#Default is 10000
diskCheckInterval=10000
# Interval at which the auditor will do a check of all ledgers in the cluster.
# By default this runs once a week. The interval is set in seconds.
# To disable the periodic check completely, set this to 0.
# Note that periodic checking will put extra load on the cluster, so it should
# not be run more frequently than once a day.
auditorPeriodicCheckInterval=604800
# The interval between auditor bookie checks.
# The auditor bookie check, checks ledger metadata to see which bookies should
# contain entries for each ledger. If a bookie which should contain entries is
# unavailable, then the ledger containing that entry is marked for recovery.
# Setting this to 0 disables the periodic check. Bookie checks will still
# run when a bookie fails.
# The interval is specified in seconds.
auditorPeriodicBookieCheckInterval=86400
# number of threads that should handle write requests. if zero, the writes would
# be handled by netty threads directly.
numAddWorkerThreads=0
# number of threads that should handle read requests. if zero, the reads would
# be handled by netty threads directly.
numReadWorkerThreads=8
# If read workers threads are enabled, limit the number of pending requests, to
# avoid the executor queue to grow indefinitely
maxPendingReadRequestsPerThread=2500
# The number of bytes we should use as capacity for BufferedReadChannel. Default is 512 bytes.
readBufferSizeBytes=4096
# The number of bytes used as capacity for the write buffer. Default is 64KB.
writeBufferSizeBytes=65536
# Whether the bookie should use its hostname to register with the
# co-ordination service(eg: zookeeper service).
# When false, bookie will use its ipaddress for the registration.
# Defaults to false.
useHostNameAsBookieID=false
# Stats Provider Class
statsProviderClass=org.apache.bookkeeper.stats.PrometheusMetricsProvider
# Default port for Prometheus metrics exporter
prometheusStatsHttpPort=8000
## DB Ledger storage configuration
# Size of Write Cache. Memory is allocated from JVM direct memory.
# Write cache is used to buffer entries before flushing into the entry log
# For good performance, it should be big enough to hold a substantial amount
# of entries in the flush interval
dbStorage_writeCacheMaxSizeMb=512
# Size of Read cache. Memory is allocated from JVM direct memory.
# This read cache is pre-filled doing read-ahead whenever a cache miss happens
dbStorage_readAheadCacheMaxSizeMb=256
# How many entries to pre-fill in cache after a read cache miss
dbStorage_readAheadCacheBatchSize=1000
## RocksDB specific configurations
## DbLedgerStorage uses RocksDB to store the indexes from
## (ledgerId, entryId) -> (entryLog, offset)
# Size of RocksDB block-cache. For best performance, this cache
# should be big enough to hold a significant portion of the index
# database which can reach ~2GB in some cases
# 256 MBytes
dbStorage_rocksDB_blockCacheSize=268435456
dbStorage_rocksDB_writeBufferSizeMB=64
dbStorage_rocksDB_sstSizeInMB=64
dbStorage_rocksDB_blockSize=65536
dbStorage_rocksDB_bloomFilterBitsPerKey=10
dbStorage_rocksDB_numLevels=-1
dbStorage_rocksDB_numFilesInLevel0=4
dbStorage_rocksDB_maxSizeInLevel1MB=256