blob: 95620773b8c5a2d21dad36303a240fe2bf2b838b [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
## Bookie settings
#############################################################################
## Server parameters
#############################################################################
# Port that bookie server listen on
bookiePort=3181
# Directories BookKeeper outputs its write ahead log.
# Could define multi directories to store write head logs, separated by ','.
# For example:
# journalDirectories=/tmp/bk-journal1,/tmp/bk-journal2
# If journalDirectories is set, bookies will skip journalDirectory and use
# this setting directory.
# journalDirectories=/tmp/bk-journal
# Directory Bookkeeper outputs its write ahead log
# @deprecated since 4.5.0. journalDirectories is preferred over journalDirectory.
journalDirectory=data/bookkeeper/journal
# Configure the bookie to allow/disallow multiple ledger/index/journal directories
# in the same filesystem disk partition
# allowMultipleDirsUnderSameDiskPartition=false
# Minimum safe Usable size to be available in index directory for bookie to create
# Index File while replaying journal at the time of bookie Start in Readonly Mode (in bytes)
minUsableSizeForIndexFileCreation=1073741824
# Set the network interface that the bookie should listen on.
# If not set, the bookie will listen on all interfaces.
# listeningInterface=eth0
# Configure a specific hostname or IP address that the bookie should use to advertise itself to
# clients. If not set, bookie will advertised its own IP address or hostname, depending on the
# listeningInterface and `seHostNameAsBookieID settings.
advertisedAddress=
# Whether the bookie allowed to use a loopback interface as its primary
# interface(i.e. the interface it uses to establish its identity)?
# By default, loopback interfaces are not allowed as the primary
# interface.
# Using a loopback interface as the primary interface usually indicates
# a configuration error. For example, its fairly common in some VPS setups
# to not configure a hostname, or to have the hostname resolve to
# 127.0.0.1. If this is the case, then all bookies in the cluster will
# establish their identities as 127.0.0.1:3181, and only one will be able
# to join the cluster. For VPSs configured like this, you should explicitly
# set the listening interface.
allowLoopback=false
# Interval to watch whether bookie is dead or not, in milliseconds
bookieDeathWatchInterval=1000
# When entryLogPerLedgerEnabled is enabled, checkpoint doesn't happens
# when a new active entrylog is created / previous one is rolled over.
# Instead SyncThread checkpoints periodically with 'flushInterval' delay
# (in milliseconds) in between executions. Checkpoint flushes both ledger
# entryLogs and ledger index pages to disk.
# Flushing entrylog and index files will introduce much random disk I/O.
# If separating journal dir and ledger dirs each on different devices,
# flushing would not affect performance. But if putting journal dir
# and ledger dirs on same device, performance degrade significantly
# on too frequent flushing. You can consider increment flush interval
# to get better performance, but you need to pay more time on bookie
# server restart after failure.
# This config is used only when entryLogPerLedgerEnabled is enabled.
flushInterval=60000
# Allow the expansion of bookie storage capacity. Newly added ledger
# and index dirs must be empty.
# allowStorageExpansion=false
# Whether the bookie should use its hostname to register with the
# co-ordination service(eg: Zookeeper service).
# When false, bookie will use its ipaddress for the registration.
# Defaults to false.
useHostNameAsBookieID=false
# Whether the bookie is allowed to use an ephemeral port (port 0) as its
# server port. By default, an ephemeral port is not allowed.
# Using an ephemeral port as the service port usually indicates a configuration
# error. However, in unit tests, using an ephemeral port will address port
# conflict problems and allow running tests in parallel.
# allowEphemeralPorts=false
# Whether allow the bookie to listen for BookKeeper clients executed on the local JVM.
# enableLocalTransport=false
# Whether allow the bookie to disable bind on network interfaces,
# this bookie will be available only to BookKeeper clients executed on the local JVM.
# disableServerSocketBind=false
# The number of bytes we should use as chunk allocation for
# org.apache.bookkeeper.bookie.SkipListArena
# skipListArenaChunkSize=4194304
# The max size we should allocate from the skiplist arena. Allocations
# larger than this should be allocated directly by the VM to avoid fragmentation.
# skipListArenaMaxAllocSize=131072
# The bookie authentication provider factory class name.
# If this is null, no authentication will take place.
# bookieAuthProviderFactoryClass=null
#############################################################################
## Garbage collection settings
#############################################################################
# How long the interval to trigger next garbage collection, in milliseconds
# Since garbage collection is running in background, too frequent gc
# will heart performance. It is better to give a higher number of gc
# interval if there is enough disk capacity.
gcWaitTime=900000
# How long the interval to trigger next garbage collection of overreplicated
# ledgers, in milliseconds [Default: 1 day]. This should not be run very frequently
# since we read the metadata for all the ledgers on the bookie from zk
gcOverreplicatedLedgerWaitTime=86400000
# Number of threads that should handle write requests. if zero, the writes would
# be handled by netty threads directly.
numAddWorkerThreads=0
# Number of threads that should handle read requests. if zero, the reads would
# be handled by netty threads directly.
numReadWorkerThreads=8
# Number of threads that should be used for high priority requests
# (i.e. recovery reads and adds, and fencing).
numHighPriorityWorkerThreads=8
# If read workers threads are enabled, limit the number of pending requests, to
# avoid the executor queue to grow indefinitely
maxPendingReadRequestsPerThread=2500
# If add workers threads are enabled, limit the number of pending requests, to
# avoid the executor queue to grow indefinitely
maxPendingAddRequestsPerThread=10000
# Whether force compaction is allowed when the disk is full or almost full.
# Forcing GC may get some space back, but may also fill up disk space more quickly.
# This is because new log files are created before GC, while old garbage
# log files are deleted after GC.
# isForceGCAllowWhenNoSpace=false
# True if the bookie should double check readMetadata prior to gc
# verifyMetadataOnGC=false
#############################################################################
## TLS settings
#############################################################################
# TLS Provider (JDK or OpenSSL).
# tlsProvider=OpenSSL
# The path to the class that provides security.
# tlsProviderFactoryClass=org.apache.bookkeeper.security.SSLContextFactory
# Type of security used by server.
# tlsClientAuthentication=true
# Bookie Keystore type.
# tlsKeyStoreType=JKS
# Bookie Keystore location (path).
# tlsKeyStore=null
# Bookie Keystore password path, if the keystore is protected by a password.
# tlsKeyStorePasswordPath=null
# Bookie Truststore type.
# tlsTrustStoreType=null
# Bookie Truststore location (path).
# tlsTrustStore=null
# Bookie Truststore password path, if the trust store is protected by a password.
# tlsTrustStorePasswordPath=null
#############################################################################
## Long poll request parameter settings
#############################################################################
# The number of threads that should handle long poll requests.
# numLongPollWorkerThreads=10
# The tick duration in milliseconds for long poll requests.
# requestTimerTickDurationMs=10
# The number of ticks per wheel for the long poll request timer.
# requestTimerNumTicks=1024
#############################################################################
## AutoRecovery settings
#############################################################################
# The interval between auditor bookie checks.
# The auditor bookie check, checks ledger metadata to see which bookies should
# contain entries for each ledger. If a bookie which should contain entries is
# unavailable, then the ledger containing that entry is marked for recovery.
# Setting this to 0 disabled the periodic check. Bookie checks will still
# run when a bookie fails.
# The interval is specified in seconds.
auditorPeriodicBookieCheckInterval=86400
# The number of entries that a replication will rereplicate in parallel.
rereplicationEntryBatchSize=5000
# Auto-replication
# The grace period, in seconds, that the replication worker waits before fencing and
# replicating a ledger fragment that's still being written to upon bookie failure.
# openLedgerRereplicationGracePeriod=30
# Whether the bookie itself can start auto-recovery service also or not
# autoRecoveryDaemonEnabled=false
# How long to wait, in seconds, before starting auto recovery of a lost bookie
# lostBookieRecoveryDelay=0
#############################################################################
## Netty server settings
#############################################################################
# This settings is used to enabled/disabled Nagle's algorithm, which is a means of
# improving the efficiency of TCP/IP networks by reducing the number of packets
# that need to be sent over the network.
# If you are sending many small messages, such that more than one can fit in
# a single IP packet, setting server.tcpnodelay to false to enable Nagle algorithm
# can provide better performance.
# Default value is true.
serverTcpNoDelay=true
# This setting is used to send keep-alive messages on connection-oriented sockets.
# serverSockKeepalive=true
# The socket linger timeout on close.
# When enabled, a close or shutdown will not return until all queued messages for
# the socket have been successfully sent or the linger timeout has been reached.
# Otherwise, the call returns immediately and the closing is done in the background.
# serverTcpLinger=0
# The Recv ByteBuf allocator initial buf size.
# byteBufAllocatorSizeInitial=65536
# The Recv ByteBuf allocator min buf size.
# byteBufAllocatorSizeMin=65536
# The Recv ByteBuf allocator max buf size.
# byteBufAllocatorSizeMax=1048576
#############################################################################
## Journal settings
#############################################################################
# The journal format version to write.
# Available formats are 1-5:
# 1: no header
# 2: a header section was added
# 3: ledger key was introduced
# 4: fencing key was introduced
# 5: expanding header to 512 and padding writes to align sector size configured by `journalAlignmentSize`
# By default, it is `4`. If you'd like to enable `padding-writes` feature, you can set journal version to `5`.
# You can disable `padding-writes` by setting journal version back to `4`. This feature is available in 4.5.0
# and onward versions.
# journalFormatVersionToWrite=4
# Max file size of journal file, in mega bytes
# A new journal file will be created when the old one reaches the file size limitation
journalMaxSizeMB=2048
# Max number of old journal file to kept
# Keep a number of old journal files would help data recovery in specia case
journalMaxBackups=5
# How much space should we pre-allocate at a time in the journal.
journalPreAllocSizeMB=16
# Size of the write buffers used for the journal
journalWriteBufferSizeKB=64
# Should we remove pages from page cache after force write
journalRemoveFromPageCache=true
# Should the data be fsynced on journal before acknowledgment.
# By default, data sync is enabled to guarantee durability of writes.
# Beware: while disabling data sync in the Bookie journal might improve the bookie write performance, it will also
# introduce the possibility of data loss. With no sync, the journal entries are written in the OS page cache but
# not flushed to disk. In case of power failure, the affected bookie might lose the unflushed data. If the ledger
# is replicated to multiple bookies, the chances of data loss are reduced though still present.
journalSyncData=true
# Should we group journal force writes, which optimize group commit
# for higher throughput
journalAdaptiveGroupWrites=true
# Maximum latency to impose on a journal write to achieve grouping
journalMaxGroupWaitMSec=1
# Maximum writes to buffer to achieve grouping
journalBufferedWritesThreshold=524288
# The number of threads that should handle journal callbacks
numJournalCallbackThreads=8
# All the journal writes and commits should be aligned to given size.
# If not, zeros will be padded to align to given size.
# It only takes effects when journalFormatVersionToWrite is set to 5
journalAlignmentSize=4096
# Maximum entries to buffer to impose on a journal write to achieve grouping.
# journalBufferedEntriesThreshold=0
# If we should flush the journal when journal queue is empty
journalFlushWhenQueueEmpty=false
#############################################################################
## Ledger storage settings
#############################################################################
# Ledger storage implementation class
ledgerStorageClass=org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage
# Directory Bookkeeper outputs ledger snapshots
# could define multi directories to store snapshots, separated by ','
# For example:
# ledgerDirectories=/tmp/bk1-data,/tmp/bk2-data
#
# Ideally ledger dirs and journal dir are each in a differet device,
# which reduce the contention between random i/o and sequential write.
# It is possible to run with a single disk, but performance will be significantly lower.
ledgerDirectories=data/bookkeeper/ledgers
# Directories to store index files. If not specified, will use ledgerDirectories to store.
# indexDirectories=data/bookkeeper/ledgers
# Interval at which the auditor will do a check of all ledgers in the cluster.
# By default this runs once a week. The interval is set in seconds.
# To disable the periodic check completely, set this to 0.
# Note that periodic checking will put extra load on the cluster, so it should
# not be run more frequently than once a day.
auditorPeriodicCheckInterval=604800
# Whether sorted-ledger storage enabled (default true)
# sortedLedgerStorageEnabled=ture
# The skip list data size limitation (default 64MB) in EntryMemTable
# skipListSizeLimit=67108864L
#############################################################################
## Ledger cache settings
#############################################################################
# Max number of ledger index files could be opened in bookie server
# If number of ledger index files reaches this limitation, bookie
# server started to swap some ledgers from memory to disk.
# Too frequent swap will affect performance. You can tune this number
# to gain performance according your requirements.
openFileLimit=0
# Size of a index page in ledger cache, in bytes
# A larger index page can improve performance writing page to disk,
# which is efficent when you have small number of ledgers and these
# ledgers have similar number of entries.
# If you have large number of ledgers and each ledger has fewer entries,
# smaller index page would improve memory usage.
# pageSize=8192
# How many index pages provided in ledger cache
# If number of index pages reaches this limitation, bookie server
# starts to swap some ledgers from memory to disk. You can increment
# this value when you found swap became more frequent. But make sure
# pageLimit*pageSize should not more than JVM max memory limitation,
# otherwise you would got OutOfMemoryException.
# In general, incrementing pageLimit, using smaller index page would
# gain bettern performance in lager number of ledgers with fewer entries case
# If pageLimit is -1, bookie server will use 1/3 of JVM memory to compute
# the limitation of number of index pages.
pageLimit=0
#############################################################################
## Ledger manager settings
#############################################################################
# Ledger Manager Class
# What kind of ledger manager is used to manage how ledgers are stored, managed
# and garbage collected. Try to read 'BookKeeper Internals' for detail info.
ledgerManagerFactoryClass=org.apache.bookkeeper.meta.HierarchicalLedgerManagerFactory
# @Drepcated - `ledgerManagerType` is deprecated in favor of using `ledgerManagerFactoryClass`.
# ledgerManagerType=hierarchical
# Root Zookeeper path to store ledger metadata
# This parameter is used by zookeeper-based ledger manager as a root znode to
# store all ledgers.
zkLedgersRootPath=/ledgers
#############################################################################
## Entry log settings
#############################################################################
# Max file size of entry logger, in bytes
# A new entry log file will be created when the old one reaches the file size limitation
logSizeLimit=1073741824
# Enable/Disable entry logger preallocation
entryLogFilePreallocationEnabled=true
# Entry log flush interval in bytes.
# Default is 0. 0 or less disables this feature and effectively flush
# happens on log rotation.
# Flushing in smaller chunks but more frequently reduces spikes in disk
# I/O. Flushing too frequently may also affect performance negatively.
# flushEntrylogBytes=0
# The number of bytes we should use as capacity for BufferedReadChannel. Default is 512 bytes.
readBufferSizeBytes=4096
# The number of bytes used as capacity for the write buffer. Default is 64KB.
writeBufferSizeBytes=65536
# Specifies if entryLog per ledger is enabled/disabled. If it is enabled, then there would be a
# active entrylog for each ledger. It would be ideal to enable this feature if the underlying
# storage device has multiple DiskPartitions or SSD and if in a given moment, entries of fewer
# number of active ledgers are written to a bookie.
# entryLogPerLedgerEnabled=false
#############################################################################
## Entry log compaction settings
#############################################################################
# Set the rate at which compaction will readd entries. The unit is adds per second.
compactionRate=1000
# If bookie is using hostname for registration and in ledger metadata then
# whether to use short hostname or FQDN hostname. Defaults to false.
# useShortHostName=false
# Threshold of minor compaction
# For those entry log files whose remaining size percentage reaches below
# this threshold will be compacted in a minor compaction.
# If it is set to less than zero, the minor compaction is disabled.
minorCompactionThreshold=0.2
# Interval to run minor compaction, in seconds
# If it is set to less than zero, the minor compaction is disabled.
minorCompactionInterval=3600
# Set the maximum number of entries which can be compacted without flushing.
# When compacting, the entries are written to the entrylog and the new offsets
# are cached in memory. Once the entrylog is flushed the index is updated with
# the new offsets. This parameter controls the number of entries added to the
# entrylog before a flush is forced. A higher value for this parameter means
# more memory will be used for offsets. Each offset consists of 3 longs.
# This parameter should _not_ be modified unless you know what you're doing.
# The default is 100,000.
compactionMaxOutstandingRequests=100000
# Threshold of major compaction
# For those entry log files whose remaining size percentage reaches below
# this threshold will be compacted in a major compaction.
# Those entry log files whose remaining size percentage is still
# higher than the threshold will never be compacted.
# If it is set to less than zero, the minor compaction is disabled.
majorCompactionThreshold=0.5
# Interval to run major compaction, in seconds
# If it is set to less than zero, the major compaction is disabled.
majorCompactionInterval=86400
# Throttle compaction by bytes or by entries.
isThrottleByBytes=false
# Set the rate at which compaction will readd entries. The unit is adds per second.
compactionRateByEntries=1000
# Set the rate at which compaction will readd entries. The unit is bytes added per second.
compactionRateByBytes=1000000
#############################################################################
## Statistics
#############################################################################
# Whether statistics are enabled
# enableStatistics=true
# Stats Provider Class (if statistics are enabled)
statsProviderClass=org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider
# Default port for Prometheus metrics exporter
prometheusStatsHttpPort=8000
#############################################################################
## Read-only mode support
#############################################################################
# If all ledger directories configured are full, then support only read requests for clients.
# If "readOnlyModeEnabled=true" then on all ledger disks full, bookie will be converted
# to read-only mode and serve only read requests. Otherwise the bookie will be shutdown.
# By default this will be disabled.
readOnlyModeEnabled=true
# Whether the bookie is force started in read only mode or not
# forceReadOnlyBookie=false
# Persiste the bookie status locally on the disks. So the bookies can keep their status upon restarts
# @Since 4.6
# persistBookieStatusEnabled=false
#############################################################################
## Disk utilization
#############################################################################
# For each ledger dir, maximum disk space which can be used.
# Default is 0.95f. i.e. 95% of disk can be used at most after which nothing will
# be written to that partition. If all ledger dir partions are full, then bookie
# will turn to readonly mode if 'readOnlyModeEnabled=true' is set, else it will
# shutdown.
# Valid values should be in between 0 and 1 (exclusive).
diskUsageThreshold=0.95
# The disk free space low water mark threshold.
# Disk is considered full when usage threshold is exceeded.
# Disk returns back to non-full state when usage is below low water mark threshold.
# This prevents it from going back and forth between these states frequently
# when concurrent writes and compaction are happening. This also prevent bookie from
# switching frequently between read-only and read-writes states in the same cases.
# diskUsageWarnThreshold=0.95
# Set the disk free space low water mark threshold. Disk is considered full when
# usage threshold is exceeded. Disk returns back to non-full state when usage is
# below low water mark threshold. This prevents it from going back and forth
# between these states frequently when concurrent writes and compaction are
# happening. This also prevent bookie from switching frequently between
# read-only and read-writes states in the same cases.
# diskUsageLwmThreshold=0.90
# Disk check interval in milli seconds, interval to check the ledger dirs usage.
# Default is 10000
diskCheckInterval=10000
#############################################################################
## ZooKeeper parameters
#############################################################################
# A list of one of more servers on which Zookeeper is running.
# The server list can be comma separated values, for example:
# zkServers=zk1:2181,zk2:2181,zk3:2181
zkServers=localhost:2181
# ZooKeeper client session timeout in milliseconds
# Bookie server will exit if it received SESSION_EXPIRED because it
# was partitioned off from ZooKeeper for more than the session timeout
# JVM garbage collection, disk I/O will cause SESSION_EXPIRED.
# Increment this value could help avoiding this issue
zkTimeout=30000
# The Zookeeper client backoff retry start time in millis.
# zkRetryBackoffStartMs=1000
# The Zookeeper client backoff retry max time in millis.
# zkRetryBackoffMaxMs=10000
# Set ACLs on every node written on ZooKeeper, this way only allowed users
# will be able to read and write BookKeeper metadata stored on ZooKeeper.
# In order to make ACLs work you need to setup ZooKeeper JAAS authentication
# all the bookies and Client need to share the same user, and this is usually
# done using Kerberos authentication. See ZooKeeper documentation
zkEnableSecurity=false
#############################################################################
## Server parameters
#############################################################################
# Configure a list of server components to enable and load on a bookie server.
# This provides the plugin run extra services along with a bookie server.
#
# extraServerComponents=
#############################################################################
## DB Ledger storage configuration
#############################################################################
# These configs are used when the selected 'ledgerStorageClass' is
# org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage
# Size of Write Cache. Memory is allocated from JVM direct memory.
# Write cache is used to buffer entries before flushing into the entry log
# For good performance, it should be big enough to hold a sub
dbStorage_writeCacheMaxSizeMb=512
# Size of Read cache. Memory is allocated from JVM direct memory.
# This read cache is pre-filled doing read-ahead whenever a cache miss happens
dbStorage_readAheadCacheMaxSizeMb=256
# How many entries to pre-fill in cache after a read cache miss
dbStorage_readAheadCacheBatchSize=1000
## RocksDB specific configurations
## DbLedgerStorage uses RocksDB to store the indexes from
## (ledgerId, entryId) -> (entryLog, offset)
# Size of RocksDB block-cache. For best performance, this cache
# should be big enough to hold a significant portion of the index
# database which can reach ~2GB in some cases
# Default is 256 MBytes
dbStorage_rocksDB_blockCacheSize=268435456
# Other RocksDB specific tunables
dbStorage_rocksDB_writeBufferSizeMB=64
dbStorage_rocksDB_sstSizeInMB=64
dbStorage_rocksDB_blockSize=65536
dbStorage_rocksDB_bloomFilterBitsPerKey=10
dbStorage_rocksDB_numLevels=-1
dbStorage_rocksDB_numFilesInLevel0=4
dbStorage_rocksDB_maxSizeInLevel1MB=256