; blob: 4b52ffd0b61a972556c7df64fd9a1429c743ab70 [file] [log] [blame]
; Baseline values for the [apps.*] sections below. Presumably used for any key an app section
; does not set itself - confirm against the config loader. Every app section in this file
; sets `run` and `count` explicitly anyway.
[apps..default]
run = true
count = 1
; Meta server application instance (ports = 34601).
; `pools` is the comma-separated list of thread pools the app uses. THREAD_POOL_META_SERVER has
; no [threadpool.*] section in this file, so it presumably takes the [threadpool..default]
; settings - confirm.
[apps.meta]
type = meta
name = meta
arguments =
ports = 34601
pools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_META_STATE,THREAD_POOL_FD,THREAD_POOL_DLOCK,THREAD_POOL_BLOCK_SERVICE
run = true
count = 1
; Replica server application instance (ports = 34801).
; Every pool named in `pools` has its own [threadpool.*] section in this file.
[apps.replica]
type = replica
name = replica
arguments =
ports = 34801
pools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_BLOCK_SERVICE,THREAD_POOL_COMPACT,THREAD_POOL_INGESTION,THREAD_POOL_SLOG,THREAD_POOL_PLOG
run = true
count = 1
; Collector application instance (ports = 34101). It uses only THREAD_POOL_DEFAULT and
; THREAD_POOL_REPLICATION; collector-specific tuning is in [pegasus.collector].
[apps.collector]
name = collector
type = collector
arguments =
ports = 34101
pools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION
run = true
count = 1
; "mimic" application instance of type dsn.app.mimic. Unlike the other apps it declares no
; `ports` key and runs only on THREAD_POOL_DEFAULT.
; NOTE(review): delay_seconds = 30 is presumably a start-up delay - confirm.
[apps.mimic]
name = mimic
type = dsn.app.mimic
arguments =
pools = THREAD_POOL_DEFAULT
run = true
count = 1
delay_seconds = 30
; Core runtime settings: data directory, tool/toollet selection, memory knobs and logging.
; %{...} values are template placeholders, presumably substituted by deployment tooling
; before the file is loaded - confirm.
[core]
data_dir = %{app.dir}
tool = nativerun
; Alternative toollet set kept for reference; only the profiler is enabled below.
;toollets = tracer, profiler, fault_injector
toollets = profiler
pause_on_start = false
enable_default_app_mimic = true
; Size in KB, per the key name.
tls_trans_memory_KB = 1024
tcmalloc_release_rate = 1.0
logging_start_level = LOG_LEVEL_DEBUG
; The logger named here is configured in [tools.simple_logger].
logging_factory_name = dsn::tools::simple_logger
logging_flush_on_exit = true
; Settings for dsn::tools::simple_logger, the logger selected by logging_factory_name in [core].
[tools.simple_logger]
short_header = false
fast_flush = false
max_number_of_log_files_on_disk = 500
; Separate start level for stderr; [core] sets logging_start_level = LOG_LEVEL_DEBUG.
stderr_start_level = LOG_LEVEL_ERROR
; File-copy ("nfs") tuning: block size, concurrency limits and file-handle expiry.
[nfs]
; 4194304 bytes = 4 MiB.
nfs_copy_block_bytes = 4194304
max_concurrent_remote_copy_requests = 50
max_concurrent_local_writes = 5
max_buffered_local_writes = 500
high_priority_speed_rate = 2
; Milliseconds, per the key names: 60000 ms = 60 s, 30000 ms = 30 s.
file_close_expire_time_ms = 60000
file_close_timer_interval_ms_on_server = 30000
max_file_copy_request_count_per_file = 10
; Network layer settings.
[network]
; Network interface to use; left empty here.
; NOTE(review): presumably an empty value lets the runtime pick an interface - confirm.
primary_interface =
; Number of network threads used by the network library (asio).
io_service_worker_count = 4
; Maximum number of connections one IP address may establish to a server (both replica and
; meta); 0 means no threshold.
conn_threshold_per_ip = 0
; Specification for each thread pool.
; [threadpool..default] holds baseline values, presumably applied to any pool that does not
; set a key itself (e.g. a pool with no [threadpool.*] section of its own) - confirm.
[threadpool..default]
worker_count = 4
[threadpool.THREAD_POOL_DEFAULT]
name = default
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
# The worker count in THREAD_POOL_DEFAULT must be >= 5.
# The info collector server runs four timer tasks (LPC_PEGASUS_APP_STAT_TIMER,
# LPC_PEGASUS_STORAGE_SIZE_STAT_TIMER, LPC_DETECT_AVAILABLE and
# LPC_PEGASUS_CAPACITY_UNIT_STAT_TIMER), and each of them occupies a thread in
# THREAD_POOL_DEFAULT. Each timer task issues a remote procedure call to the meta server
# (which produces a callback) and waits for that callback to execute.
# If worker_count <= 4, all threads are occupied by these timer tasks, so the RPC callbacks
# cannot get a thread to run. The result is a deadlock: the timer tasks wait for their RPC
# callbacks to execute, and the callbacks wait for a timer task to release a thread.
worker_count = 8
; Per-pool settings for the pools referenced from the `pools` lists in [apps.*].
; NOTE(review): `partitioned` presumably selects per-worker task queues (true) versus one
; shared queue (false) - confirm against the runtime documentation.

; Used by [apps.replica] and [apps.collector].
[threadpool.THREAD_POOL_REPLICATION]
name = replica
partitioned = true
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 24
; Used by [apps.meta].
[threadpool.THREAD_POOL_META_STATE]
name = meta_state
partitioned = true
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 1
; Used by [apps.meta].
[threadpool.THREAD_POOL_DLOCK]
name = dist_lock
partitioned = true
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 1
; Used by [apps.meta] and [apps.replica].
[threadpool.THREAD_POOL_FD]
name = fd
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 2
; Used by [apps.replica].
[threadpool.THREAD_POOL_LOCAL_APP]
name = local_app
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 24
; Used by [apps.replica].
[threadpool.THREAD_POOL_REPLICATION_LONG]
name = rep_long
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 8
; Thread pool listed in `pools` of both [apps.meta] and [apps.replica].
; worker_count is assigned exactly once: a repeated key in one section is resolved differently
; (first-wins, last-wins, warning or error) depending on the parser.
[threadpool.THREAD_POOL_BLOCK_SERVICE]
name = block_service
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 8
; The four pools below are all listed in `pools` of [apps.replica] only.
[threadpool.THREAD_POOL_COMPACT]
name = compact
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 8
[threadpool.THREAD_POOL_INGESTION]
name = ingestion
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 24
; SLOG sets neither `partitioned` nor `worker_priority`; the omitted keys presumably take
; default values - confirm.
[threadpool.THREAD_POOL_SLOG]
name = slog
worker_count = 1
; PLOG omits `worker_priority` (see the note on SLOG).
[threadpool.THREAD_POOL_PLOG]
name = plog
partitioned = true
worker_count = 4
; Meta server settings: cluster identity, the ZooKeeper-backed lock/state services, liveness
; thresholds, load balancing and replica white list.
[meta_server]
server_list = %{meta.server.list}
cluster_root = /pegasus/%{cluster.name}
distributed_lock_service_type = distributed_lock_service_zookeeper
distributed_lock_service_parameters = /pegasus/%{cluster.name}/lock
meta_state_service_type = meta_state_service_zookeeper
meta_state_service_parameters =
node_live_percentage_threshold_for_update = 50
min_live_node_count_for_unfreeze = 3
meta_function_level_on_start = steady
recover_from_replica_server = false
; 604800 s = 7 days.
hold_seconds_for_dropped_app = 604800
add_secondary_enable_flow_control = true
add_secondary_max_count_for_one_node = 20
; 600 s = 10 minutes.
stable_rs_min_running_seconds = 600
; NOTE(review): the key is spelled "succssive". Presumably this matches the name the server
; looks up, so do not rename it here without checking the consuming code.
max_succssive_unstable_restart = 5
server_load_balancer_type = greedy_load_balancer
; 600000 ms = 10 minutes.
replica_assign_delay_ms_for_dropouts = 600000
max_replicas_in_group = 3
balancer_in_turn = false
only_primary_balancer = false
only_move_primary = false
cold_backup_disabled = false
; The white list is disabled and empty.
enable_white_list = false
replica_white_list =
; Replica server settings: storage locations, two-phase-commit (prepare) behaviour, group check,
; checkpointing, garbage collection, failure detection (fd_*), private/shared logs, config sync,
; memory release and cold backup / bulk load limits.
; Units follow the key suffixes (_ms, _seconds, _hours, _kb, _mb).
[replication]
slog_dir = %{slog.dir}
data_dirs = %{data.dirs}
data_dirs_black_list_file = %{home.dir}/.pegasus_data_dirs_black_list
cluster_name = %{cluster.name}
deny_client_on_start = false
verbose_client_log_on_start = false
verbose_commit_log_on_start = false
delay_for_fd_timeout_on_start = false
empty_write_disabled = false
prepare_timeout_ms_for_secondaries = 3000
prepare_timeout_ms_for_potential_secondaries = 5000
prepare_decree_gap_for_debug_logging = 10000
batch_write_disabled = false
staleness_for_commit = 20
max_mutation_count_in_prepare_list = 110
mutation_2pc_min_replica_count = 2
group_check_disabled = false
; 100000 ms = 100 s.
group_check_interval_ms = 100000
checkpoint_disabled = false
checkpoint_interval_seconds = 300
checkpoint_min_decree_gap = 10000
checkpoint_max_interval_hours = 2
gc_disabled = false
gc_interval_ms = 30000
; 600000 ms = 10 minutes.
gc_memory_replica_interval_ms = 600000
; 86400 s = 1 day.
gc_disk_error_replica_interval_seconds = 86400
gc_disk_garbage_replica_interval_seconds = 86400
disk_stat_disabled = false
disk_stat_interval_seconds = 600
fd_disabled = false
fd_check_interval_seconds = 2
fd_beacon_interval_seconds = 3
; fd_grace_seconds (22) is configured larger than fd_lease_seconds (20).
fd_lease_seconds = 20
fd_grace_seconds = 22
log_private_file_size_mb = 32
log_private_batch_buffer_kb = 7
log_private_batch_buffer_count = 512
log_private_batch_buffer_flush_interval_ms = 30000
log_private_reserve_max_size_mb = 1000
; 36000 s = 10 hours.
log_private_reserve_max_time_seconds = 36000
log_shared_file_size_mb = 128
log_shared_file_count_limit = 100
log_shared_batch_buffer_kb = 0
log_shared_force_flush = false
log_shared_pending_size_throttling_threshold_kb = 0
log_shared_pending_size_throttling_delay_ms = 0
config_sync_disabled = false
config_sync_interval_ms = 30000
;; WARNING: memory release may cause a major performance degradation when improperly configured.
;; Only enable this feature when it is necessary.
mem_release_enabled = false
; 3600000 ms = 1 hour.
mem_release_check_interval_ms = 3600000
mem_release_max_reserved_mem_percentage = 10
lb_interval_ms = 10000
learn_app_max_concurrent_count = 5
;; The path prefix under which backup data is saved on the cold backup media.
;; Using the cluster name as the root is recommended.
cold_backup_root = %{cluster.name}
max_concurrent_uploading_file_count = 10
max_concurrent_bulk_load_downloading_count = 5
# Pegasus server settings: RocksDB tuning, query/iteration limits, checkpoint retention and
# perf-counter reporting.
[pegasus.server]
rocksdb_verbose_log = false
# Slow/abnormal query thresholds - get: {100 ms, 1 MB}; multi-get: {100 ms, 10 MB, 1000 iterations}
rocksdb_slow_query_threshold_ns = 100000000
rocksdb_abnormal_get_size_threshold = 1000000
rocksdb_abnormal_multi_get_size_threshold = 10000000
rocksdb_abnormal_multi_get_iterate_count_threshold = 1000
# 67108864 bytes = 64 MiB
rocksdb_write_buffer_size = 67108864
rocksdb_max_write_buffer_number = 3
rocksdb_max_background_flushes = 4
rocksdb_max_background_compactions = 12
rocksdb_num_levels = 6
# 67108864 bytes = 64 MiB
rocksdb_target_file_size_base = 67108864
rocksdb_target_file_size_multiplier = 1
# 671088640 bytes = 640 MiB
rocksdb_max_bytes_for_level_base = 671088640
rocksdb_max_bytes_for_level_multiplier = 10
rocksdb_level0_file_num_compaction_trigger = 4
rocksdb_level0_slowdown_writes_trigger = 30
rocksdb_level0_stop_writes_trigger = 60
rocksdb_compression_type = lz4
rocksdb_disable_table_block_cache = false
# 10737418240 bytes = 10 GiB
rocksdb_block_cache_capacity = 10737418240
rocksdb_block_cache_num_shard_bits = -1
rocksdb_disable_bloom_filter = false
# Bloom filter type, should be either 'common' or 'prefix'
rocksdb_filter_type = prefix
# rocksdb_bloom_filter_bits_per_key | false positive rate
#                                   | rocksdb_format_version < 5 | rocksdb_format_version = 5
#                 6                 | 5.70953                    | 5.69888
#                 8                 | 2.45766                    | 2.29709
#                10                 | 1.13977                    | 0.959254
#                12                 | 0.662498                   | 0.411593
#                16                 | 0.353023                   | 0.0873754
#                24                 | 0.261552                   | 0.0060971
#                50                 | 0.225453                   | ~0.00003
rocksdb_bloom_filter_bits_per_key = 10
# SST file format version, should be either 2 or 5
# COMPATIBILITY ATTENTION:
# Although old releases would see the new structure as corrupt filter data and read the
# table as if there's no filter, we've decided only to enable the new Bloom filter with new
# format_version=5. This provides a smooth path for automatic adoption over time, with an
# option for early opt-in.
rocksdb_format_version = 2
# Iteration limits for the four keys below, in order: 3000, 30MB, 1000, 30s
rocksdb_multi_get_max_iteration_count = 3000
rocksdb_multi_get_max_iteration_size = 31457280
rocksdb_max_iteration_count = 1000
rocksdb_iteration_threshold_time_ms = 30000
rocksdb_limiter_max_write_megabytes_per_sec = 500
rocksdb_limiter_enable_auto_tune = false
rocksdb_enable_write_buffer_manager = false
rocksdb_total_size_across_write_buffer = 0
rocksdb_max_open_files = -1
rocksdb_index_type = binary_search
rocksdb_partition_filters = false
rocksdb_metadata_block_size = 4096
rocksdb_cache_index_and_filter_blocks = false
rocksdb_pin_top_level_index_and_filter = true
rocksdb_cache_index_and_filter_blocks_with_high_priority = true
rocksdb_pin_l0_filter_and_index_blocks_in_cache = false
checkpoint_reserve_min_count = 2
# 1800 s = 30 minutes
checkpoint_reserve_time_seconds = 1800
update_rdb_stat_interval = 600
manual_compact_min_interval_seconds = 600
perf_counter_update_interval_seconds = 10
perf_counter_enable_logging = false
# Where the metrics are collected. If no value is given, no sink is used.
# Options:
# - falcon
# - prometheus
perf_counter_sink =
perf_counter_read_capacity_unit_size = 4096
perf_counter_write_capacity_unit_size = 4096
# Falcon endpoint (host, port, path); presumably only relevant when perf_counter_sink = falcon.
falcon_host = 127.0.0.1
falcon_port = 1988
falcon_path = /v1/push
# The HTTP port exposed to Prometheus for pulling metrics from pegasus server.
prometheus_port = 9091
; Settings for the collector app ([apps.collector]): availability detection, alerting and
; statistics intervals.
[pegasus.collector]
available_detect_app = temp
available_detect_alert_script_dir = ./package/bin
available_detect_alert_email_address =
available_detect_interval_seconds = 3
available_detect_alert_fail_count = 30
; NOTE(review): no unit in the key name; presumably milliseconds - confirm.
available_detect_timeout = 5000
app_stat_interval_seconds = 10
usage_stat_app = stat
capacity_unit_fetch_interval_seconds = 8
; 3600 s = 1 hour.
storage_size_fetch_interval_seconds = 3600
; Maps a cluster name to its meta server list. Both the key and the value are template
; placeholders, so the single entry describes this cluster itself.
[pegasus.clusters]
%{cluster.name} = %{meta.server.list}
; Computation interval, in seconds, for the percentile perf counter component named in the
; section header.
[components.pegasus_perf_counter_number_percentile_atomic]
counter_computation_interval_seconds = 10
; ZooKeeper client settings. [meta_server] selects ZooKeeper-backed lock and meta-state services.
[zookeeper]
hosts_list = %{zk.server.list}
; 10000 ms = 10 s.
timeout_ms = 10000
logfile = zoo.log
; Baseline values for the [task.*] sections that follow. The sections below override
; individual keys (is_profile, rpc_call_channel, rpc_message_crc_required, throttling).
[task..default]
is_trace = false
is_profile = false
allow_inline = false
rpc_call_channel = RPC_CHANNEL_TCP
rpc_call_header_format = NET_HDR_DSN
rpc_message_crc_required = false
rpc_call_header_format_name = dsn
; 5000 ms = 5 s.
rpc_timeout_milliseconds = 5000
; Per-task sections for replication-related tasks. A section whose only line is the
; commented-out ";is_profile = true" is a placeholder with no active keys: remove the leading
; ';' to enable profiling for that task. In this group only RPC_PREPARE and RPC_PREPARE_ACK
; have profiling enabled.
[task.LPC_REPLICATION_INIT_LOAD]
;is_profile = true
[task.RPC_REPLICATION_WRITE_EMPTY]
;is_profile = true
[task.RPC_REPLICATION_WRITE_EMPTY_ACK]
;is_profile = true
[task.LPC_PER_REPLICA_CHECKPOINT_TIMER]
;is_profile = true
[task.LPC_PER_REPLICA_COLLECT_INFO_TIMER]
;is_profile = true
[task.LPC_GROUP_CHECK]
;is_profile = true
[task.LPC_CM_DISCONNECTED_SCATTER]
;is_profile = true
[task.LPC_QUERY_NODE_CONFIGURATION_SCATTER]
;is_profile = true
[task.LPC_QUERY_NODE_CONFIGURATION_SCATTER2]
;is_profile = true
[task.LPC_DELAY_UPDATE_CONFIG]
;is_profile = true
[task.LPC_DELAY_LEARN]
;is_profile = true
[task.LPC_LEARN_REMOTE_DELTA_FILES_COMPLETED]
;is_profile = true
[task.LPC_CHECKPOINT_REPLICA_COMPLETED]
;is_profile = true
[task.LPC_SIM_UPDATE_PARTITION_CONFIGURATION_REPLY]
;is_profile = true
[task.LPC_WRITE_REPLICATION_LOG]
;is_profile = true
[task.LPC_REPLICATION_ERROR]
;is_profile = true
; NOTE(review): "LERARN" looks like a misspelling of "LEARN". The section has no active keys,
; so it has no effect either way; verify the real task code name before renaming or enabling.
[task.LPC_LERARN_REMOTE_DISK_STATE]
;is_profile = true
[task.RPC_CONFIG_PROPOSAL]
;is_profile = true
[task.RPC_CONFIG_PROPOSAL_ACK]
;is_profile = true
[task.RPC_QUERY_PN_DECREE]
;is_profile = true
[task.RPC_QUERY_PN_DECREE_ACK]
;is_profile = true
[task.RPC_QUERY_REPLICA_INFO]
;is_profile = true
[task.RPC_QUERY_REPLICA_INFO_ACK]
;is_profile = true
; Profiling is enabled for the prepare request and its ack.
[task.RPC_PREPARE]
is_profile = true
[task.RPC_PREPARE_ACK]
is_profile = true
[task.LPC_DELAY_PREPARE]
;is_profile = true
[task.RPC_GROUP_CHECK]
;is_profile = true
[task.RPC_GROUP_CHECK_ACK]
;is_profile = true
[task.RPC_QUERY_APP_INFO]
;is_profile = true
[task.RPC_QUERY_APP_INFO_ACK]
;is_profile = true
[task.RPC_LEARN]
;is_profile = true
[task.RPC_LEARN_ACK]
;is_profile = true
[task.RPC_LEARN_COMPLETION_NOTIFY]
;is_profile = true
[task.RPC_LEARN_COMPLETION_NOTIFY_ACK]
;is_profile = true
[task.RPC_LEARN_ADD_LEARNER]
;is_profile = true
[task.RPC_LEARN_ADD_LEARNER_ACK]
;is_profile = true
[task.RPC_REMOVE_REPLICA]
;is_profile = true
[task.RPC_REMOVE_REPLICA_ACK]
;is_profile = true
[task.RPC_REPLICA_COPY_LAST_CHECKPOINT]
;is_profile = true
[task.RPC_REPLICA_COPY_LAST_CHECKPOINT_ACK]
;is_profile = true
[task.LPC_REPLICA_COPY_LAST_CHECKPOINT_DONE]
;is_profile = true
[task.RPC_COLD_BACKUP]
;is_profile = true
[task.RPC_COLD_BACKUP_ACK]
;is_profile = true
[task.LPC_REPLICATION_COLD_BACKUP]
;is_profile = true
; RPC_RRDB_* task sections. They follow one pattern:
;   - every request task enables profiling and sets TM_DELAY throttling with the same delay
;     list (50, 50, 50, 50, 50, 100 - milliseconds, per the key name);
;   - every *_ACK task only enables profiling.
; PUT and MULTI_PUT additionally set profiler::size.request.server; GET and MULTI_GET set
; profiler::size.response.server.
[task.RPC_RRDB_RRDB_PUT]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
profiler::size.request.server = true
[task.RPC_RRDB_RRDB_PUT_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_MULTI_PUT]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
profiler::size.request.server = true
[task.RPC_RRDB_RRDB_MULTI_PUT_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_REMOVE]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
[task.RPC_RRDB_RRDB_REMOVE_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_MULTI_REMOVE]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
[task.RPC_RRDB_RRDB_MULTI_REMOVE_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_INCR]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
[task.RPC_RRDB_RRDB_INCR_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_CHECK_AND_SET]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
[task.RPC_RRDB_RRDB_CHECK_AND_SET_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_CHECK_AND_MUTATE]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
[task.RPC_RRDB_RRDB_CHECK_AND_MUTATE_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_GET]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
profiler::size.response.server = true
[task.RPC_RRDB_RRDB_GET_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_MULTI_GET]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
profiler::size.response.server = true
[task.RPC_RRDB_RRDB_MULTI_GET_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_SORTKEY_COUNT]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
[task.RPC_RRDB_RRDB_SORTKEY_COUNT_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_TTL]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
[task.RPC_RRDB_RRDB_TTL_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_GET_SCANNER]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
[task.RPC_RRDB_RRDB_GET_SCANNER_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_SCAN]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
[task.RPC_RRDB_RRDB_SCAN_ACK]
is_profile = true
[task.RPC_RRDB_RRDB_CLEAR_SCANNER]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
[task.RPC_RRDB_RRDB_CLEAR_SCANNER_ACK]
is_profile = true
; Failure-detector ping and its ack. Compared with [task..default] they switch the channel
; from RPC_CHANNEL_TCP to RPC_CHANNEL_UDP and require a message CRC (the default is false);
; the header format is restated with the same value as the default. Profiling stays off
; (commented out).
[task.RPC_FD_FAILURE_DETECTOR_PING]
rpc_call_header_format = NET_HDR_DSN
rpc_call_channel = RPC_CHANNEL_UDP
rpc_message_crc_required = true
;is_profile = true
[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]
rpc_call_header_format = NET_HDR_DSN
rpc_call_channel = RPC_CHANNEL_UDP
rpc_message_crc_required = true
;is_profile = true