/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.ozone.container.common.statemachine;

import java.time.Duration;

import org.apache.hadoop.hdds.conf.Config;
import org.apache.hadoop.hdds.conf.ConfigGroup;
import org.apache.hadoop.hdds.conf.ConfigTag;
import org.apache.hadoop.hdds.conf.ConfigType;
import org.apache.hadoop.hdds.conf.PostConstruct;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static java.util.concurrent.TimeUnit.MICROSECONDS;
import static org.apache.hadoop.hdds.conf.ConfigTag.DATANODE;
/**
* Configuration class used for high level datanode configuration parameters.
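*
* <p>Instances are normally created and populated by the Ozone configuration
* framework rather than constructed directly. A minimal usage sketch,
* assuming an {@code OzoneConfiguration} named {@code conf} is in scope:
* <pre>
*   DatanodeConfiguration dnConf =
*       conf.getObject(DatanodeConfiguration.class);
*   int deleteThreads = dnConf.getContainerDeleteThreads();
* </pre>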
*/
@ConfigGroup(prefix = "hdds.datanode")
public class DatanodeConfiguration {
private static final Logger LOG =
LoggerFactory.getLogger(DatanodeConfiguration.class);
static final String CONTAINER_DELETE_THREADS_MAX_KEY =
"hdds.datanode.container.delete.threads.max";
static final String PERIODIC_DISK_CHECK_INTERVAL_MINUTES_KEY =
"hdds.datanode.periodic.disk.check.interval.minutes";
public static final String FAILED_DATA_VOLUMES_TOLERATED_KEY =
"hdds.datanode.failed.data.volumes.tolerated";
public static final String FAILED_METADATA_VOLUMES_TOLERATED_KEY =
"hdds.datanode.failed.metadata.volumes.tolerated";
public static final String FAILED_DB_VOLUMES_TOLERATED_KEY =
"hdds.datanode.failed.db.volumes.tolerated";
public static final String DISK_CHECK_MIN_GAP_KEY =
"hdds.datanode.disk.check.min.gap";
public static final String DISK_CHECK_TIMEOUT_KEY =
"hdds.datanode.disk.check.timeout";
public static final String WAIT_ON_ALL_FOLLOWERS =
"hdds.datanode.wait.on.all.followers";
public static final String CONTAINER_SCHEMA_V3_ENABLED =
"hdds.datanode.container.schema.v3.enabled";
static final boolean CHUNK_DATA_VALIDATION_CHECK_DEFAULT = false;
static final long PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT = 60;
static final int FAILED_VOLUMES_TOLERATED_DEFAULT = -1;
static final boolean WAIT_ON_ALL_FOLLOWERS_DEFAULT = false;
static final long DISK_CHECK_MIN_GAP_DEFAULT =
Duration.ofMinutes(15).toMillis();
static final long DISK_CHECK_TIMEOUT_DEFAULT =
Duration.ofMinutes(10).toMillis();
static final boolean CONTAINER_SCHEMA_V3_ENABLED_DEFAULT = false;
static final long ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT = 32 * 1024 * 1024;
static final int ROCKSDB_LOG_MAX_FILE_NUM_DEFAULT = 64;
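// 6 hours in microseconds is 6 * 3600 * 1,000,000 = 21,600,000,000, which
// exceeds Integer.MAX_VALUE, so the constant below must be computed in long
// arithmetic (note the 6L); a plain int expression would silently overflow.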
static final long ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_DEFAULT =
6L * 60 * 60 * 1000 * 1000;
public static final String ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_KEY =
"hdds.datanode.rocksdb.log.max-file-size";
public static final String ROCKSDB_LOG_MAX_FILE_NUM_KEY =
"hdds.datanode.rocksdb.log.max-file-num";
public static final String
ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_KEY =
"hdds.datanode.rocksdb.delete_obsolete_files_period";
/**
* Number of threads per volume that the datanode uses for chunk reads.
*/
@Config(key = "read.chunk.threads.per.volume",
type = ConfigType.INT,
defaultValue = "10",
tags = {DATANODE},
description = "Number of threads per volume that Datanode will use for " +
"reading replicated chunks."
)
private int numReadThreadPerVolume = 10;
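// Note: this is a per-volume figure, so the total number of read threads
// grows with the volume count, e.g. 12 data volumes * 10 threads = 120
// threads (illustrative arithmetic).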
static final int CONTAINER_DELETE_THREADS_DEFAULT = 2;
static final int BLOCK_DELETE_THREADS_DEFAULT = 5;
/**
* The maximum number of threads used to delete containers on a datanode
* simultaneously.
*/
@Config(key = "container.delete.threads.max",
type = ConfigType.INT,
defaultValue = "2",
tags = {DATANODE},
description = "The maximum number of threads used to delete containers " +
"on a datanode"
)
private int containerDeleteThreads = CONTAINER_DELETE_THREADS_DEFAULT;
/**
* The maximum number of threads used to handle delete block commands.
* Opening a RocksDB instance on HDD media takes about 200ms, so on a RocksDB
* cache miss a single thread can handle roughly 60s / 0.2s = 300 individual
* container delete transactions per 60s. With the default of 5 threads, the
* DN can optimistically handle about 1500 such transactions per 60s.
*/
@Config(key = "block.delete.threads.max",
type = ConfigType.INT,
defaultValue = "5",
tags = {DATANODE},
description = "The maximum number of threads used to handle delete " +
" blocks on a datanode"
)
private int blockDeleteThreads = BLOCK_DELETE_THREADS_DEFAULT;
/**
* The maximum number of commands in the queued list.
* 1440 = 60 * 24: if SCM sends a delete command every minute and the
* commands pile up for more than one day, the DN starts discarding newly
* incoming commands.
*/
@Config(key = "block.delete.queue.limit",
type = ConfigType.INT,
defaultValue = "1440",
tags = {DATANODE},
description = "The maximum number of block delete commands queued on " +
" a datanode"
)
private int blockDeleteQueueLimit = 60 * 24;
@Config(key = "block.deleting.service.interval",
defaultValue = "60s",
type = ConfigType.TIME,
tags = { ConfigTag.SCM, ConfigTag.DELETION },
description =
"Time interval of the Datanode block deleting service. The "
+ "block deleting service runs on Datanode "
+ "periodically and deletes blocks queued for "
+ "deletion. Unit could be defined with "
+ "postfix (ns,ms,s,m,h,d). "
)
private long blockDeletionInterval = Duration.ofSeconds(60).toMillis();
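// Example override (illustrative): the effective property name is the
// @ConfigGroup prefix joined with the @Config key, e.g. in ozone-site.xml:
//   <property>
//     <name>hdds.datanode.block.deleting.service.interval</name>
//     <value>5m</value>
//   </property>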
@Config(key = "recovering.container.scrubbing.service.interval",
defaultValue = "1m",
type = ConfigType.TIME,
tags = { ConfigTag.SCM, ConfigTag.DELETION },
description =
"Time interval of the stale recovering container scrubbing " +
"service. The recovering container scrubbing service runs " +
"on Datanode periodically and deletes stale recovering " +
"container Unit could be defined with postfix (ns,ms,s,m,h,d)."
)
private long recoveringContainerScrubInterval =
Duration.ofMinutes(1).toMillis();
public Duration getBlockDeletionInterval() {
return Duration.ofMillis(blockDeletionInterval);
}
public Duration getRecoveringContainerScrubInterval() {
return Duration.ofMillis(recoveringContainerScrubInterval);
}
public void setBlockDeletionInterval(Duration duration) {
this.blockDeletionInterval = duration.toMillis();
}
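// A sketch of a programmatic override (e.g. in tests), assuming the usual
// round trip through OzoneConfiguration#setFromObject:
//   dnConf.setBlockDeletionInterval(Duration.ofMinutes(5));
//   conf.setFromObject(dnConf);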
@Config(key = "block.deleting.limit.per.interval",
defaultValue = "5000",
type = ConfigType.INT,
tags = { ConfigTag.SCM, ConfigTag.DELETION },
description =
"Number of blocks to be deleted in an interval."
)
private int blockLimitPerInterval = 5000;
public int getBlockDeletionLimit() {
return blockLimitPerInterval;
}
public void setBlockDeletionLimit(int limit) {
this.blockLimitPerInterval = limit;
}
@Config(key = "periodic.disk.check.interval.minutes",
defaultValue = "60",
type = ConfigType.LONG,
tags = { DATANODE },
description = "Periodic disk check run interval in minutes."
)
private long periodicDiskCheckIntervalMinutes =
PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT;
@Config(key = "failed.data.volumes.tolerated",
defaultValue = "-1",
type = ConfigType.INT,
tags = { DATANODE },
description = "The number of data volumes that are allowed to fail "
+ "before a datanode stops offering service. "
+ "Config this to -1 means unlimited, but we should have "
+ "at least one good volume left."
)
private int failedDataVolumesTolerated = FAILED_VOLUMES_TOLERATED_DEFAULT;
@Config(key = "failed.metadata.volumes.tolerated",
defaultValue = "-1",
type = ConfigType.INT,
tags = { DATANODE },
description = "The number of metadata volumes that are allowed to fail "
+ "before a datanode stops offering service. "
+ "Config this to -1 means unlimited, but we should have "
+ "at least one good volume left."
)
private int failedMetadataVolumesTolerated = FAILED_VOLUMES_TOLERATED_DEFAULT;
@Config(key = "failed.db.volumes.tolerated",
defaultValue = "-1",
type = ConfigType.INT,
tags = { DATANODE },
description = "The number of db volumes that are allowed to fail "
+ "before a datanode stops offering service. "
+ "Config this to -1 means unlimited, but we should have "
+ "at least one good volume left."
)
private int failedDbVolumesTolerated = FAILED_VOLUMES_TOLERATED_DEFAULT;
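// Semantics of the three settings above: 0 tolerates no failures (the first
// failed volume stops the datanode), N > 0 tolerates up to N failed volumes,
// and -1 (the default) keeps the datanode running as long as at least one
// good volume remains.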
@Config(key = "disk.check.min.gap",
defaultValue = "15m",
type = ConfigType.TIME,
tags = { DATANODE },
description = "The minimum gap between two successive checks of the same"
+ " Datanode volume. Unit could be defined with"
+ " postfix (ns,ms,s,m,h,d)."
)
private long diskCheckMinGap = DISK_CHECK_MIN_GAP_DEFAULT;
@Config(key = "disk.check.timeout",
defaultValue = "10m",
type = ConfigType.TIME,
tags = { DATANODE },
description = "Maximum allowed time for a disk check to complete."
+ " If the check does not complete within this time interval"
+ " then the disk is declared as failed. Unit could be defined with"
+ " postfix (ns,ms,s,m,h,d)."
)
private long diskCheckTimeout = DISK_CHECK_TIMEOUT_DEFAULT;
@Config(key = "chunk.data.validation.check",
defaultValue = "false",
type = ConfigType.BOOLEAN,
tags = { DATANODE },
description = "Enable safety checks such as checksum validation"
+ " for Ratis calls."
)
private boolean isChunkDataValidationCheck =
CHUNK_DATA_VALIDATION_CHECK_DEFAULT;
@Config(key = "wait.on.all.followers",
defaultValue = "false",
type = ConfigType.BOOLEAN,
tags = { DATANODE },
description = "Defines whether the leader datanode will wait for both"
+ "followers to catch up before removing the stateMachineData from "
+ "the cache."
)
private boolean waitOnAllFollowers = WAIT_ON_ALL_FOLLOWERS_DEFAULT;
public boolean waitOnAllFollowers() {
return waitOnAllFollowers;
}
public void setWaitOnAllFollowers(boolean val) {
this.waitOnAllFollowers = val;
}
@Config(key = "container.schema.v3.enabled",
defaultValue = "false",
type = ConfigType.BOOLEAN,
tags = { DATANODE },
description = "Enable use of container schema v3(one rocksdb per disk)."
)
private boolean containerSchemaV3Enabled =
CONTAINER_SCHEMA_V3_ENABLED_DEFAULT;
@Config(key = "container.schema.v3.key.separator",
defaultValue = "|",
type = ConfigType.STRING,
tags = { DATANODE },
description = "The default separator between Container ID and container" +
" meta key name."
)
private String containerSchemaV3KeySeparator = "|";
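// With the default separator, a schema V3 RocksDB key is the container ID
// followed by the meta key name, e.g. "42|<metaKeyName>" (an illustrative
// layout, not the exact on-disk format).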
/**
* The following RocksDB-related configuration applies to schema V3 only.
*/
@Config(key = "rocksdb.log.level",
defaultValue = "INFO",
type = ConfigType.STRING,
tags = { DATANODE },
description =
"The user log level of RocksDB (DEBUG/INFO/WARN/ERROR/FATAL)."
private String rocksdbLogLevel = "INFO";
@Config(key = "rocksdb.log.max-file-size",
defaultValue = "32MB",
type = ConfigType.SIZE,
tags = { DATANODE },
description = "The max size of each user log file of RocksDB. " +
"O means no size limit."
)
private long rocksdbMaxFileSize = ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT;
@Config(key = "rocksdb.log.max-file-num",
defaultValue = "64",
type = ConfigType.INT,
tags = { DATANODE },
description = "The max user log file number to keep for each RocksDB"
)
private int rocksdbMaxFileNum = ROCKSDB_LOG_MAX_FILE_NUM_DEFAULT;
@Config(key = "rocksdb.delete_obsolete_files_period",
defaultValue = "6h", timeUnit = MICROSECONDS,
type = ConfigType.TIME,
tags = { DATANODE },
description = "Periodicity when obsolete files get deleted. " +
"Default is 6h."
)
private long rocksdbDeleteObsoleteFilesPeriod =
ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_DEFAULT;
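// Invoked by the configuration framework once the annotated fields have been
// injected (see @PostConstruct). Out-of-range values are logged and reset to
// their defaults instead of failing datanode startup.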
@PostConstruct
public void validate() {
if (containerDeleteThreads < 1) {
LOG.warn(CONTAINER_DELETE_THREADS_MAX_KEY + " must be greater than zero" +
" and was set to {}. Defaulting to {}",
containerDeleteThreads, CONTAINER_DELETE_THREADS_DEFAULT);
containerDeleteThreads = CONTAINER_DELETE_THREADS_DEFAULT;
}
if (periodicDiskCheckIntervalMinutes < 1) {
LOG.warn(PERIODIC_DISK_CHECK_INTERVAL_MINUTES_KEY +
" must be greater than zero and was set to {}. Defaulting to {}",
periodicDiskCheckIntervalMinutes,
PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT);
periodicDiskCheckIntervalMinutes =
PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT;
}
if (failedDataVolumesTolerated < -1) {
LOG.warn(FAILED_DATA_VOLUMES_TOLERATED_KEY +
" must be no less than -1 and was set to {}. Defaulting to {}",
failedDataVolumesTolerated, FAILED_VOLUMES_TOLERATED_DEFAULT);
failedDataVolumesTolerated = FAILED_VOLUMES_TOLERATED_DEFAULT;
}
if (failedMetadataVolumesTolerated < -1) {
LOG.warn(FAILED_METADATA_VOLUMES_TOLERATED_KEY +
" must be no less than -1 and was set to {}. Defaulting to {}",
failedMetadataVolumesTolerated, FAILED_VOLUMES_TOLERATED_DEFAULT);
failedMetadataVolumesTolerated = FAILED_VOLUMES_TOLERATED_DEFAULT;
}
if (failedDbVolumesTolerated < -1) {
LOG.warn(FAILED_DB_VOLUMES_TOLERATED_KEY +
" must be no less than -1 and was set to {}. Defaulting to {}",
failedDbVolumesTolerated, FAILED_VOLUMES_TOLERATED_DEFAULT);
failedDbVolumesTolerated = FAILED_VOLUMES_TOLERATED_DEFAULT;
}
if (diskCheckMinGap < 0) {
LOG.warn(DISK_CHECK_MIN_GAP_KEY +
" must be non-negative and was set to {}. Defaulting to {}",
diskCheckMinGap, DISK_CHECK_MIN_GAP_DEFAULT);
diskCheckMinGap = DISK_CHECK_MIN_GAP_DEFAULT;
}
if (diskCheckTimeout < 0) {
LOG.warn(DISK_CHECK_TIMEOUT_KEY +
" must be non-negative and was set to {}. Defaulting to {}",
diskCheckTimeout, DISK_CHECK_TIMEOUT_DEFAULT);
diskCheckTimeout = DISK_CHECK_TIMEOUT_DEFAULT;
}
if (rocksdbMaxFileSize < 0) {
LOG.warn(ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_KEY +
" must be no less than zero and was set to {}. Defaulting to {}",
rocksdbMaxFileSize, ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT);
rocksdbMaxFileSize = ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT;
}
if (rocksdbMaxFileNum <= 0) {
LOG.warn(ROCKSDB_LOG_MAX_FILE_NUM_KEY +
" must be greater than zero and was set to {}. Defaulting to {}",
rocksdbMaxFileNum, ROCKSDB_LOG_MAX_FILE_NUM_DEFAULT);
rocksdbMaxFileNum = ROCKSDB_LOG_MAX_FILE_NUM_DEFAULT;
}
if (rocksdbDeleteObsoleteFilesPeriod <= 0) {
LOG.warn(ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_KEY +
" must be greater than zero and was set to {}. Defaulting to {}",
rocksdbDeleteObsoleteFilesPeriod,
ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_DEFAULT);
rocksdbDeleteObsoleteFilesPeriod =
ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_DEFAULT;
}
}
public void setContainerDeleteThreads(int containerDeleteThreads) {
this.containerDeleteThreads = containerDeleteThreads;
}
public int getContainerDeleteThreads() {
return containerDeleteThreads;
}
public long getPeriodicDiskCheckIntervalMinutes() {
return periodicDiskCheckIntervalMinutes;
}
public void setPeriodicDiskCheckIntervalMinutes(
long periodicDiskCheckIntervalMinutes) {
this.periodicDiskCheckIntervalMinutes = periodicDiskCheckIntervalMinutes;
}
public int getFailedDataVolumesTolerated() {
return failedDataVolumesTolerated;
}
public void setFailedDataVolumesTolerated(int failedVolumesTolerated) {
this.failedDataVolumesTolerated = failedVolumesTolerated;
}
public int getFailedMetadataVolumesTolerated() {
return failedMetadataVolumesTolerated;
}
public void setFailedMetadataVolumesTolerated(int failedVolumesTolerated) {
this.failedMetadataVolumesTolerated = failedVolumesTolerated;
}
public int getFailedDbVolumesTolerated() {
return failedDbVolumesTolerated;
}
public void setFailedDbVolumesTolerated(int failedVolumesTolerated) {
this.failedDbVolumesTolerated = failedVolumesTolerated;
}
public Duration getDiskCheckMinGap() {
return Duration.ofMillis(diskCheckMinGap);
}
public void setDiskCheckMinGap(Duration duration) {
this.diskCheckMinGap = duration.toMillis();
}
public Duration getDiskCheckTimeout() {
return Duration.ofMillis(diskCheckTimeout);
}
public void setDiskCheckTimeout(Duration duration) {
this.diskCheckTimeout = duration.toMillis();
}
public int getBlockDeleteThreads() {
return blockDeleteThreads;
}
public void setBlockDeleteThreads(int threads) {
this.blockDeleteThreads = threads;
}
public int getBlockDeleteQueueLimit() {
return blockDeleteQueueLimit;
}
public void setBlockDeleteQueueLimit(int queueLimit) {
this.blockDeleteQueueLimit = queueLimit;
}
public boolean isChunkDataValidationCheck() {
return isChunkDataValidationCheck;
}
public void setChunkDataValidationCheck(boolean writeChunkValidationCheck) {
isChunkDataValidationCheck = writeChunkValidationCheck;
}
public void setNumReadThreadPerVolume(int threads) {
this.numReadThreadPerVolume = threads;
}
public int getNumReadThreadPerVolume() {
return numReadThreadPerVolume;
}
public boolean getContainerSchemaV3Enabled() {
return this.containerSchemaV3Enabled;
}
public void setContainerSchemaV3Enabled(boolean containerSchemaV3Enabled) {
this.containerSchemaV3Enabled = containerSchemaV3Enabled;
}
public String getContainerSchemaV3KeySeparator() {
return this.containerSchemaV3KeySeparator;
}
public void setContainerSchemaV3KeySeparator(String separator) {
this.containerSchemaV3KeySeparator = separator;
}
public String getRocksdbLogLevel() {
return rocksdbLogLevel;
}
public void setRocksdbLogLevel(String level) {
this.rocksdbLogLevel = level;
}
public void setRocksdbMaxFileNum(int count) {
this.rocksdbMaxFileNum = count;
}
public int getRocksdbMaxFileNum() {
return rocksdbMaxFileNum;
}
public void setRocksdbMaxFileSize(long size) {
this.rocksdbMaxFileSize = size;
}
public long getRocksdbMaxFileSize() {
return rocksdbMaxFileSize;
}
public long getRocksdbDeleteObsoleteFilesPeriod() {
return rocksdbDeleteObsoleteFilesPeriod;
}
public void setRocksdbDeleteObsoleteFilesPeriod(long period) {
this.rocksdbDeleteObsoleteFilesPeriod = period;
}
}