| <?xml version="1.0"?> |
| <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
| |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <!-- Do not modify this file directly. Instead, copy entries that you --> |
| <!-- wish to modify from this file into hdfs-site.xml and change them --> |
| <!-- there. If hdfs-site.xml does not already exist, create it. --> |
| |
| <configuration> |
| |
| <property> |
| <name>hadoop.hdfs.configuration.version</name> |
| <value>1</value> |
| <description>version of this configuration file</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.rpc-address</name> |
| <value></value> |
| <description> |
| RPC address that handles all client requests. In the case of HA/Federation where multiple namenodes exist, |
| the name service id is added to the name, e.g. dfs.namenode.rpc-address.EXAMPLENAMESERVICE. |
| The value of this property will take the form of nn-host1:rpc-port. The NameNode's default RPC port is 8020. |
| </description> |
| </property> |
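| <!-- A hedged, illustrative example (not an active setting): in an HA setup |
| both the nameservice ID and the NameNode ID are appended to the key. For a |
| hypothetical nameservice "mycluster" with NameNode IDs "nn1" and "nn2" |
| (hostnames below are placeholders), the per-NameNode addresses would be: |
| |
| <property> |
| <name>dfs.namenode.rpc-address.mycluster.nn1</name> |
| <value>nn1.example.com:8020</value> |
| </property> |
| <property> |
| <name>dfs.namenode.rpc-address.mycluster.nn2</name> |
| <value>nn2.example.com:8020</value> |
| </property> |
| --> |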
| |
| <property> |
| <name>dfs.namenode.rpc-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the RPC server will bind to. If this optional address is |
| set, it overrides only the hostname portion of dfs.namenode.rpc-address. |
| It can also be specified per name node or name service for HA/Federation. |
| This is useful for making the name node listen on all interfaces by |
| setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.servicerpc-address</name> |
| <value></value> |
| <description> |
| RPC address for HDFS Services communication. BackupNode, Datanodes and all other services should |
| connect to this address if it is configured. In the case of HA/Federation where multiple namenodes exist, |
| the name service id is added to the name, e.g. dfs.namenode.servicerpc-address.EXAMPLENAMESERVICE. |
| The value of this property will take the form of nn-host1:rpc-port. |
| If the value of this property is unset the value of dfs.namenode.rpc-address will be used as the default. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.servicerpc-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the service RPC server will bind to. If this optional address is |
| set, it overrides only the hostname portion of dfs.namenode.servicerpc-address. |
| It can also be specified per name node or name service for HA/Federation. |
| This is useful for making the name node listen on all interfaces by |
| setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lifeline.rpc-address</name> |
| <value></value> |
| <description> |
| NameNode RPC lifeline address. This is an optional separate RPC address |
| that can be used to isolate health checks and liveness to protect against |
| resource exhaustion in the main RPC handler pool. In the case of |
| HA/Federation where multiple NameNodes exist, the name service ID is added |
| to the name e.g. dfs.namenode.lifeline.rpc-address.ns1. The value of this |
| property will take the form of nn-host1:rpc-port. If this property is not |
| defined, then the NameNode will not start a lifeline RPC server. By |
| default, the property is not defined. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lifeline.rpc-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the lifeline RPC server will bind to. If this optional |
| address is set, it overrides only the hostname portion of |
| dfs.namenode.lifeline.rpc-address. It can also be specified per name node |
| or name service for HA/Federation. This is useful for making the name node |
| listen on all interfaces by setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.secondary.http-address</name> |
| <value>0.0.0.0:9868</value> |
| <description> |
| The secondary namenode http server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.secondary.https-address</name> |
| <value>0.0.0.0:9869</value> |
| <description> |
| The secondary namenode HTTPS server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.address</name> |
| <value>0.0.0.0:9866</value> |
| <description> |
| The datanode server address and port for data transfer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.http.address</name> |
| <value>0.0.0.0:9864</value> |
| <description> |
| The datanode http server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.ipc.address</name> |
| <value>0.0.0.0:9867</value> |
| <description> |
| The datanode ipc server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.http.internal-proxy.port</name> |
| <value>0</value> |
| <description> |
| The datanode's internal web proxy port. |
| By default it selects a random port available at runtime. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.handler.count</name> |
| <value>10</value> |
| <description> |
| The number of Datanode RPC server threads that listen to |
| requests from clients.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.http-address</name> |
| <value>0.0.0.0:9870</value> |
| <description> |
| The address and the base port on which the dfs namenode web ui will listen. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.http-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the HTTP server will bind to. If this optional address |
| is set, it overrides only the hostname portion of dfs.namenode.http-address. |
| It can also be specified per name node or name service for HA/Federation. |
| This is useful for making the name node HTTP server listen on all |
| interfaces by setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.heartbeat.recheck-interval</name> |
| <value>300000</value> |
| <description> |
| This time decides the interval to check for expired datanodes. |
| Together with dfs.heartbeat.interval, it also determines the interval |
| after which a datanode is considered stale. |
| The unit of this configuration is milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.http.policy</name> |
| <value>HTTP_ONLY</value> |
| <description>Decide if HTTPS(SSL) is supported on HDFS. |
| This configures the HTTP endpoint for HDFS daemons. |
| The following values are supported: |
| - HTTP_ONLY : Service is provided only on http |
| - HTTPS_ONLY : Service is provided only on https |
| - HTTP_AND_HTTPS : Service is provided both on http and https |
| </description> |
| </property> |
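| <!-- Illustrative example (not an active setting): to serve the web UIs only |
| over TLS, copy dfs.http.policy into hdfs-site.xml with the HTTPS_ONLY value |
| listed above; keystore details are then read from the resource named by |
| dfs.https.server.keystore.resource. |
| |
| <property> |
| <name>dfs.http.policy</name> |
| <value>HTTPS_ONLY</value> |
| </property> |
| --> |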
| |
| <property> |
| <name>dfs.client.https.need-auth</name> |
| <value>false</value> |
| <description>Whether SSL client certificate authentication is required |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.cached.conn.retry</name> |
| <value>3</value> |
| <description>The number of times the HDFS client will pull a socket from the |
| cache. Once this number is exceeded, the client will try to create a new |
| socket. |
| </description> |
| </property> |
| |
| |
| <property> |
| <name>dfs.https.server.keystore.resource</name> |
| <value>ssl-server.xml</value> |
| <description>Resource file from which ssl server keystore |
| information will be extracted |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.https.keystore.resource</name> |
| <value>ssl-client.xml</value> |
| <description>Resource file from which ssl client keystore |
| information will be extracted |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.https.address</name> |
| <value>0.0.0.0:9865</value> |
| <description>The datanode secure http server address and port.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.https-address</name> |
| <value>0.0.0.0:9871</value> |
| <description>The namenode secure http server address and port.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.https-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the HTTPS server will bind to. If this optional address |
| is set, it overrides only the hostname portion of dfs.namenode.https-address. |
| It can also be specified per name node or name service for HA/Federation. |
| This is useful for making the name node HTTPS server listen on all |
| interfaces by setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.dns.interface</name> |
| <value>default</value> |
| <description> |
| The name of the Network Interface from which a data node should |
| report its IP address. e.g. eth2. This setting may be required for some |
| multi-homed nodes where the DataNodes are assigned multiple hostnames |
| and it is desirable for the DataNodes to use a non-default hostname. |
| |
| Prefer using hadoop.security.dns.interface over |
| dfs.datanode.dns.interface. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.dns.nameserver</name> |
| <value>default</value> |
| <description> |
| The host name or IP address of the name server (DNS) which a DataNode |
| should use to determine its own host name. |
| |
| Prefer using hadoop.security.dns.nameserver over |
| dfs.datanode.dns.nameserver. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.backup.address</name> |
| <value>0.0.0.0:50100</value> |
| <description> |
| The backup node server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.backup.http-address</name> |
| <value>0.0.0.0:50105</value> |
| <description> |
| The backup node http server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.redundancy.considerLoad</name> |
| <value>true</value> |
| <description> |
| Decide if chooseTarget considers the target's load or not when writing. |
| Turned on by default. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.redundancy.considerLoadByStorageType</name> |
| <value>false</value> |
| <description> |
| Decide if chooseTarget considers the target's load with respect to the |
| storage type. Typically to be used when datanodes contain homogeneous |
| storage types. Irrelevant if dfs.namenode.redundancy.considerLoad is |
| false. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.redundancy.considerLoad.factor</name> |
| <value>2.0</value> |
| <description>The factor by which a node's load can exceed the average |
| before being rejected for writes, only if considerLoad is true. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.read.considerLoad</name> |
| <value>false</value> |
| <description> |
| Decide if sorting block locations considers the target's load or not when reading. |
| Turned off by default. |
| It is not possible to enable this feature along with dfs.namenode.read.considerStorageType as only one sort can be |
| enabled at a time. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.read.considerStorageType</name> |
| <value>false</value> |
| <description> |
| Decide if sort block locations considers the target's storage type or not when read. Any locations with the same |
| network distance are sorted in order of the storage speed, fastest first (RAM, SSD, Disk, Archive). This is |
| disabled by default, and the locations will be ordered randomly. |
| It is not possible to enable this feature along with dfs.namenode.read.considerLoad as only one sort can be |
| enabled at a time. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.httpserver.filter.handlers</name> |
| <value>org.apache.hadoop.hdfs.server.datanode.web.RestCsrfPreventionFilterHandler</value> |
| <description>Comma separated list of Netty servlet-style filter handlers to inject into the Datanode WebHDFS I/O path |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.default.chunk.view.size</name> |
| <value>32768</value> |
| <description>The number of bytes to view for a file on the browser. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.du.reserved.calculator</name> |
| <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.ReservedSpaceCalculator$ReservedSpaceCalculatorAbsolute</value> |
| <description>Determines the class of ReservedSpaceCalculator to be used for |
| calculating disk space reserved for non-HDFS data. The default calculator is |
| ReservedSpaceCalculatorAbsolute which will use dfs.datanode.du.reserved |
| for a static reserved number of bytes. ReservedSpaceCalculatorPercentage |
| will use dfs.datanode.du.reserved.pct to calculate the reserved number |
| of bytes based on the size of the storage. ReservedSpaceCalculatorConservative and |
| ReservedSpaceCalculatorAggressive will use their combination, Conservative will use |
| maximum, Aggressive minimum. For more details see ReservedSpaceCalculator. |
| </description> |
| </property> |
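| <!-- Illustrative example (not an active setting): to reserve a percentage |
| of each volume instead of a fixed byte count, select the percentage |
| calculator named above and set dfs.datanode.du.reserved.pct, e.g. 10%: |
| |
| <property> |
| <name>dfs.datanode.du.reserved.calculator</name> |
| <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.ReservedSpaceCalculator$ReservedSpaceCalculatorPercentage</value> |
| </property> |
| <property> |
| <name>dfs.datanode.du.reserved.pct</name> |
| <value>10</value> |
| </property> |
| --> |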
| |
| <property> |
| <name>dfs.datanode.du.reserved</name> |
| <value>0</value> |
| <description>Reserved space in bytes per volume. Always leave this much space free for non dfs use. |
| Specific storage type based reservation is also supported. The property can be followed with |
| corresponding storage types ([ssd]/[disk]/[archive]/[ram_disk]/[nvdimm]) for cluster with heterogeneous storage. |
| For example, reserved space for RAM_DISK storage can be configured using property |
| 'dfs.datanode.du.reserved.ram_disk'. If specific storage type reservation is not configured |
| then dfs.datanode.du.reserved will be used. Supports multiple size unit suffixes |
| (case insensitive), as described in dfs.blocksize. |
| Note: In case of using tune2fs to set reserved-blocks-percentage, or other filesystem tools, |
| then you can possibly run into out of disk errors because hadoop will not check those |
| external tool configurations. |
| </description> |
| </property> |
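| <!-- Illustrative example (not an active setting): reserve 10 GB on every |
| volume but only 2 GB on RAM_DISK volumes, using the storage type suffixed |
| form and the size unit suffixes described above: |
| |
| <property> |
| <name>dfs.datanode.du.reserved</name> |
| <value>10g</value> |
| </property> |
| <property> |
| <name>dfs.datanode.du.reserved.ram_disk</name> |
| <value>2g</value> |
| </property> |
| --> |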
| |
| <property> |
| <name>dfs.datanode.du.reserved.pct</name> |
| <value>0</value> |
| <description>Reserved space in percentage. Read dfs.datanode.du.reserved.calculator to see |
| when this takes effect. The actual number of bytes reserved will be calculated by using the |
| total capacity of the data directory in question. Specific storage type based reservation |
| is also supported. The property can be followed with corresponding storage types |
| ([ssd]/[disk]/[archive]/[ram_disk]/[nvdimm]) for cluster with heterogeneous storage. |
| For example, reserved percentage space for RAM_DISK storage can be configured using property |
| 'dfs.datanode.du.reserved.pct.ram_disk'. If specific storage type reservation is not configured |
| then dfs.datanode.du.reserved.pct will be used. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.name.dir</name> |
| <value>file://${hadoop.tmp.dir}/dfs/name</value> |
| <description>Determines where on the local filesystem the DFS name node |
| should store the name table (fsimage). If this is a comma-delimited list |
| of directories then the name table is replicated in all of the |
| directories, for redundancy. </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.name.dir.restore</name> |
| <value>false</value> |
| <description>Set to true to enable NameNode to attempt recovering a |
| previously failed dfs.namenode.name.dir. When enabled, a recovery of any |
| failed directory is attempted during checkpoint.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-component-length</name> |
| <value>255</value> |
| <description>Defines the maximum number of bytes in UTF-8 encoding in each |
| component of a path. A value of 0 will disable the check. Supports |
| multiple size unit suffixes (case insensitive), as described in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-directory-items</name> |
| <value>1048576</value> |
| <description>Defines the maximum number of items that a directory may |
| contain. Cannot set the property to a value less than 1 or more than |
| 6400000.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.min-block-size</name> |
| <value>1048576</value> |
| <description>Minimum block size in bytes, enforced by the Namenode at create |
| time. This prevents the accidental creation of files with tiny block |
| sizes (and thus many blocks), which can degrade performance. Supports multiple |
| size unit suffixes (case insensitive), as described in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-blocks-per-file</name> |
| <value>10000</value> |
| <description>Maximum number of blocks per file, enforced by the Namenode on |
| write. This prevents the creation of extremely large files which can |
| degrade performance.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.dir</name> |
| <value>${dfs.namenode.name.dir}</value> |
| <description>Determines where on the local filesystem the DFS name node |
| should store the transaction (edits) file. If this is a comma-delimited list |
| of directories then the transaction file is replicated in all of the |
| directories, for redundancy. The default value is the same as dfs.namenode.name.dir. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.dir.required</name> |
| <value></value> |
| <description>This should be a subset of dfs.namenode.edits.dir, |
| to ensure that the transaction (edits) file |
| in these places is always up-to-date. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.shared.edits.dir</name> |
| <value></value> |
| <description>A directory on shared storage between the multiple namenodes |
| in an HA cluster. This directory will be written by the active and read |
| by the standby in order to keep the namespaces synchronized. This directory |
| does not need to be listed in dfs.namenode.edits.dir above. It should be |
| left empty in a non-HA cluster. |
| </description> |
| </property> |
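| <!-- Illustrative example (not an active setting): with the Quorum Journal |
| Manager, the shared edits directory is a qjournal URI naming the |
| JournalNodes and the nameservice; hostnames here are placeholders: |
| |
| <property> |
| <name>dfs.namenode.shared.edits.dir</name> |
| <value>qjournal://jn1.example.com:8485;jn2.example.com:8485;jn3.example.com:8485/mycluster</value> |
| </property> |
| --> |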
| |
| <property> |
| <name>dfs.namenode.edits.journal-plugin.qjournal</name> |
| <value>org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager</value> |
| </property> |
| |
| <property> |
| <name>dfs.permissions.enabled</name> |
| <value>true</value> |
| <description> |
| If "true", enable permission checking in HDFS. |
| If "false", permission checking is turned off, |
| but all other behavior is unchanged. |
| Switching from one parameter value to the other does not change the mode, |
| owner or group of files or directories. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.permissions.ContentSummary.subAccess</name> |
| <value>false</value> |
| <description> |
| If "true", the ContentSummary permission checking will use subAccess. |
| If "false", the ContentSummary permission checking will NOT use subAccess. |
| subAccess means using recursion to check the access of all descendants. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.permissions.superusergroup</name> |
| <value>supergroup</value> |
| <description>The name of the group of super-users. |
| The value should be a single group name. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.cluster.administrators</name> |
| <value></value> |
| <description>ACL for the admins, this configuration is used to control |
| who can access the default servlets in the namenode, etc. The value |
| should be a comma separated list of users and groups. The user list |
| comes first and is separated by a space followed by the group list, |
| e.g. "user1,user2 group1,group2". Both users and groups are optional, |
| so "user1", " group1", "", "user1 group1", "user1,user2 group1,group2" |
| are all valid (note the leading space in " group1"). '*' grants access |
| to all users and groups, e.g. '*', '* ' and ' *' are all valid. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.acls.enabled</name> |
| <value>true</value> |
| <description> |
| Set to true to enable support for HDFS ACLs (Access Control Lists). By |
| default, ACLs are enabled. When ACLs are disabled, the NameNode rejects |
| all RPCs related to setting or getting ACLs. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.posix.acl.inheritance.enabled</name> |
| <value>true</value> |
| <description> |
| Set to true to enable POSIX style ACL inheritance. When it is enabled |
| and the create request comes from a compatible client, the NameNode |
| will apply default ACLs from the parent directory to the create mode |
| and ignore the client umask. If no default ACL is found, it will apply the |
| client umask. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lazypersist.file.scrub.interval.sec</name> |
| <value>300</value> |
| <description> |
| The NameNode periodically scans the namespace for LazyPersist files with |
| missing blocks and unlinks them from the namespace. This configuration key |
| controls the interval between successive scans. If this value is set to 0, |
| the file scrubber is disabled. |
| </description> |
| </property> |
| <property> |
| <name>dfs.block.access.token.enable</name> |
| <value>false</value> |
| <description> |
| If "true", access tokens are used as capabilities for accessing datanodes. |
| If "false", no access tokens are checked on accessing datanodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.access.key.update.interval</name> |
| <value>600</value> |
| <description> |
| Interval in minutes at which namenode updates its access keys. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.access.token.lifetime</name> |
| <value>600</value> |
| <description>The lifetime of access tokens in minutes.</description> |
| </property> |
| |
| <property> |
| <name>dfs.block.access.token.protobuf.enable</name> |
| <value>false</value> |
| <description> |
| If "true", block tokens are written using Protocol Buffers. |
| If "false", block tokens are written using Legacy format. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.data.dir</name> |
| <value>file://${hadoop.tmp.dir}/dfs/data</value> |
| <description>Determines where on the local filesystem a DFS data node |
| should store its blocks. If this is a comma-delimited |
| list of directories, then data will be stored in all named |
| directories, typically on different devices. The directories should be tagged |
| with corresponding storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]/[NVDIMM]) for HDFS |
| storage policies. The default storage type will be DISK if the directory does |
| not have a storage type tagged explicitly. Directories that do not exist will |
| be created if local filesystem permissions allow. |
| </description> |
| </property> |
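| <!-- Illustrative example (not an active setting): tag each directory with |
| its storage type so that HDFS storage policies can place replicas |
| accordingly; the local paths are placeholders: |
| |
| <property> |
| <name>dfs.datanode.data.dir</name> |
| <value>[DISK]file:///grid/0/dfs/data,[SSD]file:///grid/ssd0/dfs/data</value> |
| </property> |
| --> |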
| |
| <property> |
| <name>dfs.datanode.data.dir.perm</name> |
| <value>700</value> |
| <description>Permissions for the directories on the local filesystem where |
| the DFS data node stores its blocks. The permissions can either be octal or |
| symbolic.</description> |
| </property> |
| |
| <property> |
| <name>dfs.replication</name> |
| <value>3</value> |
| <description>Default block replication. |
| The actual number of replications can be specified when the file is created. |
| The default is used if replication is not specified at create time. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.replication.max</name> |
| <value>512</value> |
| <description>Maximal block replication. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.replication.min</name> |
| <value>1</value> |
| <description>Minimal block replication. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.maintenance.replication.min</name> |
| <value>1</value> |
| <description>Minimal live block replication while nodes are in maintenance mode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.replication.min</name> |
| <value></value> |
| <description> |
| A separate minimum replication factor for calculating safe block count. |
| This is an expert level setting. |
| Setting this lower than dfs.namenode.replication.min |
| is not recommended and can be dangerous for production setups. |
| When it is not set, it takes its value from dfs.namenode.replication.min. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.max-corrupt-file-blocks-returned</name> |
| <value>100</value> |
| <description> |
| The maximum number of corrupt file blocks listed by NameNode Web UI, |
| JMX and other client requests. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.blocksize</name> |
| <value>134217728</value> |
| <description> |
| The default block size for new files, in bytes. |
| You can use the following suffix (case insensitive): |
| k(kilo), m(mega), g(giga), t(tera), p(peta), e(exa) to specify the size (such as 128k, 512m, 1g, etc.), |
| or provide the complete size in bytes (such as 134217728 for 128 MB). |
| </description> |
| </property> |
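| <!-- Illustrative example (not an active setting): the suffixes above let |
| the intent be read directly; both of the following set a 256 MB default |
| block size: |
| |
| <property> |
| <name>dfs.blocksize</name> |
| <value>256m</value> |
| </property> |
| <property> |
| <name>dfs.blocksize</name> |
| <value>268435456</value> |
| </property> |
| --> |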
| |
| <property> |
| <name>dfs.client.block.write.retries</name> |
| <value>3</value> |
| <description>The number of retries for writing blocks to the data nodes, |
| before we signal failure to the application. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.replace-datanode-on-failure.enable</name> |
| <value>true</value> |
| <description> |
| If there is a datanode/network failure in the write pipeline, |
| DFSClient will try to remove the failed datanode from the pipeline |
| and then continue writing with the remaining datanodes. As a result, |
| the number of datanodes in the pipeline is decreased. This feature adds |
| new datanodes to the pipeline. |
| |
| This is a site-wide property to enable/disable the feature. |
| |
| When the cluster size is extremely small, e.g. 3 nodes or less, cluster |
| administrators may want to set the policy to NEVER in the default |
| configuration file or disable this feature. Otherwise, users may |
| experience an unusually high rate of pipeline failures since it is |
| impossible to find new datanodes for replacement. |
| |
| See also dfs.client.block.write.replace-datanode-on-failure.policy |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.replace-datanode-on-failure.policy</name> |
| <value>DEFAULT</value> |
| <description> |
| This property is used only if the value of |
| dfs.client.block.write.replace-datanode-on-failure.enable is true. |
| |
| ALWAYS: always add a new datanode when an existing datanode is removed. |
| |
| NEVER: never add a new datanode. |
| |
| DEFAULT: |
| Let r be the replication number. |
| Let n be the number of existing datanodes. |
| Add a new datanode only if r is greater than or equal to 3 and either |
| (1) floor(r/2) is greater than or equal to n; or |
| (2) r is greater than n and the block is hflushed/appended. |
| </description> |
| </property> |
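| <!-- Illustrative example (not an active setting): on a very small cluster |
| (e.g. 3 datanodes or fewer) replacements usually cannot be found, so, as |
| suggested above, administrators may set the policy to NEVER: |
| |
| <property> |
| <name>dfs.client.block.write.replace-datanode-on-failure.policy</name> |
| <value>NEVER</value> |
| </property> |
| --> |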
| |
| <property> |
| <name>dfs.client.block.write.replace-datanode-on-failure.best-effort</name> |
| <value>false</value> |
| <description> |
| This property is used only if the value of |
| dfs.client.block.write.replace-datanode-on-failure.enable is true. |
| |
| Best effort means that the client will try to replace a failed datanode |
| in write pipeline (provided that the policy is satisfied), however, it |
| continues the write operation in case that the datanode replacement also |
| fails. |
| |
| Suppose the datanode replacement fails. |
| false: An exception should be thrown so that the write will fail. |
| true : The write should be resumed with the remaining datanodes. |
| |
| Note that setting this property to true allows writing to a pipeline |
| with a smaller number of datanodes. As a result, it increases the |
| probability of data loss. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.replace-datanode-on-failure.min-replication</name> |
| <value>0</value> |
| <description> |
| The minimum number of replications that are needed to avoid failing |
| the write pipeline if new datanodes can not be found to replace |
| failed datanodes (could be due to network failure) in the write pipeline. |
| If the number of the remaining datanodes in the write pipeline is greater |
| than or equal to this property value, continue writing to the remaining nodes. |
| Otherwise an exception is thrown. |
| |
| If this is set to 0, an exception will be thrown, when a replacement |
| can not be found. |
| See also dfs.client.block.write.replace-datanode-on-failure.policy |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.blockreport.intervalMsec</name> |
| <value>21600000</value> |
| <description>Determines block reporting interval in milliseconds.</description> |
| </property> |
| |
| <property> |
| <name>dfs.blockreport.initialDelay</name> |
| <value>0s</value> |
| <description> |
| Delay for first block report in seconds. Supports multiple time unit |
| suffixes (case insensitive), as described in dfs.heartbeat.interval. If |
| no time unit is specified then seconds is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.blockreport.split.threshold</name> |
| <value>1000000</value> |
| <description>If the number of blocks on the DataNode is below this |
| threshold then it will send block reports for all Storage Directories |
| in a single message. |
| |
| If the number of blocks exceeds this threshold then the DataNode will |
| send block reports for each Storage Directory in separate messages. |
| |
| Set to zero to always split. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.max.full.block.report.leases</name> |
| <value>6</value> |
| <description>The maximum number of leases for full block reports that the |
| NameNode will issue at any given time. This prevents the NameNode from |
| being flooded with full block reports that use up all the RPC handler |
| threads. This number should never be more than the number of RPC handler |
| threads or less than 1. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.full.block.report.lease.length.ms</name> |
| <value>300000</value> |
| <description> |
| The number of milliseconds that the NameNode will wait before invalidating |
| a full block report lease. This prevents a crashed DataNode from |
| permanently using up a full block report lease. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.directoryscan.interval</name> |
| <value>21600s</value> |
| <description>Interval in seconds for Datanode to scan data directories and |
| reconcile the difference between blocks in memory and on the disk. |
| Supports multiple time unit suffixes (case insensitive), as described |
| in dfs.heartbeat.interval. If no time unit is specified then seconds |
| is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.directoryscan.threads</name> |
| <value>1</value> |
| <description>The number of threads in the thread pool used to compile |
| reports for volumes in parallel. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.directoryscan.throttle.limit.ms.per.sec</name> |
| <value>1000</value> |
| <description>The report compilation threads are limited to only running for |
| a given number of milliseconds per second, as configured by the |
| property. The limit is taken per thread, not in aggregate, e.g. setting |
| a limit of 100ms for 4 compiler threads will result in each thread being |
| limited to 100ms, not 25ms. |
| |
| Note that the throttle does not interrupt the report compiler threads, so the |
| actual running time of the threads per second will typically be somewhat |
| higher than the throttle limit, usually by no more than 20%. |
| |
| Setting this limit to 1000 disables compiler thread throttling. Only |
| values between 1 and 1000 are valid. Setting an invalid value will result |
| in the throttle being disabled and an error message being logged. 1000 is |
| the default setting. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.heartbeat.interval</name> |
| <value>3s</value> |
| <description> |
| Determines datanode heartbeat interval in seconds. |
| Can use the following suffix (case insensitive): |
| ms(millis), s(sec), m(min), h(hour), d(day) |
| to specify the time (such as 2s, 2m, 1h, etc.). |
| Or provide complete number in seconds (such as 30 for 30 seconds). |
| If no time unit is specified then seconds is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.lifeline.interval.seconds</name> |
| <value></value> |
| <description> |
| Sets the interval in seconds between sending DataNode Lifeline Protocol |
| messages from the DataNode to the NameNode. The value must be greater than |
| the value of dfs.heartbeat.interval. If this property is not defined, then |
| the default behavior is to calculate the interval as 3x the value of |
| dfs.heartbeat.interval. Note that normal heartbeat processing may cause the |
| DataNode to postpone sending lifeline messages if they are not required. |
| Under normal operations with speedy heartbeat processing, it is possible |
| that no lifeline messages will need to be sent at all. This property has no |
| effect if dfs.namenode.lifeline.rpc-address is not defined. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.handler.count</name> |
| <value>10</value> |
| <description>The number of Namenode RPC server threads that listen to |
| requests from clients. |
| If dfs.namenode.servicerpc-address is not configured then |
| Namenode RPC server threads listen to requests from all nodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.service.handler.count</name> |
| <value>10</value> |
| <description>The number of Namenode RPC server threads that listen to |
| requests from DataNodes and from all other non-client nodes. |
| dfs.namenode.service.handler.count will be valid only if |
| dfs.namenode.servicerpc-address is configured. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lifeline.handler.ratio</name> |
| <value>0.10</value> |
| <description> |
| A ratio applied to the value of dfs.namenode.handler.count, which then |
| provides the number of RPC server threads the NameNode runs for handling the |
| lifeline RPC server. For example, if dfs.namenode.handler.count is 100, and |
| dfs.namenode.lifeline.handler.ratio is 0.10, then the NameNode starts |
| 100 * 0.10 = 10 threads for handling the lifeline RPC server. It is common |
| to tune the value of dfs.namenode.handler.count as a function of the number |
| of DataNodes in a cluster. Using this property allows for the lifeline RPC |
| server handler threads to be tuned automatically without needing to touch a |
| separate property. Lifeline message processing is lightweight, so it is |
| expected to require many fewer threads than the main NameNode RPC server. |
| This property is not used if dfs.namenode.lifeline.handler.count is defined, |
| which sets an absolute thread count. This property has no effect if |
| dfs.namenode.lifeline.rpc-address is not defined. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lifeline.handler.count</name> |
| <value></value> |
| <description> |
| Sets an absolute number of RPC server threads the NameNode runs for handling |
| the DataNode Lifeline Protocol and HA health check requests from ZKFC. If |
| this property is defined, then it overrides the behavior of |
| dfs.namenode.lifeline.handler.ratio. By default, it is not defined. This |
| property has no effect if dfs.namenode.lifeline.rpc-address is not defined. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.threshold-pct</name> |
| <value>0.999f</value> |
| <description> |
| Specifies the percentage of blocks that should satisfy |
| the minimal replication requirement defined by dfs.namenode.replication.min. |
| Values less than or equal to 0 mean not to wait for any particular |
| percentage of blocks before exiting safemode. |
| Values greater than 1 will make safe mode permanent. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.min.datanodes</name> |
| <value>0</value> |
| <description> |
| Specifies the number of datanodes that must be considered alive |
| before the name node exits safemode. |
| Values less than or equal to 0 mean not to take the number of live |
| datanodes into account when deciding whether to remain in safe mode |
| during startup. |
| Values greater than the number of datanodes in the cluster |
| will make safe mode permanent. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.extension</name> |
| <value>30000</value> |
| <description> |
| Determines extension of safe mode in milliseconds after the threshold level |
| is reached. Supports multiple time unit suffixes (case insensitive), as |
| described in dfs.heartbeat.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.resource.check.interval</name> |
| <value>5000</value> |
| <description> |
| The interval in milliseconds at which the NameNode resource checker runs. |
| The checker calculates the number of the NameNode storage volumes whose |
| available spaces are more than dfs.namenode.resource.du.reserved, and |
| enters safemode if the number becomes lower than the minimum value |
| specified by dfs.namenode.resource.checked.volumes.minimum. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.resource.du.reserved</name> |
| <value>104857600</value> |
| <description> |
| The amount of space to reserve/require for a NameNode storage directory |
| in bytes. The default is 100MB. Supports multiple size unit |
| suffixes (case insensitive), as described in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.resource.checked.volumes</name> |
| <value></value> |
| <description> |
| A list of local directories for the NameNode resource checker to check in |
| addition to the local edits directories. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.resource.checked.volumes.minimum</name> |
| <value>1</value> |
| <description> |
| The minimum number of redundant NameNode storage volumes required. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.balance.bandwidthPerSec</name> |
| <value>100m</value> |
| <description> |
| Specifies the maximum amount of bandwidth that each datanode |
| can utilize for the balancing purpose, in terms of |
| the number of bytes per second. You can use the following |
| suffix (case insensitive): |
| k(kilo), m(mega), g(giga), t(tera), p(peta), e(exa) to specify the size |
| (such as 128k, 512m, 1g, etc.). |
| Or provide complete size in bytes (such as 134217728 for 128 MB). |
| </description> |
| </property> |
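| <!-- Illustrative example (not an active setting): raise the per-datanode |
| balancer bandwidth to 1 GB per second using a size suffix. The same limit |
| can also be adjusted at runtime with "hdfs dfsadmin -setBalancerBandwidth |
| <bytes per second>" without editing configuration files. |
| |
| <property> |
| <name>dfs.datanode.balance.bandwidthPerSec</name> |
| <value>1g</value> |
| </property> |
| --> |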
| |
| <property> |
| <name>dfs.hosts</name> |
| <value></value> |
| <description>Names a file that contains a list of hosts that are |
| permitted to connect to the namenode. The full pathname of the file |
| must be specified. If the value is empty, all hosts are |
| permitted.</description> |
| </property> |
| |
| <property> |
| <name>dfs.hosts.exclude</name> |
| <value></value> |
| <description>Names a file that contains a list of hosts that are |
| not permitted to connect to the namenode. The full pathname of the |
| file must be specified. If the value is empty, no hosts are |
| excluded.</description> |
| </property> |
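| <!-- Illustrative example (not an active setting): both dfs.hosts and |
| dfs.hosts.exclude name plain text files, one host per line, by full local |
| pathname; the paths below are placeholders: |
| |
| <property> |
| <name>dfs.hosts</name> |
| <value>/etc/hadoop/conf/dfs.hosts</value> |
| </property> |
| <property> |
| <name>dfs.hosts.exclude</name> |
| <value>/etc/hadoop/conf/dfs.hosts.exclude</value> |
| </property> |
| --> |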
| |
| <property> |
| <name>dfs.namenode.max.objects</name> |
| <value>0</value> |
| <description>The maximum number of files, directories and blocks |
| dfs supports. A value of zero indicates no limit to the number |
| of objects that dfs supports. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.datanode.registration.ip-hostname-check</name> |
| <value>true</value> |
| <description> |
| If true (the default), then the namenode requires that a connecting |
| datanode's address must be resolved to a hostname. If necessary, a reverse |
| DNS lookup is performed. All attempts to register a datanode from an |
| unresolvable address are rejected. |
| |
| It is recommended that this setting be left on to prevent accidental |
| registration of datanodes listed by hostname in the excludes file during a |
| DNS outage. Only set this to false in environments where there is no |
| infrastructure to support reverse DNS lookup. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.interval</name> |
| <value>30s</value> |
| <description>The period in seconds at which the Namenode checks whether |
| decommission or maintenance is complete. Supports multiple time unit |
| suffixes (case insensitive), as described in dfs.heartbeat.interval. |
| If no time unit is specified then seconds is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.blocks.per.interval</name> |
| <value>500000</value> |
| <description>The approximate number of blocks to process per decommission |
| or maintenance interval, as defined in dfs.namenode.decommission.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.max.concurrent.tracked.nodes</name> |
| <value>100</value> |
| <description> |
| The maximum number of decommission-in-progress or |
| entering-maintenance datanodes that will be tracked at one time by |
| the namenode. Tracking these datanodes consumes additional NN memory |
| proportional to the number of blocks on the datanode. Having a conservative |
| limit reduces the potential impact of decommissioning or maintenance of |
| a large number of nodes at once. |
| |
| A value of 0 means no limit will be enforced. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.monitor.class</name> |
| <value>org.apache.hadoop.hdfs.server.blockmanagement.DatanodeAdminDefaultMonitor</value> |
| <description> |
| Determines the implementation used for the decommission manager. The only |
| valid options are: |
| |
| org.apache.hadoop.hdfs.server.blockmanagement.DatanodeAdminDefaultMonitor |
| org.apache.hadoop.hdfs.server.blockmanagement.DatanodeAdminBackoffMonitor |
| |
| </description> |
| </property> |
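| <!-- Illustrative example (not an active setting): select the backoff |
| monitor listed above; its behavior is then tuned by the |
| dfs.namenode.decommission.backoff.monitor.* properties that follow. |
| |
| <property> |
| <name>dfs.namenode.decommission.monitor.class</name> |
| <value>org.apache.hadoop.hdfs.server.blockmanagement.DatanodeAdminBackoffMonitor</value> |
| </property> |
| --> |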
| |
| <property> |
| <name>dfs.namenode.decommission.backoff.monitor.pending.limit</name> |
| <value>10000</value> |
| <description> |
| When the Backoff monitor is enabled, determines the maximum number of blocks |
| related to decommission and maintenance operations that can be loaded |
| into the replication queue at any given time. Every |
| dfs.namenode.decommission.interval seconds, the list is checked to see if |
| the blocks have become fully replicated and then further blocks are added |
| to reach the limit defined in this parameter. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.backoff.monitor.pending.blocks.per.lock</name> |
| <value>1000</value> |
| <description> |
| When loading blocks into the replication queue, release the namenode write |
| lock after the defined number of blocks have been processed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.redundancy.interval.seconds</name> |
| <value>3s</value> |
| <description>The periodicity in seconds with which the namenode computes |
| low redundancy work for datanodes. Supports multiple time unit suffixes (case insensitive), |
| as described in dfs.heartbeat.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.redundancy.queue.restart.iterations</name> |
| <value>2400</value> |
| <description>When picking blocks from the low redundancy queues, reset the |
| bookmarked iterator after the set number of iterations to ensure any blocks |
| which were not processed on the first pass are retried before the iterators |
| would naturally reach their end point. This ensures blocks are retried |
| more frequently when there are many pending blocks or blocks are |
| continuously added to the queues preventing the iterator reaching its |
| natural endpoint. |
| The default setting of 2400 combined with the default of |
| dfs.namenode.redundancy.interval.seconds means the iterators will be reset |
| approximately every 2 hours. |
| Setting this parameter to zero disables the feature and the iterators will |
| be reset only when the end of all queues has been reached. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.accesstime.precision</name> |
| <value>3600000</value> |
| <description>The access time for an HDFS file is precise up to this value. |
| The default value is 1 hour. Setting a value of 0 disables |
| access times for HDFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.plugins</name> |
| <value></value> |
| <description>Comma-separated list of datanode plug-ins to be activated. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.plugins</name> |
| <value></value> |
| <description>Comma-separated list of namenode plug-ins to be activated. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.block-placement-policy.default.prefer-local-node</name> |
| <value>true</value> |
| <description>Controls how the default block placement policy places |
| the first replica of a block. When true, it will prefer the node where |
| the client is running. When false, it will prefer a node in the same rack |
| as the client. Setting to false avoids situations where entire copies of |
| large files end up on a single node, thus creating hotspots. |
| </description> |
| </property> |
| |
| |
| <property> |
| <name>dfs.stream-buffer-size</name> |
| <value>4096</value> |
| <description>The size of buffer to stream files. |
| The size of this buffer should probably be a multiple of hardware |
| page size (4096 on Intel x86), and it determines how much data is |
| buffered during read and write operations.</description> |
| </property> |
| |
| <property> |
| <name>dfs.bytes-per-checksum</name> |
| <value>512</value> |
| <description>The number of bytes per checksum. Must not be larger than |
| dfs.stream-buffer-size</description> |
| </property> |
| |
| <property> |
| <name>dfs.client-write-packet-size</name> |
| <value>65536</value> |
| <description>Packet size for clients to write</description> |
| </property> |
| |
| <property> |
| <name>dfs.client.write.exclude.nodes.cache.expiry.interval.millis</name> |
| <value>600000</value> |
| <description>The maximum period to keep a DN in the excluded nodes list |
| at a client. After this period, in milliseconds, the previously excluded node(s) will |
| be removed automatically from the cache and will be considered good for block allocations |
| again. Useful to lower or raise in situations where you keep a file open for very long |
| periods (such as a Write-Ahead-Log (WAL) file) to make the writer tolerant to cluster maintenance |
| restarts. Defaults to 10 minutes.</description> |
| </property> |
| |
| <property> |
| <name>dfs.client.write.recover.lease.on.close.exception</name> |
| <value>false</value> |
| <description> |
| Set to true to call the recoverLease operation automatically when DFSOutputStream encounters an exception on close. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.dir</name> |
| <value>file://${hadoop.tmp.dir}/dfs/namesecondary</value> |
| <description>Determines where on the local filesystem the DFS secondary |
| name node should store the temporary images to merge. |
| If this is a comma-delimited list of directories then the image is |
| replicated in all of the directories for redundancy. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.edits.dir</name> |
| <value>${dfs.namenode.checkpoint.dir}</value> |
| <description>Determines where on the local filesystem the DFS secondary |
| name node should store the temporary edits to merge. |
| If this is a comma-delimited list of directories then the edits are |
| replicated in all of the directories for redundancy. |
| The default value is the same as dfs.namenode.checkpoint.dir. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.period</name> |
| <value>3600s</value> |
| <description> |
| The number of seconds between two periodic checkpoints. |
| Supports multiple time unit suffixes (case insensitive), as described |
| in dfs.heartbeat.interval. If no time unit is specified then seconds |
| is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.txns</name> |
| <value>1000000</value> |
| <description>The Secondary NameNode or CheckpointNode will create a checkpoint |
| of the namespace every 'dfs.namenode.checkpoint.txns' transactions, regardless |
| of whether 'dfs.namenode.checkpoint.period' has expired. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.check.period</name> |
| <value>60s</value> |
| <description>The SecondaryNameNode and CheckpointNode will poll the NameNode |
| every 'dfs.namenode.checkpoint.check.period' seconds to query the number |
| of uncheckpointed transactions. Supports multiple time unit suffixes (case insensitive), |
| as described in dfs.heartbeat.interval. If no time unit is specified then |
| seconds is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.max-retries</name> |
| <value>3</value> |
| <description>The SecondaryNameNode retries failed checkpointing. If the |
| failure occurs while loading fsimage or replaying edits, the number of |
| retries is limited by this variable. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.check.quiet-multiplier</name> |
| <value>1.5</value> |
| <description> |
| Used to calculate the amount of time between retries when in the 'quiet' period |
| for creating checkpoints (the active namenode already has an up-to-date image from |
| another checkpointer). We wait a multiple of dfs.namenode.checkpoint.check.period |
| before retrying the checkpoint because another node is likely already managing the |
| checkpoints, allowing us to save the bandwidth of transferring checkpoints that |
| don't need to be used. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.num.checkpoints.retained</name> |
| <value>2</value> |
| <description>The number of image checkpoint files (fsimage_*) that will be retained by |
| the NameNode and Secondary NameNode in their storage directories. All edit |
| logs (stored on edits_* files) necessary to recover an up-to-date namespace from the oldest retained |
| checkpoint will also be retained. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.num.extra.edits.retained</name> |
| <value>1000000</value> |
| <description>The number of extra transactions which should be retained |
| beyond what is minimally necessary for a NN restart. |
| It does not translate directly to a file's age, or the number of files kept, |
| but to the number of transactions (here "edits" means transactions). |
| One edit file may contain several transactions (edits). |
| During checkpoint, NameNode will identify the total number of edits to retain as extra by |
| checking the latest checkpoint transaction value minus the value of this property. |
| Then, it scans edits files to identify the older ones that don't include the computed range of |
| retained transactions that are to be kept around, and purges them subsequently. |
| Retaining these edits can be useful for audit purposes or for an HA setup where a remote Standby Node may have |
| been offline for some time and needs a longer backlog of retained |
| edits in order to start again. |
| Typically each edit is on the order of a few hundred bytes, so the default |
| of 1 million edits should be on the order of hundreds of MBs or low GBs. |
| |
| NOTE: Fewer extra edits may be retained than value specified for this setting |
| if doing so would mean that more segments would be retained than the number |
| configured by dfs.namenode.max.extra.edits.segments.retained. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.max.extra.edits.segments.retained</name> |
| <value>10000</value> |
| <description>The maximum number of extra edit log segments which should be retained |
| beyond what is minimally necessary for a NN restart. When used in conjunction with |
| dfs.namenode.num.extra.edits.retained, this configuration property serves to cap |
| the number of extra edits files to a reasonable value. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.key.update-interval</name> |
| <value>86400000</value> |
| <description>The update interval for master key for delegation tokens |
| in the namenode in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.token.max-lifetime</name> |
| <value>604800000</value> |
| <description>The maximum lifetime in milliseconds for which a delegation |
| token is valid. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.token.renew-interval</name> |
| <value>86400000</value> |
| <description>The renewal interval for delegation token in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.failed.volumes.tolerated</name> |
| <value>0</value> |
| <description>The number of volumes that are allowed to |
| fail before a datanode stops offering service. By default |
| any volume failure will cause a datanode to shut down. |
| The value should be greater than or equal to -1; -1 means that at least |
| 1 valid volume is required. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.volumes.replica-add.threadpool.size</name> |
| <value></value> |
| <description>Specifies the maximum number of threads to use for |
| adding blocks to a volume. The default value for this configuration is |
| the max of (number of volumes * number of bp_services, number of processors). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.compress</name> |
| <value>false</value> |
| <description>When this value is true, the dfs image will be compressed. |
| Enabling this will be very helpful if dfs image is large since it can |
| avoid consuming a lot of network bandwidth when SBN uploads a new dfs |
| image to ANN. The compression codec is specified by the setting |
| dfs.image.compression.codec. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.compression.codec</name> |
| <value>org.apache.hadoop.io.compress.DefaultCodec</value> |
| <description>If the dfs image is compressed, how should it be compressed? |
| This has to be a codec defined in io.compression.codecs. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.transfer.timeout</name> |
| <value>60000</value> |
| <description> |
| Socket timeout for the HttpURLConnection instance used in the image |
| transfer. This is measured in milliseconds. |
| This timeout prevents client hangs if the connection is idle |
| for this configured timeout, during image transfer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.transfer.bandwidthPerSec</name> |
| <value>52428800</value> |
| <description> |
| Maximum bandwidth used for regular image transfers (instead of |
| bootstrapping the standby namenode), in bytes per second. |
| This can help keep normal namenode operations responsive during |
| checkpointing. |
| The default value is 50 MB per second. |
| The maximum bandwidth used for bootstrapping standby namenode is |
| configured with dfs.image.transfer-bootstrap-standby.bandwidthPerSec. |
| Supports multiple size unit suffixes (case insensitive), as described |
| in dfs.blocksize. |
| </description> |
| </property> |
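| |
| <!-- |
| Illustrative hdfs-site.xml snippet (a sketch): capping regular image |
| transfers at 100 MB per second using a size unit suffix. The figure |
| 100m is an assumption for illustration; tune it so checkpoint uploads |
| do not starve client traffic. |
| |
| <property> |
| <name>dfs.image.transfer.bandwidthPerSec</name> |
| <value>100m</value> |
| </property> |
| --> |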
| |
| <property> |
| <name>dfs.image.transfer-bootstrap-standby.bandwidthPerSec</name> |
| <value>0</value> |
| <description> |
| Maximum bandwidth used for transferring image to bootstrap standby |
| namenode, in bytes per second. |
| A default value of 0 indicates that throttling is disabled. This default |
| value should be used in most cases, to ensure timely HA operations. |
| The maximum bandwidth used for regular image transfers is configured |
| with dfs.image.transfer.bandwidthPerSec. |
| Supports multiple size unit suffixes (case insensitive), as described in |
| dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.transfer.chunksize</name> |
| <value>65536</value> |
| <description> |
| Chunksize in bytes to upload the checkpoint. |
| Chunked streaming is used to avoid internal buffering of contents |
| of image file of huge size. |
| Supports multiple size unit suffixes (case insensitive), as described |
| in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.parallel.load</name> |
| <value>false</value> |
| <description> |
| If true, write sub-section entries to the fsimage index so it can |
| be loaded in parallel. Also controls whether parallel loading |
| will be used for an image previously created with sub-sections. |
| If the image contains sub-sections and this is set to false, |
| parallel loading will not be used. |
| Parallel loading is not compatible with image compression, |
| so if dfs.image.compress is set to true this setting will be |
| ignored and no parallel loading will occur. |
| Enabling this feature may impact rolling upgrades and downgrades if |
| the previous version does not support this feature. If the feature was |
| enabled and a downgrade is required, first set this parameter to |
| false, then save the namespace to create an fsimage with no |
| sub-sections, and then perform the downgrade. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.parallel.target.sections</name> |
| <value>12</value> |
| <description> |
| Controls the number of sub-sections that will be written to |
| fsimage for each section. This should be larger than |
| dfs.image.parallel.threads, otherwise not all threads will be |
| used when loading. Ideally, have at least twice as many target |
| sections as threads, so that each thread loads more than one |
| section; this prevents one long-running section from dominating |
| the load time. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.parallel.inode.threshold</name> |
| <value>1000000</value> |
| <description> |
| If the image contains fewer inodes than this setting, then |
| sub-sections are not written and parallel loading is disabled. |
| This is because small images load very quickly in serial and |
| do not benefit from parallel loading. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.parallel.threads</name> |
| <value>4</value> |
| <description> |
| The number of threads to use when dfs.image.parallel.load is |
| enabled. This setting should be less than |
| dfs.image.parallel.target.sections. The optimal number of |
| threads will depend on the hardware and environment. |
| </description> |
| </property> |
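| |
| <!-- |
| Illustrative hdfs-site.xml snippet (a sketch): enabling parallel fsimage |
| loading with 8 threads and, following the guidance above, twice as many |
| target sections as threads. The numbers are assumptions for |
| illustration; the optimal thread count depends on the hardware. Remember |
| that this feature is ignored when dfs.image.compress is true. |
| |
| <property> |
| <name>dfs.image.parallel.load</name> |
| <value>true</value> |
| </property> |
| <property> |
| <name>dfs.image.parallel.threads</name> |
| <value>8</value> |
| </property> |
| <property> |
| <name>dfs.image.parallel.target.sections</name> |
| <value>16</value> |
| </property> |
| --> |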
| |
| <property> |
| <name>dfs.edit.log.transfer.timeout</name> |
| <value>30000</value> |
| <description> |
| Socket timeout for edit log transfer in milliseconds. This timeout |
| should be configured such that normal edit log transfer for journal |
| node syncing can complete successfully. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.edit.log.transfer.bandwidthPerSec</name> |
| <value>0</value> |
| <description> |
| Maximum bandwidth used for transferring edit logs between journal nodes |
| for syncing, in bytes per second. |
| A default value of 0 indicates that throttling is disabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.support.allow.format</name> |
| <value>true</value> |
| <description>Does HDFS namenode allow itself to be formatted? |
| You may consider setting this to false for any production |
| cluster, to avoid any possibility of formatting a running DFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.max.transfer.threads</name> |
| <value>4096</value> |
| <description> |
| Specifies the maximum number of threads to use for transferring data |
| in and out of the DN. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.scan.period.hours</name> |
| <value>504</value> |
| <description> |
| If this is positive, the DataNode will not scan any |
| individual block more than once in the specified scan period. |
| If this is negative, the block scanner is disabled. |
| If this is set to zero, then the default value of 504 hours |
| or 3 weeks is used. Prior versions of HDFS incorrectly documented |
| that setting this key to zero will disable the block scanner. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.scanner.volume.bytes.per.second</name> |
| <value>1048576</value> |
| <description> |
| If this is 0, the DataNode's block scanner will be disabled. If this |
| is positive, this is the number of bytes per second that the DataNode's |
| block scanner will try to scan from each volume. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.scanner.skip.recent.accessed</name> |
| <value>false</value> |
| <description> |
| If this is true, the scanner will check the access time of the block file |
| to avoid scanning blocks accessed during the recent scan period, reducing |
| disk IO. This feature will not work if the DataNode volume is mounted |
| with the noatime option. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.scanner.volume.join.timeout.ms</name> |
| <value>5000</value> |
| <description> |
| The amount of time in milliseconds that the BlockScanner times out waiting |
| for the VolumeScanner thread to join during a shutdown call. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.readahead.bytes</name> |
| <value>4194304</value> |
| <description> |
| While reading block files, if the Hadoop native libraries are available, |
| the datanode can use the posix_fadvise system call to explicitly |
| page data into the operating system buffer cache ahead of the current |
| reader's position. This can improve performance especially when |
| disks are highly contended. |
| |
| This configuration specifies the number of bytes ahead of the current |
| read position which the datanode will attempt to read ahead. This |
| feature may be disabled by configuring this property to 0. |
| |
| If the native libraries are not available, this configuration has no |
| effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.drop.cache.behind.reads</name> |
| <value>false</value> |
| <description> |
| In some workloads, the data read from HDFS is known to be large |
| enough that it is unlikely to be useful to cache it in the |
| operating system buffer cache. In this case, the DataNode may be |
| configured to automatically purge all data from the buffer cache |
| after it is delivered to the client. This behavior is automatically |
| disabled for workloads which read only short sections of a block |
| (e.g. HBase random-IO workloads). |
| |
| This may improve performance for some workloads by freeing buffer |
| cache space usage for more cacheable data. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.drop.cache.behind.writes</name> |
| <value>false</value> |
| <description> |
| In some workloads, the data written to HDFS is known to be large |
| enough that it is unlikely to be useful to cache it in the |
| operating system buffer cache. In this case, the DataNode may be |
| configured to automatically purge all data from the buffer cache |
| after it is written to disk. |
| |
| This may improve performance for some workloads by freeing buffer |
| cache space usage for more cacheable data. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.sync.behind.writes</name> |
| <value>false</value> |
| <description> |
| If this configuration is enabled, the datanode will instruct the |
| operating system to enqueue all written data to the disk immediately |
| after it is written. This differs from the usual OS policy which |
| may wait for up to 30 seconds before triggering writeback. |
| |
| This may improve performance for some workloads by smoothing the |
| IO profile for data written to disk. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.max.attempts</name> |
| <value>15</value> |
| <description> |
| Expert only. The number of client failover attempts that should be |
| made before the failover is considered failed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.sleep.base.millis</name> |
| <value>500</value> |
| <description> |
| Expert only. The time to wait, in milliseconds, between failover |
| attempts increases exponentially as a function of the number of |
| attempts made so far, with a random factor of +/- 50%. This option |
| specifies the base value used in the failover calculation. The |
| first failover will retry immediately. The 2nd failover attempt |
| will delay at least dfs.client.failover.sleep.base.millis |
| milliseconds. And so on. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.sleep.max.millis</name> |
| <value>15000</value> |
| <description> |
| Expert only. The time to wait, in milliseconds, between failover |
| attempts increases exponentially as a function of the number of |
| attempts made so far, with a random factor of +/- 50%. This option |
| specifies the maximum value to wait between failovers. |
| Specifically, the time between two failover attempts will not |
| exceed +/- 50% of dfs.client.failover.sleep.max.millis |
| milliseconds. |
| </description> |
| </property> |
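| |
| <!-- |
| A worked example under the defaults above (an illustration, with the |
| usual caveat that each delay is then randomized by +/- 50%): the first |
| failover retries immediately; the 2nd waits about 500 ms; subsequent |
| waits roughly double (1000 ms, 2000 ms, 4000 ms, ...) until they are |
| capped by dfs.client.failover.sleep.max.millis at about 15000 ms. |
| --> |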
| |
| <property> |
| <name>dfs.client.failover.connection.retries</name> |
| <value>0</value> |
| <description> |
| Expert only. Indicates the number of retries a failover IPC client |
| will make to establish a server connection. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.connection.retries.on.timeouts</name> |
| <value>0</value> |
| <description> |
| Expert only. The number of retry attempts a failover IPC client |
| will make on socket timeout when establishing a server connection. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.datanode-restart.timeout</name> |
| <value>30s</value> |
| <description> |
| Expert only. The time to wait, in seconds, from reception of a |
| datanode shutdown notification for quick restart, until declaring |
| the datanode dead and invoking the normal recovery mechanisms. |
| The notification is sent by a datanode when it is being shut down |
| using the shutdownDatanode admin command with the upgrade option. |
| Supports multiple time unit suffixes (case insensitive), as described |
| in dfs.heartbeat.interval. If no time unit is specified then seconds |
| is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.nameservices</name> |
| <value></value> |
| <description> |
| Comma-separated list of nameservices. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.nameservice.id</name> |
| <value></value> |
| <description> |
| The ID of this nameservice. If the nameservice ID is not |
| configured, or more than one nameservice is configured for |
| dfs.nameservices, it is determined automatically by |
| matching the local node's address with the configured address. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.internal.nameservices</name> |
| <value></value> |
| <description> |
| Comma-separated list of nameservices that belong to this cluster. |
| Datanodes will report to all the nameservices in this list. By default |
| this is set to the value of dfs.nameservices. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.namenodes.EXAMPLENAMESERVICE</name> |
| <value></value> |
| <description> |
| For a given nameservice (e.g. EXAMPLENAMESERVICE), this property |
| contains a comma-separated list of the namenodes in that nameservice. |
| |
| Unique identifiers for each NameNode in the nameservice, delimited by |
| commas. This will be used by DataNodes to determine all the NameNodes |
| in the cluster. For example, if you used “mycluster” as the nameservice |
| ID previously, and you wanted to use “nn1” and “nn2” as the individual |
| IDs of the NameNodes, you would configure a property |
| dfs.ha.namenodes.mycluster, and its value "nn1,nn2". |
| </description> |
| </property> |
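| |
| <!-- |
| Illustrative hdfs-site.xml snippet (a sketch; the hostnames are |
| placeholders) tying the pieces together for an HA nameservice |
| called "mycluster": |
| |
| <property> |
| <name>dfs.nameservices</name> |
| <value>mycluster</value> |
| </property> |
| <property> |
| <name>dfs.ha.namenodes.mycluster</name> |
| <value>nn1,nn2</value> |
| </property> |
| <property> |
| <name>dfs.namenode.rpc-address.mycluster.nn1</name> |
| <value>machine1.example.com:8020</value> |
| </property> |
| <property> |
| <name>dfs.namenode.rpc-address.mycluster.nn2</name> |
| <value>machine2.example.com:8020</value> |
| </property> |
| --> |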
| |
| <property> |
| <name>dfs.ha.namenode.id</name> |
| <value></value> |
| <description> |
| The ID of this namenode. If the namenode ID is not configured it |
| is determined automatically by matching the local node's address |
| with the configured address. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.log-roll.period</name> |
| <value>120s</value> |
| <description> |
| How often, in seconds, the StandbyNode should ask the active to |
| roll edit logs. Since the StandbyNode only reads from finalized |
| log segments, the StandbyNode will only be as up-to-date as how |
| often the logs are rolled. Note that failover triggers a log roll |
| so the StandbyNode will be up to date before it becomes active. |
| Supports multiple time unit suffixes (case insensitive), as described |
| in dfs.heartbeat.interval. If no time unit is specified then seconds |
| is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.tail-edits.period</name> |
| <value>60s</value> |
| <description> |
| How often the StandbyNode and ObserverNode should check whether there are new |
| edit log entries ready to be consumed. This is the minimum period between |
| checking; exponential backoff will be applied if no edits are found and |
| dfs.ha.tail-edits.period.backoff-max is configured. By default, no |
| backoff is applied. |
| Supports multiple time unit suffix (case insensitive), as described |
| in dfs.heartbeat.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.tail-edits.period.backoff-max</name> |
| <value>0</value> |
| <description> |
| The maximum time the tailer should wait between checking for new edit log |
| entries. Exponential backoff will be applied when an edit log tail is |
| performed but no edits are available to be read. Values less than or |
| equal to zero disable backoff entirely; this is the default behavior. |
| Supports multiple time unit suffix (case insensitive), as described |
| in dfs.heartbeat.interval. |
| </description> |
| </property> |
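| |
| <!-- |
| Illustrative hdfs-site.xml snippet (a sketch): tailing edits frequently |
| while letting an idle tailer back off. With these assumed values the |
| Standby checks every 100 ms while edits are flowing, and backs off |
| exponentially up to 10 seconds when none are found. |
| |
| <property> |
| <name>dfs.ha.tail-edits.period</name> |
| <value>100ms</value> |
| </property> |
| <property> |
| <name>dfs.ha.tail-edits.period.backoff-max</name> |
| <value>10s</value> |
| </property> |
| --> |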
| |
| <property> |
| <name>dfs.ha.tail-edits.namenode-retries</name> |
| <value>3</value> |
| <description> |
| Number of retries to use when contacting the namenode when tailing the log. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.tail-edits.rolledits.timeout</name> |
| <value>60</value> |
| <description>The timeout in seconds for calling the rollEdits RPC on the Active NN. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.automatic-failover.enabled</name> |
| <value>false</value> |
| <description> |
| Whether automatic failover is enabled. See the HDFS High |
| Availability documentation for details on automatic HA |
| configuration. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.use.datanode.hostname</name> |
| <value>false</value> |
| <description>Whether clients should use datanode hostnames when |
| connecting to datanodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.use.datanode.hostname</name> |
| <value>false</value> |
| <description>Whether datanodes should use datanode hostnames when |
| connecting to other datanodes for data transfer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.local.interfaces</name> |
| <value></value> |
| <description>A comma separated list of network interface names to use |
| for data transfer between the client and datanodes. When creating |
| a connection to read from or write to a datanode, the client |
| chooses one of the specified interfaces at random and binds its |
| socket to the IP of that interface. Individual names may be |
| specified as either an interface name (eg "eth0"), a subinterface |
| name (eg "eth0:0"), or an IP address (which may be specified using |
| CIDR notation to match a range of IPs). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.shared.file.descriptor.paths</name> |
| <value>/dev/shm,/tmp</value> |
| <description> |
| A comma-separated list of paths to use when creating file descriptors that |
| will be shared between the DataNode and the DFSClient. Typically we use |
| /dev/shm, so that the file descriptors will not be written to disk. |
| Paths are tried in order until the creation of a shared memory segment succeeds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.short.circuit.shared.memory.watcher.interrupt.check.ms</name> |
| <value>60000</value> |
| <description> |
| The length of time in milliseconds that the short-circuit shared memory |
| watcher will wait between checks for Java interruptions sent from other |
| threads. This is provided mainly for unit tests. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.kerberos.principal</name> |
| <value></value> |
| <description> |
| The NameNode service principal. This is typically set to |
| nn/_HOST@REALM.TLD. Each NameNode will substitute _HOST with its |
| own fully qualified hostname at startup. The _HOST placeholder |
| allows using the same configuration setting on both NameNodes |
| in an HA setup. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.keytab.file</name> |
| <value></value> |
| <description> |
| The keytab file used by each NameNode daemon to login as its |
| service principal. The principal name is configured with |
| dfs.namenode.kerberos.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.kerberos.principal</name> |
| <value></value> |
| <description> |
| The DataNode service principal. This is typically set to |
| dn/_HOST@REALM.TLD. Each DataNode will substitute _HOST with its |
| own fully qualified hostname at startup. The _HOST placeholder |
| allows using the same configuration setting on all DataNodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.keytab.file</name> |
| <value></value> |
| <description> |
| The keytab file used by each DataNode daemon to login as its |
| service principal. The principal name is configured with |
| dfs.datanode.kerberos.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.kerberos.principal</name> |
| <value></value> |
| <description> |
| The JournalNode service principal. This is typically set to |
| jn/_HOST@REALM.TLD. Each JournalNode will substitute _HOST with its |
| own fully qualified hostname at startup. The _HOST placeholder |
| allows using the same configuration setting on all JournalNodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.keytab.file</name> |
| <value></value> |
| <description> |
| The keytab file used by each JournalNode daemon to login as its |
| service principal. The principal name is configured with |
| dfs.journalnode.kerberos.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.kerberos.internal.spnego.principal</name> |
| <value>${dfs.web.authentication.kerberos.principal}</value> |
| <description> |
| The server principal used by the NameNode for web UI SPNEGO |
| authentication when Kerberos security is enabled. This is |
| typically set to HTTP/_HOST@REALM.TLD. The SPNEGO server principal |
| begins with the prefix HTTP/ by convention. |
| |
| If the value is '*', the web server will attempt to login with |
| every principal specified in the keytab file |
| dfs.web.authentication.kerberos.keytab. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.kerberos.internal.spnego.principal</name> |
| <value></value> |
| <description> |
| The server principal used by the JournalNode HTTP Server for |
| SPNEGO authentication when Kerberos security is enabled. This is |
| typically set to HTTP/_HOST@REALM.TLD. The SPNEGO server principal |
| begins with the prefix HTTP/ by convention. |
| |
| If the value is '*', the web server will attempt to login with |
| every principal specified in the keytab file |
| dfs.web.authentication.kerberos.keytab. |
| |
| For most deployments this can be set to ${dfs.web.authentication.kerberos.principal} |
| i.e. use the value of dfs.web.authentication.kerberos.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name> |
| <value>${dfs.web.authentication.kerberos.principal}</value> |
| <description> |
| The server principal used by the Secondary NameNode for web UI SPNEGO |
| authentication when Kerberos security is enabled. Like all other |
| Secondary NameNode settings, it is ignored in an HA setup. |
| |
| If the value is '*', the web server will attempt to login with |
| every principal specified in the keytab file |
| dfs.web.authentication.kerberos.keytab. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.web.authentication.kerberos.principal</name> |
| <value></value> |
| <description> |
| The server principal used by the NameNode for WebHDFS SPNEGO |
| authentication. |
| |
| Required when WebHDFS and security are enabled. In most secure clusters this |
| setting is also used to specify the values for |
| dfs.namenode.kerberos.internal.spnego.principal and |
| dfs.journalnode.kerberos.internal.spnego.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.web.authentication.kerberos.keytab</name> |
| <value></value> |
| <description> |
| The keytab file for the principal corresponding to |
| dfs.web.authentication.kerberos.principal. |
| </description> |
| </property> |
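| |
| <!-- |
| Illustrative hdfs-site.xml snippet for a kerberized NameNode (a sketch; |
| the realm EXAMPLE.COM and keytab paths are placeholders). The _HOST |
| token is expanded to each daemon's fully qualified hostname at startup: |
| |
| <property> |
| <name>dfs.namenode.kerberos.principal</name> |
| <value>nn/_HOST@EXAMPLE.COM</value> |
| </property> |
| <property> |
| <name>dfs.namenode.keytab.file</name> |
| <value>/etc/security/keytabs/nn.service.keytab</value> |
| </property> |
| <property> |
| <name>dfs.web.authentication.kerberos.principal</name> |
| <value>HTTP/_HOST@EXAMPLE.COM</value> |
| </property> |
| <property> |
| <name>dfs.web.authentication.kerberos.keytab</name> |
| <value>/etc/security/keytabs/spnego.service.keytab</value> |
| </property> |
| --> |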
| |
| <property> |
| <name>dfs.namenode.kerberos.principal.pattern</name> |
| <value>*</value> |
| <description> |
| A client-side regular expression that can be configured to control |
| which realms are allowed for authentication (useful in cross-realm |
| environments). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.avoid.read.stale.datanode</name> |
| <value>false</value> |
| <description> |
| Indicates whether or not to avoid reading from "stale" datanodes whose |
| heartbeat messages have not been received by the namenode |
| for more than a specified time interval. Stale datanodes will be |
| moved to the end of the node list returned for reading. See |
| dfs.namenode.avoid.write.stale.datanode for a similar setting for writes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.avoid.write.stale.datanode</name> |
| <value>false</value> |
| <description> |
| Indicates whether or not to avoid writing to "stale" datanodes whose |
| heartbeat messages have not been received by the namenode |
| for more than a specified time interval. Writes will avoid using |
| stale datanodes unless more than a configured ratio |
| (dfs.namenode.write.stale.datanode.ratio) of datanodes are marked as |
| stale. See dfs.namenode.avoid.read.stale.datanode for a similar setting |
| for reads. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.enable.log.stale.datanode</name> |
| <value>false</value> |
| <description> |
| Whether to log datanode staleness. Disabled by default. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.stale.datanode.interval</name> |
| <value>30000</value> |
| <description> |
| Default time interval in milliseconds for marking a datanode as "stale": |
| if the namenode has not received a heartbeat message from a datanode for |
| more than this time interval, the datanode will be marked and treated |
| as "stale" by default. The stale interval should not be too small, since |
| otherwise datanodes may flip between stale and non-stale states too |
| frequently. A minimum stale interval is therefore enforced (by default, |
| 3 times the heartbeat interval), and the configured stale interval cannot |
| be less than that minimum. A stale datanode is avoided during lease/block |
| recovery. It can be conditionally avoided for reads (see |
| dfs.namenode.avoid.read.stale.datanode) and for writes (see |
| dfs.namenode.avoid.write.stale.datanode). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.write.stale.datanode.ratio</name> |
| <value>0.5f</value> |
| <description> |
| When the ratio of stale datanodes to total datanodes is greater than |
| this value, stop avoiding writes to stale nodes, so as to prevent |
| causing hotspots on the remaining nodes. |
| </description> |
| </property> |
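| |
| <!-- |
| Illustrative hdfs-site.xml snippet (a sketch) enabling stale-node |
| avoidance for both reads and writes, with the default 30 second stale |
| interval spelled out for clarity: |
| |
| <property> |
| <name>dfs.namenode.avoid.read.stale.datanode</name> |
| <value>true</value> |
| </property> |
| <property> |
| <name>dfs.namenode.avoid.write.stale.datanode</name> |
| <value>true</value> |
| </property> |
| <property> |
| <name>dfs.namenode.stale.datanode.interval</name> |
| <value>30000</value> |
| </property> |
| --> |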
| |
| <property> |
| <name>dfs.namenode.invalidate.work.pct.per.iteration</name> |
| <value>0.32f</value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This determines the percentage of block |
| invalidations (deletes) to issue in a single DN heartbeat |
| deletion command. The final deletion count is determined by applying this |
| percentage to the number of live nodes in the system. |
| The resulting number is the number of blocks from the deletion list |
| chosen for invalidation over a single heartbeat of a single DN. |
| The value should be a positive, non-zero percentage in float notation |
| (X.Yf), with 1.0f meaning 100%. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.replication.work.multiplier.per.iteration</name> |
| <value>2</value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This determines the total amount of block transfers to begin in |
| parallel at a DN, for replication, when such a command list is being |
| sent over a DN heartbeat by the NN. The actual number is obtained by |
| multiplying this multiplier with the total number of live nodes in the |
| cluster. The resulting number is the number of blocks to begin transferring |
| immediately, per DN heartbeat. This number can be any positive, |
| non-zero integer. |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.server.port</name> |
| <value>2049</value> |
| <description> |
| Specify the port number used by Hadoop NFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.mountd.port</name> |
| <value>4242</value> |
| <description> |
| Specify the port number used by Hadoop mount daemon. |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.dump.dir</name> |
| <value>/tmp/.hdfs-nfs</value> |
| <description> |
| This directory is used to temporarily save out-of-order writes before |
| writing to HDFS. For each file, the out-of-order writes are dumped to |
| disk after they accumulate beyond a certain threshold (e.g., 1MB) in |
| memory. Make sure the directory has enough space. |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.rtmax</name> |
| <value>1048576</value> |
| <description>This is the maximum size in bytes of a READ request |
| supported by the NFS gateway. If you change this, make sure you |
| also update the nfs mount's rsize (add rsize=# of bytes to the |
| mount directive). |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.wtmax</name> |
| <value>1048576</value> |
| <description>This is the maximum size in bytes of a WRITE request |
| supported by the NFS gateway. If you change this, make sure you |
| also update the nfs mount's wsize (add wsize=# of bytes to the |
| mount directive). |
| </description> |
| </property> |
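| |
| <!-- |
| An illustrative Linux mount line matching the two defaults above (a |
| sketch; the gateway host and mount point are placeholders): |
| |
| mount -t nfs -o vers=3,proto=tcp,nolock,rsize=1048576,wsize=1048576 \ |
| nfs-gateway.example.com:/ /mnt/hdfs |
| --> |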
| |
| <property> |
| <name>nfs.keytab.file</name> |
| <value></value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This is the path to the keytab file for the hdfs-nfs gateway. |
| This is required when the cluster is kerberized. |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.kerberos.principal</name> |
| <value></value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This is the name of the kerberos principal. This is required when |
| the cluster is kerberized. It must be of this format: |
| nfs-gateway-user/nfs-gateway-host@kerberos-realm |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.allow.insecure.ports</name> |
| <value>true</value> |
| <description> |
| When set to false, client connections originating from unprivileged ports |
| (those above 1023) will be rejected. This ensures that clients |
| connecting to this NFS Gateway have root privilege on the machine |
| from which they are connecting. |
| </description> |
| </property> |
| |
| <property> |
| <name>hadoop.fuse.connection.timeout</name> |
| <value>300</value> |
| <description> |
| The minimum number of seconds that we'll cache libhdfs connection objects |
| in fuse_dfs. Lower values will result in lower memory consumption; higher |
| values may speed up access by avoiding the overhead of creating new |
| connection objects. |
| </description> |
| </property> |
| |
| <property> |
| <name>hadoop.fuse.timer.period</name> |
| <value>5</value> |
| <description> |
| The number of seconds between cache expiry checks in fuse_dfs. Lower values |
| will result in fuse_dfs noticing changes to Kerberos ticket caches more |
| quickly. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.metrics.logger.period.seconds</name> |
| <value>600</value> |
| <description> |
| This setting controls how frequently the NameNode logs its metrics. The |
| logging configuration must also define one or more appenders for |
| NameNodeMetricsLog for the metrics to be logged. |
| NameNode metrics logging is disabled if this value is zero or negative. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.metrics.logger.period.seconds</name> |
| <value>600</value> |
| <description> |
| This setting controls how frequently the DataNode logs its metrics. The |
| logging configuration must also define one or more appenders for |
| DataNodeMetricsLog for the metrics to be logged. |
| DataNode metrics logging is disabled if this value is zero or negative. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.metrics.percentiles.intervals</name> |
| <value></value> |
| <description> |
| Comma-delimited set of integers denoting the desired rollover intervals |
| (in seconds) for percentile latency metrics on the Namenode and Datanode. |
| By default, percentile latency metrics are disabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.peer.stats.enabled</name> |
| <value>false</value> |
| <description> |
| A switch to turn on/off tracking DataNode peer statistics. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.peer.metrics.min.outlier.detection.samples</name> |
| <value>1000</value> |
| <description> |
| Minimum number of packet send samples which are required to qualify for outlier detection. |
| If the number of samples is below this then outlier detection is skipped. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.min.outlier.detection.nodes</name> |
| <value>10</value> |
| <description> |
| Minimum number of nodes to run outlier detection. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.slowpeer.low.threshold.ms</name> |
| <value>5</value> |
| <description> |
| Threshold in milliseconds below which a DataNode is definitely not slow. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.max.nodes.to.report</name> |
| <value>5</value> |
| <description> |
| Number of nodes to include in JSON report. We will return nodes with |
| the highest number of votes from peers. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.outliers.report.interval</name> |
| <value>30m</value> |
| <description> |
| This setting controls how frequently DataNodes will report their peer |
| latencies to the NameNode via heartbeats. This setting supports |
| multiple time unit suffixes as described in dfs.heartbeat.interval. |
| If no suffix is specified then milliseconds is assumed. |
| |
| It is ignored if dfs.datanode.peer.stats.enabled is false. |
| </description> |
| </property> |
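| |
| <!-- |
| Illustrative hdfs-site.xml snippet (a sketch): turning on peer statistics |
| so that the report interval above takes effect. The interval shown simply |
| restates the default: |
| |
| <property> |
| <name>dfs.datanode.peer.stats.enabled</name> |
| <value>true</value> |
| </property> |
| <property> |
| <name>dfs.datanode.outliers.report.interval</name> |
| <value>30m</value> |
| </property> |
| --> |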
| |
| <property> |
| <name>dfs.datanode.fileio.profiling.sampling.percentage</name> |
| <value>0</value> |
| <description> |
| This setting controls the percentage of file I/O events which will be |
| profiled for DataNode disk statistics. The default value of 0 disables |
| disk statistics. Set to an integer value between 1 and 100 to enable disk |
| statistics. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.min.outlier.detection.disks</name> |
| <value>5</value> |
| <description> |
| Minimum number of disks to run outlier detection. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.slowdisk.low.threshold.ms</name> |
| <value>20</value> |
| <description> |
| Threshold in milliseconds below which a disk is definitely not slow. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.max.disks.to.report</name> |
| <value>5</value> |
| <description> |
| Number of disks to include in JSON report per operation. We will return |
| disks with the highest latency. |
| </description> |
| </property> |
| |
| <property> |
| <name>hadoop.user.group.metrics.percentiles.intervals</name> |
| <value></value> |
| <description> |
| A comma-separated list of the granularity in seconds for the metrics |
| which describe the 50/75/90/95/99th percentile latency for group resolution |
| in milliseconds. |
| By default, percentile latency metrics are disabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.transfer</name> |
| <value>false</value> |
| <description> |
| Whether or not actual block data that is read/written from/to HDFS should |
| be encrypted on the wire. This only needs to be set on the NN and DNs, |
| clients will deduce this automatically. It is possible to override this setting |
| per connection by specifying custom logic via dfs.trustedchannel.resolver.class. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.transfer.algorithm</name> |
| <value></value> |
| <description> |
| This value may be set to either "3des" or "rc4". If nothing is set, then |
| the configured JCE default on the system is used (usually 3DES). It is |
| widely believed that 3DES is more cryptographically secure, but RC4 is |
| substantially faster. |
| |
| Note that if AES is supported by both the client and server then this |
| encryption algorithm will only be used to initially transfer keys for AES. |
| (See dfs.encrypt.data.transfer.cipher.suites.) |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.transfer.cipher.suites</name> |
| <value></value> |
| <description> |
| This value may be either undefined or AES/CTR/NoPadding. If defined, then |
| dfs.encrypt.data.transfer uses the specified cipher suite for data |
| encryption. If not defined, then only the algorithm specified in |
| dfs.encrypt.data.transfer.algorithm is used. By default, the property is |
| not defined. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.transfer.cipher.key.bitlength</name> |
| <value>128</value> |
| <description> |
| The key bitlength negotiated between the DFS client and datanode for encryption. |
| This value may be set to either 128, 192 or 256. |
| </description> |
| </property> |
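| |
| <!-- |
| Illustrative hdfs-site.xml snippet (a sketch) enabling wire encryption |
| with AES; the 256-bit key length is an assumption, 128 and 192 are also |
| valid. With the cipher suite set, dfs.encrypt.data.transfer.algorithm |
| is then only used to bootstrap the AES key exchange: |
| |
| <property> |
| <name>dfs.encrypt.data.transfer</name> |
| <value>true</value> |
| </property> |
| <property> |
| <name>dfs.encrypt.data.transfer.cipher.suites</name> |
| <value>AES/CTR/NoPadding</value> |
| </property> |
| <property> |
| <name>dfs.encrypt.data.transfer.cipher.key.bitlength</name> |
| <value>256</value> |
| </property> |
| --> |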
| |
| <property> |
| <name>dfs.trustedchannel.resolver.class</name> |
| <value></value> |
| <description> |
| TrustedChannelResolver is used to determine whether a channel |
| is trusted for plain data transfer. The TrustedChannelResolver is |
| invoked on both client and server side. If the resolver indicates |
| that the channel is trusted, then the data transfer will not be |
| encrypted even if dfs.encrypt.data.transfer is set to true. The |
| default implementation returns false indicating that the channel |
| is not trusted. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.data.transfer.protection</name> |
| <value></value> |
| <description> |
| A comma-separated list of SASL protection values used for secured |
| connections to the DataNode when reading or writing block data. Possible |
| values are authentication, integrity and privacy. authentication means |
| authentication only and no integrity or privacy; integrity implies |
| authentication and integrity are enabled; and privacy implies all of |
| authentication, integrity and privacy are enabled. If |
| dfs.encrypt.data.transfer is set to true, then it supersedes the setting for |
| dfs.data.transfer.protection and enforces that all connections must use a |
| specialized encrypted SASL handshake. This property is ignored for |
| connections to a DataNode listening on a privileged port. In this case, it |
| is assumed that the use of a privileged port establishes sufficient trust. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.data.transfer.saslproperties.resolver.class</name> |
| <value></value> |
| <description> |
| SaslPropertiesResolver used to resolve the QOP used for a connection to the |
| DataNode when reading or writing block data. If not specified, the value of |
| hadoop.security.saslproperties.resolver.class is used as the default value. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.rpc-address</name> |
| <value>0.0.0.0:8485</value> |
| <description> |
| The JournalNode RPC server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.rpc-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the RPC server will bind to. If this optional address is |
| set, it overrides only the hostname portion of dfs.journalnode.rpc-address. |
| This is useful for making the JournalNode listen on all interfaces by |
| setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.http-address</name> |
| <value>0.0.0.0:8480</value> |
| <description> |
| The address and port the JournalNode HTTP server listens on. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.http-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the HTTP server will bind to. If this optional address |
| is set, it overrides only the hostname portion of |
| dfs.journalnode.http-address. This is useful for making the JournalNode |
| HTTP server listen on all interfaces by setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.https-address</name> |
| <value>0.0.0.0:8481</value> |
| <description> |
| The address and port the JournalNode HTTPS server listens on. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.https-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the HTTPS server will bind to. If this optional address |
| is set, it overrides only the hostname portion of |
| dfs.journalnode.https-address. This is useful for making the JournalNode |
| HTTPS server listen on all interfaces by setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.audit.loggers</name> |
| <value>default</value> |
| <description> |
| List of classes implementing audit loggers that will receive audit events. |
| These should be implementations of org.apache.hadoop.hdfs.server.namenode.AuditLogger. |
| The special value "default" can be used to reference the default audit |
| logger, which uses the configured log system. Installing custom audit loggers |
| may affect the performance and stability of the NameNode. Refer to the custom |
| logger's documentation for more details. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name> |
| <value>10737418240</value> <!-- 10 GB --> |
| <description> |
| Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to |
| org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy. |
| This setting controls how much DN volumes are allowed to differ in terms of |
| bytes of free disk space before they are considered imbalanced. If the free |
| space of all the volumes is within this range of each other, the volumes |
| will be considered balanced and block assignments will be done on a pure |
| round-robin basis. Supports multiple size unit suffixes (case |
| insensitive), as described in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name> |
| <value>0.75f</value> |
| <description> |
| Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to |
| org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy. |
| This setting controls what percentage of new block allocations will be sent |
| to volumes with more available disk space than others. This setting should |
| be in the range 0.0 - 1.0, though in practice 0.5 - 1.0, since there should |
| be no reason to prefer that volumes with less available disk space receive |
| more block allocations. |
| </description> |
| </property> |
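| |
| <!-- |
| Illustrative hdfs-site.xml snippet (a sketch) that selects the |
| available-space policy referenced above; the threshold and fraction |
| shown are assumptions for illustration: |
| |
| <property> |
| <name>dfs.datanode.fsdataset.volume.choosing.policy</name> |
| <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy</value> |
| </property> |
| <property> |
| <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name> |
| <value>10g</value> |
| </property> |
| <property> |
| <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name> |
| <value>0.85f</value> |
| </property> |
| --> |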
| |
| <property> |
| <name>dfs.namenode.edits.noeditlogchannelflush</name> |
| <value>false</value> |
| <description> |
| Specifies whether to skip flushing the edit log file channel. When set, |
| expensive FileChannel#force calls are skipped and synchronous disk writes |
| are enabled instead by opening the edit log file with RandomAccessFile("rws") |
| flags. This can significantly improve the performance of edit log writes |
| on the Windows platform. |
| Note that the behavior of the "rws" flags is platform and hardware specific |
| and might not provide the same level of guarantees as FileChannel#force. |
| For example, the write will skip the disk-cache on SAS and SCSI devices |
| while it might not on SATA devices. This is an expert level setting, |
| change with caution. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.cache.drop.behind.writes</name> |
| <value></value> |
| <description> |
| Just like dfs.datanode.drop.cache.behind.writes, this setting causes the |
| page cache to be dropped behind HDFS writes, potentially freeing up more |
| memory for other uses. Unlike dfs.datanode.drop.cache.behind.writes, this |
| is a client-side setting rather than a setting for the entire datanode. |
| If present, this setting will override the DataNode default. |
| |
| If the native libraries are not available to the DataNode, this |
| configuration has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.cache.drop.behind.reads</name> |
| <value></value> |
| <description> |
| Just like dfs.datanode.drop.cache.behind.reads, this setting causes the |
| page cache to be dropped behind HDFS reads, potentially freeing up more |
| memory for other uses. Unlike dfs.datanode.drop.cache.behind.reads, this |
| is a client-side setting rather than a setting for the entire datanode. If |
| present, this setting will override the DataNode default. |
| |
| If the native libraries are not available to the DataNode, this |
| configuration has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.cache.readahead</name> |
| <value></value> |
| <description> |
| When using remote reads, this setting causes the datanode to |
| read ahead in the block file using posix_fadvise, potentially decreasing |
| I/O wait times. Unlike dfs.datanode.readahead.bytes, this is a client-side |
| setting rather than a setting for the entire datanode. If present, this |
| setting will override the DataNode default. Supports multiple size unit |
| suffixes (case insensitive), as described in dfs.blocksize. |
| |
| When using local reads, this setting determines how much readahead we do in |
| BlockReaderLocal. |
| |
| If the native libraries are not available to the DataNode, this |
| configuration has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.server-defaults.validity.period.ms</name> |
| <value>3600000</value> |
| <description> |
| The number of milliseconds after which cached server defaults are updated. |
| |
| By default this parameter is set to 1 hour. |
| Supports multiple time unit suffixes (case insensitive), as described |
| in dfs.heartbeat.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.enable.retrycache</name> |
| <value>true</value> |
| <description> |
| This enables the retry cache on the namenode. The namenode tracks the |
| response for each non-idempotent request. If a client retries such a |
| request, the response from the retry cache is sent. These operations |
| are tagged with the annotation @AtMostOnce in namenode protocols. It is |
| recommended that this flag be set to true. Setting it to false will result |
| in clients getting failure responses to retried requests. This flag must |
| be enabled in an HA setup for transparent failovers. |
| |
| The entries in the cache have expiration time configurable |
| using dfs.namenode.retrycache.expirytime.millis. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.retrycache.expirytime.millis</name> |
| <value>600000</value> |
| <description> |
| The time for which retry cache entries are retained. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.retrycache.heap.percent</name> |
| <value>0.03f</value> |
| <description> |
| This parameter configures the heap size allocated for retry cache |
| (excluding the response cached). This corresponds to approximately |
| 4096 entries for every 64MB of namenode process java heap size. |
| Assuming retry cache entry expiration time (configured using |
| dfs.namenode.retrycache.expirytime.millis) of 10 minutes, this |
| enables retry cache to support 7 operations per second sustained |
| for 10 minutes. As the heap size is increased, the operation rate |
| linearly increases. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.mmap.enabled</name> |
| <value>true</value> |
| <description> |
| If this is set to false, the client won't attempt to perform memory-mapped reads. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.mmap.cache.size</name> |
| <value>256</value> |
| <description> |
| When zero-copy reads are used, the DFSClient keeps a cache of recently used |
| memory mapped regions. This parameter controls the maximum number of |
| entries that we will keep in that cache. |
| |
| The larger this number is, the more file descriptors we will potentially |
| use for memory-mapped files. mmapped files also use virtual address space. |
| You may need to increase your ulimit virtual address space limits before |
| increasing the client mmap cache size. |
| |
| Note that you can still do zero-copy reads when this size is set to 0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.mmap.cache.timeout.ms</name> |
| <value>3600000</value> |
| <description> |
| The minimum length of time that we will keep an mmap entry in the cache |
| between uses. If an entry is in the cache longer than this, and nobody |
| uses it, it will be removed by a background thread. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.mmap.retry.timeout.ms</name> |
| <value>300000</value> |
| <description> |
| The minimum amount of time that we will wait before retrying a failed mmap |
| operation. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.short.circuit.replica.stale.threshold.ms</name> |
| <value>1800000</value> |
| <description> |
| The maximum amount of time that we will consider a short-circuit replica to |
| be valid, if there is no communication from the DataNode. After this time |
| has elapsed, we will re-fetch the short-circuit replica even if it is in |
| the cache. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.caching.enabled</name> |
| <value>true</value> |
| <description> |
| Set to true to enable block caching. This flag enables the NameNode to |
| maintain a mapping of cached blocks to DataNodes via processing DataNode |
| cache reports. Based on these reports and addition and removal of caching |
| directives, the NameNode will schedule caching and uncaching work. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.path.based.cache.block.map.allocation.percent</name> |
| <value>0.25</value> |
| <description> |
| The percentage of the Java heap which we will allocate to the cached blocks |
| map. The cached blocks map is a hash map which uses chained hashing. |
| Smaller maps may be accessed more slowly if the number of cached blocks is |
| large; larger maps will consume more memory. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.max.locked.memory</name> |
| <value>0</value> |
| <description> |
| The amount of memory in bytes to use for caching of block replicas in |
| memory on the datanode. The datanode's maximum locked memory soft ulimit |
| (RLIMIT_MEMLOCK) must be set to at least this value, else the datanode |
| will abort on startup. Supports multiple size unit suffixes |
| (case insensitive), as described in dfs.blocksize. |
| |
| By default, this parameter is set to 0, which disables in-memory caching. |
| |
| If the native libraries are not available to the DataNode, this |
| configuration has no effect. |
| </description> |
| </property> |
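| |
| <!-- |
| Illustrative hdfs-site.xml snippet (a sketch) enabling in-memory caching |
| of replicas; 2g is an assumed budget. The DataNode user's locked-memory |
| ulimit (ulimit -l) must be at least this large or the DataNode aborts |
| at startup: |
| |
| <property> |
| <name>dfs.datanode.max.locked.memory</name> |
| <value>2g</value> |
| </property> |
| --> |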
| |
| <property> |
| <name>dfs.datanode.pmem.cache.dirs</name> |
| <value></value> |
| <description> |
| This value specifies the persistent memory directories used for caching |
| block replicas. Multiple directories separated by "," are acceptable. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.pmem.cache.recovery</name> |
| <value>true</value> |
| <description> |
| This value specifies whether a previous cache on persistent memory will be |
| recovered. This configuration takes effect only if the persistent memory |
| cache is enabled by specifying a value for 'dfs.datanode.pmem.cache.dirs'. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.list.cache.directives.num.responses</name> |
| <value>100</value> |
| <description> |
| This value controls the number of cache directives that the NameNode will |
| send over the wire in response to a listDirectives RPC. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.list.cache.pools.num.responses</name> |
| <value>100</value> |
| <description> |
| This value controls the number of cache pools that the NameNode will |
| send over the wire in response to a listPools RPC. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.path.based.cache.refresh.interval.ms</name> |
| <value>30000</value> |
| <description> |
| The number of milliseconds between subsequent path cache rescans. During |
| a path cache rescan, the NameNode calculates which blocks should be cached |
| and on which datanodes. |
| |
| By default, this parameter is set to 30 seconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.path.based.cache.retry.interval.ms</name> |
| <value>30000</value> |
| <description> |
| When the NameNode needs to uncache something that is cached, or cache |
| something that is not cached, it must direct the DataNodes to do so by |
| sending a DNA_CACHE or DNA_UNCACHE command in response to a DataNode |
| heartbeat. This parameter controls how frequently the NameNode will |
| resend these commands. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.fsdatasetcache.max.threads.per.volume</name> |
| <value>4</value> |
| <description> |
| The maximum number of threads per volume to use for caching new data |
| on the datanode. These threads consume both I/O and CPU. This can affect |
| normal datanode operations. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.cachereport.intervalMsec</name> |
| <value>10000</value> |
| <description> |
| Determines cache reporting interval in milliseconds. After this amount of |
| time, the DataNode sends a full report of its cache state to the NameNode. |
| The NameNode uses the cache report to update its map of cached blocks to |
| DataNode locations. |
| |
| This configuration has no effect if in-memory caching has been disabled by |
| setting dfs.datanode.max.locked.memory to 0 (which is the default). |
| |
| If the native libraries are not available to the DataNode, this |
| configuration has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edit.log.autoroll.multiplier.threshold</name> |
| <value>0.5</value> |
| <description> |
| Determines when an active namenode will roll its own edit log. |
| The actual threshold (in number of edits) is determined by multiplying |
| this value by dfs.namenode.checkpoint.txns. |
| |
| This prevents extremely large edit files from accumulating on the active |
| namenode, which can cause timeouts during namenode startup and pose an |
| administrative hassle. This behavior is intended as a failsafe for when |
| the standby or secondary namenode fails to roll the edit log by the normal |
| checkpoint threshold. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edit.log.autoroll.check.interval.ms</name> |
| <value>300000</value> |
| <description> |
| How often an active namenode will check if it needs to roll its edit log, |
| in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.user.provider.user.pattern</name> |
| <value>^[A-Za-z_][A-Za-z0-9._-]*[$]?$</value> |
| <description> |
| Valid pattern for user and group names for webhdfs; it must be a valid Java regex. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.acl.provider.permission.pattern</name> |
| <value>^(default:)?(user|group|mask|other):[[A-Za-z_][A-Za-z0-9._-]]*:([rwx-]{3})?(,(default:)?(user|group|mask|other):[[A-Za-z_][A-Za-z0-9._-]]*:([rwx-]{3})?)*$</value> |
| <description> |
| Valid pattern for user and group names in webhdfs acl operations; it must be a valid Java regex. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.socket.connect-timeout</name> |
| <value>60s</value> |
| <description> |
| Socket timeout for connecting to WebHDFS servers. This prevents a |
| WebHDFS client from hanging if the server hostname is |
| misconfigured, or the server does not respond before the timeout |
| expires. The value is followed by a unit specifier: ns, us, ms, s, m, |
| h, d for nanoseconds, microseconds, milliseconds, seconds, |
| minutes, hours, and days respectively. Values should provide units; |
| if none is given, milliseconds are assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.socket.read-timeout</name> |
| <value>60s</value> |
| <description> |
| Socket timeout for reading data from WebHDFS servers. This |
| prevents a WebHDFS client from hanging if the server stops sending |
| data. Value is followed by a unit specifier: ns, us, ms, s, m, h, |
| d for nanoseconds, microseconds, milliseconds, seconds, minutes, |
| hours, days respectively. Values should provide units;
| if none is given, milliseconds are assumed.
| </description> |
| </property> |
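| <!-- For example, <value>30s</value> and <value>30000ms</value> are
|      equivalent here; a bare <value>30000</value> is read as milliseconds. -->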
| |
| <property> |
| <name>dfs.client.context</name> |
| <value>default</value> |
| <description> |
| The name of the DFSClient context that we should use. Clients that share |
| a context share a socket cache and short-circuit cache, among other things. |
| You should only change this if you don't want to share with another set of |
| threads. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.shortcircuit</name> |
| <value>false</value> |
| <description> |
| This configuration parameter turns on short-circuit local reads. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.socket.send.buffer.size</name> |
| <value>0</value> |
| <description> |
| Socket send buffer size for a write pipeline on the DFSClient side.
| This may affect TCP connection throughput.
| If it is set to zero or a negative value,
| no buffer size will be set explicitly,
| thus enabling TCP auto-tuning on systems that support it.
| The default value is 0.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.domain.socket.path</name> |
| <value></value> |
| <description> |
| Optional. This is a path to a UNIX domain socket that will be used for |
| communication between the DataNode and local HDFS clients. |
| If the string "_PORT" is present in this path, it will be replaced by the |
| TCP port of the DataNode. |
| </description> |
| </property> |
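| <!-- A minimal sketch of enabling short-circuit local reads; the socket path
|      below is a common choice, not a requirement, and "_PORT" (if present)
|      is replaced with the DataNode's TCP port:
|
|      <property>
|        <name>dfs.client.read.shortcircuit</name>
|        <value>true</value>
|      </property>
|      <property>
|        <name>dfs.domain.socket.path</name>
|        <value>/var/lib/hadoop-hdfs/dn_socket</value>
|      </property>
| -->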
| |
| <property> |
| <name>dfs.domain.socket.disable.interval.seconds</name> |
| <value>600</value> |
| <description> |
| The amount of time, in seconds, for which Short-Circuit Reads are disabled
| on a DataNode after an error happens during a Short-Circuit Read. Setting
| this to 0 means Short-Circuit Reads are never disabled after errors happen.
| Negative values are invalid.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.shortcircuit.skip.checksum</name> |
| <value>false</value> |
| <description> |
| If this configuration parameter is set, |
| short-circuit local reads will skip checksums. |
| This is normally not recommended, |
| but it may be useful for special setups. |
| You might consider using this |
| if you are doing your own checksumming outside of HDFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.shortcircuit.streams.cache.size</name> |
| <value>256</value> |
| <description> |
| The DFSClient maintains a cache of recently opened file descriptors. |
| This parameter controls the maximum number of file descriptors in the cache. |
| Setting this higher will use more file descriptors, |
| but potentially provide better performance on workloads |
| involving lots of seeks. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.shortcircuit.streams.cache.expiry.ms</name> |
| <value>300000</value> |
| <description> |
| This controls the minimum amount of time that
| file descriptors must remain in the client cache context
| before they can be closed for being inactive for too long.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.audit.log.debug.cmdlist</name> |
| <value></value> |
| <description> |
| A comma separated list of NameNode commands that are written to the HDFS |
| namenode audit log only if the audit log level is debug. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.use.legacy.blockreader.local</name> |
| <value>false</value> |
| <description> |
| Legacy short-circuit reader implementation based on HDFS-2246 is used |
| if this configuration parameter is true. |
| This is for platforms other than Linux
| where the new implementation based on HDFS-347 is not available. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.use.cache.priority</name> |
| <value>false</value> |
| <description> |
| If true, a replica cached on a datanode is preferred;
| otherwise, the replica closest to the client is preferred.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.local-path-access.user</name> |
| <value></value> |
| <description> |
| Comma separated list of the users allowed to open block files |
| on legacy short-circuit local reads.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.domain.socket.data.traffic</name> |
| <value>false</value> |
| <description> |
| This controls whether we will try to pass normal data traffic
| over UNIX domain sockets rather than over TCP sockets
| for node-local data transfers.
| This is currently experimental and turned off by default.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reject-unresolved-dn-topology-mapping</name> |
| <value>false</value> |
| <description> |
| If the value is set to true, then namenode will reject datanode |
| registration if the topology mapping for a datanode is not resolved and |
| NULL is returned (script defined by net.topology.script.file.name fails |
| to execute). Otherwise, datanode will be registered and the default rack |
| will be assigned as the topology path. Topology paths are important for |
| data resiliency, since they define fault domains. Thus it may be unwanted |
| behavior to allow datanode registration with the default rack if topology
| resolution failed.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.xattrs.enabled</name> |
| <value>true</value> |
| <description> |
| Whether support for extended attributes is enabled on the NameNode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-xattrs-per-inode</name> |
| <value>32</value> |
| <description> |
| Maximum number of extended attributes per inode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-xattr-size</name> |
| <value>16384</value> |
| <description> |
| The maximum combined size of the name and value of an extended attribute |
| in bytes. It should be larger than 0, and less than or equal to the
| maximum size hard limit, which is 32768.
| Multiple size unit suffixes are supported (case insensitive), as described
| in dfs.blocksize.
| </description> |
| </property> |
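| <!-- For example, <value>16k</value> is equivalent to the default
|      <value>16384</value>, since size suffixes multiply by powers of 1024. -->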
| |
| <property> |
| <name>dfs.client.slow.io.warning.threshold.ms</name> |
| <value>30000</value> |
| <description>The threshold in milliseconds at which we will log a slow
| I/O warning in a DFSClient. By default, this parameter is set to 30000
| milliseconds (30 seconds).
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.slow.io.warning.threshold.ms</name> |
| <value>300</value> |
| <description>The threshold in milliseconds at which we will log a slow
| I/O warning in a datanode. By default, this parameter is set to 300
| milliseconds.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.processcommands.threshold</name> |
| <value>2s</value> |
| <description>The threshold in milliseconds at which we will log slow
| command processing in BPServiceActor. By default, this parameter is set
| to 2 seconds.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.enabled</name> |
| <value>false</value> |
| <description> |
| Set to true to enable dead node detection on the client side. Then all the DFSInputStreams of the same client can
| share the dead node information. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.probe.deadnode.threads</name> |
| <value>10</value> |
| <description> |
| The maximum number of threads to use for probing dead nodes.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.idle.sleep.ms</name> |
| <value>10000</value> |
| <description> |
| The sleep time of DeadNodeDetector per iteration. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.probe.suspectnode.threads</name> |
| <value>10</value> |
| <description> |
| The maximum number of threads to use for probing suspect nodes.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.rpc.threads</name> |
| <value>20</value> |
| <description> |
| The maximum number of threads to use for making RPC calls to recheck the liveness of dead nodes.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.probe.deadnode.interval.ms</name> |
| <value>60000</value> |
| <description> |
| Interval in milliseconds between probes of dead nodes.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.probe.suspectnode.interval.ms</name> |
| <value>300</value> |
| <description> |
| Interval in milliseconds between probes of suspect nodes.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.probe.connection.timeout.ms</name> |
| <value>20000</value> |
| <description> |
| Connection timeout in milliseconds for probing dead nodes.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.refresh.read-block-locations.ms</name> |
| <value>0</value> |
| <description> |
| The period, in milliseconds, at which cached block locations (LocatedBlocks)
| are refreshed. A value of 0 disables the feature.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lease-recheck-interval-ms</name> |
| <value>2000</value> |
| <description>The interval, in milliseconds, at which the NameNode's lease
| manager rechecks leases for expiration.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.max-lock-hold-to-release-lease-ms</name> |
| <value>25</value> |
| <description>During the release of leases, a lock is held that blocks any
| operations on the namenode. In order not to block them for too long,
| we stop releasing leases after this maximum lock hold time, in milliseconds.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.write-lock-reporting-threshold-ms</name> |
| <value>5000</value> |
| <description>When a write lock is held on the namenode for a long time, |
| this will be logged as the lock is released. This sets how long the |
| lock must be held for logging to occur. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.read-lock-reporting-threshold-ms</name> |
| <value>5000</value> |
| <description>When a read lock is held on the namenode for a long time, |
| this will be logged as the lock is released. This sets how long the |
| lock must be held for logging to occur. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lock.detailed-metrics.enabled</name> |
| <value>false</value> |
| <description>If true, the namenode will keep track of how long various |
| operations hold the Namesystem lock for and emit this as metrics. These |
| metrics have names of the form FSN(Read|Write)LockNanosOperationName, |
| where OperationName denotes the name of the operation that initiated the |
| lock hold (this will be OTHER for certain uncategorized operations) and |
| they export the hold time values in nanoseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fslock.fair</name> |
| <value>true</value> |
| <description>If this is true, the FS Namesystem lock will be used in Fair mode,
| which will help to prevent writer threads from being starved, but can
| lower lock throughput. See java.util.concurrent.locks.ReentrantReadWriteLock
| for more information on fair/non-fair locks. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.lock.fair</name> |
| <value>true</value> |
| <description>If this is true, the Datanode FsDataset lock will be used in Fair |
| mode, which will help to prevent writer threads from being starved, but can |
| lower lock throughput. See java.util.concurrent.locks.ReentrantReadWriteLock |
| for more information on fair/non-fair locks. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.lock.read.write.enabled</name> |
| <value>true</value> |
| <description>If this is true, the FsDataset lock will be a read write lock. If |
| it is false, all locks will be a write lock. |
| Enabling this should give better datanode throughput, as many read only |
| functions can run concurrently under the read lock, when they would |
| previously have required the exclusive write lock. As the feature is |
| experimental, this switch can be used to disable the shared read lock, and |
| cause all lock acquisitions to use the exclusive write lock. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.lock-reporting-threshold-ms</name> |
| <value>300</value> |
| <description>When a thread waits to obtain a lock, or holds a lock, for
| longer than this threshold, a log message will be written. Note that
| dfs.lock.suppress.warning.interval ensures a single log message is
| emitted per interval for waiting threads and a single message for holding
| threads to avoid excessive logging.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.startup.delay.block.deletion.sec</name> |
| <value>0</value> |
| <description>The delay in seconds for which block deletion is paused
| after Namenode startup. By default it is disabled.
| In the case where a directory with a large number of directories and files
| is deleted, the suggested delay is one hour, to give the administrator
| enough time to notice the large number of pending deletion blocks and take
| corrective action.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.block.id.layout.upgrade.threads</name> |
| <value>6</value> |
| <description>The number of threads to use when creating hard links from |
| current to previous blocks during upgrade of a DataNode to block ID-based |
| block layout (see HDFS-6482 for details on the layout).</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.list.encryption.zones.num.responses</name> |
| <value>100</value> |
| <description>When listing encryption zones, the maximum number of zones |
| that will be returned in a batch. Fetching the list incrementally in |
| batches improves namenode performance. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.list.reencryption.status.num.responses</name> |
| <value>100</value> |
| <description>When listing re-encryption status, the maximum number of zones |
| that will be returned in a batch. Fetching the list incrementally in |
| batches improves namenode performance. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.list.openfiles.num.responses</name> |
| <value>1000</value> |
| <description> |
| When listing open files, the maximum number of open files that will be |
| returned in a single batch. Fetching the list incrementally in batches |
| improves namenode performance. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edekcacheloader.interval.ms</name> |
| <value>1000</value> |
| <description>When a KeyProvider is configured, the interval at which the
| edek cache is warmed up after the NN starts up or becomes active. All edeks
| will be loaded from the KMS into the provider cache. The edek cache loader
| will keep trying to warm up the cache until it succeeds or the NN leaves
| the active state.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edekcacheloader.initial.delay.ms</name> |
| <value>3000</value> |
| <description>When a KeyProvider is configured, the delay before the first
| attempt to warm up the edek cache after the NN starts up or becomes active.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reencrypt.sleep.interval</name> |
| <value>1m</value> |
| <description>Interval for which the re-encrypt EDEK thread sleeps in the
| main loop. The interval accepts units. If none is given, milliseconds are
| assumed.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reencrypt.batch.size</name> |
| <value>1000</value> |
| <description>How many EDEKs the re-encrypt thread should process in one batch.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reencrypt.throttle.limit.handler.ratio</name> |
| <value>1.0</value> |
| <description>Throttling ratio for re-encryption, indicating what fraction
| of time the re-encrypt handler thread should work under the NN read lock.
| Values larger than 1.0 are interpreted as 1.0. Zero or negative values are
| invalid and will fail NN startup.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reencrypt.throttle.limit.updater.ratio</name> |
| <value>1.0</value> |
| <description>Throttling ratio for re-encryption, indicating what fraction
| of time the re-encrypt updater thread should work under the NN write lock.
| Values larger than 1.0 are interpreted as 1.0. Zero or negative values are
| invalid and will fail NN startup.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reencrypt.edek.threads</name> |
| <value>10</value> |
| <description>Maximum number of re-encrypt threads to contact the KMS |
| and re-encrypt the edeks. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.inotify.max.events.per.rpc</name> |
| <value>1000</value> |
| <description>Maximum number of events that will be sent to an inotify client |
| in a single RPC response. The default value attempts to amortize away |
| the overhead for this RPC while avoiding huge memory requirements for the |
| client and NameNode (1000 events should consume no more than 1 MB.) |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.user.home.dir.prefix</name> |
| <value>/user</value> |
| <description>The directory to prepend to the user name to get the user's
| home directory.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.cache.revocation.timeout.ms</name> |
| <value>900000</value> |
| <description>When the DFSClient reads from a block file which the DataNode is |
| caching, the DFSClient can skip verifying checksums. The DataNode will |
| keep the block file in cache until the client is done. If the client takes |
| an unusually long time, though, the DataNode may need to evict the block |
| file from the cache anyway. This value controls how long the DataNode will |
| wait for the client to release a replica that it is reading without |
| checksums. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.cache.revocation.polling.ms</name> |
| <value>500</value> |
| <description>How often the DataNode should poll to see if the clients have |
| stopped using a replica that the DataNode wants to uncache. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.enabled</name> |
| <value>true</value> |
| <description> |
| Allow users to change the storage policy on files and directories. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.permissions.superuser-only</name> |
| <value>false</value> |
| <description> |
| Allow only superuser role to change the storage policy on files and |
| directories. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.legacy-oiv-image.dir</name> |
| <value></value> |
| <description>Determines where to save the namespace in the old fsimage format
| during checkpointing by the standby NameNode or SecondaryNameNode. Users can
| dump the contents of the old format fsimage with the oiv_legacy command. If
| the value is not specified, the old format fsimage will not be saved during
| checkpointing.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.top.enabled</name> |
| <value>true</value> |
| <description>Enable nntop: reporting top users on namenode |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.top.window.num.buckets</name> |
| <value>10</value> |
| <description>Number of buckets in the rolling window implementation of nntop |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.top.num.users</name> |
| <value>10</value> |
| <description>Number of top users returned by the top tool |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.top.windows.minutes</name> |
| <value>1,5,25</value> |
| <description>Comma separated list of nntop reporting periods in minutes
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.ugi.expire.after.access</name> |
| <value>600000</value> |
| <description>How long in milliseconds after the last access
| the cached UGI will expire. With 0, it never expires.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.blocks.per.postponedblocks.rescan</name> |
| <value>10000</value> |
| <description>Number of blocks to rescan for each iteration of |
| postponedMisreplicatedBlocks. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.block-pinning.enabled</name> |
| <value>false</value> |
| <description>Whether to pin blocks on the favored DataNode.</description>
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.locateFollowingBlock.initial.delay.ms</name> |
| <value>400</value> |
| <description>The initial delay (unit is ms) for locateFollowingBlock.
| The delay time will increase exponentially (doubling) for each retry
| until dfs.client.block.write.locateFollowingBlock.max.delay.ms is reached;
| after that, the delay for each retry will be
| dfs.client.block.write.locateFollowingBlock.max.delay.ms.
| </description> |
| </property> |
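| <!-- Illustrative backoff with the defaults above: successive retries wait
|      roughly 400 ms, 800 ms, 1600 ms, and so on, doubling each time until
|      the 60000 ms cap is reached. -->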
| |
| <property> |
| <name>dfs.client.block.write.locateFollowingBlock.max.delay.ms</name> |
| <value>60000</value> |
| <description> |
| The maximum delay (unit is ms) before retrying locateFollowingBlock. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.zkfc.nn.http.timeout.ms</name> |
| <value>20000</value> |
| <description> |
| The HTTP connection and read timeout value (unit is ms) used when DFS ZKFC
| tries to get the local NN thread dump after the local NN becomes
| SERVICE_NOT_RESPONDING or SERVICE_UNHEALTHY.
| If it is set to zero, DFS ZKFC won't get the local NN thread dump.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.nn.not-become-active-in-safemode</name> |
| <value>false</value> |
| <description> |
| When set to true, this prevents safe mode namenodes from becoming active
| while other standby namenodes might be ready to serve requests.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.tail-edits.in-progress</name> |
| <value>false</value> |
| <description> |
| Whether to enable the standby namenode to tail in-progress edit logs.
| Clients might want to turn it on when they want the Standby NN to have
| more up-to-date data. When using the QuorumJournalManager, this enables |
| tailing of edit logs via the RPC-based mechanism, rather than streaming, |
| which allows for much fresher data. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.state.context.enabled</name> |
| <value>false</value> |
| <description> |
| Whether to enable the namenode to send its current txnid back to the client.
| Setting this to true is required by Consistent Read from Standby feature. |
| But for regular cases, this should be set to false to avoid the overhead |
| of updating and maintaining this state. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.ec.system.default.policy</name> |
| <value>RS-6-3-1024k</value> |
| <description>The default erasure coding policy name that will be used
| on a path if no policy name is passed.
| </description> |
| </property> |
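| <!-- The policy name encodes codec, data units, parity units, and cell size:
|      RS-6-3-1024k is Reed-Solomon with 6 data blocks, 3 parity blocks, and
|      a 1024 KB striping cell. -->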
| |
| <property> |
| <name>dfs.namenode.ec.policies.max.cellsize</name> |
| <value>4194304</value> |
| <description>The maximum cell size of erasure coding policy. Default is 4MB. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.ec.userdefined.policy.allowed</name> |
| <value>true</value> |
| <description>If set to false, addition of user defined
| erasure coding policies is not allowed.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.ec.reconstruction.stripedread.timeout.millis</name> |
| <value>5000</value> |
| <description>Datanode striped read timeout in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.ec.reconstruction.stripedread.buffer.size</name> |
| <value>65536</value> |
| <description>Datanode striped read buffer size. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.ec.reconstruction.threads</name> |
| <value>8</value> |
| <description> |
| Number of threads used by the Datanode for background |
| reconstruction work. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.ec.reconstruction.xmits.weight</name> |
| <value>0.5</value> |
| <description> |
| The Datanode uses the xmits weight to calculate the relative cost of EC recovery
| tasks compared to replicated block recovery, whose xmits is always 1.
| Namenode then uses xmits reported from datanode to throttle recovery tasks |
| for EC and replicated blocks. |
| The xmits of an erasure coding recovery task is calculated as the maximum |
| value between the number of read streams and the number of write streams. |
| </description> |
| </property> |
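| <!-- Illustrative arithmetic: reconstructing one lost block of an RS-6-3
|      stripe reads from 6 sources and writes 1 target, so with the default
|      weight of 0.5 the task's xmits = 0.5 * max(6, 1) = 3. -->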
| |
| <property> |
| <name>dfs.datanode.ec.reconstruction.validation</name> |
| <value>false</value> |
| <description> |
| Decides whether the datanode validates that EC reconstruction tasks
| reconstruct target blocks correctly. When validation fails, reconstruction
| tasks will fail and be retried by the namenode.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.quota.init-threads</name> |
| <value>4</value> |
| <description> |
| The number of concurrent threads to be used in quota initialization. The |
| speed of quota initialization also affects the namenode fail-over latency. |
| If the namespace is large, try increasing this.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.transfer.socket.send.buffer.size</name> |
| <value>0</value> |
| <description> |
| Socket send buffer size for DataXceiver (mirroring packets to downstream
| in the pipeline). This may affect TCP connection throughput.
| If it is set to zero or a negative value, no buffer size will be set
| explicitly, thus enabling TCP auto-tuning on systems that support it.
| The default value is 0.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.transfer.socket.recv.buffer.size</name> |
| <value>0</value> |
| <description> |
| Socket receive buffer size for DataXceiver (receiving packets from the
| client during block writing). This may affect TCP connection throughput.
| If it is set to zero or a negative value, no buffer size will be set
| explicitly, thus enabling TCP auto-tuning on systems that support it.
| The default value is 0.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.upgrade.domain.factor</name> |
| <value>${dfs.replication}</value> |
| <description> |
| This is valid only when block placement policy is set to |
| BlockPlacementPolicyWithUpgradeDomain. It defines the number of |
| unique upgrade domains any block's replicas should have. |
| When the number of replicas is less than or equal to this value, the policy
| ensures each replica has a unique upgrade domain. When the number of
| replicas is greater than this value, the policy ensures the number of |
| unique domains is at least this value. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.bp-ready.timeout</name> |
| <value>20s</value> |
| <description> |
| The maximum wait time for the datanode to be ready before failing the
| received request. Setting this to 0 fails requests right away if the
| datanode is not yet registered with the namenode. This wait time
| reduces initial request failures after datanode restart.
| Multiple time unit suffixes are supported (case insensitive), as described
| in dfs.heartbeat.interval. If no time unit is specified, seconds are
| assumed.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.cached-dfsused.check.interval.ms</name> |
| <value>600000</value> |
| <description> |
| The maximum age, in milliseconds, at which the DU_CACHE_FILE in each
| volume is still considered valid when it is loaded. During rolling upgrade
| operations, the dfsUsed cache file of each volume usually expires, forcing
| the datanode to redo the du operations, which makes the datanode start
| slowly. Adjusting this property keeps the cache file valid for as long as
| you want.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.rest-csrf.enabled</name> |
| <value>false</value> |
| <description> |
| If true, then enables WebHDFS protection against cross-site request forgery |
| (CSRF). The WebHDFS client also uses this property to determine whether or |
| not it needs to send the custom CSRF prevention header in its HTTP requests. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.rest-csrf.custom-header</name> |
| <value>X-XSRF-HEADER</value> |
| <description> |
| The name of a custom header that HTTP requests must send when protection |
| against cross-site request forgery (CSRF) is enabled for WebHDFS by setting |
| dfs.webhdfs.rest-csrf.enabled to true. The WebHDFS client also uses this |
| property to determine whether or not it needs to send the custom CSRF |
| prevention header in its HTTP requests. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.rest-csrf.methods-to-ignore</name> |
| <value>GET,OPTIONS,HEAD,TRACE</value> |
| <description> |
| A comma-separated list of HTTP methods that do not require HTTP requests to |
| include a custom header when protection against cross-site request forgery |
| (CSRF) is enabled for WebHDFS by setting dfs.webhdfs.rest-csrf.enabled to |
| true. The WebHDFS client also uses this property to determine whether or |
| not it needs to send the custom CSRF prevention header in its HTTP requests. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.rest-csrf.browser-useragents-regex</name> |
| <value>^Mozilla.*,^Opera.*</value> |
| <description> |
| A comma-separated list of regular expressions used to match against an HTTP |
| request's User-Agent header when protection against cross-site request |
| forgery (CSRF) is enabled for WebHDFS by setting |
| dfs.webhdfs.rest-csrf.enabled to true. If the incoming User-Agent matches
| any of these regular expressions, then the request is considered to be sent |
| by a browser, and therefore CSRF prevention is enforced. If the request's |
| User-Agent does not match any of these regular expressions, then the request |
| is considered to be sent by something other than a browser, such as scripted |
| automation. In this case, CSRF is not a potential attack vector, so |
| the prevention is not enforced. This helps achieve backwards-compatibility |
| with existing automation that has not been updated to send the CSRF |
| prevention header. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.xframe.enabled</name> |
| <value>true</value> |
| <description> |
| If true, then enables protection against clickjacking by returning
| the X-FRAME-OPTIONS header with its value set to SAMEORIGIN.
| Clickjacking protection prevents an attacker from using transparent or |
| opaque layers to trick a user into clicking on a button |
| or link on another page. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.xframe.value</name> |
| <value>SAMEORIGIN</value> |
| <description> |
| This configuration value allows the user to specify the value for the
| X-FRAME-OPTIONS header. The possible values for this field are
| DENY, SAMEORIGIN and ALLOW-FROM. Any other value will throw an
| exception when the namenode and datanodes are starting up.
| </description> |
| </property> |
| |
| |
| <property> |
| <name>dfs.balancer.keytab.enabled</name> |
| <value>false</value> |
| <description> |
| Set to true to enable login using a keytab for Kerberized Hadoop. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.address</name> |
| <value>0.0.0.0:0</value> |
| <description> |
| The hostname used for a keytab based Kerberos login. Keytab based login |
| can be enabled with dfs.balancer.keytab.enabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.keytab.file</name> |
| <value></value> |
| <description> |
| The keytab file used by the Balancer to login as its |
| service principal. The principal name is configured with |
| dfs.balancer.kerberos.principal. Keytab based login can be |
| enabled with dfs.balancer.keytab.enabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.kerberos.principal</name> |
| <value></value> |
| <description> |
| The Balancer principal. This is typically set to |
| balancer/_HOST@REALM.TLD. The Balancer will substitute _HOST with its |
| own fully qualified hostname at startup. The _HOST placeholder |
| allows using the same configuration setting on different servers. |
| Keytab based login can be enabled with dfs.balancer.keytab.enabled. |
| </description> |
| </property> |
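| <!-- A minimal sketch of keytab-based Balancer login; the keytab path and
|      realm below are placeholders:
|
|      <property>
|        <name>dfs.balancer.keytab.enabled</name>
|        <value>true</value>
|      </property>
|      <property>
|        <name>dfs.balancer.keytab.file</name>
|        <value>/etc/security/keytabs/balancer.service.keytab</value>
|      </property>
|      <property>
|        <name>dfs.balancer.kerberos.principal</name>
|        <value>balancer/_HOST@EXAMPLE.COM</value>
|      </property>
| -->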
| |
| <property> |
| <name>dfs.http.client.retry.policy.enabled</name> |
| <value>false</value> |
| <description> |
| If "true", enable the retry policy of WebHDFS client. |
| If "false", retry policy is turned off. |
| Enabling the retry policy can be quite useful while using WebHDFS to
| copy large files between clusters that could time out, or to
| copy files between HA clusters that could fail over during the copy.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.http.client.retry.policy.spec</name> |
| <value>10000,6,60000,10</value> |
| <description> |
| Specify a policy of multiple linear random retry for WebHDFS client, |
| e.g. given pairs of number of retries and sleep time (n0, t0), (n1, t1), |
| ..., the first n0 retries sleep t0 milliseconds on average, |
| the following n1 retries sleep t1 milliseconds on average, and so on. |
| </description> |
| </property> |
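| <!-- Reading the default spec "10000,6,60000,10": the raw value alternates
|      sleep time and retry count, so the first 6 retries sleep about
|      10000 ms each, the next 10 retries sleep about 60000 ms each, and
|      then the client gives up. -->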
| |
| <property> |
| <name>dfs.http.client.failover.max.attempts</name> |
| <value>15</value> |
| <description> |
| Specify the max number of failover attempts for WebHDFS client |
| in case of network exception. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.http.client.retry.max.attempts</name> |
| <value>10</value> |
| <description> |
| Specify the max number of retry attempts for the WebHDFS client. If the
| difference between retry attempts and failover attempts is larger than
| the max number of retry attempts, there will be no more retries.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.http.client.failover.sleep.base.millis</name> |
| <value>500</value> |
| <description> |
| Specify the base amount of time in milliseconds upon which the |
| exponentially increased sleep time between retries or failovers |
| is calculated for WebHDFS client. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.http.client.failover.sleep.max.millis</name> |
| <value>15000</value> |
| <description> |
| Specify the upper bound of sleep time in milliseconds between |
| retries or failovers for WebHDFS client. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.hosts.provider.classname</name> |
| <value>org.apache.hadoop.hdfs.server.blockmanagement.HostFileManager</value> |
| <description> |
| The class that provides access to host files.
| org.apache.hadoop.hdfs.server.blockmanagement.HostFileManager is used
| by default, which loads files specified by dfs.hosts and dfs.hosts.exclude.
| If org.apache.hadoop.hdfs.server.blockmanagement.CombinedHostFileManager is
| used, it will load the JSON file defined in dfs.hosts.
| Changing the class name requires a NameNode restart. "dfsadmin -refreshNodes"
| only refreshes the configuration files used by the class.
| </description> |
| </property> |
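| <!-- A hedged example of switching to the JSON-based host file manager;
|      the file path is hypothetical:
|
|      <property>
|        <name>dfs.namenode.hosts.provider.classname</name>
|        <value>org.apache.hadoop.hdfs.server.blockmanagement.CombinedHostFileManager</value>
|      </property>
|      <property>
|        <name>dfs.hosts</name>
|        <value>/etc/hadoop/conf/dfs.hosts.json</value>
|      </property>
| -->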
| |
| <property> |
| <name>datanode.https.port</name> |
| <value>50475</value> |
| <description> |
| HTTPS port for DataNode. |
| </description> |
| </property> |
| <property> |
| <name>dfs.namenode.get-blocks.max-qps</name> |
| <value>20</value> |
| <description> |
| The maximum number of getBlocks RPCs data movement utilities can make to |
| a NameNode per second. Values less than or equal to 0 disable throttling. |
| This affects anything that uses a NameNodeConnector, i.e., the Balancer, |
| Mover, and StoragePolicySatisfier. |
| </description> |
| </property> |
| <property> |
| <name>dfs.balancer.dispatcherThreads</name> |
| <value>200</value> |
| <description> |
| Size of the thread pool for the HDFS balancer block mover
| (dispatchExecutor).
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.movedWinWidth</name> |
| <value>5400000</value> |
| <description> |
| Window of time in ms for the HDFS balancer tracking blocks and their
| locations.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.moverThreads</name> |
| <value>1000</value> |
| <description> |
| Thread pool size for executing block moves (moverThreadAllocator).
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.max-size-to-move</name> |
| <value>10737418240</value> |
| <description> |
| Maximum number of bytes that can be moved by the balancer in a single |
| thread. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.getBlocks.min-block-size</name> |
| <value>10485760</value> |
| <description> |
| Minimum block size in bytes; blocks smaller than this threshold are
| ignored when fetching a source's block list.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.getBlocks.size</name> |
| <value>2147483648</value> |
| <description> |
| Total size in bytes of Datanode blocks to get when fetching a source's |
| block list. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.block-move.timeout</name> |
| <value>0</value> |
| <description> |
| Maximum amount of time in milliseconds for a block to move. If this is set |
| greater than 0, Balancer will stop waiting for a block move completion |
| after this time. In typical clusters, a 3 to 5 minute timeout is reasonable. |
| If timeout happens to a large proportion of block moves, this needs to be |
| increased. It could also be that too much work is dispatched and many nodes |
| are constantly exceeding the bandwidth limit as a result. In that case, |
| other balancer parameters might need to be adjusted. |
| It is disabled (0) by default. |
| </description> |
| </property> |
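| <!-- For example, <value>300000</value> gives the 5 minute timeout that the
|      description above suggests for typical clusters. -->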
| |
| <property> |
| <name>dfs.balancer.max-no-move-interval</name> |
| <value>60000</value> |
| <description> |
| If this specified amount of time has elapsed and no block has been moved |
| out of a source DataNode, no more effort will be made to move blocks out of
| this DataNode in the current Balancer iteration. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.max-iteration-time</name> |
| <value>1200000</value> |
| <description> |
| Maximum amount of time, in milliseconds, that an iteration can be run by
| the Balancer. After this time the Balancer will stop the iteration and
| reevaluate the work that needs to be done to balance the cluster. The
| default value is 20 minutes.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.invalidate.limit</name> |
| <value>1000</value> |
| <description> |
| The maximum number of block invalidations sent by the namenode to a
| datanode per heartbeat deletion command. This property works with
| "dfs.namenode.invalidate.work.pct.per.iteration" to throttle block |
| deletions. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.service.interval</name> |
| <value>5m</value> |
| <description> |
| The scheduling interval of the balancer when running as a long-running service.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.service.retries.on.exception</name> |
| <value>5</value> |
| <description> |
| When the balancer is executed as a long-running service, it will retry upon encountering an exception. This |
| configuration determines how many times it will retry before considering the exception to be fatal and quitting. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.misreplication.processing.limit</name> |
| <value>10000</value> |
| <description> |
| Maximum number of blocks to process for initializing replication queues. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.placement.ec.classname</name> |
| <value>org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRackFaultTolerant</value> |
| <description> |
| Placement policy class for striped files. |
| Defaults to BlockPlacementPolicyRackFaultTolerant.class |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.replicator.classname</name> |
| <value>org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault</value> |
| <description> |
| Class representing block placement policy for non-striped files. |
| There are four block placement policies currently being supported: |
| BlockPlacementPolicyDefault, BlockPlacementPolicyWithNodeGroup, |
| BlockPlacementPolicyRackFaultTolerant and BlockPlacementPolicyWithUpgradeDomain. |
| BlockPlacementPolicyDefault chooses the desired number of targets |
| for placing block replicas in a default way. BlockPlacementPolicyWithNodeGroup
| places block replicas in an environment with a node-group layer. BlockPlacementPolicyRackFaultTolerant
| spreads the replicas across more racks.
| BlockPlacementPolicyWithUpgradeDomain places block replicas in a way that honors the upgrade domain policy.
| The details of placing replicas are documented in the javadoc of the corresponding policy classes. |
| The default policy is BlockPlacementPolicyDefault, and the corresponding class is |
| org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault. |
| </description> |
| </property> |
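| <!-- A hedged example of selecting the upgrade domain policy instead of the
|      default:
|
|      <property>
|        <name>dfs.block.replicator.classname</name>
|        <value>org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyWithUpgradeDomain</value>
|      </property>
| -->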
| |
| <property> |
| <name>dfs.blockreport.incremental.intervalMsec</name> |
| <value>0</value> |
| <description> |
| If set to a positive integer, the value in ms to wait between sending |
| incremental block reports from the Datanode to the Namenode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.checksum.type</name> |
| <value>CRC32C</value> |
| <description> |
| Checksum type |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.checksum.combine.mode</name> |
| <value>MD5MD5CRC</value> |
| <description> |
| Defines how lower-level chunk/block checksums are combined into file-level |
| checksums; the original MD5MD5CRC mode is not comparable between files |
| with different block layouts, while modes like COMPOSITE_CRC are |
| comparable independently of block layout. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.locateFollowingBlock.retries</name> |
| <value>5</value> |
| <description> |
| Number of retries to use when finding the next block during HDFS writes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.proxy.provider</name> |
| <value></value> |
| <description> |
| The prefix (plus a required nameservice ID) for the class name of the configured |
| Failover proxy provider for the host. For normal HA mode, please consult |
| the "Configuration Details" section of the HDFS High Availability documentation. |
| For observer reading mode, please use the custom class ObserverReadProxyProvider.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.random.order</name> |
| <value>true</value> |
| <description> |
| Determines if the failover proxies are picked in random order instead of the |
| configured order. Random order may be enabled for better load balancing |
| or to avoid always hitting failed ones first if the failed ones appear in the |
| beginning of the configured or resolved list. |
| For example, in the case of multiple RBF routers or ObserverNameNodes,
| it is recommended to be turned on for load balancing. |
| The config name can be extended with an optional nameservice ID |
| (of form dfs.client.failover.random.order[.nameservice]) in case multiple |
| nameservices exist and random order should be enabled for specific |
| nameservices. |
| </description> |
| </property> |
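| <!-- For example, to enable random order only for a hypothetical
|      nameservice "ns1":
|
|      <property>
|        <name>dfs.client.failover.random.order.ns1</name>
|        <value>true</value>
|      </property>
| -->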
| |
| <property> |
| <name>dfs.client.failover.resolve-needed</name> |
| <value>false</value> |
| <description> |
| Determines if the given nameservice address is a domain name which needs to |
| be resolved (using the resolver configured by dfs.client.failover.resolver-impl). |
| This adds a transparency layer in the client so the physical server address
| can change without changing the client. The config name can be extended with |
| an optional nameservice ID (of form dfs.client.failover.resolve-needed[.nameservice]) |
| to configure specific nameservices when multiple nameservices exist. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.resolver.impl</name> |
| <value>org.apache.hadoop.net.DNSDomainNameResolver</value> |
| <description> |
| Determines what class to use to resolve nameservice name to specific machine |
| address(es). The config name can be extended with an optional nameservice ID |
| (of form dfs.client.failover.resolver.impl[.nameservice]) to configure |
| specific nameservices when multiple nameservices exist. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.resolver.useFQDN</name> |
| <value>true</value> |
| <description> |
| Determines whether the resolved result is a fully qualified domain name
| instead of pure IP address(es). The config name can be extended with an optional
| nameservice ID (of form dfs.client.failover.resolver.useFQDN[.nameservice]) to
| configure specific nameservices when multiple nameservices exist.
| In a secure environment, this has to be enabled since Kerberos uses the FQDN
| in the machine's principal, so servers accessed by IP won't be recognized
| by the KDC.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.key.provider.cache.expiry</name> |
| <value>864000000</value> |
| <description> |
| DFS client security key cache expiration in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.max.block.acquire.failures</name> |
| <value>3</value> |
| <description> |
| Maximum failures allowed when trying to get block information from a specific datanode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.prefetch.size</name> |
| <value></value> |
| <description> |
| The number of bytes that the DFSClient will fetch from the Namenode
| during a read operation. Defaults to 10 * ${dfs.blocksize}. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.uri.cache.enabled</name> |
| <value>false</value> |
| <description> |
| If true, the DFS client will use a cache when creating URIs based on
| host:port, to reduce the frequency of URI object creation.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.short.circuit.replica.stale.threshold.ms</name> |
| <value>1800000</value> |
| <description> |
| Threshold in milliseconds after which a cached short-circuit replica is
| considered stale.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.shortcircuit.buffer.size</name> |
| <value>1048576</value> |
| <description> |
| Buffer size in bytes for short-circuit local reads. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.short.circuit.num</name> |
| <value>1</value> |
| <description> |
| Number of short-circuit caches. This setting should
| be in the range 1 - 5. Lower values will result in lower CPU consumption;
| higher values may speed up massively parallel reading of files.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.striped.threadpool.size</name> |
| <value>18</value> |
| <description> |
| The maximum number of threads used for parallel reading |
| in striped layout. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.replica.accessor.builder.classes</name> |
| <value></value> |
| <description> |
| Comma-separated classes for building ReplicaAccessor. If the classes
| are specified, the client will use an external BlockReader that uses the
| ReplicaAccessor built by the builder.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.retry.interval-ms.get-last-block-length</name> |
| <value>4000</value> |
| <description> |
| Retry interval in milliseconds between attempts to get
| block lengths from the datanodes.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.retry.max.attempts</name> |
| <value>10</value> |
| <description> |
| Max retry attempts for DFSClient talking to namenodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.retry.policy.enabled</name> |
| <value>false</value> |
| <description> |
| If true, turns on DFSClient retry policy. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.retry.policy.spec</name> |
| <value>10000,6,60000,10</value> |
| <description> |
| Set to pairs of timeouts and retries for DFSClient. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.retry.times.get-last-block-length</name> |
| <value>3</value> |
| <description> |
| Number of retries for calls to fetchLocatedBlocksAndGetLastBlockLength(). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.retry.window.base</name> |
| <value>3000</value> |
| <description> |
| Base time window in ms for DFSClient retries. For each retry attempt, |
| this value is extended linearly (e.g. 3000 ms for first attempt and |
| first retry, 6000 ms for second retry, 9000 ms for third retry, etc.). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.pipeline.recovery.max-retries</name> |
| <value>5</value> |
| <description> |
| If the DFS client encounters errors in the write pipeline,
| retry up to the number of times defined by this property before giving up.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.socket-timeout</name> |
| <value>60000</value> |
| <description> |
| Default timeout value in milliseconds for all sockets. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.socketcache.capacity</name> |
| <value>16</value> |
| <description> |
| Socket cache capacity (in entries) for short-circuit reads. |
| If this value is set to 0, the client socket cache is disabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.socketcache.expiryMsec</name> |
| <value>3000</value> |
| <description> |
| Socket cache expiration for short-circuit reads in msec. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.test.drop.namenode.response.number</name> |
| <value>0</value> |
| <description> |
| The number of Namenode responses dropped by DFSClient for each RPC call. Used |
| for testing the NN retry cache. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.hedged.read.threadpool.size</name> |
| <value>0</value> |
| <description> |
| Support 'hedged' reads in DFSClient. To enable this feature, set the parameter |
| to a positive number. The threadpool size is how many threads to dedicate |
| to the running of these 'hedged', concurrent reads in your client. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.hedged.read.threshold.millis</name> |
| <value>500</value> |
| <description> |
| Configure 'hedged' reads in DFSClient. This is the number of milliseconds |
| to wait before starting up a 'hedged' read. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.write.byte-array-manager.count-limit</name> |
| <value>2048</value> |
| <description> |
| The maximum number of arrays allowed for each array length. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.write.byte-array-manager.count-reset-time-period-ms</name> |
| <value>10000</value> |
| <description> |
| The time period in milliseconds after which the allocation count for each
| array length is reset to zero if there has been no increment.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.write.byte-array-manager.count-threshold</name> |
| <value>128</value> |
| <description> |
| The count threshold for each array length so that a manager is created only after the |
| allocation count exceeds the threshold. In other words, the particular array length |
| is not managed until the allocation count exceeds the threshold. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.write.byte-array-manager.enabled</name> |
| <value>false</value> |
| <description> |
| If true, enables byte array manager used by DFSOutputStream. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.write.max-packets-in-flight</name> |
| <value>80</value> |
| <description> |
| The maximum number of DFSPackets allowed in flight. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.reader.remote.buffer.size</name> |
| <value>512</value> |
| <description> |
| The output stream buffer size of a DFSClient remote read. The buffer's default value is 512 bytes. The buffer
| includes only some request parameters, namely: block, blockToken, clientName, startOffset, len, verifyChecksum,
| cachingStrategy.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.content-summary.limit</name> |
| <value>5000</value> |
| <description> |
| The maximum content summary counts allowed in one locking period. 0 or a negative number |
| means no limit (i.e. no yielding). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.content-summary.sleep-microsec</name> |
| <value>500</value> |
| <description> |
| The length of time in microseconds to put the thread to sleep, between reacquiring the locks
| in content summary computation. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.data.transfer.client.tcpnodelay</name> |
| <value>true</value> |
| <description> |
| If true, set TCP_NODELAY on sockets for transferring data from the DFS client.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.data.transfer.server.tcpnodelay</name> |
| <value>true</value> |
| <description> |
| If true, set TCP_NODELAY on sockets for transferring data between Datanodes.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.data.transfer.max.packet.size</name> |
| <value>16777216</value> |
| <description> |
| The max size of any single packet. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.balance.max.concurrent.moves</name> |
| <value>100</value> |
| <description> |
| Maximum number of threads for Datanode balancer pending moves. This |
| value is reconfigurable via the "dfsadmin -reconfig" command. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.data.transfer.bandwidthPerSec</name> |
| <value>0</value> |
| <description> |
| Specifies the maximum amount of bandwidth that data transfers can utilize
| for transferring blocks when BlockConstructionStage is
| PIPELINE_SETUP_CREATE and clientName is empty.
| When the bandwidth value is zero, there is no limit. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.data.write.bandwidthPerSec</name> |
| <value>0</value> |
| <description> |
| Specifies the maximum amount of bandwidth that data transfers can utilize
| for writing blocks or performing pipeline recovery when
| BlockConstructionStage is PIPELINE_SETUP_APPEND_RECOVERY or PIPELINE_SETUP_STREAMING_RECOVERY.
| When the bandwidth value is zero, there is no limit. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.fsdataset.factory</name> |
| <value></value> |
| <description> |
| The class name for the underlying storage that stores replicas for a |
| Datanode. Defaults to |
| org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetFactory. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.fsdataset.volume.choosing.policy</name> |
| <value></value> |
| <description> |
| The class name of the policy for choosing volumes in the list of |
| directories. Defaults to |
| org.apache.hadoop.hdfs.server.datanode.fsdataset.RoundRobinVolumeChoosingPolicy. |
| If you would like to take into account available disk space, set the |
| value to |
| "org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy". |
| </description> |
| </property> |
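| |
| <!-- |
| Illustrative example: an hdfs-site.xml entry selecting the available-space |
| policy named above. |
| |
| <property> |
| <name>dfs.datanode.fsdataset.volume.choosing.policy</name> |
| <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy</value> |
| </property> |
| --> |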
| |
| <property> |
| <name>dfs.datanode.hostname</name> |
| <value></value> |
| <description> |
| Optional. The hostname for the Datanode containing this |
| configuration file. Will be different for each machine. |
| Defaults to current hostname. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.lazywriter.interval.sec</name> |
| <value>60</value> |
| <description> |
| Interval in seconds between runs of the DataNode lazy persist writer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.network.counts.cache.max.size</name> |
| <value>2147483647</value> |
| <description> |
| The maximum number of entries the datanode per-host network error |
| count cache may contain. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.oob.timeout-ms</name> |
| <value>1500,0,0,0</value> |
| <description> |
| Timeout value when sending OOB response for each OOB type, which are |
| OOB_RESTART, OOB_RESERVED1, OOB_RESERVED2, and OOB_RESERVED3, |
| respectively. Currently, only OOB_RESTART is used. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.parallel.volumes.load.threads.num</name> |
| <value></value> |
| <description> |
| Maximum number of threads to use for upgrading data directories. |
| The default value is the number of storage directories in the |
| DataNode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.ram.disk.replica.tracker</name> |
| <value></value> |
| <description> |
| Name of the class implementing the RamDiskReplicaTracker interface. |
| Defaults to |
| org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.RamDiskReplicaLruTracker. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.restart.replica.expiration</name> |
| <value>50</value> |
| <description> |
| During shutdown for restart, the amount of time in seconds budgeted for |
| datanode restart. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.socket.reuse.keepalive</name> |
| <value>4000</value> |
| <description> |
| The window of time in ms before the DataXceiver closes a socket for a |
| single request. If a second request occurs within that window, the |
| socket can be reused. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.socket.write.timeout</name> |
| <value>480000</value> |
| <description> |
| Timeout in ms for client socket writes to DataNodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.sync.behind.writes.in.background</name> |
| <value>false</value> |
| <description> |
| If set to true, the sync_file_range() system call will occur |
| asynchronously. This property is only valid when the property |
| dfs.datanode.sync.behind.writes is true. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.transferTo.allowed</name> |
| <value>true</value> |
| <description> |
| If false, block transfers of 2GB or larger on 32-bit machines are |
| broken into smaller chunks. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.fixed.volume.size</name> |
| <value>false</value> |
| <description> |
| If false, the volume capacity is obtained by calling File.getTotalSpace() |
| during every heartbeat. |
| If true, the capacity is cached on the first call and reused later. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.replica.cache.root.dir</name> |
| <value></value> |
| <description> |
| Use this key to change the root directory of the replica cache. |
| The default root directory is currentDir. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.replica.cache.expiry.time</name> |
| <value>5m</value> |
| <description> |
| Time to live of cached replica files. This setting supports multiple time unit suffixes; if none is specified, milliseconds is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.fencing.methods</name> |
| <value></value> |
| <description> |
| A list of scripts or Java classes which will be used to fence |
| the Active NameNode during a failover. See the HDFS High |
| Availability documentation for details on automatic HA |
| configuration. |
| </description> |
| </property> |
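| |
| <!-- |
| Illustrative sketch: a newline-separated fencing configuration that tries |
| SSH fencing first and falls back to a shell command that always succeeds. |
| The methods are tried in order. |
| |
| <property> |
| <name>dfs.ha.fencing.methods</name> |
| <value>sshfence |
| shell(/bin/true)</value> |
| </property> |
| --> |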
| |
| <property> |
| <name>dfs.ha.standby.checkpoints</name> |
| <value>true</value> |
| <description> |
| If true, a NameNode in Standby state periodically takes a checkpoint |
| of the namespace, saves it to its local storage and then uploads it to |
| the remote NameNode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.zkfc.port</name> |
| <value>8019</value> |
| <description> |
| The port number that the zookeeper failover controller RPC |
| server binds to. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.allow.stale.reads</name> |
| <value>false</value> |
| <description> |
| If true, a NameNode in Standby state can process read requests, and |
| the results could be stale. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.edits.dir</name> |
| <value>/tmp/hadoop/dfs/journalnode/</value> |
| <description> |
| The directory where the journal edit files are stored. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.enable.sync</name> |
| <value>true</value> |
| <description> |
| If true, the journal nodes will sync with each other. The journal nodes |
| will periodically gossip with other journal nodes to compare edit log |
| manifests and if they detect any missing log segment, they will download |
| it from the other journal nodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.sync.interval</name> |
| <value>120000</value> |
| <description> |
| Time interval, in milliseconds, between two Journal Node syncs. |
| This configuration takes effect only if the journalnode sync is enabled |
| by setting the configuration parameter dfs.journalnode.enable.sync to true. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.edit-cache-size.bytes</name> |
| <value>1048576</value> |
| <description> |
| The size, in bytes, of the in-memory cache of edits to keep on the |
| JournalNode. This cache is used to serve edits for tailing via the RPC-based |
| mechanism, and is only enabled when dfs.ha.tail-edits.in-progress is true. |
| Transactions range in size but are around 200 bytes on average, so the |
| default of 1MB can store around 5000 transactions. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.kerberos.internal.spnego.principal</name> |
| <value></value> |
| <description> |
| Kerberos SPNEGO principal name used by the journal node. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.kerberos.principal</name> |
| <value></value> |
| <description> |
| Kerberos principal name for the journal node. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.keytab.file</name> |
| <value></value> |
| <description> |
| Kerberos keytab file for the journal node. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.batched.ls.limit</name> |
| <value>100</value> |
| <description> |
| Limits the number of paths that can be listed in a single batched |
| listing call. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ls.limit</name> |
| <value>1000</value> |
| <description> |
| Limits the number of files printed by ls. If less than or equal to |
| zero, at most DFS_LIST_LIMIT_DEFAULT (= 1000) will be printed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.mover.movedWinWidth</name> |
| <value>5400000</value> |
| <description> |
| The minimum time interval, in milliseconds, before a block that has |
| been moved can be moved again. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.mover.moverThreads</name> |
| <value>1000</value> |
| <description> |
| Configures the Mover's thread pool size. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.mover.retry.max.attempts</name> |
| <value>10</value> |
| <description> |
| The maximum number of retries before the Mover considers the |
| move failed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.mover.keytab.enabled</name> |
| <value>false</value> |
| <description> |
| Set to true to enable login using a keytab for Kerberized Hadoop. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.mover.address</name> |
| <value>0.0.0.0:0</value> |
| <description> |
| The hostname used for a keytab based Kerberos login. Keytab based login |
| can be enabled with dfs.mover.keytab.enabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.mover.keytab.file</name> |
| <value></value> |
| <description> |
| The keytab file used by the Mover to login as its |
| service principal. The principal name is configured with |
| dfs.mover.kerberos.principal. Keytab based login can be |
| enabled with dfs.mover.keytab.enabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.mover.kerberos.principal</name> |
| <value></value> |
| <description> |
| The Mover principal. This is typically set to |
| mover/_HOST@REALM.TLD. The Mover will substitute _HOST with its |
| own fully qualified hostname at startup. The _HOST placeholder |
| allows using the same configuration setting on different servers. |
| Keytab based login can be enabled with dfs.mover.keytab.enabled. |
| </description> |
| </property> |
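| |
| <!-- |
| Illustrative sketch: enabling keytab-based login for the Mover on a |
| Kerberized cluster. The keytab path and realm below are placeholders. |
| |
| <property> |
| <name>dfs.mover.keytab.enabled</name> |
| <value>true</value> |
| </property> |
| <property> |
| <name>dfs.mover.keytab.file</name> |
| <value>/etc/security/keytabs/mover.keytab</value> |
| </property> |
| <property> |
| <name>dfs.mover.kerberos.principal</name> |
| <value>mover/_HOST@EXAMPLE.COM</value> |
| </property> |
| --> |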
| |
| <property> |
| <name>dfs.mover.max-no-move-interval</name> |
| <value>60000</value> |
| <description> |
| If this specified amount of time has elapsed and no block has been moved |
| out of a source DataNode, no more effort will be made to move blocks out of |
| this DataNode in the current Mover iteration. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.audit.log.async</name> |
| <value>false</value> |
| <description> |
| If true, enables asynchronous audit log. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.audit.log.async.blocking</name> |
| <value>true</value> |
| <description> |
| Only used when asynchronous audit logging is enabled. Sets whether the audit |
| log async appender should wait if there is no space available in the event |
| buffer, or return immediately. Default value is true. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.audit.log.async.buffer.size</name> |
| <value>128</value> |
| <description> |
| Only used when asynchronous audit logging is enabled. Sets the number of |
| audit logs allowed in the event buffer before the calling thread is blocked |
| (if dfs.namenode.audit.log.async.blocking is true) or until logs are |
| summarized and discarded. Default value is 128. |
| </description> |
| </property> |
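| |
| <!-- |
| Illustrative sketch: turning on asynchronous audit logging with a larger, |
| non-blocking event buffer. The buffer size of 512 is an arbitrary example. |
| |
| <property> |
| <name>dfs.namenode.audit.log.async</name> |
| <value>true</value> |
| </property> |
| <property> |
| <name>dfs.namenode.audit.log.async.blocking</name> |
| <value>false</value> |
| </property> |
| <property> |
| <name>dfs.namenode.audit.log.async.buffer.size</name> |
| <value>512</value> |
| </property> |
| --> |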
| |
| <property> |
| <name>dfs.namenode.audit.log.token.tracking.id</name> |
| <value>false</value> |
| <description> |
| If true, adds a tracking ID for all audit log events. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.available-space-block-placement-policy.balanced-space-preference-fraction</name> |
| <value>0.6</value> |
| <description> |
| Only used when the dfs.block.replicator.classname is set to |
| org.apache.hadoop.hdfs.server.blockmanagement.AvailableSpaceBlockPlacementPolicy. |
| Special value between 0 and 1, noninclusive. Increases chance of |
| placing blocks on Datanodes with less disk space used. |
| </description> |
| </property> |
| |
| <property> |
| <name> |
| dfs.namenode.available-space-block-placement-policy.balance-local-node |
| </name> |
| <value>false</value> |
| <description> |
| Only used when the dfs.block.replicator.classname is set to |
| org.apache.hadoop.hdfs.server.blockmanagement.AvailableSpaceBlockPlacementPolicy. |
| If true, balances the local node too. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.available-space-rack-fault-tolerant-block-placement-policy.balanced-space-preference-fraction</name> |
| <value>0.6</value> |
| <description> |
| Only used when the dfs.block.replicator.classname is set to |
| org.apache.hadoop.hdfs.server.blockmanagement.AvailableSpaceRackFaultTolerantBlockPlacementPolicy. |
| Special value between 0 and 1, noninclusive. Increases the chance of |
| placing blocks on DataNodes with less disk space used. The closer the |
| value is to 1, the greater the chance of choosing a DataNode with a |
| lower percentage of used space; the closer it is to 0, the greater the |
| chance of choosing a heavily loaded DataNode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.backup.dnrpc-address</name> |
| <value></value> |
| <description> |
| Service RPC address for the backup Namenode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.token.always-use</name> |
| <value>false</value> |
| <description> |
| For testing. Setting to true always allows the DT secret manager |
| to be used, even if security is disabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.asynclogging</name> |
| <value>true</value> |
| <description> |
| If set to true, enables asynchronous edit logs in the Namenode. If set |
| to false, the Namenode uses the traditional synchronous edit logs. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.asynclogging.pending.queue.size</name> |
| <value>4096</value> |
| <description> |
| The size of the pending edits queue for FSEditLogAsync. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.dir.minimum</name> |
| <value>1</value> |
| <description> |
| dfs.namenode.edits.dir includes both required directories |
| (specified by dfs.namenode.edits.dir.required) and optional directories. |
| |
| The number of usable optional directories must be greater than or equal |
| to this property. If the number of usable optional directories falls |
| below dfs.namenode.edits.dir.minimum, HDFS will issue an error. |
| |
| This property defaults to 1. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.journal-plugin</name> |
| <value></value> |
| <description> |
| When FSEditLog is creating JournalManagers from dfs.namenode.edits.dir, |
| and it encounters a URI with a scheme other than "file", it loads the |
| name of the implementing class from |
| "dfs.namenode.edits.journal-plugin.[scheme]". This class must implement |
| JournalManager and have a constructor which takes (Configuration, URI). |
| </description> |
| </property> |
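| |
| <!-- |
| Illustrative sketch: if dfs.namenode.edits.dir contained a URI such as |
| myjournal://host/path, FSEditLog would look up the plugin class under the |
| key below. The scheme and class name are hypothetical. |
| |
| <property> |
| <name>dfs.namenode.edits.journal-plugin.myjournal</name> |
| <value>com.example.MyJournalManager</value> |
| </property> |
| --> |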
| |
| <property> |
| <name>dfs.namenode.file.close.num-committed-allowed</name> |
| <value>0</value> |
| <description> |
| Normally a file can only be closed when all its blocks are committed. |
| When this value is set to a positive integer N, a file can be closed |
| when N blocks are committed and the rest are complete. In the case of |
| erasure-coded blocks, a committed block is allowed only when the block |
| group is complete, i.e., there is no missing or lost block in the block group. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.inode.attributes.provider.class</name> |
| <value></value> |
| <description> |
| Name of class to use for delegating HDFS authorization. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.inode.attributes.provider.bypass.users</name> |
| <value></value> |
| <description> |
| A list of user principals (in secure cluster) or user names (in insecure |
| cluster) for whom the external attributes provider will be bypassed for all |
| operations. This means file attributes stored in HDFS instead of the |
| external provider will be used for permission checking and be returned when |
| requested. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.max-num-blocks-to-log</name> |
| <value>1000</value> |
| <description> |
| Puts a limit on the number of blocks printed to the log by the Namenode |
| after a block report. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.max.op.size</name> |
| <value>52428800</value> |
| <description> |
| Maximum opcode size in bytes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.missing.checkpoint.periods.before.shutdown</name> |
| <value>3</value> |
| <description> |
| The number of checkpoint period windows (as defined by the property |
| dfs.namenode.checkpoint.period) allowed by the NameNode for saving |
| the namespace before shutdown. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.name.cache.threshold</name> |
| <value>10</value> |
| <description> |
| Frequently accessed files that are accessed more times than this |
| threshold are cached in the FSDirectory nameCache. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.replication.max-streams</name> |
| <value>2</value> |
| <description> |
| Hard limit for the number of replication streams other than those with highest-priority. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.replication.max-streams-hard-limit</name> |
| <value>4</value> |
| <description> |
| Hard limit for all replication streams. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reconstruction.pending.timeout-sec</name> |
| <value>300</value> |
| <description> |
| Timeout in seconds for block reconstruction. If this value is 0 or less, |
| then it will default to 5 minutes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.stale.datanode.minimum.interval</name> |
| <value>3</value> |
| <description> |
| Minimum number of missed heartbeat intervals for a datanode to |
| be marked stale by the Namenode. The actual interval is calculated as |
| (dfs.namenode.stale.datanode.minimum.interval * dfs.heartbeat.interval) |
| in seconds. If this value is greater than the property |
| dfs.namenode.stale.datanode.interval, then the calculated value above |
| is used. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.storageinfo.defragment.timeout.ms</name> |
| <value>4</value> |
| <description> |
| Timeout value in ms for the StorageInfo compaction run. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.storageinfo.defragment.interval.ms</name> |
| <value>600000</value> |
| <description> |
| The thread for checking the StorageInfo for defragmentation will |
| run periodically. The time between runs is determined by this |
| property. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.storageinfo.defragment.ratio</name> |
| <value>0.75</value> |
| <description> |
| The defragmentation threshold for the StorageInfo. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.snapshot.capture.openfiles</name> |
| <value>false</value> |
| <description> |
| If true, snapshots taken will have an immutable shared copy of |
| the open files that have valid leases. Even after the open files |
| grow or shrink in size, the snapshot will always have the previous |
| point-in-time version of the open files, just like all other |
| closed files. Default is false. |
| Note: The file length captured for open files in the snapshot is |
| what is recorded in the NameNode at the time of the snapshot, and it may |
| be shorter than what the client has written till then. In order |
| to capture the latest length, the client can call hflush/hsync |
| with the flag SyncFlag.UPDATE_LENGTH on the open file handles. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.snapshot.skip.capture.accesstime-only-change</name> |
| <value>false</value> |
| <description> |
| If true, and the accessTime of a file/directory has changed but no other |
| modification has been made to the file/directory, the changed access time |
| will not be captured in the next snapshot. However, if another modification |
| has been made to the file/directory, the latest access time will be captured |
| together with the modification in the next snapshot. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.snapshotdiff.allow.snap-root-descendant</name> |
| <value>true</value> |
| <description> |
| If enabled, snapshotDiff command can be run for any descendant directory |
| under a snapshot root directory and the diff calculation will be scoped |
| to the given descendant directory. Otherwise, snapshot diff command can |
| only be run for a snapshot root directory. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.snapshotdiff.listing.limit</name> |
| <value>1000</value> |
| <description> |
| Limits the number of entries generated by getSnapshotDiffReportListing within |
| one RPC call to the NameNode. If less than or equal to zero, at most |
| DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT (= 1000) will be sent |
| across to the client within one RPC call. |
| </description> |
| </property> |
| <property> |
| <name>dfs.namenode.snapshot.max.limit</name> |
| <value>65536</value> |
| <description> |
| Limits the maximum number of snapshots allowed per snapshottable |
| directory. If the configuration is not set, the default limit |
| on the number of snapshots allowed is 65536. |
| </description> |
| </property> |
| <property> |
| <name>dfs.namenode.snapshot.filesystem.limit</name> |
| <value>65536</value> |
| <description> |
| Limits the maximum number of snapshots allowed on the entire filesystem. |
| If the configuration is not set, the default limit |
| on the number of snapshots allowed is 65536. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.snapshot.skiplist.max.levels</name> |
| <value>0</value> |
| <description> |
| Maximum number of skip levels to be maintained in the skip list for |
| storing directory snapshot diffs. By default, it is set to 0 and a linear |
| list will be used to store the directory snapshot diffs. |
| </description> |
| </property> |
| <property> |
| <name>dfs.namenode.snapshot.skiplist.interval</name> |
| <value>10</value> |
| <description> |
| The interval after which the skip levels will be formed in the skip list |
| for storing directory snapshot diffs. By default, value is set to 10. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.satisfier.mode</name> |
| <value>none</value> |
| <description> |
| Following values are supported - external, none. |
| If external, StoragePolicySatisfier will be enabled and started as an independent service outside the namenode. |
| If none, StoragePolicySatisfier is disabled. |
| By default, StoragePolicySatisfier is disabled. |
| An administrator can dynamically change the StoragePolicySatisfier mode by using the reconfiguration option. |
| Dynamic mode change can be achieved in the following way: |
| 1. Edit/update this configuration property value in hdfs-site.xml. |
| 2. Execute the reconfig command on the hadoop command line prompt. |
| For example: $ hdfs dfsadmin -reconfig namenode nn_host:port start |
| </description> |
| </property> |
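| |
| <!-- |
| Illustrative example: enabling the external StoragePolicySatisfier in |
| hdfs-site.xml and then applying it without a restart; nn_host:port is a |
| placeholder. |
| |
| <property> |
| <name>dfs.storage.policy.satisfier.mode</name> |
| <value>external</value> |
| </property> |
| |
| $ hdfs dfsadmin -reconfig namenode nn_host:port start |
| --> |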
| |
| <property> |
| <name>dfs.storage.policy.satisfier.queue.limit</name> |
| <value>1000</value> |
| <description> |
| Storage policy satisfier queue size. This queue contains the inode IDs |
| of files currently scheduled for policy satisfaction. |
| Default value is 1000. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.satisfier.work.multiplier.per.iteration</name> |
| <value>1</value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This determines the total number of block transfers to begin in |
| one iteration for satisfying the policy. The actual number is obtained by |
| multiplying this multiplier with the total number of live nodes in the |
| cluster. The resulting number is the number of blocks to begin transferring |
| immediately. This number can be any positive, non-zero integer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.satisfier.recheck.timeout.millis</name> |
| <value>60000</value> |
| <description> |
| Re-check interval, in milliseconds, of the block storage movements monitor. |
| This check verifies whether any block storage movement results have arrived from DataNodes |
| and also whether any file block movements have not been reported by a DataNode |
| since dfs.storage.policy.satisfier.self.retry.timeout. |
| The default value is 1 * 60 * 1000 (1 minute). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.satisfier.self.retry.timeout.millis</name> |
| <value>300000</value> |
| <description> |
| If none of a file's block movements have been reported by the datanode, |
| then after this timeout (in milliseconds) the item will be added back to the |
| movement-needed list at the namenode and retried for block movement. |
| The default value is 5 * 60 * 1000 (5 minutes). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.satisfier.retry.max.attempts</name> |
| <value>3</value> |
| <description> |
| Maximum number of retries to satisfy the block storage policy. After this many |
| retries, the block will be removed from the movement-needed queue. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.satisfier.datanode.cache.refresh.interval.ms</name> |
| <value>300000</value> |
| <description> |
| How often to refresh the datanode storages cache, in milliseconds. This cache |
| keeps live datanode storage reports fetched from the namenode. After the |
| elapsed time, it will fetch the latest datanode storage reports again. |
| By default, this parameter is set to 5 minutes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.satisfier.max.outstanding.paths</name> |
| <value>10000</value> |
| <description> |
| Defines the maximum number of paths to satisfy that can be queued up in the |
| Satisfier call queue in a period of time. Default value is 10000. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.satisfier.address</name> |
| <value>0.0.0.0:0</value> |
| <description> |
| The hostname used for a keytab based Kerberos login. Keytab based login |
| is required when dfs.storage.policy.satisfier.mode is external. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.satisfier.keytab.file</name> |
| <value></value> |
| <description> |
| The keytab file used by external StoragePolicySatisfier to login as its |
| service principal. The principal name is configured with |
| dfs.storage.policy.satisfier.kerberos.principal. Keytab based login |
| is required when dfs.storage.policy.satisfier.mode is external. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.satisfier.kerberos.principal</name> |
| <value></value> |
| <description> |
| The StoragePolicySatisfier principal. This is typically set to |
| satisfier/_HOST@REALM.TLD. The StoragePolicySatisfier will substitute |
| _HOST with its own fully qualified hostname at startup. The _HOST placeholder |
| allows using the same configuration setting on different servers. Keytab |
| based login is required when dfs.storage.policy.satisfier.mode is external. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.pipeline.ecn</name> |
| <value>false</value> |
| <description> |
| If true, allows ECN (explicit congestion notification) from the |
| Datanode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.accept-recovery.timeout.ms</name> |
| <value>120000</value> |
| <description> |
| Quorum timeout in milliseconds during accept phase of |
| recovery/synchronization for a specific segment. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.finalize-segment.timeout.ms</name> |
| <value>120000</value> |
| <description> |
| Quorum timeout in milliseconds during finalizing for a specific |
| segment. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.get-journal-state.timeout.ms</name> |
| <value>120000</value> |
| <description> |
| Timeout in milliseconds when calling getJournalState() on the |
| JournalNodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.new-epoch.timeout.ms</name> |
| <value>120000</value> |
| <description> |
| Timeout in milliseconds when getting an epoch number for write |
| access to JournalNodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.prepare-recovery.timeout.ms</name> |
| <value>120000</value> |
| <description> |
| Quorum timeout in milliseconds during preparation phase of |
| recovery/synchronization for a specific segment. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.queued-edits.limit.mb</name> |
| <value>10</value> |
| <description> |
| Queue size in MB for quorum journal edits. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.select-input-streams.timeout.ms</name> |
| <value>20000</value> |
| <description> |
| Timeout in milliseconds for accepting streams from JournalManagers. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.start-segment.timeout.ms</name> |
| <value>20000</value> |
| <description> |
| Quorum timeout in milliseconds for starting a log segment. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.write-txns.timeout.ms</name> |
| <value>20000</value> |
| <description> |
| Write timeout in milliseconds when writing to a quorum of remote |
| journals. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.http.open.timeout.ms</name> |
| <value>60000</value> |
| <description> |
| Timeout in milliseconds when opening a new HTTP connection to remote |
| journals. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.http.read.timeout.ms</name> |
| <value>60000</value> |
| <description> |
| Timeout in milliseconds when reading from an HTTP connection to remote |
| journals. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjournal.parallel-read.num-threads</name> |
| <value>5</value> |
| <description> |
| Number of threads per JN to be used for tailing edits. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.quota.by.storage.type.enabled</name> |
| <value>true</value> |
| <description> |
| If true, enables quotas based on storage type. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.secondary.namenode.kerberos.principal</name> |
| <value></value> |
| <description> |
| Kerberos principal name for the Secondary NameNode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.secondary.namenode.keytab.file</name> |
| <value></value> |
| <description> |
| Kerberos keytab file for the Secondary NameNode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.web.authentication.simple.anonymous.allowed</name> |
| <value></value> |
| <description> |
| If true, allow anonymous user to access WebHDFS. Set to |
| false to disable anonymous authentication. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.web.ugi</name> |
| <value></value> |
| <description> |
| dfs.web.ugi is deprecated. Use hadoop.http.staticuser.user instead. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.netty.high.watermark</name> |
| <value>65535</value> |
| <description> |
| High watermark configuration to Netty for Datanode WebHdfs. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.netty.low.watermark</name> |
| <value>32768</value> |
| <description> |
| Low watermark configuration to Netty for Datanode WebHdfs. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.oauth2.access.token.provider</name> |
| <value></value> |
| <description> |
| Access token provider class for WebHDFS using OAuth2. |
| Defaults to org.apache.hadoop.hdfs.web.oauth2.ConfCredentialBasedAccessTokenProvider. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.oauth2.client.id</name> |
| <value></value> |
| <description> |
| Client id used to obtain access token with either credential or |
| refresh token. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.oauth2.enabled</name> |
| <value>false</value> |
| <description> |
| If true, enables OAuth2 in WebHDFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.oauth2.refresh.url</name> |
| <value></value> |
| <description> |
| URL against which to post for obtaining bearer token with |
| either credential or refresh token. |
| </description> |
| </property> |
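| |
| <!-- |
| Illustrative sketch: a minimal OAuth2 WebHDFS client configuration using a |
| refresh URL; the client id and URL below are placeholders. |
| |
| <property> |
| <name>dfs.webhdfs.oauth2.enabled</name> |
| <value>true</value> |
| </property> |
| <property> |
| <name>dfs.webhdfs.oauth2.client.id</name> |
| <value>example-client-id</value> |
| </property> |
| <property> |
| <name>dfs.webhdfs.oauth2.refresh.url</name> |
| <value>https://auth.example.com/oauth2/token</value> |
| </property> |
| --> |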
| |
| <property> |
| <name>ssl.server.keystore.keypassword</name> |
| <value></value> |
| <description> |
| Keystore key password for HTTPS SSL configuration |
| </description> |
| </property> |
| |
| <property> |
| <name>ssl.server.keystore.location</name> |
| <value></value> |
| <description> |
| Keystore location for HTTPS SSL configuration |
| </description> |
| </property> |
| |
| <property> |
| <name>ssl.server.keystore.password</name> |
| <value></value> |
| <description> |
| Keystore password for HTTPS SSL configuration |
| </description> |
| </property> |
| |
| <property> |
| <name>ssl.server.truststore.location</name> |
| <value></value> |
| <description> |
| Truststore location for HTTPS SSL configuration |
| </description> |
| </property> |
| |
| <property> |
| <name>ssl.server.truststore.password</name> |
| <value></value> |
| <description> |
| Truststore password for HTTPS SSL configuration |
| </description> |
| </property> |
| |
| <!-- Disk balancer properties --> |
| <property> |
| <name>dfs.disk.balancer.max.disk.throughputInMBperSec</name> |
| <value>10</value> |
| <description>Maximum disk bandwidth used by diskbalancer |
| during read from a source disk. The unit is MB/sec. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.disk.balancer.block.tolerance.percent</name> |
| <value>10</value> |
| <description> |
| While a disk balancer copy operation is in progress, the datanode is still |
| active, so it might not be possible to move exactly the specified |
| amount of data. This tolerance defines the percentage deviation that |
| still counts as a good enough move. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.disk.balancer.max.disk.errors</name> |
| <value>5</value> |
| <description> |
| During a block move from a source to a destination disk, we might |
| encounter various errors. This defines how many errors we can tolerate |
| before we declare a move between two disks (or a step) failed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.disk.balancer.plan.valid.interval</name> |
| <value>1d</value> |
| <description> |
| Maximum amount of time disk balancer plan is valid. This setting |
| supports multiple time unit suffixes as described in |
| dfs.heartbeat.interval. If no suffix is specified then milliseconds |
| is assumed. |
| </description> |
| </property> |
| |
| |
| <property> |
| <name>dfs.disk.balancer.enabled</name> |
| <value>true</value> |
| <description> |
| This enables the diskbalancer feature on a cluster. By default, disk |
| balancer is enabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.disk.balancer.plan.threshold.percent</name> |
| <value>10</value> |
| <description> |
| The percentage threshold value for volume Data Density in a plan. |
| If the absolute value of a volume's Data Density exceeds this |
| threshold on a node, the volumes corresponding to those disks should |
| be balanced in the plan. The default value is 10. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.provided.enabled</name> |
| <value>false</value> |
| <description> |
| Enables the Namenode to handle provided storages. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.storage.id</name> |
| <value>DS-PROVIDED</value> |
| <description> |
| The storage ID used for provided stores. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.class</name> |
| <value>org.apache.hadoop.hdfs.server.common.blockaliasmap.impl.TextFileRegionAliasMap</value> |
| <description> |
| The class that is used to specify the input format of the blocks on |
| provided storages. The default is |
| org.apache.hadoop.hdfs.server.common.blockaliasmap.impl.TextFileRegionAliasMap which uses |
| file regions to describe blocks. The file regions are specified as a |
| delimited text file. Each file region is a 6-tuple containing the |
| block id, remote file path, offset into file, length of block, the |
| block pool id containing the block, and the generation stamp of the |
| block. |
| </description> |
| </property> |
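| |
| <!-- |
| Illustrative sketch of a single file region in the delimited text format |
| described above, using the default comma delimiter. All values are made |
| up: block id, remote file path, offset into the file, length of the |
| block, block pool id, and generation stamp. |
| |
| 1073741825,/data/part-00000,0,134217728,BP-1234567-10.0.0.1-1510000000000,1001 |
| --> |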
| |
| <property> |
| <name>dfs.provided.aliasmap.inmemory.batch-size</name> |
| <value>500</value> |
| <description> |
| The batch size when iterating over the database backing the aliasmap. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.inmemory.dnrpc-address</name> |
| <value></value> |
| <description> |
| The address where the aliasmap server will be running. In the case of |
| HA/Federation where multiple namenodes exist, and if the Namenode is |
| configured to run the aliasmap server |
| (dfs.provided.aliasmap.inmemory.enabled is set to true), |
| the name service id is added to the name, e.g., |
| dfs.provided.aliasmap.inmemory.dnrpc-address.EXAMPLENAMESERVICE. |
| The value of this property will take the form of host:rpc-port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.inmemory.rpc.bind-host</name> |
| <value></value> |
| <description> |
| The actual address the in-memory aliasmap server will bind to. |
| If this optional address is set, it overrides the hostname portion of |
| dfs.provided.aliasmap.inmemory.dnrpc-address. |
| This is useful for making the name node listen on all interfaces by |
| setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.inmemory.leveldb.dir</name> |
| <value>/tmp</value> |
| <description> |
| The directory where the leveldb files will be kept. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.inmemory.enabled</name> |
| <value>false</value> |
| <description> |
| Don't use the aliasmap by default. Some tests will fail |
| because they try to start the namenode twice with the |
| same parameters if you turn it on. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.inmemory.server.log</name> |
| <value>false</value> |
| <description> |
| If true, the InMemoryAliasMap server logs every call to it. |
| Set to false by default. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.text.delimiter</name> |
| <value>,</value> |
| <description> |
| The delimiter used when the provided block map is specified as |
| a text file. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.text.read.file</name> |
| <value></value> |
| <description> |
| The path specifying the provided block map as a text file, specified as |
| a URI. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.text.codec</name> |
| <value></value> |
| <description> |
| The codec used to de-compress the provided block map. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.text.write.dir</name> |
| <value></value> |
| <description> |
| The path to which the provided block map should be written as a text |
| file, specified as a URI. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.leveldb.path</name> |
| <value></value> |
| <description> |
| The read/write path for the leveldb-based alias map |
| (org.apache.hadoop.hdfs.server.common.blockaliasmap.impl.LevelDBFileRegionAliasMap). |
| The path has to be explicitly configured when this alias map is used. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.acls.import.enabled</name> |
| <value>false</value> |
| <description> |
| Set to true to inherit ACLs (Access Control Lists) from remote stores |
| during mount. Disabled by default, i.e., ACLs are not inherited from |
| remote stores. Note that HDFS ACLs have to be enabled |
| (dfs.namenode.acls.enabled must be set to true) for this to take effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.provided.aliasmap.load.retries</name> |
| <value>0</value> |
| <description> |
| The number of retries on the Datanode to load the provided aliasmap; |
| defaults to 0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.lock.suppress.warning.interval</name> |
| <value>10s</value> |
| <description>Instrumentation reporting long critical sections will suppress |
| consecutive warnings within this interval.</description> |
| </property> |
| |
| <property> |
| <name>httpfs.buffer.size</name> |
| <value>4096</value> |
| <description> |
| The buffer size to be used when creating or opening httpfs filesystem IO streams. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.use.ipc.callq</name> |
| <value>true</value> |
| <description>Enables routing of webhdfs calls through the rpc |
| call queue.</description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.disk.check.min.gap</name> |
| <value>15m</value> |
| <description> |
| The minimum gap between two successive checks of the same DataNode |
| volume. This setting supports multiple time unit suffixes as described |
| in dfs.heartbeat.interval. If no suffix is specified then milliseconds |
| is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.disk.check.timeout</name> |
| <value>10m</value> |
| <description> |
| Maximum allowed time for a disk check to complete. If the check does not |
| complete within this time interval then the disk is declared as failed. |
| This setting supports multiple time unit suffixes as described in |
| dfs.heartbeat.interval. If no suffix is specified then milliseconds is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.use.dfs.network.topology</name> |
| <value>true</value> |
| <description> |
| Enables DFSNetworkTopology to choose nodes for placing replicas. |
| When enabled, NetworkTopology will be instantiated as class defined in |
| property dfs.net.topology.impl, otherwise NetworkTopology will be |
| instantiated as class defined in property net.topology.impl. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.net.topology.impl</name> |
| <value>org.apache.hadoop.hdfs.net.DFSNetworkTopology</value> |
| <description> |
| The implementation class of NetworkTopology used in HDFS. By default, |
| the class org.apache.hadoop.hdfs.net.DFSNetworkTopology is specified and |
| used in block placement. |
| This property only works when dfs.use.dfs.network.topology is true. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.qjm.operations.timeout</name> |
| <value>60s</value> |
| <description> |
| Common key to set timeout for related operations in |
| QuorumJournalManager. This setting supports multiple time unit suffixes |
| as described in dfs.heartbeat.interval. |
| If no suffix is specified then milliseconds is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.reformat.disabled</name> |
| <value>false</value> |
| <description> |
| Disable reformat of the NameNode. If its value is set to "true" |
| and metadata directories already exist, then an attempt to format the |
| NameNode will throw a NameNodeFormatException. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.block.deletion.increment</name> |
| <value>1000</value> |
| <description> |
| The number of blocks deleted in each increment. |
| This setting controls the incremental block deletion rate to |
| ensure that other waiters on the lock can get in. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.rpc-address.auxiliary-ports</name> |
| <value></value> |
| <description> |
| A comma separated list of auxiliary ports for the NameNode to listen on. |
| This allows exposing multiple NN addresses to clients. |
| Particularly, it is used to enforce different SASL levels on different ports. |
| Empty list indicates that auxiliary ports are disabled. |
| </description> |
| </property> |
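| |
| <!-- |
| Illustrative example: exposing two hypothetical auxiliary RPC ports in |
| addition to the main one. |
| |
| <property> |
| <name>dfs.namenode.rpc-address.auxiliary-ports</name> |
| <value>9002,9003</value> |
| </property> |
| --> |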
| |
| <property> |
| <name>dfs.namenode.send.qop.enabled</name> |
| <value>false</value> |
| <description> |
| A boolean that specifies whether the NameNode should encrypt the established |
| QOP and include it in the block token. The encrypted QOP will be used by the |
| DataNode as the target QOP, overwriting the DataNode configuration. This |
| ensures the DataNode will use exactly the same QOP that the NameNode and |
| client have already agreed on. |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.overwrite.downstream.derived.qop</name> |
| <value>false</value> |
| <description> |
| A boolean that specifies whether the DN should overwrite the downstream |
| QOP in a write pipeline. This is used in the case where the client |
| talks to the first DN with one QOP, but inter-DN communication needs to |
| use a different QOP. If set to false, the default behaviour is that |
| inter-DN communication will use the same QOP as the client-DN connection. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.overwrite.downstream.new.qop</name> |
| <value></value> |
| <description> |
| When dfs.encrypt.data.overwrite.downstream.derived.qop is set to true, |
| this configuration specifies the new QOP to be used to overwrite |
| inter-DN QOP. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.blockreport.queue.size</name> |
| <value>1024</value> |
| <description> |
| The queue size of BlockReportProcessingThread in BlockManager. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.storage.dir.perm</name> |
| <value>700</value> |
| <description> |
| Permissions for the directories on the local filesystem where |
| the DFS namenode stores the fsImage. The permissions can either be |
| octal or symbolic. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.blockreport.max.lock.hold.time</name> |
| <value>4</value> |
| <description> |
| The BlockReportProcessingThread max write lock hold time in ms. |
| </description> |
| </property> |
| |
| |
| <property> |
| <name>dfs.namenode.corrupt.block.delete.immediately.enabled</name> |
| <value>true</value> |
| <description> |
| Whether the corrupt replicas should be deleted immediately, irrespective |
| of other replicas on stale storages. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.edits.dir.perm</name> |
| <value>700</value> |
| <description> |
| Permissions for the directories on the local filesystem where |
| the DFS journal node stores the edits. The permissions can either be |
| octal or symbolic. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lease-hard-limit-sec</name> |
| <value>1200</value> |
| <description> |
| Determines the namenode automatic lease recovery interval in seconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.gc.time.monitor.enable</name> |
| <value>true</value> |
| <description> |
| Enable the GcTimePercentage metric in the NameNode's JvmMetrics. It will |
| start a thread (GcTimeMonitor) that computes the metric. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.gc.time.monitor.observation.window.ms</name> |
| <value>1m</value> |
| <description> |
| Determines the window size of the GcTimeMonitor. A window is a period of |
| time that starts at now-windowSize and ends at now. The GcTimePercentage |
| is the proportion of GC time within the window. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.gc.time.monitor.sleep.interval.ms</name> |
| <value>5s</value> |
| <description> |
| Determines the sleep interval within the window. The GcTimeMonitor wakes up |
| periodically at this interval to compute the GC time proportion. The |
| shorter the interval, the more precise the GcTimePercentage. The sleep |
| interval must be shorter than the window size. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.permissions.allow.owner.set.quota</name> |
| <value>false</value> |
| <description> |
| Whether the owner (not the superuser) of a directory can set quotas on its |
| subdirectories when permissions are enabled. Default value is false. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.protected.subdirectories.enable</name> |
| <value>false</value> |
| <description>Whether to protect the subdirectories of the directories |
| listed in fs.protected.directories. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.default.policy</name> |
| <value>HOT</value> |
| <description> |
| Set the default Storage Policy name to one of the following values: |
| LAZY_PERSIST: memory storage policy. |
| ALL_SSD : all-SSD storage policy. |
| ONE_SSD : one-SSD storage policy. |
| HOT : hot storage policy. |
| WARM : warm storage policy. |
| COLD : cold storage policy. |
| PROVIDED : provided storage policy. |
| </description> |
| </property> |
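| |
| <!-- |
| Illustrative example: making ALL_SSD the cluster-wide default storage |
| policy instead of HOT. |
| |
| <property> |
| <name>dfs.storage.default.policy</name> |
| <value>ALL_SSD</value> |
| </property> |
| --> |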
| |
| <property> |
| <name>dfs.datanode.same-disk-tiering.enabled</name> |
| <value>false</value> |
| <description> |
| Allows DISK/ARCHIVE storage types to be configured |
| on the same disk mount to manage disk IO (HDFS-15548). |
| When this is enabled, the datanode will control the capacity |
| of DISK/ARCHIVE based on reserve-for-archive.percentage. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.reserve-for-archive.default.percentage</name> |
| <value>0.0</value> |
| <description> |
| Default disk capacity ratio of the ARCHIVE volume; |
| the value is expected to be between 0 and 1. |
| This will be applied when DISK/ARCHIVE volumes are configured |
| on the same mount, which is detected by the datanode. |
| Beware that capacity usage might exceed 100% if data blocks |
| already exist and the configured ratio is small; this will |
| prevent the volume from taking new blocks |
| until capacity is balanced out. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.same-disk-tiering.capacity-ratio.percentage</name> |
| <value></value> |
| <description> |
| Disk capacity ratio of a DISK or ARCHIVE volume |
| when dfs.datanode.same-disk-tiering.enabled is turned on. |
| This will override the value of |
| dfs.datanode.reserve-for-archive.default.percentage. |
| Example value: |
| [0.3]/disk1/archive,[0.7]/disk1/disk,[0.4]/disk2/archive,[0.6]/disk2/disk |
| This is only effective for configured |
| DISK/ARCHIVE volumes in dfs.datanode.data.dir. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balancer.getBlocks.hot-time-interval</name> |
| <value>0</value> |
| <description> |
| The Balancer prefers moving cold blocks, i.e., blocks associated with files |
| accessed or modified before the specified time interval. |
| </description> |
| </property> |
| </configuration> |