| <?xml version="1.0"?> |
| <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
| |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <!-- Do not modify this file directly. Instead, copy entries that you --> |
| <!-- wish to modify from this file into hdfs-site.xml and change them --> |
| <!-- there. If hdfs-site.xml does not already exist, create it. --> |
| |
| <configuration> |
| |
| <property> |
| <name>hadoop.hdfs.configuration.version</name> |
| <value>1</value> |
| <description>version of this configuration file</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.rpc-address</name> |
| <value></value> |
| <description> |
    RPC address that handles all client requests. In the case of HA/Federation where multiple namenodes exist,
    the name service id is added to the name, e.g. dfs.namenode.rpc-address.ns1 or
    dfs.namenode.rpc-address.EXAMPLENAMESERVICE.
    The value of this property will take the form of nn-host1:rpc-port. The NameNode's default RPC port is 8020.
| </description> |
| </property> |
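
<!-- A minimal sketch of the per name service form described above (the "ns1" id and
     the hostname are hypothetical); in an HA setup a NameNode id is typically appended
     as well, e.g. dfs.namenode.rpc-address.ns1.nn1:

     <property>
       <name>dfs.namenode.rpc-address.ns1</name>
       <value>nn-host1.example.com:8020</value>
     </property>
-->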
| |
| <property> |
| <name>dfs.namenode.rpc-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the RPC server will bind to. If this optional address is |
| set, it overrides only the hostname portion of dfs.namenode.rpc-address. |
| It can also be specified per name node or name service for HA/Federation. |
| This is useful for making the name node listen on all interfaces by |
| setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.servicerpc-address</name> |
| <value></value> |
| <description> |
    RPC address for HDFS Services communication. BackupNode, Datanodes and all other services should
    connect to this address if it is configured. In the case of HA/Federation where multiple namenodes exist,
    the name service id is added to the name, e.g. dfs.namenode.servicerpc-address.ns1 or
    dfs.namenode.servicerpc-address.EXAMPLENAMESERVICE.
    The value of this property will take the form of nn-host1:rpc-port.
    If the value of this property is unset, the value of dfs.namenode.rpc-address will be used as the default.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.servicerpc-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the service RPC server will bind to. If this optional address is |
| set, it overrides only the hostname portion of dfs.namenode.servicerpc-address. |
| It can also be specified per name node or name service for HA/Federation. |
| This is useful for making the name node listen on all interfaces by |
| setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lifeline.rpc-address</name> |
| <value></value> |
| <description> |
| NameNode RPC lifeline address. This is an optional separate RPC address |
| that can be used to isolate health checks and liveness to protect against |
| resource exhaustion in the main RPC handler pool. In the case of |
| HA/Federation where multiple NameNodes exist, the name service ID is added |
| to the name e.g. dfs.namenode.lifeline.rpc-address.ns1. The value of this |
| property will take the form of nn-host1:rpc-port. If this property is not |
| defined, then the NameNode will not start a lifeline RPC server. By |
| default, the property is not defined. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lifeline.rpc-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the lifeline RPC server will bind to. If this optional |
| address is set, it overrides only the hostname portion of |
| dfs.namenode.lifeline.rpc-address. It can also be specified per name node |
| or name service for HA/Federation. This is useful for making the name node |
| listen on all interfaces by setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.secondary.http-address</name> |
| <value>0.0.0.0:9868</value> |
| <description> |
| The secondary namenode http server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.secondary.https-address</name> |
| <value>0.0.0.0:9869</value> |
| <description> |
| The secondary namenode HTTPS server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.address</name> |
| <value>0.0.0.0:9866</value> |
| <description> |
| The datanode server address and port for data transfer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.http.address</name> |
| <value>0.0.0.0:9864</value> |
| <description> |
| The datanode http server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.ipc.address</name> |
| <value>0.0.0.0:9867</value> |
| <description> |
| The datanode ipc server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.http.internal-proxy.port</name> |
| <value>0</value> |
| <description> |
| The datanode's internal web proxy port. |
| By default it selects a random port available in runtime. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.handler.count</name> |
| <value>10</value> |
| <description> |
    The number of Datanode RPC server threads that listen to
    requests from clients.</description>
| </property> |
| |
| <property> |
| <name>dfs.namenode.http-address</name> |
| <value>0.0.0.0:9870</value> |
| <description> |
| The address and the base port where the dfs namenode web ui will listen on. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.http-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the HTTP server will bind to. If this optional address |
| is set, it overrides only the hostname portion of dfs.namenode.http-address. |
| It can also be specified per name node or name service for HA/Federation. |
| This is useful for making the name node HTTP server listen on all |
| interfaces by setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.heartbeat.recheck-interval</name> |
| <value>300000</value> |
| <description> |
    This time decides the interval to check for expired datanodes.
    With this value and dfs.heartbeat.interval, the interval used to
    decide whether a datanode is stale is also calculated.
    The unit of this configuration is milliseconds.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.http.policy</name> |
| <value>HTTP_ONLY</value> |
  <description>Decides whether HTTPS (SSL) is supported on HDFS.
    This configures the HTTP endpoint for HDFS daemons.
| The following values are supported: |
| - HTTP_ONLY : Service is provided only on http |
| - HTTPS_ONLY : Service is provided only on https |
| - HTTP_AND_HTTPS : Service is provided both on http and https |
| </description> |
| </property> |
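
<!-- For example, to serve the HDFS web endpoints over HTTPS only, hdfs-site.xml could
     carry the following (keystore material is typically configured via ssl-server.xml):

     <property>
       <name>dfs.http.policy</name>
       <value>HTTPS_ONLY</value>
     </property>
-->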
| |
| <property> |
| <name>dfs.client.https.need-auth</name> |
| <value>false</value> |
| <description>Whether SSL client certificate authentication is required |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.cached.conn.retry</name> |
| <value>3</value> |
| <description>The number of times the HDFS client will pull a socket from the |
| cache. Once this number is exceeded, the client will try to create a new |
| socket. |
| </description> |
| </property> |
| |
| |
| <property> |
| <name>dfs.https.server.keystore.resource</name> |
| <value>ssl-server.xml</value> |
| <description>Resource file from which ssl server keystore |
| information will be extracted |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.https.keystore.resource</name> |
| <value>ssl-client.xml</value> |
| <description>Resource file from which ssl client keystore |
| information will be extracted |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.https.address</name> |
| <value>0.0.0.0:9865</value> |
| <description>The datanode secure http server address and port.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.https-address</name> |
| <value>0.0.0.0:9871</value> |
| <description>The namenode secure http server address and port.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.https-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the HTTPS server will bind to. If this optional address |
| is set, it overrides only the hostname portion of dfs.namenode.https-address. |
| It can also be specified per name node or name service for HA/Federation. |
| This is useful for making the name node HTTPS server listen on all |
| interfaces by setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.dns.interface</name> |
| <value>default</value> |
| <description> |
| The name of the Network Interface from which a data node should |
| report its IP address. e.g. eth2. This setting may be required for some |
| multi-homed nodes where the DataNodes are assigned multiple hostnames |
| and it is desirable for the DataNodes to use a non-default hostname. |
| |
| Prefer using hadoop.security.dns.interface over |
| dfs.datanode.dns.interface. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.dns.nameserver</name> |
| <value>default</value> |
| <description> |
| The host name or IP address of the name server (DNS) which a DataNode |
| should use to determine its own host name. |
| |
| Prefer using hadoop.security.dns.nameserver over |
| dfs.datanode.dns.nameserver. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.backup.address</name> |
| <value>0.0.0.0:50100</value> |
| <description> |
| The backup node server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.backup.http-address</name> |
| <value>0.0.0.0:50105</value> |
| <description> |
| The backup node http server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.redundancy.considerLoad</name> |
| <value>true</value> |
| <description> |
    Decides whether chooseTarget considers the target's load when choosing
    datanodes for writes. On by default.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.redundancy.considerLoadByStorageType</name> |
| <value>false</value> |
| <description> |
    Decides whether chooseTarget considers the target's load with respect to the
    storage type. Typically to be used when datanodes contain homogeneous
    storage types. Irrelevant if dfs.namenode.redundancy.considerLoad is
    false.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.redundancy.considerLoad.factor</name> |
| <value>2.0</value> |
| <description>The factor by which a node's load can exceed the average |
| before being rejected for writes, only if considerLoad is true. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.redundancy.considerLoadByVolume</name> |
| <value>false</value> |
  <description>Decides whether chooseTarget considers the target's volume load.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.read.considerLoad</name> |
| <value>false</value> |
| <description> |
    Decides whether sorting of block locations considers the target's load when reading.
    Off by default.
| It is not possible to enable this feature along with dfs.namenode.read.considerStorageType as only one sort can be |
| enabled at a time. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.read.considerStorageType</name> |
| <value>false</value> |
| <description> |
    Decides whether sorting of block locations considers the target's storage type when reading. Any locations with the same
| network distance are sorted in order of the storage speed, fastest first (RAM, SSD, Disk, Archive). This is |
| disabled by default, and the locations will be ordered randomly. |
| It is not possible to enable this feature along with dfs.namenode.read.considerLoad as only one sort can be |
| enabled at a time. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.httpserver.filter.handlers</name> |
| <value>org.apache.hadoop.hdfs.server.datanode.web.RestCsrfPreventionFilterHandler</value> |
| <description>Comma separated list of Netty servlet-style filter handlers to inject into the Datanode WebHDFS I/O path |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.default.chunk.view.size</name> |
| <value>32768</value> |
| <description>The number of bytes to view for a file on the browser. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.du.reserved.calculator</name> |
| <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.ReservedSpaceCalculator$ReservedSpaceCalculatorAbsolute</value> |
  <description>Determines the class of ReservedSpaceCalculator to be used for
    calculating disk space reserved for non-HDFS data. The default calculator is
| ReservedSpaceCalculatorAbsolute which will use dfs.datanode.du.reserved |
| for a static reserved number of bytes. ReservedSpaceCalculatorPercentage |
| will use dfs.datanode.du.reserved.pct to calculate the reserved number |
| of bytes based on the size of the storage. ReservedSpaceCalculatorConservative and |
| ReservedSpaceCalculatorAggressive will use their combination, Conservative will use |
| maximum, Aggressive minimum. For more details see ReservedSpaceCalculator. |
| </description> |
| </property> |
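
<!-- A sketch of switching to the percentage based calculator named above, which then
     reads dfs.datanode.du.reserved.pct instead of a fixed byte count:

     <property>
       <name>dfs.datanode.du.reserved.calculator</name>
       <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.ReservedSpaceCalculator$ReservedSpaceCalculatorPercentage</value>
     </property>
-->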
| |
| <property> |
| <name>dfs.datanode.du.reserved</name> |
| <value>0</value> |
| <description>Reserved space in bytes per volume. Always leave this much space free for non dfs use. |
| Specific directory based reservation is supported. The property can be followed with directory |
| name which is set at 'dfs.datanode.data.dir'. For example, reserved space for /data/hdfs1/data |
| can be configured using property 'dfs.datanode.du.reserved./data/hdfs1/data'. If specific directory |
| reservation is not configured then dfs.datanode.du.reserved will be used. |
| Specific storage type based reservation is also supported. The property can be followed with |
| corresponding storage types ([ssd]/[disk]/[archive]/[ram_disk]/[nvdimm]) for cluster with heterogeneous storage. |
| For example, reserved space for RAM_DISK storage can be configured using property |
| 'dfs.datanode.du.reserved.ram_disk'. If specific storage type reservation is not configured |
    then dfs.datanode.du.reserved will be used. Multiple size unit suffixes (case insensitive)
    are supported, as described in dfs.blocksize. Using directory name and storage type based
    reservation at the same time is also allowed if both are configured.
| Property priority example: dfs.datanode.du.reserved./data/hdfs1/data.ram_disk > |
| dfs.datanode.du.reserved./data/hdfs1/data > dfs.datanode.du.reserved.ram_disk > dfs.datanode.du.reserved |
| Note: In case of using tune2fs to set reserved-blocks-percentage, or other filesystem tools, |
| then you can possibly run into out of disk errors because hadoop will not check those |
| external tool configurations. |
| </description> |
| </property> |
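
<!-- Illustrative overrides for the forms described above (the /data/hdfs1/data path is
     hypothetical): reserve 10g on one specific volume and 1g on all RAM_DISK storage.

     <property>
       <name>dfs.datanode.du.reserved./data/hdfs1/data</name>
       <value>10g</value>
     </property>
     <property>
       <name>dfs.datanode.du.reserved.ram_disk</name>
       <value>1g</value>
     </property>
-->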
| |
| <property> |
| <name>dfs.datanode.du.reserved.pct</name> |
| <value>0</value> |
| <description>Reserved space in percentage. Read dfs.datanode.du.reserved.calculator to see |
| when this takes effect. The actual number of bytes reserved will be calculated by using the |
| total capacity of the data directory in question. Specific directory based reservation is |
| supported. The property can be followed with directory name which is set at 'dfs.datanode.data.dir'. |
| For example, reserved percentage space for /data/hdfs1/data can be configured using property |
| 'dfs.datanode.du.reserved.pct./data/hdfs1/data'. If specific directory reservation is not |
| configured then dfs.datanode.du.reserved.pct will be used. Specific storage type based reservation |
| is also supported. The property can be followed with corresponding storage types |
| ([ssd]/[disk]/[archive]/[ram_disk]/[nvdimm]) for cluster with heterogeneous storage. |
| For example, reserved percentage space for RAM_DISK storage can be configured using property |
| 'dfs.datanode.du.reserved.pct.ram_disk'. If specific storage type reservation is not configured |
    then dfs.datanode.du.reserved.pct will be used. Using directory and storage type based reservation
    together is also allowed if both are configured.
| Priority example: dfs.datanode.du.reserved.pct./data/hdfs1/data.ram_disk > dfs.datanode.du.reserved.pct./data/hdfs1/data |
| > dfs.datanode.du.reserved.pct.ram_disk > dfs.datanode.du.reserved.pct |
| </description> |
| </property> |
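
<!-- Similarly for the percentage form (the path below is hypothetical): reserve 10
     percent on a specific volume, falling back to dfs.datanode.du.reserved.pct elsewhere.

     <property>
       <name>dfs.datanode.du.reserved.pct./data/hdfs1/data</name>
       <value>10</value>
     </property>
-->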
| |
| <property> |
| <name>dfs.namenode.name.dir</name> |
| <value>file://${hadoop.tmp.dir}/dfs/name</value> |
| <description>Determines where on the local filesystem the DFS name node |
| should store the name table(fsimage). If this is a comma-delimited list |
| of directories then the name table is replicated in all of the |
| directories, for redundancy. </description> |
| </property> |
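
<!-- Illustrative only (the paths are hypothetical): storing the name table redundantly
     on two local volumes.

     <property>
       <name>dfs.namenode.name.dir</name>
       <value>file:///data/1/dfs/nn,file:///data/2/dfs/nn</value>
     </property>
-->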
| |
| <property> |
| <name>dfs.namenode.name.dir.restore</name> |
| <value>false</value> |
| <description>Set to true to enable NameNode to attempt recovering a |
| previously failed dfs.namenode.name.dir. When enabled, a recovery of any |
| failed directory is attempted during checkpoint.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-component-length</name> |
| <value>255</value> |
| <description>Defines the maximum number of bytes in UTF-8 encoding in each |
| component of a path. A value of 0 will disable the check. Support |
| multiple size unit suffix(case insensitive), as described in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-directory-items</name> |
| <value>1048576</value> |
| <description>Defines the maximum number of items that a directory may |
| contain. Cannot set the property to a value less than 1 or more than |
| 6400000.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.min-block-size</name> |
| <value>1048576</value> |
| <description>Minimum block size in bytes, enforced by the Namenode at create |
| time. This prevents the accidental creation of files with tiny block |
| sizes (and thus many blocks), which can degrade performance. Support multiple |
| size unit suffix(case insensitive), as described in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-blocks-per-file</name> |
| <value>10000</value> |
| <description>Maximum number of blocks per file, enforced by the Namenode on |
| write. This prevents the creation of extremely large files which can |
| degrade performance.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.dir</name> |
| <value>${dfs.namenode.name.dir}</value> |
| <description>Determines where on the local filesystem the DFS name node |
| should store the transaction (edits) file. If this is a comma-delimited list |
| of directories then the transaction file is replicated in all of the |
| directories, for redundancy. Default value is same as dfs.namenode.name.dir |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.dir.required</name> |
| <value></value> |
| <description>This should be a subset of dfs.namenode.edits.dir, |
| to ensure that the transaction (edits) file |
| in these places is always up-to-date. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.shared.edits.dir</name> |
| <value></value> |
| <description>A directory on shared storage between the multiple namenodes |
| in an HA cluster. This directory will be written by the active and read |
| by the standby in order to keep the namespaces synchronized. This directory |
| does not need to be listed in dfs.namenode.edits.dir above. It should be |
| left empty in a non-HA cluster. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.journal-plugin.qjournal</name> |
| <value>org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager</value> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.qjournals.resolution-enabled</name> |
| <value>false</value> |
| <description> |
| Determines if the given qjournals address is a domain name which needs to |
| be resolved. |
| This is used by namenode to resolve qjournals. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.qjournals.resolver.impl</name> |
| <value></value> |
| <description> |
| Qjournals resolver implementation used by namenode. |
| Effective with dfs.namenode.edits.qjournals.resolution-enabled on. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.permissions.enabled</name> |
| <value>true</value> |
| <description> |
| If "true", enable permission checking in HDFS. |
| If "false", permission checking is turned off, |
| but all other behavior is unchanged. |
| Switching from one parameter value to the other does not change the mode, |
| owner or group of files or directories. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.permissions.ContentSummary.subAccess</name> |
| <value>false</value> |
| <description> |
| If "true", the ContentSummary permission checking will use subAccess. |
| If "false", the ContentSummary permission checking will NOT use subAccess. |
| subAccess means using recursion to check the access of all descendants. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.permissions.superusergroup</name> |
| <value>supergroup</value> |
| <description>The name of the group of super-users. |
| The value should be a single group name. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.cluster.administrators</name> |
| <value></value> |
  <description>ACL for the admins. This configuration is used to control
| who can access the default servlets in the namenode, etc. The value |
| should be a comma separated list of users and groups. The user list |
| comes first and is separated by a space followed by the group list, |
| e.g. "user1,user2 group1,group2". Both users and groups are optional, |
| so "user1", " group1", "", "user1 group1", "user1,user2 group1,group2" |
| are all valid (note the leading space in " group1"). '*' grants access |
| to all users and groups, e.g. '*', '* ' and ' *' are all valid. |
| </description> |
| </property> |
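
<!-- For example (hypothetical principals), granting admin access to user "hdfs" and to
     members of group "ops":

     <property>
       <name>dfs.cluster.administrators</name>
       <value>hdfs ops</value>
     </property>
-->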
| |
| <property> |
| <name>dfs.namenode.ip-proxy-users</name> |
| <value></value> |
| <description>A comma separated list of user names that are allowed by the |
| NameNode to specify a different client IP address in the caller context. |
| This is used by Router-Based Federation (RBF) to provide the actual client's |
| IP address to the NameNode, which is critical to preserve data locality when |
| using RBF. If you are using RBF, add the user that runs the routers. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.acls.enabled</name> |
| <value>true</value> |
| <description> |
| Set to true to enable support for HDFS ACLs (Access Control Lists). By |
| default, ACLs are enabled. When ACLs are disabled, the NameNode rejects |
| all RPCs related to setting or getting ACLs. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.posix.acl.inheritance.enabled</name> |
| <value>true</value> |
| <description> |
| Set to true to enable POSIX style ACL inheritance. When it is enabled |
| and the create request comes from a compatible client, the NameNode |
| will apply default ACLs from the parent directory to the create mode |
    and ignore the client umask. If no default ACL is found, it will apply the
| client umask. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lazypersist.file.scrub.interval.sec</name> |
| <value>300</value> |
| <description> |
| The NameNode periodically scans the namespace for LazyPersist files with |
| missing blocks and unlinks them from the namespace. This configuration key |
| controls the interval between successive scans. If this value is set to 0, |
| the file scrubber is disabled. |
| </description> |
| </property> |
| <property> |
| <name>dfs.block.access.token.enable</name> |
| <value>false</value> |
| <description> |
| If "true", access tokens are used as capabilities for accessing datanodes. |
| If "false", no access tokens are checked on accessing datanodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.access.key.update.interval</name> |
| <value>600</value> |
| <description> |
| Interval in minutes at which namenode updates its access keys. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.access.token.lifetime</name> |
| <value>600</value> |
| <description>The lifetime of access tokens in minutes.</description> |
| </property> |
| |
| <property> |
| <name>dfs.block.access.token.protobuf.enable</name> |
| <value>false</value> |
| <description> |
| If "true", block tokens are written using Protocol Buffers. |
| If "false", block tokens are written using Legacy format. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.data.dir</name> |
| <value>file://${hadoop.tmp.dir}/dfs/data</value> |
  <description>Determines where on the local filesystem a DFS data node
| should store its blocks. If this is a comma-delimited |
| list of directories, then data will be stored in all named |
| directories, typically on different devices. The directories should be tagged |
| with corresponding storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]/[NVDIMM]) for HDFS |
| storage policies. The default storage type will be DISK if the directory does |
| not have a storage type tagged explicitly. Directories that do not exist will |
| be created if local filesystem permission allows. |
| </description> |
| </property> |
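
<!-- Illustrative only (the paths are hypothetical): mixing a DISK and an SSD volume
     with storage type tags for HDFS storage policies.

     <property>
       <name>dfs.datanode.data.dir</name>
       <value>[DISK]file:///data/hdfs/disk0,[SSD]file:///data/hdfs/ssd0</value>
     </property>
-->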
| |
| <property> |
| <name>dfs.datanode.data.dir.perm</name> |
| <value>700</value> |
  <description>Permissions for the directories on the local filesystem where
    the DFS data node stores its blocks. The permissions can either be octal or
    symbolic.</description>
| </property> |
| |
| <property> |
| <name>dfs.replication</name> |
| <value>3</value> |
| <description>Default block replication. |
| The actual number of replications can be specified when the file is created. |
    The default is used if replication is not specified at create time.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.replication.max</name> |
| <value>512</value> |
| <description>Maximal block replication. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.replication.min</name> |
| <value>1</value> |
| <description>Minimal block replication. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.maintenance.replication.min</name> |
| <value>1</value> |
  <description>Minimal live block replication required in the presence of maintenance mode.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.replication.min</name> |
| <value></value> |
| <description> |
    A separate minimum replication factor for calculating the safe block count.
    This is an expert level setting.
    Setting this lower than dfs.namenode.replication.min
    is not recommended and can be dangerous for production setups.
    When it is not set, it takes its value from dfs.namenode.replication.min.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.max-corrupt-file-blocks-returned</name> |
| <value>100</value> |
| <description> |
    The maximum number of corrupt file blocks listed by the NameNode Web UI,
    JMX and other client requests.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.blocksize</name> |
| <value>134217728</value> |
| <description> |
| The default block size for new files, in bytes. |
| You can use the following suffix (case insensitive): |
| k(kilo), m(mega), g(giga), t(tera), p(peta), e(exa) to specify the size (such as 128k, 512m, 1g, etc.), |
    or provide the complete size in bytes (such as 134217728 for 128 MB).
| </description> |
| </property> |
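
<!-- For example, a 256 MB default block size can be written either as 256m or as
     268435456 bytes:

     <property>
       <name>dfs.blocksize</name>
       <value>256m</value>
     </property>
-->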
| |
| <property> |
| <name>dfs.client.block.write.retries</name> |
| <value>3</value> |
| <description>The number of retries for writing blocks to the data nodes, |
| before we signal failure to the application. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.replace-datanode-on-failure.enable</name> |
| <value>true</value> |
| <description> |
| If there is a datanode/network failure in the write pipeline, |
| DFSClient will try to remove the failed datanode from the pipeline |
| and then continue writing with the remaining datanodes. As a result, |
| the number of datanodes in the pipeline is decreased. The feature is |
| to add new datanodes to the pipeline. |
| |
| This is a site-wide property to enable/disable the feature. |
| |
| When the cluster size is extremely small, e.g. 3 nodes or less, cluster |
| administrators may want to set the policy to NEVER in the default |
| configuration file or disable this feature. Otherwise, users may |
| experience an unusually high rate of pipeline failures since it is |
| impossible to find new datanodes for replacement. |
| |
| See also dfs.client.block.write.replace-datanode-on-failure.policy |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.replace-datanode-on-failure.policy</name> |
| <value>DEFAULT</value> |
| <description> |
| This property is used only if the value of |
| dfs.client.block.write.replace-datanode-on-failure.enable is true. |
| |
| ALWAYS: always add a new datanode when an existing datanode is removed. |
| |
| NEVER: never add a new datanode. |
| |
| DEFAULT: |
| Let r be the replication number. |
| Let n be the number of existing datanodes. |
| Add a new datanode only if r is greater than or equal to 3 and either |
| (1) floor(r/2) is greater than or equal to n; or |
| (2) r is greater than n and the block is hflushed/appended. |
| </description> |
| </property> |
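
<!-- Worked example of the DEFAULT policy: with replication r = 3 and n = 2 datanodes
     left in the pipeline, floor(r/2) = 1 is not >= n, but r > n, so a replacement
     datanode is requested only if the block has been hflushed/appended; otherwise the
     write simply continues with the remaining 2 datanodes.
-->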
| |
| <property> |
| <name>dfs.client.block.write.replace-datanode-on-failure.best-effort</name> |
| <value>false</value> |
| <description> |
| This property is used only if the value of |
| dfs.client.block.write.replace-datanode-on-failure.enable is true. |
| |
| Best effort means that the client will try to replace a failed datanode |
| in write pipeline (provided that the policy is satisfied), however, it |
| continues the write operation in case that the datanode replacement also |
| fails. |
| |
| Suppose the datanode replacement fails. |
| false: An exception should be thrown so that the write will fail. |
    true : The write should be resumed with the remaining datanodes.
| |
| Note that setting this property to true allows writing to a pipeline |
| with a smaller number of datanodes. As a result, it increases the |
| probability of data loss. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.replace-datanode-on-failure.min-replication</name> |
| <value>0</value> |
| <description> |
    The minimum number of replications needed in order not to fail
    the write pipeline if new datanodes can not be found to replace
    failed datanodes (could be due to network failure) in the write pipeline.
| If the number of the remaining datanodes in the write pipeline is greater |
| than or equal to this property value, continue writing to the remaining nodes. |
| Otherwise throw exception. |
| |
| If this is set to 0, an exception will be thrown, when a replacement |
| can not be found. |
| See also dfs.client.block.write.replace-datanode-on-failure.policy |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.blockreport.intervalMsec</name> |
| <value>21600000</value> |
| <description>Determines block reporting interval in milliseconds.</description> |
| </property> |
| |
| <property> |
| <name>dfs.blockreport.initialDelay</name> |
| <value>0</value> |
| <description> |
    Delay for the first block report in seconds. Multiple time unit
    suffixes (case insensitive) are supported, as described in dfs.heartbeat.interval.
    If no time unit is specified then seconds is assumed.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.blockreport.split.threshold</name> |
| <value>1000000</value> |
| <description>If the number of blocks on the DataNode is below this |
| threshold then it will send block reports for all Storage Directories |
| in a single message. |
| |
| If the number of blocks exceeds this threshold then the DataNode will |
| send block reports for each Storage Directory in separate messages. |
| |
| Set to zero to always split. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.max.full.block.report.leases</name> |
| <value>6</value> |
| <description>The maximum number of leases for full block reports that the |
| NameNode will issue at any given time. This prevents the NameNode from |
| being flooded with full block reports that use up all the RPC handler |
| threads. This number should never be more than the number of RPC handler |
| threads or less than 1. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.full.block.report.lease.length.ms</name> |
| <value>300000</value> |
| <description> |
| The number of milliseconds that the NameNode will wait before invalidating |
| a full block report lease. This prevents a crashed DataNode from |
| permanently using up a full block report lease. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.directoryscan.interval</name> |
| <value>21600</value> |
| <description>Interval in seconds for Datanode to scan data directories and |
| reconcile the difference between blocks in memory and on the disk. |
    Multiple time unit suffixes (case insensitive) are supported, as described
    in dfs.heartbeat.interval. If no time unit is specified then seconds
    is assumed.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.directoryscan.threads</name> |
| <value>1</value> |
  <description>The number of threads in the thread pool used to compile
    reports for volumes in parallel.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.directoryscan.throttle.limit.ms.per.sec</name> |
| <value>1000</value> |
| <description>The report compilation threads are limited to only running for |
| a given number of milliseconds per second, as configured by the |
| property. The limit is taken per thread, not in aggregate, e.g. setting |
| a limit of 100ms for 4 compiler threads will result in each thread being |
| limited to 100ms, not 25ms. |
| |
| Note that the throttle does not interrupt the report compiler threads, so the |
| actual running time of the threads per second will typically be somewhat |
| higher than the throttle limit, usually by no more than 20%. |
| |
| Setting this limit to 1000 disables compiler thread throttling. Only |
| values between 1 and 1000 are valid. Setting an invalid value will result |
| in the throttle being disabled and an error message being logged. 1000 is |
| the default setting. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.reconcile.blocks.batch.size</name> |
| <value>1000</value> |
  <description>Defines the number of blocks processed in each reconcile batch.</description>
| </property> |
| |
| <property> |
| <name>dfs.datanode.reconcile.blocks.batch.interval</name> |
| <value>2000</value> |
  <description>Defines the interval between reconcile batches.</description>
| </property> |
| |
| <property> |
| <name>dfs.heartbeat.interval</name> |
| <value>3</value> |
| <description> |
| Determines datanode heartbeat interval in seconds. |
| Can use the following suffix (case insensitive): |
| ms(millis), s(sec), m(min), h(hour), d(day) |
| to specify the time (such as 2s, 2m, 1h, etc.). |
| Or provide complete number in seconds (such as 30 for 30 seconds). |
| If no time unit is specified then seconds is assumed. |
| </description> |
| </property> |
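
<!-- For example, a 30 second heartbeat interval can be expressed with or without a
     time suffix; 30, 30s and 30000ms are all equivalent:

     <property>
       <name>dfs.heartbeat.interval</name>
       <value>30s</value>
     </property>
-->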
| |
| <property> |
| <name>dfs.datanode.lifeline.interval.seconds</name> |
| <value></value> |
| <description> |
| Sets the interval in seconds between sending DataNode Lifeline Protocol |
| messages from the DataNode to the NameNode. The value must be greater than |
| the value of dfs.heartbeat.interval. If this property is not defined, then |
| the default behavior is to calculate the interval as 3x the value of |
| dfs.heartbeat.interval. Note that normal heartbeat processing may cause the |
| DataNode to postpone sending lifeline messages if they are not required. |
| Under normal operations with speedy heartbeat processing, it is possible |
| that no lifeline messages will need to be sent at all. This property has no |
| effect if dfs.namenode.lifeline.rpc-address is not defined. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.handler.count</name> |
| <value>10</value> |
| <description>The number of Namenode RPC server threads that listen to |
| requests from clients. |
| If dfs.namenode.servicerpc-address is not configured then |
| Namenode RPC server threads listen to requests from all nodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.service.handler.count</name> |
| <value>10</value> |
| <description>The number of Namenode RPC server threads that listen to |
| requests from DataNodes and from all other non-client nodes. |
| dfs.namenode.service.handler.count will be valid only if |
| dfs.namenode.servicerpc-address is configured. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lifeline.handler.ratio</name> |
| <value>0.10</value> |
| <description> |
| A ratio applied to the value of dfs.namenode.handler.count, which then |
| provides the number of RPC server threads the NameNode runs for handling the |
| lifeline RPC server. For example, if dfs.namenode.handler.count is 100, and |
    dfs.namenode.lifeline.handler.ratio is 0.10, then the NameNode starts
| 100 * 0.10 = 10 threads for handling the lifeline RPC server. It is common |
| to tune the value of dfs.namenode.handler.count as a function of the number |
| of DataNodes in a cluster. Using this property allows for the lifeline RPC |
| server handler threads to be tuned automatically without needing to touch a |
| separate property. Lifeline message processing is lightweight, so it is |
| expected to require many fewer threads than the main NameNode RPC server. |
| This property is not used if dfs.namenode.lifeline.handler.count is defined, |
| which sets an absolute thread count. This property has no effect if |
| dfs.namenode.lifeline.rpc-address is not defined. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lifeline.handler.count</name> |
| <value></value> |
| <description> |
| Sets an absolute number of RPC server threads the NameNode runs for handling |
| the DataNode Lifeline Protocol and HA health check requests from ZKFC. If |
| this property is defined, then it overrides the behavior of |
| dfs.namenode.lifeline.handler.ratio. By default, it is not defined. This |
| property has no effect if dfs.namenode.lifeline.rpc-address is not defined. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.threshold-pct</name> |
| <value>0.999f</value> |
| <description> |
| Specifies the percentage of blocks that should satisfy |
| the minimal replication requirement defined by dfs.namenode.replication.min. |
| Values less than or equal to 0 mean not to wait for any particular |
| percentage of blocks before exiting safemode. |
| Values greater than 1 will make safe mode permanent. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.min.datanodes</name> |
| <value>0</value> |
| <description> |
| Specifies the number of datanodes that must be considered alive |
| before the name node exits safemode. |
| Values less than or equal to 0 mean not to take the number of live |
| datanodes into account when deciding whether to remain in safe mode |
| during startup. |
| Values greater than the number of datanodes in the cluster |
| will make safe mode permanent. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.recheck.interval</name> |
| <value>1000</value> |
| <description> |
| Interval in msec for checking safe mode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.extension</name> |
| <value>30000</value> |
| <description> |
| Determines extension of safe mode in milliseconds after the threshold level |
| is reached. Support multiple time unit suffix (case insensitive), as |
| described in dfs.heartbeat.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.resource.check.interval</name> |
| <value>5000</value> |
| <description> |
| The interval in milliseconds at which the NameNode resource checker runs. |
| The checker calculates the number of the NameNode storage volumes whose |
| available spaces are more than dfs.namenode.resource.du.reserved, and |
| enters safemode if the number becomes lower than the minimum value |
| specified by dfs.namenode.resource.checked.volumes.minimum. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.resource.du.reserved</name> |
| <value>104857600</value> |
| <description> |
| The amount of space to reserve/require for a NameNode storage directory |
| in bytes. The default is 100MB. Support multiple size unit |
| suffix(case insensitive), as described in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.resource.checked.volumes</name> |
| <value></value> |
| <description> |
| A list of local directories for the NameNode resource checker to check in |
| addition to the local edits directories. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.resource.checked.volumes.minimum</name> |
| <value>1</value> |
| <description> |
| The minimum number of redundant NameNode storage volumes required. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.balance.bandwidthPerSec</name> |
| <value>100m</value> |
| <description> |
| Specifies the maximum amount of bandwidth that each datanode |
    can utilize for balancing, in terms of
    the number of bytes per second. You can use the following
    suffixes (case insensitive):
    k(kilo), m(mega), g(giga), t(tera), p(peta), e(exa) to specify the size
    (such as 128k, 512m, 1g, etc.),
    or provide the complete size in bytes (such as 134217728 for 128 MB).
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.hosts</name> |
| <value></value> |
| <description>Names a file that contains a list of hosts that are |
| permitted to connect to the namenode. The full pathname of the file |
| must be specified. If the value is empty, all hosts are |
| permitted.</description> |
| </property> |
| |
| <property> |
| <name>dfs.hosts.exclude</name> |
| <value></value> |
| <description>Names a file that contains a list of hosts that are |
| not permitted to connect to the namenode. The full pathname of the |
| file must be specified. If the value is empty, no hosts are |
| excluded.</description> |
| </property> |
| |
| <property> |
| <name>dfs.hosts.timeout</name> |
| <value>0</value> |
  <description>Specifies a timeout (in milliseconds) for reading the dfs.hosts file.
    A value of zero means no timeout is set.</description>
| </property> |
| |
| <property> |
| <name>dfs.namenode.max.objects</name> |
| <value>0</value> |
| <description>The maximum number of files, directories and blocks |
| dfs supports. A value of zero indicates no limit to the number |
| of objects that dfs supports. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.datanode.registration.ip-hostname-check</name> |
| <value>true</value> |
| <description> |
| If true (the default), then the namenode requires that a connecting |
| datanode's address must be resolved to a hostname. If necessary, a reverse |
| DNS lookup is performed. All attempts to register a datanode from an |
| unresolvable address are rejected. |
| |
| It is recommended that this setting be left on to prevent accidental |
| registration of datanodes listed by hostname in the excludes file during a |
| DNS outage. Only set this to false in environments where there is no |
| infrastructure to support reverse DNS lookup. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.interval</name> |
| <value>30</value> |
| <description>Namenode periodicity in seconds to check if |
| decommission or maintenance is complete. Support multiple time unit |
| suffix(case insensitive), as described in dfs.heartbeat.interval. |
| If no time unit is specified then seconds is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.blocks.per.interval</name> |
| <value>500000</value> |
| <description>The approximate number of blocks to process per decommission |
| or maintenance interval, as defined in dfs.namenode.decommission.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.max.concurrent.tracked.nodes</name> |
| <value>100</value> |
| <description> |
| The maximum number of decommission-in-progress or |
    entering-maintenance datanodes that will be tracked at one time by
    the namenode. Tracking these datanodes consumes additional NN memory
    proportional to the number of blocks on the datanode. Having a conservative
| limit reduces the potential impact of decommissioning or maintenance of |
| a large number of nodes at once. |
| |
| A value of 0 means no limit will be enforced. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.monitor.class</name> |
| <value>org.apache.hadoop.hdfs.server.blockmanagement.DatanodeAdminDefaultMonitor</value> |
| <description> |
| Determines the implementation used for the decommission manager. The only |
| valid options are: |
| |
| org.apache.hadoop.hdfs.server.blockmanagement.DatanodeAdminDefaultMonitor |
| org.apache.hadoop.hdfs.server.blockmanagement.DatanodeAdminBackoffMonitor |
| |
| </description> |
| </property> |
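
<!-- For example, to switch to the backoff monitor listed above, hdfs-site.xml could
     set:

     <property>
       <name>dfs.namenode.decommission.monitor.class</name>
       <value>org.apache.hadoop.hdfs.server.blockmanagement.DatanodeAdminBackoffMonitor</value>
     </property>
-->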
| |
| <property> |
| <name>dfs.namenode.decommission.backoff.monitor.pending.limit</name> |
| <value>10000</value> |
| <description> |
| When the Backoff monitor is enabled, determines the maximum number of blocks |
| related to decommission and maintenance operations that can be loaded |
| into the replication queue at any given time. Every |
| dfs.namenode.decommission.interval seconds, the list is checked to see if |
| the blocks have become fully replicated and then further blocks are added |
| to reach the limit defined in this parameter. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.backoff.monitor.pending.blocks.per.lock</name> |
| <value>1000</value> |
| <description> |
| When loading blocks into the replication queue, release the namenode write |
| lock after the defined number of blocks have been processed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.redundancy.interval.seconds</name> |
| <value>3</value> |
| <description>The periodicity in seconds with which the namenode computes |
| low redundancy work for datanodes. Support multiple time unit suffix(case insensitive), |
| as described in dfs.heartbeat.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.redundancy.queue.restart.iterations</name> |
| <value>2400</value> |
| <description>When picking blocks from the low redundancy queues, reset the |
| bookmarked iterator after the set number of iterations to ensure any blocks |
| which were not processed on the first pass are retried before the iterators |
| would naturally reach their end point. This ensures blocks are retried |
| more frequently when there are many pending blocks or blocks are |
| continuously added to the queues preventing the iterator reaching its |
| natural endpoint. |
| The default setting of 2400 combined with the default of |
| dfs.namenode.redundancy.interval.seconds means the iterators will be reset |
| approximately every 2 hours. |
| Setting this parameter to zero disables the feature and the iterators will |
| be reset only when the end of all queues has been reached. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.accesstime.precision</name> |
| <value>3600000</value> |
  <description>The access time for an HDFS file is precise up to this value.
| The default value is 1 hour. Setting a value of 0 disables |
| access times for HDFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.plugins</name> |
| <value></value> |
| <description>Comma-separated list of datanode plug-ins to be activated. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.plugins</name> |
| <value></value> |
| <description>Comma-separated list of namenode plug-ins to be activated. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.block-placement-policy.default.prefer-local-node</name> |
| <value>true</value> |
| <description>Controls how the default block placement policy places |
| the first replica of a block. When true, it will prefer the node where |
| the client is running. When false, it will prefer a node in the same rack |
| as the client. Setting to false avoids situations where entire copies of |
| large files end up on a single node, thus creating hotspots. |
| </description> |
| </property> |
| |
| |
| <property> |
| <name>dfs.stream-buffer-size</name> |
| <value>4096</value> |
| <description>The size of buffer to stream files. |
| The size of this buffer should probably be a multiple of hardware |
| page size (4096 on Intel x86), and it determines how much data is |
| buffered during read and write operations.</description> |
| </property> |
| |
| <property> |
| <name>dfs.bytes-per-checksum</name> |
| <value>512</value> |
| <description>The number of bytes per checksum. Must not be larger than |
| dfs.stream-buffer-size</description> |
| </property> |
| |
| <property> |
| <name>dfs.client-write-packet-size</name> |
| <value>65536</value> |
| <description>Packet size for clients to write</description> |
| </property> |
| |
| <property> |
| <name>dfs.client.write.exclude.nodes.cache.expiry.interval.millis</name> |
| <value>600000</value> |
| <description>The maximum period to keep a DN in the excluded nodes list |
| at a client. After this period, in milliseconds, the previously excluded node(s) will |
| be removed automatically from the cache and will be considered good for block allocations |
| again. Useful to lower or raise in situations where you keep a file open for very long |
| periods (such as a Write-Ahead-Log (WAL) file) to make the writer tolerant to cluster maintenance |
| restarts. Defaults to 10 minutes.</description> |
| </property> |
| |
| <property> |
| <name>dfs.client.write.recover.lease.on.close.exception</name> |
| <value>false</value> |
| <description> |
    Set to true to call the recoverLease operation automatically when DFSOutputStream encounters an exception while closing.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.dir</name> |
| <value>file://${hadoop.tmp.dir}/dfs/namesecondary</value> |
| <description>Determines where on the local filesystem the DFS secondary |
| name node should store the temporary images to merge. |
| If this is a comma-delimited list of directories then the image is |
| replicated in all of the directories for redundancy. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.edits.dir</name> |
| <value>${dfs.namenode.checkpoint.dir}</value> |
| <description>Determines where on the local filesystem the DFS secondary |
| name node should store the temporary edits to merge. |
    If this is a comma-delimited list of directories then the edits are
    replicated in all of the directories for redundancy.
| Default value is same as dfs.namenode.checkpoint.dir |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.period</name> |
| <value>3600</value> |
| <description> |
| The number of seconds between two periodic checkpoints. |
    Multiple time unit suffixes (case insensitive) are supported, as described
    in dfs.heartbeat.interval. If no time unit is specified then seconds
    is assumed.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.txns</name> |
| <value>1000000</value> |
| <description>The Secondary NameNode or CheckpointNode will create a checkpoint |
| of the namespace every 'dfs.namenode.checkpoint.txns' transactions, regardless |
| of whether 'dfs.namenode.checkpoint.period' has expired. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.check.period</name> |
| <value>60</value> |
| <description>The SecondaryNameNode and CheckpointNode will poll the NameNode |
| every 'dfs.namenode.checkpoint.check.period' seconds to query the number |
    of uncheckpointed transactions. Multiple time unit suffixes (case insensitive)
    are supported, as described in dfs.heartbeat.interval. If no time unit is specified
    then seconds is assumed.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.max-retries</name> |
| <value>3</value> |
| <description>The SecondaryNameNode retries failed checkpointing. If the |
| failure occurs while loading fsimage or replaying edits, the number of |
| retries is limited by this variable. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.check.quiet-multiplier</name> |
| <value>1.5</value> |
| <description> |
    Used to calculate the amount of time between retries when in the 'quiet' period
    for creating checkpoints (the active namenode already has an up-to-date image from another
    checkpointer). In that case we wait a multiple of dfs.namenode.checkpoint.check.period before
    retrying the checkpoint, because another node is likely already managing the checkpoints,
    allowing us to save the bandwidth needed to transfer checkpoints that don't need to be used.
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.num.checkpoints.retained</name> |
| <value>2</value> |
| <description>The number of image checkpoint files (fsimage_*) that will be retained by |
| the NameNode and Secondary NameNode in their storage directories. All edit |
| logs (stored on edits_* files) necessary to recover an up-to-date namespace from the oldest retained |
| checkpoint will also be retained. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.num.extra.edits.retained</name> |
| <value>1000000</value> |
| <description>The number of extra transactions which should be retained |
| beyond what is minimally necessary for a NN restart. |
    It does not translate directly to a file's age, or the number of files kept,
| but to the number of transactions (here "edits" means transactions). |
| One edit file may contain several transactions (edits). |
| During checkpoint, NameNode will identify the total number of edits to retain as extra by |
| checking the latest checkpoint transaction value, subtracted by the value of this property. |
| Then, it scans edits files to identify the older ones that don't include the computed range of |
| retained transactions that are to be kept around, and purges them subsequently. |
    The retention can be useful for audit purposes, or for an HA setup where a remote Standby Node may have
    been offline for some time and needs a longer backlog of retained
    edits in order to start again.
| Typically each edit is on the order of a few hundred bytes, so the default |
| of 1 million edits should be on the order of hundreds of MBs or low GBs. |
| |
| NOTE: Fewer extra edits may be retained than value specified for this setting |
| if doing so would mean that more segments would be retained than the number |
| configured by dfs.namenode.max.extra.edits.segments.retained. |
| </description> |
| </property> |
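| <!-- |
| Worked example (illustrative numbers): if the latest checkpoint ends at |
| transaction id 5,000,000 and dfs.namenode.num.extra.edits.retained is 1,000,000, |
| the NameNode aims to keep transactions from id 4,000,000 onward; edit segments |
| that end entirely before that id become candidates for purging, subject to |
| dfs.namenode.max.extra.edits.segments.retained. |
| --> |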
| |
| <property> |
| <name>dfs.namenode.max.extra.edits.segments.retained</name> |
| <value>10000</value> |
| <description>The maximum number of extra edit log segments which should be retained |
| beyond what is minimally necessary for a NN restart. When used in conjunction with |
| dfs.namenode.num.extra.edits.retained, this configuration property serves to cap |
| the number of extra edits files to a reasonable value. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.key.update-interval</name> |
| <value>86400000</value> |
| <description>The update interval for master key for delegation tokens |
| in the namenode in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.token.max-lifetime</name> |
| <value>604800000</value> |
| <description>The maximum lifetime in milliseconds for which a delegation |
| token is valid. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.token.renew-interval</name> |
| <value>86400000</value> |
| <description>The renewal interval for delegation token in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.failed.volumes.tolerated</name> |
| <value>0</value> |
| <description>The number of volumes that are allowed to |
| fail before a datanode stops offering service. By default |
| any volume failure will cause a datanode to shutdown. |
| The value should be greater than or equal to -1; a value of -1 means that at |
| least 1 valid volume is required. |
| </description> |
| </property> |
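| <!-- |
| Example (illustrative, not a default): a DataNode with several data directories |
| can be allowed to keep serving after a single disk failure by copying this into |
| hdfs-site.xml: |
| <property> |
| <name>dfs.datanode.failed.volumes.tolerated</name> |
| <value>1</value> |
| </property> |
| --> |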
| |
| <property> |
| <name>dfs.datanode.volumes.replica-add.threadpool.size</name> |
| <value></value> |
| <description>Specifies the maximum number of threads to use for |
| adding blocks to a volume. The default value for this configuration is the |
| maximum of (number of volumes * number of block pool services, number of processors). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.compress</name> |
| <value>false</value> |
| <description>When this value is true, the dfs image will be compressed. |
| Enabling this will be very helpful if the dfs image is large, since it can |
| avoid consuming a lot of network bandwidth when the Standby NameNode (SBN) |
| uploads a new dfs image to the Active NameNode (ANN). The compression codec |
| is specified by the setting dfs.image.compression.codec. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.compression.codec</name> |
| <value>org.apache.hadoop.io.compress.DefaultCodec</value> |
| <description>If the dfs image is compressed, how should it be compressed? |
| This has to be a codec defined in io.compression.codecs. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.transfer.timeout</name> |
| <value>60000</value> |
| <description> |
| Socket timeout for the HttpURLConnection instance used in the image |
| transfer. This is measured in milliseconds. |
| This timeout prevents client hangs if the connection is idle |
| for this configured timeout, during image transfer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.transfer.bandwidthPerSec</name> |
| <value>52428800</value> |
| <description> |
| Maximum bandwidth used for regular image transfers (instead of |
| bootstrapping the standby namenode), in bytes per second. |
| This can help keep normal namenode operations responsive during |
| checkpointing. |
| The default value is 50 MB per second. |
| The maximum bandwidth used for bootstrapping standby namenode is |
| configured with dfs.image.transfer-bootstrap-standby.bandwidthPerSec. |
| Supports multiple size unit suffixes (case insensitive), as described |
| in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.transfer-bootstrap-standby.bandwidthPerSec</name> |
| <value>0</value> |
| <description> |
| Maximum bandwidth used for transferring image to bootstrap standby |
| namenode, in bytes per second. |
| A default value of 0 indicates that throttling is disabled. This default |
| value should be used in most cases, to ensure timely HA operations. |
| The maximum bandwidth used for regular image transfers is configured |
| with dfs.image.transfer.bandwidthPerSec. |
| Supports multiple size unit suffixes (case insensitive), as described in |
| dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.transfer.chunksize</name> |
| <value>65536</value> |
| <description> |
| Chunk size in bytes used to upload the checkpoint. |
| Chunked streaming is used to avoid internal buffering of the contents |
| of very large image files. |
| Supports multiple size unit suffixes (case insensitive), as described |
| in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.parallel.load</name> |
| <value>false</value> |
| <description> |
| If true, write sub-section entries to the fsimage index so it can |
| be loaded in parallel. Also controls whether parallel loading |
| will be used for an image previously created with sub-sections. |
| If the image contains sub-sections and this is set to false, |
| parallel loading will not be used. |
| Parallel loading is not compatible with image compression, |
| so if dfs.image.compress is set to true this setting will be |
| ignored and no parallel loading will occur. |
| Enabling this feature may impact rolling upgrades and downgrades if |
| the previous version does not support this feature. If the feature was |
| enabled and a downgrade is required, first set this parameter to |
| false, then save the namespace to create an fsimage with no |
| sub-sections, and then perform the downgrade. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.parallel.target.sections</name> |
| <value>12</value> |
| <description> |
| Controls the number of sub-sections that will be written to |
| fsimage for each section. This should be larger than |
| dfs.image.parallel.threads, otherwise not all threads will be |
| used when loading. Ideally, have at least twice as many target |
| sections as threads, so that each thread loads more than one |
| section and a single long-running section does not dominate |
| the load time. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.parallel.inode.threshold</name> |
| <value>1000000</value> |
| <description> |
| If the image contains fewer inodes than this setting, then |
| do not write sub-sections and hence disable parallel loading. |
| This is because small images load very quickly in serial and |
| parallel loading is not needed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.parallel.threads</name> |
| <value>4</value> |
| <description> |
| The number of threads to use when dfs.image.parallel.load is |
| enabled. This setting should be less than |
| dfs.image.parallel.target.sections. The optimal number of |
| threads will depend on the hardware and environment. |
| </description> |
| </property> |
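| <!-- |
| Example (illustrative values): enabling parallel fsimage loading while keeping |
| the recommended ratio of at least twice as many target sections as threads, and |
| leaving dfs.image.compress at false since compression and parallel loading are |
| not compatible: |
| <property> |
| <name>dfs.image.parallel.load</name> |
| <value>true</value> |
| </property> |
| <property> |
| <name>dfs.image.parallel.threads</name> |
| <value>4</value> |
| </property> |
| <property> |
| <name>dfs.image.parallel.target.sections</name> |
| <value>12</value> |
| </property> |
| --> |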
| |
| <property> |
| <name>dfs.edit.log.transfer.timeout</name> |
| <value>30000</value> |
| <description> |
| Socket timeout for edit log transfer in milliseconds. This timeout |
| should be configured such that normal edit log transfer for journal |
| node syncing can complete successfully. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.edit.log.transfer.bandwidthPerSec</name> |
| <value>0</value> |
| <description> |
| Maximum bandwidth used for transferring edit logs between journal nodes |
| for syncing, in bytes per second. |
| A default value of 0 indicates that throttling is disabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.support.allow.format</name> |
| <value>true</value> |
| <description>Does HDFS namenode allow itself to be formatted? |
| You may consider setting this to false for any production |
| cluster, to avoid any possibility of formatting a running DFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.max.transfer.threads</name> |
| <value>4096</value> |
| <description> |
| Specifies the maximum number of threads to use for transferring data |
| in and out of the DN. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.scan.period.hours</name> |
| <value>504</value> |
| <description> |
| If this is positive, the DataNode will not scan any |
| individual block more than once in the specified scan period. |
| If this is negative, the block scanner is disabled. |
| If this is set to zero, then the default value of 504 hours |
| or 3 weeks is used. Prior versions of HDFS incorrectly documented |
| that setting this key to zero will disable the block scanner. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.scanner.volume.bytes.per.second</name> |
| <value>1048576</value> |
| <description> |
| If this is configured to be less than or equal to zero, the DataNode's block scanner will be disabled. If this |
| is positive, this is the number of bytes per second that the DataNode's |
| block scanner will try to scan from each volume. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.scanner.skip.recent.accessed</name> |
| <value>false</value> |
| <description> |
| If this is true, the scanner will check the access time of the block file to avoid |
| scanning blocks accessed during the recent scan period, reducing disk IO. |
| This feature will not work if the DataNode volume is mounted with the noatime option. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.scanner.volume.join.timeout.ms</name> |
| <value>5000</value> |
| <description> |
| The amount of time in milliseconds that the BlockScanner times out waiting |
| for the VolumeScanner thread to join during a shutdown call. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.readahead.bytes</name> |
| <value>4194304</value> |
| <description> |
| While reading block files, if the Hadoop native libraries are available, |
| the datanode can use the posix_fadvise system call to explicitly |
| page data into the operating system buffer cache ahead of the current |
| reader's position. This can improve performance especially when |
| disks are highly contended. |
| |
| This configuration specifies the number of bytes ahead of the current |
| read position which the datanode will attempt to read ahead. This |
| feature may be disabled by configuring this property to 0. |
| |
| If the native libraries are not available, this configuration has no |
| effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.drop.cache.behind.reads</name> |
| <value>false</value> |
| <description> |
| In some workloads, the data read from HDFS is known to be large enough |
| that it is unlikely to be useful to cache it in the |
| operating system buffer cache. In this case, the DataNode may be |
| configured to automatically purge all data from the buffer cache |
| after it is delivered to the client. This behavior is automatically |
| disabled for workloads which read only short sections of a block |
| (e.g. HBase random-IO workloads). |
| |
| This may improve performance for some workloads by freeing buffer |
| cache space usage for more cacheable data. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.drop.cache.behind.writes</name> |
| <value>false</value> |
| <description> |
| In some workloads, the data written to HDFS is known to be large enough |
| that it is unlikely to be useful to cache it in the |
| operating system buffer cache. In this case, the DataNode may be |
| configured to automatically purge all data from the buffer cache |
| after it is written to disk. |
| |
| This may improve performance for some workloads by freeing buffer |
| cache space usage for more cacheable data. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.sync.behind.writes</name> |
| <value>false</value> |
| <description> |
| If this configuration is enabled, the datanode will instruct the |
| operating system to enqueue all written data to the disk immediately |
| after it is written. This differs from the usual OS policy which |
| may wait for up to 30 seconds before triggering writeback. |
| |
| This may improve performance for some workloads by smoothing the |
| IO profile for data written to disk. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.max.attempts</name> |
| <value>15</value> |
| <description> |
| Expert only. The number of client failover attempts that should be |
| made before the failover is considered failed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.sleep.base.millis</name> |
| <value>500</value> |
| <description> |
| Expert only. The time to wait, in milliseconds, between failover |
| attempts increases exponentially as a function of the number of |
| attempts made so far, with a random factor of +/- 50%. This option |
| specifies the base value used in the failover calculation. The |
| first failover will retry immediately. The 2nd failover attempt |
| will delay at least dfs.client.failover.sleep.base.millis |
| milliseconds. And so on. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.sleep.max.millis</name> |
| <value>15000</value> |
| <description> |
| Expert only. The time to wait, in milliseconds, between failover |
| attempts increases exponentially as a function of the number of |
| attempts made so far, with a random factor of +/- 50%. This option |
| specifies the maximum value to wait between failovers. |
| Specifically, the time between two failover attempts will not |
| exceed +/- 50% of dfs.client.failover.sleep.max.millis |
| milliseconds. |
| </description> |
| </property> |
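| <!-- |
| Worked example (assuming the defaults above and ignoring the random +/- 50% |
| factor): the first failover retries immediately, and later attempts back off |
| exponentially from dfs.client.failover.sleep.base.millis up to this cap, i.e. |
| roughly 500 ms, 1000 ms, 2000 ms, 4000 ms, 8000 ms, then 15000 ms for every |
| subsequent attempt until dfs.client.failover.max.attempts is reached. |
| --> |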
| |
| <property> |
| <name>dfs.client.failover.connection.retries</name> |
| <value>0</value> |
| <description> |
| Expert only. Indicates the number of retries a failover IPC client |
| will make to establish a server connection. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.connection.retries.on.timeouts</name> |
| <value>0</value> |
| <description> |
| Expert only. The number of retry attempts a failover IPC client |
| will make on socket timeout when establishing a server connection. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.datanode-restart.timeout</name> |
| <value>30</value> |
| <description> |
| Expert only. The time to wait, in seconds, from reception of a |
| datanode shutdown notification for quick restart, until declaring |
| the datanode dead and invoking the normal recovery mechanisms. |
| The notification is sent by a datanode when it is being shut down |
| using the shutdownDatanode admin command with the upgrade option. |
| Supports multiple time unit suffixes (case insensitive), as described |
| in dfs.heartbeat.interval. If no time unit is specified then seconds |
| is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.nameservices</name> |
| <value></value> |
| <description> |
| Comma-separated list of nameservices. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.nameservice.id</name> |
| <value></value> |
| <description> |
| The ID of this nameservice. If the nameservice ID is not |
| configured or more than one nameservice is configured for |
| dfs.nameservices it is determined automatically by |
| matching the local node's address with the configured address. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.internal.nameservices</name> |
| <value></value> |
| <description> |
| Comma-separated list of nameservices that belong to this cluster. |
| Datanode will report to all the nameservices in this list. By default |
| this is set to the value of dfs.nameservices. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.namenodes.EXAMPLENAMESERVICE</name> |
| <value></value> |
| <description> |
| This is the prefix for a given nameservice; it contains a comma-separated |
| list of namenodes for that nameservice (e.g. EXAMPLENAMESERVICE). |
| |
| Unique identifiers for each NameNode in the nameservice, delimited by |
| commas. This will be used by DataNodes to determine all the NameNodes |
| in the cluster. For example, if you used "mycluster" as the nameservice |
| ID previously, and you wanted to use "nn1" and "nn2" as the individual |
| IDs of the NameNodes, you would configure a property |
| dfs.ha.namenodes.mycluster, and its value "nn1,nn2". |
| </description> |
| </property> |
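| <!-- |
| Example (taken from the description above, names are illustrative): for a |
| nameservice ID of "mycluster" with two NameNodes identified as nn1 and nn2, |
| hdfs-site.xml would contain: |
| <property> |
| <name>dfs.ha.namenodes.mycluster</name> |
| <value>nn1,nn2</value> |
| </property> |
| --> |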
| |
| <property> |
| <name>dfs.ha.namenode.id</name> |
| <value></value> |
| <description> |
| The ID of this namenode. If the namenode ID is not configured it |
| is determined automatically by matching the local node's address |
| with the configured address. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.log-roll.period</name> |
| <value>120</value> |
| <description> |
| How often, in seconds, the StandbyNode should ask the active to |
| roll edit logs. Since the StandbyNode only reads from finalized |
| log segments, the StandbyNode will only be as up-to-date as how |
| often the logs are rolled. Note that failover triggers a log roll |
| so the StandbyNode will be up to date before it becomes active. |
| Supports multiple time unit suffixes (case insensitive), as described |
| in dfs.heartbeat.interval. If no time unit is specified then seconds |
| is assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.tail-edits.period</name> |
| <value>60</value> |
| <description> |
| How often the StandbyNode and ObserverNode should check if there are new |
| edit log entries ready to be consumed. This is the minimum period between |
| checking; exponential backoff will be applied if no edits are found and |
| dfs.ha.tail-edits.period.backoff-max is configured. By default, no |
| backoff is applied. |
| Supports multiple time unit suffixes (case insensitive), as described |
| in dfs.heartbeat.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.tail-edits.period.backoff-max</name> |
| <value>0</value> |
| <description> |
| The maximum time the tailer should wait between checking for new edit log |
| entries. Exponential backoff will be applied when an edit log tail is |
| performed but no edits are available to be read. Values less than or |
| equal to zero disable backoff entirely; this is the default behavior. |
| Supports multiple time unit suffixes (case insensitive), as described |
| in dfs.heartbeat.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.tail-edits.namenode-retries</name> |
| <value>3</value> |
| <description> |
| Number of retries to use when contacting the namenode when tailing the log. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.tail-edits.rolledits.timeout</name> |
| <value>60</value> |
| <description>The timeout in seconds of calling rollEdits RPC on Active NN. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.automatic-failover.enabled</name> |
| <value>false</value> |
| <description> |
| Whether automatic failover is enabled. See the HDFS High |
| Availability documentation for details on automatic HA |
| configuration. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.use.datanode.hostname</name> |
| <value>false</value> |
| <description>Whether clients should use datanode hostnames when |
| connecting to datanodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.use.datanode.hostname</name> |
| <value>false</value> |
| <description>Whether datanodes should use datanode hostnames when |
| connecting to other datanodes for data transfer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.local.interfaces</name> |
| <value></value> |
| <description>A comma separated list of network interface names to use |
| for data transfer between the client and datanodes. When creating |
| a connection to read from or write to a datanode, the client |
| chooses one of the specified interfaces at random and binds its |
| socket to the IP of that interface. Individual names may be |
| specified as either an interface name (eg "eth0"), a subinterface |
| name (eg "eth0:0"), or an IP address (which may be specified using |
| CIDR notation to match a range of IPs). |
| </description> |
| </property> |
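| <!-- |
| Example (illustrative value): the list may mix the accepted forms, e.g. an |
| interface name, a subinterface name and a CIDR range: |
| <property> |
| <name>dfs.client.local.interfaces</name> |
| <value>eth0,eth1:0,10.0.0.0/8</value> |
| </property> |
| --> |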
| |
| <property> |
| <name>dfs.datanode.shared.file.descriptor.paths</name> |
| <value>/dev/shm,/tmp</value> |
| <description> |
| A comma-separated list of paths to use when creating file descriptors that |
| will be shared between the DataNode and the DFSClient. Typically we use |
| /dev/shm, so that the file descriptors will not be written to disk. |
| It tries paths in order until the creation of a shared memory segment succeeds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.short.circuit.shared.memory.watcher.interrupt.check.ms</name> |
| <value>60000</value> |
| <description> |
| The length of time in milliseconds that the short-circuit shared memory |
| watcher will go between checking for java interruptions sent from other |
| threads. This is provided mainly for unit tests. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.kerberos.principal</name> |
| <value></value> |
| <description> |
| The NameNode service principal. This is typically set to |
| nn/_HOST@REALM.TLD. Each NameNode will substitute _HOST with its |
| own fully qualified hostname at startup. The _HOST placeholder |
| allows using the same configuration setting on both NameNodes |
| in an HA setup. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.keytab.file</name> |
| <value></value> |
| <description> |
| The keytab file used by each NameNode daemon to login as its |
| service principal. The principal name is configured with |
| dfs.namenode.kerberos.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.kerberos.principal</name> |
| <value></value> |
| <description> |
| The DataNode service principal. This is typically set to |
| dn/_HOST@REALM.TLD. Each DataNode will substitute _HOST with its |
| own fully qualified hostname at startup. The _HOST placeholder |
| allows using the same configuration setting on all DataNodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.keytab.file</name> |
| <value></value> |
| <description> |
| The keytab file used by each DataNode daemon to login as its |
| service principal. The principal name is configured with |
| dfs.datanode.kerberos.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.kerberos.principal</name> |
| <value></value> |
| <description> |
| The JournalNode service principal. This is typically set to |
| jn/_HOST@REALM.TLD. Each JournalNode will substitute _HOST with its |
| own fully qualified hostname at startup. The _HOST placeholder |
| allows using the same configuration setting on all JournalNodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.keytab.file</name> |
| <value></value> |
| <description> |
| The keytab file used by each JournalNode daemon to login as its |
| service principal. The principal name is configured with |
| dfs.journalnode.kerberos.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.kerberos.internal.spnego.principal</name> |
| <value>${dfs.web.authentication.kerberos.principal}</value> |
| <description> |
| The server principal used by the NameNode for web UI SPNEGO |
| authentication when Kerberos security is enabled. This is |
| typically set to HTTP/_HOST@REALM.TLD. The SPNEGO server principal |
| begins with the prefix HTTP/ by convention. |
| |
| If the value is '*', the web server will attempt to login with |
| every principal specified in the keytab file |
| dfs.web.authentication.kerberos.keytab. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.kerberos.internal.spnego.principal</name> |
| <value></value> |
| <description> |
| The server principal used by the JournalNode HTTP Server for |
| SPNEGO authentication when Kerberos security is enabled. This is |
| typically set to HTTP/_HOST@REALM.TLD. The SPNEGO server principal |
| begins with the prefix HTTP/ by convention. |
| |
| If the value is '*', the web server will attempt to login with |
| every principal specified in the keytab file |
| dfs.web.authentication.kerberos.keytab. |
| |
| For most deployments this can be set to ${dfs.web.authentication.kerberos.principal} |
| i.e. use the value of dfs.web.authentication.kerberos.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name> |
| <value>${dfs.web.authentication.kerberos.principal}</value> |
| <description> |
| The server principal used by the Secondary NameNode for web UI SPNEGO |
| authentication when Kerberos security is enabled. Like all other |
| Secondary NameNode settings, it is ignored in an HA setup. |
| |
| If the value is '*', the web server will attempt to login with |
| every principal specified in the keytab file |
| dfs.web.authentication.kerberos.keytab. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.web.authentication.kerberos.principal</name> |
| <value></value> |
| <description> |
| The server principal used by the NameNode for WebHDFS SPNEGO |
| authentication. |
| |
| Required when WebHDFS and security are enabled. In most secure clusters this |
| setting is also used to specify the values for |
| dfs.namenode.kerberos.internal.spnego.principal and |
| dfs.journalnode.kerberos.internal.spnego.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.web.authentication.kerberos.keytab</name> |
| <value></value> |
| <description> |
| The keytab file for the principal corresponding to |
| dfs.web.authentication.kerberos.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.kerberos.principal.pattern</name> |
| <value>*</value> |
| <description> |
| A client-side regular expression that can be configured to control the |
| realms that clients are allowed to authenticate with (useful in cross-realm environments). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.avoid.read.stale.datanode</name> |
| <value>false</value> |
| <description> |
| Indicate whether or not to avoid reading from "stale" datanodes whose |
| heartbeat messages have not been received by the namenode |
| for more than a specified time interval. Stale datanodes will be |
| moved to the end of the node list returned for reading. See |
| dfs.namenode.avoid.write.stale.datanode for a similar setting for writes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.avoid.read.slow.datanode</name> |
| <value>false</value> |
| <description> |
| Indicate whether or not to avoid reading from "slow" datanodes. |
| Slow datanodes will be moved to the end of the node list returned |
| for reading. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.avoid.write.stale.datanode</name> |
| <value>false</value> |
| <description> |
| Indicate whether or not to avoid writing to "stale" datanodes whose |
| heartbeat messages have not been received by the namenode |
| for more than a specified time interval. Writes will avoid using |
| stale datanodes unless more than a configured ratio |
| (dfs.namenode.write.stale.datanode.ratio) of datanodes are marked as |
| stale. See dfs.namenode.avoid.read.stale.datanode for a similar setting |
| for reads. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.enable.log.stale.datanode</name> |
| <value>false</value> |
| <description> |
| Whether to log datanode staleness. Disabled by default. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.stale.datanode.interval</name> |
| <value>30000</value> |
| <description> |
| Default time interval in milliseconds for marking a datanode as "stale", |
| i.e., if the namenode has not received a heartbeat message from a datanode for |
| more than this time interval, the datanode will be marked and treated |
| as "stale" by default. The stale interval cannot be too small since |
| otherwise this may cause too frequent changes of stale state. |
| We thus set a minimum stale interval value (the default value is 3 times |
| the heartbeat interval) and guarantee that the stale interval cannot be less |
| than the minimum value. A stale datanode is avoided during lease/block |
| recovery. It can be conditionally avoided for reads (see |
| dfs.namenode.avoid.read.stale.datanode) and for writes (see |
| dfs.namenode.avoid.write.stale.datanode). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.write.stale.datanode.ratio</name> |
| <value>0.5f</value> |
| <description> |
| When the ratio of datanodes marked stale to total datanodes |
| is greater than this ratio, stop avoiding writing to stale nodes so |
| as to prevent causing hotspots. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.invalidate.work.pct.per.iteration</name> |
| <value>0.32f</value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This determines the percentage of block |
| invalidations (deletes) to do over a single DN heartbeat |
| deletion command. The final deletion count is determined by applying this |
| percentage to the number of live nodes in the system. |
| The resultant number is the number of blocks from the deletion list |
| chosen for proper invalidation over a single heartbeat of a single DN. |
| Value should be a positive, non-zero percentage in float notation (X.Yf), |
| with 1.0f meaning 100%. |
| </description> |
| </property> |
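| <!-- |
| Worked example (cluster size is illustrative): with the default 0.32f and 100 |
| live datanodes, up to 0.32 * 100 = 32 blocks are chosen from the deletion list |
| for invalidation over a single heartbeat of a single DN. |
| --> |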
| |
| <property> |
| <name>dfs.namenode.replication.work.multiplier.per.iteration</name> |
| <value>2</value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This determines the total amount of block transfers to begin in |
| parallel at a DN, for replication, when such a command list is being |
| sent over a DN heartbeat by the NN. The actual number is obtained by |
| multiplying this multiplier with the total number of live nodes in the |
| cluster. The result number is the number of blocks to begin transfers |
| immediately for, per DN heartbeat. This number can be any positive, |
| non-zero integer. |
| </description> |
| </property> |
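| <!-- |
| Worked example (cluster size is illustrative): with the default multiplier of 2 |
| and 100 live nodes, up to 2 * 100 = 200 blocks may have transfers begun |
| immediately, per DN heartbeat, when the NN sends replication commands. |
| --> |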
| |
| <property> |
| <name>nfs.server.port</name> |
| <value>2049</value> |
| <description> |
| Specify the port number used by Hadoop NFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.mountd.port</name> |
| <value>4242</value> |
| <description> |
| Specify the port number used by Hadoop mount daemon. |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.dump.dir</name> |
| <value>/tmp/.hdfs-nfs</value> |
| <description> |
| This directory is used to temporarily save out-of-order writes before |
| writing to HDFS. For each file, the out-of-order writes are dumped after |
| they accumulate to exceed a certain threshold (e.g., 1MB) in memory. |
| One needs to make sure the directory has enough space. |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.rtmax</name> |
| <value>1048576</value> |
| <description>This is the maximum size in bytes of a READ request |
| supported by the NFS gateway. If you change this, make sure you |
| also update the nfs mount's rsize (add rsize=<number of bytes> to the |
| mount directive). |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.wtmax</name> |
| <value>1048576</value> |
| <description>This is the maximum size in bytes of a WRITE request |
| supported by the NFS gateway. If you change this, make sure you |
| also update the nfs mount's wsize (add wsize=<number of bytes> to the |
| mount directive). |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.keytab.file</name> |
| <value></value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This is the path to the keytab file for the hdfs-nfs gateway. |
| This is required when the cluster is kerberized. |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.kerberos.principal</name> |
| <value></value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This is the name of the kerberos principal. This is required when |
| the cluster is kerberized. It must be of this format: |
| nfs-gateway-user/nfs-gateway-host@kerberos-realm |
| </description> |
| </property> |
| |
| <property> |
| <name>nfs.allow.insecure.ports</name> |
| <value>true</value> |
| <description> |
| When set to false, client connections originating from unprivileged ports |
| (those above 1023) will be rejected. This ensures that clients |
| connecting to this NFS Gateway have root privilege on the machine |
| from which they are connecting. |
| </description> |
| </property> |
| |
| <property> |
| <name>hadoop.fuse.connection.timeout</name> |
| <value>300</value> |
| <description> |
| The minimum number of seconds that we'll cache libhdfs connection objects |
| in fuse_dfs. Lower values will result in lower memory consumption; higher |
| values may speed up access by avoiding the overhead of creating new |
| connection objects. |
| </description> |
| </property> |
| |
| <property> |
| <name>hadoop.fuse.timer.period</name> |
| <value>5</value> |
| <description> |
| The number of seconds between cache expiry checks in fuse_dfs. Lower values |
| will result in fuse_dfs noticing changes to Kerberos ticket caches more |
| quickly. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.metrics.logger.period.seconds</name> |
| <value>600</value> |
| <description> |
| This setting controls how frequently the NameNode logs its metrics. The |
| logging configuration must also define one or more appenders for |
| NameNodeMetricsLog for the metrics to be logged. |
| NameNode metrics logging is disabled if this value is set to zero or |
| a negative value. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.metrics.logger.period.seconds</name> |
| <value>600</value> |
| <description> |
| This setting controls how frequently the DataNode logs its metrics. The |
| logging configuration must also define one or more appenders for |
| DataNodeMetricsLog for the metrics to be logged. |
| DataNode metrics logging is disabled if this value is set to zero or |
| a negative value. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.metrics.percentiles.intervals</name> |
| <value></value> |
| <description> |
| Comma-delimited set of integers denoting the desired rollover intervals |
| (in seconds) for percentile latency metrics on the Namenode and Datanode. |
| By default, percentile latency metrics are disabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.peer.stats.enabled</name> |
| <value>false</value> |
| <description> |
| A switch to turn on/off tracking DataNode peer statistics. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.peer.metrics.min.outlier.detection.samples</name> |
| <value>1000</value> |
| <description> |
| Minimum number of packet send samples which are required to qualify for outlier detection. |
| If the number of samples is below this then outlier detection is skipped. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.min.outlier.detection.nodes</name> |
| <value>10</value> |
| <description> |
| Minimum number of nodes to run outlier detection. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.slowpeer.low.threshold.ms</name> |
| <value>5</value> |
| <description> |
| Threshold in milliseconds below which a DataNode is definitely not slow. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.max.nodes.to.report</name> |
| <value>5</value> |
| <description> |
| Number of nodes to include in JSON report. We will return nodes with |
| the highest number of votes from peers. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.outliers.report.interval</name> |
| <value>30m</value> |
| <description> |
| This setting controls how frequently DataNodes will report their peer |
| latencies to the NameNode via heartbeats. This setting supports |
| multiple time unit suffixes as described in dfs.heartbeat.interval. |
| If no suffix is specified then milliseconds is assumed. |
| |
| It is ignored if dfs.datanode.peer.stats.enabled is false. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.block-placement-policy.exclude-slow-nodes.enabled</name> |
| <value>false</value> |
| <description> |
| If this is set to true, we will filter out slow nodes |
| when choosing targets for blocks. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.block-placement.min-blocks-for.write</name> |
| <value>1</value> |
| <description> |
| The minimum number of blocks used when calculating the space required |
| for write operations. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.max.slowpeer.collect.nodes</name> |
| <value>5</value> |
| <description> |
| How many slow nodes we will collect for filtering out |
| when choosing targets for blocks. |
| |
| It is ignored if dfs.namenode.block-placement-policy.exclude-slow-nodes.enabled is false. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.slowpeer.collect.interval</name> |
| <value>30m</value> |
| <description> |
| Interval at which the slow peer tracker runs in the background to collect slow peers. |
| |
| It is ignored if dfs.namenode.block-placement-policy.exclude-slow-nodes.enabled is false. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.fileio.profiling.sampling.percentage</name> |
| <value>0</value> |
| <description> |
| This setting controls the percentage of file I/O events which will be |
| profiled for DataNode disk statistics. The default value of 0 disables |
| disk statistics. Set to an integer value between 1 and 100 to enable disk |
| statistics. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.min.outlier.detection.disks</name> |
| <value>5</value> |
| <description> |
| Minimum number of disks to run outlier detection. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.slowdisk.low.threshold.ms</name> |
| <value>20</value> |
| <description> |
| Threshold in milliseconds below which a disk is definitely not slow. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.max.disks.to.report</name> |
| <value>5</value> |
| <description> |
| Number of disks to include in JSON report per operation. We will return |
| disks with the highest latency. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.max.slowdisks.to.exclude</name> |
| <value>0</value> |
| <description> |
| The number of slow disks that need to be excluded. By default, this parameter is set to 0, |
| which disables excluding slow disks when choosing a volume. |
| </description> |
| </property> |
| |
| <property> |
| <name>hadoop.user.group.metrics.percentiles.intervals</name> |
| <value></value> |
| <description> |
| A comma-separated list of the granularity in seconds for the metrics |
| which describe the 50/75/90/95/99th percentile latency for group resolution |
| in milliseconds. |
| By default, percentile latency metrics are disabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.transfer</name> |
| <value>false</value> |
| <description> |
| Whether or not actual block data that is read/written from/to HDFS should |
| be encrypted on the wire. This only needs to be set on the NN and DNs; |
| clients will deduce this automatically. It is possible to override this setting |
| per connection by specifying custom logic via dfs.trustedchannel.resolver.class. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.transfer.algorithm</name> |
| <value></value> |
| <description> |
| This value may be set to either "3des" or "rc4". If nothing is set, then |
| the configured JCE default on the system is used (usually 3DES). It is |
| widely believed that 3DES is more cryptographically secure, but RC4 is |
| substantially faster. |
| |
| Note that if AES is supported by both the client and server then this |
| encryption algorithm will only be used to initially transfer keys for AES. |
| (See dfs.encrypt.data.transfer.cipher.suites.) |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.transfer.cipher.suites</name> |
| <value></value> |
| <description> |
| This value may be either undefined or AES/CTR/NoPadding. If defined, then |
| dfs.encrypt.data.transfer uses the specified cipher suite for data |
| encryption. If not defined, then only the algorithm specified in |
| dfs.encrypt.data.transfer.algorithm is used. By default, the property is |
| not defined. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.transfer.cipher.key.bitlength</name> |
| <value>128</value> |
| <description> |
| The key bitlength negotiated by dfsclient and datanode for encryption. |
| This value may be set to either 128, 192 or 256. |
| </description> |
| </property> |
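| <!-- |
| Example (illustrative, not defaults): enabling wire encryption of block data |
| with the AES cipher suite and key length described above, set on the NN and DNs |
| via hdfs-site.xml: |
| <property> |
| <name>dfs.encrypt.data.transfer</name> |
| <value>true</value> |
| </property> |
| <property> |
| <name>dfs.encrypt.data.transfer.cipher.suites</name> |
| <value>AES/CTR/NoPadding</value> |
| </property> |
| <property> |
| <name>dfs.encrypt.data.transfer.cipher.key.bitlength</name> |
| <value>256</value> |
| </property> |
| --> |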
| |
| <property> |
| <name>dfs.trustedchannel.resolver.class</name> |
| <value></value> |
| <description> |
| TrustedChannelResolver is used to determine whether a channel |
| is trusted for plain data transfer. The TrustedChannelResolver is |
| invoked on both client and server side. If the resolver indicates |
| that the channel is trusted, then the data transfer will not be |
| encrypted even if dfs.encrypt.data.transfer is set to true. The |
| default implementation returns false indicating that the channel |
| is not trusted. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.data.transfer.protection</name> |
| <value></value> |
| <description> |
| A comma-separated list of SASL protection values used for secured |
| connections to the DataNode when reading or writing block data. Possible |
| values are authentication, integrity and privacy. authentication means |
| authentication only and no integrity or privacy; integrity implies |
| authentication and integrity are enabled; and privacy implies all of |
| authentication, integrity and privacy are enabled. If |
| dfs.encrypt.data.transfer is set to true, then it supersedes the setting for |
| dfs.data.transfer.protection and enforces that all connections must use a |
| specialized encrypted SASL handshake. This property is ignored for |
| connections to a DataNode listening on a privileged port. In this case, it |
| is assumed that the use of a privileged port establishes sufficient trust. |
| </description> |
| </property> |
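| <!-- |
| Example (illustrative value): requiring the strongest SASL protection level |
| described above for block data connections: |
| <property> |
| <name>dfs.data.transfer.protection</name> |
| <value>privacy</value> |
| </property> |
| --> |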
| |
| <property> |
| <name>dfs.data.transfer.saslproperties.resolver.class</name> |
| <value></value> |
| <description> |
| SaslPropertiesResolver used to resolve the QOP used for a connection to the |
| DataNode when reading or writing block data. If not specified, the value of |
| hadoop.security.saslproperties.resolver.class is used as the default value. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.rpc-address</name> |
| <value>0.0.0.0:8485</value> |
| <description> |
| The JournalNode RPC server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.rpc-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the RPC server will bind to. If this optional address is |
| set, it overrides only the hostname portion of dfs.journalnode.rpc-address. |
| This is useful for making the JournalNode listen on all interfaces by |
| setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.http-address</name> |
| <value>0.0.0.0:8480</value> |
| <description> |
| The address and port the JournalNode HTTP server listens on. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.http-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the HTTP server will bind to. If this optional address |
| is set, it overrides only the hostname portion of |
| dfs.journalnode.http-address. This is useful for making the JournalNode |
| HTTP server listen on all interfaces by setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.https-address</name> |
| <value>0.0.0.0:8481</value> |
| <description> |
| The address and port the JournalNode HTTPS server listens on. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.journalnode.https-bind-host</name> |
| <value></value> |
| <description> |
| The actual address the HTTPS server will bind to. If this optional address |
| is set, it overrides only the hostname portion of |
| dfs.journalnode.https-address. This is useful for making the JournalNode |
| HTTPS server listen on all interfaces by setting it to 0.0.0.0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.audit.loggers</name> |
| <value>default</value> |
| <description> |
| List of classes implementing audit loggers that will receive audit events. |
| These should be implementations of org.apache.hadoop.hdfs.server.namenode.AuditLogger. |
| The special value "default" can be used to reference the default audit |
| logger, which uses the configured log system. Installing custom audit loggers |
| may affect the performance and stability of the NameNode. Refer to the custom |
| logger's documentation for more details. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name> |
| <value>10737418240</value> <!-- 10 GB --> |
| <description> |
| Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to |
| org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy. |
| This setting controls how much DN volumes are allowed to differ in terms of |
| bytes of free disk space before they are considered imbalanced. If the free |
| space of all the volumes is within this range of each other, the volumes |
| will be considered balanced and block assignments will be done on a pure |
| round robin basis. Supports multiple size unit suffixes (case insensitive), |
| as described in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name> |
| <value>0.75f</value> |
| <description> |
| Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to |
| org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy. |
| This setting controls what percentage of new block allocations will be sent |
| to volumes with more available disk space than others. This setting should |
| be in the range 0.0 - 1.0, though in practice 0.5 - 1.0, since there should |
| be no reason to prefer that volumes with less available disk space receive |
| more block allocations. |
| </description> |
| </property> |
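| <!-- |
| Example (illustrative, not defaults): selecting the available-space policy named |
| above together with its two tuning knobs in hdfs-site.xml: |
| <property> |
| <name>dfs.datanode.fsdataset.volume.choosing.policy</name> |
| <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy</value> |
| </property> |
| <property> |
| <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name> |
| <value>10737418240</value> |
| </property> |
| <property> |
| <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name> |
| <value>0.85f</value> |
| </property> |
| --> |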
| |
| <property> |
| <name>dfs.datanode.round-robin-volume-choosing-policy.additional-available-space</name> |
| <value>1073741824</value> <!-- 1 GB --> |
| <description> |
| Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to |
| org.apache.hadoop.hdfs.server.datanode.fsdataset.RoundRobinVolumeChoosingPolicy. |
| This setting controls how much additional available space (in bytes) is needed |
| when choosing a volume. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.noeditlogchannelflush</name> |
| <value>false</value> |
| <description> |
| Specifies whether to skip flushing the edit log file channel. When set, expensive |
| FileChannel#force calls are skipped and synchronous disk writes are |
| enabled instead by opening the edit log file with RandomAccessFile("rwd") |
| flags. This can significantly improve the performance of edit log writes |
| on the Windows platform. |
| Note that the behavior of the "rwd" flags is platform and hardware specific |
| and might not provide the same level of guarantees as FileChannel#force. |
| For example, the write will skip the disk-cache on SAS and SCSI devices |
| while it might not on SATA devices. This is an expert level setting, |
| change with caution. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.cache.drop.behind.writes</name> |
| <value></value> |
| <description> |
| Just like dfs.datanode.drop.cache.behind.writes, this setting causes the |
| page cache to be dropped behind HDFS writes, potentially freeing up more |
| memory for other uses. Unlike dfs.datanode.drop.cache.behind.writes, this |
| is a client-side setting rather than a setting for the entire datanode. |
| If present, this setting will override the DataNode default. |
| |
| If the native libraries are not available to the DataNode, this |
| configuration has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.cache.drop.behind.reads</name> |
| <value></value> |
| <description> |
| Just like dfs.datanode.drop.cache.behind.reads, this setting causes the |
| page cache to be dropped behind HDFS reads, potentially freeing up more |
| memory for other uses. Unlike dfs.datanode.drop.cache.behind.reads, this |
| is a client-side setting rather than a setting for the entire datanode. If |
| present, this setting will override the DataNode default. |
| |
| If the native libraries are not available to the DataNode, this |
| configuration has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.cache.readahead</name> |
| <value></value> |
| <description> |
| When using remote reads, this setting causes the datanode to |
| read ahead in the block file using posix_fadvise, potentially decreasing |
| I/O wait times. Unlike dfs.datanode.readahead.bytes, this is a client-side |
| setting rather than a setting for the entire datanode. If present, this |
| setting will override the DataNode default. Supports multiple size unit |
| suffixes (case insensitive), as described in dfs.blocksize. |
| |
| When using local reads, this setting determines how much readahead we do in |
| BlockReaderLocal. |
| |
| If the native libraries are not available to the DataNode, this |
| configuration has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.server-defaults.validity.period.ms</name> |
| <value>3600000</value> |
| <description> |
| The number of milliseconds after which cached server defaults are updated. |
| |
| By default this parameter is set to 1 hour. |
| Supports multiple time unit suffixes (case insensitive), as described |
| in dfs.heartbeat.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.observer.enabled</name> |
| <value>false</value> |
| <description> |
| If set to true, the NameNode becomes an observer node on startup; |
| otherwise, startup is unchanged. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.enable.retrycache</name> |
| <value>true</value> |
| <description> |
| This enables the retry cache on the namenode. The namenode tracks the |
| corresponding response for each non-idempotent request. If a client retries the |
| request, the response from the retry cache is sent. Such operations |
| are tagged with the annotation @AtMostOnce in namenode protocols. It is |
| recommended that this flag be set to true. Setting it to false will result |
| in clients getting failure responses to retried requests. This flag must |
| be enabled in an HA setup for transparent failovers. |
| |
| The entries in the cache have expiration time configurable |
| using dfs.namenode.retrycache.expirytime.millis. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.retrycache.expirytime.millis</name> |
| <value>600000</value> |
| <description> |
| The time for which retry cache entries are retained. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.retrycache.heap.percent</name> |
| <value>0.03f</value> |
| <description> |
| This parameter configures the heap size allocated for retry cache |
| (excluding the response cached). This corresponds to approximately |
| 4096 entries for every 64MB of namenode process java heap size. |
| Assuming retry cache entry expiration time (configured using |
| dfs.namenode.retrycache.expirytime.millis) of 10 minutes, this |
| enables retry cache to support 7 operations per second sustained |
| for 10 minutes. As the heap size is increased, the operation rate |
| linearly increases. |
| </description> |
| </property> |
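| <!-- |
| Worked example (following the figures in the description above): a 1 GB NameNode |
| heap at 0.03f yields roughly 16 * 4096 = 65536 retry cache entries; spread over |
| the 10 minute (600 second) expiry window that is about 109 operations per |
| second, scaling linearly with heap size. |
| --> |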
| |
| <property> |
| <name>dfs.client.mmap.enabled</name> |
| <value>true</value> |
| <description> |
| If this is set to false, the client won't attempt to perform memory-mapped reads. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.mmap.cache.size</name> |
| <value>256</value> |
| <description> |
| When zero-copy reads are used, the DFSClient keeps a cache of recently used |
| memory mapped regions. This parameter controls the maximum number of |
| entries that we will keep in that cache. |
| |
| The larger this number is, the more file descriptors we will potentially |
| use for memory-mapped files. Memory-mapped files also use virtual address space. |
| You may need to increase your ulimit virtual address space limits before |
| increasing the client mmap cache size. |
| |
| Note that you can still do zero-copy reads when this size is set to 0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.mmap.cache.timeout.ms</name> |
| <value>3600000</value> |
| <description> |
| The minimum length of time that we will keep an mmap entry in the cache |
| between uses. If an entry is in the cache longer than this, and nobody |
| uses it, it will be removed by a background thread. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.mmap.retry.timeout.ms</name> |
| <value>300000</value> |
| <description> |
| The minimum amount of time that we will wait before retrying a failed mmap |
| operation. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.short.circuit.replica.stale.threshold.ms</name> |
| <value>1800000</value> |
| <description> |
| The maximum amount of time that we will consider a short-circuit replica to |
| be valid, if there is no communication from the DataNode. After this time |
| has elapsed, we will re-fetch the short-circuit replica even if it is in |
| the cache. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.caching.enabled</name> |
| <value>true</value> |
| <description> |
| Set to true to enable block caching. This flag enables the NameNode to |
| maintain a mapping of cached blocks to DataNodes via processing DataNode |
| cache reports. Based on these reports and addition and removal of caching |
| directives, the NameNode will schedule caching and uncaching work. |
| In the current implementation, centralized caching introduces additional |
| write lock overhead (see CacheReplicationMonitor#rescan) even if no path |
| to cache is specified, so we recommend disabling this feature when not in |
| use. We will disable centralized caching by default in later versions. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.path.based.cache.block.map.allocation.percent</name> |
| <value>0.25</value> |
| <description> |
| The percentage of the Java heap which we will allocate to the cached blocks |
| map. The cached blocks map is a hash map which uses chained hashing. |
| Smaller maps may be accessed more slowly if the number of cached blocks is |
| large; larger maps will consume more memory. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.crm.checklocktime.enable</name> |
| <value>false</value> |
| <description> |
| Set to true to make the CacheManager check how long it has held the |
| global rwlock. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.crm.maxlocktime.ms</name> |
| <value>1000</value> |
| <description> |
| The maximum amount of time that the CacheManager should hold the global rwlock. |
| This configuration takes effect only when dfs.namenode.crm.checklocktime.enable is set to true. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.crm.sleeptime.ms</name> |
| <value>300</value> |
| <description> |
| The amount of time for which the CacheManager releases the global rwlock before reacquiring it. |
| This configuration takes effect only when dfs.namenode.crm.checklocktime.enable is set to true. |
| </description> |
| </property> |
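| |
| <!-- |
| Illustrative hdfs-site.xml snippet (example values) enabling the lock-hold |
| check governed by the three dfs.namenode.crm.* properties above. With these |
| settings the CacheManager should release the global rwlock for roughly |
| 300 ms whenever it has held the lock for more than 500 ms. |
| |
| <property> |
| <name>dfs.namenode.crm.checklocktime.enable</name> |
| <value>true</value> |
| </property> |
| <property> |
| <name>dfs.namenode.crm.maxlocktime.ms</name> |
| <value>500</value> |
| </property> |
| <property> |
| <name>dfs.namenode.crm.sleeptime.ms</name> |
| <value>300</value> |
| </property> |
| --> |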
| |
| <property> |
| <name>dfs.datanode.max.locked.memory</name> |
| <value>0</value> |
| <description> |
| The amount of memory in bytes to use for caching of block replicas in |
| memory on the datanode. The datanode's maximum locked memory soft ulimit |
| (RLIMIT_MEMLOCK) must be set to at least this value, else the datanode |
| will abort on startup. Multiple size unit suffixes are supported |
| (case insensitive), as described in dfs.blocksize. |
| |
| By default, this parameter is set to 0, which disables in-memory caching. |
| |
| If the native libraries are not available to the DataNode, this |
| configuration has no effect. |
| </description> |
| </property> |
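| |
| <!-- |
| Illustrative hdfs-site.xml snippet (example value) enabling roughly 2 GB of |
| in-memory block caching on each DataNode; the size suffix follows the |
| dfs.blocksize convention. As noted above, the DataNode user's locked-memory |
| soft ulimit (RLIMIT_MEMLOCK, typically shown by "ulimit -l") must be at |
| least this large or the DataNode will abort on startup. |
| |
| <property> |
| <name>dfs.datanode.max.locked.memory</name> |
| <value>2g</value> |
| </property> |
| --> |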
| |
| <property> |
| <name>dfs.datanode.pmem.cache.dirs</name> |
| <value></value> |
| <description> |
| This value specifies the persistent memory directories used for caching block |
| replicas. Multiple directories separated by "," are acceptable. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.pmem.cache.recovery</name> |
| <value>true</value> |
| <description> |
| This value specifies whether the previous cache on persistent memory will be recovered. |
| This configuration can take effect only if persistent memory cache is enabled by |
| specifying value for 'dfs.datanode.pmem.cache.dirs'. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.list.cache.directives.num.responses</name> |
| <value>100</value> |
| <description> |
| This value controls the number of cache directives that the NameNode will |
| send over the wire in response to a listDirectives RPC. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.list.cache.pools.num.responses</name> |
| <value>100</value> |
| <description> |
| This value controls the number of cache pools that the NameNode will |
| send over the wire in response to a listPools RPC. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.path.based.cache.refresh.interval.ms</name> |
| <value>30000</value> |
| <description> |
| The number of milliseconds between subsequent path cache rescans. Path |
| cache rescans are when we calculate which blocks should be cached, and on |
| which DataNodes. |
| |
| By default, this parameter is set to 30 seconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.path.based.cache.retry.interval.ms</name> |
| <value>30000</value> |
| <description> |
| When the NameNode needs to uncache something that is cached, or cache |
| something that is not cached, it must direct the DataNodes to do so by |
| sending a DNA_CACHE or DNA_UNCACHE command in response to a DataNode |
| heartbeat. This parameter controls how frequently the NameNode will |
| resend these commands. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.fsdatasetcache.max.threads.per.volume</name> |
| <value>4</value> |
| <description> |
| The maximum number of threads per volume to use for caching new data |
| on the datanode. These threads consume both I/O and CPU. This can affect |
| normal datanode operations. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.fsdatasetasyncdisk.max.threads.per.volume</name> |
| <value>4</value> |
| <description> |
| The maximum number of threads per volume used to process async disk |
| operations on the datanode. These threads consume I/O and CPU at the |
| same time. This will affect normal data node operations. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.cachereport.intervalMsec</name> |
| <value>10000</value> |
| <description> |
| Determines cache reporting interval in milliseconds. After this amount of |
| time, the DataNode sends a full report of its cache state to the NameNode. |
| The NameNode uses the cache report to update its map of cached blocks to |
| DataNode locations. |
| |
| This configuration has no effect if in-memory caching has been disabled by |
| setting dfs.datanode.max.locked.memory to 0 (which is the default). |
| |
| If the native libraries are not available to the DataNode, this |
| configuration has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edit.log.autoroll.multiplier.threshold</name> |
| <value>0.5</value> |
| <description> |
| Determines when an active namenode will roll its own edit log. |
| The actual threshold (in number of edits) is determined by multiplying |
| this value by dfs.namenode.checkpoint.txns. |
| |
| This prevents extremely large edit files from accumulating on the active |
| namenode, which can cause timeouts during namenode startup and pose an |
| administrative hassle. This behavior is intended as a failsafe for when |
| the standby or secondary namenode fails to roll the edit log by the normal |
| checkpoint threshold. |
| </description> |
| </property> |
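| |
| <!-- |
| Worked example of the threshold calculation described above, assuming the |
| default dfs.namenode.checkpoint.txns of 1000000: |
| |
| autoroll threshold = 0.5 * 1000000 = 500000 edits |
| |
| So the active NameNode rolls its edit log once roughly 500000 transactions |
| have accumulated, checked every |
| dfs.namenode.edit.log.autoroll.check.interval.ms. |
| --> |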
| |
| <property> |
| <name>dfs.namenode.edit.log.autoroll.check.interval.ms</name> |
| <value>300000</value> |
| <description> |
| How often an active namenode will check if it needs to roll its edit log, |
| in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.user.provider.user.pattern</name> |
| <value>^[A-Za-z_][A-Za-z0-9._-]*[$]?$</value> |
| <description> |
| Valid pattern for user and group names for WebHDFS; it must be a valid Java regex. |
| </description> |
| </property> |
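| |
| <!-- |
| A few illustrative names checked against the default pattern above |
| (hypothetical account names, shown only to clarify the regex): |
| |
| matches:  hdfs   alice.smith   svc_etl$   web_user_01 |
| no match: 9lives (leading digit)   bob smith (contains a space) |
| --> |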
| |
| <property> |
| <name>dfs.webhdfs.acl.provider.permission.pattern</name> |
| <value>^(default:)?(user|group|mask|other):[[A-Za-z_][A-Za-z0-9._-]]*:([rwx-]{3})?(,(default:)?(user|group|mask|other):[[A-Za-z_][A-Za-z0-9._-]]*:([rwx-]{3})?)*$</value> |
| <description> |
| Valid pattern for user and group names in WebHDFS ACL operations; it must be a valid Java regex. |
| </description> |
| </property> |
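| |
| <!-- |
| Illustrative ACL entry specs checked against the default pattern above |
| (hypothetical user and group names, shown only to clarify the regex): |
| |
| matches:  user:alice:rwx     default:group:hadoop:r-x     mask::rw- |
|           user:alice:rwx,default:group:hadoop:r-x |
| no match: user:alice (missing the second colon) |
| --> |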
| |
| <property> |
| <name>dfs.webhdfs.socket.connect-timeout</name> |
| <value>60s</value> |
| <description> |
| Socket timeout for connecting to WebHDFS servers. This prevents a |
| WebHDFS client from hanging if the server hostname is |
| misconfigured, or the server does not respond before the timeout |
| expires. The value is followed by a unit specifier: ns, us, ms, s, m, |
| h, d for nanoseconds, microseconds, milliseconds, seconds, |
| minutes, hours, days respectively. Values should provide units; |
| if none is given, milliseconds are assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.socket.read-timeout</name> |
| <value>60s</value> |
| <description> |
| Socket timeout for reading data from WebHDFS servers. This |
| prevents a WebHDFS client from hanging if the server stops sending |
| data. Value is followed by a unit specifier: ns, us, ms, s, m, h, |
| d for nanoseconds, microseconds, milliseconds, seconds, minutes, |
| hours, days respectively. Values should provide units; |
| if none is given, milliseconds are assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.context</name> |
| <value>default</value> |
| <description> |
| The name of the DFSClient context that we should use. Clients that share |
| a context share a socket cache and short-circuit cache, among other things. |
| You should only change this if you don't want to share with another set of |
| threads. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.shortcircuit</name> |
| <value>false</value> |
| <description> |
| This configuration parameter turns on short-circuit local reads. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.socket.send.buffer.size</name> |
| <value>0</value> |
| <description> |
| Socket send buffer size for a write pipeline on the DFSClient side. |
| This may affect TCP connection throughput. |
| If it is set to zero or a negative value, |
| no buffer size will be set explicitly, |
| which enables TCP auto-tuning on systems that support it. |
| The default value is 0. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.domain.socket.path</name> |
| <value></value> |
| <description> |
| Optional. This is a path to a UNIX domain socket that will be used for |
| communication between the DataNode and local HDFS clients. |
| If the string "_PORT" is present in this path, it will be replaced by the |
| TCP port of the DataNode. |
| </description> |
| </property> |
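| |
| <!-- |
| Illustrative hdfs-site.xml snippet (example path) for short-circuit reads, |
| which also require dfs.client.read.shortcircuit (described below). The |
| DataNode typically creates this socket, and the path should live in a |
| directory writable only by the HDFS user or root. "_PORT" may be embedded |
| in the path and is substituted with the DataNode's TCP port as noted above. |
| |
| <property> |
| <name>dfs.domain.socket.path</name> |
| <value>/var/lib/hadoop-hdfs/dn_socket</value> |
| </property> |
| <property> |
| <name>dfs.client.read.shortcircuit</name> |
| <value>true</value> |
| </property> |
| --> |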
| |
| <property> |
| <name>dfs.domain.socket.disable.interval.seconds</name> |
| <value>600</value> |
| <description> |
| The length of time, in seconds, for which a DataNode is disabled for future |
| short-circuit reads after an error happens during a short-circuit read. |
| Setting this to 0 means short-circuit reads are never disabled after |
| errors happen. Negative values are invalid. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.shortcircuit.skip.checksum</name> |
| <value>false</value> |
| <description> |
| If this configuration parameter is set, |
| short-circuit local reads will skip checksums. |
| This is normally not recommended, |
| but it may be useful for special setups. |
| You might consider using this |
| if you are doing your own checksumming outside of HDFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.shortcircuit.streams.cache.size</name> |
| <value>256</value> |
| <description> |
| The DFSClient maintains a cache of recently opened file descriptors. |
| This parameter controls the maximum number of file descriptors in the cache. |
| Setting this higher will use more file descriptors, |
| but potentially provide better performance on workloads |
| involving lots of seeks. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.shortcircuit.streams.cache.expiry.ms</name> |
| <value>300000</value> |
| <description> |
| This controls the minimum amount of time that |
| file descriptors must sit idle in the client cache |
| before they can be closed due to inactivity. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.audit.log.debug.cmdlist</name> |
| <value></value> |
| <description> |
| A comma separated list of NameNode commands that are written to the HDFS |
| namenode audit log only if the audit log level is debug. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.use.legacy.blockreader.local</name> |
| <value>false</value> |
| <description> |
| The legacy short-circuit reader implementation, based on HDFS-2246, is used |
| if this configuration parameter is true. |
| This is intended for platforms other than Linux |
| where the newer implementation based on HDFS-347 is not available. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.read.use.cache.priority</name> |
| <value>false</value> |
| <description> |
| If true, a replica cached on a DataNode is preferred; |
| otherwise, the replica closest to the client is preferred. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.local-path-access.user</name> |
| <value></value> |
| <description> |
| Comma-separated list of the users allowed to open block files |
| directly for legacy short-circuit local reads. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.domain.socket.data.traffic</name> |
| <value>false</value> |
| <description> |
| This controls whether we will try to pass normal data traffic |
| over a UNIX domain socket rather than over a TCP socket |
| for node-local data transfers. |
| This is currently experimental and turned off by default. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reject-unresolved-dn-topology-mapping</name> |
| <value>false</value> |
| <description> |
| If the value is set to true, then namenode will reject datanode |
| registration if the topology mapping for a datanode is not resolved and |
| NULL is returned (script defined by net.topology.script.file.name fails |
| to execute). Otherwise, datanode will be registered and the default rack |
| will be assigned as the topology path. Topology paths are important for |
| data resiliency, since they define fault domains. Thus it may be unwanted |
| behavior to allow datanode registration with the default rack if |
| topology resolution failed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.xattrs.enabled</name> |
| <value>true</value> |
| <description> |
| Whether support for extended attributes is enabled on the NameNode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-xattrs-per-inode</name> |
| <value>32</value> |
| <description> |
| Maximum number of extended attributes per inode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-xattr-size</name> |
| <value>16384</value> |
| <description> |
| The maximum combined size of the name and value of an extended attribute |
| in bytes. It should be larger than 0, and less than or equal to the |
| maximum size hard limit, which is 32768. |
| Multiple size unit suffixes are supported (case insensitive), as described |
| in dfs.blocksize. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.slow.io.warning.threshold.ms</name> |
| <value>30000</value> |
| <description>The threshold in milliseconds at which we will log a slow |
| I/O warning in a DFSClient. By default, this parameter is set to 30000 |
| milliseconds (30 seconds). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.slow.io.warning.threshold.ms</name> |
| <value>300</value> |
| <description>The threshold in milliseconds at which we will log a slow |
| I/O warning in a datanode. By default, this parameter is set to 300 |
| milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.processcommands.threshold</name> |
| <value>2s</value> |
| <description>The threshold at which we will log slow |
| command processing in BPServiceActor. By default, this parameter is set |
| to 2 seconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.enabled</name> |
| <value>false</value> |
| <description> |
| Set to true to enable dead node detection on the client side. All the DFSInputStreams of the same client can |
| then share the dead node information. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.probe.deadnode.threads</name> |
| <value>10</value> |
| <description> |
| The maximum number of threads to use for probing dead nodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.idle.sleep.ms</name> |
| <value>10000</value> |
| <description> |
| The sleep time of DeadNodeDetector per iteration. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.probe.suspectnode.threads</name> |
| <value>10</value> |
| <description> |
| The maximum number of threads to use for probing suspect nodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.rpc.threads</name> |
| <value>20</value> |
| <description> |
| The maximum number of threads to use for issuing RPC calls to recheck the liveness of dead nodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.probe.deadnode.interval.ms</name> |
| <value>60000</value> |
| <description> |
| Interval in milliseconds between probes of dead nodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.probe.suspectnode.interval.ms</name> |
| <value>300</value> |
| <description> |
| Interval in milliseconds between probes of suspect nodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.deadnode.detection.probe.connection.timeout.ms</name> |
| <value>20000</value> |
| <description> |
| Connection timeout in milliseconds used when probing dead nodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.refresh.read-block-locations.ms</name> |
| <value>0</value> |
| <description> |
| Refreshing LocatedBlocks period. A value of 0 disables the feature. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.refresh.read-block-locations.register-automatically</name> |
| <value>true</value> |
| <description> |
| Whether to auto-register all DFSInputStreams for background LocatedBlock refreshes. |
| If false, the user must manually register using DFSClient#addLocatedBlocksRefresh(DFSInputStream). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.refresh.read-block-locations.threads</name> |
| <value>5</value> |
| <description> |
| Number of threads to use for refreshing LocatedBlocks of registered |
| DFSInputStreams. If a DFSClient opens many DFSInputStreams, increasing |
| this may help refresh them all in a timely manner. |
| </description> |
| </property> |
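| |
| <!-- |
| Illustrative hdfs-site.xml snippet (example interval) enabling the background |
| LocatedBlock refresh governed by the dfs.client.refresh.read-block-locations.* |
| properties above. With this setting, and auto-registration left at its |
| default, open DFSInputStreams have their block locations refreshed roughly |
| every 30 minutes by the default pool of 5 threads. |
| |
| <property> |
| <name>dfs.client.refresh.read-block-locations.ms</name> |
| <value>1800000</value> |
| </property> |
| --> |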
| |
| <property> |
| <name>dfs.namenode.lease-recheck-interval-ms</name> |
| <value>2000</value> |
| <description>The interval in milliseconds at which the NameNode's lease |
| manager wakes up and checks leases for expiration. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.max-lock-hold-to-release-lease-ms</name> |
| <value>25</value> |
| <description>While releasing expired leases, a lock is held that blocks |
| other operations on the namenode. To avoid blocking them for too long, |
| we stop releasing leases once the lock has been held for this many |
| milliseconds, and resume on the next recheck. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.write-lock-reporting-threshold-ms</name> |
| <value>5000</value> |
| <description>When a write lock is held on the namenode for a long time, |
| this will be logged as the lock is released. This sets how long the |
| lock must be held for logging to occur. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.read-lock-reporting-threshold-ms</name> |
| <value>5000</value> |
| <description>When a read lock is held on the namenode for a long time, |
| this will be logged as the lock is released. This sets how long the |
| lock must be held for logging to occur. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.access-control-enforcer-reporting-threshold-ms</name> |
| <value>1000</value> |
| <description> |
| If an external AccessControlEnforcer takes a long time to check permissions while holding the FSNamesystem lock, |
| a WARN log message is printed. This sets how long the check must run for logging to occur. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.lock.detailed-metrics.enabled</name> |
| <value>false</value> |
| <description>If true, the namenode will keep track of how long various |
| operations hold the Namesystem lock for and emit this as metrics. These |
| metrics have names of the form FSN(Read|Write)LockNanosOperationName, |
| where OperationName denotes the name of the operation that initiated the |
| lock hold (this will be OTHER for certain uncategorized operations) and |
| they export the hold time values in nanoseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fslock.fair</name> |
| <value>true</value> |
| <description>If this is true, the FS Namesystem lock will be used in Fair mode, |
| which will help to prevent writer threads from being starved, but can provide |
| lower lock throughput. See java.util.concurrent.locks.ReentrantReadWriteLock |
| for more information on fair/non-fair locks. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.lock.fair</name> |
| <value>true</value> |
| <description>If this is true, the Datanode FsDataset lock will be used in Fair |
| mode, which will help to prevent writer threads from being starved, but can |
| lower lock throughput. See java.util.concurrent.locks.ReentrantReadWriteLock |
| for more information on fair/non-fair locks. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.startup.delay.block.deletion.sec</name> |
| <value>0</value> |
| <description>The delay in seconds for which block deletion is paused |
| after Namenode startup. By default it is disabled (0). |
| In the case where a directory with a large number of subdirectories and |
| files is deleted, a delay of one hour is suggested to give the administrator |
| enough time to notice a large number of pending deletion blocks and take |
| corrective action. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.block.id.layout.upgrade.threads</name> |
| <value>6</value> |
| <description>The number of threads to use when creating hard links from |
| current to previous blocks during upgrade of a DataNode to block ID-based |
| block layout (see HDFS-6482 for details on the layout).</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.list.encryption.zones.num.responses</name> |
| <value>100</value> |
| <description>When listing encryption zones, the maximum number of zones |
| that will be returned in a batch. Fetching the list incrementally in |
| batches improves namenode performance. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.list.reencryption.status.num.responses</name> |
| <value>100</value> |
| <description>When listing re-encryption status, the maximum number of zones |
| that will be returned in a batch. Fetching the list incrementally in |
| batches improves namenode performance. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.list.openfiles.num.responses</name> |
| <value>1000</value> |
| <description> |
| When listing open files, the maximum number of open files that will be |
| returned in a single batch. Fetching the list incrementally in batches |
| improves namenode performance. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edekcacheloader.interval.ms</name> |
| <value>1000</value> |
| <description>When a KeyProvider is configured, the interval at which the |
| EDEK cache is warmed up when the NN starts up or becomes active. All EDEKs |
| will be loaded from the KMS into the provider cache. The EDEK cache loader |
| will keep trying to warm up the cache until it succeeds or the NN leaves |
| the active state. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edekcacheloader.initial.delay.ms</name> |
| <value>3000</value> |
| <description>When a KeyProvider is configured, the delay before the first |
| attempt to warm up the EDEK cache when the NN starts up or becomes active. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edekcacheloader.max-retries</name> |
| <value>10</value> |
| <description>When a KeyProvider is configured, the maximum number of retries |
| allowed for warming up the EDEK cache if no key is loaded successfully when |
| the NN starts up or becomes active. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reencrypt.sleep.interval</name> |
| <value>1m</value> |
| <description>Interval for which the re-encrypt EDEK thread sleeps in its |
| main loop. The interval accepts units; if none is given, milliseconds are assumed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reencrypt.batch.size</name> |
| <value>1000</value> |
| <description>How many EDEKs the re-encrypt thread should process in one batch. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reencrypt.throttle.limit.handler.ratio</name> |
| <value>1.0</value> |
| <description>Throttling ratio for re-encryption, indicating what fraction |
| of time the re-encrypt handler thread should work under the NN read lock. |
| Values larger than 1.0 are interpreted as 1.0. Zero or negative values are |
| invalid and will fail NN startup. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reencrypt.throttle.limit.updater.ratio</name> |
| <value>1.0</value> |
| <description>Throttling ratio for re-encryption, indicating what fraction |
| of time the re-encrypt updater thread should work under the NN write lock. |
| Values larger than 1.0 are interpreted as 1.0. Zero or negative values are |
| invalid and will fail NN startup. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.reencrypt.edek.threads</name> |
| <value>10</value> |
| <description>Maximum number of re-encrypt threads to contact the KMS |
| and re-encrypt the EDEKs. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.inotify.max.events.per.rpc</name> |
| <value>1000</value> |
| <description>Maximum number of events that will be sent to an inotify client |
| in a single RPC response. The default value attempts to amortize away |
| the overhead for this RPC while avoiding huge memory requirements for the |
| client and NameNode (1000 events should consume no more than 1 MB.) |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.user.home.dir.prefix</name> |
| <value>/user</value> |
| <description>The directory to prepend to the user name to get the user's |
| home directory. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.cache.revocation.timeout.ms</name> |
| <value>900000</value> |
| <description>When the DFSClient reads from a block file which the DataNode is |
| caching, the DFSClient can skip verifying checksums. The DataNode will |
| keep the block file in cache until the client is done. If the client takes |
| an unusually long time, though, the DataNode may need to evict the block |
| file from the cache anyway. This value controls how long the DataNode will |
| wait for the client to release a replica that it is reading without |
| checksums. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.cache.revocation.polling.ms</name> |
| <value>500</value> |
| <description>How often the DataNode should poll to see if the clients have |
| stopped using a replica that the DataNode wants to uncache. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.enabled</name> |
| <value>true</value> |
| <description> |
| Allow users to change the storage policy on files and directories. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.storage.policy.permissions.superuser-only</name> |
| <value>false</value> |
| <description> |
| Allow only superuser role to change the storage policy on files and |
| directories. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.legacy-oiv-image.dir</name> |
| <value></value> |
| <description>Determines where to save the namespace in the old fsimage format |
| during checkpointing by the standby NameNode or SecondaryNameNode. Users can |
| dump the contents of the old format fsimage with the oiv_legacy command. If |
| the value is not specified, the old format fsimage will not be saved during |
| checkpointing. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.top.enabled</name> |
| <value>true</value> |
| <description>Enable nntop: reporting top users on namenode |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.top.window.num.buckets</name> |
| <value>10</value> |
| <description>Number of buckets in the rolling window implementation of nntop |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.top.num.users</name> |
| <value>10</value> |
| <description>Number of top users returned by the top tool |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.top.windows.minutes</name> |
| <value>1,5,25</value> |
| <description>Comma-separated list of nntop reporting periods in minutes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.ugi.expire.after.access</name> |
| <value>600000</value> |
| <description>How long in milliseconds after the last access |
| the cached UGI will expire. With 0, it never expires. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.blocks.per.postponedblocks.rescan</name> |
| <value>10000</value> |
| <description>Number of blocks to rescan for each iteration of |
| postponedMisreplicatedBlocks. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.block-pinning.enabled</name> |
| <value>false</value> |
| <description>Whether to pin blocks on the favored DataNode.</description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.locateFollowingBlock.initial.delay.ms</name> |
| <value>400</value> |
| <description>The initial delay (unit is ms) for locateFollowingBlock. |
| The delay doubles for each retry |
| until dfs.client.block.write.locateFollowingBlock.max.delay.ms is reached; |
| after that, the delay for each retry is |
| dfs.client.block.write.locateFollowingBlock.max.delay.ms. |
| </description> |
| </property> |
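| |
| <!-- |
| Worked example of the retry backoff described above, using the defaults of |
| a 400 ms initial delay and a 60000 ms cap (approximate; later retries stay |
| at the cap): |
| |
| retry delays (ms): 400, 800, 1600, 3200, 6400, 12800, 25600, 51200, 60000, 60000, ... |
| --> |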
| |
| <property> |
| <name>dfs.client.block.write.locateFollowingBlock.max.delay.ms</name> |
| <value>60000</value> |
| <description> |
| The maximum delay (unit is ms) before retrying locateFollowingBlock. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.zkfc.nn.http.timeout.ms</name> |
| <value>20000</value> |
| <description> |
| The HTTP connection and read timeout value (unit is ms) when DFS ZKFC |
| tries to get local NN thread dump after local NN becomes |
| SERVICE_NOT_RESPONDING or SERVICE_UNHEALTHY. |
| If it is set to zero, DFS ZKFC won't get local NN thread dump. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.zkfc.client.ssl.enabled</name> |
| <value>false</value> |
| <description> |
| Enable SSL/TLS encryption for the ZooKeeper communication from ZKFC. |
| Note: if hadoop.zk.ssl.enabled is set to a value, then that central setting has precedence, |
| and this value will be overridden by the value of hadoop.zk.ssl.enabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.nn.not-become-active-in-safemode</name> |
| <value>false</value> |
| <description> |
| When set to true, this prevents namenodes in safe mode from becoming active or observer |
| while other standby namenodes might be ready to serve requests. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.tail-edits.in-progress</name> |
| <value>false</value> |
| <description> |
| Whether to enable the standby namenode to tail in-progress edit logs. |
| Clients might want to turn it on when they want Standby NN to have |
| more up-to-date data. When using the QuorumJournalManager, this enables |
| tailing of edit logs via the RPC-based mechanism, rather than streaming, |
| which allows for much fresher data. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.state.context.enabled</name> |
| <value>false</value> |
| <description> |
| Whether to enable the namenode to send its current txnid back to the client. |
| Setting this to true is required by Consistent Read from Standby feature. |
| But for regular cases, this should be set to false to avoid the overhead |
| of updating and maintaining this state. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.ec.system.default.policy</name> |
| <value>RS-6-3-1024k</value> |
| <description>The default erasure coding policy to be used |
| on a path if no policy name is passed. |
| </description> |
| </property> |
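| |
| <!-- |
| Illustrative hdfs-site.xml snippet (example value) overriding the system |
| default erasure coding policy with another built-in policy. The named |
| policy must be one of the policies known to the cluster, and it generally |
| still needs to be enabled before paths can use it. |
| |
| <property> |
| <name>dfs.namenode.ec.system.default.policy</name> |
| <value>RS-3-2-1024k</value> |
| </property> |
| --> |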
| |
| <property> |
| <name>dfs.namenode.ec.policies.max.cellsize</name> |
| <value>4194304</value> |
| <description>The maximum cell size of erasure coding policy. Default is 4MB. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.ec.userdefined.policy.allowed</name> |
| <value>true</value> |
| <description>If set to false, addition of user-defined |
| erasure coding policies is not allowed. |
<