| <?xml version="1.0"?> |
| <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
| |
| <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor |
| license agreements. See the NOTICE file distributed with this work for additional |
| information regarding copyright ownership. The ASF licenses this file to |
| You under the Apache License, Version 2.0 (the "License"); you may not use |
| this file except in compliance with the License. You may obtain a copy of |
| the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required |
| by applicable law or agreed to in writing, software distributed under the |
| License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS |
| OF ANY KIND, either express or implied. See the License for the specific |
| language governing permissions and limitations under the License. --> |
| |
| <!-- Do not modify this file directly. Instead, copy entries that you --> |
| <!-- wish to modify from this file into hdfs-site.xml and change them --> |
| <!-- there. If hdfs-site.xml does not already exist, create it. --> |
| |
| <configuration> |
| |
| <property> |
| <name>hadoop.hdfs.configuration.version</name> |
| <value>1</value> |
| <description>version of this configuration file</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.logging.level</name> |
| <value>info</value> |
| <description> |
| The logging level for dfs namenode. Other values are "dir" (trace |
| namespace mutations), "block" (trace block under/over replications |
| and block creations/deletions), or "all". |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.secondary.http-address</name> |
| <value>0.0.0.0:50090</value> |
| <description> |
| The secondary namenode http server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.address</name> |
| <value>0.0.0.0:50010</value> |
| <description> |
| The datanode server address and port for data transfer. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.http.address</name> |
| <value>0.0.0.0:50075</value> |
| <description> |
| The datanode http server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.ipc.address</name> |
| <value>0.0.0.0:50020</value> |
| <description> |
| The datanode ipc server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.handler.count</name> |
| <value>10</value> |
| <description>The number of server threads for the datanode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.http-address</name> |
| <value>0.0.0.0:50070</value> |
| <description> |
| The address and the base port where the dfs namenode web ui will listen |
| on. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.https.enable</name> |
| <value>false</value> |
| <description>Decide if HTTPS(SSL) is supported on HDFS |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.https.need-auth</name> |
| <value>false</value> |
| <description>Whether SSL client certificate authentication is required |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.https.server.keystore.resource</name> |
| <value>ssl-server.xml</value> |
| <description>Resource file from which ssl server keystore |
| information will be extracted |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.https.keystore.resource</name> |
| <value>ssl-client.xml</value> |
| <description>Resource file from which ssl client keystore |
| information will be extracted |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.https.address</name> |
| <value>0.0.0.0:50475</value> |
| <description>The datanode secure http server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.https-address</name> |
| <value>0.0.0.0:50470</value> |
| <description>The namenode secure http server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.dns.interface</name> |
| <value>default</value> |
| <description>The name of the Network Interface from which a data node |
| should |
| report its IP address. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.dns.nameserver</name> |
| <value>default</value> |
| <description>The host name or IP address of the name server (DNS) |
| which a DataNode should use to determine the host name used by the |
| NameNode for communication and display purposes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.backup.address</name> |
| <value>0.0.0.0:50100</value> |
| <description> |
| The backup node server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.backup.http-address</name> |
| <value>0.0.0.0:50105</value> |
| <description> |
| The backup node http server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.replication.considerLoad</name> |
| <value>true</value> |
| <description>Decide if chooseTarget considers the target's load or not |
| </description> |
| </property> |
| <property> |
| <name>dfs.default.chunk.view.size</name> |
| <value>32768</value> |
| <description>The number of bytes to view for a file on the browser. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.du.reserved</name> |
| <value>0</value> |
| <description>Reserved space in bytes per volume. Always leave this |
| much space free for non dfs use. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.name.dir</name> |
| <value>file://${hadoop.tmp.dir}/dfs/name</value> |
| <description>Determines where on the local filesystem the DFS name |
| node |
| should store the name table(fsimage). If this is a comma-delimited list |
| of directories then the name table is replicated in all of the |
| directories, for redundancy. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.name.dir.restore</name> |
| <value>false</value> |
| <description>Set to true to enable NameNode to attempt recovering a |
| previously failed dfs.namenode.name.dir. When enabled, a recovery of |
| any |
| failed directory is attempted during checkpoint. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-component-length</name> |
| <value>0</value> |
| <description>Defines the maximum number of characters in each |
| component |
| of a path. A value of 0 will disable the check. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.fs-limits.max-directory-items</name> |
| <value>0</value> |
| <description>Defines the maximum number of items that a directory may |
| contain. A value of 0 will disable the check. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.dir</name> |
| <value>${dfs.namenode.name.dir}</value> |
| <description>Determines where on the local filesystem the DFS name |
| node |
| should store the transaction (edits) file. If this is a comma-delimited |
| list |
| of directories then the transaction file is replicated in all of the |
| directories, for redundancy. Default value is same as |
| dfs.namenode.name.dir |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.shared.edits.dir</name> |
| <value></value> |
| <description>A directory on shared storage between the multiple |
| namenodes |
| in an HA cluster. This directory will be written by the active and read |
| by the standby in order to keep the namespaces synchronized. This |
| directory |
| does not need to be listed in dfs.namenode.edits.dir above. It should be |
| left empty in a non-HA cluster. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.permissions.enabled</name> |
| <value>true</value> |
| <description> |
| If "true", enable permission checking in HDFS. |
| If "false", permission checking is turned off, |
| but all other behavior is unchanged. |
| Switching from one parameter value to the other does not change the mode, |
| owner or group of files or directories. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.permissions.superusergroup</name> |
| <value>supergroup</value> |
| <description>The name of the group of super-users.</description> |
| </property> |
| <!-- <property> <name>dfs.cluster.administrators</name> <value>ACL for the |
| admins</value> <description>This configuration is used to control who can |
| access the default servlets in the namenode, etc. </description> </property> --> |
| |
| <property> |
| <name>dfs.block.access.token.enable</name> |
| <value>false</value> |
| <description> |
| If "true", access tokens are used as capabilities for accessing |
| datanodes. |
| If "false", no access tokens are checked on accessing datanodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.access.key.update.interval</name> |
| <value>600</value> |
| <description> |
| Interval in minutes at which namenode updates its access keys. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.access.token.lifetime</name> |
| <value>600</value> |
| <description>The lifetime of access tokens in minutes.</description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.data.dir</name> |
| <value>file://${hadoop.tmp.dir}/dfs/data</value> |
| <description>Determines where on the local filesystem a DFS data node |
| should store its blocks. If this is a comma-delimited |
| list of directories, then data will be stored in all named |
| directories, typically on different devices. |
| Directories that do not exist are ignored. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.data.dir.perm</name> |
| <value>700</value> |
| <description>Permissions for the directories on the local |
| filesystem where |
| the DFS data node stores its blocks. The permissions can either be octal |
| or |
| symbolic. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.replication</name> |
| <value>1</value> |
| <description>Default block replication. |
| The actual number of replications can be specified when the file is |
| created. |
| The default is used if replication is not specified at create time. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.replication.max</name> |
| <value>512</value> |
| <description>Maximal block replication. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.replication.min</name> |
| <value>1</value> |
| <description>Minimal block replication. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.blocksize</name> |
| <value>67108864</value> |
| <description> |
| The default block size for new files, in bytes. |
| You can use the following suffix (case insensitive): |
| k(kilo), m(mega), g(giga), t(tera), p(peta), e(exa) to specify the size (such |
| as 128k, 512m, 1g, etc.), |
| Or provide complete size in bytes (such as 134217728 for 128 MB). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.retries</name> |
| <value>3</value> |
| <description>The number of retries for writing blocks to the data |
| nodes, |
| before we signal failure to the application. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.replace-datanode-on-failure.enable</name> |
| <value>true</value> |
| <description> |
| If there is a datanode/network failure in the write pipeline, |
| DFSClient will try to remove the failed datanode from the pipeline |
| and then continue writing with the remaining datanodes. As a result, |
| the number of datanodes in the pipeline is decreased. The feature is |
| to add new datanodes to the pipeline. |
| |
| This is a site-wide property to enable/disable the feature. |
| |
| When the cluster size is extremely small, e.g. 3 nodes or less, cluster |
| administrators may want to set the policy to NEVER in the default |
| configuration file or disable this feature. Otherwise, users may |
| experience an unusually high rate of pipeline failures since it is |
| impossible to find new datanodes for replacement. |
| |
| See also dfs.client.block.write.replace-datanode-on-failure.policy |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.replace-datanode-on-failure.policy</name> |
| <value>DEFAULT</value> |
| <description> |
| This property is used only if the value of |
| dfs.client.block.write.replace-datanode-on-failure.enable is true. |
| |
| ALWAYS: always add a new datanode when an existing datanode is |
| removed. |
| |
| NEVER: never add a new datanode. |
| |
| DEFAULT: |
| Let r be the replication number. |
| Let n be the number of existing datanodes. |
| Add a new datanode only if r is greater than or equal to 3 and either |
| (1) floor(r/2) is greater than or equal to n; or |
| (2) r is greater than n and the block is hflushed/appended. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.blockreport.intervalMsec</name> |
| <value>21600000</value> |
| <description>Determines block reporting interval in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.blockreport.initialDelay</name> |
| <value>0</value> |
| <description>Delay for first block report in seconds.</description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.directoryscan.interval</name> |
| <value>21600</value> |
| <description>Interval in seconds for Datanode to scan data directories |
| and |
| reconcile the difference between blocks in memory and on the disk. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.directoryscan.threads</name> |
| <value>1</value> |
| <description>The number of threads in the thread pool used to compile |
| reports |
| for volumes in parallel. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.heartbeat.interval</name> |
| <value>3</value> |
| <description>Determines datanode heartbeat interval in seconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.handler.count</name> |
| <value>10</value> |
| <description>The number of server threads for the namenode. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.threshold-pct</name> |
| <value>0.999f</value> |
| <description> |
| Specifies the percentage of blocks that should satisfy |
| the minimal replication requirement defined by |
| dfs.namenode.replication.min. |
| Values less than or equal to 0 mean not to wait for any particular |
| percentage of blocks before exiting safemode. |
| Values greater than 1 will make safe mode permanent. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.min.datanodes</name> |
| <value>0</value> |
| <description> |
| Specifies the number of datanodes that must be considered alive |
| before the name node exits safemode. |
| Values less than or equal to 0 mean not to take the number of live |
| datanodes into account when deciding whether to remain in safe mode |
| during startup. |
| Values greater than the number of datanodes in the cluster |
| will make safe mode permanent. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.extension</name> |
| <value>30000</value> |
| <description> |
| Determines extension of safe mode in milliseconds |
| after the threshold level is reached. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.balance.bandwidthPerSec</name> |
| <value>1048576</value> |
| <description> |
| Specifies the maximum amount of bandwidth that each datanode |
| can utilize for the balancing purpose in terms of |
| the number of bytes per second. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.hosts</name> |
| <value></value> |
| <description>Names a file that contains a list of hosts that are |
| permitted to connect to the namenode. The full pathname of the file |
| must be specified. If the value is empty, all hosts are |
| permitted. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.hosts.exclude</name> |
| <value></value> |
| <description>Names a file that contains a list of hosts that are |
| not permitted to connect to the namenode. The full pathname of the |
| file must be specified. If the value is empty, no hosts are |
| excluded. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.max.objects</name> |
| <value>0</value> |
| <description>The maximum number of files, directories and blocks |
| dfs supports. A value of zero indicates no limit to the number |
| of objects that dfs supports. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.interval</name> |
| <value>30</value> |
| <description>Namenode periodicity in seconds to check if decommission |
| is |
| complete. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.nodes.per.interval</name> |
| <value>5</value> |
| <description>The number of nodes the namenode checks for decommission |
| completion |
| in each dfs.namenode.decommission.interval. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.replication.interval</name> |
| <value>3</value> |
| <description>The periodicity in seconds with which the namenode |
| computes |
| replication work for datanodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.accesstime.precision</name> |
| <value>3600000</value> |
| <description>The access time for an HDFS file is precise up to this value, |
| in milliseconds. The default value is 1 hour. Setting a value of 0 disables |
| access times for HDFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.plugins</name> |
| <value></value> |
| <description>Comma-separated list of datanode plug-ins to be |
| activated. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.plugins</name> |
| <value></value> |
| <description>Comma-separated list of namenode plug-ins to be |
| activated. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.stream-buffer-size</name> |
| <value>4096</value> |
| <description>The size of buffer to stream files. |
| The size of this buffer should probably be a multiple of hardware |
| page size (4096 on Intel x86), and it determines how much data is |
| buffered during read and write operations. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.bytes-per-checksum</name> |
| <value>512</value> |
| <description>The number of bytes per checksum. Must not be larger than |
| dfs.stream-buffer-size |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client-write-packet-size</name> |
| <value>65536</value> |
| <description>Packet size for clients to write</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.dir</name> |
| <value>file://${hadoop.tmp.dir}/dfs/namesecondary</value> |
| <description>Determines where on the local filesystem the DFS |
| secondary |
| name node should store the temporary images to merge. |
| If this is a comma-delimited list of directories then the image is |
| replicated in all of the directories for redundancy. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.edits.dir</name> |
| <value>${dfs.namenode.checkpoint.dir}</value> |
| <description>Determines where on the local filesystem the DFS |
| secondary |
| name node should store the temporary edits to merge. |
| If this is a comma-delimited list of directories then the edits are |
| replicated in all of the directories for redundancy. |
| Default value is same as dfs.namenode.checkpoint.dir |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.period</name> |
| <value>3600</value> |
| <description>The number of seconds between two periodic checkpoints. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.txns</name> |
| <value>40000</value> |
| <description>The Secondary NameNode or CheckpointNode will create a |
| checkpoint |
| of the namespace every 'dfs.namenode.checkpoint.txns' transactions, |
| regardless |
| of whether 'dfs.namenode.checkpoint.period' has expired. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.checkpoint.check.period</name> |
| <value>60</value> |
| <description>The SecondaryNameNode and CheckpointNode will poll the |
| NameNode |
| every 'dfs.namenode.checkpoint.check.period' seconds to query the number |
| of uncheckpointed transactions. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.num.checkpoints.retained</name> |
| <value>2</value> |
| <description>The number of image checkpoint files that will be |
| retained by |
| the NameNode and Secondary NameNode in their storage directories. All |
| edit |
| logs necessary to recover an up-to-date namespace from the oldest |
| retained |
| checkpoint will also be retained. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.num.extra.edits.retained</name> |
| <value>1000000</value> |
| <description>The number of extra transactions which should be retained |
| beyond what is minimally necessary for a NN restart. This can be |
| useful for |
| audit purposes or for an HA setup where a remote Standby Node may have |
| been offline for some time and need to have a longer backlog of |
| retained |
| edits in order to start again. |
| Typically each edit is on the order of a few hundred bytes, so the default |
| of 1 million edits should be on the order of hundreds of MBs or low |
| GBs. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.key.update-interval</name> |
| <value>86400000</value> |
| <description>The update interval for master key for delegation tokens |
| in the namenode in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.token.max-lifetime</name> |
| <value>604800000</value> |
| <description>The maximum lifetime in milliseconds for which a |
| delegation |
| token is valid. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.token.renew-interval</name> |
| <value>86400000</value> |
| <description>The renewal interval for delegation token in |
| milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.failed.volumes.tolerated</name> |
| <value>0</value> |
| <description>The number of volumes that are allowed to |
| fail before a datanode stops offering service. By default |
| any volume failure will cause a datanode to shutdown. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.compress</name> |
| <value>false</value> |
| <description>Should the dfs image be compressed? |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.compression.codec</name> |
| <value>org.apache.hadoop.io.compress.DefaultCodec</value> |
| <description>If the dfs image is compressed, how should it be |
| compressed? |
| This has to be a codec defined in io.compression.codecs. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.transfer.bandwidthPerSec</name> |
| <value>0</value> |
| <description> |
| Specifies the maximum amount of bandwidth that can be utilized for image |
| transfer in terms of the number of bytes per second. |
| A default value of 0 indicates that throttling is disabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.support.allow.format</name> |
| <value>true</value> |
| <description>Does HDFS namenode allow itself to be formatted? |
| You may consider setting this to false for any production |
| cluster, to avoid any possibility of formatting a running DFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.max.transfer.threads</name> |
| <value>4096</value> |
| <description> |
| Specifies the maximum number of threads to use for transferring data |
| in and out of the DN. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.readahead.bytes</name> |
| <value>4194304</value> |
| <description> |
| While reading block files, if the Hadoop native libraries are available, |
| the datanode can use the posix_fadvise system call to explicitly |
| page data into the operating system buffer cache ahead of the current |
| reader's position. This can improve performance especially when |
| disks are highly contended. |
| |
| This configuration specifies the number of bytes ahead of the current |
| read position which the datanode will attempt to read ahead. This |
| feature may be disabled by configuring this property to 0. |
| |
| If the native libraries are not available, this configuration has no |
| effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.drop.cache.behind.reads</name> |
| <value>false</value> |
| <description> |
| In some workloads, the data read from HDFS is known to be significantly |
| large enough that it is unlikely to be useful to cache it in the |
| operating system buffer cache. In this case, the DataNode may be |
| configured to automatically purge all data from the buffer cache |
| after it is delivered to the client. This behavior is automatically |
| disabled for workloads which read only short sections of a block |
| (e.g. HBase random-IO workloads). |
| |
| This may improve performance for some workloads by freeing buffer |
| cache space usage for more cacheable data. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.drop.cache.behind.writes</name> |
| <value>false</value> |
| <description> |
| In some workloads, the data written to HDFS is known to be |
| significantly |
| large enough that it is unlikely to be useful to cache it in the |
| operating system buffer cache. In this case, the DataNode may be |
| configured to automatically purge all data from the buffer cache |
| after it is written to disk. |
| |
| This may improve performance for some workloads by freeing buffer |
| cache space usage for more cacheable data. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.sync.behind.writes</name> |
| <value>false</value> |
| <description> |
| If this configuration is enabled, the datanode will instruct the |
| operating system to enqueue all written data to the disk immediately |
| after it is written. This differs from the usual OS policy which |
| may wait for up to 30 seconds before triggering writeback. |
| |
| This may improve performance for some workloads by smoothing the |
| IO profile for data written to disk. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.max.attempts</name> |
| <value>15</value> |
| <description> |
| Expert only. The number of client failover attempts that should be |
| made before the failover is considered failed. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.sleep.base.millis</name> |
| <value>500</value> |
| <description> |
| Expert only. The time to wait, in milliseconds, between failover |
| attempts increases exponentially as a function of the number of |
| attempts made so far, with a random factor of +/- 50%. This option |
| specifies the base value used in the failover calculation. The |
| first failover will retry immediately. The 2nd failover attempt |
| will delay at least dfs.client.failover.sleep.base.millis |
| milliseconds. And so on. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.sleep.max.millis</name> |
| <value>15000</value> |
| <description> |
| Expert only. The time to wait, in milliseconds, between failover |
| attempts increases exponentially as a function of the number of |
| attempts made so far, with a random factor of +/- 50%. This option |
| specifies the maximum value to wait between failovers. |
| Specifically, the time between two failover attempts will not |
| exceed +/- 50% of dfs.client.failover.sleep.max.millis |
| milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.connection.retries</name> |
| <value>0</value> |
| <description> |
| Expert only. Indicates the number of retries a failover IPC client |
| will make to establish a server connection. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.failover.connection.retries.on.timeouts</name> |
| <value>0</value> |
| <description> |
| Expert only. The number of retry attempts a failover IPC client |
| will make on socket timeout when establishing a server connection. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.nameservices</name> |
| <value></value> |
| <description> |
| Comma-separated list of nameservices. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.nameservice.id</name> |
| <value></value> |
| <description> |
| The ID of this nameservice. If the nameservice ID is not |
| configured or more than one nameservice is configured for |
| dfs.nameservices it is determined automatically by |
| matching the local node's address with the configured address. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.namenodes.EXAMPLENAMESERVICE</name> |
| <value></value> |
| <description> |
| The prefix for a given nameservice, contains a comma-separated |
| list of namenodes for a given nameservice (e.g. EXAMPLENAMESERVICE). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.namenode.id</name> |
| <value></value> |
| <description> |
| The ID of this namenode. If the namenode ID is not configured it |
| is determined automatically by matching the local node's address |
| with the configured address. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.log-roll.period</name> |
| <value>120</value> |
| <description> |
| How often, in seconds, the StandbyNode should ask the active to |
| roll edit logs. Since the StandbyNode only reads from finalized |
| log segments, the StandbyNode will only be as up-to-date as how |
| often the logs are rolled. Note that failover triggers a log roll |
| so the StandbyNode will be up to date before it becomes active. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.tail-edits.period</name> |
| <value>60</value> |
| <description> |
| How often, in seconds, the StandbyNode should check for new |
| finalized log segments in the shared edits log. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.ha.automatic-failover.enabled</name> |
| <value>false</value> |
| <description> |
| Whether automatic failover is enabled. See the HDFS High |
| Availability documentation for details on automatic HA |
| configuration. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.support.append</name> |
| <value>true</value> |
| <description> |
| Does HDFS allow appends to files? |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.use.datanode.hostname</name> |
| <value>false</value> |
| <description>Whether clients should use datanode hostnames when |
| connecting to datanodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.use.datanode.hostname</name> |
| <value>false</value> |
| <description>Whether datanodes should use datanode hostnames when |
| connecting to other datanodes for data transfer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.local.interfaces</name> |
| <value></value> |
| <description>A comma-separated list of network interface names to use |
| for data transfer between the client and datanodes. When creating |
| a connection to read from or write to a datanode, the client |
| chooses one of the specified interfaces at random and binds its |
| socket to the IP of that interface. Individual names may be |
| specified as either an interface name (eg "eth0"), a subinterface |
| name (eg "eth0:0"), or an IP address (which may be specified using |
| CIDR notation to match a range of IPs). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.kerberos.internal.spnego.principal</name> |
| <value>${dfs.web.authentication.kerberos.principal}</value> |
| </property> |
| |
| <property> |
| <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name> |
| <value>${dfs.web.authentication.kerberos.principal}</value> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.invalidate.work.pct.per.iteration</name> |
| <value>0.32f</value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This determines the percentage amount of block |
| invalidations (deletes) to do over a single DN heartbeat |
| deletion command. The final deletion count is determined by applying this |
| percentage to the number of live nodes in the system. |
| The resultant number is the number of blocks from the deletion list |
| chosen for proper invalidation over a single heartbeat of a single |
| DN. |
| Value should be a positive, non-zero percentage in float notation (X.Yf), |
| with 1.0f meaning 100%. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.replication.work.multiplier.per.iteration</name> |
| <value>2</value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This determines the total amount of block transfers to begin in |
| parallel at a DN, for replication, when such a command list is being |
| sent over a DN heartbeat by the NN. The actual number is obtained by |
| multiplying this multiplier by the total number of live nodes in the |
| cluster. The resulting number is the number of blocks to begin transfers |
| immediately for, per DN heartbeat. This number can be any positive, |
| non-zero integer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.enabled</name> |
| <value>false</value> |
| <description> |
| Enable WebHDFS (REST API) in Namenodes and Datanodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>hadoop.fuse.connection.timeout</name> |
| <value>300</value> |
| <description> |
| The minimum number of seconds that we'll cache libhdfs connection |
| objects in fuse_dfs. Lower values will result in lower memory |
| consumption; higher values may speed up access by avoiding the |
| overhead of creating new connection objects. |
| </description> |
| </property> |
| |
| <property> |
| <name>hadoop.fuse.timer.period</name> |
| <value>5</value> |
| <description> |
| The number of seconds between cache expiry checks in fuse_dfs. Lower |
| values will result in fuse_dfs noticing changes to Kerberos ticket |
| caches more quickly. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.metrics.percentiles.intervals</name> |
| <value></value> |
| <description> |
| Comma-delimited set of integers denoting the desired rollover intervals |
| (in seconds) for percentile latency metrics on the Namenode and |
| Datanode. |
| By default, percentile latency metrics are disabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.transfer</name> |
| <value>false</value> |
| <description> |
| Whether or not actual block data that is read/written from/to HDFS should |
| be encrypted on the wire. This only needs to be set on the NN and DNs; |
| clients will deduce this automatically. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.encrypt.data.transfer.algorithm</name> |
| <value></value> |
| <description> |
| This value may be set to either "3des" or "rc4". If nothing is set, then |
| the configured JCE default on the system is used (usually 3DES). It is |
| widely believed that 3DES is more cryptographically secure, but RC4 is |
| substantially faster. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.hdfs-blocks-metadata.enabled</name> |
| <value>false</value> |
| <description> |
| Boolean which enables backend datanode-side support for the experimental |
| DistributedFileSystem#getFileBlockStorageLocations API. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.file-block-storage-locations.num-threads</name> |
| <value>10</value> |
| <description> |
| Number of threads used for making parallel RPCs in |
| DistributedFileSystem#getFileBlockStorageLocations(). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.file-block-storage-locations.timeout</name> |
| <value>60</value> |
| <description> |
| Timeout (in seconds) for the parallel RPCs made in |
| DistributedFileSystem#getFileBlockStorageLocations(). |
| </description> |
| </property> |
| |
| </configuration> |