| <?xml version="1.0"?> |
| <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
| |
| <!-- Do not modify this file directly. Instead, copy entries that you --> |
| <!-- wish to modify from this file into hdfs-site.xml and change them --> |
| <!-- there. If hdfs-site.xml does not already exist, create it. --> |
| |
| <configuration> |
| |
| <property> |
| <name>dfs.namenode.logging.level</name> |
| <value>info</value> |
| <description>The logging level for dfs namenode. Other values are "dir" (trace |
| namespace mutations), "block" (trace block under/over replications and block |
| creations/deletions), or "all".</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.rpc-address</name> |
| <value></value> |
| <description> |
| RPC address that handles all client requests. If empty then we'll get the |
| value from fs.default.name. |
| The value of this property will take the form of hdfs://nn-host1:rpc-port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.secondary.http.address</name> |
| <value>0.0.0.0:50090</value> |
| <description> |
| The secondary namenode http server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.address</name> |
| <value>0.0.0.0:50010</value> |
| <description> |
| The datanode server address and port for data transfer. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.http.address</name> |
| <value>0.0.0.0:50075</value> |
| <description> |
| The datanode http server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.ipc.address</name> |
| <value>0.0.0.0:50020</value> |
| <description> |
| The datanode ipc server address and port. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.handler.count</name> |
| <value>3</value> |
| <description>The number of server threads for the datanode.</description> |
| </property> |
| |
| <property> |
| <name>dfs.http.address</name> |
| <value>0.0.0.0:50070</value> |
| <description> |
| The address and the base port where the dfs namenode web ui will listen on. |
| If the port is 0 then the server will start on a free port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.https.enable</name> |
| <value>false</value> |
| <description>Decide if HTTPS(SSL) is supported on HDFS |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.https.need.client.auth</name> |
| <value>false</value> |
| <description>Whether SSL client certificate authentication is required |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.https.server.keystore.resource</name> |
| <value>ssl-server.xml</value> |
| <description>Resource file from which ssl server keystore |
| information will be extracted |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.https.client.keystore.resource</name> |
| <value>ssl-client.xml</value> |
| <description>Resource file from which ssl client keystore |
| information will be extracted |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.https.address</name> |
| <value>0.0.0.0:50475</value> |
| <description> |
| The datanode secure http server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.https.address</name> |
| <value>0.0.0.0:50470</value> |
| <description> |
| The namenode secure http server address and port. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.dns.interface</name> |
| <value>default</value> |
| <description>The name of the Network Interface from which a data node should |
| report its IP address. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.dns.nameserver</name> |
| <value>default</value> |
| <description>The host name or IP address of the name server (DNS) |
| which a DataNode should use to determine the host name used by the |
| NameNode for communication and display purposes. |
| </description> |
| </property> |
| |
| |
| |
| <property> |
| <name>dfs.replication.considerLoad</name> |
| <value>true</value> |
| <description>Decide if chooseTarget considers the target's load or not |
| </description> |
| </property> |
| <property> |
| <name>dfs.default.chunk.view.size</name> |
| <value>32768</value> |
| <description>The number of bytes to view for a file on the browser. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.du.reserved</name> |
| <value>0</value> |
| <description>Reserved space in bytes per volume. Always leave this much space free for non dfs use. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.name.dir</name> |
| <value>${hadoop.tmp.dir}/dfs/name</value> |
| <description>Determines where on the local filesystem the DFS name node |
| should store the name table(fsimage). If this is a comma-delimited list |
| of directories then the name table is replicated in all of the |
| directories, for redundancy. </description> |
| </property> |
| |
| <property> |
| <name>dfs.name.edits.dir</name> |
| <value>${dfs.name.dir}</value> |
| <description>Determines where on the local filesystem the DFS name node |
| should store the transaction (edits) file. If this is a comma-delimited list |
| of directories then the transaction file is replicated in all of the |
| directories, for redundancy. Default value is same as dfs.name.dir |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.edits.toleration.length</name> |
| <value>0</value> |
| <description> |
| The length in bytes that namenode is willing to tolerate when the edit log |
| is corrupted. The edit log toleration feature checks the entire edit log. |
| It computes read length (the length of valid data), corruption length and |
| padding length. In case that corruption length is non-zero, the corruption |
| will be tolerated only if the corruption length is less than or equal to |
| the toleration length. |
| |
| For disabling edit log toleration feature, set this property to -1. When |
| the feature is disabled, the end of edit log will not be checked. In this |
| case, namenode will startup normally even if the end of edit log is |
| corrupted. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.web.ugi</name> |
| <value>webuser,webgroup</value> |
| <description>The user account used by the web interface. |
| Syntax: USERNAME,GROUP1,GROUP2, ... |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.permissions</name> |
| <value>true</value> |
| <description> |
| If "true", enable permission checking in HDFS. |
| If "false", permission checking is turned off, |
| but all other behavior is unchanged. |
| Switching from one parameter value to the other does not change the mode, |
| owner or group of files or directories. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.permissions.supergroup</name> |
| <value>supergroup</value> |
| <description>The name of the group of super-users.</description> |
| </property> |
| |
| <property> |
| <name>dfs.block.access.token.enable</name> |
| <value>false</value> |
| <description> |
| If "true", access tokens are used as capabilities for accessing datanodes. |
| If "false", no access tokens are checked on accessing datanodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.access.key.update.interval</name> |
| <value>600</value> |
| <description> |
| Interval in minutes at which namenode updates its access keys. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.access.token.lifetime</name> |
| <value>600</value> |
| <description>The lifetime of access tokens in minutes.</description> |
| </property> |
| |
| |
| <property> |
| <name>dfs.data.dir</name> |
| <value>${hadoop.tmp.dir}/dfs/data</value> |
| <description>Determines where on the local filesystem a DFS data node |
| should store its blocks. If this is a comma-delimited |
| list of directories, then data will be stored in all named |
| directories, typically on different devices. |
| Directories that do not exist are ignored. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.data.dir.perm</name> |
| <value>755</value> |
| <description>Permissions for the directories on the local filesystem where |
| the DFS data node stores its blocks. The permissions can either be octal or |
| symbolic.</description> |
| </property> |
| |
| <property> |
| <name>dfs.replication</name> |
| <value>3</value> |
| <description>Default block replication. |
| The actual number of replications can be specified when the file is created. |
| The default is used if replication is not specified in create time. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.replication.max</name> |
| <value>512</value> |
| <description>Maximal block replication. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.replication.min</name> |
| <value>1</value> |
| <description>Minimal block replication. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.block.size</name> |
| <value>67108864</value> |
| <description>The default block size for new files.</description> |
| </property> |
| |
| <property> |
| <name>dfs.df.interval</name> |
| <value>60000</value> |
| <description>Disk usage statistics refresh interval in msec.</description> |
| </property> |
| |
| <property> |
| <name>dfs.client.block.write.retries</name> |
| <value>3</value> |
| <description>The number of retries for writing blocks to the data nodes, |
| before we signal failure to the application. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.blockreport.intervalMsec</name> |
| <value>3600000</value> |
| <description>Determines block reporting interval in milliseconds.</description> |
| </property> |
| |
| <property> |
| <name>dfs.blockreport.initialDelay</name> |
| <value>0</value> |
| <description>Delay for first block report in seconds.</description> |
| </property> |
| |
| <property> |
| <name>dfs.heartbeat.interval</name> |
| <value>3</value> |
| <description>Determines datanode heartbeat interval in seconds.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.handler.count</name> |
| <value>10</value> |
| <description>The number of server threads for the namenode.</description> |
| </property> |
| |
| <property> |
| <name>dfs.safemode.threshold.pct</name> |
| <value>0.999f</value> |
| <description> |
| Specifies the percentage of blocks that should satisfy |
| the minimal replication requirement defined by dfs.replication.min. |
| Values less than or equal to 0 mean not to wait for any particular |
| percentage of blocks before exiting safemode. |
| Values greater than 1 will make safe mode permanent. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.safemode.min.datanodes</name> |
| <value>0</value> |
| <description> |
| Specifies the number of datanodes that must be considered alive |
| before the name node exits safemode. |
| Values less than or equal to 0 mean not to take the number of live |
| datanodes into account when deciding whether to remain in safe mode |
| during startup. |
| Values greater than the number of datanodes in the cluster |
| will make safe mode permanent. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.safemode.extension</name> |
| <value>30000</value> |
| <description> |
| Determines extension of safe mode in milliseconds |
| after the threshold level is reached. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.balance.bandwidthPerSec</name> |
| <value>1048576</value> |
| <description> |
| Specifies the maximum amount of bandwidth that each datanode |
| can utilize for the balancing purpose in term of |
| the number of bytes per second. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.hosts</name> |
| <value></value> |
| <description>Names a file that contains a list of hosts that are |
| permitted to connect to the namenode. The full pathname of the file |
| must be specified. If the value is empty, all hosts are |
| permitted.</description> |
| </property> |
| |
| <property> |
| <name>dfs.hosts.exclude</name> |
| <value></value> |
| <description>Names a file that contains a list of hosts that are |
| not permitted to connect to the namenode. The full pathname of the |
| file must be specified. If the value is empty, no hosts are |
| excluded.</description> |
| </property> |
| |
| <property> |
| <name>dfs.max.objects</name> |
| <value>0</value> |
| <description>The maximum number of files, directories and blocks |
| dfs supports. A value of zero indicates no limit to the number |
| of objects that dfs supports. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.interval</name> |
| <value>30</value> |
| <description>Namenode periodicity in seconds to check if decommission is |
| complete.</description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.decommission.nodes.per.interval</name> |
| <value>5</value> |
| <description>The number of nodes the namenode checks for decommission completion |
| in each dfs.namenode.decommission.interval.</description> |
| </property> |
| |
| <property> |
| <name>dfs.replication.interval</name> |
| <value>3</value> |
| <description>The periodicity in seconds with which the namenode computes |
| replication work for datanodes. </description> |
| </property> |
| |
| <property> |
| <name>dfs.access.time.precision</name> |
| <value>3600000</value> |
| <description>The access time for an HDFS file is precise up to this value. |
| The default value is 1 hour. Setting a value of 0 disables |
| access times for HDFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.support.append</name> |
| <description> |
| This option is no longer supported. HBase no longer requires that |
| this option be enabled as sync is now enabled by default. See |
| HADOOP-8230 for additional information. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.master.name</name> |
| <description> |
| The hostname that the NameNode will bind to. This setting should be used |
| when the NameNode is multihomed to force the NameNode to bind to a |
| specific hostname or IP address. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.key.update-interval</name> |
| <value>86400000</value> |
| <description>The update interval for master key for delegation tokens |
| in the namenode in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.token.max-lifetime</name> |
| <value>604800000</value> |
| <description>The maximum lifetime in milliseconds for which a delegation |
| token is valid. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.delegation.token.renew-interval</name> |
| <value>86400000</value> |
| <description>The renewal interval for delegation token in milliseconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.failed.volumes.tolerated</name> |
| <value>0</value> |
| <description>The number of volumes that are allowed to |
| fail before a datanode stops offering service. By default |
| any volume failure will cause a datanode to shutdown. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.max.xcievers</name> |
| <value>4096</value> |
| <description>Specifies the maximum number of threads to use for transferring data |
| in and out of the DN. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.readahead.bytes</name> |
| <!-- 4 MiB = 4 * 1024 * 1024 bytes. --> |
| <value>4194304</value> |
| <description> |
| While reading block files, if the Hadoop native libraries are available, |
| the datanode can use the posix_fadvise system call to explicitly |
| page data into the operating system buffer cache ahead of the current |
| reader's position. This can improve performance especially when |
| disks are highly contended. |
| |
| This configuration specifies the number of bytes ahead of the current |
| read position which the datanode will attempt to read ahead. This |
| feature may be disabled by configuring this property to 0. |
| |
| If the native libraries are not available, this configuration has no |
| effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.drop.cache.behind.reads</name> |
| <value>false</value> |
| <description> |
| In some workloads, the data read from HDFS is known to be significantly |
| large enough that it is unlikely to be useful to cache it in the |
| operating system buffer cache. In this case, the DataNode may be |
| configured to automatically purge all data from the buffer cache |
| after it is delivered to the client. This behavior is automatically |
| disabled for workloads which read only short sections of a block |
| (e.g., HBase random-IO workloads). |
| |
| This may improve performance for some workloads by freeing buffer |
| cache space usage for more cacheable data. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.drop.cache.behind.writes</name> |
| <value>false</value> |
| <description> |
| In some workloads, the data written to HDFS is known to be significantly |
| large enough that it is unlikely to be useful to cache it in the |
| operating system buffer cache. In this case, the DataNode may be |
| configured to automatically purge all data from the buffer cache |
| after it is written to disk. |
| |
| This may improve performance for some workloads by freeing buffer |
| cache space usage for more cacheable data. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.sync.behind.writes</name> |
| <value>false</value> |
| <description> |
| If this configuration is enabled, the datanode will instruct the |
| operating system to enqueue all written data to the disk immediately |
| after it is written. This differs from the usual OS policy which |
| may wait for up to 30 seconds before triggering writeback. |
| |
| This may improve performance for some workloads by smoothing the |
| IO profile for data written to disk. |
| |
| If the Hadoop native libraries are not available, this configuration |
| has no effect. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.use.datanode.hostname</name> |
| <value>false</value> |
| <description>Whether clients should use datanode hostnames when |
| connecting to datanodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.use.datanode.hostname</name> |
| <value>false</value> |
| <description>Whether datanodes should use datanode hostnames when |
| connecting to other datanodes for data transfer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.client.local.interfaces</name> |
| <value></value> |
| <description>A comma separated list of network interface names to use |
| for data transfer between the client and datanodes. When creating |
| a connection to read from or write to a datanode, the client |
| chooses one of the specified interfaces at random and binds its |
| socket to the IP of that interface. Individual names may be |
| specified as either an interface name (eg "eth0"), a subinterface |
| name (eg "eth0:0"), or an IP address (which may be specified using |
| CIDR notation to match a range of IPs). |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.image.transfer.bandwidthPerSec</name> |
| <value>0</value> |
| <description> |
| Specifies the maximum amount of bandwidth that can be utilized |
| for image transfer in term of the number of bytes per second. |
| A default value of 0 indicates that throttling is disabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.webhdfs.enabled</name> |
| <value>false</value> |
| <description> |
| Enable WebHDFS (REST API) in Namenodes and Datanodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.kerberos.internal.spnego.principal</name> |
| <value>${dfs.web.authentication.kerberos.principal}</value> |
| <description> |
| The server principal used by the NameNode for web UI SPNEGO |
| authentication when Kerberos security is enabled. By default it takes |
| the value of dfs.web.authentication.kerberos.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name> |
| <value>${dfs.web.authentication.kerberos.principal}</value> |
| <description> |
| The server principal used by the Secondary NameNode for web UI SPNEGO |
| authentication when Kerberos security is enabled. By default it takes |
| the value of dfs.web.authentication.kerberos.principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.invalidate.work.pct.per.iteration</name> |
| <value>0.32f</value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This determines the percentage amount of block |
| invalidations (deletes) to do over a single DN heartbeat |
| deletion command. The final deletion count is determined by applying this |
| percentage to the number of live nodes in the system. |
| The resultant number is the number of blocks from the deletion list |
| chosen for proper invalidation over a single heartbeat of a single DN. |
| Value should be a positive, non-zero percentage in float notation (X.Yf), |
| with 1.0f meaning 100%. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.replication.work.multiplier.per.iteration</name> |
| <value>2</value> |
| <description> |
| *Note*: Advanced property. Change with caution. |
| This determines the total amount of block transfers to begin in |
| parallel at a DN, for replication, when such a command list is being |
| sent over a DN heartbeat by the NN. The actual number is obtained by |
| multiplying this multiplier with the total number of live nodes in the |
| cluster. The result number is the number of blocks to begin transfers |
| immediately for, per DN heartbeat. This number can be any positive, |
| non-zero integer. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.avoid.read.stale.datanode</name> |
| <value>false</value> |
| <description> |
| Indicate whether or not to avoid reading from "stale" datanodes whose |
| heartbeat messages have not been received by the namenode |
| for more than a specified time interval. Stale datanodes will be |
| moved to the end of the node list returned for reading. See |
| dfs.namenode.avoid.write.stale.datanode for a similar setting for writes. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.avoid.write.stale.datanode</name> |
| <value>false</value> |
| <description> |
| Indicate whether or not to avoid writing to "stale" datanodes whose |
| heartbeat messages have not been received by the namenode |
| for more than a specified time interval. Writes will avoid using |
| stale datanodes unless more than a configured ratio |
| (dfs.namenode.write.stale.datanode.ratio) of datanodes are marked as |
| stale. See dfs.namenode.avoid.read.stale.datanode for a similar setting |
| for reads. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.stale.datanode.interval</name> |
| <value>30000</value> |
| <description> |
| Default time interval for marking a datanode as "stale", i.e., if |
| the namenode has not received heartbeat msg from a datanode for |
| more than this time interval, the datanode will be marked and treated |
| as "stale" by default. The stale interval cannot be too small since |
| otherwise this may cause too frequent change of stale states. |
| We thus set a minimum stale interval value (the default value is 3 times |
| the heartbeat interval) and guarantee that the stale interval cannot be less |
| than the minimum value. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.write.stale.datanode.ratio</name> |
| <value>0.5f</value> |
| <description> |
| When the ratio of the number of stale datanodes to the total number of |
| datanodes is greater than this ratio, stop avoiding writing to stale nodes so |
| as to prevent causing hotspots. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.datanode.plugins</name> |
| <value></value> |
| <description>Comma-separated list of datanode plug-ins to be activated. |
| </description> |
| </property> |
| |
| <property> |
| <name>dfs.namenode.plugins</name> |
| <value></value> |
| <description>Comma-separated list of namenode plug-ins to be activated. |
| </description> |
| </property> |
| |
| </configuration> |