blob: 1a0b4cf3c75cf4adddf1d0b856c730b552e7d6bc [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2021-06-15
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Hadoop &#x2013; Apache Hadoop 3.3.1</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20210615" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xleft">
<a href="http://www.apache.org/" class="externalLink">Apache</a>
&gt;
<a href="http://hadoop.apache.org/" class="externalLink">Hadoop</a>
&gt;
Apache Hadoop 3.3.1
</div>
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
&nbsp;| Last Published: 2021-06-15
&nbsp;| Version: 3.3.1
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<strong>Overview</strong>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="hadoop-openstack/index.html">OpenStack Swift</a>
</li>
<li class="none">
<a href="hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1>Apache Hadoop 3.3.1</h1>
<p>Apache Hadoop 3.3.1 incorporates a number of significant enhancements over the previous major release line (hadoop-2.x).</p>
<p>This release is generally available (GA), meaning that it represents a point of API stability and quality that we consider production-ready.</p>
<h1>Overview</h1>
<p>Users are encouraged to read the full set of release notes. This page provides an overview of the major changes.</p>
<div class="section">
<h2><a name="Minimum_required_Java_version_increased_from_Java_7_to_Java_8"></a>Minimum required Java version increased from Java 7 to Java 8</h2>
<p>All Hadoop JARs are now compiled targeting a runtime version of Java 8. Users still using Java 7 or below must upgrade to Java 8.</p></div>
<div class="section">
<h2><a name="Support_for_erasure_coding_in_HDFS"></a>Support for erasure coding in HDFS</h2>
<p>Erasure coding is a method for durably storing data with significant space savings compared to replication. Standard encodings like Reed-Solomon (10,4) have a 1.4x space overhead, compared to the 3x overhead of standard HDFS replication.</p>
<p>Since erasure coding imposes additional overhead during reconstruction and performs mostly remote reads, it has traditionally been used for storing colder, less frequently accessed data. Users should consider the network and CPU overheads of erasure coding when deploying this feature.</p>
<p>More details are available in the <a href="./hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">HDFS Erasure Coding</a> documentation.</p></div>
<div class="section">
<h2><a name="YARN_Timeline_Service_v.2"></a>YARN Timeline Service v.2</h2>
<p>We are introducing an early preview (alpha 2) of a major revision of YARN Timeline Service: v.2. YARN Timeline Service v.2 addresses two major challenges: improving scalability and reliability of Timeline Service, and enhancing usability by introducing flows and aggregation.</p>
<p>YARN Timeline Service v.2 alpha 2 is provided so that users and developers can test it and provide feedback and suggestions for making it a ready replacement for Timeline Service v.1.x. It should be used only in a test capacity.</p>
<p>More details are available in the <a href="./hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">YARN Timeline Service v.2</a> documentation.</p></div>
<div class="section">
<h2><a name="Shell_script_rewrite"></a>Shell script rewrite</h2>
<p>The Hadoop shell scripts have been rewritten to fix many long-standing bugs and include some new features. While an eye has been kept towards compatibility, some changes may break existing installations.</p>
<p>Incompatible changes are documented in the release notes, with related discussion on <a class="externalLink" href="https://issues.apache.org/jira/browse/HADOOP-9902">HADOOP-9902</a>.</p>
<p>More details are available in the <a href="./hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a> documentation. Power users will also be pleased by the <a href="./hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a> documentation, which describes much of the new functionality, particularly related to extensibility.</p></div>
<div class="section">
<h2><a name="Shaded_client_jars"></a>Shaded client jars</h2>
<p>The <tt>hadoop-client</tt> Maven artifact available in 2.x releases pulls Hadoop&#x2019;s transitive dependencies onto a Hadoop application&#x2019;s classpath. This can be problematic if the versions of these transitive dependencies conflict with the versions used by the application.</p>
<p><a class="externalLink" href="https://issues.apache.org/jira/browse/HADOOP-11804">HADOOP-11804</a> adds new <tt>hadoop-client-api</tt> and <tt>hadoop-client-runtime</tt> artifacts that shade Hadoop&#x2019;s dependencies into a single jar. This avoids leaking Hadoop&#x2019;s dependencies onto the application&#x2019;s classpath.</p></div>
<div class="section">
<h2><a name="Support_for_Opportunistic_Containers_and_Distributed_Scheduling."></a>Support for Opportunistic Containers and Distributed Scheduling.</h2>
<p>A notion of <tt>ExecutionType</tt> has been introduced, whereby Applications can now request for containers with an execution type of <tt>Opportunistic</tt>. Containers of this type can be dispatched for execution at an NM even if there are no resources available at the moment of scheduling. In such a case, these containers will be queued at the NM, waiting for resources to be available for it to start. Opportunistic containers are of lower priority than the default <tt>Guaranteed</tt> containers and are therefore preempted, if needed, to make room for Guaranteed containers. This should improve cluster utilization.</p>
<p>Opportunistic containers are by default allocated by the central RM, but support has also been added to allow opportunistic containers to be allocated by a distributed scheduler which is implemented as an AMRMProtocol interceptor.</p>
<p>Please see <a href="./hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">documentation</a> for more details.</p></div>
<div class="section">
<h2><a name="MapReduce_task-level_native_optimization"></a>MapReduce task-level native optimization</h2>
<p>MapReduce has added support for a native implementation of the map output collector. For shuffle-intensive jobs, this can lead to a performance improvement of 30% or more.</p>
<p>See the release notes for <a class="externalLink" href="https://issues.apache.org/jira/browse/MAPREDUCE-2841">MAPREDUCE-2841</a> for more detail.</p></div>
<div class="section">
<h2><a name="Support_for_more_than_2_NameNodes."></a>Support for more than 2 NameNodes.</h2>
<p>The initial implementation of HDFS NameNode high-availability provided for a single active NameNode and a single Standby NameNode. By replicating edits to a quorum of three JournalNodes, this architecture is able to tolerate the failure of any one node in the system.</p>
<p>However, some deployments require higher degrees of fault-tolerance. This is enabled by this new feature, which allows users to run multiple standby NameNodes. For instance, by configuring three NameNodes and five JournalNodes, the cluster is able to tolerate the failure of two nodes rather than just one.</p>
<p>The <a href="./hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">HDFS high-availability documentation</a> has been updated with instructions on how to configure more than two NameNodes.</p></div>
<div class="section">
<h2><a name="Default_ports_of_multiple_services_have_been_changed."></a>Default ports of multiple services have been changed.</h2>
<p>Previously, the default ports of multiple Hadoop services were in the Linux ephemeral port range (32768-61000). This meant that at startup, services would sometimes fail to bind to the port due to a conflict with another application.</p>
<p>These conflicting ports have been moved out of the ephemeral range, affecting the NameNode, Secondary NameNode, DataNode, and KMS. Our documentation has been updated appropriately, but see the release notes for <a class="externalLink" href="https://issues.apache.org/jira/browse/HDFS-9427">HDFS-9427</a> and <a class="externalLink" href="https://issues.apache.org/jira/browse/HADOOP-12811">HADOOP-12811</a> for a list of port changes.</p></div>
<div class="section">
<h2><a name="Support_for_Microsoft_Azure_Data_Lake_and_Aliyun_Object_Storage_System_filesystem_connectors"></a>Support for Microsoft Azure Data Lake and Aliyun Object Storage System filesystem connectors</h2>
<p>Hadoop now supports integration with Microsoft Azure Data Lake and Aliyun Object Storage System as alternative Hadoop-compatible filesystems.</p></div>
<div class="section">
<h2><a name="Intra-datanode_balancer"></a>Intra-datanode balancer</h2>
<p>A single DataNode manages multiple disks. During normal write operation, disks will be filled up evenly. However, adding or replacing disks can lead to significant skew within a DataNode. This situation is not handled by the existing HDFS balancer, which concerns itself with inter-, not intra-, DN skew.</p>
<p>This situation is handled by the new intra-DataNode balancing functionality, which is invoked via the <tt>hdfs diskbalancer</tt> CLI. See the disk balancer section in the <a href="./hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">HDFS Commands Guide</a> for more information.</p></div>
<div class="section">
<h2><a name="Reworked_daemon_and_task_heap_management"></a>Reworked daemon and task heap management</h2>
<p>A series of changes have been made to heap management for Hadoop daemons as well as MapReduce tasks.</p>
<p><a class="externalLink" href="https://issues.apache.org/jira/browse/HADOOP-10950">HADOOP-10950</a> introduces new methods for configuring daemon heap sizes. Notably, auto-tuning is now possible based on the memory size of the host, and the <tt>HADOOP_HEAPSIZE</tt> variable has been deprecated. See the full release notes of HADOOP-10950 for more detail.</p>
<p><a class="externalLink" href="https://issues.apache.org/jira/browse/MAPREDUCE-5785">MAPREDUCE-5785</a> simplifies the configuration of map and reduce task heap sizes, so the desired heap size no longer needs to be specified in both the task configuration and as a Java option. Existing configs that already specify both are not affected by this change. See the full release notes of MAPREDUCE-5785 for more details.</p></div>
<div class="section">
<h2><a name="S3Guard:_Consistency_and_Metadata_Caching_for_the_S3A_filesystem_client"></a>S3Guard: Consistency and Metadata Caching for the S3A filesystem client</h2>
<p><a class="externalLink" href="https://issues.apache.org/jira/browse/HADOOP-13345">HADOOP-13345</a> adds an optional feature to the S3A client of Amazon S3 storage: the ability to use a DynamoDB table as a fast and consistent store of file and directory metadata.</p>
<p>See <a href="./hadoop-aws/tools/hadoop-aws/s3guard.html">S3Guard</a> for more details.</p></div>
<div class="section">
<h2><a name="HDFS_Router-Based_Federation"></a>HDFS Router-Based Federation</h2>
<p>HDFS Router-Based Federation adds a RPC routing layer that provides a federated view of multiple HDFS namespaces. This is similar to the existing <a href="./hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>) and <a href="./hadoop-project-dist/hadoop-hdfs/Federation.html">HDFS Federation</a> functionality, except the mount table is managed on the server-side by the routing layer rather than on the client. This simplifies access to a federated cluster for existing HDFS clients.</p>
<p>See <a class="externalLink" href="https://issues.apache.org/jira/browse/HDFS-10467">HDFS-10467</a> and the HDFS Router-based Federation <a href="./hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">documentation</a> for more details.</p></div>
<div class="section">
<h2><a name="API-based_configuration_of_Capacity_Scheduler_queue_configuration"></a>API-based configuration of Capacity Scheduler queue configuration</h2>
<p>The OrgQueue extension to the capacity scheduler provides a programmatic way to change configurations by providing a REST API that users can call to modify queue configurations. This enables automation of queue configuration management by administrators in the queue&#x2019;s <tt>administer_queue</tt> ACL.</p>
<p>See <a class="externalLink" href="https://issues.apache.org/jira/browse/YARN-5734">YARN-5734</a> and the <a href="./hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler documentation</a> for more information.</p></div>
<div class="section">
<h2><a name="YARN_Resource_Types"></a>YARN Resource Types</h2>
<p>The YARN resource model has been generalized to support user-defined countable resource types beyond CPU and memory. For instance, the cluster administrator could define resources like GPUs, software licenses, or locally-attached storage. YARN tasks can then be scheduled based on the availability of these resources.</p>
<p>See <a class="externalLink" href="https://issues.apache.org/jira/browse/YARN-3926">YARN-3926</a> and the <a href="./hadoop-yarn/hadoop-yarn-site/ResourceModel.html">YARN resource model documentation</a> for more information.</p>
<h1>Getting Started</h1>
<p>The Hadoop documentation includes the information you need to get started using Hadoop. Begin with the <a href="./hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a> which shows you how to set up a single-node Hadoop installation. Then move on to the <a href="./hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a> to learn how to set up a multi-node Hadoop installation.</p></div>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2021
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>