blob: 092ba580bd3227bb6de52fdc904a184ba4d59a00 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2021-06-15
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop 3.3.1 &#x2013; Hadoop: Pluggable Shuffle and Pluggable Sort</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20210615" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xleft">
<a href="http://www.apache.org/" class="externalLink">Apache</a>
&gt;
<a href="http://hadoop.apache.org/" class="externalLink">Hadoop</a>
&gt;
<a href="../index.html">Apache Hadoop MapReduce Client</a>
&gt;
<a href="index.html">Apache Hadoop 3.3.1</a>
&gt;
Hadoop: Pluggable Shuffle and Pluggable Sort
</div>
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
|
<a href="http://hadoop.apache.org/" class="externalLink">Apache Hadoop</a>
&nbsp;| Last Published: 2021-06-15
&nbsp;| Version: 3.3.1
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../../index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../../hadoop-openstack/index.html">OpenStack Swift</a>
</li>
<li class="none">
<a href="../../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1>Hadoop: Pluggable Shuffle and Pluggable Sort</h1>
<ul>
<li><a href="#Introduction">Introduction</a></li>
<li><a href="#Implementing_a_Custom_Shuffle_and_a_Custom_Sort">Implementing a Custom Shuffle and a Custom Sort</a></li>
<li><a href="#Configuration">Configuration</a>
<ul>
<li><a href="#Job_Configuration_Properties_.28on_per_job_basis.29:">Job Configuration Properties (on per job basis):</a></li>
<li><a href="#NodeManager_Configuration_properties.2C_yarn-site.xml_in_all_nodes:">NodeManager Configuration properties, yarn-site.xml in all nodes:</a>
<ul>
<li><a href="#Example_of_loading_jar_file_from_HDFS:">Example of loading jar file from HDFS:</a></li>
<li><a href="#Example_of_loading_jar_file_from_local_file_system:">Example of loading jar file from local file system:</a></li></ul></li></ul></li></ul>
<div class="section">
<h2><a name="Introduction"></a>Introduction</h2>
<p>The pluggable shuffle and pluggable sort capabilities allow replacing the built in shuffle and sort logic with alternate implementations. Example use cases for this are: using a different application protocol other than HTTP such as RDMA for shuffling data from the Map nodes to the Reducer nodes; or replacing the sort logic with custom algorithms that enable Hash aggregation and Limit-N query.</p>
<p><b>IMPORTANT:</b> The pluggable shuffle and pluggable sort capabilities are experimental and unstable. This means the provided APIs may change and break compatibility in future versions of Hadoop.</p></div>
<div class="section">
<h2><a name="Implementing_a_Custom_Shuffle_and_a_Custom_Sort"></a>Implementing a Custom Shuffle and a Custom Sort</h2>
<p>A custom shuffle implementation requires a <tt>org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServices.AuxiliaryService</tt> implementation class running in the NodeManagers and a <tt>org.apache.hadoop.mapred.ShuffleConsumerPlugin</tt> implementation class running in the Reducer tasks.</p>
<p>The default implementations provided by Hadoop can be used as references:</p>
<ul>
<li><tt>org.apache.hadoop.mapred.ShuffleHandler</tt></li>
<li><tt>org.apache.hadoop.mapreduce.task.reduce.Shuffle</tt></li>
</ul>
<p>A custom sort implementation requires a <tt>org.apache.hadoop.mapred.MapOutputCollector</tt> implementation class running in the Mapper tasks and (optionally, depending on the sort implementation) a <tt>org.apache.hadoop.mapred.ShuffleConsumerPlugin</tt> implementation class running in the Reducer tasks.</p>
<p>The default implementations provided by Hadoop can be used as references:</p>
<ul>
<li><tt>org.apache.hadoop.mapred.MapTask$MapOutputBuffer</tt></li>
<li><tt>org.apache.hadoop.mapreduce.task.reduce.Shuffle</tt></li>
</ul></div>
<div class="section">
<h2><a name="Configuration"></a>Configuration</h2>
<p>Except for the auxiliary service running in the NodeManagers serving the shuffle (by default the <tt>ShuffleHandler</tt>), all the pluggable components run in the job tasks. This means, they can be configured on per job basis. The auxiliary service servicing the Shuffle must be configured in the NodeManagers configuration.</p>
<div class="section">
<h3><a name="Job_Configuration_Properties_.28on_per_job_basis.29:"></a>Job Configuration Properties (on per job basis):</h3>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> <b>Property</b> </th>
<th align="left"> <b>Default Value</b> </th>
<th align="left"> <b>Explanation</b> </th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> <tt>mapreduce.job.reduce.shuffle.consumer.plugin.class</tt> </td>
<td align="left"> <tt>org.apache.hadoop.mapreduce.task.reduce.Shuffle</tt> </td>
<td align="left"> The <tt>ShuffleConsumerPlugin</tt> implementation to use </td></tr>
<tr class="a">
<td align="left"> <tt>mapreduce.job.map.output.collector.class</tt> </td>
<td align="left"> <tt>org.apache.hadoop.mapred.MapTask$MapOutputBuffer</tt> </td>
<td align="left"> The <tt>MapOutputCollector</tt> implementation(s) to use </td></tr>
</tbody>
</table>
<p>These properties can also be set in the <tt>mapred-site.xml</tt> to change the default values for all jobs.</p>
<p>The collector class configuration may specify a comma-separated list of collector implementations. In this case, the map task will attempt to instantiate each in turn until one of the implementations successfully initializes. This can be useful if a given collector implementation is only compatible with certain types of keys or values, for example.</p></div>
<div class="section">
<h3><a name="NodeManager_Configuration_properties.2C_yarn-site.xml_in_all_nodes:"></a>NodeManager Configuration properties, <tt>yarn-site.xml</tt> in all nodes:</h3>
<p>There are two ways to configure auxiliary services, through a manifest file or through the Configuration (the old way). If a manifest file is used, auxiliary service configurations are not read from the Configuration.</p>
<p>If using a manifest, the feature must be enabled by setting the property <tt>yarn.nodemanager.aux-services.manifest.enabled</tt> to true in <i>yarn-site.xml</i>. The file path can be set in <i>yarn-site.xml</i> under the property <tt>yarn.nodemanager.aux-services.manifest</tt>, or the file may be sent to each NM via a PUT call to the endpoint <tt>http://nm-http-address:port/ws/v1/node/auxiliaryservices</tt>. If the file path is set in the Configuration, NMs will check this file for new modifications at an interval specified by <tt>yarn.nodemanager.aux-services.manifest.reload-ms</tt> (defaults to 0; setting interval &lt;= 0 means it will not be reloaded automatically).</p>
<p>Otherwise, set the following properties to configure aux services through the Configuration.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> <b>Property</b> </th>
<th align="left"> <b>Default Value</b> </th>
<th align="left"> <b>Explanation</b> </th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> <tt>yarn.nodemanager.aux-services</tt> </td>
<td align="left"> <tt>...,mapreduce_shuffle</tt> </td>
<td align="left"> The auxiliary service name </td></tr>
<tr class="a">
<td align="left"> <tt>yarn.nodemanager.aux-services.mapreduce_shuffle.class</tt> </td>
<td align="left"> <tt>org.apache.hadoop.mapred.ShuffleHandler</tt> </td>
<td align="left"> The auxiliary service class to use </td></tr>
<tr class="b">
<td align="left"> <tt>yarn.nodemanager.aux-services.%s.classpath</tt> </td>
<td align="left"> NONE </td>
<td align="left"> local directory which includes the related jar file as well as all the dependencies&#x2019; jar file. We could specify the single jar file or use /dep/* to load all jars under the dep directory. </td></tr>
<tr class="a">
<td align="left"> <tt>yarn.nodemanager.aux-services.%s.remote-classpath</tt> </td>
<td align="left"> NONE </td>
<td align="left"> The remote absolute or relative path to jar file </td></tr>
</tbody>
</table>
<div class="section">
<h4><a name="Example_of_loading_jar_file_from_HDFS:"></a>Example of loading jar file from HDFS:</h4>
<p>Using manifest:</p>
<div>
<div>
<pre class="source">{
&quot;services&quot;: [
{
&quot;name&quot;: &quot;mapreduce_shuffle&quot;,
&quot;version&quot;: &quot;1&quot;,
&quot;configuration&quot;: {
&quot;properties&quot;: {
&quot;class.name&quot;: &quot;org.apache.hadoop.mapred.ShuffleHandler&quot;
}
}
},
{
&quot;name&quot;: &quot;AuxServiceFromHDFS&quot;,
&quot;version&quot;: &quot;1&quot;,
&quot;configuration&quot;: {
&quot;properties&quot;: {
&quot;class.name&quot;: &quot;org.apache.auxtest.AuxServiceFromHDFS2&quot;
},
&quot;files&quot;: [
{
&quot;src_file&quot;: &quot;hdfs:///aux/test/aux-service-hdfs.jar&quot;,
&quot;type&quot;: &quot;STATIC&quot;
}
]
}
}
]
}
</pre></div></div>
<p>Or using Configuration:</p>
<div>
<div>
<pre class="source">&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;yarn.nodemanager.aux-services&lt;/name&gt;
&lt;value&gt;mapreduce_shuffle,AuxServiceFromHDFS&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;yarn.nodemanager.aux-services.AuxServiceFromHDFS.remote-classpath&lt;/name&gt;
&lt;value&gt;/aux/test/aux-service-hdfs.jar&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;yarn.nodemanager.aux-services.AuxServiceFromHDFS.class&lt;/name&gt;
&lt;value&gt;org.apache.auxtest.AuxServiceFromHDFS2&lt;/value&gt;
&lt;/property&gt;
&lt;/configuration&gt;
</pre></div></div>
</div>
<div class="section">
<h4><a name="Example_of_loading_jar_file_from_local_file_system:"></a>Example of loading jar file from local file system:</h4>
<p>Using manifest:</p>
<div>
<div>
<pre class="source">{
&quot;services&quot;: [
{
&quot;name&quot;: &quot;mapreduce_shuffle&quot;,
&quot;version&quot;: &quot;1&quot;,
&quot;configuration&quot;: {
&quot;properties&quot;: {
&quot;class.name&quot;: &quot;org.apache.hadoop.mapred.ShuffleHandler&quot;
}
}
},
{
&quot;name&quot;: &quot;AuxServiceFromHDFS&quot;,
&quot;version&quot;: &quot;1&quot;,
&quot;configuration&quot;: {
&quot;properties&quot;: {
&quot;class.name&quot;: &quot;org.apache.auxtest.AuxServiceFromHDFS2&quot;
},
&quot;files&quot;: [
{
&quot;src_file&quot;: &quot;file:///aux/test/aux-service-hdfs.jar&quot;,
&quot;type&quot;: &quot;STATIC&quot;
}
]
}
}
]
}
</pre></div></div>
<p>Or using Configuration:</p>
<div>
<div>
<pre class="source">&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;yarn.nodemanager.aux-services&lt;/name&gt;
&lt;value&gt;mapreduce_shuffle,AuxServiceFromHDFS&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;yarn.nodemanager.aux-services.AuxServiceFromHDFS.classpath&lt;/name&gt;
&lt;value&gt;/aux/test/aux-service-hdfs.jar&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;yarn.nodemanager.aux-services.AuxServiceFromHDFS.class&lt;/name&gt;
&lt;value&gt;org.apache.auxtest.AuxServiceFromHDFS2&lt;/value&gt;
&lt;/property&gt;
&lt;/configuration&gt;
</pre></div></div>
<p><b>IMPORTANT:</b> If setting an auxiliary service in addition the default <tt>mapreduce_shuffle</tt> service, then a new service key should be added to the <tt>yarn.nodemanager.aux-services</tt> property, for example <tt>mapred.shufflex</tt>. Then the property defining the corresponding class must be <tt>yarn.nodemanager.aux-services.mapreduce_shufflex.class</tt>. Alternatively, if an aux services manifest file is used, the service should be added to the service list.</p></div></div></div>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2021
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>