blob: 9d14f65b4976d28798fd20d051edd13238230ae4 [file] [log] [blame]
<!doctype html><html lang=en class=no-js><head><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1,shrink-to-fit=no"><meta name=generator content="Hugo 0.111.3"><link rel=alternate type=text/html href=/update-site/docs/++version++/mapreduce-guide/_print/><link rel=alternate type=application/rss+xml href=/update-site/docs/++version++/mapreduce-guide/index.xml><meta name=robots content="index, follow"><link rel=apple-touch-icon sizes=57x57 href=https://apache.org/favicons/apple-touch-icon-57x57.png><link rel=apple-touch-icon sizes=60x60 href=https://apache.org/favicons/apple-touch-icon-60x60.png><link rel=apple-touch-icon sizes=72x72 href=https://apache.org/favicons/apple-touch-icon-72x72.png><link rel=apple-touch-icon sizes=76x76 href=https://apache.org/favicons/apple-touch-icon-76x76.png><link rel=apple-touch-icon sizes=114x114 href=https://apache.org/favicons/apple-touch-icon-114x114.png><link rel=apple-touch-icon sizes=120x120 href=https://apache.org/favicons/apple-touch-icon-120x120.png><link rel=apple-touch-icon sizes=144x144 href=https://apache.org/favicons/apple-touch-icon-144x144.png><link rel=apple-touch-icon sizes=152x152 href=https://apache.org/favicons/apple-touch-icon-152x152.png><link rel=apple-touch-icon sizes=180x180 href=https://apache.org/favicons/apple-touch-icon-180x180.png><link rel=icon type=image/png href=https://apache.org/favicons/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=https://apache.org/favicons/favicon-194x194.png sizes=194x194><link rel=icon type=image/png href=https://apache.org/favicons/favicon-96x96.png sizes=96x96><link rel=icon type=image/png href=https://apache.org/favicons/android-chrome-192x192.png sizes=192x192><link rel=icon type=image/png href=https://apache.org/favicons/favicon-16x16.png sizes=16x16><link rel=manifest href=https://apache.org/favicons/manifest.json><link rel="shortcut icon" href=https://apache.org/favicons/favicon.ico><title>MapReduce guide | Apache 
Avro</title><meta name=description content><meta property="og:title" content="MapReduce guide"><meta property="og:description" content><meta property="og:type" content="website"><meta property="og:url" content="/update-site/docs/++version++/mapreduce-guide/"><meta property="og:site_name" content="Apache Avro"><meta itemprop=name content="MapReduce guide"><meta itemprop=description content><meta name=twitter:card content="summary"><meta name=twitter:title content="MapReduce guide"><meta name=twitter:description content><link rel=preload href=/update-site/scss/main.min.6deb8a211453721a965671b611280fb11af8ef2def6b7a2b0a34f6a94939360f.css as=style><link href=/update-site/scss/main.min.6deb8a211453721a965671b611280fb11af8ef2def6b7a2b0a34f6a94939360f.css rel=stylesheet integrity><script src=https://code.jquery.com/jquery-3.5.1.min.js integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0=" crossorigin=anonymous></script>
<link rel=stylesheet href=/css/prism.css></head><body class=td-section><header><nav class="js-navbar-scroll navbar navbar-expand navbar-dark flex-column flex-md-row td-navbar"><a class=navbar-brand href=/update-site/><span class=navbar-logo><img src=/docs/++version++/logo.svg alt="" width=100 height=30 style="margin:0 10px"></span><span class="text-uppercase font-weight-bold">Apache Avro</span></a><div class="td-navbar-nav-scroll ml-md-auto" id=main_navbar><ul class="navbar-nav mt-2 mt-lg-0"><li class="nav-item mr-4 mb-2 mb-lg-0"><a class=nav-link href=/update-site/project/><span>Project</span></a></li><li class="nav-item mr-4 mb-2 mb-lg-0"><a class=nav-link href=/update-site/blog/><span>Blog</span></a></li><li class="nav-item mr-4 mb-2 mb-lg-0"><a class=nav-link href=/update-site/community/><span>Community</span></a></li><li class="nav-item dropdown mr-4 d-none d-lg-block"><a class="nav-link dropdown-toggle" href=# id=navbarDropdownDocs role=button data-toggle=dropdown aria-haspopup=true aria-expanded=false>Documentation</a><div class=dropdown-menu aria-labelledby=navbarDropdownDocs><a class=dropdown-item href=/update-site/docs/++version++/>++version++ (Current)</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.11.0/>1.11.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.10.2/>1.10.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.10.1/>1.10.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.10.0/>1.10.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.9.2/>1.9.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.9.1/>1.9.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.9.0/>1.9.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.8.2/>1.8.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.8.1/>1.8.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.8.0/>1.8.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.7/>1.7.7</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.6/>1.7.6</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.5/>1.7.5</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.4/>1.7.4</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.3/>1.7.3</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.2/>1.7.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.1/>1.7.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.0/>1.7.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.6.3/>1.6.3</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.6.2/>1.6.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.6.1/>1.6.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.6.0/>1.6.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.5.4/>1.5.4</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.5.3/>1.5.3</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.5.2/>1.5.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.5.1/>1.5.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.5.0/>1.5.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.4.1/>1.4.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.4.0/>1.4.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.3.3/>1.3.3</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.3.2/>1.3.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.3.1/>1.3.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.3.0/>1.3.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.2.0/>1.2.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.1.0/>1.1.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.0.0/>1.0.0</a></div></li><li class="nav-item dropdown mr-4 d-none d-lg-block"><a class="nav-link dropdown-toggle" href=# id=navbarDropdownAsf role=button data-toggle=dropdown aria-haspopup=true aria-expanded=false>ASF links</a><div class=dropdown-menu aria-labelledby=navbarDropdownAsf><a class=dropdown-item href=https://www.apache.org/ target=_blank rel=noopener>ASF Web Site</a>
<a class=dropdown-item href=https://www.apache.org/licenses/ target=_blank rel=noopener>License</a>
<a class=dropdown-item href=https://www.apache.org/foundation/sponsorship.html target=_blank rel=noopener>Donate</a>
<a class=dropdown-item href=https://www.apache.org/foundation/thanks.html target=_blank rel=noopener>Thanks</a>
<a class=dropdown-item href=http://www.apache.org/security/ target=_blank>Security</a></div></li></ul></div><div class="navbar-nav d-none d-lg-block"></div></nav></header><div class="container-fluid td-outer"><div class=td-main><div class="row flex-xl-nowrap"><aside class="col-12 col-md-3 col-xl-2 td-sidebar d-print-none"><div id=td-sidebar-menu class=td-sidebar__inner><div id=content-mobile><form class="td-sidebar__search d-flex align-items-center"><button class="btn btn-link td-sidebar__toggle d-md-none p-0 ml-3 fas fa-bars" type=button data-toggle=collapse data-target=#td-section-nav aria-controls=td-docs-nav aria-expanded=false aria-label="Toggle section navigation"></button></form></div><div id=content-desktop></div><nav class="collapse td-sidebar-nav foldable-nav" id=td-section-nav><ul class="td-sidebar-nav__section pr-md-3 ul-0"><li class="td-sidebar-nav__section-title td-sidebar-nav__section with-child active-path" id=m-update-sitedocs-li><a href=/update-site/docs/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section tree-root" id=m-update-sitedocs><span>Documentation</span></a><ul class=ul-1><li class="td-sidebar-nav__section-title td-sidebar-nav__section with-child active-path" id=m-update-sitedocsversion-li><input type=checkbox id=m-update-sitedocsversion-check checked>
<label for=m-update-sitedocsversion-check><a href=/update-site/docs/++version++/ title="Apache Avro™ ++version++ Documentation" class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversion><span>++version++</span></a></label><ul class="ul-2 foldable"><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversiongetting-started-java-li><input type=checkbox id=m-update-sitedocsversiongetting-started-java-check>
<label for=m-update-sitedocsversiongetting-started-java-check><a href=/update-site/docs/++version++/getting-started-java/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversiongetting-started-java><span>Getting Started (Java)</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversiongetting-started-python-li><input type=checkbox id=m-update-sitedocsversiongetting-started-python-check>
<label for=m-update-sitedocsversiongetting-started-python-check><a href=/update-site/docs/++version++/getting-started-python/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversiongetting-started-python><span>Getting Started (Python)</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionspecification-li><input type=checkbox id=m-update-sitedocsversionspecification-check>
<label for=m-update-sitedocsversionspecification-check><a href=/update-site/docs/++version++/specification/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversionspecification><span>Specification</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionapi-java-li><input type=checkbox id=m-update-sitedocsversionapi-java-check>
<label for=m-update-sitedocsversionapi-java-check><a href=./api/java/ class="align-left pl-0 td-sidebar-link td-sidebar-link__page" id=m-update-sitedocsversionapi-java><span>Java API</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionapi-c-li><input type=checkbox id=m-update-sitedocsversionapi-c-check>
<label for=m-update-sitedocsversionapi-c-check><a href=./api/c/ class="align-left pl-0 td-sidebar-link td-sidebar-link__page" id=m-update-sitedocsversionapi-c><span>C API</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionapi-cpp-li><input type=checkbox id=m-update-sitedocsversionapi-cpp-check>
<label for=m-update-sitedocsversionapi-cpp-check><a href=./api/cpp/html/ class="align-left pl-0 td-sidebar-link td-sidebar-link__page" id=m-update-sitedocsversionapi-cpp><span>C++ API</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionapi-csharp-li><input type=checkbox id=m-update-sitedocsversionapi-csharp-check>
<label for=m-update-sitedocsversionapi-csharp-check><a href=./api/csharp/html/ class="align-left pl-0 td-sidebar-link td-sidebar-link__page" id=m-update-sitedocsversionapi-csharp><span>C# API</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionmapreduce-guide-li><input type=checkbox id=m-update-sitedocsversionmapreduce-guide-check>
<label for=m-update-sitedocsversionmapreduce-guide-check><a href=/update-site/docs/++version++/mapreduce-guide/ class="align-left pl-0 active td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversionmapreduce-guide><span class=td-sidebar-nav-active-item>MapReduce guide</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionidl-language-li><input type=checkbox id=m-update-sitedocsversionidl-language-check>
<label for=m-update-sitedocsversionidl-language-check><a href=/update-site/docs/++version++/idl-language/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversionidl-language><span>IDL Language</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionsasl-profile-li><input type=checkbox id=m-update-sitedocsversionsasl-profile-check>
<label for=m-update-sitedocsversionsasl-profile-check><a href=/update-site/docs/++version++/sasl-profile/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversionsasl-profile><span>SASL profile</span></a></label></li></ul></li></ul></li></ul></nav></div></aside><aside class="d-none d-xl-block col-xl-2 td-sidebar-toc d-print-none"><div class="td-page-meta ml-2 pb-1 pt-2 mb-0"><a href=https://github.com/apache/avro/tree/master/doc/content/en/docs/++version++/MapReduce%20guide/_index.md class=td-page-meta--view target=_blank rel=noopener><i class="fa fa-file-alt fa-fw"></i> View page source</a>
<a href=https://github.com/apache/avro/edit/master/doc/content/en/docs/++version++/MapReduce%20guide/_index.md class=td-page-meta--edit target=_blank rel=noopener><i class="fa fa-edit fa-fw"></i> Edit this page</a>
<a href="https://github.com/apache/avro/new/master/doc/content/en/docs/++version++/MapReduce%20guide/_index.md?filename=change-me.md&amp;value=---%0Atitle%3A+%22Long+Page+Title%22%0AlinkTitle%3A+%22Short+Nav+Title%22%0Aweight%3A+100%0Adescription%3A+%3E-%0A+++++Page+description+for+heading+and+indexes.%0A---%0A%0A%23%23+Heading%0A%0AEdit+this+template+to+create+your+new+page.%0A%0A%2A+Give+it+a+good+name%2C+ending+in+%60.md%60+-+e.g.+%60getting-started.md%60%0A%2A+Edit+the+%22front+matter%22+section+at+the+top+of+the+page+%28weight+controls+how+its+ordered+amongst+other+pages+in+the+same+directory%3B+lowest+number+first%29.%0A%2A+Add+a+good+commit+message+at+the+bottom+of+the+page+%28%3C80+characters%3B+use+the+extended+description+field+for+more+detail%29.%0A%2A+Create+a+new+branch+so+you+can+preview+your+new+file+and+request+a+review+via+Pull+Request.%0A" class=td-page-meta--child target=_blank rel=noopener><i class="fa fa-edit fa-fw"></i> Create child page</a>
<a href="https://github.com/apache/avro/issues/new?title=MapReduce%20guide" class=td-page-meta--issue target=_blank rel=noopener><i class="fab fa-github fa-fw"></i> Create documentation issue</a>
<a href=https://github.com/apache/avro/issues/new class=td-page-meta--project-issue target=_blank rel=noopener><i class="fas fa-tasks fa-fw"></i> Create project issue</a>
<a id=print href=/update-site/docs/++version++/mapreduce-guide/_print/><i class="fa fa-print fa-fw"></i> Print entire section</a></div><div class=td-toc><nav id=TableOfContents><ul><li><a href=#setup>Setup</a></li><li><a href=#example-colorcount>Example: ColorCount</a></li><li><a href=#running-colorcount>Running ColorCount</a></li><li><a href=#avromapper---orgapachehadoopmapred-api>AvroMapper - org.apache.hadoop.mapred API</a></li><li><a href=#mapper---orgapachehadoopmapreduce-api>Mapper - org.apache.hadoop.mapreduce API</a></li><li><a href=#avroreducer---orgapachehadoopmapred-api>AvroReducer - org.apache.hadoop.mapred API</a></li><li><a href=#reduce---orgapachehadoopmapreduce-api>Reduce - org.apache.hadoop.mapreduce API</a></li><li><a href=#learning-more>Learning more</a></li></ul></nav></div><div class="taxonomy taxonomy-terms-cloud taxo-tags"><h5 class=taxonomy-title>Tag Cloud</h5><ul class=taxonomy-terms><li><a class=taxonomy-term href=/update-site/tags/java/ data-taxonomy-term=java><span class=taxonomy-label>java</span><span class=taxonomy-count>1</span></a></li><li><a class=taxonomy-term href=/update-site/tags/python/ data-taxonomy-term=python><span class=taxonomy-label>python</span><span class=taxonomy-count>1</span></a></li></ul></div></aside><main class="col-12 col-md-9 col-xl-8 pl-md-5" role=main><nav aria-label=breadcrumb class=td-breadcrumbs><ol class=breadcrumb><li class=breadcrumb-item><a href=/update-site/docs/>Documentation</a></li><li class=breadcrumb-item><a href=/update-site/docs/++version++/>++version++</a></li><li class="breadcrumb-item active" aria-current=page><a href=/update-site/docs/++version++/mapreduce-guide/>MapReduce guide</a></li></ol></nav><div class=td-content><h1>MapReduce guide</h1><header class=article-meta><p class=reading-time><i class="fa fa-clock" aria-hidden=true></i>&nbsp; 9 minute read &nbsp;</p></header><p>Avro provides a convenient way to represent complex data structures within a Hadoop MapReduce job. 
Avro data can be used as both input to and output from a MapReduce job, as well as the intermediate format. The example in this guide uses Avro data for all three, but it&rsquo;s possible to mix and match; for instance, MapReduce can be used to aggregate a particular field in an Avro record.</p><p>This guide assumes basic familiarity with both Hadoop MapReduce and Avro. See the <a href=https://hadoop.apache.org/docs/current/>Hadoop documentation</a> and the <a href=../getting-started-java/>Avro getting started guide</a> for introductions to these projects. This guide uses the old MapReduce API (<code>org.apache.hadoop.mapred</code>) and the new MapReduce API (<code>org.apache.hadoop.mapreduce</code>).</p><h2 id=setup>Setup</h2><p>The code from this guide is included in the Avro docs under <em>examples/mr-example</em>. The example is set up as a Maven project that includes the necessary Avro and MapReduce dependencies and the Avro Maven plugin for code generation, so no external jars are needed to run the example. In particular, the POM includes the following dependencies:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-xml data-lang=xml><span style=display:flex><span><span style=color:#204a87;font-weight:700>&lt;dependency&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;groupId&gt;</span>org.apache.avro<span style=color:#204a87;font-weight:700>&lt;/groupId&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;artifactId&gt;</span>avro<span style=color:#204a87;font-weight:700>&lt;/artifactId&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;version&gt;</span>&#43;&#43;version&#43;&#43;<span style=color:#204a87;font-weight:700>&lt;/version&gt;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>&lt;/dependency&gt;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>&lt;dependency&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;groupId&gt;</span>org.apache.avro<span style=color:#204a87;font-weight:700>&lt;/groupId&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;artifactId&gt;</span>avro-mapred<span style=color:#204a87;font-weight:700>&lt;/artifactId&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;version&gt;</span>&#43;&#43;version&#43;&#43;<span style=color:#204a87;font-weight:700>&lt;/version&gt;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>&lt;/dependency&gt;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>&lt;dependency&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;groupId&gt;</span>org.apache.hadoop<span style=color:#204a87;font-weight:700>&lt;/groupId&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;artifactId&gt;</span>hadoop-client<span style=color:#204a87;font-weight:700>&lt;/artifactId&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;version&gt;</span>3.1.2<span style=color:#204a87;font-weight:700>&lt;/version&gt;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>&lt;/dependency&gt;</span>
</span></span></code></pre></div><p>And the following plugin:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-xml data-lang=xml><span style=display:flex><span><span style=color:#204a87;font-weight:700>&lt;plugin&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;groupId&gt;</span>org.apache.avro<span style=color:#204a87;font-weight:700>&lt;/groupId&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;artifactId&gt;</span>avro-maven-plugin<span style=color:#204a87;font-weight:700>&lt;/artifactId&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;version&gt;</span>&#43;&#43;version&#43;&#43;<span style=color:#204a87;font-weight:700>&lt;/version&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;executions&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;execution&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;phase&gt;</span>generate-sources<span style=color:#204a87;font-weight:700>&lt;/phase&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;goals&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;goal&gt;</span>schema<span style=color:#204a87;font-weight:700>&lt;/goal&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;/goals&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;configuration&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;sourceDirectory&gt;</span>${project.basedir}/../<span style=color:#204a87;font-weight:700>&lt;/sourceDirectory&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;outputDirectory&gt;</span>${project.basedir}/target/generated-sources/<span style=color:#204a87;font-weight:700>&lt;/outputDirectory&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;/configuration&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;/execution&gt;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&lt;/executions&gt;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>&lt;/plugin&gt;</span>
</span></span></code></pre></div><p>If you do not configure the <em>sourceDirectory</em> and <em>outputDirectory</em> properties, the defaults will be used. The <em>sourceDirectory</em> property defaults to <em>src/main/avro</em>. The <em>outputDirectory</em> property defaults to <em>target/generated-sources</em>. You can change the paths to match your project layout.</p><p>Alternatively, Avro jars can be downloaded directly from the <a href=https://avro.apache.org/releases.html>Apache Avro™ Releases page</a>. The relevant Avro jars for this guide are <em>avro-++version++.jar</em> and <em>avro-mapred-++version++.jar</em>, as well as <em>avro-tools-++version++.jar</em> for code generation and viewing Avro data files as JSON. In addition, you will need to install Hadoop in order to use MapReduce.</p><h2 id=example-colorcount>Example: ColorCount</h2><p>Below is a simple example of a MapReduce job that uses Avro. There are examples for both the old (org.apache.hadoop.mapred) and new (org.apache.hadoop.mapreduce) APIs under <em>examples/mr-example/src/main/java/example/</em>. <em>MapredColorCount</em> is the example for the older mapred API while <em>MapReduceColorCount</em> is the example for the newer mapreduce API. Both examples are shown below, but we will detail the mapred API in our subsequent examples.</p><p>MapredColorCount.java:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#204a87;font-weight:700>package</span> <span style=color:#000>example</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>java.io.IOException</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.avro.*</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.avro.Schema.Type</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.avro.mapred.*</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.conf.*</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.fs.Path</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.mapred.*</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.util.*</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>example.avro.User</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>class</span> <span style=color:#000>MapredColorCount</span> <span style=color:#204a87;font-weight:700>extends</span> <span style=color:#000>Configured</span> <span style=color:#204a87;font-weight:700>implements</span> <span style=color:#000>Tool</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>static</span> <span style=color:#204a87;font-weight:700>class</span> <span style=color:#000>ColorCountMapper</span> <span style=color:#204a87;font-weight:700>extends</span> <span style=color:#000>AvroMapper</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>User</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;&gt;</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#5c35cc;font-weight:700>@Override</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>void</span> <span style=color:#000>map</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>User</span> <span style=color:#000>user</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>AvroCollector</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;&gt;</span> <span style=color:#000>collector</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Reporter</span> <span style=color:#000>reporter</span><span style=color:#ce5c00;font-weight:700>)</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>IOException</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>CharSequence</span> <span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>user</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>getFavoriteColor</span><span style=color:#ce5c00;font-weight:700>();</span>
</span></span><span style=display:flex><span> <span style=color:#8f5902;font-style:italic>// We need this check because the User.favorite_color field has type [&#34;string&#34;, &#34;null&#34;]
</span></span></span><span style=display:flex><span><span style=color:#8f5902;font-style:italic></span> <span style=color:#204a87;font-weight:700>if</span> <span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>==</span> <span style=color:#204a87;font-weight:700>null</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#4e9a06>&#34;none&#34;</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#000>collector</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>collect</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;(</span><span style=color:#000>color</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#0000cf;font-weight:700>1</span><span style=color:#ce5c00;font-weight:700>));</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>static</span> <span style=color:#204a87;font-weight:700>class</span> <span style=color:#000>ColorCountReducer</span> <span style=color:#204a87;font-weight:700>extends</span> <span style=color:#000>AvroReducer</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;&gt;</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#5c35cc;font-weight:700>@Override</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>void</span> <span style=color:#000>reduce</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>CharSequence</span> <span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Iterable</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;</span> <span style=color:#000>values</span><span style=color:#ce5c00;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#000>AvroCollector</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;&gt;</span> <span style=color:#000>collector</span><span style=color:#ce5c00;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#000>Reporter</span> <span style=color:#000>reporter</span><span style=color:#ce5c00;font-weight:700>)</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>IOException</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>int</span> <span style=color:#000>sum</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#0000cf;font-weight:700>0</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>for</span> <span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Integer</span> <span style=color:#000>value</span> <span style=color:#ce5c00;font-weight:700>:</span> <span style=color:#000>values</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>sum</span> <span style=color:#ce5c00;font-weight:700>+=</span> <span style=color:#000>value</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#000>collector</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>collect</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;(</span><span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>sum</span><span style=color:#ce5c00;font-weight:700>));</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>int</span> <span style=color:#000>run</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>String</span><span style=color:#ce5c00;font-weight:700>[]</span> <span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>Exception</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>if</span> <span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>length</span> <span style=color:#ce5c00;font-weight:700>!=</span> <span style=color:#0000cf;font-weight:700>2</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>System</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>err</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>println</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#4e9a06>&#34;Usage: MapredColorCount &lt;input path&gt; &lt;output path&gt;&#34;</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>return</span> <span style=color:#ce5c00;font-weight:700>-</span><span style=color:#0000cf;font-weight:700>1</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#000>JobConf</span> <span style=color:#000>conf</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>JobConf</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>getConf</span><span style=color:#ce5c00;font-weight:700>(),</span> <span style=color:#000>MapredColorCount</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setJobName</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#4e9a06>&#34;colorcount&#34;</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#000>FileInputFormat</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setInputPaths</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Path</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>[</span><span style=color:#0000cf;font-weight:700>0</span><span style=color:#ce5c00;font-weight:700>]));</span>
</span></span><span style=display:flex><span> <span style=color:#000>FileOutputFormat</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setOutputPath</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Path</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>[</span><span style=color:#0000cf;font-weight:700>1</span><span style=color:#ce5c00;font-weight:700>]));</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#000>AvroJob</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setMapperClass</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>ColorCountMapper</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#000>AvroJob</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setReducerClass</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>ColorCountReducer</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#8f5902;font-style:italic>// Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set
</span></span></span><span style=display:flex><span><span style=color:#8f5902;font-style:italic></span> <span style=color:#8f5902;font-style:italic>// relevant config options such as input/output format, map output
</span></span></span><span style=display:flex><span><span style=color:#8f5902;font-style:italic></span> <span style=color:#8f5902;font-style:italic>// classes, and output key class.
</span></span></span><span style=display:flex><span><span style=color:#8f5902;font-style:italic></span> <span style=color:#000>AvroJob</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setInputSchema</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>User</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>getClassSchema</span><span style=color:#ce5c00;font-weight:700>());</span>
</span></span><span style=display:flex><span> <span style=color:#000>AvroJob</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setOutputSchema</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>getPairSchema</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Schema</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>create</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Type</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>STRING</span><span style=color:#ce5c00;font-weight:700>),</span>
</span></span><span style=display:flex><span> <span style=color:#000>Schema</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>create</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Type</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>INT</span><span style=color:#ce5c00;font-weight:700>)));</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#000>JobClient</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>runJob</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>return</span> <span style=color:#0000cf;font-weight:700>0</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>static</span> <span style=color:#204a87;font-weight:700>void</span> <span style=color:#000>main</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>String</span><span style=color:#ce5c00;font-weight:700>[]</span> <span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>Exception</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>int</span> <span style=color:#000>res</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>ToolRunner</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>run</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Configuration</span><span style=color:#ce5c00;font-weight:700>(),</span> <span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>MapredColorCount</span><span style=color:#ce5c00;font-weight:700>(),</span> <span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#000>System</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>exit</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>res</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#ce5c00;font-weight:700>}</span>
</span></span></code></pre></div><p>MapReduceColorCount.java:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#204a87;font-weight:700>package</span> <span style=color:#000>example</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>java.io.IOException</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.avro.Schema</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.avro.mapred.AvroKey</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.avro.mapred.AvroValue</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.avro.mapreduce.AvroJob</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.avro.mapreduce.AvroKeyInputFormat</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.avro.mapreduce.AvroKeyValueOutputFormat</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.conf.Configured</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.fs.Path</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.io.IntWritable</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.io.NullWritable</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.io.Text</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.mapreduce.Job</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.mapreduce.Mapper</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.mapreduce.Reducer</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.mapreduce.lib.input.FileInputFormat</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.mapreduce.lib.output.FileOutputFormat</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.util.Tool</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>org.apache.hadoop.util.ToolRunner</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>example.avro.User</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>class</span> <span style=color:#000>MapReduceColorCount</span> <span style=color:#204a87;font-weight:700>extends</span> <span style=color:#000>Configured</span> <span style=color:#204a87;font-weight:700>implements</span> <span style=color:#000>Tool</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>static</span> <span style=color:#204a87;font-weight:700>class</span> <span style=color:#000>ColorCountMapper</span> <span style=color:#204a87;font-weight:700>extends</span>
</span></span><span style=display:flex><span> <span style=color:#000>Mapper</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>AvroKey</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>User</span><span style=color:#ce5c00;font-weight:700>&gt;,</span> <span style=color:#000>NullWritable</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Text</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>IntWritable</span><span style=color:#ce5c00;font-weight:700>&gt;</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#5c35cc;font-weight:700>@Override</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>void</span> <span style=color:#000>map</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>AvroKey</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>User</span><span style=color:#ce5c00;font-weight:700>&gt;</span> <span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>NullWritable</span> <span style=color:#000>value</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Context</span> <span style=color:#000>context</span><span style=color:#ce5c00;font-weight:700>)</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>IOException</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>InterruptedException</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#000>CharSequence</span> <span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>datum</span><span style=color:#ce5c00;font-weight:700>().</span><span style=color:#c4a000>getFavoriteColor</span><span style=color:#ce5c00;font-weight:700>();</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>if</span> <span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>==</span> <span style=color:#204a87;font-weight:700>null</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#4e9a06>&#34;none&#34;</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#000>context</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>write</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Text</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>color</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>toString</span><span style=color:#ce5c00;font-weight:700>()),</span> <span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>IntWritable</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#0000cf;font-weight:700>1</span><span style=color:#ce5c00;font-weight:700>));</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>static</span> <span style=color:#204a87;font-weight:700>class</span> <span style=color:#000>ColorCountReducer</span> <span style=color:#204a87;font-weight:700>extends</span>
</span></span><span style=display:flex><span> <span style=color:#000>Reducer</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Text</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>IntWritable</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>AvroKey</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>&gt;,</span> <span style=color:#000>AvroValue</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;&gt;</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#5c35cc;font-weight:700>@Override</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>void</span> <span style=color:#000>reduce</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Text</span> <span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Iterable</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>IntWritable</span><span style=color:#ce5c00;font-weight:700>&gt;</span> <span style=color:#000>values</span><span style=color:#ce5c00;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#000>Context</span> <span style=color:#000>context</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>IOException</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>InterruptedException</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>int</span> <span style=color:#000>sum</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#0000cf;font-weight:700>0</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>for</span> <span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>IntWritable</span> <span style=color:#000>value</span> <span style=color:#ce5c00;font-weight:700>:</span> <span style=color:#000>values</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>sum</span> <span style=color:#ce5c00;font-weight:700>+=</span> <span style=color:#000>value</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>get</span><span style=color:#ce5c00;font-weight:700>();</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#000>context</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>write</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>AvroKey</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>&gt;(</span><span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>toString</span><span style=color:#ce5c00;font-weight:700>()),</span> <span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>AvroValue</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;(</span><span style=color:#000>sum</span><span style=color:#ce5c00;font-weight:700>));</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>int</span> <span style=color:#000>run</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>String</span><span style=color:#ce5c00;font-weight:700>[]</span> <span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>Exception</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>if</span> <span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>length</span> <span style=color:#ce5c00;font-weight:700>!=</span> <span style=color:#0000cf;font-weight:700>2</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>System</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>err</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>println</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#4e9a06>&#34;Usage: MapReduceColorCount &lt;input path&gt; &lt;output path&gt;&#34;</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>return</span> <span style=color:#ce5c00;font-weight:700>-</span><span style=color:#0000cf;font-weight:700>1</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#000>Job</span> <span style=color:#000>job</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Job</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>getConf</span><span style=color:#ce5c00;font-weight:700>());</span>
</span></span><span style=display:flex><span> <span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setJarByClass</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>MapReduceColorCount</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setJobName</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#4e9a06>&#34;Color Count&#34;</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#000>FileInputFormat</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setInputPaths</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Path</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>[</span><span style=color:#0000cf;font-weight:700>0</span><span style=color:#ce5c00;font-weight:700>]));</span>
</span></span><span style=display:flex><span> <span style=color:#000>FileOutputFormat</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setOutputPath</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Path</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>[</span><span style=color:#0000cf;font-weight:700>1</span><span style=color:#ce5c00;font-weight:700>]));</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setInputFormatClass</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>AvroKeyInputFormat</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setMapperClass</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>ColorCountMapper</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#000>AvroJob</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setInputKeySchema</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>User</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>getClassSchema</span><span style=color:#ce5c00;font-weight:700>());</span>
</span></span><span style=display:flex><span> <span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setMapOutputKeyClass</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Text</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setMapOutputValueClass</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>IntWritable</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setOutputFormatClass</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>AvroKeyValueOutputFormat</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setReducerClass</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>ColorCountReducer</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#000>AvroJob</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setOutputKeySchema</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Schema</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>create</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Schema</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>Type</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>STRING</span><span style=color:#ce5c00;font-weight:700>));</span>
</span></span><span style=display:flex><span> <span style=color:#000>AvroJob</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setOutputValueSchema</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Schema</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>create</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Schema</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>Type</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>INT</span><span style=color:#ce5c00;font-weight:700>));</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>return</span> <span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>job</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>waitForCompletion</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#204a87;font-weight:700>true</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#ce5c00;font-weight:700>?</span> <span style=color:#0000cf;font-weight:700>0</span> <span style=color:#ce5c00;font-weight:700>:</span> <span style=color:#0000cf;font-weight:700>1</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>static</span> <span style=color:#204a87;font-weight:700>void</span> <span style=color:#000>main</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>String</span><span style=color:#ce5c00;font-weight:700>[]</span> <span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>Exception</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>int</span> <span style=color:#000>res</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>ToolRunner</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>run</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>MapReduceColorCount</span><span style=color:#ce5c00;font-weight:700>(),</span> <span style=color:#000>args</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#000>System</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>exit</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>res</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#ce5c00;font-weight:700>}</span>
</span></span></code></pre></div><p>ColorCount reads in data files containing <em>User</em> records, defined in <em>examples/user.avsc</em>, and counts the number of instances of each favorite color. (This example draws inspiration from the canonical <em>WordCount</em> MapReduce application.) This example uses the old MapReduce API. See MapReduceAvroWordCount, found under <em>doc/examples/mr-example/src/main/java/example/</em> to see the new MapReduce API example. The User schema is defined as follows:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-json data-lang=json><span style=display:flex><span><span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;namespace&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;example.avro&#34;</span><span style=color:#000;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&#34;type&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;record&#34;</span><span style=color:#000;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;User&#34;</span><span style=color:#000;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&#34;fields&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#000;font-weight:700>[</span>
</span></span><span style=display:flex><span> <span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;name&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;type&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;string&#34;</span><span style=color:#000;font-weight:700>},</span>
</span></span><span style=display:flex><span> <span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;favorite_number&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;type&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#000;font-weight:700>[</span><span style=color:#4e9a06>&#34;int&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;null&#34;</span><span style=color:#000;font-weight:700>]},</span>
</span></span><span style=display:flex><span> <span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;favorite_color&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;type&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#000;font-weight:700>[</span><span style=color:#4e9a06>&#34;string&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;null&#34;</span><span style=color:#000;font-weight:700>]}</span>
</span></span><span style=display:flex><span> <span style=color:#000;font-weight:700>]</span>
</span></span><span style=display:flex><span><span style=color:#000;font-weight:700>}</span>
</span></span></code></pre></div><p>This schema is compiled into the <em>User</em> class used by <em>ColorCount</em> via the Avro Maven plugin (see <em>examples/mr-example/pom.xml</em> for how this is set up).</p><p><em>ColorCountMapper</em> essentially takes a <em>User</em> as input and extracts the User&rsquo;s favorite color, emitting the key-value pair <code>&lt;favoriteColor, 1></code>. <em>ColorCountReducer</em> then adds up how many occurrences of a particular favorite color were emitted, and outputs the result as a Pair record. These Pairs are serialized to an Avro data file.</p><h2 id=running-colorcount>Running ColorCount</h2><p>The <em>ColorCount</em> application is provided as a Maven project in the Avro docs under <em>examples/mr-example</em>. To build the project, including the code generation of the User schema, run:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-shell data-lang=shell><span style=display:flex><span>mvn compile
</span></span></code></pre></div><p>Next, run <em>GenerateData</em> from <code>examples/mr-example</code> to create an Avro data file, <code>input/users.avro</code>, containing 20 Users with favorite colors chosen randomly from a list:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-shell data-lang=shell><span style=display:flex><span>mvn exec:java -q -Dexec.mainClass<span style=color:#ce5c00;font-weight:700>=</span>example.GenerateData
</span></span></code></pre></div><p>Besides creating the data file, GenerateData prints the JSON representations of the Users generated to stdout, for example:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-json data-lang=json><span style=display:flex><span><span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;user&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;favorite_number&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#204a87;font-weight:700>null</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;favorite_color&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;red&#34;</span><span style=color:#000;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;user&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;favorite_number&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#204a87;font-weight:700>null</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;favorite_color&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;green&#34;</span><span style=color:#000;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;user&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;favorite_number&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#204a87;font-weight:700>null</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;favorite_color&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;purple&#34;</span><span style=color:#000;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;user&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;favorite_number&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#204a87;font-weight:700>null</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;favorite_color&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#204a87;font-weight:700>null</span><span style=color:#000;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#a40000>...</span>
</span></span></code></pre></div><p>Now we&rsquo;re ready to run ColorCount. We specify our freshly-generated input folder as the input path and output as our output folder (note that MapReduce will not start a job if the output folder already exists):</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-shell data-lang=shell><span style=display:flex><span>mvn exec:java -q -Dexec.mainClass<span style=color:#ce5c00;font-weight:700>=</span>example.MapredColorCount -Dexec.args<span style=color:#ce5c00;font-weight:700>=</span><span style=color:#4e9a06>&#34;input output&#34;</span>
</span></span></code></pre></div><p>Once ColorCount completes, checking the contents of the new output directory should yield the following:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-shell data-lang=shell><span style=display:flex><span>$ ls output/
</span></span><span style=display:flex><span>part-00000.avro _SUCCESS
</span></span></code></pre></div><p>You can check the contents of the generated Avro file using the avro-tools jar:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-shell data-lang=shell><span style=display:flex><span>$ java -jar /path/to/avro-tools-&#43;&#43;version&#43;&#43;.jar tojson output/part-00000.avro
</span></span><span style=display:flex><span><span style=color:#ce5c00;font-weight:700>{</span><span style=color:#4e9a06>&#34;value&#34;</span>: 3, <span style=color:#4e9a06>&#34;key&#34;</span>: <span style=color:#4e9a06>&#34;blue&#34;</span><span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#ce5c00;font-weight:700>{</span><span style=color:#4e9a06>&#34;value&#34;</span>: 7, <span style=color:#4e9a06>&#34;key&#34;</span>: <span style=color:#4e9a06>&#34;green&#34;</span><span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#ce5c00;font-weight:700>{</span><span style=color:#4e9a06>&#34;value&#34;</span>: 1, <span style=color:#4e9a06>&#34;key&#34;</span>: <span style=color:#4e9a06>&#34;none&#34;</span><span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#ce5c00;font-weight:700>{</span><span style=color:#4e9a06>&#34;value&#34;</span>: 2, <span style=color:#4e9a06>&#34;key&#34;</span>: <span style=color:#4e9a06>&#34;orange&#34;</span><span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#ce5c00;font-weight:700>{</span><span style=color:#4e9a06>&#34;value&#34;</span>: 3, <span style=color:#4e9a06>&#34;key&#34;</span>: <span style=color:#4e9a06>&#34;purple&#34;</span><span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#ce5c00;font-weight:700>{</span><span style=color:#4e9a06>&#34;value&#34;</span>: 2, <span style=color:#4e9a06>&#34;key&#34;</span>: <span style=color:#4e9a06>&#34;red&#34;</span><span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#ce5c00;font-weight:700>{</span><span style=color:#4e9a06>&#34;value&#34;</span>: 2, <span style=color:#4e9a06>&#34;key&#34;</span>: <span style=color:#4e9a06>&#34;yellow&#34;</span><span style=color:#ce5c00;font-weight:700>}</span>
</span></span></code></pre></div><p>Now let&rsquo;s go over the ColorCount example in detail.</p><h2 id=avromapper---orgapachehadoopmapred-api>AvroMapper - org.apache.hadoop.mapred API</h2><p>The easiest way to use Avro data files as input to a MapReduce job is to subclass <code>AvroMapper</code>. An <code>AvroMapper</code> defines a <code>map</code> function that takes an Avro datum as input and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountMapper is an AvroMapper that takes a User as input and outputs a <code>Pair&lt;CharSequence, Integer></code>, where the CharSequence key is the user&rsquo;s favorite color and the Integer value is 1.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>static</span> <span style=color:#204a87;font-weight:700>class</span> <span style=color:#000>ColorCountMapper</span> <span style=color:#204a87;font-weight:700>extends</span> <span style=color:#000>AvroMapper</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>User</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;&gt;</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#5c35cc;font-weight:700>@Override</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>void</span> <span style=color:#000>map</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>User</span> <span style=color:#000>user</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>AvroCollector</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;&gt;</span> <span style=color:#000>collector</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Reporter</span> <span style=color:#000>reporter</span><span style=color:#ce5c00;font-weight:700>)</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>IOException</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>CharSequence</span> <span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>user</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>getFavoriteColor</span><span style=color:#ce5c00;font-weight:700>();</span>
</span></span><span style=display:flex><span> <span style=color:#8f5902;font-style:italic>// We need this check because the User.favorite_color field has type [&#34;string&#34;, &#34;null&#34;]
</span></span></span><span style=display:flex><span><span style=color:#8f5902;font-style:italic></span> <span style=color:#204a87;font-weight:700>if</span> <span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>==</span> <span style=color:#204a87;font-weight:700>null</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#4e9a06>&#34;none&#34;</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#000>collector</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>collect</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;(</span><span style=color:#000>color</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#0000cf;font-weight:700>1</span><span style=color:#ce5c00;font-weight:700>));</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#ce5c00;font-weight:700>}</span>
</span></span></code></pre></div><p>In order to use our AvroMapper, we must call AvroJob.setMapperClass and AvroJob.setInputSchema.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#000>AvroJob</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setMapperClass</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>ColorCountMapper</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span><span style=color:#000>AvroJob</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setInputSchema</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>User</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>getClassSchema</span><span style=color:#ce5c00;font-weight:700>());</span>
</span></span></code></pre></div><p>Note that <code>AvroMapper</code> does not implement the <code>Mapper</code> interface. Under the hood, the specified Avro data files are deserialized into AvroWrappers containing the actual data, which are processed by a Mapper that calls the configured AvroMapper&rsquo;s map function. AvroJob.setInputSchema sets up the relevant configuration parameters needed to make this happen; thus, you should not need to call <code>JobConf.setMapperClass</code>, <code>JobConf.setInputFormat</code>, <code>JobConf.setMapOutputKeyClass</code>, <code>JobConf.setMapOutputValueClass</code>, or <code>JobConf.setOutputKeyComparatorClass</code>.</p><h2 id=mapper---orgapachehadoopmapreduce-api>Mapper - org.apache.hadoop.mapreduce API</h2><p>This document will not go into all the differences between the mapred and mapreduce APIs, but it will describe the main differences. As you can see, ColorCountMapper is now a subclass of the Hadoop Mapper class and is passed an AvroKey as its key. Additionally, the AvroJob method calls were slightly changed.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>static</span> <span style=color:#204a87;font-weight:700>class</span> <span style=color:#000>ColorCountMapper</span> <span style=color:#204a87;font-weight:700>extends</span>
</span></span><span style=display:flex><span> <span style=color:#000>Mapper</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>AvroKey</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>User</span><span style=color:#ce5c00;font-weight:700>&gt;,</span> <span style=color:#000>NullWritable</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Text</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>IntWritable</span><span style=color:#ce5c00;font-weight:700>&gt;</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#5c35cc;font-weight:700>@Override</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>void</span> <span style=color:#000>map</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>AvroKey</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>User</span><span style=color:#ce5c00;font-weight:700>&gt;</span> <span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>NullWritable</span> <span style=color:#000>value</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Context</span> <span style=color:#000>context</span><span style=color:#ce5c00;font-weight:700>)</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>IOException</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>InterruptedException</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#000>CharSequence</span> <span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>datum</span><span style=color:#ce5c00;font-weight:700>().</span><span style=color:#c4a000>getFavoriteColor</span><span style=color:#ce5c00;font-weight:700>();</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>if</span> <span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>==</span> <span style=color:#204a87;font-weight:700>null</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>color</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#4e9a06>&#34;none&#34;</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#000>context</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>write</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Text</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>color</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>toString</span><span style=color:#ce5c00;font-weight:700>()),</span> <span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>IntWritable</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#0000cf;font-weight:700>1</span><span style=color:#ce5c00;font-weight:700>));</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span></code></pre></div><h2 id=avroreducer---orgapachehadoopmapred-api>AvroReducer - org.apache.hadoop.mapred API</h2><p>Analogously to AvroMapper, an AvroReducer defines a reducer function that takes the key/value types output by an AvroMapper (or any mapper that outputs Pairs) and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountReducer is an AvroReducer that takes the CharSequence key representing a favorite color and the <code>Iterable&lt;Integer&gt;</code> representing the counts for that color (they should all be 1 in this example) and adds up the counts.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>static</span> <span style=color:#204a87;font-weight:700>class</span> <span style=color:#000>ColorCountReducer</span> <span style=color:#204a87;font-weight:700>extends</span> <span style=color:#000>AvroReducer</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;&gt;</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#5c35cc;font-weight:700>@Override</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>void</span> <span style=color:#000>reduce</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>CharSequence</span> <span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Iterable</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;</span> <span style=color:#000>values</span><span style=color:#ce5c00;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#000>AvroCollector</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;&gt;</span> <span style=color:#000>collector</span><span style=color:#ce5c00;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#000>Reporter</span> <span style=color:#000>reporter</span><span style=color:#ce5c00;font-weight:700>)</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>IOException</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>int</span> <span style=color:#000>sum</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#0000cf;font-weight:700>0</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>for</span> <span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Integer</span> <span style=color:#000>value</span> <span style=color:#ce5c00;font-weight:700>:</span> <span style=color:#000>values</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>sum</span> <span style=color:#ce5c00;font-weight:700>+=</span> <span style=color:#000>value</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#000>collector</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>collect</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;(</span><span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>sum</span><span style=color:#ce5c00;font-weight:700>));</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#ce5c00;font-weight:700>}</span>
</span></span></code></pre></div><p>In order to use our AvroReducer, we must call AvroJob.setReducerClass and AvroJob.setOutputSchema.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#000>AvroJob</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setReducerClass</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>ColorCountReducer</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>class</span><span style=color:#ce5c00;font-weight:700>);</span>
</span></span><span style=display:flex><span><span style=color:#000>AvroJob</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>setOutputSchema</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>conf</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Pair</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>getPairSchema</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Schema</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>create</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Type</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>STRING</span><span style=color:#ce5c00;font-weight:700>),</span>
</span></span><span style=display:flex><span> <span style=color:#000>Schema</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>create</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Type</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>INT</span><span style=color:#ce5c00;font-weight:700>)));</span>
</span></span></code></pre></div><p>Note that <em>AvroReducer</em> does not implement the <em>Reducer</em> interface. The intermediate Pairs output by the mapper are split into <em>AvroKeys</em> and <em>AvroValues</em>, which are processed by a Reducer that calls the configured AvroReducer&rsquo;s <code>reduce</code> function. <code>AvroJob.setOutputSchema</code> sets up the relevant configuration parameters needed to make this happen; thus you should not need to call <code>JobConf.setReducerClass</code>, <code>JobConf.setOutputFormat</code>, <code>JobConf.setOutputKeyClass</code>, <code>JobConf.setMapOutputKeyClass</code>, <code>JobConf.setMapOutputValueClass</code>, or <code>JobConf.setOutputKeyComparatorClass</code>.</p><h2 id=reduce---orgapachehadoopmapreduce-api>Reduce - org.apache.hadoop.mapreduce API</h2><p>As before, we do not detail every difference between the APIs. As with the <em>Mapper</em> change, <em>ColorCountReducer</em> is now a subclass of <em>Reducer</em>, and <em>AvroKey</em> and <em>AvroValue</em> are emitted. Additionally, the <em>AvroJob</em> method calls were slightly changed.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>static</span> <span style=color:#204a87;font-weight:700>class</span> <span style=color:#000>ColorCountReducer</span> <span style=color:#204a87;font-weight:700>extends</span>
</span></span><span style=display:flex><span> <span style=color:#000>Reducer</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Text</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>IntWritable</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>AvroKey</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>&gt;,</span> <span style=color:#000>AvroValue</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;&gt;</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#5c35cc;font-weight:700>@Override</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>public</span> <span style=color:#204a87;font-weight:700>void</span> <span style=color:#000>reduce</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>Text</span> <span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>Iterable</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>IntWritable</span><span style=color:#ce5c00;font-weight:700>&gt;</span> <span style=color:#000>values</span><span style=color:#ce5c00;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#000>Context</span> <span style=color:#000>context</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#204a87;font-weight:700>throws</span> <span style=color:#000>IOException</span><span style=color:#ce5c00;font-weight:700>,</span> <span style=color:#000>InterruptedException</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>int</span> <span style=color:#000>sum</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#0000cf;font-weight:700>0</span><span style=color:#ce5c00;font-weight:700>;</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>for</span> <span style=color:#ce5c00;font-weight:700>(</span><span style=color:#000>IntWritable</span> <span style=color:#000>value</span> <span style=color:#ce5c00;font-weight:700>:</span> <span style=color:#000>values</span><span style=color:#ce5c00;font-weight:700>)</span> <span style=color:#ce5c00;font-weight:700>{</span>
</span></span><span style=display:flex><span> <span style=color:#000>sum</span> <span style=color:#ce5c00;font-weight:700>+=</span> <span style=color:#000>value</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>get</span><span style=color:#ce5c00;font-weight:700>();</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#000>context</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>write</span><span style=color:#ce5c00;font-weight:700>(</span><span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>AvroKey</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>CharSequence</span><span style=color:#ce5c00;font-weight:700>&gt;(</span><span style=color:#000>key</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#c4a000>toString</span><span style=color:#ce5c00;font-weight:700>()),</span> <span style=color:#204a87;font-weight:700>new</span> <span style=color:#000>AvroValue</span><span style=color:#ce5c00;font-weight:700>&lt;</span><span style=color:#000>Integer</span><span style=color:#ce5c00;font-weight:700>&gt;(</span><span style=color:#000>sum</span><span style=color:#ce5c00;font-weight:700>));</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span><span style=display:flex><span> <span style=color:#ce5c00;font-weight:700>}</span>
</span></span></code></pre></div><h2 id=learning-more>Learning more</h2><p>The mapred API allows users to mix Avro AvroMappers and AvroReducers with non-Avro Mappers and Reducers, and the mapreduce API allows users to input Avro and output non-Avro, or vice versa.</p><p>The <code>org.apache.avro.mapred</code> package has API documentation, as does the <code>org.apache.avro.mapreduce</code> package for the new MapReduce API (<code>org.apache.hadoop.mapreduce</code>). Similarly to the mapreduce package, it&rsquo;s possible with the mapred API to implement your own Mappers and Reducers directly using the public classes provided in these libraries. See the <code>AvroWordCount</code> application, found under <em>examples/mr-example/src/main/java/example/AvroWordCount.java</em> in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the old MapReduce API. See the <code>MapReduceAvroWordCount</code> application, found under <em>examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java</em> in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the new MapReduce API.</p><div class=section-index></div><div class="text-muted mt-5 pt-3 border-top">Last modified April 13, 2023: <a href=https://github.com/apache/avro/commit/8b181dcaa392cfc9bf9ed903deb586de9878f938>Fix the path for Java javadoc HTMLs (8b181dc)</a></div></div></main></div></div><footer class="bg-dark py-5 row d-print-none"><div class="container-fluid mx-sm-5"><div class=row><div class="col-4 col-sm-3 text-xs-center order-sm-2"><ul class="list-inline mb-0"><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title="User mailing list" aria-label="User mailing list"><a class=text-white target=_blank rel=noopener href=mailto:user@avro.apache.org aria-label="User mailing list"><i class="fa fa-envelope"></i></a></li><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title=Twitter 
aria-label=Twitter><a class=text-white target=_blank rel=noopener href=https://twitter.com/ApacheAvro aria-label=Twitter><i class="fab fa-twitter"></i></a></li><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title="Stack Overflow" aria-label="Stack Overflow"><a class=text-white target=_blank rel=noopener href=https://stackoverflow.com/questions/tagged/avro aria-label="Stack Overflow"><i class="fab fa-stack-overflow"></i></a></li></ul></div><div class="col-4 col-sm-3 text-right text-xs-center order-sm-3"><ul class="list-inline mb-0"><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title=GitHub aria-label=GitHub><a class=text-white target=_blank rel=noopener href=https://github.com/apache/avro aria-label=GitHub><i class="fab fa-github"></i></a></li><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title=Issues aria-label=Issues><a class=text-white target=_blank rel=noopener href=https://issues.apache.org/jira/projects/AVRO/issues aria-label=Issues><i class="fab fa-jira"></i></a></li><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title="Chat with other project developers at Slack" aria-label="Chat with other project developers at Slack"><a class=text-white target=_blank rel=noopener href=https://the-asf.slack.com/ aria-label="Chat with other project developers at Slack"><i class="fab fa-slack"></i></a></li><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title="Developer mailing list" aria-label="Developer mailing list"><a class=text-white target=_blank rel=noopener href=mailto:dev@avro.apache.org aria-label="Developer mailing list"><i class="fa fa-envelope"></i></a></li></ul></div><div class="col-10 col-sm-3 text-center py-2 order-sm-2"><a href=https://www.apache.org/><small class=text-white>&copy; 2023 The Apache Software Foundation </small></a><small class=text-white>All Rights Reserved</small><p><small class=text-white>Apache 
Avro, Avro&trade;, Apache&reg;, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</small></p></div><div class="col-5 col-sm-3 order-sm-2"><a href=https://www.apache.org/events/current-event.html><img src=https://www.apache.org/events/current-event-234x60.png alt="Current Apache Software Foundation event"></a></div></div></div></footer></div><script src=https://cdn.jsdelivr.net/npm/popper.js@1.16.1/dist/umd/popper.min.js integrity=sha384-9/reFTGAW83EW2RDu2S0VKaIzap3H66lZH81PoYlFhbGU+6BZp6G7niu735Sk7lN crossorigin=anonymous></script>
<script src=https://cdn.jsdelivr.net/npm/bootstrap@4.6.1/dist/js/bootstrap.min.js integrity="sha512-UR25UO94eTnCVwjbXozyeVd6ZqpaAE9naiEUBK/A+QDbfSTQFhPGj5lOR6d8tsgbBk84Ggb5A3EkjsOgPRPcKA==" crossorigin=anonymous></script>
<script src=/js/tabpane-persist.js></script>
<script src=/update-site/js/main.min.b0910468256f44515fad3d1c8b5cf64a439da3abc1acef42ad39b9ceac3ae705.js integrity="sha256-sJEEaCVvRFFfrT0ci1z2SkOdo6vBrO9CrTm5zqw65wU=" crossorigin=anonymous></script>
<script src=/js/prism.js></script></body></html>