<!DOCTYPE HTML>
<html lang="en-US">
<head>
  <meta charset="UTF-8">
  <title>Using in MapRed</title>
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <meta name="generator" content="Jekyll v4.3.4">
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
  <link rel="stylesheet" href="/css/screen.css">
  <link rel="icon" type="image/x-icon" href="/favicon.ico">
  <!--[if lt IE 9]>
  <script src="/js/html5shiv.min.js"></script>
  <script src="/js/respond.min.js"></script>
  <![endif]-->
  <!-- Matomo -->
  <script>
    // Reuse the page-wide window._paq array if one already exists, otherwise
    // start an empty one. Every push below appends one [methodName, ...args]
    // entry to that array; nothing is executed here.
    // NOTE(review): presumably matomo.js consumes these entries once it has
    // loaded — confirm against the Matomo tracking documentation.
    var _paq = window._paq = window._paq || [];
    /* tracker methods like "setCustomDimension" should be called before "trackPageView" */
    // Going by their names these two entries are privacy settings (honour
    // Do-Not-Track, no cookies); exact semantics are defined by Matomo.
    _paq.push(["setDoNotTrack", true]);
    _paq.push(["disableCookies"]);
    _paq.push(['trackPageView']);
    _paq.push(['enableLinkTracking']);
    (function() {
      // Base URL shared by the tracking endpoint (matomo.php) and the
      // tracker script (matomo.js).
      var u="https://analytics.apache.org/";
      _paq.push(['setTrackerUrl', u+'matomo.php']);
      _paq.push(['setSiteId', '68']);
      // Create a script element for matomo.js, mark it async, and insert it
      // immediately before the first script element already in the document.
      var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
      g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
    })();
  </script>
  <!-- End Matomo Code -->
</head>


<body class="wrap">
  <header role="banner">
  <nav class="mobile-nav show-on-mobiles">
    <ul>
  <li class="">
    <a href="/">Home</a>
  </li>
  <li class="">
    <a href="/releases/"><span class="show-on-mobiles">Rel</span>
                         <span class="hide-on-mobiles">Releases</span></a>
  </li>
  <li class="current">
    <a href="/docs/"><span class="show-on-mobiles">Doc</span>
                     <span class="hide-on-mobiles">Documentation</span></a>
  </li>
  <li class="">
    <a href="/talks/"><span class="show-on-mobiles">Talk</span>
                      <span class="hide-on-mobiles">Talks</span></a>
  </li>
  <li class="">
    <a href="/news/">News</a>
  </li>
  <li class="">
    <a href="/develop/"><span class="show-on-mobiles">Dev</span>
                        <span class="hide-on-mobiles">Develop</span></a>
  </li>
  <li class="">
    <a href="/help/">Help</a>
  </li>
</ul>

  </nav>
  <div class="grid">
    <div class="unit one-quarter center-on-mobiles">
      <h1>
        <a href="/">
          <span class="sr-only">Apache ORC</span>
          <img src="/img/logo.png" width="249" height="101" alt="ORC Logo">
        </a>
      </h1>
    </div>
    <nav class="main-nav unit three-quarters hide-on-mobiles">
      <ul>
  <li class="">
    <a href="/">Home</a>
  </li>
  <li class="">
    <a href="/releases/"><span class="show-on-mobiles">Rel</span>
                         <span class="hide-on-mobiles">Releases</span></a>
  </li>
  <li class="current">
    <a href="/docs/"><span class="show-on-mobiles">Doc</span>
                     <span class="hide-on-mobiles">Documentation</span></a>
  </li>
  <li class="">
    <a href="/talks/"><span class="show-on-mobiles">Talk</span>
                      <span class="hide-on-mobiles">Talks</span></a>
  </li>
  <li class="">
    <a href="/news/">News</a>
  </li>
  <li class="">
    <a href="/develop/"><span class="show-on-mobiles">Dev</span>
                        <span class="hide-on-mobiles">Develop</span></a>
  </li>
  <li class="">
    <a href="/help/">Help</a>
  </li>
</ul>

    </nav>
  </div>
</header>


    <section class="docs">
    <div class="grid">

      <div class="docs-nav-mobile unit whole show-on-mobiles">
  <select aria-label="Navigate the docs" onchange="if (this.value) window.location.href=this.value">
    <option value="">Navigate the docs…</option>
    
    <optgroup label="Overview">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/index.html">Background</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
      <option value="/docs/adopters.html">ORC Adopters</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/types.html">Types</option>
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/indexes.html">Indexes</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
      <option value="/docs/acid.html">ACID support</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Installing">
      


  

  
    
  
    
  
    
      <option value="/docs/building.html">Building ORC</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Using in Spark">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/spark-ddl.html">Spark DDL</option>
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/spark-config.html">Spark Configuration</option>
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Using in Python">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/pyarrow.html">PyArrow</option>
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/dask.html">Dask</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Using in Hive">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/hive-ddl.html">Hive DDL</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/hive-config.html">Hive Configuration</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Using in MapReduce">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/mapred.html">Using in MapRed</option>
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/mapreduce.html">Using in MapReduce</option>
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Using ORC Core">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/core-java.html">Using Core Java</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
      <option value="/docs/core-cpp.html">Using Core C++</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
      <option value="/docs/core-java-config.html">ORC Java configuration</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Tools">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/cpp-tools.html">C++ Tools</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/java-tools.html">Java Tools</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
  </select>
</div>


      <div class="unit four-fifths">
        <article>
          <h1>Using in MapRed</h1>
          <p>This page describes how to read and write ORC files from Hadoop’s
older org.apache.hadoop.mapred MapReduce APIs. If you want to use the
new org.apache.hadoop.mapreduce API, please look at the <a href="/docs/mapreduce.html">next
page</a>.</p>

<h2 id="reading-orc-files">Reading ORC files</h2>

<p>Add ORC and your desired version of Hadoop to your <code class="language-plaintext highlighter-rouge">pom.xml</code>:</p>

<div class="language-xml highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nt">&lt;dependencies&gt;</span>
  <span class="nt">&lt;dependency&gt;</span>
    <span class="nt">&lt;groupId&gt;</span>org.apache.orc<span class="nt">&lt;/groupId&gt;</span>
    <span class="nt">&lt;artifactId&gt;</span>orc-mapreduce<span class="nt">&lt;/artifactId&gt;</span>
    <span class="nt">&lt;version&gt;</span>1.1.0<span class="nt">&lt;/version&gt;</span>
  <span class="nt">&lt;/dependency&gt;</span>
  <span class="nt">&lt;dependency&gt;</span>
    <span class="nt">&lt;groupId&gt;</span>org.apache.hadoop<span class="nt">&lt;/groupId&gt;</span>
    <span class="nt">&lt;artifactId&gt;</span>hadoop-mapreduce-client-core<span class="nt">&lt;/artifactId&gt;</span>
    <span class="nt">&lt;version&gt;</span>2.7.0<span class="nt">&lt;/version&gt;</span>
  <span class="nt">&lt;/dependency&gt;</span>
<span class="nt">&lt;/dependencies&gt;</span>
</code></pre></div></div>

<p>Set the minimal properties in your JobConf:</p>

<ul>
  <li><strong>mapreduce.job.inputformat.class</strong> = <a href="/api/orc-mapreduce/index.html?org/apache/orc/mapred/OrcInputFormat.html">org.apache.orc.mapred.OrcInputFormat</a></li>
  <li><strong>mapreduce.input.fileinputformat.inputdir</strong> = your input directory</li>
</ul>

<p>ORC files contain a series of values of the same type and that type
schema is encoded in the file. Because the ORC files are
self-describing, the reader always knows how to correctly interpret
the data. All of the ORC files written by Hive and most of the others have
a struct as the value type.</p>

<p>Your Mapper class will receive org.apache.hadoop.io.NullWritable as
the key and a value based on the table below expanded recursively.</p>

<table>
  <thead>
    <tr>
      <th>ORC Type</th>
      <th>Writable Type</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>array</td>
      <td><a href="/api/orc-mapreduce/index.html?org/apache/orc/mapred/OrcList.html">org.apache.orc.mapred.OrcList</a></td>
    </tr>
    <tr>
      <td>binary</td>
      <td>org.apache.hadoop.io.BytesWritable</td>
    </tr>
    <tr>
      <td>bigint</td>
      <td>org.apache.hadoop.io.LongWritable</td>
    </tr>
    <tr>
      <td>boolean</td>
      <td>org.apache.hadoop.io.BooleanWritable</td>
    </tr>
    <tr>
      <td>char</td>
      <td>org.apache.hadoop.io.Text</td>
    </tr>
    <tr>
      <td>date</td>
      <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/serde2/io/DateWritable.html">org.apache.hadoop.hive.serde2.io.DateWritable</a></td>
    </tr>
    <tr>
      <td>decimal</td>
      <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.html">org.apache.hadoop.hive.serde2.io.HiveDecimalWritable</a></td>
    </tr>
    <tr>
      <td>double</td>
      <td>org.apache.hadoop.io.DoubleWritable</td>
    </tr>
    <tr>
      <td>float</td>
      <td>org.apache.hadoop.io.FloatWritable</td>
    </tr>
    <tr>
      <td>int</td>
      <td>org.apache.hadoop.io.IntWritable</td>
    </tr>
    <tr>
      <td>map</td>
      <td><a href="/api/orc-mapreduce/index.html?org/apache/orc/mapred/OrcMap.html">org.apache.orc.mapred.OrcMap</a></td>
    </tr>
    <tr>
      <td>smallint</td>
      <td>org.apache.hadoop.io.ShortWritable</td>
    </tr>
    <tr>
      <td>string</td>
      <td>org.apache.hadoop.io.Text</td>
    </tr>
    <tr>
      <td>struct</td>
      <td><a href="/api/orc-mapreduce/index.html?org/apache/orc/mapred/OrcStruct.html">org.apache.orc.mapred.OrcStruct</a></td>
    </tr>
    <tr>
      <td>timestamp</td>
      <td><a href="/api/orc-mapreduce/index.html?org/apache/orc/mapred/OrcTimestamp.html">org.apache.orc.mapred.OrcTimestamp</a></td>
    </tr>
    <tr>
      <td>tinyint</td>
      <td>org.apache.hadoop.io.ByteWritable</td>
    </tr>
    <tr>
      <td>uniontype</td>
      <td><a href="/api/orc-mapreduce/index.html?org/apache/orc/mapred/OrcUnion.html">org.apache.orc.mapred.OrcUnion</a></td>
    </tr>
    <tr>
      <td>varchar</td>
      <td>org.apache.hadoop.io.Text</td>
    </tr>
  </tbody>
</table>

<p>Let’s assume that your input directory contains ORC files with the
schema <code class="language-plaintext highlighter-rouge">struct&lt;s:string,i:int&gt;</code> and you want to use the string field
as the key to the MapReduce shuffle and the integer as the value. The
mapper code would look like:</p>

<div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">class</span> <span class="nc">MyMapper</span>
    <span class="kd">implements</span> <span class="nc">Mapper</span><span class="o">&lt;</span><span class="nc">NullWritable</span><span class="o">,</span><span class="nc">OrcStruct</span><span class="o">,</span><span class="nc">Text</span><span class="o">,</span><span class="nc">IntWritable</span><span class="o">&gt;</span> <span class="o">{</span>

  <span class="c1">// Input should be: struct&lt;s:string,i:int&gt;</span>
  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">map</span><span class="o">(</span><span class="nc">NullWritable</span> <span class="n">key</span><span class="o">,</span> <span class="nc">OrcStruct</span> <span class="n">value</span><span class="o">,</span>
                  <span class="nc">OutputCollector</span><span class="o">&lt;</span><span class="nc">Text</span><span class="o">,</span><span class="nc">IntWritable</span><span class="o">&gt;</span> <span class="n">output</span><span class="o">,</span>
                  <span class="nc">Reporter</span> <span class="n">reporter</span><span class="o">)</span> <span class="kd">throws</span> <span class="nc">IOException</span> <span class="o">{</span>
    <span class="n">output</span><span class="o">.</span><span class="na">collect</span><span class="o">((</span><span class="nc">Text</span><span class="o">)</span> <span class="n">value</span><span class="o">.</span><span class="na">getFieldValue</span><span class="o">(</span><span class="mi">0</span><span class="o">),</span>
                   <span class="o">(</span><span class="nc">IntWritable</span><span class="o">)</span> <span class="n">value</span><span class="o">.</span><span class="na">getFieldValue</span><span class="o">(</span><span class="mi">1</span><span class="o">));</span>
  <span class="o">}</span>

  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">configure</span><span class="o">(</span><span class="nc">JobConf</span> <span class="n">conf</span><span class="o">)</span> <span class="o">{</span> <span class="o">}</span>

  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">close</span><span class="o">()</span> <span class="o">{</span> <span class="o">}</span>
<span class="o">}</span>
</code></pre></div></div>

<h2 id="writing-orc-files">Writing ORC files</h2>

<p>To write ORC files from your MapReduce job, you’ll need to set</p>

<ul>
  <li><strong>mapreduce.job.outputformat.class</strong> = <a href="/api/orc-mapreduce/index.html?org/apache/orc/mapred/OrcOutputFormat.html">org.apache.orc.mapred.OrcOutputFormat</a></li>
  <li><strong>mapreduce.output.fileoutputformat.outputdir</strong> = your output directory</li>
  <li><strong>orc.mapred.output.schema</strong> = the schema to write to the ORC file</li>
</ul>

<p>The reducer needs to create the Writable value to be put into the ORC
file and typically uses the OrcStruct.createValue(TypeDescription)
function. For our example, let’s assume that the shuffle types are
(Text, IntWritable) from the previous section and the reducer should
gather the integers for each key together and write them as a list. The
output schema would be <code class="language-plaintext highlighter-rouge">struct&lt;key:string,ints:array&lt;int&gt;&gt;</code>. As always
with MapReduce, if your method stores the values, you need to copy their
value before getting the next.</p>

<div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">MyReducer</span>
  <span class="kd">implements</span> <span class="nc">Reducer</span><span class="o">&lt;</span><span class="nc">Text</span><span class="o">,</span><span class="nc">IntWritable</span><span class="o">,</span><span class="nc">NullWritable</span><span class="o">,</span><span class="nc">OrcStruct</span><span class="o">&gt;</span> <span class="o">{</span>

  <span class="kd">private</span> <span class="nc">TypeDescription</span> <span class="n">schema</span> <span class="o">=</span>
    <span class="nc">TypeDescription</span><span class="o">.</span><span class="na">fromString</span><span class="o">(</span><span class="s">"struct&lt;key:string,ints:array&lt;int&gt;&gt;"</span><span class="o">);</span>
  <span class="c1">// createValue creates the correct value type for the schema</span>
  <span class="kd">private</span> <span class="nc">OrcStruct</span> <span class="n">pair</span> <span class="o">=</span> <span class="o">(</span><span class="nc">OrcStruct</span><span class="o">)</span> <span class="nc">OrcStruct</span><span class="o">.</span><span class="na">createValue</span><span class="o">(</span><span class="n">schema</span><span class="o">);</span>
  <span class="c1">// get a handle to the list of ints</span>
  <span class="kd">private</span> <span class="nc">OrcList</span><span class="o">&lt;</span><span class="nc">IntWritable</span><span class="o">&gt;</span> <span class="n">values</span> <span class="o">=</span>
    <span class="o">(</span><span class="nc">OrcList</span><span class="o">&lt;</span><span class="nc">IntWritable</span><span class="o">&gt;)</span> <span class="n">pair</span><span class="o">.</span><span class="na">getFieldValue</span><span class="o">(</span><span class="mi">1</span><span class="o">);</span>
  <span class="kd">private</span> <span class="kd">final</span> <span class="nc">NullWritable</span> <span class="n">nada</span> <span class="o">=</span> <span class="nc">NullWritable</span><span class="o">.</span><span class="na">get</span><span class="o">();</span>

  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">reduce</span><span class="o">(</span><span class="nc">Text</span> <span class="n">key</span><span class="o">,</span> <span class="nc">Iterator</span><span class="o">&lt;</span><span class="nc">IntWritable</span><span class="o">&gt;</span> <span class="n">iterator</span><span class="o">,</span>
                     <span class="nc">OutputCollector</span><span class="o">&lt;</span><span class="nc">NullWritable</span><span class="o">,</span> <span class="nc">OrcStruct</span><span class="o">&gt;</span> <span class="n">output</span><span class="o">,</span>
                     <span class="nc">Reporter</span> <span class="n">reporter</span><span class="o">)</span> <span class="kd">throws</span> <span class="nc">IOException</span> <span class="o">{</span>
    <span class="n">pair</span><span class="o">.</span><span class="na">setFieldValue</span><span class="o">(</span><span class="mi">0</span><span class="o">,</span> <span class="n">key</span><span class="o">);</span>
    <span class="n">values</span><span class="o">.</span><span class="na">clear</span><span class="o">();</span>
    <span class="k">while</span> <span class="o">(</span><span class="n">iterator</span><span class="o">.</span><span class="na">hasNext</span><span class="o">())</span> <span class="o">{</span>
      <span class="n">values</span><span class="o">.</span><span class="na">add</span><span class="o">(</span><span class="k">new</span> <span class="nc">IntWritable</span><span class="o">(</span><span class="n">iterator</span><span class="o">.</span><span class="na">next</span><span class="o">().</span><span class="na">get</span><span class="o">()));</span>
    <span class="o">}</span>
    <span class="n">output</span><span class="o">.</span><span class="na">collect</span><span class="o">(</span><span class="n">nada</span><span class="o">,</span> <span class="n">pair</span><span class="o">);</span>
  <span class="o">}</span>

  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">configure</span><span class="o">(</span><span class="nc">JobConf</span> <span class="n">conf</span><span class="o">)</span> <span class="o">{</span> <span class="o">}</span>

  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">close</span><span class="o">()</span> <span class="o">{</span> <span class="o">}</span>
<span class="o">}</span>
</code></pre></div></div>

<h2 id="sending-orcstruct-orclist-orcmap-or-orcunion-through-the-shuffle">Sending OrcStruct, OrcList, OrcMap, or OrcUnion through the Shuffle</h2>

<p>In the previous examples, only the Hadoop types were sent through the
MapReduce shuffle. The complex ORC types, since they are generic
types, need to have their full type information provided to create the
object. To enable MapReduce to properly instantiate the OrcStruct and
other ORC types, we need to wrap it in either an
<a href="/api/orc-mapreduce/index.html?org/apache/orc/mapred/OrcKey.html">OrcKey</a>
for the shuffle key or
<a href="/api/orc-mapreduce/index.html?org/apache/orc/mapred/OrcValue.html">OrcValue</a>
for the shuffle value.</p>

<p>To send two OrcStructs through the shuffle, define the following properties
in the JobConf:</p>

<ul>
  <li><strong>mapreduce.map.output.key.class</strong> = org.apache.orc.mapred.OrcKey</li>
  <li><strong>orc.mapred.map.output.key.schema</strong> = the shuffle key’s schema</li>
  <li><strong>mapreduce.map.output.value.class</strong> = org.apache.orc.mapred.OrcValue</li>
  <li><strong>orc.mapred.map.output.value.schema</strong> = the shuffle value’s schema</li>
</ul>

<p>The mapper just adds an OrcKey and OrcValue around the key and value
respectively. These objects should be created once and reused as the mapper
runs.</p>

<div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">MyMapperWithShuffle</span>
  <span class="kd">implements</span> <span class="nc">Mapper</span><span class="o">&lt;</span><span class="nc">NullWritable</span><span class="o">,</span><span class="nc">OrcStruct</span><span class="o">,</span><span class="nc">OrcKey</span><span class="o">,</span><span class="nc">OrcValue</span><span class="o">&gt;</span> <span class="o">{</span>

  <span class="c1">// create wrapper objects</span>
  <span class="kd">private</span> <span class="nc">OrcKey</span> <span class="n">keyWrapper</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">OrcKey</span><span class="o">();</span>
  <span class="kd">private</span> <span class="nc">OrcValue</span> <span class="n">valueWrapper</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">OrcValue</span><span class="o">();</span>

  <span class="c1">// create a new structure to pass as the value in the shuffle</span>
  <span class="kd">private</span> <span class="nc">OrcStruct</span> <span class="n">outStruct</span> <span class="o">=</span> <span class="o">(</span><span class="nc">OrcStruct</span><span class="o">)</span> <span class="nc">OrcStruct</span><span class="o">.</span><span class="na">createValue</span>
    <span class="o">(</span><span class="nc">TypeDescription</span><span class="o">.</span><span class="na">fromString</span><span class="o">(</span><span class="s">"struct&lt;i:int,j:int&gt;"</span><span class="o">));</span>

  <span class="c1">// get the two fields of the outStruct</span>
  <span class="kd">private</span> <span class="nc">IntWritable</span> <span class="n">i</span> <span class="o">=</span> <span class="o">(</span><span class="nc">IntWritable</span><span class="o">)</span> <span class="n">outStruct</span><span class="o">.</span><span class="na">getFieldValue</span><span class="o">(</span><span class="s">"i"</span><span class="o">);</span>
  <span class="kd">private</span> <span class="nc">IntWritable</span> <span class="n">j</span> <span class="o">=</span> <span class="o">(</span><span class="nc">IntWritable</span><span class="o">)</span> <span class="n">outStruct</span><span class="o">.</span><span class="na">getFieldValue</span><span class="o">(</span><span class="s">"j"</span><span class="o">);</span>

  <span class="c1">// Assume the input has type: struct&lt;s:string,i:int&gt;</span>
  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">map</span><span class="o">(</span><span class="nc">NullWritable</span> <span class="n">key</span><span class="o">,</span> <span class="nc">OrcStruct</span> <span class="n">value</span><span class="o">,</span>
                  <span class="nc">OutputCollector</span><span class="o">&lt;</span><span class="nc">OrcKey</span><span class="o">,</span><span class="nc">OrcValue</span><span class="o">&gt;</span> <span class="n">output</span><span class="o">,</span>
                  <span class="nc">Reporter</span> <span class="n">reporter</span><span class="o">)</span> <span class="kd">throws</span> <span class="nc">IOException</span> <span class="o">{</span>
    <span class="n">keyWrapper</span><span class="o">.</span><span class="na">key</span> <span class="o">=</span> <span class="n">value</span><span class="o">;</span>
    <span class="n">valueWrapper</span><span class="o">.</span><span class="na">value</span> <span class="o">=</span> <span class="n">outStruct</span><span class="o">;</span>
    <span class="kt">int</span> <span class="n">val</span> <span class="o">=</span> <span class="o">((</span><span class="nc">IntWritable</span><span class="o">)</span> <span class="n">value</span><span class="o">.</span><span class="na">getFieldValue</span><span class="o">(</span><span class="s">"i"</span><span class="o">)).</span><span class="na">get</span><span class="o">();</span>
    <span class="n">i</span><span class="o">.</span><span class="na">set</span><span class="o">(</span><span class="n">val</span> <span class="o">*</span> <span class="mi">2</span><span class="o">);</span>
    <span class="n">j</span><span class="o">.</span><span class="na">set</span><span class="o">(</span><span class="n">val</span> <span class="o">*</span> <span class="n">val</span><span class="o">);</span>
    <span class="n">output</span><span class="o">.</span><span class="na">collect</span><span class="o">(</span><span class="n">keyWrapper</span><span class="o">,</span> <span class="n">valueWrapper</span><span class="o">);</span>
  <span class="o">}</span>

  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">configure</span><span class="o">(</span><span class="nc">JobConf</span> <span class="n">conf</span><span class="o">)</span> <span class="o">{</span> <span class="o">}</span>

  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">close</span><span class="o">()</span> <span class="o">{</span> <span class="o">}</span>
<span class="o">}</span>
</code></pre></div></div>

<p>The reducer code accesses the underlying OrcStructs by using the
OrcKey.key and OrcValue.value fields.</p>

          





  
  

  
  

  
  

  
  

  
  

  
  

  
  

  
  

  
  

  
  

  
  

  
  

  
  
    <div class="section-nav">
      <div class="left align-right">
          
            
            
            <a href="/docs/hive-config.html" class="prev">Back</a>
          
      </div>
      <div class="right align-left">
          
            
            
            <a href="/docs/mapreduce.html" class="next">Next</a>
          
      </div>
    </div>
    <div class="clear"></div>
    

        </article>
      </div>

      <div class="unit one-fifth hide-on-mobiles">
  <aside>
    
    <h4>Overview</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/index.html">Background</a></li>
      


  

  

  
    
  
    
      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/types.html">Types</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
      


  

  

  
    
      <li class=""><a href="/docs/acid.html">ACID support</a></li>
      


</ul>

    
    <h4>Installing</h4>
    

<ul>

  

  

  
    
  
    
  
    
      <li class=""><a href="/docs/building.html">Building ORC</a></li>
      


</ul>

    
    <h4>Using in Spark</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/spark-ddl.html">Spark DDL</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/spark-config.html">Spark Configuration</a></li>
      


</ul>

    
    <h4>Using in Python</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/pyarrow.html">PyArrow</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/dask.html">Dask</a></li>
      


</ul>

    
    <h4>Using in Hive</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
      


</ul>

    
    <h4>Using in MapReduce</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class="current"><a href="/docs/mapred.html">Using in MapRed</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
      


</ul>

    
    <h4>Using ORC Core</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
      


  

  

  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/core-java-config.html">ORC Java configuration</a></li>
      


</ul>

    
    <h4>Tools</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/java-tools.html">Java Tools</a></li>
      


</ul>

    
  </aside>
</div>


      <div class="clear"></div>

    </div>
  </section>


  <footer role="contentinfo">
  <p style="margin-left: 20px; margin-right: 20px; text-align: center">The contents of this website are &copy;&nbsp;2025
     <a href="https://www.apache.org/">Apache Software Foundation</a>
     under the terms of the <a
      href="https://www.apache.org/licenses/LICENSE-2.0.html">
      Apache&nbsp;License&nbsp;v2</a>. Apache ORC and its logo are trademarks
      of the Apache Software Foundation.</p>
</footer>

  <script>
  // Build the permalink anchor element for a heading: it points at "#" + id,
  // carries the "header-link" class and a "Permalink" title, and contains
  // screen-reader-only text followed by a link icon.
  var anchorForId = function (id) {
    var permalink = document.createElement("a");
    permalink.title = "Permalink";
    permalink.href = "#" + id;
    permalink.className = "header-link";
    permalink.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>";
    return permalink;
  };

  // Append a permalink anchor to every h<level> heading found inside
  // containingElement, skipping headings whose id is missing or empty.
  var linkifyAnchors = function (level, containingElement) {
    var headings = containingElement.getElementsByTagName("h" + level);
    for (var idx = 0; idx < headings.length; idx++) {
      var heading = headings[idx];
      var headingId = heading.id;

      if (typeof headingId === "undefined" || headingId === "") {
        continue; // no id, so there is nothing for a permalink to target
      }
      heading.appendChild(anchorForId(headingId));
    }
  };

  // When the document reaches the "complete" ready state, add permalinks to
  // the h1 through h6 headings of the first "docs" element, falling back to
  // the first "news" element when no "docs" element exists.
  document.onreadystatechange = function () {
    if (this.readyState !== "complete") {
      return;
    }

    var container = document.getElementsByClassName("docs")[0];
    if (!container) {
      container = document.getElementsByClassName("news")[0];
    }
    if (!container) {
      return;
    }

    var depth = 1;
    while (depth <= 6) {
      linkifyAnchors(depth, container);
      depth++;
    }
  };
</script>


</body>
</html>
