| <!DOCTYPE HTML> |
| <html lang="en-US"> |
| <head> |
| <meta charset="UTF-8"> |
| <title>Using Core Java</title> |
| <meta name="viewport" content="width=device-width,initial-scale=1"> |
| <meta name="generator" content="Jekyll v3.8.6"> |
| <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900"> |
| <link rel="stylesheet" href="/css/screen.css"> |
| <link rel="icon" type="image/x-icon" href="/favicon.ico"> |
| <!--[if lt IE 9]> |
| <script src="/js/html5shiv.min.js"></script> |
| <script src="/js/respond.min.js"></script> |
| <![endif]--> |
| </head> |
| |
| |
| <body class="wrap"> |
| <header role="banner"> |
| <nav class="mobile-nav show-on-mobiles"> |
| <ul> |
| <li class=""> |
| <a href="/">Home</a> |
| </li> |
| <li class=""> |
| <a href="/releases/"><span class="show-on-mobiles">Rel</span> |
| <span class="hide-on-mobiles">Releases</span></a> |
| </li> |
| <li class="current"> |
| <a href="/docs/"><span class="show-on-mobiles">Doc</span> |
| <span class="hide-on-mobiles">Documentation</span></a> |
| </li> |
| <li class=""> |
| <a href="/talks/"><span class="show-on-mobiles">Talk</span> |
| <span class="hide-on-mobiles">Talks</span></a> |
| </li> |
| <li class=""> |
| <a href="/news/">News</a> |
| </li> |
| <li class=""> |
| <a href="/develop/"><span class="show-on-mobiles">Dev</span> |
| <span class="hide-on-mobiles">Develop</span></a> |
| </li> |
| <li class=""> |
| <a href="/help/">Help</a> |
| </li> |
| </ul> |
| |
| </nav> |
| <div class="grid"> |
| <div class="unit one-quarter center-on-mobiles"> |
| <h1> |
| <a href="/"> |
| <span class="sr-only">Apache ORC</span> |
| <img src="/img/logo.png" width="249" height="101" alt="ORC Logo"> |
| </a> |
| </h1> |
| </div> |
| <nav class="main-nav unit three-quarters hide-on-mobiles"> |
| <ul> |
| <li class=""> |
| <a href="/">Home</a> |
| </li> |
| <li class=""> |
| <a href="/releases/"><span class="show-on-mobiles">Rel</span> |
| <span class="hide-on-mobiles">Releases</span></a> |
| </li> |
| <li class="current"> |
| <a href="/docs/"><span class="show-on-mobiles">Doc</span> |
| <span class="hide-on-mobiles">Documentation</span></a> |
| </li> |
| <li class=""> |
| <a href="/talks/"><span class="show-on-mobiles">Talk</span> |
| <span class="hide-on-mobiles">Talks</span></a> |
| </li> |
| <li class=""> |
| <a href="/news/">News</a> |
| </li> |
| <li class=""> |
| <a href="/develop/"><span class="show-on-mobiles">Dev</span> |
| <span class="hide-on-mobiles">Develop</span></a> |
| </li> |
| <li class=""> |
| <a href="/help/">Help</a> |
| </li> |
| </ul> |
| |
| </nav> |
| </div> |
| </header> |
| |
| |
| <section class="docs"> |
| <div class="grid"> |
| |
| <div class="docs-nav-mobile unit whole show-on-mobiles"> |
| <select onchange="if (this.value) window.location.href=this.value"> |
| <option value="">Navigate the docs…</option> |
| |
| <optgroup label="Overview"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/index.html">Background</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/adopters.html">ORC Adopters</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/types.html">Types</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/indexes.html">Indexes</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/acid.html">ACID support</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Installing"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/building.html">Building ORC</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Spark"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/spark-ddl.html">Spark DDL</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/spark-config.html">Spark Configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Python"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/pyarrow.html">PyArrow</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/dask.html">Dask</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Hive"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/hive-ddl.html">Hive DDL</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/hive-config.html">Hive Configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in MapReduce"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/mapred.html">Using in MapRed</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/mapreduce.html">Using in MapReduce</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using ORC Core"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-java.html">Using Core Java</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-cpp.html">Using Core C++</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-java-config.html">ORC Java configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Tools"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/cpp-tools.html">C++ Tools</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/java-tools.html">Java Tools</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| </select> |
| </div> |
| |
| |
| <div class="unit four-fifths"> |
| <article> |
| <h1>Using Core Java</h1> |
| <p>The Core ORC API reads and writes ORC files into Hive’s storage-api |
| vectorized classes. Both Hive and MapReduce use the Core API to actually |
| read and write the data.</p> |
| |
| <h2 id="vectorized-row-batch">Vectorized Row Batch</h2> |
| |
| <p>Data is passed to ORC as instances of |
| <a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.html">VectorizedRowBatch</a> |
| that contain the data for 1024 rows. The focus is on speed and |
| accessing the data fields directly. <code class="highlighter-rouge">cols</code> is an array of |
| <a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.html">ColumnVector</a> |
| and <code class="highlighter-rouge">size</code> is the number of rows.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kn">package</span> <span class="nn">org.apache.hadoop.hive.ql.exec.vector</span><span class="o">;</span> |
| |
| <span class="kd">public</span> <span class="kd">class</span> <span class="nc">VectorizedRowBatch</span> <span class="o">{</span> |
| <span class="kd">public</span> <span class="nc">ColumnVector</span><span class="o">[]</span> <span class="n">cols</span><span class="o">;</span> |
| <span class="kd">public</span> <span class="kt">int</span> <span class="n">size</span><span class="o">;</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <p><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.html">ColumnVector</a> |
| is the parent type of the different kinds of columns and has some |
| fields that are shared across all of the column types. In particular, |
| the <code class="highlighter-rouge">noNulls</code> flag is set if there are no nulls in this column for this batch |
| and the <code class="highlighter-rouge">isRepeating</code> flag is set for columns where the entire batch is the |
| same value. For columns where <code class="highlighter-rouge">noNulls == false</code>, the <code class="highlighter-rouge">isNull</code> array holds |
| true for each value that is null.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">abstract</span> <span class="kd">class</span> <span class="nc">ColumnVector</span> <span class="o">{</span> |
| |
| <span class="c1">// If the whole column vector has no nulls, this is true, otherwise false.</span> |
| <span class="kd">public</span> <span class="kt">boolean</span> <span class="n">noNulls</span><span class="o">;</span> |
| |
| <span class="c1">// If noNulls is false, then this array contains true if the value</span> |
| <span class="c1">// is null, otherwise false.</span> |
| <span class="kd">public</span> <span class="kt">boolean</span><span class="o">[]</span> <span class="n">isNull</span><span class="o">;</span> |
| |
| <span class="cm">/* |
| * True if same value repeats for whole column vector. |
| * If so, vector[0] holds the repeating value. |
| */</span> |
| <span class="kd">public</span> <span class="kt">boolean</span> <span class="n">isRepeating</span><span class="o">;</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <p>The subtypes of ColumnVector are:</p> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>ORC Type</th> |
| <th>ColumnVector</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>array</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.html">ListColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>binary</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.html">BytesColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>bigint</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.html">LongColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>boolean</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.html">LongColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>char</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.html">BytesColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>date</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.html">LongColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>decimal</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.html">DecimalColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>double</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.html">DoubleColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>float</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.html">DoubleColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>int</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.html">LongColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>map</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.html">MapColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>smallint</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.html">LongColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>string</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.html">BytesColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>struct</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.html">StructColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>timestamp</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.html">TimestampColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>tinyint</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.html">LongColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>uniontype</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.html">UnionColumnVector</a></td> |
| </tr> |
| <tr> |
| <td>varchar</td> |
| <td><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.html">BytesColumnVector</a></td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <p><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.html">LongColumnVector</a> handles all of the integer types (boolean, bigint, |
| date, int, smallint, and tinyint). The data is represented as an array of |
| longs where each value is sign-extended as necessary.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">class</span> <span class="nc">LongColumnVector</span> <span class="kd">extends</span> <span class="nc">ColumnVector</span> <span class="o">{</span> |
| <span class="kd">public</span> <span class="kt">long</span><span class="o">[]</span> <span class="n">vector</span><span class="o">;</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <p><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.html">TimestampColumnVector</a> |
| handles timestamp values. The data is represented as an array of longs |
| and an array of ints.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">class</span> <span class="nc">TimestampColumnVector</span> <span class="kd">extends</span> <span class="nc">ColumnVector</span> <span class="o">{</span> |
| |
| <span class="c1">// the number of milliseconds since 1 Jan 1970 00:00 GMT</span> |
| <span class="kd">public</span> <span class="kt">long</span><span class="o">[]</span> <span class="n">time</span><span class="o">;</span> |
| |
| <span class="c1">// the number of nanoseconds within the second</span> |
| <span class="kd">public</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">nanos</span><span class="o">;</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <p><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.html">DoubleColumnVector</a> |
| handles all of the floating point types (double and float). The data |
| is represented as an array of doubles.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">class</span> <span class="nc">DoubleColumnVector</span> <span class="kd">extends</span> <span class="nc">ColumnVector</span> <span class="o">{</span> |
| <span class="kd">public</span> <span class="kt">double</span><span class="o">[]</span> <span class="n">vector</span><span class="o">;</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <p><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.html">DecimalColumnVector</a> |
| handles decimal columns. The data is represented as an array of |
| HiveDecimalWritable. Note that this implementation is not performant |
| and will likely be replaced.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">class</span> <span class="nc">DecimalColumnVector</span> <span class="kd">extends</span> <span class="nc">ColumnVector</span> <span class="o">{</span> |
| <span class="kd">public</span> <span class="nc">HiveDecimalWritable</span><span class="o">[]</span> <span class="n">vector</span><span class="o">;</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <p><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.html">BytesColumnVector</a> |
| handles all of the binary types (binary, char, string, and |
| varchar). The data is represented as a byte array, offset, and |
| length. The byte arrays may or may not be shared between values.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">class</span> <span class="nc">BytesColumnVector</span> <span class="kd">extends</span> <span class="nc">ColumnVector</span> <span class="o">{</span> |
| <span class="kd">public</span> <span class="kt">byte</span><span class="o">[][]</span> <span class="n">vector</span><span class="o">;</span> |
| <span class="kd">public</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">start</span><span class="o">;</span> |
| <span class="kd">public</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">length</span><span class="o">;</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <p><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.html">StructColumnVector</a> |
| handles the struct columns and represents the data as an array of |
| <code class="highlighter-rouge">ColumnVector</code>. The value for row 5 consists of the fifth value from |
| each of the <code class="highlighter-rouge">fields</code> values.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">class</span> <span class="nc">StructColumnVector</span> <span class="kd">extends</span> <span class="nc">ColumnVector</span> <span class="o">{</span> |
| <span class="kd">public</span> <span class="nc">ColumnVector</span><span class="o">[]</span> <span class="n">fields</span><span class="o">;</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <p><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.html">UnionColumnVector</a> |
| handles the union columns and represents the data as an array of |
| integers that pick the subtype and a <code class="highlighter-rouge">fields</code> array with one entry per |
| subtype. Only the value in the <code class="highlighter-rouge">fields</code> entry that corresponds to |
| <code class="highlighter-rouge">tags[row]</code> is set.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">class</span> <span class="nc">UnionColumnVector</span> <span class="kd">extends</span> <span class="nc">ColumnVector</span> <span class="o">{</span> |
| <span class="kd">public</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">tags</span><span class="o">;</span> |
| <span class="kd">public</span> <span class="nc">ColumnVector</span><span class="o">[]</span> <span class="n">fields</span><span class="o">;</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <p><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.html">ListColumnVector</a> |
| handles the array columns and represents the data as two arrays of |
| integers for the offsets and lengths and a <code class="highlighter-rouge">ColumnVector</code> for the |
| children values.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">class</span> <span class="nc">ListColumnVector</span> <span class="kd">extends</span> <span class="nc">ColumnVector</span> <span class="o">{</span> |
| <span class="c1">// for each row, the first offset of the child</span> |
| <span class="kd">public</span> <span class="kt">long</span><span class="o">[]</span> <span class="n">offsets</span><span class="o">;</span> |
| <span class="c1">// for each row, the number of elements in the array</span> |
| <span class="kd">public</span> <span class="kt">long</span><span class="o">[]</span> <span class="n">lengths</span><span class="o">;</span> |
| <span class="c1">// the offset in the child that should be used for new values</span> |
| <span class="kd">public</span> <span class="kt">int</span> <span class="n">childCount</span><span class="o">;</span> |
| |
| <span class="c1">// the values of the children</span> |
| <span class="kd">public</span> <span class="nc">ColumnVector</span> <span class="n">child</span><span class="o">;</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <p><a href="https://javadoc.io/static/org.apache.hive/hive-storage-api/2.8.1/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.html">MapColumnVector</a> |
| handles the map columns and represents the data as two arrays of |
| integers for the offsets and lengths and two <code class="highlighter-rouge">ColumnVector</code>s for the |
| keys and values.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">class</span> <span class="nc">MapColumnVector</span> <span class="kd">extends</span> <span class="nc">ColumnVector</span> <span class="o">{</span> |
| <span class="c1">// for each row, the first offset of the child</span> |
| <span class="kd">public</span> <span class="kt">long</span><span class="o">[]</span> <span class="n">offsets</span><span class="o">;</span> |
| <span class="c1">// for each row, the number of elements in the array</span> |
| <span class="kd">public</span> <span class="kt">long</span><span class="o">[]</span> <span class="n">lengths</span><span class="o">;</span> |
| <span class="c1">// the offset in the child that should be used for new values</span> |
| <span class="kd">public</span> <span class="kt">int</span> <span class="n">childCount</span><span class="o">;</span> |
| |
| <span class="c1">// the values of the keys and values</span> |
| <span class="kd">public</span> <span class="nc">ColumnVector</span> <span class="n">keys</span><span class="o">;</span> |
| <span class="kd">public</span> <span class="nc">ColumnVector</span> <span class="n">values</span><span class="o">;</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <h2 id="writing-orc-files">Writing ORC Files</h2> |
| |
| <h3 id="simple-example">Simple Example</h3> |
| <p>To write an ORC file, you need to define the schema and use the |
| <a href="/api/orc-core/index.html?org/apache/orc/OrcFile.html">OrcFile</a> |
| class to create a |
| <a href="/api/orc-core/index.html?org/apache/orc/Writer.html">Writer</a> |
| with the desired filename. This example sets the required schema |
| parameter, but there are many other options to control the ORC writer.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nc">Configuration</span> <span class="n">conf</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">Configuration</span><span class="o">();</span> |
| <span class="nc">TypeDescription</span> <span class="n">schema</span> <span class="o">=</span> <span class="nc">TypeDescription</span><span class="o">.</span><span class="na">fromString</span><span class="o">(</span><span class="s">"struct&lt;x:int,y:int&gt;"</span><span class="o">);</span> |
| <span class="nc">Writer</span> <span class="n">writer</span> <span class="o">=</span> <span class="nc">OrcFile</span><span class="o">.</span><span class="na">createWriter</span><span class="o">(</span><span class="k">new</span> <span class="nc">Path</span><span class="o">(</span><span class="s">"my-file.orc"</span><span class="o">),</span> |
| <span class="nc">OrcFile</span><span class="o">.</span><span class="na">writerOptions</span><span class="o">(</span><span class="n">conf</span><span class="o">)</span> |
| <span class="o">.</span><span class="na">setSchema</span><span class="o">(</span><span class="n">schema</span><span class="o">));</span> |
| </code></pre></div></div> |
| |
| <p>Now you need to create a row batch, set the data, and write it to the file |
| as the batch fills up. When the file is done, close the <code class="highlighter-rouge">Writer</code>.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nc">VectorizedRowBatch</span> <span class="n">batch</span> <span class="o">=</span> <span class="n">schema</span><span class="o">.</span><span class="na">createRowBatch</span><span class="o">();</span> |
| <span class="nc">LongColumnVector</span> <span class="n">x</span> <span class="o">=</span> <span class="o">(</span><span class="nc">LongColumnVector</span><span class="o">)</span> <span class="n">batch</span><span class="o">.</span><span class="na">cols</span><span class="o">[</span><span class="mi">0</span><span class="o">];</span> |
| <span class="nc">LongColumnVector</span> <span class="n">y</span> <span class="o">=</span> <span class="o">(</span><span class="nc">LongColumnVector</span><span class="o">)</span> <span class="n">batch</span><span class="o">.</span><span class="na">cols</span><span class="o">[</span><span class="mi">1</span><span class="o">];</span> |
| <span class="k">for</span><span class="o">(</span><span class="kt">int</span> <span class="n">r</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">r</span> <span class="o">&lt;</span> <span class="mi">10000</span><span class="o">;</span> <span class="o">++</span><span class="n">r</span><span class="o">)</span> <span class="o">{</span> |
| <span class="kt">int</span> <span class="n">row</span> <span class="o">=</span> <span class="n">batch</span><span class="o">.</span><span class="na">size</span><span class="o">++;</span> |
| <span class="n">x</span><span class="o">.</span><span class="na">vector</span><span class="o">[</span><span class="n">row</span><span class="o">]</span> <span class="o">=</span> <span class="n">r</span><span class="o">;</span> |
| <span class="n">y</span><span class="o">.</span><span class="na">vector</span><span class="o">[</span><span class="n">row</span><span class="o">]</span> <span class="o">=</span> <span class="n">r</span> <span class="o">*</span> <span class="mi">3</span><span class="o">;</span> |
| <span class="c1">// If the batch is full, write it out and start over.</span> |
| <span class="k">if</span> <span class="o">(</span><span class="n">batch</span><span class="o">.</span><span class="na">size</span> <span class="o">==</span> <span class="n">batch</span><span class="o">.</span><span class="na">getMaxSize</span><span class="o">())</span> <span class="o">{</span> |
| <span class="n">writer</span><span class="o">.</span><span class="na">addRowBatch</span><span class="o">(</span><span class="n">batch</span><span class="o">);</span> |
| <span class="n">batch</span><span class="o">.</span><span class="na">reset</span><span class="o">();</span> |
| <span class="o">}</span> |
| <span class="o">}</span> |
| <span class="k">if</span> <span class="o">(</span><span class="n">batch</span><span class="o">.</span><span class="na">size</span> <span class="o">!=</span> <span class="mi">0</span><span class="o">)</span> <span class="o">{</span> |
| <span class="n">writer</span><span class="o">.</span><span class="na">addRowBatch</span><span class="o">(</span><span class="n">batch</span><span class="o">);</span> |
| <span class="n">batch</span><span class="o">.</span><span class="na">reset</span><span class="o">();</span> |
| <span class="o">}</span> |
| <span class="n">writer</span><span class="o">.</span><span class="na">close</span><span class="o">();</span> |
| </code></pre></div></div> |
| |
| <h3 id="advanced-example">Advanced Example</h3> |
| |
| <p>The following example writes an ORC file with two integer |
| columns and a map column. Each row’s map has 5 elements with keys |
| ranging from “&lt;row&gt;.0” to “&lt;row&gt;.4”.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nc">Path</span> <span class="n">testFilePath</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">Path</span><span class="o">(</span><span class="s">"advanced-example.orc"</span><span class="o">);</span> |
| <span class="nc">Configuration</span> <span class="n">conf</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">Configuration</span><span class="o">();</span> |
| |
| <span class="nc">TypeDescription</span> <span class="n">schema</span> <span class="o">=</span> |
| <span class="nc">TypeDescription</span><span class="o">.</span><span class="na">fromString</span><span class="o">(</span><span class="s">"struct<first:int,"</span> <span class="o">+</span> |
| <span class="s">"second:int,third:map<string,int>>"</span><span class="o">);</span> |
| |
| <span class="nc">Writer</span> <span class="n">writer</span> <span class="o">=</span> |
| <span class="nc">OrcFile</span><span class="o">.</span><span class="na">createWriter</span><span class="o">(</span><span class="n">testFilePath</span><span class="o">,</span> |
| <span class="nc">OrcFile</span><span class="o">.</span><span class="na">writerOptions</span><span class="o">(</span><span class="n">conf</span><span class="o">).</span><span class="na">setSchema</span><span class="o">(</span><span class="n">schema</span><span class="o">));</span> |
| |
| <span class="nc">VectorizedRowBatch</span> <span class="n">batch</span> <span class="o">=</span> <span class="n">schema</span><span class="o">.</span><span class="na">createRowBatch</span><span class="o">();</span> |
| <span class="nc">LongColumnVector</span> <span class="n">first</span> <span class="o">=</span> <span class="o">(</span><span class="nc">LongColumnVector</span><span class="o">)</span> <span class="n">batch</span><span class="o">.</span><span class="na">cols</span><span class="o">[</span><span class="mi">0</span><span class="o">];</span> |
| <span class="nc">LongColumnVector</span> <span class="n">second</span> <span class="o">=</span> <span class="o">(</span><span class="nc">LongColumnVector</span><span class="o">)</span> <span class="n">batch</span><span class="o">.</span><span class="na">cols</span><span class="o">[</span><span class="mi">1</span><span class="o">];</span> |
| |
| <span class="c1">// Define the map. You also need to cast the key and value vectors</span> |
| <span class="nc">MapColumnVector</span> <span class="n">map</span> <span class="o">=</span> <span class="o">(</span><span class="nc">MapColumnVector</span><span class="o">)</span> <span class="n">batch</span><span class="o">.</span><span class="na">cols</span><span class="o">[</span><span class="mi">2</span><span class="o">];</span> |
| <span class="nc">BytesColumnVector</span> <span class="n">mapKey</span> <span class="o">=</span> <span class="o">(</span><span class="nc">BytesColumnVector</span><span class="o">)</span> <span class="n">map</span><span class="o">.</span><span class="na">keys</span><span class="o">;</span> |
| <span class="nc">LongColumnVector</span> <span class="n">mapValue</span> <span class="o">=</span> <span class="o">(</span><span class="nc">LongColumnVector</span><span class="o">)</span> <span class="n">map</span><span class="o">.</span><span class="na">values</span><span class="o">;</span> |
| |
| <span class="c1">// Each map has 5 elements</span> |
| <span class="kd">final</span> <span class="kt">int</span> <span class="no">MAP_SIZE</span> <span class="o">=</span> <span class="mi">5</span><span class="o">;</span> |
| <span class="kd">final</span> <span class="kt">int</span> <span class="no">BATCH_SIZE</span> <span class="o">=</span> <span class="n">batch</span><span class="o">.</span><span class="na">getMaxSize</span><span class="o">();</span> |
| |
| <span class="c1">// Ensure the map is big enough</span> |
| <span class="n">mapKey</span><span class="o">.</span><span class="na">ensureSize</span><span class="o">(</span><span class="no">BATCH_SIZE</span> <span class="o">*</span> <span class="no">MAP_SIZE</span><span class="o">,</span> <span class="kc">false</span><span class="o">);</span> |
| <span class="n">mapValue</span><span class="o">.</span><span class="na">ensureSize</span><span class="o">(</span><span class="no">BATCH_SIZE</span> <span class="o">*</span> <span class="no">MAP_SIZE</span><span class="o">,</span> <span class="kc">false</span><span class="o">);</span> |
| |
| <span class="c1">// add 1500 rows to file</span> |
| <span class="k">for</span><span class="o">(</span><span class="kt">int</span> <span class="n">r</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">r</span> <span class="o"><</span> <span class="mi">1500</span><span class="o">;</span> <span class="o">++</span><span class="n">r</span><span class="o">)</span> <span class="o">{</span> |
| <span class="kt">int</span> <span class="n">row</span> <span class="o">=</span> <span class="n">batch</span><span class="o">.</span><span class="na">size</span><span class="o">++;</span> |
| |
| <span class="n">first</span><span class="o">.</span><span class="na">vector</span><span class="o">[</span><span class="n">row</span><span class="o">]</span> <span class="o">=</span> <span class="n">r</span><span class="o">;</span> |
| <span class="n">second</span><span class="o">.</span><span class="na">vector</span><span class="o">[</span><span class="n">row</span><span class="o">]</span> <span class="o">=</span> <span class="n">r</span> <span class="o">*</span> <span class="mi">3</span><span class="o">;</span> |
| |
| <span class="n">map</span><span class="o">.</span><span class="na">offsets</span><span class="o">[</span><span class="n">row</span><span class="o">]</span> <span class="o">=</span> <span class="n">map</span><span class="o">.</span><span class="na">childCount</span><span class="o">;</span> |
| <span class="n">map</span><span class="o">.</span><span class="na">lengths</span><span class="o">[</span><span class="n">row</span><span class="o">]</span> <span class="o">=</span> <span class="no">MAP_SIZE</span><span class="o">;</span> |
| <span class="n">map</span><span class="o">.</span><span class="na">childCount</span> <span class="o">+=</span> <span class="no">MAP_SIZE</span><span class="o">;</span> |
| |
| <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">mapElem</span> <span class="o">=</span> <span class="o">(</span><span class="kt">int</span><span class="o">)</span> <span class="n">map</span><span class="o">.</span><span class="na">offsets</span><span class="o">[</span><span class="n">row</span><span class="o">];</span> |
| <span class="n">mapElem</span> <span class="o"><</span> <span class="n">map</span><span class="o">.</span><span class="na">offsets</span><span class="o">[</span><span class="n">row</span><span class="o">]</span> <span class="o">+</span> <span class="no">MAP_SIZE</span><span class="o">;</span> <span class="o">++</span><span class="n">mapElem</span><span class="o">)</span> <span class="o">{</span> |
| <span class="nc">String</span> <span class="n">key</span> <span class="o">=</span> <span class="s">"row "</span> <span class="o">+</span> <span class="n">r</span> <span class="o">+</span> <span class="s">"."</span> <span class="o">+</span> <span class="o">(</span><span class="n">mapElem</span> <span class="o">-</span> <span class="n">map</span><span class="o">.</span><span class="na">offsets</span><span class="o">[</span><span class="n">row</span><span class="o">]);</span> |
| <span class="n">mapKey</span><span class="o">.</span><span class="na">setVal</span><span class="o">(</span><span class="n">mapElem</span><span class="o">,</span> <span class="n">key</span><span class="o">.</span><span class="na">getBytes</span><span class="o">(</span><span class="nc">StandardCharsets</span><span class="o">.</span><span class="na">UTF_8</span><span class="o">));</span> |
| <span class="n">mapValue</span><span class="o">.</span><span class="na">vector</span><span class="o">[</span><span class="n">mapElem</span><span class="o">]</span> <span class="o">=</span> <span class="n">mapElem</span><span class="o">;</span> |
| <span class="o">}</span> |
| <span class="k">if</span> <span class="o">(</span><span class="n">row</span> <span class="o">==</span> <span class="no">BATCH_SIZE</span> <span class="o">-</span> <span class="mi">1</span><span class="o">)</span> <span class="o">{</span> |
| <span class="n">writer</span><span class="o">.</span><span class="na">addRowBatch</span><span class="o">(</span><span class="n">batch</span><span class="o">);</span> |
| <span class="n">batch</span><span class="o">.</span><span class="na">reset</span><span class="o">();</span> |
| <span class="o">}</span> |
| <span class="o">}</span> |
| <span class="k">if</span> <span class="o">(</span><span class="n">batch</span><span class="o">.</span><span class="na">size</span> <span class="o">!=</span> <span class="mi">0</span><span class="o">)</span> <span class="o">{</span> |
| <span class="n">writer</span><span class="o">.</span><span class="na">addRowBatch</span><span class="o">(</span><span class="n">batch</span><span class="o">);</span> |
| <span class="n">batch</span><span class="o">.</span><span class="na">reset</span><span class="o">();</span> |
| <span class="o">}</span> |
| <span class="n">writer</span><span class="o">.</span><span class="na">close</span><span class="o">();</span> |
| |
| </code></pre></div></div> |
| |
| <h2 id="reading-orc-files">Reading ORC Files</h2> |
| |
| <p>To read ORC files, use the |
| <a href="/api/orc-core/index.html?org/apache/orc/OrcFile.html">OrcFile</a> |
| class to create a |
| <a href="/api/orc-core/index.html?org/apache/orc/Reader.html">Reader</a> |
| that contains the metadata about the file. There are a few options to |
| the ORC reader, but far fewer than the writer and none of them are |
| required. The reader has methods for getting the number of rows, |
| schema, compression, etc. from the file.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nc">Reader</span> <span class="n">reader</span> <span class="o">=</span> <span class="nc">OrcFile</span><span class="o">.</span><span class="na">createReader</span><span class="o">(</span><span class="k">new</span> <span class="nc">Path</span><span class="o">(</span><span class="s">"my-file.orc"</span><span class="o">),</span> |
| <span class="nc">OrcFile</span><span class="o">.</span><span class="na">readerOptions</span><span class="o">(</span><span class="n">conf</span><span class="o">));</span> |
| </code></pre></div></div> |
| |
| <p>To get the data, create a |
| <a href="/api/orc-core/index.html?org/apache/orc/RecordReader.html">RecordReader</a> |
| object. By default, the RecordReader reads all rows and all columns, |
| but there are options to control the data that is read.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nc">RecordReader</span> <span class="n">rows</span> <span class="o">=</span> <span class="n">reader</span><span class="o">.</span><span class="na">rows</span><span class="o">();</span> |
| <span class="nc">VectorizedRowBatch</span> <span class="n">batch</span> <span class="o">=</span> <span class="n">reader</span><span class="o">.</span><span class="na">getSchema</span><span class="o">().</span><span class="na">createRowBatch</span><span class="o">();</span> |
| </code></pre></div></div> |
| |
| <p>With a <code class="highlighter-rouge">RecordReader</code> the user can ask for the next batch until there |
| are no more left. The reader will stop the batch at certain boundaries, so the |
| returned batch may not be full, but it will always contain some rows.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">while</span> <span class="o">(</span><span class="n">rows</span><span class="o">.</span><span class="na">nextBatch</span><span class="o">(</span><span class="n">batch</span><span class="o">))</span> <span class="o">{</span> |
| <span class="k">for</span><span class="o">(</span><span class="kt">int</span> <span class="n">r</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">r</span> <span class="o"><</span> <span class="n">batch</span><span class="o">.</span><span class="na">size</span><span class="o">;</span> <span class="o">++</span><span class="n">r</span><span class="o">)</span> <span class="o">{</span> |
| <span class="o">...</span> <span class="n">process</span> <span class="n">row</span> <span class="n">r</span> <span class="n">from</span> <span class="n">batch</span> |
| <span class="o">}</span> |
| <span class="o">}</span> |
| <span class="n">rows</span><span class="o">.</span><span class="na">close</span><span class="o">();</span> |
| </code></pre></div></div> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="section-nav"> |
| <div class="left align-right"> |
| |
| |
| |
| <a href="/docs/mapreduce.html" class="prev">Back</a> |
| |
| </div> |
| <div class="right align-left"> |
| |
| |
| |
| <a href="/docs/core-cpp.html" class="next">Next</a> |
| |
| </div> |
| </div> |
| <div class="clear"></div> |
| |
| |
| </article> |
| </div> |
| |
| <div class="unit one-fifth hide-on-mobiles"> |
| <aside> |
| |
| <h4>Overview</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/index.html">Background</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/types.html">Types</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/indexes.html">Indexes</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/acid.html">ACID support</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Installing</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/building.html">Building ORC</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Spark</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/spark-ddl.html">Spark DDL</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/spark-config.html">Spark Configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Python</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/pyarrow.html">PyArrow</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/dask.html">Dask</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Hive</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in MapReduce</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using ORC Core</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="current"><a href="/docs/core-java.html">Using Core Java</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/core-java-config.html">ORC Java configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Tools</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/java-tools.html">Java Tools</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| </aside> |
| </div> |
| |
| |
| <div class="clear"></div> |
| |
| </div> |
| </section> |
| |
| |
| <footer role="contentinfo"> |
| <p style="margin-left: 20px; margin-right: 20px; text-align: center">The contents of this website are © 2024 |
| <a href="https://www.apache.org/">Apache Software Foundation</a> |
| under the terms of the <a |
| href="https://www.apache.org/licenses/LICENSE-2.0.html"> |
| Apache License v2</a>. Apache ORC and its logo are trademarks |
| of the Apache Software Foundation.</p> |
| </footer> |
| |
| <script> |
| var anchorForId = function (id) { |
| var anchor = document.createElement("a"); |
| anchor.className = "header-link"; |
| anchor.href = "#" + id; |
| anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>"; |
| anchor.title = "Permalink"; |
| return anchor; |
| }; |
| |
| var linkifyAnchors = function (level, containingElement) { |
| var headers = containingElement.getElementsByTagName("h" + level); |
| for (var h = 0; h < headers.length; h++) { |
| var header = headers[h]; |
| |
| if (typeof header.id !== "undefined" && header.id !== "") { |
| header.appendChild(anchorForId(header.id)); |
| } |
| } |
| }; |
| |
| document.onreadystatechange = function () { |
| if (this.readyState === "complete") { |
| var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0]; |
| if (!contentBlock) { |
| return; |
| } |
| for (var level = 1; level <= 6; level++) { |
| linkifyAnchors(level, contentBlock); |
| } |
| } |
| }; |
| </script> |
| |
| |
| </body> |
| </html> |