| <!DOCTYPE HTML> |
| <html lang="en-US"> |
| <head> |
| <meta charset="UTF-8"> |
| <title>C++ Tools</title> |
| <meta name="viewport" content="width=device-width,initial-scale=1"> |
| <meta name="generator" content="Jekyll v3.8.6"> |
| <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900"> |
| <link rel="stylesheet" href="/css/screen.css"> |
| <link rel="icon" type="image/x-icon" href="/favicon.ico"> |
| <!--[if lt IE 9]> |
| <script src="/js/html5shiv.min.js"></script> |
| <script src="/js/respond.min.js"></script> |
| <![endif]--> |
| </head> |
| |
| |
| <body class="wrap"> |
| <header role="banner"> |
| <nav class="mobile-nav show-on-mobiles"> |
| <ul> |
| <li class=""> |
| <a href="/">Home</a> |
| </li> |
| <li class=""> |
| <a href="/releases/"><span class="show-on-mobiles">Rel</span> |
| <span class="hide-on-mobiles">Releases</span></a> |
| </li> |
| <li class="current"> |
| <a href="/docs/"><span class="show-on-mobiles">Doc</span> |
| <span class="hide-on-mobiles">Documentation</span></a> |
| </li> |
| <li class=""> |
| <a href="/talks/"><span class="show-on-mobiles">Talk</span> |
| <span class="hide-on-mobiles">Talks</span></a> |
| </li> |
| <li class=""> |
| <a href="/news/">News</a> |
| </li> |
| <li class=""> |
| <a href="/develop/"><span class="show-on-mobiles">Dev</span> |
| <span class="hide-on-mobiles">Develop</span></a> |
| </li> |
| <li class=""> |
| <a href="/help/">Help</a> |
| </li> |
| </ul> |
| |
| </nav> |
| <div class="grid"> |
| <div class="unit one-quarter center-on-mobiles"> |
| <h1> |
| <a href="/"> |
| <span class="sr-only">Apache ORC</span> |
| <img src="/img/logo.png" width="249" height="101" alt="ORC Logo"> |
| </a> |
| </h1> |
| </div> |
| <nav class="main-nav unit three-quarters hide-on-mobiles"> |
| <ul> |
| <li class=""> |
| <a href="/">Home</a> |
| </li> |
| <li class=""> |
| <a href="/releases/"><span class="show-on-mobiles">Rel</span> |
| <span class="hide-on-mobiles">Releases</span></a> |
| </li> |
| <li class="current"> |
| <a href="/docs/"><span class="show-on-mobiles">Doc</span> |
| <span class="hide-on-mobiles">Documentation</span></a> |
| </li> |
| <li class=""> |
| <a href="/talks/"><span class="show-on-mobiles">Talk</span> |
| <span class="hide-on-mobiles">Talks</span></a> |
| </li> |
| <li class=""> |
| <a href="/news/">News</a> |
| </li> |
| <li class=""> |
| <a href="/develop/"><span class="show-on-mobiles">Dev</span> |
| <span class="hide-on-mobiles">Develop</span></a> |
| </li> |
| <li class=""> |
| <a href="/help/">Help</a> |
| </li> |
| </ul> |
| |
| </nav> |
| </div> |
| </header> |
| |
| |
| <section class="docs"> |
| <div class="grid"> |
| |
| <div class="docs-nav-mobile unit whole show-on-mobiles"> |
| <select onchange="if (this.value) window.location.href=this.value"> |
| <option value="">Navigate the docs…</option> |
| |
| <optgroup label="Overview"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/index.html">Background</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/adopters.html">ORC Adopters</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/types.html">Types</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/indexes.html">Indexes</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/acid.html">ACID support</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Installing"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/building.html">Building ORC</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Spark"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/spark-ddl.html">Spark DDL</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/spark-config.html">Spark Configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Python"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/pyarrow.html">PyArrow</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/dask.html">Dask</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Hive"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/hive-ddl.html">Hive DDL</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/hive-config.html">Hive Configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in MapReduce"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/mapred.html">Using in MapRed</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/mapreduce.html">Using in MapReduce</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using ORC Core"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-java.html">Using Core Java</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-cpp.html">Using Core C++</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-java-config.html">ORC Java configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Tools"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/cpp-tools.html">C++ Tools</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/java-tools.html">Java Tools</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| </select> |
| </div> |
| |
| |
| <div class="unit four-fifths"> |
| <article> |
| <h1>C++ Tools</h1> |
| <h2 id="orc-contents">orc-contents</h2> |
| |
| <p>Displays the contents of the ORC file as a JSON document. With the |
| <code class="highlighter-rouge">columns</code> argument only the selected columns are printed.</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% orc-contents <span class="o">[</span>options] &lt;filename&gt; |
| Options: |
| <span class="nt">-h</span> <span class="nt">--help</span> |
| <span class="nt">-c</span> <span class="nt">--columns</span> Comma separated list of top-level column fields |
| <span class="nt">-t</span> <span class="nt">--columnTypeIds</span> Comma separated list of column <span class="nb">type </span>ids |
| <span class="nt">-n</span> <span class="nt">--columnNames</span> Comma separated list of column names |
| <span class="nt">-b</span> <span class="nt">--batch</span> Batch size <span class="k">for </span>reading |
| </code></pre></div></div> |
| |
| <p>If you run it on the example file TestOrcFile.test1.orc, you’ll see (without |
| the line breaks within each record):</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% orc-contents examples/TestOrcFile.test1.orc |
| <span class="o">{</span><span class="s2">"boolean1"</span>: <span class="nb">false</span>, <span class="s2">"byte1"</span>: 1, <span class="s2">"short1"</span>: 1024, <span class="s2">"int1"</span>: 65536, <span class="se">\\</span> |
| <span class="s2">"long1"</span>: 9223372036854775807, <span class="s2">"float1"</span>: 1, <span class="s2">"double1"</span>: <span class="nt">-15</span>, <span class="se">\\</span> |
| <span class="s2">"bytes1"</span>: <span class="o">[</span>0, 1, 2, 3, 4], <span class="s2">"string1"</span>: <span class="s2">"hi"</span>, <span class="s2">"middle"</span>: <span class="se">\\</span> |
| <span class="o">{</span><span class="s2">"list"</span>: <span class="o">[{</span><span class="s2">"int1"</span>: 1, <span class="s2">"string1"</span>: <span class="s2">"bye"</span><span class="o">}</span>, <span class="se">\\</span> |
| <span class="o">{</span><span class="s2">"int1"</span>: 2, <span class="s2">"string1"</span>: <span class="s2">"sigh"</span><span class="o">}]}</span>, <span class="se">\\</span> |
| <span class="s2">"list"</span>: <span class="o">[{</span><span class="s2">"int1"</span>: 3, <span class="s2">"string1"</span>: <span class="s2">"good"</span><span class="o">}</span>, <span class="se">\\</span> |
| <span class="o">{</span><span class="s2">"int1"</span>: 4, <span class="s2">"string1"</span>: <span class="s2">"bad"</span><span class="o">}]</span>, <span class="se">\\</span> |
| <span class="s2">"map"</span>: <span class="o">[]}</span> |
| <span class="o">{</span><span class="s2">"boolean1"</span>: <span class="nb">true</span>, <span class="s2">"byte1"</span>: 100, <span class="s2">"short1"</span>: 2048, <span class="s2">"int1"</span>: 65536, <span class="se">\\</span> |
| <span class="s2">"long1"</span>: 9223372036854775807, <span class="s2">"float1"</span>: 2, <span class="s2">"double1"</span>: <span class="nt">-5</span>, <span class="se">\\</span> |
| <span class="s2">"bytes1"</span>: <span class="o">[]</span>, <span class="s2">"string1"</span>: <span class="s2">"bye"</span>, <span class="se">\\</span> |
| <span class="s2">"middle"</span>: <span class="o">{</span><span class="s2">"list"</span>: <span class="o">[{</span><span class="s2">"int1"</span>: 1, <span class="s2">"string1"</span>: <span class="s2">"bye"</span><span class="o">}</span>, <span class="se">\\</span> |
| <span class="o">{</span><span class="s2">"int1"</span>: 2, <span class="s2">"string1"</span>: <span class="s2">"sigh"</span><span class="o">}]}</span>, <span class="se">\\</span> |
| <span class="s2">"list"</span>: <span class="o">[{</span><span class="s2">"int1"</span>: 100000000, <span class="s2">"string1"</span>: <span class="s2">"cat"</span><span class="o">}</span>, <span class="se">\\</span> |
| <span class="o">{</span><span class="s2">"int1"</span>: <span class="nt">-100000</span>, <span class="s2">"string1"</span>: <span class="s2">"in"</span><span class="o">}</span>, <span class="se">\\</span> |
| <span class="o">{</span><span class="s2">"int1"</span>: 1234, <span class="s2">"string1"</span>: <span class="s2">"hat"</span><span class="o">}]</span>, <span class="se">\\</span> |
| <span class="s2">"map"</span>: <span class="o">[{</span><span class="s2">"key"</span>: <span class="s2">"chani"</span>, <span class="s2">"value"</span>: <span class="o">{</span><span class="s2">"int1"</span>: 5, <span class="s2">"string1"</span>: <span class="s2">"chani"</span><span class="o">}}</span>, <span class="se">\\</span> |
| <span class="o">{</span><span class="s2">"key"</span>: <span class="s2">"mauddib"</span>, <span class="se">\\</span> |
| <span class="s2">"value"</span>: <span class="o">{</span><span class="s2">"int1"</span>: 1, <span class="s2">"string1"</span>: <span class="s2">"mauddib"</span><span class="o">}}]}</span> |
| </code></pre></div></div> |
| |
| <h2 id="orc-metadata">orc-metadata</h2> |
| |
| <p>Displays the metadata of the ORC file as a JSON document. With the |
| <code class="highlighter-rouge">verbose</code> option additional information about the layout of the file |
| is also printed.</p> |
| |
| <p>For diagnosing problems, it is useful to use the <code class="highlighter-rouge">--raw</code> option that |
| prints the protocol buffers from the ORC file directly rather than |
| interpreting them.</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% orc-metadata <span class="o">[</span><span class="nt">-v</span><span class="o">]</span> <span class="o">[</span><span class="nt">--raw</span><span class="o">]</span> &lt;filename&gt; |
| </code></pre></div></div> |
| |
| <p>If you run it on the example file TestOrcFile.test1.orc, you’ll see:</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% orc-metadata examples/TestOrcFile.test1.orc |
| <span class="o">{</span> <span class="s2">"name"</span>: <span class="s2">"../examples/TestOrcFile.test1.orc"</span>, |
| <span class="s2">"type"</span>: <span class="s2">"struct&lt;boolean1:boolean,byte1:tinyint,short1:smallint, |
| int1:int,long1:bigint,float1:float,double1:double,bytes1:binary, |
| string1:string,middle:struct&lt;list:array&lt;struct&lt;int1:int,string1: |
| string&gt;&gt;&gt;,list:array&lt;struct&lt;int1:int,string1:string&gt;&gt;,map:map&lt; |
| string,struct&lt;int1:int,string1:string&gt;&gt;&gt;"</span>, |
| <span class="s2">"rows"</span>: 2, |
| <span class="s2">"stripe count"</span>: 1, |
| <span class="s2">"format"</span>: <span class="s2">"0.12"</span>, <span class="s2">"writer version"</span>: <span class="s2">"HIVE-8732"</span>, |
| <span class="s2">"compression"</span>: <span class="s2">"zlib"</span>, <span class="s2">"compression block"</span>: 10000, |
| <span class="s2">"file length"</span>: 1711, |
| <span class="s2">"content"</span>: 1015, <span class="s2">"stripe stats"</span>: 250, <span class="s2">"footer"</span>: 421, <span class="s2">"postscript"</span>: 24, |
| <span class="s2">"row index stride"</span>: 10000, |
| <span class="s2">"user metadata"</span>: <span class="o">{</span> |
| <span class="o">}</span>, |
| <span class="s2">"stripes"</span>: <span class="o">[</span> |
| <span class="o">{</span> <span class="s2">"stripe"</span>: 0, <span class="s2">"rows"</span>: 2, |
| <span class="s2">"offset"</span>: 3, <span class="s2">"length"</span>: 1012, |
| <span class="s2">"index"</span>: 570, <span class="s2">"data"</span>: 243, <span class="s2">"footer"</span>: 199 |
| <span class="o">}</span> |
| <span class="o">]</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <h2 id="csv-import">csv-import</h2> |
| |
| <p>Imports a CSV file into an ORC file using the specified schema. |
| Compound types are not yet supported. The <code class="highlighter-rouge">delimiter</code> option specifies |
| the delimiter used in the input CSV file and defaults to <code class="highlighter-rouge">,</code>. The <code class="highlighter-rouge">stripe</code> |
| option sets the stripe size, which is 128MB by default. The <code class="highlighter-rouge">block</code> |
| option sets the compression block size, which is 64KB by default. The <code class="highlighter-rouge">batch</code> |
| option sets the number of rows per batch, which is 1024 by default.</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% csv-import <span class="o">[</span><span class="nt">--delimiter</span><span class="o">=</span>&lt;character&gt;] <span class="o">[</span><span class="nt">--stripe</span><span class="o">=</span>&lt;size&gt;] |
| <span class="o">[</span><span class="nt">--block</span><span class="o">=</span>&lt;size&gt;] <span class="o">[</span><span class="nt">--batch</span><span class="o">=</span>&lt;size&gt;] |
| &lt;schema&gt; &lt;inputCSVFile&gt; &lt;outputORCFile&gt; |
| </code></pre></div></div> |
| |
| <p>If you run it on the example file TestCSVFileImport.test10rows.csv, |
| you’ll see:</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% csv-import <span class="s2">"struct&lt;a:bigint,b:string,c:double&gt;"</span> |
| examples/TestCSVFileImport.test10rows.csv /tmp/test.orc |
| <span class="o">[</span>2018-04-11 11:12:16] Start importing Orc file... |
| <span class="o">[</span>2018-04-11 11:12:16] Finish importing Orc file. |
| <span class="o">[</span>2018-04-11 11:12:16] Total writer elasped <span class="nb">time</span>: 0.001352s. |
| <span class="o">[</span>2018-04-11 11:12:16] Total writer CPU <span class="nb">time</span>: 0.001339s. |
| </code></pre></div></div> |
| |
| <h2 id="orc-scan">orc-scan</h2> |
| |
| <p>Scans and displays the row count of the ORC file. Use the <code class="highlighter-rouge">batch</code> option |
| to set the batch size, which is 1024 rows by default. It is useful for checking |
| whether the ORC file is damaged.</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% orc-scan <span class="o">[</span>options] &lt;filename&gt;... |
| Options: |
| <span class="nt">-h</span> <span class="nt">--help</span> |
| <span class="nt">-c</span> <span class="nt">--columns</span> Comma separated list of top-level column fields |
| <span class="nt">-t</span> <span class="nt">--columnTypeIds</span> Comma separated list of column <span class="nb">type </span>ids |
| <span class="nt">-n</span> <span class="nt">--columnNames</span> Comma separated list of column names |
| <span class="nt">-b</span> <span class="nt">--batch</span> Batch size <span class="k">for </span>reading |
| </code></pre></div></div> |
| |
| <p>If you run it on the example file TestOrcFile.test1.orc, you’ll see:</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% orc-scan examples/TestOrcFile.test1.orc |
| Rows: 2 |
| Batches: 1 |
| </code></pre></div></div> |
| |
| <h2 id="orc-statistics">orc-statistics</h2> |
| |
| <p>Displays the file-level and stripe-level column statistics of the ORC file. |
| Use the <code class="highlighter-rouge">withIndex</code> option to include column statistics for each row group.</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% orc-statistics <span class="o">[</span><span class="nt">--withIndex</span><span class="o">]</span> &lt;filename&gt; |
| </code></pre></div></div> |
| |
| <p>If you run it on the example file TestOrcFile.columnProjection.orc |
| you’ll see:</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% orc-statistics examples/TestOrcFile.columnProjection.orc |
| File examples/TestOrcFile.columnProjection.orc has 3 columns |
| <span class="k">***</span> Column 0 <span class="k">***</span> |
| Column has 21000 values and has null value: no |
| |
| <span class="k">***</span> Column 1 <span class="k">***</span> |
| Data <span class="nb">type</span>: Integer |
| Values: 21000 |
| Has null: no |
| Minimum: <span class="nt">-2147439072</span> |
| Maximum: 2147257982 |
| Sum: 268482658568 |
| |
| <span class="k">***</span> Column 2 <span class="k">***</span> |
| Data <span class="nb">type</span>: String |
| Values: 21000 |
| Has null: no |
| Minimum: 100119c272d7db89 |
| Maximum: fffe9f6f23b287f3 |
| Total length: 334559 |
| |
| File examples/TestOrcFile.columnProjection.orc has 5 stripes |
| <span class="k">***</span> Stripe 0 <span class="k">***</span> |
| |
| <span class="nt">---</span> Column 0 <span class="nt">---</span> |
| Column has 5000 values and has null value: no |
| |
| <span class="nt">---</span> Column 1 <span class="nt">---</span> |
| Data <span class="nb">type</span>: Integer |
| Values: 5000 |
| Has null: no |
| Minimum: <span class="nt">-2145365268</span> |
| Maximum: 2147025027 |
| Sum: <span class="nt">-29841423854</span> |
| |
| <span class="nt">---</span> Column 2 <span class="nt">---</span> |
| Data <span class="nb">type</span>: String |
| Values: 5000 |
| Has null: no |
| Minimum: 1005350489418be2 |
| Maximum: fffbb8718c92b09f |
| Total length: 79644 |
| |
| <span class="k">***</span> Stripe 1 <span class="k">***</span> |
| |
| <span class="nt">---</span> Column 0 <span class="nt">---</span> |
| Column has 5000 values and has null value: no |
| |
| <span class="nt">---</span> Column 1 <span class="nt">---</span> |
| Data <span class="nb">type</span>: Integer |
| Values: 5000 |
| Has null: no |
| Minimum: <span class="nt">-2147115959</span> |
| Maximum: 2147257982 |
| Sum: 108604887785 |
| |
| <span class="nt">---</span> Column 2 <span class="nt">---</span> |
| Data <span class="nb">type</span>: String |
| Values: 5000 |
| Has null: no |
| Minimum: 100119c272d7db89 |
| Maximum: fff0ae41d41e6afc |
| Total length: 79640 |
| |
| <span class="k">***</span> Stripe 2 <span class="k">***</span> |
| |
| <span class="nt">---</span> Column 0 <span class="nt">---</span> |
| Column has 5000 values and has null value: no |
| |
| <span class="nt">---</span> Column 1 <span class="nt">---</span> |
| Data <span class="nb">type</span>: Integer |
| Values: 5000 |
| Has null: no |
| Minimum: <span class="nt">-2145932387</span> |
| Maximum: 2145877119 |
| Sum: 70064190848 |
| |
| <span class="nt">---</span> Column 2 <span class="nt">---</span> |
| Data <span class="nb">type</span>: String |
| Values: 5000 |
| Has null: no |
| Minimum: 10130af874ae036c |
| Maximum: fffe9f6f23b287f3 |
| Total length: 79645 |
| |
| <span class="k">***</span> Stripe 3 <span class="k">***</span> |
| |
| <span class="nt">---</span> Column 0 <span class="nt">---</span> |
| Column has 5000 values and has null value: no |
| |
| <span class="nt">---</span> Column 1 <span class="nt">---</span> |
| Data <span class="nb">type</span>: Integer |
| Values: 5000 |
| Has null: no |
| Minimum: <span class="nt">-2147439072</span> |
| Maximum: 2147074354 |
| Sum: 104681356482 |
| |
| <span class="nt">---</span> Column 2 <span class="nt">---</span> |
| Data <span class="nb">type</span>: String |
| Values: 5000 |
| Has null: no |
| Minimum: 102547d48ed06518 |
| Maximum: fffa47c57dc7b69a |
| Total length: 79689 |
| |
| <span class="k">***</span> Stripe 4 <span class="k">***</span> |
| |
| <span class="nt">---</span> Column 0 <span class="nt">---</span> |
| Column has 1000 values and has null value: no |
| |
| <span class="nt">---</span> Column 1 <span class="nt">---</span> |
| Data <span class="nb">type</span>: Integer |
| Values: 1000 |
| Has null: no |
| Minimum: <span class="nt">-2141222223</span> |
| Maximum: 2145816096 |
| Sum: 14973647307 |
| |
| <span class="nt">---</span> Column 2 <span class="nt">---</span> |
| Data <span class="nb">type</span>: String |
| Values: 1000 |
| Has null: no |
| Minimum: 1059d81c9025a217 |
| Maximum: ffc17f0e35e1a6c0 |
| Total length: 15941 |
| </code></pre></div></div> |
| |
| <h2 id="orc-memory">orc-memory</h2> |
| |
| <p>Estimate the memory footprint for reading the ORC file.</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% orc-memory <span class="o">[</span>options] &lt;filename&gt; |
| Options: |
| <span class="nt">-h</span> <span class="nt">--help</span> |
| <span class="nt">-c</span> <span class="nt">--columns</span> Comma separated list of top-level column fields |
| <span class="nt">-t</span> <span class="nt">--columnTypeIds</span> Comma separated list of column <span class="nb">type </span>ids |
| <span class="nt">-n</span> <span class="nt">--columnNames</span> Comma separated list of column names |
| <span class="nt">-b</span> <span class="nt">--batch</span> Batch size <span class="k">for </span>reading |
| </code></pre></div></div> |
| |
| <p>If you run it on the example file TestOrcFile.columnProjection.orc |
| you’ll see:</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% orc-memory examples/TestOrcFile.columnProjection.orc |
| Reader memory estimate: 202972 |
| Batch memory estimate: 27000 |
| Total memory estimate: 229972 |
| Actual max memory used: 160381 |
| </code></pre></div></div> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="section-nav"> |
| <div class="left align-right"> |
| |
| |
| |
| <a href="/docs/core-java-config.html" class="prev">Back</a> |
| |
| </div> |
| <div class="right align-left"> |
| |
| |
| |
| <a href="/docs/java-tools.html" class="next">Next</a> |
| |
| </div> |
| </div> |
| <div class="clear"></div> |
| |
| |
| </article> |
| </div> |
| |
| <div class="unit one-fifth hide-on-mobiles"> |
| <aside> |
| |
| <h4>Overview</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/index.html">Background</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/types.html">Types</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/indexes.html">Indexes</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/acid.html">ACID support</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Installing</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/building.html">Building ORC</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Spark</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/spark-ddl.html">Spark DDL</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/spark-config.html">Spark Configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Python</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/pyarrow.html">PyArrow</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/dask.html">Dask</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Hive</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in MapReduce</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using ORC Core</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/core-java.html">Using Core Java</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/core-java-config.html">ORC Java configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Tools</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="current"><a href="/docs/cpp-tools.html">C++ Tools</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/java-tools.html">Java Tools</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| </aside> |
| </div> |
| |
| |
| <div class="clear"></div> |
| |
| </div> |
| </section> |
| |
| |
| <footer role="contentinfo"> |
| <p style="margin-left: 20px; margin-right: 20px; text-align: center">The contents of this website are © 2024 |
| <a href="https://www.apache.org/">Apache Software Foundation</a> |
| under the terms of the <a |
| href="https://www.apache.org/licenses/LICENSE-2.0.html"> |
| Apache License v2</a>. Apache ORC and its logo are trademarks |
| of the Apache Software Foundation.</p> |
| </footer> |
| |
| <script> |
| var anchorForId = function (id) { |
| var anchor = document.createElement("a"); |
| anchor.className = "header-link"; |
| anchor.href = "#" + id; |
| anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>"; |
| anchor.title = "Permalink"; |
| return anchor; |
| }; |
| |
| var linkifyAnchors = function (level, containingElement) { |
| var headers = containingElement.getElementsByTagName("h" + level); |
| for (var h = 0; h < headers.length; h++) { |
| var header = headers[h]; |
| |
| if (typeof header.id !== "undefined" && header.id !== "") { |
| header.appendChild(anchorForId(header.id)); |
| } |
| } |
| }; |
| |
| document.onreadystatechange = function () { |
| if (this.readyState === "complete") { |
| var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0]; |
| if (!contentBlock) { |
| return; |
| } |
| for (var level = 1; level <= 6; level++) { |
| linkifyAnchors(level, contentBlock); |
| } |
| } |
| }; |
| </script> |
| |
| |
| </body> |
| </html> |