| <!DOCTYPE HTML> |
| <html lang="en-US"> |
| <head> |
| <meta charset="UTF-8"> |
| <title>Java Tools</title> |
| <meta name="viewport" content="width=device-width,initial-scale=1"> |
| <meta name="generator" content="Jekyll v3.8.6"> |
| <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900"> |
| <link rel="stylesheet" href="/css/screen.css"> |
| <link rel="icon" type="image/x-icon" href="/favicon.ico"> |
| <!--[if lt IE 9]> |
| <script src="/js/html5shiv.min.js"></script> |
| <script src="/js/respond.min.js"></script> |
| <![endif]--> |
| </head> |
| |
| |
| <body class="wrap"> |
| <header role="banner"> |
| <nav class="mobile-nav show-on-mobiles"> |
| <ul> |
| <li class=""> |
| <a href="/">Home</a> |
| </li> |
| <li class=""> |
| <a href="/releases/"><span class="show-on-mobiles">Rel</span> |
| <span class="hide-on-mobiles">Releases</span></a> |
| </li> |
| <li class="current"> |
| <a href="/docs/"><span class="show-on-mobiles">Doc</span> |
| <span class="hide-on-mobiles">Documentation</span></a> |
| </li> |
| <li class=""> |
| <a href="/talks/"><span class="show-on-mobiles">Talk</span> |
| <span class="hide-on-mobiles">Talks</span></a> |
| </li> |
| <li class=""> |
| <a href="/news/">News</a> |
| </li> |
| <li class=""> |
| <a href="/develop/"><span class="show-on-mobiles">Dev</span> |
| <span class="hide-on-mobiles">Develop</span></a> |
| </li> |
| <li class=""> |
| <a href="/help/">Help</a> |
| </li> |
| </ul> |
| |
| </nav> |
| <div class="grid"> |
| <div class="unit one-quarter center-on-mobiles"> |
| <h1> |
| <a href="/"> |
| <span class="sr-only">Apache ORC</span> |
| <img src="/img/logo.png" width="249" height="101" alt="ORC Logo"> |
| </a> |
| </h1> |
| </div> |
| <nav class="main-nav unit three-quarters hide-on-mobiles"> |
| <ul> |
| <li class=""> |
| <a href="/">Home</a> |
| </li> |
| <li class=""> |
| <a href="/releases/"><span class="show-on-mobiles">Rel</span> |
| <span class="hide-on-mobiles">Releases</span></a> |
| </li> |
| <li class="current"> |
| <a href="/docs/"><span class="show-on-mobiles">Doc</span> |
| <span class="hide-on-mobiles">Documentation</span></a> |
| </li> |
| <li class=""> |
| <a href="/talks/"><span class="show-on-mobiles">Talk</span> |
| <span class="hide-on-mobiles">Talks</span></a> |
| </li> |
| <li class=""> |
| <a href="/news/">News</a> |
| </li> |
| <li class=""> |
| <a href="/develop/"><span class="show-on-mobiles">Dev</span> |
| <span class="hide-on-mobiles">Develop</span></a> |
| </li> |
| <li class=""> |
| <a href="/help/">Help</a> |
| </li> |
| </ul> |
| |
| </nav> |
| </div> |
| </header> |
| |
| |
| <section class="docs"> |
| <div class="grid"> |
| |
| <div class="docs-nav-mobile unit whole show-on-mobiles"> |
| <select onchange="if (this.value) window.location.href=this.value"> |
| <option value="">Navigate the docs…</option> |
| |
| <optgroup label="Overview"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/index.html">Background</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/adopters.html">ORC Adopters</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/types.html">Types</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/indexes.html">Indexes</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/acid.html">ACID support</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Installing"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/building.html">Building ORC</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Spark"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/spark-ddl.html">Spark DDL</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/spark-config.html">Spark Configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Python"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/pyarrow.html">PyArrow</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/dask.html">Dask</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Hive"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/hive-ddl.html">Hive DDL</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/hive-config.html">Hive Configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in MapReduce"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/mapred.html">Using in MapRed</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/mapreduce.html">Using in MapReduce</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using ORC Core"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-java.html">Using Core Java</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-cpp.html">Using Core C++</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-java-config.html">ORC Java configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Tools"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/cpp-tools.html">C++ Tools</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/java-tools.html">Java Tools</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| </select> |
| </div> |
| |
| |
| <div class="unit four-fifths"> |
| <article> |
| <h1>Java Tools</h1> |
| <p>In addition to the C++ tools, there is an ORC tools jar that packages |
| several useful utilities and the necessary Java dependencies |
| (including Hadoop) into a single package. The Java ORC tool jar |
| supports both the local file system and HDFS.</p> |
| |
| <p>The subcommands for the tools are:</p> |
| |
| <ul> |
| <li>convert (since ORC 1.4) - convert CSV/JSON/ORC files to ORC</li> |
| <li>count (since ORC 1.6) - recursively find *.orc and print the number of rows</li> |
| <li>data - print the data of an ORC file</li> |
| <li>json-schema (since ORC 1.4) - determine the schema of JSON documents</li> |
| <li>key (since ORC 1.5) - print information about the encryption keys</li> |
| <li>merge - merge multiple ORC files that have the same schema into a single ORC file</li> |
| <li>meta - print the metadata of an ORC file</li> |
| <li>scan (since ORC 1.3) - scan the data for benchmarking</li> |
| <li>sizes (since ORC 1.7.2) - list size on disk of each column</li> |
| <li>version (since ORC 1.6) - print the version of this ORC tool</li> |
| </ul> |
| |
| <p>The command line looks like:</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% java <span class="nt">-jar</span> orc-tools-X.Y.Z-uber.jar &lt;sub-command&gt; &lt;args&gt; |
| </code></pre></div></div> |
| |
| <h2 id="java-convert">Java Convert</h2> |
| |
| <p>The convert command reads several CSV/JSON/ORC files and converts them into a |
| single ORC file.</p> |
| |
| <dl> |
| <dt><code class="highlighter-rouge">-b,--bloomFilterColumns &lt;columns&gt;</code></dt> |
| <dd>Comma-separated list of column names for which bloom filters are to be created. |
| By default, no bloom filters will be created.</dd> |
| <dt><code class="highlighter-rouge">-e,--escape &lt;escape&gt;</code></dt> |
| <dd>Sets CSV escape character</dd> |
| <dt><code class="highlighter-rouge">-h,--help</code></dt> |
| <dd>Print help</dd> |
| <dt><code class="highlighter-rouge">-H,--header &lt;header&gt;</code></dt> |
| <dd>Sets CSV header lines</dd> |
| <dt><code class="highlighter-rouge">-n,--null &lt;null&gt;</code></dt> |
| <dd>Sets CSV null string</dd> |
| <dt><code class="highlighter-rouge">-o,--output &lt;filename&gt;</code></dt> |
| <dd>Sets the output ORC filename, which defaults to output.orc</dd> |
| <dt><code class="highlighter-rouge">-O,--overwrite</code></dt> |
| <dd>If the file already exists, it will be overwritten</dd> |
| <dt><code class="highlighter-rouge">-q,--quote &lt;quote&gt;</code></dt> |
| <dd>Sets CSV quote character</dd> |
| <dt><code class="highlighter-rouge">-s,--schema &lt;schema&gt;</code></dt> |
| <dd>Sets the schema for the ORC file. By default, the schema is automatically discovered.</dd> |
| <dt><code class="highlighter-rouge">-S,--separator &lt;separator&gt;</code></dt> |
| <dd>Sets CSV separator character</dd> |
| <dt><code class="highlighter-rouge">-t,--timestampformat &lt;timestampformat&gt;</code></dt> |
| <dd>Sets the timestamp format</dd> |
| </dl> |
| |
| <p>The automatic JSON schema discovery is equivalent to the json-schema tool |
| below.</p> |
| |
| <h2 id="java-count">Java Count</h2> |
| |
| <p>The count command recursively finds *.orc files and prints the number of rows.</p> |
| |
| <h2 id="java-data">Java Data</h2> |
| |
| <p>The data command prints the data in an ORC file as a JSON document. Each |
| record is printed as a JSON object on a line. Each record is annotated with |
| the field names and a JSON representation that depends on the field’s type.</p> |
| |
| <dl> |
| <dt><code class="highlighter-rouge">-h,--help</code></dt> |
| <dd>Print help</dd> |
| <dt><code class="highlighter-rouge">-n,--lines &lt;LINES&gt;</code></dt> |
| <dd>Sets lines of data to be printed</dd> |
| </dl> |
| |
| <h2 id="java-json-schema">Java JSON Schema</h2> |
| |
| <p>The JSON Schema discovery tool processes a set of JSON documents and |
| produces a schema that encompasses all of the records in all of the |
| documents. It works by computing the enclosing type and promoting it |
| to include all of the observed values.</p> |
| |
| <dl> |
| <dt><code class="highlighter-rouge">-f,--flat</code></dt> |
| <dd>Print the schema as a list of flat types for each subfield</dd> |
| <dt><code class="highlighter-rouge">-h,--help</code></dt> |
| <dd>Print help</dd> |
| <dt><code class="highlighter-rouge">-p,--pretty</code></dt> |
| <dd>Pretty print the schema</dd> |
| <dt><code class="highlighter-rouge">-t,--table</code></dt> |
| <dd>Print the schema as a Hive table declaration</dd> |
| </dl> |
| |
| <h2 id="java-key">Java Key</h2> |
| |
| <p>The key command prints the information about the encryption keys.</p> |
| |
| <dl> |
| <dt><code class="highlighter-rouge">-h,--help</code></dt> |
| <dd>Print help</dd> |
| <dt><code class="highlighter-rouge">-o,--output &lt;output&gt;</code></dt> |
| <dd>Output filename</dd> |
| </dl> |
| |
| <h2 id="java-meta">Java Meta</h2> |
| |
| <p>The meta command prints the metadata about the given ORC file and is |
| equivalent to the Hive ORC File Dump command.</p> |
| |
| <dl> |
| <dt><code class="highlighter-rouge">--backup-path &lt;path&gt;</code></dt> |
| <dd>When used with --recover, specifies the path where the recovered file is written (default: /tmp)</dd> |
| <dt><code class="highlighter-rouge">-d,--data</code></dt> |
| <dd>Should the data be printed</dd> |
| <dt><code class="highlighter-rouge">-h,--help</code></dt> |
| <dd>Print help</dd> |
| <dt><code class="highlighter-rouge">-j,--json</code></dt> |
| <dd>Format the output in JSON</dd> |
| <dt><code class="highlighter-rouge">-p,--pretty</code></dt> |
| <dd>Pretty print the output</dd> |
| <dt><code class="highlighter-rouge">-r,--rowindex &lt;ids&gt;</code></dt> |
| <dd>Print the row indexes for the comma-separated list of column ids</dd> |
| <dt><code class="highlighter-rouge">--recover</code></dt> |
| <dd>Skip over corrupted values in the ORC file</dd> |
| <dt><code class="highlighter-rouge">--skip-dump</code></dt> |
| <dd>Skip dumping the metadata</dd> |
| <dt><code class="highlighter-rouge">-t,--timezone</code></dt> |
| <dd>Print the timezone of the writer</dd> |
| </dl> |
| |
| <p>An example of the output is given below:</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% java <span class="nt">-jar</span> orc-tools-X.Y.Z-uber.jar meta examples/TestOrcFile.test1.orc |
| Processing data file examples/TestOrcFile.test1.orc <span class="o">[</span>length: 1711] |
| Structure <span class="k">for </span>examples/TestOrcFile.test1.orc |
| File Version: 0.12 with HIVE_8732 |
| Rows: 2 |
| Compression: ZLIB |
| Compression size: 10000 |
| Type: struct&lt;boolean1:boolean,byte1:tinyint,short1:smallint,int1:int, |
| long1:bigint,float1:float,double1:double,bytes1:binary,string1:string, |
| middle:struct&lt;list:array&lt;struct&lt;int1:int,string1:string&gt;&gt;&gt;,list:array&lt; |
| struct&lt;int1:int,string1:string&gt;&gt;,map:map&lt;string,struct&lt;int1:int,string1: |
| string&gt;&gt;&gt; |
| |
| Stripe Statistics: |
| Stripe 1: |
| Column 0: count: 2 hasNull: <span class="nb">false |
| </span>Column 1: count: 2 hasNull: <span class="nb">false true</span>: 1 |
| Column 2: count: 2 hasNull: <span class="nb">false </span>min: 1 max: 100 <span class="nb">sum</span>: 101 |
| Column 3: count: 2 hasNull: <span class="nb">false </span>min: 1024 max: 2048 <span class="nb">sum</span>: 3072 |
| Column 4: count: 2 hasNull: <span class="nb">false </span>min: 65536 max: 65536 <span class="nb">sum</span>: 131072 |
| Column 5: count: 2 hasNull: <span class="nb">false </span>min: 9223372036854775807 max: 9223372036854775807 |
| Column 6: count: 2 hasNull: <span class="nb">false </span>min: 1.0 max: 2.0 <span class="nb">sum</span>: 3.0 |
| Column 7: count: 2 hasNull: <span class="nb">false </span>min: <span class="nt">-15</span>.0 max: <span class="nt">-5</span>.0 <span class="nb">sum</span>: <span class="nt">-20</span>.0 |
| Column 8: count: 2 hasNull: <span class="nb">false sum</span>: 5 |
| Column 9: count: 2 hasNull: <span class="nb">false </span>min: bye max: hi <span class="nb">sum</span>: 5 |
| Column 10: count: 2 hasNull: <span class="nb">false |
| </span>Column 11: count: 2 hasNull: <span class="nb">false |
| </span>Column 12: count: 4 hasNull: <span class="nb">false |
| </span>Column 13: count: 4 hasNull: <span class="nb">false </span>min: 1 max: 2 <span class="nb">sum</span>: 6 |
| Column 14: count: 4 hasNull: <span class="nb">false </span>min: bye max: sigh <span class="nb">sum</span>: 14 |
| Column 15: count: 2 hasNull: <span class="nb">false |
| </span>Column 16: count: 5 hasNull: <span class="nb">false |
| </span>Column 17: count: 5 hasNull: <span class="nb">false </span>min: <span class="nt">-100000</span> max: 100000000 <span class="nb">sum</span>: 99901241 |
| Column 18: count: 5 hasNull: <span class="nb">false </span>min: bad max: <span class="k">in </span><span class="nb">sum</span>: 15 |
| Column 19: count: 2 hasNull: <span class="nb">false |
| </span>Column 20: count: 2 hasNull: <span class="nb">false </span>min: chani max: mauddib <span class="nb">sum</span>: 12 |
| Column 21: count: 2 hasNull: <span class="nb">false |
| </span>Column 22: count: 2 hasNull: <span class="nb">false </span>min: 1 max: 5 <span class="nb">sum</span>: 6 |
| Column 23: count: 2 hasNull: <span class="nb">false </span>min: chani max: mauddib <span class="nb">sum</span>: 12 |
| |
| File Statistics: |
| Column 0: count: 2 hasNull: <span class="nb">false |
| </span>Column 1: count: 2 hasNull: <span class="nb">false true</span>: 1 |
| Column 2: count: 2 hasNull: <span class="nb">false </span>min: 1 max: 100 <span class="nb">sum</span>: 101 |
| Column 3: count: 2 hasNull: <span class="nb">false </span>min: 1024 max: 2048 <span class="nb">sum</span>: 3072 |
| Column 4: count: 2 hasNull: <span class="nb">false </span>min: 65536 max: 65536 <span class="nb">sum</span>: 131072 |
| Column 5: count: 2 hasNull: <span class="nb">false </span>min: 9223372036854775807 max: 9223372036854775807 |
| Column 6: count: 2 hasNull: <span class="nb">false </span>min: 1.0 max: 2.0 <span class="nb">sum</span>: 3.0 |
| Column 7: count: 2 hasNull: <span class="nb">false </span>min: <span class="nt">-15</span>.0 max: <span class="nt">-5</span>.0 <span class="nb">sum</span>: <span class="nt">-20</span>.0 |
| Column 8: count: 2 hasNull: <span class="nb">false sum</span>: 5 |
| Column 9: count: 2 hasNull: <span class="nb">false </span>min: bye max: hi <span class="nb">sum</span>: 5 |
| Column 10: count: 2 hasNull: <span class="nb">false |
| </span>Column 11: count: 2 hasNull: <span class="nb">false |
| </span>Column 12: count: 4 hasNull: <span class="nb">false |
| </span>Column 13: count: 4 hasNull: <span class="nb">false </span>min: 1 max: 2 <span class="nb">sum</span>: 6 |
| Column 14: count: 4 hasNull: <span class="nb">false </span>min: bye max: sigh <span class="nb">sum</span>: 14 |
| Column 15: count: 2 hasNull: <span class="nb">false |
| </span>Column 16: count: 5 hasNull: <span class="nb">false |
| </span>Column 17: count: 5 hasNull: <span class="nb">false </span>min: <span class="nt">-100000</span> max: 100000000 <span class="nb">sum</span>: 99901241 |
| Column 18: count: 5 hasNull: <span class="nb">false </span>min: bad max: <span class="k">in </span><span class="nb">sum</span>: 15 |
| Column 19: count: 2 hasNull: <span class="nb">false |
| </span>Column 20: count: 2 hasNull: <span class="nb">false </span>min: chani max: mauddib <span class="nb">sum</span>: 12 |
| Column 21: count: 2 hasNull: <span class="nb">false |
| </span>Column 22: count: 2 hasNull: <span class="nb">false </span>min: 1 max: 5 <span class="nb">sum</span>: 6 |
| Column 23: count: 2 hasNull: <span class="nb">false </span>min: chani max: mauddib <span class="nb">sum</span>: 12 |
| |
| Stripes: |
| Stripe: offset: 3 data: 243 rows: 2 <span class="nb">tail</span>: 199 index: 570 |
| Stream: column 0 section ROW_INDEX start: 3 length 11 |
| Stream: column 1 section ROW_INDEX start: 14 length 22 |
| Stream: column 2 section ROW_INDEX start: 36 length 26 |
| Stream: column 3 section ROW_INDEX start: 62 length 27 |
| Stream: column 4 section ROW_INDEX start: 89 length 30 |
| Stream: column 5 section ROW_INDEX start: 119 length 28 |
| Stream: column 6 section ROW_INDEX start: 147 length 34 |
| Stream: column 7 section ROW_INDEX start: 181 length 34 |
| Stream: column 8 section ROW_INDEX start: 215 length 21 |
| Stream: column 9 section ROW_INDEX start: 236 length 30 |
| Stream: column 10 section ROW_INDEX start: 266 length 11 |
| Stream: column 11 section ROW_INDEX start: 277 length 16 |
| Stream: column 12 section ROW_INDEX start: 293 length 11 |
| Stream: column 13 section ROW_INDEX start: 304 length 24 |
| Stream: column 14 section ROW_INDEX start: 328 length 31 |
| Stream: column 15 section ROW_INDEX start: 359 length 16 |
| Stream: column 16 section ROW_INDEX start: 375 length 11 |
| Stream: column 17 section ROW_INDEX start: 386 length 32 |
| Stream: column 18 section ROW_INDEX start: 418 length 30 |
| Stream: column 19 section ROW_INDEX start: 448 length 16 |
| Stream: column 20 section ROW_INDEX start: 464 length 37 |
| Stream: column 21 section ROW_INDEX start: 501 length 11 |
| Stream: column 22 section ROW_INDEX start: 512 length 24 |
| Stream: column 23 section ROW_INDEX start: 536 length 37 |
| Stream: column 1 section DATA start: 573 length 5 |
| Stream: column 2 section DATA start: 578 length 6 |
| Stream: column 3 section DATA start: 584 length 9 |
| Stream: column 4 section DATA start: 593 length 11 |
| Stream: column 5 section DATA start: 604 length 12 |
| Stream: column 6 section DATA start: 616 length 11 |
| Stream: column 7 section DATA start: 627 length 15 |
| Stream: column 8 section DATA start: 642 length 8 |
| Stream: column 8 section LENGTH start: 650 length 6 |
| Stream: column 9 section DATA start: 656 length 8 |
| Stream: column 9 section LENGTH start: 664 length 6 |
| Stream: column 11 section LENGTH start: 670 length 6 |
| Stream: column 13 section DATA start: 676 length 7 |
| Stream: column 14 section DATA start: 683 length 6 |
| Stream: column 14 section LENGTH start: 689 length 6 |
| Stream: column 14 section DICTIONARY_DATA start: 695 length 10 |
| Stream: column 15 section LENGTH start: 705 length 6 |
| Stream: column 17 section DATA start: 711 length 25 |
| Stream: column 18 section DATA start: 736 length 18 |
| Stream: column 18 section LENGTH start: 754 length 8 |
| Stream: column 19 section LENGTH start: 762 length 6 |
| Stream: column 20 section DATA start: 768 length 15 |
| Stream: column 20 section LENGTH start: 783 length 6 |
| Stream: column 22 section DATA start: 789 length 6 |
| Stream: column 23 section DATA start: 795 length 15 |
| Stream: column 23 section LENGTH start: 810 length 6 |
| Encoding column 0: DIRECT |
| Encoding column 1: DIRECT |
| Encoding column 2: DIRECT |
| Encoding column 3: DIRECT_V2 |
| Encoding column 4: DIRECT_V2 |
| Encoding column 5: DIRECT_V2 |
| Encoding column 6: DIRECT |
| Encoding column 7: DIRECT |
| Encoding column 8: DIRECT_V2 |
| Encoding column 9: DIRECT_V2 |
| Encoding column 10: DIRECT |
| Encoding column 11: DIRECT_V2 |
| Encoding column 12: DIRECT |
| Encoding column 13: DIRECT_V2 |
| Encoding column 14: DICTIONARY_V2[2] |
| Encoding column 15: DIRECT_V2 |
| Encoding column 16: DIRECT |
| Encoding column 17: DIRECT_V2 |
| Encoding column 18: DIRECT_V2 |
| Encoding column 19: DIRECT_V2 |
| Encoding column 20: DIRECT_V2 |
| Encoding column 21: DIRECT |
| Encoding column 22: DIRECT_V2 |
| Encoding column 23: DIRECT_V2 |
| |
| File length: 1711 bytes |
| Padding length: 0 bytes |
| Padding ratio: 0% |
| ______________________________________________________________________ |
| </code></pre></div></div> |
| |
| <h2 id="java-scan">Java Scan</h2> |
| |
| <p>The scan command reads the contents of the file without printing anything. It |
| is primarily intended for benchmarking the Java reader without including the |
| cost of printing the data out.</p> |
| |
| <dl> |
| <dt><code class="highlighter-rouge">-h,--help</code></dt> |
| <dd>Print help</dd> |
| <dt><code class="highlighter-rouge">-s,--schema</code></dt> |
| <dd>Print schema</dd> |
| <dt><code class="highlighter-rouge">-v,--verbose</code></dt> |
| <dd>Print exceptions</dd> |
| </dl> |
| |
| <h2 id="java-sizes">Java Sizes</h2> |
| |
| <p>The sizes command lists the size on disk of each column. The output contains not |
| only the raw data of the table, but also the size of metadata such as <code class="highlighter-rouge">padding</code>, |
| <code class="highlighter-rouge">stripeFooter</code>, <code class="highlighter-rouge">fileFooter</code>, <code class="highlighter-rouge">stripeIndex</code> and <code class="highlighter-rouge">stripeData</code>.</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% java <span class="nt">-jar</span> orc-tools-X.Y.Z-uber.jar sizes examples/my-file.orc |
| Percent Bytes/Row Name |
| 98.45 2.62 y |
| 0.81 0.02 _file_footer |
| 0.30 0.01 _index |
| 0.25 0.01 x |
| 0.19 0.01 _stripe_footer |
| ______________________________________________________________________ |
| </code></pre></div></div> |
| |
| <h2 id="java-merge">Java Merge</h2> |
| |
| <p>The merge command can merge multiple ORC files that all have the same schema into a single ORC file.</p> |
| |
| <div class="language-shell highlighter-rouge"><div class="highlight"><pre class="highlight"><code>% java <span class="nt">-jar</span> orc-tools-X.Y.Z-uber.jar merge <span class="nt">--output</span> /path/to/merged.orc /path/to/input_orc/ |
| ______________________________________________________________________ |
| </code></pre></div></div> |
| |
| <h2 id="java-version">Java Version</h2> |
| |
| <p>The version command prints the version of this ORC tool.</p> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="section-nav"> |
| <div class="left align-right"> |
| |
| |
| |
| <a href="/docs/cpp-tools.html" class="prev">Back</a> |
| |
| </div> |
| <div class="right align-left"> |
| |
| <span class="next disabled">Next</span> |
| |
| </div> |
| </div> |
| <div class="clear"></div> |
| |
| |
| </article> |
| </div> |
| |
| <div class="unit one-fifth hide-on-mobiles"> |
| <aside> |
| |
| <h4>Overview</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/index.html">Background</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/types.html">Types</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/indexes.html">Indexes</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/acid.html">ACID support</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Installing</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/building.html">Building ORC</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Spark</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/spark-ddl.html">Spark DDL</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/spark-config.html">Spark Configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Python</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/pyarrow.html">PyArrow</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/dask.html">Dask</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Hive</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in MapReduce</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using ORC Core</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/core-java.html">Using Core Java</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/core-java-config.html">ORC Java configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Tools</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="current"><a href="/docs/java-tools.html">Java Tools</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| </aside> |
| </div> |
| |
| |
| <div class="clear"></div> |
| |
| </div> |
| </section> |
| |
| |
| <footer role="contentinfo"> |
| <p style="margin-left: 20px; margin-right: 20px; text-align: center">The contents of this website are © 2024 |
| <a href="https://www.apache.org/">Apache Software Foundation</a> |
| under the terms of the <a |
| href="https://www.apache.org/licenses/LICENSE-2.0.html"> |
| Apache License v2</a>. Apache ORC and its logo are trademarks |
| of the Apache Software Foundation.</p> |
| </footer> |
| |
| <script> |
| var anchorForId = function (id) { |
| var anchor = document.createElement("a"); |
| anchor.className = "header-link"; |
| anchor.href = "#" + id; |
| anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>"; |
| anchor.title = "Permalink"; |
| return anchor; |
| }; |
| |
| var linkifyAnchors = function (level, containingElement) { |
| var headers = containingElement.getElementsByTagName("h" + level); |
| for (var h = 0; h < headers.length; h++) { |
| var header = headers[h]; |
| |
| if (typeof header.id !== "undefined" && header.id !== "") { |
| header.appendChild(anchorForId(header.id)); |
| } |
| } |
| }; |
| |
| document.onreadystatechange = function () { |
| if (this.readyState === "complete") { |
| var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0]; |
| if (!contentBlock) { |
| return; |
| } |
| for (var level = 1; level <= 6; level++) { |
| linkifyAnchors(level, contentBlock); |
| } |
| } |
| }; |
| </script> |
| |
| |
| </body> |
| </html> |