blob: 6c0c4de029e36b9d168da168bb4c7f13e08cd30c [file] [log] [blame]
<!DOCTYPE HTML>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<title>Types</title>
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Jekyll v3.8.6">
<link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
<link rel="stylesheet" href="/css/screen.css">
<link rel="icon" type="image/x-icon" href="/favicon.ico">
<!--[if lt IE 9]>
<script src="/js/html5shiv.min.js"></script>
<script src="/js/respond.min.js"></script>
<![endif]-->
</head>
<body class="wrap">
<header role="banner">
<nav class="mobile-nav show-on-mobiles">
<ul>
<li class="">
<a href="/">Home</a>
</li>
<li class="">
<a href="/releases/"><span class="show-on-mobiles">Rel</span>
<span class="hide-on-mobiles">Releases</span></a>
</li>
<li class="current">
<a href="/docs/"><span class="show-on-mobiles">Doc</span>
<span class="hide-on-mobiles">Documentation</span></a>
</li>
<li class="">
<a href="/talks/"><span class="show-on-mobiles">Talk</span>
<span class="hide-on-mobiles">Talks</span></a>
</li>
<li class="">
<a href="/news/">News</a>
</li>
<li class="">
<a href="/develop/"><span class="show-on-mobiles">Dev</span>
<span class="hide-on-mobiles">Develop</span></a>
</li>
<li class="">
<a href="/help/">Help</a>
</li>
</ul>
</nav>
<div class="grid">
<div class="unit one-quarter center-on-mobiles">
<h1>
<a href="/">
<span class="sr-only">Apache ORC</span>
<img src="/img/logo.png" width="249" height="101" alt="ORC Logo">
</a>
</h1>
</div>
<nav class="main-nav unit three-quarters hide-on-mobiles">
<ul>
<li class="">
<a href="/">Home</a>
</li>
<li class="">
<a href="/releases/"><span class="show-on-mobiles">Rel</span>
<span class="hide-on-mobiles">Releases</span></a>
</li>
<li class="current">
<a href="/docs/"><span class="show-on-mobiles">Doc</span>
<span class="hide-on-mobiles">Documentation</span></a>
</li>
<li class="">
<a href="/talks/"><span class="show-on-mobiles">Talk</span>
<span class="hide-on-mobiles">Talks</span></a>
</li>
<li class="">
<a href="/news/">News</a>
</li>
<li class="">
<a href="/develop/"><span class="show-on-mobiles">Dev</span>
<span class="hide-on-mobiles">Develop</span></a>
</li>
<li class="">
<a href="/help/">Help</a>
</li>
</ul>
</nav>
</div>
</header>
<section class="docs">
<div class="grid">
<div class="docs-nav-mobile unit whole show-on-mobiles">
<select onchange="if (this.value) window.location.href=this.value">
<option value="">Navigate the docs…</option>
<optgroup label="Overview">
<option value="/docs/index.html">Background</option>
<option value="/docs/adopters.html">ORC Adopters</option>
<option value="/docs/types.html">Types</option>
<option value="/docs/indexes.html">Indexes</option>
<option value="/docs/acid.html">ACID support</option>
</optgroup>
<optgroup label="Installing">
<option value="/docs/building.html">Building ORC</option>
</optgroup>
<optgroup label="Using in Spark">
<option value="/docs/spark-ddl.html">Spark DDL</option>
<option value="/docs/spark-config.html">Spark Configuration</option>
</optgroup>
<optgroup label="Using in Python">
<option value="/docs/pyarrow.html">PyArrow</option>
<option value="/docs/dask.html">Dask</option>
</optgroup>
<optgroup label="Using in Hive">
<option value="/docs/hive-ddl.html">Hive DDL</option>
<option value="/docs/hive-config.html">Hive Configuration</option>
</optgroup>
<optgroup label="Using in MapReduce">
<option value="/docs/mapred.html">Using in MapRed</option>
<option value="/docs/mapreduce.html">Using in MapReduce</option>
</optgroup>
<optgroup label="Using ORC Core">
<option value="/docs/core-java.html">Using Core Java</option>
<option value="/docs/core-cpp.html">Using Core C++</option>
<option value="/docs/core-java-config.html">ORC Java configuration</option>
</optgroup>
<optgroup label="Tools">
<option value="/docs/cpp-tools.html">C++ Tools</option>
<option value="/docs/java-tools.html">Java Tools</option>
</optgroup>
</select>
</div>
<div class="unit four-fifths">
<article>
<h1>Types</h1>
<p>ORC files are completely self-describing and do not depend on the Hive
Metastore or any other external metadata. The file includes all of the
type and encoding information for the objects stored in the file. Because the
file is self-contained, it does not depend on the user’s environment to
correctly interpret the file’s contents.</p>
<p>ORC provides a rich set of scalar and compound types:</p>
<ul>
<li>Integer
<ul>
<li>boolean (1 bit)</li>
<li>tinyint (8 bit)</li>
<li>smallint (16 bit)</li>
<li>int (32 bit)</li>
<li>bigint (64 bit)</li>
</ul>
</li>
<li>Floating point
<ul>
<li>float</li>
<li>double</li>
</ul>
</li>
<li>String types
<ul>
<li>string</li>
<li>char</li>
<li>varchar</li>
</ul>
</li>
<li>Binary blobs
<ul>
<li>binary</li>
</ul>
</li>
<li>Decimal type
<ul>
<li>decimal</li>
</ul>
</li>
<li>Date/time
<ul>
<li>timestamp</li>
<li>timestamp with local time zone</li>
<li>date</li>
</ul>
</li>
<li>Compound types
<ul>
<li>struct</li>
<li>list</li>
<li>map</li>
<li>union</li>
</ul>
</li>
</ul>
<p>All ORC files are logically sequences of identically typed objects. Hive
always uses a struct with a field for each of the top-level columns as
the root object type, but that is not required. All types in ORC can take
null values including the compound types.</p>
<p>Compound types have children columns that hold the values for their
sub-elements. For example, a struct column has one child column for
each field of the struct. Lists always have a single child column for
the element values and maps always have two child columns. Union
columns have one child column for each of the variants.</p>
<p>Given the following definition of the table Foobar, the columns in the
file would form the given tree.</p>
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>create table Foobar (
myInt int,
myMap map&lt;string,
struct&lt;myString : string,
myDouble: double&gt;&gt;,
myTime timestamp
);
</code></pre></div></div>
<p><img src="/img/TreeWriters.png" alt="ORC column structure" /></p>
<h1 id="timestamps">Timestamps</h1>
<p>ORC includes two different forms of timestamps from the SQL world:</p>
<ul>
<li><strong>Timestamp</strong> is a date and time without a time zone, which does not change based on the time zone of the reader.</li>
<li><strong>Timestamp with local time zone</strong> is a fixed instant in time, which does change based on the time zone of the reader.</li>
</ul>
<p>Unless your application uses UTC consistently, <strong>timestamp with
local time zone</strong> is strongly preferred over <strong>timestamp</strong> for most
use cases. When users say an event is at 10:00, it is always in
reference to a certain time zone and means a point in time, rather than
10:00 in an arbitrary time zone.</p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Value in America/Los_Angeles</th>
<th>Value in America/New_York</th>
</tr>
</thead>
<tbody>
<tr>
<td><strong>timestamp</strong></td>
<td>2014-12-12 6:00:00</td>
<td>2014-12-12 6:00:00</td>
</tr>
<tr>
<td><strong>timestamp with local time zone</strong></td>
<td>2014-12-12 6:00:00</td>
<td>2014-12-12 9:00:00</td>
</tr>
</tbody>
</table>
<div class="section-nav">
<div class="left align-right">
<a href="/docs/adopters.html" class="prev">Back</a>
</div>
<div class="right align-left">
<a href="/docs/indexes.html" class="next">Next</a>
</div>
</div>
<div class="clear"></div>
</article>
</div>
<div class="unit one-fifth hide-on-mobiles">
<aside>
<h4>Overview</h4>
<ul>
<li class=""><a href="/docs/index.html">Background</a></li>
<li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
<li class="current"><a href="/docs/types.html">Types</a></li>
<li class=""><a href="/docs/indexes.html">Indexes</a></li>
<li class=""><a href="/docs/acid.html">ACID support</a></li>
</ul>
<h4>Installing</h4>
<ul>
<li class=""><a href="/docs/building.html">Building ORC</a></li>
</ul>
<h4>Using in Spark</h4>
<ul>
<li class=""><a href="/docs/spark-ddl.html">Spark DDL</a></li>
<li class=""><a href="/docs/spark-config.html">Spark Configuration</a></li>
</ul>
<h4>Using in Python</h4>
<ul>
<li class=""><a href="/docs/pyarrow.html">PyArrow</a></li>
<li class=""><a href="/docs/dask.html">Dask</a></li>
</ul>
<h4>Using in Hive</h4>
<ul>
<li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
<li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
</ul>
<h4>Using in MapReduce</h4>
<ul>
<li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
<li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
</ul>
<h4>Using ORC Core</h4>
<ul>
<li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
<li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li>
<li class=""><a href="/docs/core-java-config.html">ORC Java configuration</a></li>
</ul>
<h4>Tools</h4>
<ul>
<li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li>
<li class=""><a href="/docs/java-tools.html">Java Tools</a></li>
</ul>
</aside>
</div>
<div class="clear"></div>
</div>
</section>
<footer role="contentinfo">
<p style="margin-left: 20px; margin-right: 20px; text-align: center">The contents of this website are &copy;&nbsp;2024
<a href="https://www.apache.org/">Apache Software Foundation</a>
under the terms of the <a
href="https://www.apache.org/licenses/LICENSE-2.0.html">
Apache&nbsp;License&nbsp;v2</a>. Apache ORC and its logo are trademarks
of the Apache Software Foundation.</p>
</footer>
<script>
/**
 * Build the permalink anchor that gets attached to a heading.
 *
 * @param {string} id - Fragment identifier; the anchor's href is "#" + id.
 * @returns {HTMLAnchorElement} A new, detached <a class="header-link">
 *     element titled "Permalink".
 */
var anchorForId = function (id) {
  var link = document.createElement("a");
  link.title = "Permalink";
  link.href = "#" + id;
  link.className = "header-link";
  // Screen-reader-only label followed by the icon element (class "fa fa-link").
  link.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>";
  return link;
};
/**
 * Append a permalink anchor to every heading of one level that has an id.
 *
 * @param {number} level - Heading level; the tag looked up is "h" + level.
 * @param {Element} containingElement - Element whose descendants are searched.
 */
var linkifyAnchors = function (level, containingElement) {
  var headings = containingElement.getElementsByTagName("h" + level);
  for (var i = 0; i < headings.length; i += 1) {
    var heading = headings[i];
    // A heading with no id has nothing to link to, so leave it untouched.
    if (typeof heading.id === "undefined" || heading.id === "") {
      continue;
    }
    heading.appendChild(anchorForId(heading.id));
  }
};
// When the document reaches the "complete" ready state, add permalink anchors
// to every h1-h6 that has an id inside the first element with class "docs",
// or, if there is none, the first element with class "news".
document.onreadystatechange = function () {
  // This handler also fires for earlier ready states; ignore those.
  if (this.readyState !== "complete") {
    return;
  }
  var container = document.getElementsByClassName("docs")[0];
  if (!container) {
    container = document.getElementsByClassName("news")[0];
  }
  // Neither content block exists on this page: nothing to do.
  if (!container) {
    return;
  }
  var level = 1;
  while (level <= 6) {
    linkifyAnchors(level, container);
    level++;
  }
};
</script>
</body>
</html>