blob: 3e4503334e1689b4f3d0b0beb5a6098dbc2bee94 [file] [log] [blame]
<!DOCTYPE HTML>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<title>ORC Adopters</title>
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Jekyll v3.8.6">
<link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
<link rel="stylesheet" href="/css/screen.css">
<link rel="icon" type="image/x-icon" href="/favicon.ico">
<!--[if lt IE 9]>
<script src="/js/html5shiv.min.js"></script>
<script src="/js/respond.min.js"></script>
<![endif]-->
</head>
<body class="wrap">
<header role="banner">
<nav class="mobile-nav show-on-mobiles">
<ul>
<li class="">
<a href="/">Home</a>
</li>
<li class="">
<a href="/releases/"><span class="show-on-mobiles">Rel</span>
<span class="hide-on-mobiles">Releases</span></a>
</li>
<li class="current">
<a href="/docs/"><span class="show-on-mobiles">Doc</span>
<span class="hide-on-mobiles">Documentation</span></a>
</li>
<li class="">
<a href="/talks/"><span class="show-on-mobiles">Talk</span>
<span class="hide-on-mobiles">Talks</span></a>
</li>
<li class="">
<a href="/news/">News</a>
</li>
<li class="">
<a href="/develop/"><span class="show-on-mobiles">Dev</span>
<span class="hide-on-mobiles">Develop</span></a>
</li>
<li class="">
<a href="/help/">Help</a>
</li>
</ul>
</nav>
<div class="grid">
<div class="unit one-quarter center-on-mobiles">
<h1>
<a href="/">
<span class="sr-only">Apache ORC</span>
<img src="/img/logo.png" width="249" height="101" alt="ORC Logo">
</a>
</h1>
</div>
<nav class="main-nav unit three-quarters hide-on-mobiles">
<ul>
<li class="">
<a href="/">Home</a>
</li>
<li class="">
<a href="/releases/"><span class="show-on-mobiles">Rel</span>
<span class="hide-on-mobiles">Releases</span></a>
</li>
<li class="current">
<a href="/docs/"><span class="show-on-mobiles">Doc</span>
<span class="hide-on-mobiles">Documentation</span></a>
</li>
<li class="">
<a href="/talks/"><span class="show-on-mobiles">Talk</span>
<span class="hide-on-mobiles">Talks</span></a>
</li>
<li class="">
<a href="/news/">News</a>
</li>
<li class="">
<a href="/develop/"><span class="show-on-mobiles">Dev</span>
<span class="hide-on-mobiles">Develop</span></a>
</li>
<li class="">
<a href="/help/">Help</a>
</li>
</ul>
</nav>
</div>
</header>
<section class="docs">
<div class="grid">
<div class="docs-nav-mobile unit whole show-on-mobiles">
<select onchange="if (this.value) window.location.href=this.value">
<option value="">Navigate the docs…</option>
<optgroup label="Overview">
<option value="/docs/index.html">Background</option>
<option value="/docs/adopters.html">ORC Adopters</option>
<option value="/docs/types.html">Types</option>
<option value="/docs/indexes.html">Indexes</option>
<option value="/docs/acid.html">ACID support</option>
</optgroup>
<optgroup label="Installing">
<option value="/docs/building.html">Building ORC</option>
</optgroup>
<optgroup label="Using in Spark">
<option value="/docs/spark-ddl.html">Spark DDL</option>
<option value="/docs/spark-config.html">Spark Configuration</option>
</optgroup>
<optgroup label="Using in Python">
<option value="/docs/pyarrow.html">PyArrow</option>
<option value="/docs/dask.html">Dask</option>
</optgroup>
<optgroup label="Using in Hive">
<option value="/docs/hive-ddl.html">Hive DDL</option>
<option value="/docs/hive-config.html">Hive Configuration</option>
</optgroup>
<optgroup label="Using in MapReduce">
<option value="/docs/mapred.html">Using in MapRed</option>
<option value="/docs/mapreduce.html">Using in MapReduce</option>
</optgroup>
<optgroup label="Using ORC Core">
<option value="/docs/core-java.html">Using Core Java</option>
<option value="/docs/core-cpp.html">Using Core C++</option>
<option value="/docs/core-java-config.html">ORC Java configuration</option>
</optgroup>
<optgroup label="Tools">
<option value="/docs/cpp-tools.html">C++ Tools</option>
<option value="/docs/java-tools.html">Java Tools</option>
</optgroup>
</select>
</div>
<div class="unit four-fifths">
<article>
<h1>ORC Adopters</h1>
<p>If your company or tool uses ORC, please let us know so that we can update
this page.</p>
<h3 id="apache-hadoop"><a href="https://hadoop.apache.org/">Apache Hadoop</a></h3>
<p>ORC files have always supported reading and writing from Hadoop’s MapReduce,
but with the ORC 1.1.0 release it is now easier than ever without pulling in
Hive’s exec jar and all of its dependencies. OrcStruct now also implements
WritableComparable and can be serialized through the MapReduce shuffle.</p>
<h3 id="apache-spark"><a href="https://spark.apache.org/">Apache Spark</a></h3>
<p>Apache Spark has <a href="https://databricks.com/blog/2015/07/16/joint-blog-post-bringing-orc-support-into-apache-spark.html">added
support</a>
for reading and writing ORC files with support for column projection and
predicate push down.</p>
<h3 id="apache-arrow"><a href="https://arrow.apache.org/">Apache Arrow</a></h3>
<p>Apache Arrow supports reading and writing <a href="https://arrow.apache.org/docs/index.html?highlight=orc#apache-arrow">ORC file format</a>.</p>
<h3 id="apache-flink"><a href="https://flink.apache.org/">Apache Flink</a></h3>
<p>Apache Flink supports
<a href="https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/connectors/table/formats/orc/">ORC format in Table API</a>
for reading and writing ORC files.</p>
<h3 id="apache-iceberg"><a href="https://iceberg.apache.org/">Apache Iceberg</a></h3>
<p>Apache Iceberg supports <a href="https://iceberg.apache.org/#spec/#orc">ORC spec</a> to use ORC tables.</p>
<h3 id="apache-druid"><a href="https://druid.apache.org/">Apache Druid</a></h3>
<p>Apache Druid supports
<a href="https://druid.apache.org/docs/0.22.1/development/extensions-core/orc.html#orc-extension">ORC extension</a>
to ingest and understand the Apache ORC data format.</p>
<h3 id="apache-hive"><a href="https://hive.apache.org/">Apache Hive</a></h3>
<p>Apache Hive was the original use case and home for ORC. ORC’s strong
type system, advanced compression, column projection, predicate push
down, and vectorization support make Hive <a href="https://hortonworks.com/blog/orcfile-in-hdp-2-better-compression-better-performance/">perform
better</a>
than any other format for your data.</p>
<h3 id="apache-impala"><a href="https://impala.apache.org/">Apache Impala</a></h3>
<p>Apache Impala supports <a href="https://impala.apache.org/docs/build/html/topics/impala_orc.html">reading from ORC format Hive tables</a>
by leveraging the ORC C++ library.</p>
<h3 id="apache-gobblin"><a href="https://gobblin.apache.org/">Apache Gobblin</a></h3>
<p>Apache Gobblin supports
<a href="https://gobblin.apache.org/docs/case-studies/Writing-ORC-Data/">writing data to ORC files</a>
by leveraging Apache Hive’s SerDe library.</p>
<h3 id="apache-nifi"><a href="https://nifi.apache.org/">Apache Nifi</a></h3>
<p>Apache Nifi is <a href="https://issues.apache.org/jira/browse/NIFI-1663">adding
support</a> for writing
ORC files.</p>
<h3 id="apache-pig"><a href="https://pig.apache.org/">Apache Pig</a></h3>
<p>Apache Pig added support for reading and writing ORC files in <a href="https://hortonworks.com/blog/announcing-apache-pig-0-14-0/">Pig
0.14.0</a>.</p>
<h3 id="eel"><a href="https://github.com/51zero/eel-sdk">EEL</a></h3>
<p>EEL is a Scala BigData API that supports reading and writing data for
various file formats and storage systems including to and from ORC. It
is designed as an in-process low-level API for manipulating data. Data
is lazily streamed from source to sink and using standard Scala
operations such as map, flatMap and filter, it is especially suited
for ETL style applications. EEL supports ORC predicate and projection
pushdowns and correctly handles conversions from other formats including
complex types such as maps, lists or nested structs. A typical use
case would be to extract data from JDBC to ORC files housed in HDFS,
or directly into Hive tables backed by an ORC file format.</p>
<h3 id="facebook"><a href="https://facebook.com">Facebook</a></h3>
<p>With more than 300 PB of data, Facebook was an <a href="https://code.facebook.com/posts/229861827208629/scaling-the-facebook-data-warehouse-to-300-pb/">early adopter of
ORC</a> and quickly put it into production.</p>
<h3 id="linkedin"><a href="https://linkedin.com">LinkedIn</a></h3>
<p>LinkedIn uses
<a href="https://engineering.linkedin.com/blog/2021/fastingest-low-latency-gobblin">the ORC file format</a>
with Apache Iceberg metadata catalog and Apache Gobblin to provide our data customers with high-query performance.</p>
<p>https://engineering.linkedin.com/blog/2021/fastingest-low-latency-gobblin</p>
<h3 id="trino-formerly-presto-sql"><a href="https://trino.io/">Trino (formerly Presto SQL)</a></h3>
<p>The Trino team has done a lot of work <a href="https://code.facebook.com/posts/370832626374903/even-faster-data-at-the-speed-of-presto-orc/">integrating
ORC</a> into their SQL engine.</p>
<h3 id="timber"><a href="https://timber.io/">Timber</a></h3>
<p>Timber adopted ORC for its S3-based logging platform that stores
petabytes of log data. ORC has been key in ensuring a fast,
cost-effective strategy for persisting and querying that data.</p>
<h3 id="vertica"><a href="http://www8.hp.com/us/en/software-solutions/advanced-sql-big-data-analytics/">Vertica</a></h3>
<p>HPE Vertica has contributed significantly to the ORC C++ library. ORC
is a significant part of Vertica SQL-on-Hadoop (VSQLoH) which brings
the performance, reliability and standards compliance of the Vertica
Analytic Database to the Hadoop ecosystem.</p>
<div class="section-nav">
<div class="left align-right">
<a href="/docs/index.html" class="prev">Back</a>
</div>
<div class="right align-left">
<a href="/docs/types.html" class="next">Next</a>
</div>
</div>
<div class="clear"></div>
</article>
</div>
<div class="unit one-fifth hide-on-mobiles">
<aside>
<h4>Overview</h4>
<ul>
<li class=""><a href="/docs/index.html">Background</a></li>
<li class="current"><a href="/docs/adopters.html">ORC Adopters</a></li>
<li class=""><a href="/docs/types.html">Types</a></li>
<li class=""><a href="/docs/indexes.html">Indexes</a></li>
<li class=""><a href="/docs/acid.html">ACID support</a></li>
</ul>
<h4>Installing</h4>
<ul>
<li class=""><a href="/docs/building.html">Building ORC</a></li>
</ul>
<h4>Using in Spark</h4>
<ul>
<li class=""><a href="/docs/spark-ddl.html">Spark DDL</a></li>
<li class=""><a href="/docs/spark-config.html">Spark Configuration</a></li>
</ul>
<h4>Using in Python</h4>
<ul>
<li class=""><a href="/docs/pyarrow.html">PyArrow</a></li>
<li class=""><a href="/docs/dask.html">Dask</a></li>
</ul>
<h4>Using in Hive</h4>
<ul>
<li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
<li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
</ul>
<h4>Using in MapReduce</h4>
<ul>
<li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
<li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
</ul>
<h4>Using ORC Core</h4>
<ul>
<li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
<li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li>
<li class=""><a href="/docs/core-java-config.html">ORC Java configuration</a></li>
</ul>
<h4>Tools</h4>
<ul>
<li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li>
<li class=""><a href="/docs/java-tools.html">Java Tools</a></li>
</ul>
</aside>
</div>
<div class="clear"></div>
</div>
</section>
<footer role="contentinfo">
<p style="margin-left: 20px; margin-right: 20px; text-align: center">The contents of this website are &copy;&nbsp;2024
<a href="https://www.apache.org/">Apache Software Foundation</a>
under the terms of the <a
href="https://www.apache.org/licenses/LICENSE-2.0.html">
Apache&nbsp;License&nbsp;v2</a>. Apache ORC and its logo are trademarks
of the Apache Software Foundation.</p>
</footer>
<script>
// Builds the permalink anchor that gets attached to a heading.
// `id` is the heading's id; the returned "a" element points at "#" + id,
// carries the "header-link" class and a "Permalink" title, and contains a
// "sr-only" text label followed by an icon element.
var anchorForId = function (id) {
  var fragment = "#" + id;
  var permalink = document.createElement("a");

  // Attribute-backed properties are set in class, href, title order so the
  // serialized element keeps the same attribute order.
  permalink.className = "header-link";
  permalink.href = fragment;
  permalink.title = "Permalink";
  permalink.innerHTML = '<span class="sr-only">Permalink</span><i class="fa fa-link"></i>';

  return permalink;
};
// Appends a permalink anchor to every heading of the given level (1-6) found
// inside `containingElement`. Headings without a non-empty id are skipped,
// since there would be nothing for the permalink to point at.
var linkifyAnchors = function (level, containingElement) {
  var headings = containingElement.getElementsByTagName("h" + level);
  var index = 0;

  // The length is re-read on every pass, as in a plain indexed for-loop.
  while (index < headings.length) {
    var heading = headings[index];
    index += 1;

    if (typeof heading.id === "undefined" || heading.id === "") {
      continue;
    }
    heading.appendChild(anchorForId(heading.id));
  }
};
// Once the document reports readyState "complete", add permalink anchors to
// all h1-h6 headings inside the first "docs" element, or, if there is none,
// the first "news" element. Does nothing when neither exists.
document.onreadystatechange = function () {
  if (this.readyState !== "complete") {
    return;
  }

  var container = document.getElementsByClassName("docs")[0];
  if (!container) {
    container = document.getElementsByClassName("news")[0];
  }
  if (!container) {
    return;
  }

  var depth = 1;
  while (depth <= 6) {
    linkifyAnchors(depth, container);
    depth += 1;
  }
};
</script>
</body>
</html>