blob: 8045f66085067e83d5bd3976021cd9e9f4ce4efc [file] [log] [blame]
<!DOCTYPE HTML>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<title>Pig adapter</title>
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Jekyll v3.7.3">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
<link rel="stylesheet" href="/css/screen.css">
<link rel="icon" type="image/x-icon" href="/favicon.ico">
<!--[if lt IE 9]>
<script src="/js/html5shiv.min.js"></script>
<script src="/js/respond.min.js"></script>
<![endif]-->
</head>
<body class="wrap">
<header role="banner">
<div class="grid">
<div class="unit center-on-mobiles">
<h1>
<a href="/">
<span class="sr-only">Apache Calcite</span>
<img src="/img/logo.svg" alt="Calcite Logo">
</a>
</h1>
</div>
<nav class="main-nav">
<ul>
<li class="">
<a href="/">Home</a>
</li>
<li class="">
<a href="/downloads/">Download</a>
</li>
<li class="">
<a href="/community/">Community</a>
</li>
<li class="">
<a href="/develop/">Develop</a>
</li>
<li class="">
<a href="/news/">News</a>
</li>
<li class="current">
<a href="/docs/">Docs</a>
</li>
</ul>
</nav>
</div>
</header>
<section class="docs">
<div class="grid">
<div class="docs-nav-mobile unit whole show-on-mobiles">
<select aria-label="Navigate the docs" onchange="if (this.value) window.location.href=this.value">
<option value="">Navigate the docs…</option>
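<optgroup label="Overview">
<option value="/docs/index.html">Background</option>
<option value="/docs/tutorial.html">Tutorial</option>
<option value="/docs/algebra.html">Algebra</option>
</optgroup>
<optgroup label="Advanced">
<option value="/docs/adapter.html">Adapters</option>
<option value="/docs/spatial.html">Spatial</option>
<option value="/docs/stream.html">Streaming</option>
<option value="/docs/materialized_views.html">Materialized Views</option>
<option value="/docs/lattice.html">Lattices</option>
</optgroup>
<optgroup label="Avatica">
<option value="/docs/avatica_overview.html">Overview</option>
<option value="/docs/avatica_roadmap.html">Roadmap</option>
<option value="/docs/avatica_json_reference.html">JSON Reference</option>
<option value="/docs/avatica_protobuf_reference.html">Protobuf Reference</option>
</optgroup>
<optgroup label="Reference">
<option value="/docs/reference.html">SQL language</option>
<option value="/docs/model.html">JSON/YAML models</option>
<option value="/docs/howto.html">HOWTO</option>
</optgroup>
<optgroup label="Meta">
<option value="/docs/history.html">History</option>
<option value="/docs/powered_by.html">Powered by Calcite</option>
<option value="/apidocs">API</option>
<option value="/testapidocs">Test API</option>
</optgroup>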
</select>
</div>
<div class="unit four-fifths">
<article>
<h1>Pig adapter</h1>
<!--
-->
<h1 id="overview">Overview</h1>
<p>The Pig adapter allows you to write queries in SQL and execute them using
<a href="https://pig.apache.org">Apache Pig</a>.</p>
<h1 id="a-simple-example">A simple example</h1>
<p>Let’s start with a simple example. First, we need a
<a href="/docs/model.html">model definition</a>,
as follows.</p>
<figure class="highlight"><pre><code class="language-json" data-lang="json"><span class="p">{</span><span class="w">
</span><span class="s2">"version"</span><span class="p">:</span><span class="w"> </span><span class="s2">"1.0"</span><span class="p">,</span><span class="w">
</span><span class="s2">"defaultSchema"</span><span class="p">:</span><span class="w"> </span><span class="s2">"SALES"</span><span class="p">,</span><span class="w">
</span><span class="s2">"schemas"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="s2">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"PIG"</span><span class="p">,</span><span class="w">
</span><span class="s2">"type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"custom"</span><span class="p">,</span><span class="w">
</span><span class="s2">"factory"</span><span class="p">:</span><span class="w"> </span><span class="s2">"org.apache.calcite.adapter.pig.PigSchemaFactory"</span><span class="p">,</span><span class="w">
</span><span class="s2">"tables"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="s2">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"t"</span><span class="p">,</span><span class="w">
</span><span class="s2">"type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"custom"</span><span class="p">,</span><span class="w">
</span><span class="s2">"factory"</span><span class="p">:</span><span class="w"> </span><span class="s2">"org.apache.calcite.adapter.pig.PigTableFactory"</span><span class="p">,</span><span class="w">
</span><span class="s2">"operand"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="s2">"file"</span><span class="p">:</span><span class="w"> </span><span class="s2">"data.txt"</span><span class="p">,</span><span class="w">
</span><span class="s2">"columns"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="s2">"tc0"</span><span class="p">,</span><span class="w"> </span><span class="s2">"tc1"</span><span class="p">]</span><span class="w">
</span><span class="p">}</span><span class="w">
</span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="s2">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"s"</span><span class="p">,</span><span class="w">
</span><span class="s2">"type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"custom"</span><span class="p">,</span><span class="w">
</span><span class="s2">"factory"</span><span class="p">:</span><span class="w"> </span><span class="s2">"org.apache.calcite.adapter.pig.PigTableFactory"</span><span class="p">,</span><span class="w">
</span><span class="s2">"operand"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="s2">"file"</span><span class="p">:</span><span class="w"> </span><span class="s2">"data2.txt"</span><span class="p">,</span><span class="w">
</span><span class="s2">"columns"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="s2">"sc0"</span><span class="p">,</span><span class="w"> </span><span class="s2">"sc1"</span><span class="p">]</span><span class="w">
</span><span class="p">}</span><span class="w">
</span><span class="p">}</span><span class="w"> </span><span class="p">]</span><span class="w">
</span><span class="p">}</span><span class="w"> </span><span class="p">]</span><span class="w">
</span><span class="p">}</span></code></pre></figure>
<p>Now, if you write the SQL query</p>
<figure class="highlight"><pre><code class="language-sql" data-lang="sql"><span class="k">select</span> <span class="o">*</span>
<span class="k">from</span> <span class="nv">"t"</span>
<span class="k">join</span> <span class="nv">"s"</span> <span class="k">on</span> <span class="nv">"tc1"</span> <span class="o">=</span> <span class="nv">"sc0"</span></code></pre></figure>
<p>the Pig adapter will generate the Pig Latin script</p>
<figure class="highlight"><pre><code class="language-sql" data-lang="sql"><span class="n">t</span> <span class="o">=</span> <span class="k">LOAD</span> <span class="s1">'data.txt'</span> <span class="k">USING</span> <span class="n">PigStorage</span><span class="p">()</span> <span class="k">AS</span> <span class="p">(</span><span class="n">tc0</span><span class="p">:</span><span class="n">chararray</span><span class="p">,</span> <span class="n">tc1</span><span class="p">:</span><span class="n">chararray</span><span class="p">);</span>
<span class="n">s</span> <span class="o">=</span> <span class="k">LOAD</span> <span class="s1">'data2.txt'</span> <span class="k">USING</span> <span class="n">PigStorage</span><span class="p">()</span> <span class="k">AS</span> <span class="p">(</span><span class="n">sc0</span><span class="p">:</span><span class="n">chararray</span><span class="p">,</span> <span class="n">sc1</span><span class="p">:</span><span class="n">chararray</span><span class="p">);</span>
<span class="n">t</span> <span class="o">=</span> <span class="k">JOIN</span> <span class="n">t</span> <span class="k">BY</span> <span class="n">tc1</span><span class="p">,</span> <span class="n">s</span> <span class="k">BY</span> <span class="n">sc0</span><span class="p">;</span></code></pre></figure>
<p>which is then executed using Pig’s runtime, typically MapReduce on
<a href="https://hadoop.apache.org/">Apache Hadoop</a>.</p>
<h1 id="relationship-to-piglet">Relationship to Piglet</h1>
<p>Calcite has another component called
<a href="/apidocs/org/apache/calcite/piglet/package-summary.html">Piglet</a>.
It allows you to write queries in a subset of Pig Latin,
and execute them using any applicable Calcite adapter.
So, Piglet is basically the opposite of the Pig adapter.</p>
</article>
</div>
<div class="unit one-fifth hide-on-mobiles">
<aside>
<h4>Overview</h4>
<ul>
<li class=""><a href="/docs/index.html">Background</a></li>
<li class=""><a href="/docs/tutorial.html">Tutorial</a></li>
<li class=""><a href="/docs/algebra.html">Algebra</a></li>
</ul>
<h4>Advanced</h4>
<ul>
<li class=""><a href="/docs/adapter.html">Adapters</a></li>
<li class=""><a href="/docs/spatial.html">Spatial</a></li>
<li class=""><a href="/docs/stream.html">Streaming</a></li>
<li class=""><a href="/docs/materialized_views.html">Materialized Views</a></li>
<li class=""><a href="/docs/lattice.html">Lattices</a></li>
</ul>
<h4>Avatica</h4>
<ul>
<li class=""><a href="/docs/avatica_overview.html">Overview</a></li>
<li class=""><a href="/docs/avatica_roadmap.html">Roadmap</a></li>
<li class=""><a href="/docs/avatica_json_reference.html">JSON Reference</a></li>
<li class=""><a href="/docs/avatica_protobuf_reference.html">Protobuf Reference</a></li>
</ul>
<h4>Reference</h4>
<ul>
<li class=""><a href="/docs/reference.html">SQL language</a></li>
<li class=""><a href="/docs/model.html">JSON/YAML models</a></li>
<li class=""><a href="/docs/howto.html">HOWTO</a></li>
</ul>
<h4>Meta</h4>
<ul>
<li class=""><a href="/docs/history.html">History</a></li>
<li class=""><a href="/docs/powered_by.html">Powered by Calcite</a></li>
<li class=""><a href="/apidocs">API</a></li>
<li class=""><a href="/testapidocs">Test API</a></li>
</ul>
</aside>
</div>
<div class="clear"></div>
</div>
</section>
<footer role="contentinfo">
<div id="poweredby">
<a href="https://www.apache.org/">
<span class="sr-only">Apache</span>
<img src="/img/feather.png" width="190" height="77" alt="Apache Logo"></a>
</div>
<div id="copyright">
<p>The contents of this website are Copyright &copy;&nbsp;2019
<a href="https://www.apache.org/">Apache Software Foundation</a>
under the terms of
the <a href="https://www.apache.org/licenses/">
Apache&nbsp;License&nbsp;v2</a>. Apache Calcite and its logo are
trademarks of the Apache Software Foundation.</p>
</div>
</footer>
<script>
// Builds the permalink element for a heading: an <a class="header-link">
// pointing at the fragment "#" + id, titled "Permalink", whose content is a
// screen-reader-only "Permalink" label followed by an <i class="fa fa-link">.
// Returns the new, not-yet-attached element.
var anchorForId = function (id) {
  var permalink = document.createElement("a");
  permalink.className = "header-link";
  permalink.title = "Permalink";
  permalink.href = "#" + id;
  permalink.innerHTML = '<span class="sr-only">Permalink</span><i class="fa fa-link"></i>';
  return permalink;
};
// Appends a permalink anchor (built by anchorForId) to every <h{level}>
// element inside containingElement that carries a non-empty id.
// Headings with a missing or empty id are left untouched.
var linkifyAnchors = function (level, containingElement) {
  var headings = containingElement.getElementsByTagName("h" + level);
  // The length is re-read on every pass, exactly as a plain indexed walk over the collection.
  for (var i = 0; i < headings.length; i += 1) {
    var heading = headings[i];
    if (typeof heading.id === "undefined" || heading.id === "") {
      continue;
    }
    heading.appendChild(anchorForId(heading.id));
  }
};
// readystatechange handler: does nothing until the document's readyState is
// "complete". Then it picks the first element with class "docs" (or, failing
// that, the first with class "news") and adds permalink anchors to its
// h1 through h6 headings. If neither container exists, it exits quietly.
document.onreadystatechange = function () {
  if (this.readyState !== "complete") {
    return;
  }
  var docsBlocks = document.getElementsByClassName("docs");
  var contentBlock = docsBlocks[0] || document.getElementsByClassName("news")[0];
  if (!contentBlock) {
    return;
  }
  var level = 1;
  while (level <= 6) {
    linkifyAnchors(level, contentBlock);
    level++;
  }
};
</script>
</body>
</html>