blob: ef780a6c374e92b18a4f78b2dc56e1056d6bdf7f [file] [log] [blame]
<!DOCTYPE HTML>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<title>Background</title>
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Jekyll v3.7.3">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
<link rel="stylesheet" href="/css/screen.css">
<link rel="icon" type="image/x-icon" href="/favicon.ico">
<!--[if lt IE 9]>
<script src="/js/html5shiv.min.js"></script>
<script src="/js/respond.min.js"></script>
<![endif]-->
</head>
<body class="wrap">
<header role="banner">
<div class="grid">
<div class="unit center-on-mobiles">
<h1>
<a href="/">
<span class="sr-only">Apache Calcite</span>
<img src="/img/logo.svg" alt="Calcite Logo">
</a>
</h1>
</div>
<nav class="main-nav">
<ul>
<li class="">
<a href="/">Home</a>
</li>
<li class="">
<a href="/downloads/">Download</a>
</li>
<li class="">
<a href="/community/">Community</a>
</li>
<li class="">
<a href="/develop/">Develop</a>
</li>
<li class="">
<a href="/news/">News</a>
</li>
<li class="current">
<a href="/docs/">Docs</a>
</li>
</ul>
</nav>
</div>
</header>
<section class="docs">
<div class="grid">
<div class="docs-nav-mobile unit whole show-on-mobiles">
<select onchange="if (this.value) window.location.href=this.value" aria-label="Navigate the docs">
<option value="">Navigate the docs…</option>
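<optgroup label="Overview">
<option value="/docs/index.html">Background</option>
<option value="/docs/tutorial.html">Tutorial</option>
<option value="/docs/algebra.html">Algebra</option>
</optgroup>
<optgroup label="Advanced">
<option value="/docs/adapter.html">Adapters</option>
<option value="/docs/spatial.html">Spatial</option>
<option value="/docs/stream.html">Streaming</option>
<option value="/docs/materialized_views.html">Materialized Views</option>
<option value="/docs/lattice.html">Lattices</option>
</optgroup>
<optgroup label="Avatica">
<option value="/docs/avatica_overview.html">Overview</option>
<option value="/docs/avatica_roadmap.html">Roadmap</option>
<option value="/docs/avatica_json_reference.html">JSON Reference</option>
<option value="/docs/avatica_protobuf_reference.html">Protobuf Reference</option>
</optgroup>
<optgroup label="Reference">
<option value="/docs/reference.html">SQL language</option>
<option value="/docs/model.html">JSON/YAML models</option>
<option value="/docs/howto.html">HOWTO</option>
</optgroup>
<optgroup label="Meta">
<option value="/docs/history.html">History</option>
<option value="/docs/powered_by.html">Powered by Calcite</option>
<option value="/apidocs">API</option>
<option value="/testapidocs">Test API</option>
</optgroup>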
</select>
</div>
<div class="unit four-fifths">
<article>
<h1>Background</h1>
<!--
-->
<p>Apache Calcite is a dynamic data management framework.</p>
<p>It contains many of the pieces that comprise a typical database
management system, but omits some key functions: storage of data,
algorithms to process data, and a repository for storing metadata.</p>
<p>Calcite intentionally stays out of the business of storing and
processing data. As we shall see, this makes it an excellent choice
for mediating between applications and one or more data storage
locations and data processing engines. It is also a perfect foundation
for building a database: just add data.</p>
<p>To illustrate, let’s create an empty instance of Calcite and then
point it at some data.</p>
<figure class="highlight"><pre><code class="language-java" data-lang="java"><span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">HrSchema</span> <span class="o">{</span>
<span class="kd">public</span> <span class="kd">final</span> <span class="n">Employee</span><span class="o">[]</span> <span class="n">emps</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span>
<span class="kd">public</span> <span class="kd">final</span> <span class="n">Department</span><span class="o">[]</span> <span class="n">depts</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span>
<span class="o">}</span>
<span class="n">Class</span><span class="o">.</span><span class="na">forName</span><span class="o">(</span><span class="s">"org.apache.calcite.jdbc.Driver"</span><span class="o">);</span>
<span class="n">Properties</span> <span class="n">info</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Properties</span><span class="o">();</span>
<span class="n">info</span><span class="o">.</span><span class="na">setProperty</span><span class="o">(</span><span class="s">"lex"</span><span class="o">,</span> <span class="s">"JAVA"</span><span class="o">);</span>
<span class="n">Connection</span> <span class="n">connection</span> <span class="o">=</span>
<span class="n">DriverManager</span><span class="o">.</span><span class="na">getConnection</span><span class="o">(</span><span class="s">"jdbc:calcite:"</span><span class="o">,</span> <span class="n">info</span><span class="o">);</span>
<span class="n">CalciteConnection</span> <span class="n">calciteConnection</span> <span class="o">=</span>
<span class="n">connection</span><span class="o">.</span><span class="na">unwrap</span><span class="o">(</span><span class="n">CalciteConnection</span><span class="o">.</span><span class="na">class</span><span class="o">);</span>
<span class="n">SchemaPlus</span> <span class="n">rootSchema</span> <span class="o">=</span> <span class="n">calciteConnection</span><span class="o">.</span><span class="na">getRootSchema</span><span class="o">();</span>
<span class="n">Schema</span> <span class="n">schema</span> <span class="o">=</span> <span class="k">new</span> <span class="n">ReflectiveSchema</span><span class="o">(</span><span class="k">new</span> <span class="n">HrSchema</span><span class="o">());</span>
<span class="n">rootSchema</span><span class="o">.</span><span class="na">add</span><span class="o">(</span><span class="s">"hr"</span><span class="o">,</span> <span class="n">schema</span><span class="o">);</span>
<span class="n">Statement</span> <span class="n">statement</span> <span class="o">=</span> <span class="n">calciteConnection</span><span class="o">.</span><span class="na">createStatement</span><span class="o">();</span>
<span class="n">ResultSet</span> <span class="n">resultSet</span> <span class="o">=</span> <span class="n">statement</span><span class="o">.</span><span class="na">executeQuery</span><span class="o">(</span>
<span class="s">"select d.deptno, min(e.empid)\n"</span>
<span class="o">+</span> <span class="s">"from hr.emps as e\n"</span>
<span class="o">+</span> <span class="s">"join hr.depts as d\n"</span>
<span class="o">+</span> <span class="s">" on e.deptno = d.deptno\n"</span>
<span class="o">+</span> <span class="s">"group by d.deptno\n"</span>
<span class="o">+</span> <span class="s">"having count(*) &gt; 1"</span><span class="o">);</span>
<span class="n">print</span><span class="o">(</span><span class="n">resultSet</span><span class="o">);</span>
<span class="n">resultSet</span><span class="o">.</span><span class="na">close</span><span class="o">();</span>
<span class="n">statement</span><span class="o">.</span><span class="na">close</span><span class="o">();</span>
<span class="n">connection</span><span class="o">.</span><span class="na">close</span><span class="o">();</span></code></pre></figure>
<p>Where is the database? There is no database. The connection is
completely empty until <code class="highlighter-rouge">new ReflectiveSchema</code> registers a Java
object as a schema and its collection fields <code class="highlighter-rouge">emps</code> and <code class="highlighter-rouge">depts</code> as
tables.</p>
<p>Calcite does not want to own data; it does not even have a favorite data
format. This example used in-memory data sets, and processed them
using operators such as <code class="highlighter-rouge">groupBy</code> and <code class="highlighter-rouge">join</code> from the linq4j
library. But Calcite can also process data in other data formats, such
as JDBC. In the first example, replace</p>
<figure class="highlight"><pre><code class="language-java" data-lang="java"><span class="n">Schema</span> <span class="n">schema</span> <span class="o">=</span> <span class="k">new</span> <span class="n">ReflectiveSchema</span><span class="o">(</span><span class="k">new</span> <span class="n">HrSchema</span><span class="o">());</span></code></pre></figure>
<p>with</p>
<figure class="highlight"><pre><code class="language-java" data-lang="java"><span class="n">Class</span><span class="o">.</span><span class="na">forName</span><span class="o">(</span><span class="s">"com.mysql.jdbc.Driver"</span><span class="o">);</span>
<span class="n">BasicDataSource</span> <span class="n">dataSource</span> <span class="o">=</span> <span class="k">new</span> <span class="n">BasicDataSource</span><span class="o">();</span>
<span class="n">dataSource</span><span class="o">.</span><span class="na">setUrl</span><span class="o">(</span><span class="s">"jdbc:mysql://localhost"</span><span class="o">);</span>
<span class="n">dataSource</span><span class="o">.</span><span class="na">setUsername</span><span class="o">(</span><span class="s">"username"</span><span class="o">);</span>
<span class="n">dataSource</span><span class="o">.</span><span class="na">setPassword</span><span class="o">(</span><span class="s">"password"</span><span class="o">);</span>
<span class="n">Schema</span> <span class="n">schema</span> <span class="o">=</span> <span class="n">JdbcSchema</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">rootSchema</span><span class="o">,</span> <span class="s">"hr"</span><span class="o">,</span> <span class="n">dataSource</span><span class="o">,</span>
<span class="kc">null</span><span class="o">,</span> <span class="s">"name"</span><span class="o">);</span></code></pre></figure>
<p>and Calcite will execute the same query in JDBC. To the application,
the data and API are the same, but behind the scenes the
implementation is very different. Calcite uses optimizer rules to push
the <code class="highlighter-rouge">JOIN</code> and <code class="highlighter-rouge">GROUP BY</code> operations to the source database.</p>
<p>In-memory and JDBC are just two familiar examples. Calcite can handle
any data source and data format. To add a data source, you need to
write an adapter that tells Calcite what collections in the data
source it should consider “tables”.</p>
<p>For more advanced integration, you can write optimizer
rules. Optimizer rules allow Calcite to access data of a new format,
allow you to register new operators (such as a better join algorithm),
and allow Calcite to optimize how queries are translated to
operators. Calcite will combine your rules and operators with built-in
rules and operators, apply cost-based optimization, and generate an
efficient plan.</p>
<h2 id="writing-an-adapter">Writing an adapter</h2>
<p>The subproject under example/csv provides a CSV adapter, which is
fully functional for use in applications but is also simple enough to
serve as a good template if you are writing your own adapter.</p>
<p>See the <a href="/docs/tutorial.html">tutorial</a> for information on using
the CSV adapter and writing other adapters.</p>
<p>See the <a href="howto.html">HOWTO</a> for more information about
using other adapters, and about using Calcite in general.</p>
<h2 id="status">Status</h2>
<p>The following features are complete.</p>
<ul>
<li>Query parser, validator and optimizer</li>
<li>Support for reading models in JSON format</li>
<li>Many standard functions and aggregate functions</li>
<li>JDBC queries against Linq4j and JDBC back-ends</li>
<li>Linq4j front-end</li>
<li>SQL features: SELECT, FROM (including JOIN syntax), WHERE, GROUP BY
(including GROUPING SETS), aggregate functions (including
COUNT(DISTINCT …) and FILTER), HAVING, ORDER BY (including NULLS
FIRST/LAST), set operations (UNION, INTERSECT, MINUS), sub-queries
(including correlated sub-queries), windowed aggregates, LIMIT
(syntax as <a href="https://www.postgresql.org/docs/8.4/static/sql-select.html#SQL-LIMIT">Postgres</a>);
more details in the <a href="reference.html">SQL reference</a></li>
<li>Local and remote JDBC drivers; see <a href="avatica_overview.html">Avatica</a></li>
<li>Several <a href="adapter.html">adapters</a></li>
</ul>
<div class="section-nav">
<div class="left align-right">
<span class="prev disabled">Previous</span>
</div>
<div class="right align-left">
<a href="/docs/tutorial.html" class="next">Next</a>
</div>
</div>
<div class="clear"></div>
</article>
</div>
<div class="unit one-fifth hide-on-mobiles">
<aside>
<h4>Overview</h4>
<ul>
<li class="current"><a href="/docs/index.html">Background</a></li>
<li class=""><a href="/docs/tutorial.html">Tutorial</a></li>
<li class=""><a href="/docs/algebra.html">Algebra</a></li>
</ul>
<h4>Advanced</h4>
<ul>
<li class=""><a href="/docs/adapter.html">Adapters</a></li>
<li class=""><a href="/docs/spatial.html">Spatial</a></li>
<li class=""><a href="/docs/stream.html">Streaming</a></li>
<li class=""><a href="/docs/materialized_views.html">Materialized Views</a></li>
<li class=""><a href="/docs/lattice.html">Lattices</a></li>
</ul>
<h4>Avatica</h4>
<ul>
<li class=""><a href="/docs/avatica_overview.html">Overview</a></li>
<li class=""><a href="/docs/avatica_roadmap.html">Roadmap</a></li>
<li class=""><a href="/docs/avatica_json_reference.html">JSON Reference</a></li>
<li class=""><a href="/docs/avatica_protobuf_reference.html">Protobuf Reference</a></li>
</ul>
<h4>Reference</h4>
<ul>
<li class=""><a href="/docs/reference.html">SQL language</a></li>
<li class=""><a href="/docs/model.html">JSON/YAML models</a></li>
<li class=""><a href="/docs/howto.html">HOWTO</a></li>
</ul>
<h4>Meta</h4>
<ul>
<li class=""><a href="/docs/history.html">History</a></li>
<li class=""><a href="/docs/powered_by.html">Powered by Calcite</a></li>
<li class=""><a href="/apidocs">API</a></li>
<li class=""><a href="/testapidocs">Test API</a></li>
</ul>
</aside>
</div>
<div class="clear"></div>
</div>
</section>
<footer role="contentinfo">
<div id="poweredby">
<a href="https://www.apache.org/">
<span class="sr-only">Apache</span>
<img src="/img/feather.png" width="190" height="77" alt="Apache Logo"></a>
</div>
<div id="copyright">
<p>The contents of this website are Copyright &copy;&nbsp;2019
<a href="https://www.apache.org/">Apache Software Foundation</a>
under the terms of
the <a href="https://www.apache.org/licenses/">
Apache&nbsp;License&nbsp;v2</a>. Apache Calcite and its logo are
trademarks of the Apache Software Foundation.</p>
</div>
</footer>
<script>
// Builds the permalink <a> element for a heading.
// id: the heading's id; the link's href becomes "#" + id.
// Returns the new, still-detached anchor; inserting it is left to the caller.
var anchorForId = function (id) {
  // Screen-reader-only label followed by an empty <i> carrying the
  // "fa fa-link" classes (presumably rendered as an icon by the stylesheet).
  var markup = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>";
  var link = document.createElement("a");
  link.title = "Permalink";
  link.innerHTML = markup;
  link.href = "#" + id;
  link.className = "header-link";
  return link;
};
// Appends a permalink anchor to every heading of one level that has an id.
// level: heading level number; the tag searched for is "h" + level.
// containingElement: element whose descendants are searched.
var linkifyAnchors = function (level, containingElement) {
  var headings = containingElement.getElementsByTagName("h" + level);
  var index = 0;
  while (index < headings.length) {
    var heading = headings[index];
    index += 1;
    // A heading with a missing or empty id has no fragment to link to.
    var hasId = typeof heading.id !== "undefined" && heading.id !== "";
    if (hasId) {
      heading.appendChild(anchorForId(heading.id));
    }
  }
};
// When the document reaches the "complete" ready state, add permalink anchors
// to the id-bearing h1..h6 headings inside the first element with class "docs",
// falling back to the first element with class "news".
document.onreadystatechange = function () {
  // Ignore the earlier ready-state transitions.
  if (this.readyState !== "complete") {
    return;
  }
  var root = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0];
  if (root) {
    var level = 1;
    while (level <= 6) {
      linkifyAnchors(level, root);
      level++;
    }
  }
};
</script>
</body>
</html>