blob: 97a59c572c9f97fdb11b092a6e7bc25488cd13b0 [file] [log] [blame]
<!DOCTYPE html>
<!--
| Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/sqlpp/arrayindex.md at 2021-07-16
| Rendered using Apache Maven Fluido Skin 1.7
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="Date-Revision-yyyymmdd" content="20210716" />
<meta http-equiv="Content-Language" content="en" />
<title>AsterixDB &#x2013; AsterixDB Support of Array Indexes</title>
<link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
<link rel="stylesheet" href="../css/site.css" />
<link rel="stylesheet" href="../css/print.css" media="print" />
<script type="text/javascript" src="../js/apache-maven-fluido-1.7.min.js"></script>
</head>
<body class="topBarDisabled">
<div class="container-fluid">
<div id="banner">
<div class="pull-left"><a href=".././" id="bannerLeft"><img src="../images/asterixlogo.png" alt="AsterixDB"/></a></div>
<div class="pull-right"></div>
<div class="clear"><hr/></div>
</div>
<div id="breadcrumbs">
<ul class="breadcrumb">
<li id="publishDate">Last Published: 2021-07-16</li>
<li id="projectVersion" class="pull-right">Version: 0.9.7</li>
<li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
</ul>
</div>
<div class="row-fluid">
<div id="leftColumn" class="span2">
<div class="well sidebar-nav">
<ul class="nav nav-list">
<li class="nav-header">Get Started - Installation</li>
<li><a href="../ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
<li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
<li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
<li class="nav-header">AsterixDB Primer</li>
<li><a href="../sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
<li class="nav-header">Data Model</li>
<li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
<li class="nav-header">Queries</li>
<li><a href="../sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
<li><a href="../SQLPP.html" title="Raw SQL++ Grammar"><span class="none"></span>Raw SQL++ Grammar</a></li>
<li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
<li class="nav-header">API/SDK</li>
<li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
<li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
<li class="nav-header">Advanced Features</li>
<li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
<li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
<li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
<li><a href="../sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
<li><a href="../sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
<li><a href="../sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
<li><a href="../interval_join.html" title="Support of Interval Joins"><span class="none"></span>Support of Interval Joins</a></li>
<li class="active"><a href="#"><span class="none"></span>Support of Array Indexes</a></li>
<li class="nav-header">Deprecated</li>
<li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
<li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
<li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
</ul>
<hr />
<div id="poweredBy">
<div class="clear"></div>
<div class="clear"></div>
<div class="clear"></div>
<div class="clear"></div>
<a href=".././" title="AsterixDB" class="builtBy"><img class="builtBy" alt="AsterixDB" src="../images/asterixlogo.png" /></a>
</div>
</div>
</div>
<div id="bodyColumn" class="span10" >
<!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<h1>AsterixDB Support of Array Indexes</h1>
<div class="section">
<h2><a name="Table_of_Contents"></a><a name="toc" id="toc">Table of Contents</a></h2>
<ul>
<li><a href="#Overview">Overview</a></li>
<li><a href="#QuantificationQueries">Quantification Queries</a></li>
<li><a href="#ExplicitUnnestQueries">Explicit Unnesting Queries</a></li>
<li><a href="#JoinQueries">Join Queries</a></li>
<li><a href="#ComplexIndexingExamples">Complex Indexing Examples</a></li>
</ul></div>
<div class="section">
<h2><a name="Overview_.5BBack_to_TOC.5D"></a><a name="Overview" id="Overview">Overview</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
<p>Array indexes are used in applications where users want to accelerate a query that involves some array-valued or multiset-valued field. This enables fast evaluation of predicates in queries involving arrays or multisets in datasets. For brevity, all further mentions of array-valued fields are also applicable to multiset-valued fields.</p>
<p>Array-valued fields are a natural data modeling concept for documents. In the traditional inventory management example, it is natural for the line items of an order to exist as a part of the order itself. Previously if an AsterixDB user wanted to optimize a query involving a predicate on the line items of an order, they would a) have to undertake some form of schema migration to separate the line items from the orders into different datasets, b) create an index on the new dataset for line items, and finally c) modify their query to join orders and line items. With the introduction of array indexes in AsterixDB, users can keep their arrays intact and still reap the performance benefits of an index.</p>
<p>It should be noted that in AsterixDB, array indexes are <i>not</i> meant to serve as covering indexes. In fact due to AsterixDB&#x2019;s record-level locking, index-only plans involving multi-valued fields (i.e. array indexes and inverted indexes) are not currently possible. Instead, array indexes are simply meant to accelerate queries involving multi-valued fields.</p></div>
<div class="section">
<h2><a name="Quantification_Queries_.5BBack_to_TOC.5D"></a><a name="QuantificationQueries" id="QuantificationQueries">Quantification Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
<p>A common use-case for array indexes involves quantifying some or all elements within an array. Quantification queries have two variants: existential and universal. Existential queries ask if <i>any</i> element in some array satisfies a given predicate. Membership queries are a specific type of existential query, asking if any element in some array is equal to a particular value. Universal queries ask if <i>all</i> elements in some array satisfy a particular predicate. Empty arrays are not stored in an array index, meaning that a user must additionally specify that the array is non-empty to tell AsterixDB that it is possible to use an array index as an access method for the given query.</p>
<p>All query examples here will use the orders and products datasets below.</p>
<div>
<div>
<pre class="source">CREATE TYPE ordersType AS {
orderno: int,
custid: string,
items: [{ itemno: int, productno: int, qty: int, price: float }]
};
CREATE DATASET orders (ordersType) PRIMARY KEY orderno;
CREATE TYPE productsType AS {
productno: int,
categories: {{ string }}
};
CREATE DATASET products (productsType) PRIMARY KEY productno;
</pre></div></div>
<p>Let us now create an index on the <tt>categories</tt> multiset of the <tt>products</tt> dataset.</p>
<div>
<div>
<pre class="source">CREATE INDEX pCategoriesIdx ON products (UNNEST categories);
</pre></div></div>
<p>Suppose we now want to find all products that have the category &#x201c;Food&#x201d;. The following membership query will utilize the index we just created.</p>
<div>
<div>
<pre class="source">SET `compiler.arrayindex` &quot;true&quot;;
SELECT p
FROM products p
WHERE &quot;Food&quot; IN p.categories;
</pre></div></div>
<p>We can also rewrite the query above as an explicit existential quantification query with an equality predicate and the index will be utilized.</p>
<div>
<div>
<pre class="source">SET `compiler.arrayindex` &quot;true&quot;;
SELECT p
FROM products p
WHERE SOME c IN p.categories SATISFIES c = &quot;Food&quot;;
</pre></div></div>
<p>Let us now create an index on the <tt>qty</tt> and <tt>price</tt> fields in the <tt>items</tt> array of the <tt>orders</tt> dataset.</p>
<div>
<div>
<pre class="source">CREATE INDEX oItemsQtyPriceIdx ON orders (UNNEST items SELECT qty, price);
</pre></div></div>
<p>Now suppose we want to find all orders that only have items with large quantities and low prices, not counting orders without any items. The following universal quantification query will utilize the index we just created.</p>
<div>
<div>
<pre class="source">SET `compiler.arrayindex` &quot;true&quot;;
SELECT o
FROM orders o
WHERE LEN(o.items) &gt; 0 AND
(EVERY i IN o.items SATISFIES i.qty &gt; 100 AND i.price &lt; 5.00);
</pre></div></div>
<p>Take note of the <tt>LEN(o.items) &gt; 0</tt> conjunct. Array indexes cannot be used for queries with potentially empty arrays.</p></div>
<div class="section">
<h2><a name="Explicit_Unnesting_Queries_.5BBack_to_TOC.5D"></a><a name="ExplicitUnnestQueries" id="ExplicitUnnestQueries">Explicit Unnesting Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
<p>Array indexes can also be used to accelerate queries that involve the explicit unnesting of array fields. We can express the same membership / existential example above using an explicit <tt>UNNEST</tt> query. (To keep the same cardinality as the query above (i.e. to undo the <tt>UNNEST</tt>), we add a <tt>DISTINCT</tt> clause, though the index would be utilized either way.)</p>
<div>
<div>
<pre class="source">SET `compiler.arrayindex` &quot;true&quot;;
SELECT DISTINCT p
FROM products p, p.categories c
WHERE c = &quot;Food&quot;;
</pre></div></div>
<p>As another example, suppose that we want to find all orders that have <i>some</i> item with a large quantity. The following query will utilize the <tt>oItemsQtyPriceIdx</tt> we created, using only the first field in the index <tt>qty</tt>.</p>
<div>
<div>
<pre class="source">SET `compiler.arrayindex` &quot;true&quot;;
SELECT DISTINCT o
FROM orders o, o.items i
WHERE i.qty &gt; 100;
</pre></div></div>
</div>
<div class="section">
<h2><a name="Join_Queries_.5BBack_to_TOC.5D"></a><a name="JoinQueries" id="JoinQueries">Join Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
<p>Finally, array indexes can also be used for index nested-loop joins if the field being joined is located within an array. Let us create another index on the <tt>items</tt> array of the <tt>orders</tt> dataset, this time on the <tt>productno</tt> field.</p>
<div>
<div>
<pre class="source">CREATE INDEX oProductIDIdx ON orders (UNNEST items SELECT productno);
</pre></div></div>
<p>Now suppose we want to find all products located in a specific order. We can accomplish this with the join query below. Note that we must specify the <tt>indexnl</tt> join hint to tell AsterixDB that we want to optimize this specific join, as hash joins are the default join method otherwise.</p>
<div>
<div>
<pre class="source">SET `compiler.arrayindex` &quot;true&quot;;
SELECT DISTINCT p
FROM products p, orders o, o.items i
WHERE i.productno /*+ indexnl */ = p.productno
AND o.custid = &quot;C41&quot;;
</pre></div></div>
</div>
<div class="section">
<h2><a name="Complex_Indexing_Examples_.5BBack_to_TOC.5D"></a><a name="ComplexIndexingExamples" id="ComplexIndexingExamples">Complex Indexing Examples</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
<div class="section">
<h3><a name="Open_Indexes"></a>Open Indexes</h3>
<p>Similar to atomic indexes, array indexes are not limited to closed fields. The following DDLs illustrate how we could express <tt>CREATE INDEX</tt> statements comparable to those above if the to-be-indexed fields were not included in the their dataset&#x2019;s type definitions.</p>
<div>
<div>
<pre class="source">CREATE INDEX pCategoriesIdx ON products (UNNEST categories : string);
CREATE INDEX oItemsQtyPriceIdx ON orders (UNNEST items SELECT qty : int, price : int);
CREATE INDEX oProductIDIdx ON orders (UNNEST items SELECT productno : int);
</pre></div></div>
</div>
<div class="section">
<h3><a name="Arrays_in_Arrays"></a>Arrays in Arrays</h3>
<p>Array indexes are not just limited to arrays of depth = 1. We can generalize this to arrays of arbitrary depth, as long as an object encapsulates each array. The following DDLs describe indexing the <tt>qty</tt> field in an <tt>items</tt> array at various depths.</p>
<div>
<div>
<pre class="source">// { orderno: ..., items0: [ { items1: [ { qty: int, ... } ] } ] }
CREATE INDEX oItemItemQtyIdx ON orders (UNNEST items0 UNNEST items1 SELECT qty);
// { orderno: ..., items0: [ { items1: [ { items2: [ { qty: int, ... } ] } ] } ] }
CREATE INDEX oItemItemItemQtyIdx ON orders (UNNEST items0 UNNEST items1 UNNEST items2 SELECT qty);
</pre></div></div>
<p>The queries below will utilize the indexes above. The first query utilizes the <tt>oItemItemQtyIdx</tt> index through nested existential quantification. The second query utilizes the <tt>oItemItemItemQtyIdx</tt> index with three unnesting clauses.</p>
<div>
<div>
<pre class="source">SET `compiler.arrayindex` &quot;true&quot;;
SELECT o
FROM orders o
WHERE SOME o0 IN o.items0 SATISFIES (
SOME o1 IN o0.items1 SATISFIES o1.qty = 100
);
SELECT DISTINCT o
FROM orders o, o.items0 o0, o0.items1 o1, o1.items2 o2
WHERE o2.qty = 100;
</pre></div></div></div></div>
</div>
</div>
</div>
<hr/>
<footer>
<div class="container-fluid">
<div class="row-fluid">
<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
feather logo, and the Apache AsterixDB project logo are either
registered trademarks or trademarks of The Apache Software
Foundation in the United States and other countries.
All other marks mentioned may be trademarks or registered
trademarks of their respective owners.
</div>
</div>
</div>
</footer>
</body>
</html>