blob: 19b8e180d7ed9ba6ad50eaa03c48b52b7de578d8 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>SASI | Apache Cassandra Documentation</title>
<link rel="stylesheet" href="../../../../assets/css/site.css">
<link rel="schema.dcterms" href="https://purl.org/dc/terms/">
<meta name="dcterms.subject" content="Cassandra">
<meta name="dcterms.identifier" content="4.1">
<meta name="generator" content="Antora 2.3.4">
<link rel="icon" href="../../../../assets/img/favicon.ico" type="image/x-icon">
<script>
// Build a <script> element pointing at plausible.js and append it to <head>.
// The data-domain attribute carries the hostname this page is served from.
const domain = window.location.hostname;
const script = document.createElement("script");
script.setAttribute("type", "text/javascript");
script.setAttribute("src", "https://plausible.cassandra.apache.org/js/plausible.js");
script.setAttribute("data-domain", domain);
script.setAttribute("async", 'true');
script.setAttribute("defer", 'true');
document.head.appendChild(script);
</script> </head>
<body class="docs-wrapper article">
<div class="container mx-auto relative">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
<meta property="og:type" content="website" />
<meta property="og:url" content="/" />
<meta property="og:site_name" content="Apache Cassandra" />
<header id="top-nav">
<div class="inner relative">
<!-- Social media links. Each opens in a new tab, so rel="noopener" stops the opened page from accessing window.opener. -->
<div class="header-social-icons text-right">
<a href="https://twitter.com/cassandra?lang=en" target="_blank" rel="noopener"><img src="../../../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank" rel="noopener"><img src="../../../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank" rel="noopener"><img src="../../../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
<div class="cf">
<div class="logo left"><a href="/"><img src="../../../../assets/img/logo-white-r.png" alt="Cassandra Logo"></a></div>
<!-- Hamburger icon for the mobile navigation; the alt text gives the image an accessible name. -->
<div class="mobile-nav-icon right">
<img class="toggle-icon" src="../../../../assets/img/hamburger-nav.svg" alt="Toggle navigation menu">
</div>
<ul class="main-nav nav-links right flex flex-vert-center flex-space-between">
<li>
<a class="nav-link hide-mobile">Get Started</a>
<!-- "Get Started" sub-menu: each entry pairs an icon with its link text; the alt text names the icon of its own entry. -->
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/cassandra-basics.html">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-basics.png" alt="cassandra basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra Basics
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/quickstart.html">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-rocket.png" alt="quickstart icon">
</div>
<div class="sub-nav-text teal py-small">
Quickstart
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/ecosystem.html">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-ecosystem.png" alt="ecosystem icon">
</div>
<div class="sub-nav-text teal py-small">
Ecosystem
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link" href="/doc/latest/">Documentation</a></li>
<li>
<a class="nav-link" href="/_/community.html">Community</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/community.html#code-of-conduct">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-welcome.png" alt="welcome icon">
</div>
<div class="sub-nav-text teal py-small">
Welcome
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#discussions">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-discussions.png" alt="discussions icon">
</div>
<div class="sub-nav-text teal py-small">
Discussions
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#project-governance">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-governance.png" alt="Governance icon">
</div>
<div class="sub-nav-text teal py-small">
Governance
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#how-to-contribute">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-contribute.png" alt="Contribute icon">
</div>
<div class="sub-nav-text teal py-small">
Contribute
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#meet-the-community">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-community.png" alt="Meet the Community icon">
</div>
<div class="sub-nav-text teal py-small">
Meet the Community
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/cassandra-catalyst-program.html">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-catalyst.png" alt="Catalyst icon">
</div>
<div class="sub-nav-text teal py-small">
Catalyst Program
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/events.html">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-events.png" alt="Events icon">
</div>
<div class="sub-nav-text teal py-small">
Events
</div>
</a>
</li>
</ul>
</li>
<li>
<a class="nav-link hide-mobile">Learn</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/Apache-Cassandra-5.0-Moving-Toward-an-AI-Driven-Future.html">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-basics.png" alt="Basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra 5.0
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/case-studies.html">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-case-study.png" alt="Case Studies icon">
</div>
<div class="sub-nav-text teal py-small">
Case Studies
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/resources.html">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-resources.png" alt="Resources icon">
</div>
<div class="sub-nav-text teal py-small">
Resources
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/blog.html">
<div class="sub-nav-icon">
<img src="../../../../assets/img/sub-menu-blog.png" alt="Blog icon">
</div>
<div class="sub-nav-text teal py-small">
Blog
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link btn btn--filled" href="/_/download.html">Download Now</a></li>
</ul>
</div>
</div>
</header>
<div class="hero hero--home grad">
<div class="eye"></div>
<div id="docs-content" class="text-center flex flex-center flex-column relative z2 ma-xlarge">
<h2>Cassandra Documentation</h2>
</div>
</div>
<div class="body px-medium py-medium container">
<div class="docs-nav-bar flex flex-space-between mb-medium">
<div id="mobile-docs-nav-burger" class="hidden">
<svg viewBox="0 0 24 24" width="36" height="36" stroke="#1c81a0" stroke-width="2.5" fill="none" stroke-linecap="round" stroke-linejoin="round" class="css-i6dzq1"><line x1="3" y1="12" x2="21" y2="12"></line><line x1="3" y1="6" x2="21" y2="6"></line><line x1="3" y1="18" x2="21" y2="18"></line></svg>
</div>
<!-- Docs search box. The placeholder is not a label, so aria-label supplies the accessible name. -->
<div class="docs-nav-item relative">
<input id="search-input" type="text" placeholder="Search docs" aria-label="Search docs">
</div>
<div class="versions-wrapper">
<h4>Version:</h4>
<div class="nav-panel-explore" data-panel="explore">
<div id="version-toggle" class="context">
<span class="version">4.1</span>
</div>
<ul id="versions-list" class="components">
<li class="component">
<ul class="versions">
<li class="version is-latest">
<a href="../../../../_/index.html">master</a>
</li>
</ul>
</li>
<li class="component is-current">
<ul class="versions">
<li class="version">
<a href="../../../trunk/index.html">trunk</a>
</li>
<li class="version">
<a href="../../../5.0/index.html">5.0</a>
</li>
<li class="version is-current is-latest">
<a href="../../index.html">4.1</a>
</li>
<li class="version">
<a href="../../../4.0/index.html">4.0</a>
</li>
<li class="version">
<a href="../../../3.11/index.html">3.11</a>
</li>
</ul>
</li>
</ul>
</div>
</div> </div>
<div class="cf relative">
<nav class="nav docs-nav full-800">
<div class="nav-menu">
<ul class="nav-list">
<li class="nav-item is-active" data-depth="0">
<ul class="nav-list">
<li class="nav-item" data-depth="1">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../index.html">Main</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="2">
<span class="nav-line">
<a class="nav-link" href="../../../../_/glossary.html">Glossary</a>
</span>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<a class="nav-link" href="../../../../_/bugs.html">How to report bugs</a>
</span>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<a class="nav-link" href="../../../../_/contactus.html">Contact us</a>
</span>
</li>
</ul>
</li>
</ul>
</li>
<li class="nav-item is-active" data-depth="0">
<ul class="nav-list">
<li class="nav-item is-current-path is-active" data-depth="1">
<span class="nav-line">
<button class="nav-toggle"></button>
<span class="nav-text">Cassandra</span>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../getting_started/index.html">Getting Started</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../getting_started/installing.html">Installing Cassandra</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../getting_started/configuring.html">Configuring Cassandra</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../getting_started/querying.html">Inserting and querying</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../getting_started/drivers.html">Client drivers</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../getting_started/java11.html">Support for Java 11</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../getting_started/production.html">Production recommendations</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<a class="nav-link" href="../new/index.html">What&#8217;s new</a>
</span>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../architecture/index.html">Architecture</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../architecture/overview.html">Overview</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../architecture/dynamo.html">Dynamo</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../architecture/storage_engine.html">Storage engine</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../architecture/guarantees.html">Guarantees</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../architecture/messaging.html">Improved internode messaging</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../architecture/streaming.html">Improved streaming</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../data_modeling/index.html">Data modeling</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../data_modeling/intro.html">Introduction</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../data_modeling/data_modeling_conceptual.html">Conceptual data modeling</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../data_modeling/data_modeling_rdbms.html">RDBMS design</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../data_modeling/data_modeling_queries.html">Defining application queries</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../data_modeling/data_modeling_logical.html">Logical data modeling</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../data_modeling/data_modeling_physical.html">Physical data modeling</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../data_modeling/data_modeling_refining.html">Evaluating and refining data models</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../data_modeling/data_modeling_schema.html">Defining database schema</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../data_modeling/data_modeling_tools.html">Cassandra data modeling tools</a>
</span>
</li>
</ul>
</li>
<li class="nav-item is-current-path is-active" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="index.html">Cassandra Query Language (CQL)</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="definitions.html">Definitions</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="types.html">Data types</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="ddl.html">Data definition (DDL)</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="dml.html">Data manipulation (DML)</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="operators.html">Operators</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="indexes.html">Secondary indexes</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="mvs.html">Materialized views</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="functions.html">Functions</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="json.html">JSON</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="security.html">Security</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="triggers.html">Triggers</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="appendices.html">Appendices</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="changes.html">Changes</a>
</span>
</li>
<li class="nav-item is-current-page is-active" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="SASI.html">SASI</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="cql_singlefile.html">Single file of CQL information</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../configuration/index.html">Configuration</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../configuration/cass_yaml_file.html">cassandra.yaml</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../configuration/cass_rackdc_file.html">cassandra-rackdc.properties</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../configuration/cass_env_sh_file.html">cassandra-env.sh</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../configuration/cass_topo_file.html">cassandra-topologies.properties</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../configuration/cass_cl_archive_file.html">commitlog-archiving.properties</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../configuration/cass_logback_xml_file.html">logback.xml</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../configuration/cass_jvm_options_file.html">jvm-* files</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../configuration/configuration.html">Liberating cassandra.yaml Parameters' Names from Their Units</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../operating/index.html">Operating</a>
</span>
<ul class="nav-list">
<!-- NOTE(review): the snitch page is assumed to live under the architecture module; confirm the target exists for this version. -->
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../architecture/snitch.html">Snitches</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/topo_changes.html">Topology changes</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/repair.html">Repair</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/read_repair.html">Read repair</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/hints.html">Hints</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/bloom_filters.html">Bloom filters</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/compression.html">Compression</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/cdc.html">Change Data Capture (CDC)</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/backups.html">Backups</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/bulk_loading.html">Bulk loading</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/metrics.html">Metrics</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/security.html">Security</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/hardware.html">Hardware</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/compaction/index.html">Compaction</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/virtualtables.html">Virtual tables</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/auditlogging.html">Audit logging</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/audit_logging.html">Audit logging 2</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/fqllogging.html">Full query logging</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../operating/transientreplication.html">Transient replication</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../tools/index.html">Tools</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../tools/cqlsh.html">cqlsh: the CQL shell</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../tools/nodetool/nodetool.html">nodetool</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../tools/sstable/index.html">SSTable tools</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../tools/cassandra_stress.html">cassandra-stress</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../troubleshooting/index.html">Troubleshooting</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../troubleshooting/finding_nodes.html">Finding misbehaving nodes</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../troubleshooting/reading_logs.html">Reading Cassandra logs</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../troubleshooting/use_nodetool.html">Using nodetool</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../troubleshooting/use_tools.html">Using external tools to deep-dive</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../../_/development/index.html">Development</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../_/development/gettingstarted.html">Getting started</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../_/development/ide.html">Building and IDE integration</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../_/development/testing.html">Testing</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../_/development/patches.html">Contributing code changes</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../_/development/code_style.html">Code style</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../_/development/how_to_review.html">Review checklist</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../_/development/how_to_commit.html">How to commit</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../_/development/documentation.html">Working on documentation</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../_/development/ci.html">Jenkins CI environment</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../_/development/dependencies.html">Dependency management</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../_/development/release_process.html">Release process</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<a class="nav-link" href="../faq/index.html">FAQ</a>
</span>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<a class="nav-link" href="../plugins/index.html">Plug-ins</a>
</span>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</nav>
<aside class="toc sidebar">
<div class="toc-menu"></div>
</aside>
<main class="article default-main full-800" data-ceiling="topbar">
<div class="article-header">
<nav class="crumbs" aria-label="breadcrumbs">
<ul>
<li class="crumb">Cassandra</li>
<li class="crumb"><a href="index.html">Cassandra Query Language (CQL)</a></li>
<li class="crumb"><a href="SASI.html">SASI</a></li>
</ul>
</nav>
<div class="tools" role="navigation">
<ul>
<li class="tool edit"><a href="https://github.com/apache/cassandra/edit/cassandra-4.1/doc/modules/cassandra/pages/cql/SASI.adoc" title="Edit Page" target="_blank" rel="noopener">Edit</a></li>
</ul>
</div>
</div>
<article class="doc">
<div class="sect1">
<h2 id="sasiindex"><a class="anchor" href="#sasiindex"></a>SASIIndex</h2>
<div class="sectionbody">
<div class="paragraph">
<p><a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java"><code>SASIIndex</code></a>,
or <code>SASI</code> for short, is an implementation of Cassandra&#8217;s <code>Index</code>
interface that can be used as an alternative to the existing
implementations. SASI&#8217;s indexing and querying improves on existing
implementations by tailoring it specifically to Cassandra’s needs. SASI
has superior performance in cases where queries would previously require
filtering. In achieving this performance, SASI aims to be significantly
less resource intensive than existing implementations, in memory, disk,
and CPU usage. In addition, SASI supports prefix and contains queries on
strings (similar to SQL’s <code>LIKE = "foo*"</code> or <code>LIKE = "*foo*"</code>).</p>
</div>
<div class="paragraph">
<p>The following goes on to describe how to get up and running with SASI,
demonstrates usage with examples, and provides some details on its
implementation.</p>
</div>
<div class="sect2">
<h3 id="using-sasi"><a class="anchor" href="#using-sasi"></a>Using SASI</h3>
<div class="paragraph">
<p>The examples below walk through creating a table and indexes on its
columns, and performing queries on some inserted data.</p>
</div>
<div class="paragraph">
<p>The examples below assume the <code>demo</code> keyspace has been created and is in
use.</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh&gt; CREATE KEYSPACE demo WITH replication = {
... 'class': 'SimpleStrategy',
... 'replication_factor': '1'
... };
cqlsh&gt; USE demo;</pre>
</div>
</div>
<div class="paragraph">
<p>All examples are performed on the <code>sasi</code> table:</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; CREATE TABLE sasi (id uuid, first_name text, last_name text,
... age int, height int, created_at bigint, primary key (id));</pre>
</div>
</div>
<div class="sect3">
<h4 id="creating-indexes"><a class="anchor" href="#creating-indexes"></a>Creating Indexes</h4>
<div class="paragraph">
<p>To create SASI indexes use CQL’s <code>CREATE CUSTOM INDEX</code> statement:</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; CREATE CUSTOM INDEX ON sasi (first_name) USING 'org.apache.cassandra.index.sasi.SASIIndex'
... WITH OPTIONS = {
... 'analyzer_class':
... 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer',
... 'case_sensitive': 'false'
... };
cqlsh:demo&gt; CREATE CUSTOM INDEX ON sasi (last_name) USING 'org.apache.cassandra.index.sasi.SASIIndex'
... WITH OPTIONS = {'mode': 'CONTAINS'};
cqlsh:demo&gt; CREATE CUSTOM INDEX ON sasi (age) USING 'org.apache.cassandra.index.sasi.SASIIndex';
cqlsh:demo&gt; CREATE CUSTOM INDEX ON sasi (created_at) USING 'org.apache.cassandra.index.sasi.SASIIndex'
... WITH OPTIONS = {'mode': 'SPARSE'};</pre>
</div>
</div>
<div class="paragraph">
<p>The indexes created have some options specified that customize their
behaviour and potentially performance. The index on <code>first_name</code> is
case-insensitive. The analyzers are discussed more in a subsequent
example. The <code>NonTokenizingAnalyzer</code> performs no analysis on the text.
Each index has a mode: <code>PREFIX</code>, <code>CONTAINS</code>, or <code>SPARSE</code>, the first
being the default. The <code>last_name</code> index is created with the mode
<code>CONTAINS</code> which matches terms on suffixes instead of prefix only.
Examples of this are available below and more detail can be found in the
section on <a href="#ondiskindexbuilder">OnDiskIndex</a>. The <code>created_at</code> column
is created with its mode set to <code>SPARSE</code>, which is meant to improve
performance of querying large, dense number ranges like timestamps for
data inserted every millisecond. Details of the <code>SPARSE</code> implementation
can also be found in the section on the
<a href="#ondiskindexbuilder">OnDiskIndex</a>. The <code>age</code> index is created with
the default <code>PREFIX</code> mode and no case-sensitivity or text analysis
options are specified since the field is numeric.</p>
</div>
<div class="paragraph">
<p>After inserting the following data and performing a <code>nodetool flush</code>,
SASI performing index flushes to disk can be seen in Cassandra’s logs –
although the direct call to flush is not required (see
<a href="#indexmemtable">IndexMemtable</a> for more details).</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
... VALUES (556ebd54-cbe5-4b75-9aae-bf2a31a24500, 'Pavel', 'Yaskevich', 27, 181, 1442959315018);
cqlsh:demo&gt; INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
... VALUES (5770382a-c56f-4f3f-b755-450e24d55217, 'Jordan', 'West', 26, 173, 1442959315019);
cqlsh:demo&gt; INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
... VALUES (96053844-45c3-4f15-b1b7-b02c441d3ee1, 'Mikhail', 'Stepura', 36, 173, 1442959315020);
cqlsh:demo&gt; INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
... VALUES (f5dfcabe-de96-4148-9b80-a1c41ed276b4, 'Michael', 'Kjellman', 26, 180, 1442959315021);
cqlsh:demo&gt; INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
... VALUES (2970da43-e070-41a8-8bcb-35df7a0e608a, 'Johnny', 'Zhang', 32, 175, 1442959315022);
cqlsh:demo&gt; INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
... VALUES (6b757016-631d-4fdb-ac62-40b127ccfbc7, 'Jason', 'Brown', 40, 182, 1442959315023);
cqlsh:demo&gt; INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
... VALUES (8f909e8a-008e-49dd-8d43-1b0df348ed44, 'Vijay', 'Parthasarathy', 34, 183, 1442959315024);
cqlsh:demo&gt; SELECT first_name, last_name, age, height, created_at FROM sasi;
first_name | last_name | age | height | created_at
------------+---------------+-----+--------+---------------
Michael | Kjellman | 26 | 180 | 1442959315021
Mikhail | Stepura | 36 | 173 | 1442959315020
Jason | Brown | 40 | 182 | 1442959315023
Pavel | Yaskevich | 27 | 181 | 1442959315018
Vijay | Parthasarathy | 34 | 183 | 1442959315024
Jordan | West | 26 | 173 | 1442959315019
Johnny | Zhang | 32 | 175 | 1442959315022
(7 rows)</pre>
</div>
</div>
</div>
<div class="sect3">
<h4 id="equality-prefix-queries"><a class="anchor" href="#equality-prefix-queries"></a>Equality &amp; Prefix Queries</h4>
<div class="paragraph">
<p>SASI supports all queries already supported by CQL, including LIKE
statement for PREFIX, CONTAINS and SUFFIX searches.</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; SELECT first_name, last_name, age, height, created_at FROM sasi
... WHERE first_name = 'Pavel';
first_name | last_name | age | height | created_at
-------------+-----------+-----+--------+---------------
Pavel | Yaskevich | 27 | 181 | 1442959315018
(1 rows)</pre>
</div>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; SELECT first_name, last_name, age, height, created_at FROM sasi
... WHERE first_name = 'pavel';
first_name | last_name | age | height | created_at
-------------+-----------+-----+--------+---------------
Pavel | Yaskevich | 27 | 181 | 1442959315018
(1 rows)</pre>
</div>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; SELECT first_name, last_name, age, height, created_at FROM sasi
... WHERE first_name LIKE 'M%';
first_name | last_name | age | height | created_at
------------+-----------+-----+--------+---------------
Michael | Kjellman | 26 | 180 | 1442959315021
Mikhail | Stepura | 36 | 173 | 1442959315020
(2 rows)</pre>
</div>
</div>
<div class="paragraph">
<p>Of course, the case of the query does not matter for the <code>first_name</code>
column because of the options provided at index creation time.</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; SELECT first_name, last_name, age, height, created_at FROM sasi
... WHERE first_name LIKE 'm%';
first_name | last_name | age | height | created_at
------------+-----------+-----+--------+---------------
Michael | Kjellman | 26 | 180 | 1442959315021
Mikhail | Stepura | 36 | 173 | 1442959315020
(2 rows)</pre>
</div>
</div>
</div>
<div class="sect3">
<h4 id="compound-queries"><a class="anchor" href="#compound-queries"></a>Compound Queries</h4>
<div class="paragraph">
<p>SASI supports queries with multiple predicates, however, due to the
nature of the default indexing implementation, CQL requires the user to
specify <code>ALLOW FILTERING</code> to opt-in to the potential performance
pitfalls of such a query. With SASI, while the requirement to include
<code>ALLOW FILTERING</code> remains, to reduce modifications to the grammar, the
performance pitfalls do not exist because filtering is not performed.
Details on how SASI joins data from multiple predicates are available
below in the <a href="#implementation-details">Implementation Details</a>
section.</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; SELECT first_name, last_name, age, height, created_at FROM sasi
... WHERE first_name LIKE 'M%' and age &lt; 30 ALLOW FILTERING;
first_name | last_name | age | height | created_at
------------+-----------+-----+--------+---------------
Michael | Kjellman | 26 | 180 | 1442959315021
(1 rows)</pre>
</div>
</div>
</div>
<div class="sect3">
<h4 id="suffix-queries"><a class="anchor" href="#suffix-queries"></a>Suffix Queries</h4>
<div class="paragraph">
<p>The next example demonstrates <code>CONTAINS</code> mode on the <code>last_name</code> column.
By using this mode, predicates can search for any strings containing the
search string as a sub-string. In this case the strings containing “a”
or “an”.</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; SELECT * FROM sasi WHERE last_name LIKE '%a%';
id | age | created_at | first_name | height | last_name
--------------------------------------+-----+---------------+------------+--------+---------------
f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman
96053844-45c3-4f15-b1b7-b02c441d3ee1 | 36 | 1442959315020 | Mikhail | 173 | Stepura
556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | 1442959315018 | Pavel | 181 | Yaskevich
8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | 1442959315024 | Vijay | 183 | Parthasarathy
2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang
(5 rows)
cqlsh:demo&gt; SELECT * FROM sasi WHERE last_name LIKE '%an%';
id | age | created_at | first_name | height | last_name
--------------------------------------+-----+---------------+------------+--------+-----------
f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman
2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang
(2 rows)</pre>
</div>
</div>
</div>
<div class="sect3">
<h4 id="expressions-on-non-indexed-columns"><a class="anchor" href="#expressions-on-non-indexed-columns"></a>Expressions on Non-Indexed Columns</h4>
<div class="paragraph">
<p>SASI also supports filtering on non-indexed columns like <code>height</code>. The
expression can only narrow down an existing query using <code>AND</code>.</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; SELECT * FROM sasi WHERE last_name LIKE '%a%' AND height &gt;= 175 ALLOW FILTERING;
id | age | created_at | first_name | height | last_name
--------------------------------------+-----+---------------+------------+--------+---------------
f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman
556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | 1442959315018 | Pavel | 181 | Yaskevich
8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | 1442959315024 | Vijay | 183 | Parthasarathy
2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang
(4 rows)</pre>
</div>
</div>
</div>
<div class="sect3">
<h4 id="delimiter-based-tokenization-analysis"><a class="anchor" href="#delimiter-based-tokenization-analysis"></a>Delimiter based Tokenization Analysis</h4>
<div class="paragraph">
<p>A simple text analysis provided is delimiter based tokenization. This
provides an alternative to indexing collections, as delimiter separated
text can be indexed without the overhead of <code>CONTAINS</code> mode nor using
<code>PREFIX</code> or <code>SUFFIX</code> queries.</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; ALTER TABLE sasi ADD aliases text;
cqlsh:demo&gt; CREATE CUSTOM INDEX on sasi (aliases) USING 'org.apache.cassandra.index.sasi.SASIIndex'
... WITH OPTIONS = {
... 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.DelimiterAnalyzer',
... 'delimiter': ',',
... 'mode': 'prefix',
... 'analyzed': 'true'};
cqlsh:demo&gt; UPDATE sasi SET aliases = 'Mike,Mick,Mikey,Mickey' WHERE id = f5dfcabe-de96-4148-9b80-a1c41ed276b4;
cqlsh:demo&gt; SELECT * FROM sasi WHERE aliases LIKE 'Mikey' ALLOW FILTERING;
id | age | aliases | created_at | first_name | height | last_name
--------------------------------------+-----+------------------------+---------------+------------+--------+-----------
f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | Mike,Mick,Mikey,Mickey | 1442959315021 | Michael | 180 | Kjellman</pre>
</div>
</div>
</div>
<div class="sect3">
<h4 id="text-analysis-tokenization-and-stemming"><a class="anchor" href="#text-analysis-tokenization-and-stemming"></a>Text Analysis (Tokenization and Stemming)</h4>
<div class="paragraph">
<p>Lastly, to demonstrate text analysis an additional column is needed on
the table. Its definition, index, and statements to update rows are
shown below.</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; ALTER TABLE sasi ADD bio text;
cqlsh:demo&gt; CREATE CUSTOM INDEX ON sasi (bio) USING 'org.apache.cassandra.index.sasi.SASIIndex'
... WITH OPTIONS = {
... 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer',
... 'tokenization_enable_stemming': 'true',
... 'analyzed': 'true',
... 'tokenization_normalize_lowercase': 'true',
... 'tokenization_locale': 'en'
... };
cqlsh:demo&gt; UPDATE sasi SET bio = 'Software Engineer, who likes distributed systems, doesnt like to argue.' WHERE id = 5770382a-c56f-4f3f-b755-450e24d55217;
cqlsh:demo&gt; UPDATE sasi SET bio = 'Software Engineer, works on the freight distribution at nights and likes arguing' WHERE id = 556ebd54-cbe5-4b75-9aae-bf2a31a24500;
cqlsh:demo&gt; SELECT * FROM sasi;
id | age | bio | created_at | first_name | height | last_name
--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+---------------
f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | null | 1442959315021 | Michael | 180 | Kjellman
96053844-45c3-4f15-b1b7-b02c441d3ee1 | 36 | null | 1442959315020 | Mikhail | 173 | Stepura
6b757016-631d-4fdb-ac62-40b127ccfbc7 | 40 | null | 1442959315023 | Jason | 182 | Brown
556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich
8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | null | 1442959315024 | Vijay | 183 | Parthasarathy
5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West
2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | null | 1442959315022 | Johnny | 175 | Zhang
(7 rows)</pre>
</div>
</div>
<div class="paragraph">
<p>Index terms and query search strings are stemmed for the <code>bio</code> column
because it was configured to use the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java"><code>StandardAnalyzer</code></a>
and <code>analyzed</code> is set to <code>true</code>. The <code>tokenization_normalize_lowercase</code>
is similar to the <code>case_sensitive</code> property but for the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java"><code>StandardAnalyzer</code></a>.
These queries demonstrate the stemming applied by
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java"><code>StandardAnalyzer</code></a>.</p>
</div>
<div class="literalblock">
<div class="content">
<pre>cqlsh:demo&gt; SELECT * FROM sasi WHERE bio LIKE 'distributing';
id | age | bio | created_at | first_name | height | last_name
--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich
5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West
(2 rows)
cqlsh:demo&gt; SELECT * FROM sasi WHERE bio LIKE 'they argued';
id | age | bio | created_at | first_name | height | last_name
--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich
5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West
(2 rows)
cqlsh:demo&gt; SELECT * FROM sasi WHERE bio LIKE 'working at the company';
id | age | bio | created_at | first_name | height | last_name
--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich
(1 rows)
cqlsh:demo&gt; SELECT * FROM sasi WHERE bio LIKE 'soft eng';
id | age | bio | created_at | first_name | height | last_name
--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich
5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West
(2 rows)</pre>
</div>
</div>
</div>
</div>
<div class="sect2">
<h3 id="implementation-details"><a class="anchor" href="#implementation-details"></a>Implementation Details</h3>
<div class="paragraph">
<p>While SASI, at the surface, is simply an implementation of the <code>Index</code>
interface, at its core there are several data structures and algorithms
used to satisfy it. These are described here. Additionally, the changes
internal to Cassandra to support SASI’s integration are described.</p>
</div>
<div class="paragraph">
<p>The <code>Index</code> interface divides responsibility of the implementer into two
parts: Indexing and Querying. Further, Cassandra makes it possible to
divide those responsibilities into the memory and disk components. SASI
takes advantage of Cassandra’s write-once, immutable, ordered data model
to build indexes along with the flushing of the memtable to disk – this
is the origin of the name “SSTable Attached Secondary Index”.</p>
</div>
<div class="paragraph">
<p>The SASI index data structures are built in memory as the SSTable is
being written and they are flushed to disk before the writing of the
SSTable completes. The writing of each index file only requires
sequential writes to disk. In some cases, partial flushes are performed,
and later stitched back together, to reduce memory usage. These data
structures are optimized for this use case.</p>
</div>
<div class="paragraph">
<p>Taking advantage of Cassandra’s ordered data model, at query time,
candidate indexes are narrowed down for searching, minimizing the amount
of work done. Searching is then performed using an efficient method that
streams data off disk as needed.</p>
</div>
<div class="sect3">
<h4 id="indexing"><a class="anchor" href="#indexing"></a>Indexing</h4>
<div class="paragraph">
<p>Per SSTable, SASI writes an index file for each indexed column. The data
for these files is built in memory using the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java"><code>OnDiskIndexBuilder</code></a>.
Once flushed to disk, the data is read using the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java"><code>OnDiskIndex</code></a>
class. These are composed of bytes representing indexed terms, organized
for efficient writing or searching respectively. The keys and values
they hold represent tokens and positions in an SSTable and these are
stored per-indexed term in
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java"><code>TokenTreeBuilder</code></a>s
for writing, and
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java"><code>TokenTree</code></a>s
for querying. These index files are memory mapped after being written to
disk, for quicker access. For indexing data in the memtable, SASI uses
its
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java"><code>IndexMemtable</code></a>
class.</p>
</div>
<div class="sect4">
<h5 id="ondiskindexbuilder"><a class="anchor" href="#ondiskindexbuilder"></a>OnDiskIndex(Builder)</h5>
<div class="paragraph">
<p>Each
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java"><code>OnDiskIndex</code></a>
is an instance of a modified
<a href="https://en.wikipedia.org/wiki/Suffix_array">Suffix Array</a> data structure.
The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java"><code>OnDiskIndex</code></a>
is comprised of page-size blocks of sorted terms and pointers to the
terms’ associated data, as well as the data itself, stored also in one
or more page-sized blocks. The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java"><code>OnDiskIndex</code></a>
is structured as a tree of arrays, where each level describes the terms
in the level below, the final level being the terms themselves. The
<code>PointerLevel</code>s and their <code>PointerBlock</code>s contain terms and pointers to
other blocks that <em>end</em> with those terms. The <code>DataLevel</code>, the final
level, and its <code>DataBlock</code>s contain terms and point to the data itself,
contained in
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java"><code>TokenTree</code></a>s.</p>
</div>
<div class="paragraph">
<p>The terms written to the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java"><code>OnDiskIndex</code></a>
vary depending on its “mode”: either <code>PREFIX</code>, <code>CONTAINS</code>, or
<code>SPARSE</code>. In the <code>PREFIX</code> and <code>SPARSE</code> cases, terms’ exact values are
written exactly once per <code>OnDiskIndex</code>. For example, when using a
<code>PREFIX</code> index with terms <code>Jason</code>, <code>Jordan</code>, <code>Pavel</code>, all three will be
included in the index. A <code>CONTAINS</code> index writes additional terms for
each suffix of each term recursively. Continuing with the example, a
<code>CONTAINS</code> index storing the previous terms would also store <code>ason</code>,
<code>ordan</code>, <code>avel</code>, <code>son</code>, <code>rdan</code>, <code>vel</code>, etc. This allows for queries on
the suffix of strings. The <code>SPARSE</code> mode differs from <code>PREFIX</code> in that
for every 64 blocks of terms a
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java"><code>TokenTree</code></a>
is built merging all the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java"><code>TokenTree</code></a>s
for each term into a single one. This copy of the data is used for
efficient iteration of large ranges of e.g. timestamps. The index
“mode” is configurable per column at index creation time.</p>
</div>
</div>
<div class="sect4">
<h5 id="tokentreebuilder"><a class="anchor" href="#tokentreebuilder"></a>TokenTree(Builder)</h5>
<div class="paragraph">
<p>The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java"><code>TokenTree</code></a>
is an implementation of the well-known
<a href="https://en.wikipedia.org/wiki/B%2B_tree">B+-tree</a> that has been modified
to optimize for its use-case. In particular, it has been optimized to
associate tokens, longs, with a set of positions in an SSTable, also
longs. Allowing the set of long values accommodates the possibility of a
hash collision in the token, but the data structure is optimized for the
unlikely possibility of such a collision.</p>
</div>
<div class="paragraph">
<p>To optimize for its write-once environment the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java"><code>TokenTreeBuilder</code></a>
completely loads its interior nodes as the tree is built and it uses the
well-known algorithm optimized for bulk-loading the data structure.</p>
</div>
<div class="paragraph">
<p><a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java"><code>TokenTree</code></a>s
provide the means to iterate over tokens, and file positions, that match
a given term, and to skip forward in that iteration, an operation used
heavily at query time.</p>
</div>
</div>
<div class="sect4">
<h5 id="indexmemtable"><a class="anchor" href="#indexmemtable"></a>IndexMemtable</h5>
<div class="paragraph">
<p>The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java"><code>IndexMemtable</code></a>
handles indexing the in-memory data held in the memtable. The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java"><code>IndexMemtable</code></a>
in turn manages either a
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java"><code>TrieMemIndex</code></a>
or a
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java"><code>SkipListMemIndex</code></a>
per-column. The choice of which index type is used is data dependent.
The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java"><code>TrieMemIndex</code></a>
is used for literal types. <code>AsciiType</code> and <code>UTF8Type</code> are literal types
by default but any column can be configured as a literal type using the
<code>is_literal</code> option at index creation time. For non-literal types the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java"><code>SkipListMemIndex</code></a>
is used. The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java"><code>TrieMemIndex</code></a>
is an implementation that can efficiently support prefix queries on
character-like data. The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java"><code>SkipListMemIndex</code></a>,
conversely, is better suited for other Cassandra data types like
numbers.</p>
</div>
<div class="paragraph">
<p>The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java"><code>TrieMemIndex</code></a>
is built using either the <code>ConcurrentRadixTree</code> or
<code>ConcurrentSuffixTree</code> from the <code>com.googlecode.concurrenttrees</code>
package. The choice between the two is made based on the indexing mode,
<code>PREFIX</code> or other modes, and <code>CONTAINS</code> mode, respectively.</p>
</div>
<div class="paragraph">
<p>The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java"><code>SkipListMemIndex</code></a>
is built on top of <code>java.util.concurrent.ConcurrentSkipListSet</code>.</p>
</div>
</div>
</div>
<div class="sect3">
<h4 id="querying"><a class="anchor" href="#querying"></a>Querying</h4>
<div class="paragraph">
<p>Responsible for converting the internal <code>IndexExpression</code> representation
into SASI’s
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java"><code>Operation</code></a>
and
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java"><code>Expression</code></a>
trees, optimizing the trees to reduce the amount of work done, and
driving the query itself, the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java"><code>QueryPlan</code></a>
is the work horse of SASI’s querying implementation. To efficiently
perform union and intersection operations, SASI provides several
iterators similar to Cassandra’s <code>MergeIterator</code>, but tailored
specifically for SASI’s use while including more features. The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java"><code>RangeUnionIterator</code></a>,
like its name suggests, performs set unions over sets of tokens/keys
matching the query, only reading as much data as it needs from each set
to satisfy the query. The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java"><code>RangeIntersectionIterator</code></a>,
similar to its counterpart, performs set intersections over its data.</p>
</div>
<div class="sect4">
<h5 id="queryplan"><a class="anchor" href="#queryplan"></a>QueryPlan</h5>
<div class="paragraph">
<p>The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java"><code>QueryPlan</code></a>
instantiated per search query is at the core of SASI’s querying
implementation. Its work can be divided in two stages: analysis and
execution.</p>
</div>
<div class="paragraph">
<p>During the analysis phase,
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java"><code>QueryPlan</code></a>
converts from Cassandra’s internal representation of <code>IndexExpression</code>s,
which has also been modified to support encoding queries that contain
ORs and groupings of expressions using parentheses (see the
<a href="#cassandra-internal-changes">Cassandra Internal Changes</a> section
below for more details). This process produces a tree of
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java"><code>Operation</code></a>s,
which in turn may contain
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java"><code>Expression</code></a>s,
all of which provide an alternative, more efficient, representation of
the query.</p>
</div>
<div class="paragraph">
<p>During execution, the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java"><code>QueryPlan</code></a>
uses the <code>DecoratedKey</code>-generating iterator created from the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java"><code>Operation</code></a>
tree. These keys are read from disk and a final check to ensure they
satisfy the query is made, once again using the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java"><code>Operation</code></a>
tree. At the point the desired amount of matching data has been found,
or there is no more matching data, the result set is returned to the
coordinator through the existing internal components.</p>
</div>
<div class="paragraph">
<p>The number of queries (total/failed/timed-out), and their latencies, are
maintained per-table/column family.</p>
</div>
<div class="paragraph">
<p>SASI also supports concurrently iterating terms for the same index
across SSTables. The concurrency factor is controlled by the
<code>cassandra.search_concurrency_factor</code> system property. The default is
<code>1</code>.</p>
</div>
<div class="sect5">
<h6 id="querycontroller"><a class="anchor" href="#querycontroller"></a>QueryController</h6>
<div class="paragraph">
<p>Each
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java"><code>QueryPlan</code></a>
references a
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java"><code>QueryController</code></a>
used throughout the execution phase. The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java"><code>QueryController</code></a>
has two responsibilities: to manage and ensure the proper cleanup of
resources (indexes), and to strictly enforce the time bound per query,
specified by the user via the range slice timeout. All indexes are
accessed via the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java"><code>QueryController</code></a>
so that they can be safely released by it later. The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java"><code>QueryController</code></a>’s
<code>checkpoint</code> function is called in specific places in the execution path
to ensure the time-bound is enforced.</p>
</div>
</div>
<div class="sect5">
<h6 id="queryplan-optimizations"><a class="anchor" href="#queryplan-optimizations"></a>QueryPlan Optimizations</h6>
<div class="paragraph">
<p>While in the analysis phase, the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java"><code>QueryPlan</code></a>
performs several potential optimizations to the query. The goal of these
optimizations is to reduce the amount of work performed during the
execution phase.</p>
</div>
<div class="paragraph">
<p>The simplest optimization performed is compacting multiple expressions
joined by logical intersections (<code>AND</code>) into a single
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java"><code>Operation</code></a>
with three or more
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java"><code>Expression</code></a>s.
For example, the query
<code>WHERE age &lt; 100 AND fname = 'p*' AND fname != 'pa*' AND age &gt; 21</code>
would, without modification, have the following tree:</p>
</div>
<div class="literalblock">
<div class="content">
<pre>                         ┌───────┐
                ┌────────│  AND  │──────┐
                │        └───────┘      │
                ▼                       ▼
            ┌───────┐             ┌──────────┐
      ┌─────│  AND  │─────┐       │age &lt; 100 │
      │     └───────┘     │       └──────────┘
      ▼                   ▼
┌──────────┐          ┌───────┐
│ fname=p* │        ┌─│  AND  │───┐
└──────────┘        │ └───────┘   │
                    ▼             ▼
              ┌──────────┐  ┌──────────┐
              │fname!=pa*│  │ age &gt; 21 │
              └──────────┘  └──────────┘</pre>
</div>
</div>
<div class="paragraph">
<p><a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java"><code>QueryPlan</code></a>
will remove the redundant right branch whose root is the final <code>AND</code> and
has leaves <code>fname != pa*</code> and <code>age &gt; 21</code>. These
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java"><code>Expression</code></a>s
will be compacted into the parent <code>AND</code>, a safe operation due to <code>AND</code>
being associative and commutative. The resulting tree looks like the
following:</p>
</div>
<div class="literalblock">
<div class="content">
<pre>                               ┌───────┐
                      ┌────────│  AND  │──────┐
                      │        └───────┘      │
                      ▼                       ▼
                  ┌───────┐             ┌──────────┐
      ┌───────────│  AND  │────────┐    │age &lt; 100 │
      │           └───────┘        │    └──────────┘
      ▼               │            ▼
┌──────────┐          │      ┌──────────┐
│ fname=p* │          ▼      │ age &gt; 21 │
└──────────┘    ┌──────────┐ └──────────┘
                │fname!=pa*│
                └──────────┘</pre>
</div>
</div>
<div class="paragraph">
<p>When excluding results from the result set, using <code>!=</code>, the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java"><code>QueryPlan</code></a>
determines the best method for handling it. For range queries, for
example, it may be optimal to divide the range into multiple parts with
a hole for the exclusion. For string queries, such as this one, it is
more optimal, however, to simply note which data to skip, or exclude,
while scanning the index. Following this optimization the tree looks
like this:</p>
</div>
<div class="literalblock">
<div class="content">
<pre>                              ┌───────┐
                     ┌────────│  AND  │──────┐
                     │        └───────┘      │
                     ▼                       ▼
                 ┌───────┐             ┌──────────┐
         ┌───────│  AND  │────────┐    │age &lt; 100 │
         │       └───────┘        │    └──────────┘
         ▼                        ▼
┌──────────────────┐        ┌──────────┐
│     fname=p*     │        │ age &gt; 21 │
│ exclusions=[pa*] │        └──────────┘
└──────────────────┘</pre>
</div>
</div>
<div class="paragraph">
<p>The last type of optimization applied, for this query, is to merge range
expressions across branches of the tree – without modifying the meaning
of the query, of course. In this case, because the query contains all
<code>AND</code>s, the <code>age</code> expressions can be collapsed. Along with this
optimization, the initial collapsing of unneeded <code>AND</code>s can also be
applied once more to result in this final tree used to execute the
query:</p>
</div>
<div class="literalblock">
<div class="content">
<pre>                ┌───────┐
         ┌──────│  AND  │───────┐
         │      └───────┘       │
         ▼                      ▼
┌──────────────────┐   ┌────────────────┐
│     fname=p*     │   │ 21 &lt; age &lt; 100 │
│ exclusions=[pa*] │   └────────────────┘
└──────────────────┘</pre>
</div>
</div>
</div>
</div>
<div class="sect4">
<h5 id="operations-and-expressions"><a class="anchor" href="#operations-and-expressions"></a>Operations and Expressions</h5>
<div class="paragraph">
<p>As discussed, the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java"><code>QueryPlan</code></a>
optimizes a tree represented by
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java"><code>Operation</code></a>s
as interior nodes, and
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java"><code>Expression</code></a>s
as leaves. The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java"><code>Operation</code></a>
class, more specifically, can have zero, one, or two
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java"><code>Operation</code></a>s
as children and an unlimited number of expressions. The iterators used
to perform the queries, discussed below in the
“Range(Union|Intersection)Iterator” section, implement the necessary
logic to merge results transparently regardless of the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java"><code>Operation</code></a>’s
children.</p>
</div>
<div class="paragraph">
<p>Besides participating in the optimizations performed by the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java"><code>QueryPlan</code></a>,
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java"><code>Operation</code></a>
is also responsible for taking a row that has been returned by the query
and performing a final validation that it in fact does match. This
<code>satisfiesBy</code> operation is performed recursively from the root of the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java"><code>Operation</code></a>
tree for a given query. These checks are performed directly on the data
in a given row. For more details on how <code>satisfiesBy</code> works, see the
documentation
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java#L87-L123">in
the code</a>.</p>
</div>
</div>
<div class="sect4">
<h5 id="rangeunionintersectioniterator"><a class="anchor" href="#rangeunionintersectioniterator"></a>Range(Union|Intersection)Iterator</h5>
<div class="paragraph">
<p>The abstract <code>RangeIterator</code> class provides a unified interface over the
two main operations performed by SASI at various layers in the execution
path: set intersection and union. These operations are performed in an
iterated, or “streaming”, fashion to prevent unneeded reads of
elements from either set. In both the intersection and union cases the
algorithms take advantage of the data being pre-sorted using the same
sort order, e.g. term or token order.</p>
</div>
<div class="paragraph">
<p>The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java"><code>RangeUnionIterator</code></a>
performs the “Merge-Join” portion of the
<a href="https://en.wikipedia.org/wiki/Sort-merge_join">Sort-Merge-Join</a>
algorithm, with the properties of an outer-join, or union. It is
implemented with several optimizations to improve its performance over a
large number of iterators – sets to union. Specifically, the iterator
exploits the likely case of the data having many sub-groups of
overlapping ranges and the unlikely case that all ranges will overlap
each other. For more details see the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java#L9-L21">javadoc</a>.</p>
</div>
<div class="paragraph">
<p>The
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java"><code>RangeIntersectionIterator</code></a>
itself is not a subclass of <code>RangeIterator</code>. It is a container for
several classes, one of which, <code>AbstractIntersectionIterator</code>,
sub-classes <code>RangeIterator</code>. SASI supports two methods of performing the
intersection operation, and the ability to be adaptive in choosing
between them based on some properties of the data.</p>
</div>
<div class="paragraph">
<p><code>BounceIntersectionIterator</code>, and the <code>BOUNCE</code> strategy, works like the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java"><code>RangeUnionIterator</code></a>
in that it performs a “Merge-Join”; however, its nature is similar to
an inner-join, where like values are merged by a data-specific merge
function (e.g. merging two tokens in a list to look up in an SSTable
later). See the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L88-L101">javadoc</a>
for more details on its implementation.</p>
</div>
<div class="paragraph">
<p><code>LookupIntersectionIterator</code>, and the <code>LOOKUP</code> strategy, performs a
different operation, more similar to a lookup in an associative data
structure, or “hash lookup” in database terminology. Once again,
details on the implementation can be found in the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L199-L208">javadoc</a>.</p>
</div>
<div class="paragraph">
<p>The choice between the two iterators, or the <code>ADAPTIVE</code> strategy, is
based upon the ratio of data set sizes of the minimum and maximum range
of the sets being intersected. If the number of elements in the minimum
range divided by the number of elements in the maximum range is less
than or equal to <code>0.01</code>, then the <code>ADAPTIVE</code> strategy chooses the
<code>LookupIntersectionIterator</code>, otherwise the <code>BounceIntersectionIterator</code>
is chosen.</p>
</div>
</div>
</div>
<div class="sect3">
<h4 id="the-sasiindex-class"><a class="anchor" href="#the-sasiindex-class"></a>The SASIIndex Class</h4>
<div class="paragraph">
<p>The above components are glued together by the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java"><code>SASIIndex</code></a>
class which implements <code>Index</code>, and is instantiated per-table containing
SASI indexes. It manages all indexes for a table via the
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/DataTracker.java"><code>sasi.conf.DataTracker</code></a>
and
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/view/View.java"><code>sasi.conf.view.View</code></a>
components, controls writing of all indexes for an SSTable via its
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java"><code>PerSSTableIndexWriter</code></a>,
and initiates searches with <code>Searcher</code>. These classes glue the
previously mentioned indexing components together with Cassandra’s
SSTable life-cycle ensuring indexes are not only written when Memtables
flush, but also as SSTables are compacted. For querying, the <code>Searcher</code>
does little but defer to
<a href="https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java"><code>QueryPlan</code></a>
and update e.g. latency metrics exposed by SASI.</p>
</div>
</div>
<div class="sect3">
<h4 id="cassandra-internal-changes"><a class="anchor" href="#cassandra-internal-changes"></a>Cassandra Internal Changes</h4>
<div class="paragraph">
<p>To support the above changes and integrate them into Cassandra a few
minor internal changes were made to Cassandra itself. These are
described here.</p>
</div>
<div class="sect4">
<h5 id="sstable-write-life-cycle-notifications"><a class="anchor" href="#sstable-write-life-cycle-notifications"></a>SSTable Write Life-cycle Notifications</h5>
<div class="paragraph">
<p>The <code>SSTableFlushObserver</code> is an observer pattern-like interface, whose
sub-classes can register to be notified about events in the life-cycle
of writing out an SSTable. Sub-classes can be notified when a flush
begins and ends, as well as when each next row is about to be written,
and each next column. SASI’s <code>PerSSTableIndexWriter</code>, discussed above,
is the only current subclass.</p>
</div>
</div>
</div>
<div class="sect3">
<h4 id="limitations-and-caveats"><a class="anchor" href="#limitations-and-caveats"></a>Limitations and Caveats</h4>
<div class="paragraph">
<p>The following are items that can be addressed in future updates but are
not available in this repository or are not currently implemented.</p>
</div>
<div class="ulist">
<ul>
<li>
<p>The cluster must be configured to use a partitioner that produces
<code>LongToken</code>s, e.g. <code>Murmur3Partitioner</code>. Other existing partitioners
which don’t produce <code>LongToken</code>s, e.g. <code>ByteOrderedPartitioner</code> and
<code>RandomPartitioner</code> will not work with SASI.</p>
</li>
<li>
<p>Not Equals and OR support have been removed in this release while
changes are made to Cassandra itself to support them.</p>
</li>
</ul>
</div>
</div>
<div class="sect3">
<h4 id="contributors"><a class="anchor" href="#contributors"></a>Contributors</h4>
<div class="ulist">
<ul>
<li>
<p><a href="https://github.com/xedin">Pavel Yaskevich</a></p>
</li>
<li>
<p><a href="https://github.com/jrwest">Jordan West</a></p>
</li>
<li>
<p><a href="https://github.com/mkjellman">Michael Kjellman</a></p>
</li>
<li>
<p><a href="https://github.com/jasobrown">Jason Brown</a></p>
</li>
<li>
<p><a href="https://github.com/mishail">Mikhail Stepura</a></p>
</li>
</ul>
</div>
</div>
</div>
</div>
</div>
</article>
</main>
</div>
</div>
<footer class="grad grad--two flex-center pb-xlarge">
<div class="inner text-center z2 relative">
<h2 class="white py-small">Get started with Cassandra, fast.</h2>
<a id="footer-cta" href="/_/quickstart.html" class="btn btn--filled ma-medium">Quickstart Guide</a>
</div>
<div class="inner flex flex-distribute-items mt-xlarge z2 relative">
<div class="col-2">
<div id="footer-logo" class="logo logo--footer mb-medium"><img src="../../../../assets/img/logo-white-r.png" alt="Cassandra Logo"></div>
<p>Apache Cassandra<img src="../../../../assets/img/registered.svg" alt="®" style="width:18px;"> powers mission-critical deployments with improved performance and unparalleled levels of scale in the cloud.</p>
<div class="footer-social-icons">
<a href="https://twitter.com/cassandra?lang=en" target="_blank"><img src="../../../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank"><img src="../../../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank"><img src="../../../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
</div>
<div class="col-2 flex flex-center">
<ul class="columns-2">
<li class="mb-small"><a href="/">Home</a></li>
<li class="mb-small"><a href="/_/cassandra-basics.html">Cassandra Basics</a></li>
<li class="mb-small"><a href="/_/quickstart.html">Quickstart</a></li>
<li class="mb-small"><a href="/_/ecosystem.html">Ecosystem</a></li>
<li class="mb-small"><a href="/doc/latest/">Documentation</a></li>
<li class="mb-small"><a href="/_/community.html">Community</a></li>
<li class="mb-small"><a href="/_/case-studies.html">Case Studies</a></li>
<li class="mb-small"><a href="/_/resources.html">Resources</a></li>
<li class="mb-small"><a href="/_/blog.html">Blog</a></li>
</ul>
</div>
</div>
</footer>
<div class="lower-footer bg-white pa-medium">
<div class="flex flex-row flex-vert-center">
<div class="pr-medium"><img src="../../../../assets/img//feather-small.png" alt="ASF" width="20"></div>
<div class="pr-medium"><a href="http://www.apache.org/" target="_blank">Foundation</a></div>
<div class="pr-medium"><a href="https://www.apache.org/events/current-event.html" target="_blank">Events</a></div>
<div class="pr-medium"><a href="https://www.apache.org/licenses/" target="_blank">License</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/thanks" target="_blank">Thanks</a></div>
<div class="pr-medium"><a href="https://www.apache.org/security" target="_blank">Security</a></div>
<div class="pr-medium"><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank">Privacy</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></div>
</div>
<p class="my-medium">© 2009-<script>document.write(new Date().getFullYear())</script> <a href="https://apache.org" target="_blank">The Apache Software Foundation</a> under the terms of the Apache License 2.0. Apache, the Apache feather logo, Apache Cassandra, Cassandra, and the Cassandra logo, are either registered trademarks or trademarks of The Apache Software Foundation.</p>
</div>
<div id="fade" class="hidden"></div>
<div id="modal" class="hidden">
<div id="close-modal" class="cursor-pointer"><svg viewBox="0 0 24 24" width="24" height="24" stroke="currentColor" stroke-width="2" fill="none" stroke-linecap="round" stroke-linejoin="round" class="css-i6dzq1"><line x1="18" y1="6" x2="6" y2="18"></line><line x1="6" y1="6" x2="18" y2="18"></line></svg></div>
<div id="mod-content" class="vid-mod-content resp-container"></div>
</div>
<script src="../../../../assets/js/site.js"></script>
<script async src="../../../../assets/js/vendor/highlight.js"></script>
<script src="../../../../assets/js/vendor/lunr.js"></script>
<script src="../../../../assets/js/vendor/search.js" id="search-script" data-base-path="../../../.." data-page-path="/Cassandra/4.1/cassandra/cql/SASI.html"></script>
<script async src="../../../../assets/../search-index.js"></script>
<script>
// Page-level UI wiring, executed once the DOM is ready.
// Registers delegated click handlers on `document` for the navigation
// controls and, on the quickstart page, re-targets the footer call-to-action.
// jQuery passes itself as the first argument of a ready callback, so `$`
// is bound locally instead of relying on the global alias.
jQuery(function ($) {
  // Viewport width (px) at or below which a click on the main nav hides it.
  var NAV_FADE_MAX_WIDTH = 1000;

  $(document)
    // Show the main navigation when the mobile nav icon is clicked.
    .on('click', '.mobile-nav-icon', function () {
      $('.main-nav').fadeIn();
    })
    // Hide the main navigation again when it is clicked on a narrow viewport.
    .on('click', '.main-nav', function () {
      // Measure the viewport at click time: a width captured once at load
      // goes stale after the window is resized or the device is rotated.
      if ($(window).width() <= NAV_FADE_MAX_WIDTH) {
        $(this).fadeOut();
      }
    })
    // Toggle the version selector and the element that immediately follows it.
    .on('click', '#version-toggle', function () {
      $(this).toggleClass('active');
      $(this).next().fadeToggle();
    })
    // Toggle the docs navigation from its burger button.
    .on('click', '#mobile-docs-nav-burger', function () {
      $(this).toggleClass('active');
      $('.docs-nav').toggleClass('active');
    });

  // On the quickstart page the footer CTA would link to the page itself,
  // so point it at the blog instead.
  var isQuickstart = window.location.pathname.includes('quickstart.html');
  var footerCTA = document.getElementById('footer-cta');
  // Guard against layouts that omit the footer CTA element.
  if (isQuickstart && footerCTA) {
    footerCTA.innerHTML = 'Get latest updates';
    footerCTA.setAttribute('href', '/_/blog.html');
  }
});
</script>
</div>
</body>
</html>