<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>Compaction overview | Apache Cassandra Documentation</title>
<link rel="stylesheet" href="../../../../../../assets/css/site.css">
<link rel="schema.dcterms" href="https://purl.org/dc/terms/">
<meta name="dcterms.subject" content="Cassandra">
<meta name="dcterms.identifier" content="trunk">
<meta name="generator" content="Antora 2.3.4">
<link rel="icon" href="../../../../../../assets/img/favicon.ico" type="image/x-icon">
<script>
// Inject the plausible.js tracker (presumably Plausible analytics — confirm)
// into <head>, tagging it with the hostname this page is served from.
const script = document.createElement("script");
const domain = window.location.hostname;
script.src = "https://plausible.cassandra.apache.org/js/plausible.js";
// The tracker is told which site it is reporting for via data-domain.
script.setAttribute("data-domain", domain);
// Script-inserted scripts never block parsing; `async` states that explicitly.
// `defer` is ignored when `async` is present, so it is not set.
script.async = true;
document.head.appendChild(script);
</script> </head>
<body class="docs-wrapper article">
<div class="container mx-auto relative">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
<meta property="og:type" content="website" />
<meta property="og:url" content="/" />
<meta property="og:site_name" content="Apache Cassandra" />
<header id="top-nav">
<div class="inner relative">
<div class="header-social-icons text-right">
<a href="https://twitter.com/cassandra?lang=en" target="_blank" rel="noopener" style="margin-left: 20px;"><img src="../../../../../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank" rel="noopener" style="margin-left: 20px;"><img src="../../../../../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank" rel="noopener" style="margin-left: 20px;"><img src="../../../../../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
<div class="cf">
<div class="logo left"><a href="/"><img src="../../../../../../assets/img/logo-white-r.png" alt="Cassandra Logo"></a></div>
<div class="mobile-nav-icon right">
<img class="toggle-icon" src="../../../../../../assets/img/hamburger-nav.svg" alt="Toggle navigation menu">
</div>
<ul class="main-nav nav-links right flex flex-vert-center flex-space-between">
<li>
<a class="nav-link hide-mobile">Get Started</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/cassandra-basics.html">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-basics.png" alt="cassandra basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra Basics
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/quickstart.html">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-rocket.png" alt="quickstart icon">
</div>
<div class="sub-nav-text teal py-small">
Quickstart
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/ecosystem.html">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-ecosystem.png" alt="ecosystem icon">
</div>
<div class="sub-nav-text teal py-small">
Ecosystem
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link" href="/doc/latest/">Documentation</a></li>
<li>
<a class="nav-link" href="/_/community.html">Community</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/community.html#code-of-conduct">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-welcome.png" alt="welcome icon">
</div>
<div class="sub-nav-text teal py-small">
Welcome
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#discussions">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-discussions.png" alt="discussions icon">
</div>
<div class="sub-nav-text teal py-small">
Discussions
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#project-governance">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-governance.png" alt="Governance icon">
</div>
<div class="sub-nav-text teal py-small">
Governance
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#how-to-contribute">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-contribute.png" alt="Contribute icon">
</div>
<div class="sub-nav-text teal py-small">
Contribute
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#meet-the-community">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-community.png" alt="Meet the Community icon">
</div>
<div class="sub-nav-text teal py-small">
Meet the Community
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/cassandra-catalyst-program.html">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-catalyst.png" alt="Catalyst icon">
</div>
<div class="sub-nav-text teal py-small">
Catalyst Program
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/events.html">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-events.png" alt="Events icon">
</div>
<div class="sub-nav-text teal py-small">
Events
</div>
</a>
</li>
</ul>
</li>
<li>
<a class="nav-link hide-mobile">Learn</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/Apache-Cassandra-5.0-Moving-Toward-an-AI-Driven-Future.html">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-basics.png" alt="Basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra 5.0
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/case-studies.html">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-case-study.png" alt="Case Studies icon">
</div>
<div class="sub-nav-text teal py-small">
Case Studies
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/resources.html">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-resources.png" alt="Resources icon">
</div>
<div class="sub-nav-text teal py-small">
Resources
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/blog.html">
<div class="sub-nav-icon">
<img src="../../../../../../assets/img/sub-menu-blog.png" alt="Blog icon">
</div>
<div class="sub-nav-text teal py-small">
Blog
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link btn btn--filled" href="/_/download.html">Download Now</a></li>
</ul>
</div>
</div>
</header>
<div class="hero hero--home grad">
<div class="eye"></div>
<div id="docs-content" class="text-center flex flex-center flex-column relative z2 ma-xlarge">
<h2>Cassandra Documentation</h2>
</div>
</div>
<div class="body px-medium py-medium container">
<div class="docs-nav-bar flex flex-space-between mb-medium">
<div id="mobile-docs-nav-burger" class="hidden">
<svg viewBox="0 0 24 24" width="36" height="36" stroke="#1c81a0" stroke-width="2.5" fill="none" stroke-linecap="round" stroke-linejoin="round" class="css-i6dzq1"><line x1="3" y1="12" x2="21" y2="12"></line><line x1="3" y1="6" x2="21" y2="6"></line><line x1="3" y1="18" x2="21" y2="18"></line></svg>
</div>
<div class="docs-nav-item relative">
<input id="search-input" type="text" placeholder="Search docs" aria-label="Search docs">
</div>
<div class="versions-wrapper">
<h4>Version:</h4>
<div class="nav-panel-explore" data-panel="explore">
<div id="version-toggle" class="context">
<span class="version">trunk</span>
</div>
<ul id="versions-list" class="components">
<li class="component">
<ul class="versions">
<li class="version is-latest">
<a href="../../../../../../_/index.html">master</a>
</li>
</ul>
</li>
<li class="component is-current">
<ul class="versions">
<li class="version is-current">
<a href="../../../../index.html">trunk</a>
</li>
<li class="version">
<a href="../../../../../5.0/index.html">5.0</a>
</li>
<li class="version is-latest">
<a href="../../../../../4.1/index.html">4.1</a>
</li>
<li class="version">
<a href="../../../../../4.0/index.html">4.0</a>
</li>
<li class="version">
<a href="../../../../../3.11/index.html">3.11</a>
</li>
</ul>
</li>
</ul>
</div>
</div> </div>
<div class="cf relative">
<nav class="nav docs-nav full-800">
<div class="nav-menu">
<ul class="nav-list">
<li class="nav-item is-active" data-depth="0">
<ul class="nav-list">
<li class="nav-item" data-depth="1">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../../index.html">Main</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="2">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/glossary.html">Glossary</a>
</span>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/bugs.html">How to report bugs</a>
</span>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/contactus.html">Contact us</a>
</span>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../../../../_/development/index.html">Development</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/development/gettingstarted.html">Getting started</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/development/ide.html">Building and IDE integration</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/development/testing.html">Testing</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/development/patches.html">Contributing code changes</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/development/code_style.html">Code style</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/development/how_to_review.html">Review checklist</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/development/how_to_commit.html">How to commit</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/development/documentation.html">Working on documentation</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/development/ci.html">Jenkins CI environment</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/development/dependencies.html">Dependency management</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../../../../_/development/release_process.html">Release process</a>
</span>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="nav-item is-active" data-depth="0">
<ul class="nav-list">
<li class="nav-item" data-depth="1">
<span class="nav-line">
<button class="nav-toggle"></button>
<span class="nav-text">Cassandra</span>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="2">
<span class="nav-line">
<a class="nav-link" href="../../../overview/faq/index.html">FAQ</a>
</span>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../getting-started/index.html">Getting Started</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../getting-started/cassandra-quickstart.html">Cassandra Quickstart</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../getting-started/sai-quickstart.html">SAI Quickstart</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../getting-started/vector-search-quickstart.html">Vector Search Quickstart</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../installing/installing.html">Installing Cassandra</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../getting-started/configuring.html">Configuring Cassandra</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../getting-started/querying.html">Inserting and querying</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../getting-started/drivers.html">Client drivers</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../getting-started/production.html">Production recommendations</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../new/index.html">What&#8217;s new</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../reference/java17.html">Support for Java</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../architecture/index.html">Architecture</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../architecture/overview.html">Overview</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../architecture/dynamo.html">Dynamo</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../architecture/storage-engine.html">Storage Engine</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../architecture/guarantees.html">Guarantees</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../architecture/messaging.html">Improved Internode Messaging</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../architecture/streaming.html">Improved Streaming</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../developing/data-modeling/index.html">Data Modeling</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/data-modeling/intro.html">Introduction</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/data-modeling/data-modeling_conceptual.html">Conceptual data modeling</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/data-modeling/data-modeling_rdbms.html">RDBMS design</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/data-modeling/data-modeling_queries.html">Defining application queries</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/data-modeling/data-modeling_logical.html">Logical data modeling</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/data-modeling/data-modeling_physical.html">Physical data modeling</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/data-modeling/data-modeling_refining.html">Evaluating and refining data models</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/data-modeling/data-modeling_schema.html">Defining database schema</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/data-modeling/data-modeling_tools.html">Cassandra data modeling tools</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../developing/cql/index.html">Cassandra Query Language (CQL)</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/definitions.html">Definitions</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/types.html">Data types</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/ddl.html">Data definition (DDL)</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/dml.html">Data manipulation (DML)</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/dynamic-data-masking.html">Dynamic Data Masking (DDM)</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/operators.html">Operators</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../developing/cql/indexing/indexing-concepts.html">Indexing concepts</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="4">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../developing/cql/indexing/sai/sai-overview.html">SAI Overview</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="5">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/indexing/sai/sai-concepts.html">Concepts</a>
</span>
</li>
<li class="nav-item" data-depth="5">
<span class="nav-line">
<a class="nav-link" href="../../../getting-started/sai-quickstart.html">SAI Quickstart</a>
</span>
</li>
<li class="nav-item" data-depth="5">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/indexing/sai/sai-faq.html">SAI FAQ</a>
</span>
</li>
<li class="nav-item" data-depth="5">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/indexing/sai/sai-working-with.html">Working with SAI</a>
</span>
</li>
<li class="nav-item" data-depth="5">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/indexing/sai/operations/sai-operations.html">SAI operations</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../developing/cql/indexing/2i/2i-overview.html">Secondary indexes (2i) overview</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="5">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/indexing/2i/2i-concepts.html">Concepts</a>
</span>
</li>
<li class="nav-item" data-depth="5">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/indexing/2i/2i-working-with.html">Working with 2i</a>
</span>
</li>
<li class="nav-item" data-depth="5">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/indexing/2i/operations/2i-build.html">Rebuild 2i</a>
</span>
</li>
</ul>
</li>
</ul>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/mvs.html">Materialized views</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/functions.html">Functions</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/json.html">JSON</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/security.html">Security</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/triggers.html">Triggers</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/appendices.html">Appendices</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/changes.html">Changes</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/SASI.html">SASI</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../developing/cql/cql_singlefile.html">Single file of CQL information</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../vector-search/overview.html">Vector Search overview</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../vector-search/concepts.html">Concepts</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../../vector-search/data-modeling.html">Data Modeling</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../getting-started/vector-search-quickstart.html">Vector Search Quickstart</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../vector-search/vector-search-working-with.html">Working with Vector Search</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../index.html">Managing</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../configuration/index.html">Configuring</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../configuration/cass_yaml_file.html">cassandra.yaml</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../configuration/cass_rackdc_file.html">cassandra-rackdc.properties</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../configuration/cass_env_sh_file.html">cassandra-env.sh</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../configuration/cass_topo_file.html">cassandra-topologies.properties</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../configuration/cass_cl_archive_file.html">commitlog-archiving.properties</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../configuration/cass_logback_xml_file.html">logback.xml</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../configuration/cass_jvm_options_file.html">jvm-* files</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../configuration/configuration.html">Liberating cassandra.yaml Parameters' Names from Their Units</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../index.html">Operating</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../backups.html">Backups</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../bloom_filters.html">Bloom filters</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../bulk_loading.html">Bulk loading</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../cdc.html">Change Data Capture (CDC)</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="index.html">Compaction</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../compression.html">Compression</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../hardware.html">Hardware</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../hints.html">Hints</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../logging.html">Logging</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="5">
<span class="nav-line">
<a class="nav-link" href="../auditlogging.html">Audit logging</a>
</span>
</li>
<li class="nav-item" data-depth="5">
<span class="nav-line">
<a class="nav-link" href="../audit_logging.html">Audit logging 2</a>
</span>
</li>
<li class="nav-item" data-depth="5">
<span class="nav-line">
<a class="nav-link" href="../fqllogging.html">Full query logging</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../metrics.html">Monitoring metrics</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../repair.html">Repair</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../read_repair.html">Read repair</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../security.html">Security</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../snitch.html">Snitches</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../topo_changes.html">Topology changes</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../transientreplication.html">Transient replication</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../virtualtables.html">Virtual tables</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../tools/index.html">Tools</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../tools/cqlsh.html">cqlsh: the CQL shell</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../tools/nodetool/nodetool.html">nodetool</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../tools/sstable/index.html">SSTable tools</a>
</span>
</li>
<li class="nav-item" data-depth="4">
<span class="nav-line">
<a class="nav-link" href="../../tools/cassandra_stress.html">cassandra-stress</a>
</span>
</li>
</ul>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../troubleshooting/index.html">Troubleshooting</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../troubleshooting/finding_nodes.html">Finding misbehaving nodes</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../troubleshooting/reading_logs.html">Reading Cassandra logs</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../troubleshooting/use_nodetool.html">Using nodetool</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../troubleshooting/use_tools.html">Using external tools to deep-dive</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<button class="nav-toggle"></button>
<a class="nav-link" href="../../../reference/index.html">Reference</a>
</span>
<ul class="nav-list">
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../reference/cql-commands/alter-table.html">ALTER TABLE</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../reference/cql-commands/create-index.html">CREATE INDEX</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../reference/cql-commands/create-custom-index.html">CREATE CUSTOM INDEX</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../reference/cql-commands/create-table.html">CREATE TABLE</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../reference/cql-commands/drop-index.html">DROP INDEX</a>
</span>
</li>
<li class="nav-item" data-depth="3">
<span class="nav-line">
<a class="nav-link" href="../../../reference/cql-commands/drop-table.html">DROP TABLE</a>
</span>
</li>
</ul>
</li>
<li class="nav-item" data-depth="2">
<span class="nav-line">
<a class="nav-link" href="../../../integrating/plugins/index.html">Plug-ins</a>
</span>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</nav>
<aside class="toc sidebar">
<div class="toc-menu"></div>
</aside>
<main class="article default-main full-800" data-ceiling="topbar">
<div class="article-banner">
<p>You are viewing the documentation for a prerelease version.</p>
</div>
<div class="article-header">
<nav class="crumbs" aria-label="breadcrumbs">
<ul>
<li class="crumb"><a href="../../../../index.html">Cassandra</a></li>
<li class="crumb"><a href="overview.html">Compaction overview</a></li>
</ul>
</nav>
<div class="tools" role="navigation">
<ul>
<li class="tool edit"><a href="https://github.com/apache/cassandra/edit/trunk/doc/modules/cassandra/pages/managing/operating/compaction/overview.adoc" title="Edit Page" target="_blank" rel="noopener">Edit</a></li>
</ul>
</div>
</div>
<article class="doc">
<h1 class="page">Compaction overview</h1>
<div class="sect1">
<h2 id="what-is-compaction"><a class="anchor" href="#what-is-compaction"></a>What is compaction?</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Data in Cassandra is created in <a href="../../../architecture/storage-engine.html#memtables" class="page">memtables</a>.
Once a memory threshold is reached, to free up memory again, the data is written to an <a href="../../../architecture/storage-engine.html#SSTables" class="page">SSTable</a>, an <a href="/_/glossary.html#immutable">immutable</a> file residing on disk.</p>
</div>
<div class="paragraph">
<p>Because SSTables are immutable, when data is updated or deleted, the old data is neither overwritten in place nor removed from the SSTable.
Instead, the updated data is written to a new SSTable with a new timestamp, and the old data is marked for deletion.
The marker that records a deletion is known as a <a href="/_/glossary.html#tombstone">tombstone</a>.</p>
</div>
<div class="paragraph">
<p>Over time, Cassandra may write many versions of a row in different SSTables.
Each version may have a unique set of columns stored with a different timestamp.
As SSTables accumulate, the distribution of data can require accessing more and more SSTables to retrieve a complete row.</p>
</div>
<div class="paragraph">
<p>To keep the database healthy, Cassandra periodically merges SSTables and discards old data.
This process is called <a href="/_/glossary.html#compaction">compaction</a>.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="why-must-compaction-be-run"><a class="anchor" href="#why-must-compaction-be-run"></a>Why must compaction be run?</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Since SSTables are consulted during read operations, it is important to keep the number of SSTables small.
Write operations will cause the number of SSTables to grow, so compaction is necessary.
Besides the issue of tombstones, data is deleted for other reasons, too, such as Time-To-Live (TTL) expiration of some data.
Deleting, updating, or expiring data are all valid triggers for compaction.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="what-does-compaction-accomplish"><a class="anchor" href="#what-does-compaction-accomplish"></a>What does compaction accomplish?</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Two important factors accomplished by compaction are performance improvement and disk space reclamation.
If SSTables have duplicate data that must be read, read operations are slower.
Once tombstones and duplicates are removed, read operations are faster.
SSTables use disk space, and reducing the size of SSTables through compaction frees up disk space.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="how-does-compaction-work"><a class="anchor" href="#how-does-compaction-work"></a>How does compaction work?</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Compaction works on a collection of SSTables.
From these SSTables, compaction collects all versions of each unique row and assembles one complete row, using the most up-to-date version (by timestamp) of each of the row&#8217;s columns.
The merge process is performant, because rows are sorted by partition key within each SSTable, and the merge process does not use random I/O.
The new version of each row is written to a new SSTable.
The old versions, along with any rows that are ready for deletion, are left in the old SSTables, and are deleted as soon as pending reads are completed.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="types-of-compaction"><a class="anchor" href="#types-of-compaction"></a>Types of compaction</h2>
<div class="sectionbody">
<div class="paragraph">
<p>The concept of compaction is used for different kinds of operations in
Cassandra. What these operations have in common is that each takes one
or more SSTables, merges them, and outputs new SSTables. The types of compactions are:</p>
</div>
<div class="dlist">
<dl>
<dt class="hdlist1">Minor compaction</dt>
<dd>
<p>A minor compaction is triggered automatically in Cassandra for several actions:</p>
<div class="ulist">
<ul>
<li>
<p>When an SSTable is added to the node through flushing</p>
</li>
<li>
<p>When autocompaction is enabled after being disabled (<code>nodetool enableautocompaction</code>)</p>
</li>
<li>
<p>When compaction adds new SSTables</p>
</li>
<li>
<p>A check for new minor compactions every 5 minutes</p>
</li>
</ul>
</div>
</dd>
<dt class="hdlist1">Major compaction</dt>
<dd>
<p>A major compaction is triggered when a user executes a compaction over all SSTables on the node.</p>
</dd>
<dt class="hdlist1">User defined compaction</dt>
<dd>
<p>Similar to a major compaction, a user-defined compaction executes when a user triggers a compaction on a given set of SSTables.</p>
</dd>
<dt class="hdlist1">Scrub</dt>
<dd>
<p>A scrub triggers a compaction to try to fix any broken SSTables.
This can actually remove valid data if that data is corrupted.
If that happens you will need to run a full repair on the node.</p>
</dd>
<dt class="hdlist1">UpgradeSSTables</dt>
<dd>
<p>A compaction occurs when you upgrade SSTables to the latest version.
Run this after upgrading to a new major version.</p>
</dd>
<dt class="hdlist1">Cleanup</dt>
<dd>
<p>Compaction executes to remove any ranges that a node no longer owns.
This type of compaction is typically triggered on neighbouring nodes after a node has been bootstrapped, since the bootstrapping node will take ownership of some ranges from those nodes.</p>
</dd>
<dt class="hdlist1">Secondary index rebuild</dt>
<dd>
<p>A compaction is triggered if the secondary indexes are rebuilt on a node.</p>
</dd>
<dt class="hdlist1">Anticompaction</dt>
<dd>
<p>After repair, the ranges that were actually repaired are split out of the SSTables that existed when repair started. This type of compaction rewrites SSTables to accomplish this task.</p>
</dd>
<dt class="hdlist1">Sub range compaction</dt>
<dd>
<p>It is possible to only compact a given sub range - this action is useful if you know a token that has been misbehaving - either gathering many updates or many deletes.
The command <code>nodetool compact -st x -et y</code> will pick all SSTables containing the range between x and y and issue a compaction for those SSTables.
For Size Tiered Compaction Strategy, this will most likely include all SSTables, but with Leveled Compaction Strategy, it can issue the compaction for a subset of the SSTables.
With LCS the resulting SSTable will end up in L0.</p>
</dd>
</dl>
</div>
</div>
</div>
<div class="sect1">
<h2 id="strategies"><a class="anchor" href="#strategies"></a>Strategies</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Different compaction strategies are available to optimize for different workloads.
Picking the right compaction strategy for your workload will ensure the best performance for both querying and for compaction itself.</p>
</div>
<div class="dlist">
<dl>
<dt class="hdlist1"><a href="ucs.html" class="page"><code>Unified Compaction Strategy (UCS)</code></a></dt>
<dd>
<p>UCS is a good choice for most workloads and is recommended for new workloads.
This compaction strategy is designed to handle a wide variety of workloads.
It is designed to be able to handle both immutable time-series data and workloads with lots of updates and deletes.
It is also designed to be able to handle both spinning disks and SSDs.</p>
</dd>
<dt class="hdlist1"><a href="stcs.html" class="page"><code>Size Tiered Compaction Strategy (STCS)</code></a></dt>
<dd>
<p>STCS is the default compaction strategy, because it is useful as a fallback when other strategies don&#8217;t fit the workload.
Most useful for not strictly time-series workloads with spinning disks, or when the I/O from <code>LCS</code> is too high.</p>
</dd>
<dt class="hdlist1"><a href="lcs.html" class="page"><code>Leveled Compaction Strategy (LCS)</code></a></dt>
<dd>
<p>Leveled Compaction Strategy (LCS) is optimized for read heavy workloads, or workloads with lots of updates and deletes.
It is not a good choice for immutable time-series data.</p>
</dd>
<dt class="hdlist1"><a href="twcs.html" class="page"><code>Time Window Compaction Strategy (TWCS)</code></a></dt>
<dd>
<p>Time Window Compaction Strategy is designed for TTL&#8217;ed, mostly immutable time-series data.</p>
</dd>
</dl>
</div>
</div>
</div>
<div class="sect1">
<h2 id="tombstones"><a class="anchor" href="#tombstones"></a>Tombstones</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="what-are-tombstones"><a class="anchor" href="#what-are-tombstones"></a>What are tombstones?</h3>
<div class="paragraph">
<p>Cassandra&#8217;s processes for deleting data are designed to improve performance, and to work with Cassandra&#8217;s built-in properties for data distribution and fault-tolerance.</p>
</div>
<div class="paragraph">
<p>Cassandra treats a deletion as an insertion, and inserts a time-stamped deletion marker called a tombstone.
The tombstones go through Cassandra&#8217;s write path, and are written to SSTables on one or more nodes.
The key feature difference of a tombstone is that it has a built-in expiration date/time.
At the end of its expiration period, the grace period, the tombstone is deleted as part of Cassandra&#8217;s normal compaction process.</p>
</div>
<div class="admonitionblock note">
<table>
<tr>
<td class="icon">
<i class="fa icon-note" title="Note"></i>
</td>
<td class="content">
<div class="paragraph">
<p>You can also mark a Cassandra row or column with a time-to-live (TTL) value.
After this amount of time has ended, Cassandra marks the object with a tombstone, and handles it like other tombstoned objects.</p>
</div>
</td>
</tr>
</table>
</div>
</div>
<div class="sect2">
<h3 id="why-tombstones"><a class="anchor" href="#why-tombstones"></a>Why tombstones?</h3>
<div class="paragraph">
<p>The tombstone represents the deletion of an object, either a row or column value.
This approach is used instead of removing values because of the distributed nature of Cassandra.
Once an object is marked as a tombstone, queries will ignore all values that are time-stamped previous to the tombstone insertion.</p>
</div>
</div>
<div class="sect2">
<h3 id="zombies"><a class="anchor" href="#zombies"></a>Zombies</h3>
<div class="paragraph">
<p>In a multi-node cluster, Cassandra may store replicas of the same data on two or more nodes.
This helps prevent data loss, but it complicates the deletion process.
If a node receives a delete command for data it stores locally, the node tombstones the specified object and tries to pass the tombstone to other nodes containing replicas of that object.
But if one replica node is unresponsive at that time, it does not receive the tombstone immediately, so it still contains the pre-delete version of the object.
If the tombstoned object has already been deleted from the rest of the cluster before that node recovers, Cassandra treats the object on the recovered node as new data, and propagates it to the rest of the cluster.
This kind of deleted but persistent object is called a <a href="/_/glossary.html#zombie">zombie</a>.</p>
</div>
</div>
<div class="sect2">
<h3 id="grace-period"><a class="anchor" href="#grace-period"></a>Grace period</h3>
<div class="paragraph">
<p>To prevent the reappearance of zombies, Cassandra gives each tombstone a grace period.
The grace period for a tombstone is set with the table property <code>WITH gc_grace_seconds</code>.
Its default value is 864000 seconds (ten days), after which a tombstone expires and can be deleted during compaction.
Prior to the grace period expiring, Cassandra will retain a tombstone through compaction events.
Each table can have its own value for this property.</p>
</div>
<div class="paragraph">
<p>The purpose of the grace period is to give unresponsive nodes time to recover and process tombstones normally.
If a client writes a new update to the tombstoned object during the grace period, Cassandra overwrites the tombstone.
If a client sends a read for that object during the grace period, Cassandra disregards the tombstone and retrieves the object from other replicas if possible.</p>
</div>
<div class="paragraph">
<p>When an unresponsive node recovers, Cassandra uses hinted handoff to replay the database mutations the node missed while it was down.
Cassandra does not replay a mutation for a tombstoned object during its grace period.
But if the node does not recover until after the grace period ends, Cassandra may miss the deletion.</p>
</div>
<div class="paragraph">
<p>After the tombstone&#8217;s grace period ends, Cassandra deletes the tombstone during compaction.</p>
</div>
</div>
<div class="sect2">
<h3 id="deletion"><a class="anchor" href="#deletion"></a>Deletion</h3>
<div class="paragraph">
<p>After <code>gc_grace_seconds</code> has expired the tombstone may be removed (meaning there will no longer be any record that a certain piece of data was
deleted).
But one complication for deletion is that a tombstone can live in one SSTable and the data it marks for deletion in another, so a compaction must also include both SSTables for the tombstone to be removed.
More precisely, to be able to drop an actual tombstone, the following must be true:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>The tombstone must be older than <code>gc_grace_seconds</code>.
Note that tombstones will not be removed until a compaction event even if <code>gc_grace_seconds</code> has elapsed.</p>
</li>
<li>
<p>If partition X contains the tombstone, the SSTable containing the partition plus all SSTables containing data older than the tombstone containing X must be included in the same compaction.
If all data in any SSTable containing partition X is newer than the tombstone, it can be ignored.</p>
</li>
<li>
<p>If the option <code>only_purge_repaired_tombstones</code> is enabled, tombstones are only removed if the data has also been repaired.
This process is described in the "Deletes with tombstones" section.</p>
</li>
</ul>
</div>
<div class="paragraph">
<p>If a node remains down or disconnected for longer than <code>gc_grace_seconds</code>, its deleted data will be repaired back to the other nodes and reappear in the cluster.
This is basically the same as in the "Deletes without Tombstones" section.</p>
</div>
<div class="sect3">
<h4 id="deletes-without-tombstones"><a class="anchor" href="#deletes-without-tombstones"></a>Deletes without tombstones</h4>
<div class="paragraph">
<p>Imagine a three node cluster which has the value [A] replicated to every
node:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlightjs highlight"><code class="language-none hljs" data-lang="none">[A], [A], [A]</code></pre>
</div>
</div>
<div class="paragraph">
<p>If one of the nodes fails and our delete operation only removes existing values, we can end up with a cluster that looks like:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlightjs highlight"><code class="language-none hljs" data-lang="none">[], [], [A]</code></pre>
</div>
</div>
<div class="paragraph">
<p>Then a repair operation would replace the value of [A] back onto the two nodes which are missing the value:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlightjs highlight"><code class="language-none hljs" data-lang="none">[A], [A], [A]</code></pre>
</div>
</div>
<div class="paragraph">
<p>This would cause our data to be resurrected as a zombie even though it had been deleted.</p>
</div>
</div>
<div class="sect3">
<h4 id="deletes-with-tombstones"><a class="anchor" href="#deletes-with-tombstones"></a>Deletes with tombstones</h4>
<div class="paragraph">
<p>Starting again with a three node cluster which has the value [A] replicated to every node:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlightjs highlight"><code class="language-none hljs" data-lang="none">[A], [A], [A]</code></pre>
</div>
</div>
<div class="paragraph">
<p>If instead of removing data we add a tombstone object, the single node failure situation will look like:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlightjs highlight"><code class="language-none hljs" data-lang="none">[A, Tombstone[A]], [A, Tombstone[A]], [A]</code></pre>
</div>
</div>
<div class="paragraph">
<p>Now when we issue a repair the tombstone will be copied to the replica, rather than the deleted data being resurrected:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlightjs highlight"><code class="language-none hljs" data-lang="none">[A, Tombstone[A]], [A, Tombstone[A]], [A, Tombstone[A]]</code></pre>
</div>
</div>
<div class="paragraph">
<p>Our repair operation will correctly put the state of the system to what we expect with the object [A] marked as deleted on all nodes.
This does mean we will end up accruing tombstones which will permanently accumulate disk space.
To avoid keeping tombstones forever, we set <code>gc_grace_seconds</code> for every table in Cassandra.</p>
</div>
</div>
</div>
<div class="sect2">
<h3 id="fully-expired-sstables"><a class="anchor" href="#fully-expired-sstables"></a>Fully expired SSTables</h3>
<div class="paragraph">
<p>If an SSTable contains only tombstones and it is guaranteed that SSTable is not shadowing data in any other SSTable, then the compaction can drop
that SSTable.
If you see SSTables with only tombstones (note that TTL&#8217;d data is considered tombstones once the time-to-live has expired), but it is not being dropped by compaction, it is likely that other SSTables contain older data.
There is a tool called <code>sstableexpiredblockers</code> that will list which SSTables are droppable and which are blocking them from being dropped.
With <code>TimeWindowCompactionStrategy</code> it is possible to remove the guarantee (not check for shadowing data) by enabling <code>unsafe_aggressive_sstable_expiration</code>.</p>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="ttl"><a class="anchor" href="#ttl"></a>TTL</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Data in Cassandra can have an additional property called time to live -
this is used to automatically drop data that has expired once the time
is reached. Once the TTL has expired the data is converted to a
tombstone which stays around for at least <code>gc_grace_seconds</code>. Note that
if you mix data with TTL and data without TTL (or just different length
of the TTL) Cassandra will have a hard time dropping the tombstones
created since the partition might span many SSTables and not all are
compacted at once.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="fully-expired-sstables-2"><a class="anchor" href="#fully-expired-sstables-2"></a>Fully expired SSTables</h2>
<div class="sectionbody">
<div class="paragraph">
<p>If an SSTable contains only tombstones and it is guaranteed that
SSTable is not shadowing data in any other SSTable, then the compaction can drop
that SSTable. If you see SSTables with only tombstones (note that TTL-ed
data is considered tombstones once the time-to-live has expired), but it
is not being dropped by compaction, it is likely that other SSTables
contain older data. There is a tool called <code>sstableexpiredblockers</code> that
will list which SSTables are droppable and which are blocking them from
being dropped. With <code>TimeWindowCompactionStrategy</code> it
is possible to remove the guarantee (not check for shadowing data) by
enabling <code>unsafe_aggressive_sstable_expiration</code>.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="repairedunrepaired-data"><a class="anchor" href="#repairedunrepaired-data"></a>Repaired/unrepaired data</h2>
<div class="sectionbody">
<div class="paragraph">
<p>With incremental repairs Cassandra must keep track of what data is
repaired and what data is unrepaired. With anticompaction repaired data
is split out into repaired and unrepaired SSTables. To avoid mixing up
the data again separate compaction strategy instances are run on the two
sets of data, each instance only knowing about either the repaired or
the unrepaired SSTables. This means that if you only run incremental
repair once and then never again, you might have very old data in the
repaired SSTables that block compaction from dropping tombstones in the
unrepaired (probably newer) SSTables.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="data-directories"><a class="anchor" href="#data-directories"></a>Data directories</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Since tombstones and data can live in different SSTables it is important
to realize that losing an SSTable might lead to data becoming live again
- the most common way of losing SSTables is to have a hard drive break
down. To avoid making data live, tombstones and actual data are always in
the same data directory. This way, if a disk is lost, all versions of a
partition are lost and no data can get undeleted. To achieve this a
compaction strategy instance per data directory is run in addition to
the compaction strategy instances containing repaired/unrepaired data,
this means that if you have 4 data directories there will be 8
compaction strategy instances running. This has a few more benefits than
just avoiding data getting undeleted:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>It is possible to run more compactions in parallel - leveled
compaction will have several totally separate levelings and each one can
run compactions independently from the others.</p>
</li>
<li>
<p>Users can backup and restore a single data directory.</p>
</li>
<li>
<p>Note though that currently all data directories are considered equal,
so if you have a tiny disk and a big disk backing two data directories,
the big one will be limited by the small one. One workaround to
this is to create more data directories backed by the big disk.</p>
</li>
</ul>
</div>
</div>
</div>
<div class="sect1">
<h2 id="single-sstable-tombstone-compaction"><a class="anchor" href="#single-sstable-tombstone-compaction"></a>Single SSTable tombstone compaction</h2>
<div class="sectionbody">
<div class="paragraph">
<p>When an SSTable is written a histogram with the tombstone expiry times
is created and this is used to try to find SSTables with very many
tombstones and run single SSTable compaction on that SSTable in hope of
being able to drop tombstones in that SSTable. Before starting this it
is also checked how likely it is that any tombstones will actually be
able to be dropped, based on how much this SSTable overlaps with other
SSTables. To avoid most of these checks the compaction option
<code>unchecked_tombstone_compaction</code> can be enabled.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="compaction-options"><a class="anchor" href="#compaction-options"></a>Common options</h2>
<div class="sectionbody">
<div class="paragraph">
<p>There are a number of common options for all the compaction strategies:</p>
</div>
<div class="dlist">
<dl>
<dt class="hdlist1"><code>enabled</code> (default: true)</dt>
<dd>
<p>Whether minor compactions should run. Note that you can have 'enabled': true as a compaction option and then do 'nodetool enableautocompaction' to start running compactions.</p>
</dd>
<dt class="hdlist1"><code>tombstone_threshold</code> (default: 0.2)</dt>
<dd>
<p>How much of the SSTable should be tombstones for us to consider doing a single SSTable compaction of that SSTable.</p>
</dd>
<dt class="hdlist1"><code>tombstone_compaction_interval</code> (default: 86400s (1 day))</dt>
<dd>
<p>Since it might not be possible to drop any tombstones when doing a single SSTable compaction we need to make sure that one SSTable is not constantly getting recompacted - this option states how often we should try for a given SSTable.</p>
</dd>
<dt class="hdlist1"><code>log_all</code> (default: false)</dt>
<dd>
<p>New detailed compaction logging, see <a href="#detailed-compaction-logging">below</a>.</p>
</dd>
<dt class="hdlist1"><code>unchecked_tombstone_compaction</code> (default: false)</dt>
<dd>
<p>The single SSTable compaction has quite strict checks for whether it should be started, this option disables those checks and for some use cases this might be needed.
Note that this does not change anything for the actual compaction, tombstones are only dropped if it is safe to do so - it might just rewrite an SSTable without being able to drop any tombstones.</p>
</dd>
<dt class="hdlist1"><code>only_purge_repaired_tombstones</code> (default: false)</dt>
<dd>
<p>Option to enable the extra safety of making sure that tombstones are only dropped if the data has been repaired.</p>
</dd>
<dt class="hdlist1"><code>min_threshold</code> (default: 4)</dt>
<dd>
<p>Lower limit of number of SSTables before a compaction is triggered.
Not used for <code>LeveledCompactionStrategy</code>.</p>
</dd>
<dt class="hdlist1"><code>max_threshold</code> (default: 32)</dt>
<dd>
<p>Upper limit of number of SSTables before a compaction is triggered.
Not used for <code>LeveledCompactionStrategy</code>.</p>
</dd>
</dl>
</div>
<div class="paragraph">
<p>Further, see the section on each strategy for specific additional options.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="compaction-nodetool-commands"><a class="anchor" href="#compaction-nodetool-commands"></a>Compaction nodetool commands</h2>
<div class="sectionbody">
<div class="paragraph">
<p>The <code>nodetool</code> utility provides a number of commands related to compaction:</p>
</div>
<div class="dlist">
<dl>
<dt class="hdlist1"><code>enableautocompaction</code></dt>
<dd>
<p>Enable compaction.</p>
</dd>
<dt class="hdlist1"><code>disableautocompaction</code></dt>
<dd>
<p>Disable compaction.</p>
</dd>
<dt class="hdlist1"><code>setcompactionthroughput</code></dt>
<dd>
<p>How fast compaction should run at most - defaults to 64MiB/s.</p>
</dd>
<dt class="hdlist1"><code>compactionstats</code></dt>
<dd>
<p>Statistics about current and pending compactions.</p>
</dd>
<dt class="hdlist1"><code>compactionhistory</code></dt>
<dd>
<p>List details about the last compactions.</p>
</dd>
<dt class="hdlist1"><code>setcompactionthreshold</code></dt>
<dd>
<p>Set the min/max SSTable count for when to trigger compaction, defaults to 4/32.</p>
</dd>
</dl>
</div>
</div>
</div>
<div class="sect1">
<h2 id="switching-the-compaction-strategy-and-options-using-jmx"><a class="anchor" href="#switching-the-compaction-strategy-and-options-using-jmx"></a>Switching the compaction strategy and options using JMX</h2>
<div class="sectionbody">
<div class="paragraph">
<p>It is possible to switch compaction strategies and their options on just a single node using JMX. This is a great way to experiment with settings without affecting the whole cluster.
The mbean is:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlightjs highlight"><code class="language-console hljs" data-lang="console">org.apache.cassandra.db:type=ColumnFamilies,keyspace=&lt;keyspace_name&gt;,columnfamily=&lt;table_name&gt;</code></pre>
</div>
</div>
<div class="paragraph">
<p>and the attribute to change is <code>CompactionParameters</code> or <code>CompactionParametersJson</code> if you use jconsole or jmc. For example, the syntax for the json version is the same as you would use in an <code>ALTER TABLE</code> statement:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlightjs highlight"><code class="language-console hljs" data-lang="console">{ 'class': 'LeveledCompactionStrategy', 'sstable_size_in_mb': 123, 'fanout_size': 10}</code></pre>
</div>
</div>
<div class="paragraph">
<p>The setting is kept until someone executes an <code>ALTER TABLE</code> that touches the compaction settings or restarts the node.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="detailed-compaction-logging"><a class="anchor" href="#detailed-compaction-logging"></a>More detailed compaction logging</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Enable with the compaction option <code>log_all</code> and a more detailed compaction log file will be produced in your log directory.</p>
</div>
</div>
</div>
</article>
</main>
</div>
</div>
<footer class="grad grad--two flex-center pb-xlarge">
<div class="inner text-center z2 relative">
<h2 class="white py-small">Get started with Cassandra, fast.</h2>
<a id="footer-cta" href="/_/quickstart.html" class="btn btn--filled ma-medium">Quickstart Guide</a>
</div>
<div class="inner flex flex-distribute-items mt-xlarge z2 relative">
<div class="col-2">
<div id="footer-logo" class="logo logo--footer mb-medium"><img src="../../../../../../assets/img/logo-white-r.png" alt="Cassandra Logo"></div>
<p>Apache Cassandra<img src="../../../../../../assets/img/registered.svg" alt="®" style="width:18px;"> powers mission-critical deployments with improved performance and unparalleled levels of scale in the cloud.</p>
<div class="footer-social-icons">
<a href="https://twitter.com/cassandra?lang=en" target="_blank"><img src="../../../../../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank"><img src="../../../../../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank"><img src="../../../../../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
</div>
<div class="col-2 flex flex-center">
<ul class="columns-2">
<li class="mb-small"><a href="/">Home</a></li>
<li class="mb-small"><a href="/_/cassandra-basics.html">Cassandra Basics</a></li>
<li class="mb-small"><a href="/_/quickstart.html">Quickstart</a></li>
<li class="mb-small"><a href="/_/ecosystem.html">Ecosystem</a></li>
<li class="mb-small"><a href="/doc/latest/">Documentation</a></li>
<li class="mb-small"><a href="/_/community.html">Community</a></li>
<li class="mb-small"><a href="/_/case-studies.html">Case Studies</a></li>
<li class="mb-small"><a href="/_/resources.html">Resources</a></li>
<li class="mb-small"><a href="/_/blog.html">Blog</a></li>
</ul>
</div>
</div>
</footer>
<div class="lower-footer bg-white pa-medium">
<div class="flex flex-row flex-vert-center">
<div class="pr-medium"><img src="../../../../../../assets/img//feather-small.png" alt="ASF" width="20"></div>
<div class="pr-medium"><a href="http://www.apache.org/" target="_blank">Foundation</a></div>
<div class="pr-medium"><a href="https://www.apache.org/events/current-event.html" target="_blank">Events</a></div>
<div class="pr-medium"><a href="https://www.apache.org/licenses/" target="_blank">License</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/thanks" target="_blank">Thanks</a></div>
<div class="pr-medium"><a href="https://www.apache.org/security" target="_blank">Security</a></div>
<div class="pr-medium"><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank">Privacy</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></div>
</div>
<p class="my-medium">© 2009-<script>document.write(new Date().getFullYear())</script> <a href="https://apache.org" target="_blank">The Apache Software Foundation</a> under the terms of the Apache License 2.0. Apache, the Apache feather logo, Apache Cassandra, Cassandra, and the Cassandra logo, are either registered trademarks or trademarks of The Apache Software Foundation.</p>
</div>
<div id="fade" class="hidden"></div>
<div id="modal" class="hidden">
<div id="close-modal" class="cursor-pointer"><svg viewBox="0 0 24 24" width="24" height="24" stroke="currentColor" stroke-width="2" fill="none" stroke-linecap="round" stroke-linejoin="round" class="css-i6dzq1"><line x1="18" y1="6" x2="6" y2="18"></line><line x1="6" y1="6" x2="18" y2="18"></line></svg></div>
<div id="mod-content" class="vid-mod-content resp-container"></div>
</div>
<script src="../../../../../../assets/js/site.js"></script>
<script async src="../../../../../../assets/js/vendor/highlight.js"></script>
<script src="../../../../../../assets/js/vendor/lunr.js"></script>
<script src="../../../../../../assets/js/vendor/search.js" id="search-script" data-base-path="../../../../../.." data-page-path="/Cassandra/trunk/cassandra/managing/operating/compaction/overview.html"></script>
<script async src="../../../../../../assets/../search-index.js"></script>
<script>
// Page UI wiring, run once the DOM is ready. jQuery passes itself as the first
// argument to the ready callback, so `$` is bound locally and does not depend
// on the global `$` alias being available.
jQuery(function($){
// Viewport width (px) at or below which a click on .main-nav fades it out.
var MOBILE_NAV_MAX_WIDTH = 1000;
$(document)
// Handlers are delegated from document, so they match elements by selector at click time.
.on('click','.mobile-nav-icon',function(){
$('.main-nav').fadeIn();
})
.on('click','.main-nav',function(){
// Measure the window at click time rather than once at load, so a resize
// or device rotation after page load is taken into account.
if($(window).width() <= MOBILE_NAV_MAX_WIDTH){
$(this).fadeOut();
}
})
.on('click','#version-toggle',function(){
// Toggle the active state and show/hide the element that follows the toggle.
$(this).toggleClass('active');
$(this).next().fadeToggle();
})
.on('click','#mobile-docs-nav-burger', function(){
$(this).toggleClass('active');
$('.docs-nav').toggleClass('active');
});
// On the quickstart page the footer call-to-action is repointed at the blog.
var url = window.location.pathname;
var isQuickstart = url.includes('quickstart.html');
var footerCTA = document.getElementById('footer-cta');
// Guard against the element being absent so a missing footer cannot throw here.
if(isQuickstart && footerCTA){
footerCTA.textContent = 'Get latest updates';
footerCTA.setAttribute('href', '/_/blog.html');
}
});
</script>
</div>
</body>
</html>