blob: 9dcd483d8967512c0459711fc6693b99d30dd5ce [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>Apache Cassandra | Apache Cassandra Documentation</title>
<link rel="stylesheet" href="../../assets/css/site.css">
<link rel="schema.dcterms" href="https://purl.org/dc/terms/">
<meta name="dcterms.subject" content="_">
<meta name="dcterms.identifier" content="master">
<meta name="generator" content="Antora 2.3.4">
<link rel="icon" href="../../assets/img/favicon.ico" type="image/x-icon">
<script>
// Inject the Plausible analytics script at runtime so that the reported
// data-domain is whichever hostname is currently serving this page.
// The work is wrapped in an IIFE so its helper binding stays local: a
// top-level `const` in a classic script is a global lexical declaration,
// and any later script declaring the same name would fail to run.
(function () {
  const loader = document.createElement("script");
  loader.src = "https://plausible.cassandra.apache.org/js/plausible.js";
  loader.setAttribute("data-domain", window.location.hostname);
  // Attribute values are kept exactly as before so the injected tag
  // carries the same defer/async attributes.
  loader.setAttribute("defer", 'true');
  loader.setAttribute("async", 'true');
  document.head.appendChild(loader);
})();
</script> </head>
<body class="single-post">
<div class="container mx-auto relative">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
<meta property="og:type" content="website" />
<meta property="og:url" content="/" />
<meta property="og:site_name" content="Apache Cassandra" />
<header id="top-nav">
<div class="inner relative">
<div class="header-social-icons text-right">
<!-- Social profiles open in a new tab; rel="noopener" keeps the opened page from reaching window.opener. -->
<a href="https://twitter.com/cassandra?lang=en" target="_blank" rel="noopener"><img src="../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank" rel="noopener"><img src="../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank" rel="noopener"><img src="../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
<div class="cf">
<div class="logo left"><a href="/"><img src="../../assets/img/logo-white-r.png" alt="cassandra logo"></a></div>
<div class="mobile-nav-icon right">
<!-- Clicking this icon fades in .main-nav (handler is registered in the page script); the alt text names that action for assistive technology. -->
<img class="toggle-icon" src="../../assets/img/hamburger-nav.svg" alt="Open navigation menu">
</div>
<ul class="main-nav nav-links right flex flex-vert-center flex-space-between">
<li>
<a class="nav-link hide-mobile">Get Started</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/cassandra-basics.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-basics.png" alt="cassandra basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra Basics
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/quickstart.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-rocket.png" alt="quickstart icon">
</div>
<div class="sub-nav-text teal py-small">
Quickstart
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/ecosystem.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-ecosystem.png" alt="ecosystem icon">
</div>
<div class="sub-nav-text teal py-small">
Ecosystem
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link" href="/doc/latest/">Documentation</a></li>
<li>
<a class="nav-link" href="/_/community.html">Community</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/community.html#code-of-conduct">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-welcome.png" alt="welcome icon">
</div>
<div class="sub-nav-text teal py-small">
Welcome
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#discussions">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-discussions.png" alt="discussions icon">
</div>
<div class="sub-nav-text teal py-small">
Discussions
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#project-governance">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-governance.png" alt="Governance icon">
</div>
<div class="sub-nav-text teal py-small">
Governance
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#how-to-contribute">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-contribute.png" alt="Contribute icon">
</div>
<div class="sub-nav-text teal py-small">
Contribute
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#meet-the-community">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-community.png" alt="Meet the Community icon">
</div>
<div class="sub-nav-text teal py-small">
Meet the Community
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/cassandra-catalyst-program.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-catalyst.png" alt="Catalyst icon">
</div>
<div class="sub-nav-text teal py-small">
Catalyst Program
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/events.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-events.png" alt="Events icon">
</div>
<div class="sub-nav-text teal py-small">
Events
</div>
</a>
</li>
</ul>
</li>
<li>
<a class="nav-link hide-mobile">Learn</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/Apache-Cassandra-5.0-Moving-Toward-an-AI-Driven-Future.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-basics.png" alt="Basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra 5.0
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/case-studies.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-case-study.png" alt="Case Studies icon">
</div>
<div class="sub-nav-text teal py-small">
Case Studies
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/resources.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-resources.png" alt="Resources icon">
</div>
<div class="sub-nav-text teal py-small">
Resources
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/blog.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-blog.png" alt="Blog icon">
</div>
<div class="sub-nav-text teal py-small">
Blog
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link btn btn--filled" href="/_/download.html">Download Now</a></li>
</ul>
</div>
</div>
</header>
<div class="hero hero--home grad">
<div class="eye"></div>
<div id="home-content" class="text-center flex flex-center flex-column relative z2 ma-xlarge">
<h1>Apache Cassandra 5.0 Features: Unified Compaction Strategy</h1>
<h3>October 27, 2023 | Lorina Poland</h3>
</div>
</div>
<div id="blog-post" class="flex-center py-large arrow">
<div class="blog-breadcrumb mb-medium">
<div class="inner inner--narrow">
<a href="/_/blog.html">« Back to the Apache Cassandra Blog</a>
</div>
</div>
<div class="post-content">
<div class="inner inner--narrow">
<div id="preamble">
<div class="sectionbody">
<div class="paragraph">
<p><em>Apache Cassandra 5.0 is the project’s major release for 2023, and it promises some of the biggest changes for Cassandra to-date. After more than a decade of engineering work dedicated to stabilizing and building Cassandra as a distributed database, we now look forward to introducing a host of exciting features and enhancements that empower users to take their data-driven applications to the next level - including machine learning and artificial intelligence.</em></p>
</div>
<div class="paragraph">
<p><em>This blog series aims to give a deeper dive into some of the key features of Cassandra 5.0.</em></p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="introduction"><a class="anchor" href="#introduction"></a>Introduction</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Compaction is an essential process in Apache Cassandra® that merges and optimizes data on disk to improve read performance and free disk space. Until now, users had to choose between different compaction strategies upfront, each with its own advantages and drawbacks. Switching later is very difficult. To address these challenges, the Unified Compaction Strategy (UCS) has been introduced as a powerful and adaptive compaction solution.</p>
</div>
<div class="paragraph">
<p>In this blog post, we will dive into the details of the UCS, demonstrate its usage, and compare it to existing compaction strategies in Cassandra.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="the-unified-compaction-strategy"><a class="anchor" href="#the-unified-compaction-strategy"></a>The Unified Compaction Strategy</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Unified Compaction Strategy (UCS) is a cutting-edge compaction strategy that harmoniously blends the benefits of tiered and leveled compaction strategies while adding sharding capabilities. UCS enables seamless reconfiguration at any time and serves as the foundation for future compaction improvements, including automatic adaptation to various workloads. By leveraging the similarities between tiered and leveled compactions and utilizing the concept of "density" instead of "size", UCS creates a more versatile and efficient approach to managing SSTables in Cassandra.</p>
</div>
<div class="paragraph">
<p>UCS offers users the flexibility to choose between leveled and/or tiered strategies based on their unique requirements by adjusting the fanout factor and minimum SSTable size parameters. This tuning capability allows optimal trade-offs between read amplification (RA) and write amplification (WA) to be made, catering to different workloads and performance demands. Moreover, UCS supports the customization of fanout factors for each level, empowering users to define mixed strategies that adapt to their specific use cases. The innovative sharding mechanism utilized by UCS ensures efficient, concurrent compactions while maintaining non-overlap between SSTables, ultimately leading to improved performance and streamlined management of your Apache Cassandra deployment.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="using-the-unified-compaction-strategy"><a class="anchor" href="#using-the-unified-compaction-strategy"></a>Using the Unified Compaction Strategy</h2>
<div class="sectionbody">
<div class="paragraph">
<p>To use UCS even in a currently running cluster, you can update your table&#8217;s compaction configuration as follows:</p>
</div>
<div class="paragraph">
<p><code>ALTER TABLE your_table WITH compaction = { 'class': 'UnifiedCompactionStrategy', 'scaling_parameters': 'T8, T4, N, L4' };</code></p>
</div>
<div class="paragraph">
<p>In this example, the scaling_parameters option specifies the fanout factor and compaction method for each level of the hierarchy. You can customize these parameters to suit your specific workload requirements. If the list is shorter than the number of levels, the last value is repeated for all higher levels.</p>
</div>
<div class="paragraph">
<p>Remember that higher values of the scaling parameter improve write amplification (WA) at the expense of read amplification (RA), while lower values improve RA at the expense of WA. You can tailor the scaling parameters to your specific workload requirements to optimize the performance of your Apache Cassandra deployment.</p>
</div>
<div class="paragraph">
<p>The full list of new parameters is shown here:</p>
</div>
<table class="tableblock frame-all grid-all stretch">
<colgroup>
<col style="width: 50%;">
<col style="width: 50%;">
</colgroup>
<thead>
<tr>
<th class="tableblock halign-left valign-top">Parameter</th>
<th class="tableblock halign-left valign-top">Explanation</th>
</tr>
</thead>
<tbody>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">scaling_parameters</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Specifies per-level scaling parameters, used to define the behavior for all levels of the hierarchy. Determines whether leveled, tiered, or mixed compaction is used. Default value is T4.</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">target_sstable_size</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Target size for SSTables. Balances streaming and repair efficiency with memory pressure. Default value is 1 GiB.</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">base_shard_count</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Minimum number of shards for levels with the smallest density. Affects L0 SSTables and write throughput. Default value is 4 (1 for system tables, or when multiple data locations are defined).</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">expired_sstable_check_frequency_seconds</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Frequency of checking for expired SSTables. Default value is 10 minutes.</p></td>
</tr>
</tbody>
</table>
<div class="sect2">
<h3 id="comparing-compaction-strategies"><a class="anchor" href="#comparing-compaction-strategies"></a>Comparing Compaction Strategies</h3>
<div class="paragraph">
<p>To better understand the benefits of UCS, let&#8217;s compare it to existing compaction strategies in Apache Cassandra.</p>
</div>
<table class="tableblock frame-all grid-all stretch">
<colgroup>
<col style="width: 14.2857%;">
<col style="width: 14.2857%;">
<col style="width: 14.2857%;">
<col style="width: 14.2857%;">
<col style="width: 14.2857%;">
<col style="width: 14.2857%;">
<col style="width: 14.2858%;">
</colgroup>
<thead>
<tr>
<th class="tableblock halign-left valign-top">Compaction Strategy</th>
<th class="tableblock halign-left valign-top">Best Suited For</th>
<th class="tableblock halign-left valign-top">Read Amplification</th>
<th class="tableblock halign-left valign-top">Write Amplification</th>
<th class="tableblock halign-left valign-top">Space Overhead</th>
<th class="tableblock halign-left valign-top">Complexity</th>
<th class="tableblock halign-left valign-top">Concurrency</th>
</tr>
</thead>
<tbody>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">STCS</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Write-heavy, non-time series workloads</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">High</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Low</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">High</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Moderate</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Moderate</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">LCS</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Read-heavy workloads, wide partition non-TS</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Low</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Moderate</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Moderate</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">High</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Low</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">TWCS</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Time series workloads</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Moderate</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Moderate</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Moderate</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Moderate</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Moderate</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">UCS</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Wide range of workloads (adapts based on config)</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Adaptive</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Adaptive</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Adaptive</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Low</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">High</p></td>
</tr>
</tbody>
</table>
<div class="paragraph">
<p>As this table shows, UCS adapts to different workloads, offering better read/write amplification tradeoffs and concurrency while maintaining a lower complexity level.</p>
</div>
<h3 id="conclusion"><a class="anchor" href="#conclusion"></a>Conclusion</h3>
<div class="paragraph">
<p>The Unified Compaction Strategy in Apache Cassandra provides an adaptive and flexible solution to the existing challenges of compaction. It simplifies the decision-making process for users while offering better performance and resource utilization. With UCS, users no longer have to worry about suboptimal compaction choices and can instead focus on their application&#8217;s core functionality.</p>
</div>
<div class="paragraph">
<p>As the development of UCS continues, the roadmap aims to make the strategy even more adaptive, relieving the user of the hard task of choosing suitable compaction settings, and making Apache Cassandra an even more powerful solution for database development.</p>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="learn-more-about-apache-cassandra"><a class="anchor" href="#learn-more-about-apache-cassandra"></a>Learn More About Apache Cassandra</h2>
<div class="sectionbody">
<div class="paragraph">
<p>As we get closer to the General Availability of Cassandra 5.0, there are a host of ways to get more involved in the community and follow project developments:</p>
</div>
<div class="paragraph">
<p><a href="https://events.linuxfoundation.org/cassandra-summit/" target="_blank" rel="noopener">Cassandra Summit + Code AI</a> is taking place Dec. 12-13 in San Jose, CA. Cassandra Summit is THE gathering place for Apache Cassandra data practitioners, developers, engineers and enthusiasts, and it’s where we’ll be diving deeper into Cassandra 5.0 features. <a href="https://events.linuxfoundation.org/cassandra-summit/program/cfp/#overview" target="_blank" rel="noopener">Submit a talk</a> for the NEW AI Track at Cassandra Summit; CFP closes Monday, October 26 at 9:00 AM PDT (UTC-7).</p>
</div>
<div class="paragraph">
<p>For more information about Apache Cassandra or to join the community discussion, you can join us on these channels:</p>
</div>
<div class="ulist">
<ul>
<li>
<p><a href="/_/index.html">Apache Cassandra Website</a></p>
</li>
<li>
<p><a href="https://the-asf.slack.com/ssb/redirect" target="_blank" rel="noopener">ASF Slack</a></p>
</li>
<li>
<p><a href="https://www.youtube.com/@PlanetCassandra" target="_blank" rel="noopener">Planet Cassandra YouTube</a></p>
</li>
<li>
<p><a href="https://www.meetup.com/cassandra-global/" target="_blank" rel="noopener">Planet Cassandra Global Meetup Group</a></p>
</li>
</ul>
</div>
</div>
</div>
</div>
</div>
</div>
<footer class="grad grad--two flex-center pb-xlarge">
<div class="inner text-center z2 relative">
<h2 class="white py-small">Get started with Cassandra, fast.</h2>
<a id="footer-cta" href="/_/quickstart.html" class="btn btn--filled ma-medium">Quickstart Guide</a>
</div>
<div class="inner flex flex-distribute-items mt-xlarge z2 relative">
<div class="col-2">
<div id="footer-logo" class="logo logo--footer mb-medium"><img src="../../assets/img/logo-white-r.png" alt="Cassandra Logo"></div>
<p>Apache Cassandra<img src="../../assets/img/registered.svg" alt="®" style="width:18px;"> powers mission-critical deployments with improved performance and unparalleled levels of scale in the cloud.</p>
<div class="footer-social-icons">
<!-- Social profiles open in a new tab; rel="noopener" keeps the opened page from reaching window.opener. -->
<a href="https://twitter.com/cassandra?lang=en" target="_blank" rel="noopener"><img src="../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank" rel="noopener"><img src="../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank" rel="noopener"><img src="../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
</div>
<div class="col-2 flex flex-center">
<ul class="columns-2">
<li class="mb-small"><a href="/">Home</a></li>
<li class="mb-small"><a href="/_/cassandra-basics.html">Cassandra Basics</a></li>
<li class="mb-small"><a href="/_/quickstart.html">Quickstart</a></li>
<li class="mb-small"><a href="/_/ecosystem.html">Ecosystem</a></li>
<li class="mb-small"><a href="/doc/latest/">Documentation</a></li>
<li class="mb-small"><a href="/_/community.html">Community</a></li>
<li class="mb-small"><a href="/_/case-studies.html">Case Studies</a></li>
<li class="mb-small"><a href="/_/resources.html">Resources</a></li>
<li class="mb-small"><a href="/_/blog.html">Blog</a></li>
</ul>
</div>
</div>
</footer>
<!-- Legal footer: Apache Software Foundation links and the trademark notice. -->
<div class="lower-footer bg-white pa-medium">
<div class="flex flex-row flex-vert-center">
<div class="pr-medium"><img src="../../assets/img/feather-small.png" alt="ASF" width="20"></div>
<div class="pr-medium"><a href="https://www.apache.org/" target="_blank" rel="noopener">Foundation</a></div>
<div class="pr-medium"><a href="https://www.apache.org/events/current-event.html" target="_blank" rel="noopener">Events</a></div>
<div class="pr-medium"><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener">License</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/thanks" target="_blank" rel="noopener">Thanks</a></div>
<div class="pr-medium"><a href="https://www.apache.org/security" target="_blank" rel="noopener">Security</a></div>
<div class="pr-medium"><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank" rel="noopener">Privacy</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/sponsorship" target="_blank" rel="noopener">Sponsorship</a></div>
</div>
<!-- The closing copyright year is written at parse time from the visitor's clock. -->
<p class="my-medium">© 2009-<script>document.write(new Date().getFullYear())</script> <a href="https://apache.org" target="_blank" rel="noopener">The Apache Software Foundation</a> under the terms of the Apache License 2.0. Apache, the Apache feather logo, Apache Cassandra, Cassandra, and the Cassandra logo, are either registered trademarks or trademarks of The Apache Software Foundation.</p>
</div>
<div id="fade" class="hidden"></div>
<div id="modal" class="hidden">
<div id="close-modal" class="cursor-pointer"><svg viewBox="0 0 24 24" width="24" height="24" stroke="currentColor" stroke-width="2" fill="none" stroke-linecap="round" stroke-linejoin="round" class="css-i6dzq1"><line x1="18" y1="6" x2="6" y2="18"></line><line x1="6" y1="6" x2="18" y2="18"></line></svg></div>
<div id="mod-content" class="vid-mod-content resp-container"></div>
</div>
<script>
// Page UI behaviour, registered once the DOM is ready:
//   - mobile navigation open / dismiss
//   - #version-toggle and #mobile-docs-nav-burger toggles (delegated, so
//     they only fire on pages that actually contain those elements)
//   - footer call-to-action swap when the current page is the Quickstart
// jQuery passes itself to the ready callback, so `$` is bound locally and
// does not depend on the global alias.
jQuery(function ($) {
  // Widest viewport (px) at which clicking the open menu dismisses it.
  var NAV_DISMISS_MAX_WIDTH = 1000;

  $(document)
    .on('click', '.mobile-nav-icon', function () {
      $('.main-nav').fadeIn();
    })
    .on('click', '.main-nav', function () {
      // Measure at click time: a width cached at load goes stale after a
      // resize or rotation and could hide the menu on a wide window.
      if ($(window).width() <= NAV_DISMISS_MAX_WIDTH) {
        $(this).fadeOut();
      }
    })
    .on('click', '#version-toggle', function () {
      $(this).toggleClass('active');
      $(this).next().fadeToggle();
    })
    .on('click', '#mobile-docs-nav-burger', function () {
      $(this).toggleClass('active');
      $('.docs-nav').toggleClass('active');
    });

  // On the Quickstart page the footer button would link to the page itself,
  // so point it at the blog instead. Skip quietly if the button is absent.
  var onQuickstart = window.location.pathname.includes('quickstart.html');
  var footerCTA = document.getElementById('footer-cta');
  if (onQuickstart && footerCTA) {
    footerCTA.textContent = 'Get latest updates';
    footerCTA.setAttribute('href', '/_/blog.html');
  }
});
</script>
</div>
<script>
// Page-specific ready hook; it registers nothing on this page.
// Kept inside <body>: a <script> element after </body> is non-conforming.
jQuery(function(){
});
</script>
</body>
</html>