blob: 989935fbf4c3183ea65faf8dca31c2e29d45caf8 [file] [log] [blame]
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="The Apache Cassandra database is the right choice when you need scalability and high availability without compromising performance. Linear scalability and proven fault-tolerance on commodity hardware or cloud infrastructure make it the perfect platform for mission-critical data. Cassandra's support for replicating across multiple datacenters is best-in-class, providing lower latency for your users and the peace of mind of knowing that you can survive regional outages.
">
<meta name="keywords" content="cassandra, apache, apache cassandra, distributed storage, key value store, scalability, bigtable, dynamo" />
<meta name="robots" content="index,follow" />
<meta name="language" content="en" />
<title>Documentation</title>
<link rel="canonical" href="http://cassandra.apache.org/doc/latest/operating/compaction/lcs.html">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" crossorigin="anonymous">
<link rel="stylesheet" href="./../../../../css/style.css">
<link rel="stylesheet" href="./../../../../css/sphinx.css">
<link rel="top" title="Apache Cassandra Documentation v4.0-alpha4" href="../../index.html"/>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.2.0/css/all.css" integrity="sha384-hWVjflwFxL6sNzntih27bfxkr27PmbbK/iSvJ+a4+0owXq79v+lsFkW54bOGbiDQ" crossorigin="anonymous">
<link type="application/atom+xml" rel="alternate" href="http://cassandra.apache.org/feed.xml" title="Apache Cassandra Website" />
</head>
<body>
<!-- breadcrumbs -->
<div class="topnav">
<div class="container breadcrumb-container">
<ul class="breadcrumb">
<li>
<div class="dropdown">
<img class="asf-logo" src="./../../../../img/asf_feather.png" />
<a data-toggle="dropdown" href="#">Apache Software Foundation <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu" aria-labelledby="dLabel">
<li><a href="http://www.apache.org">Apache Homepage</a></li>
<li><a href="http://www.apache.org/licenses/">License</a></li>
<li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
<li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
<li><a href="http://www.apache.org/security/">Security</a></li>
</ul>
</div>
</li>
<li><a href="./../../../../">Apache Cassandra</a></li>
<li><a href="./../../../../doc/latest/">Documentation</a></li>
<li>Leveled Compaction Strategy</li>
</ul>
</div>
<!-- navbar -->
<nav class="navbar navbar-default navbar-static-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#cassandra-menu" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="./../../../../"><img src="./../../../../img/cassandra_logo.png" alt="Apache Cassandra logo" /></a>
</div><!-- /.navbar-header -->
<div id="cassandra-menu" class="collapse navbar-collapse">
<ul class="nav navbar-nav navbar-right">
<li><a href="./../../../../">Home</a></li>
<li><a href="./../../../../download/">Download</a></li>
<li><a href="./../../../../doc/latest/">Documentation</a></li>
<li><a href="./../../../../community/">Community</a></li>
<li>
<a href="./../../../../blog/">Blog</a>
</li>
</ul>
</div><!-- /#cassandra-menu -->
</div>
</nav><!-- /.navbar -->
</div><!-- /.topnav -->
<div class="container-fluid">
<div class="row">
<div class="col-md-3">
<div class="doc-navigation">
<div class="doc-menu" role="navigation">
<div class="navbar-header">
<button type="button" class="pull-left navbar-toggle" data-toggle="collapse" data-target=".sidebar-navbar-collapse">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
</div>
<div class="navbar-collapse collapse sidebar-navbar-collapse">
<form id="doc-search-form" class="navbar-form" action="../../search.html" method="get" role="search">
<div class="form-group">
<input type="text" size="30" class="form-control input-sm" name="q" placeholder="Search docs">
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</div>
</form>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../getting_started/index.html">Getting Started</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../new/index.html">New Features in Apache Cassandra 4.0</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../architecture/index.html">Architecture</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../cql/index.html">The Cassandra Query Language (CQL)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_modeling/index.html">Data Modeling</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../configuration/index.html">Configuring Cassandra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../index.html">Operating Cassandra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../tools/index.html">Cassandra Tools</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../troubleshooting/index.html">Troubleshooting</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../development/index.html">Contributing to Cassandra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../faq/index.html">Frequently Asked Questions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../plugins/index.html">Third-Party Plugins</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../bugs.html">Reporting Bugs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../contactus.html">Contact us</a></li>
</ul>
</div><!--/.nav-collapse -->
</div>
</div>
</div>
<div class="col-md-8">
<div class="content doc-content">
<div class="content-container">
<div class="section" id="leveled-compaction-strategy">
<span id="lcs"></span><h1>Leveled Compaction Strategy<a class="headerlink" href="#leveled-compaction-strategy" title="Permalink to this headline"></a></h1>
<p>The idea of <code class="docutils literal notranslate"><span class="pre">LeveledCompactionStrategy</span></code> (LCS) is that all sstables are put into different levels where we guarantee
that no overlapping sstables are in the same level. By overlapping we mean that the first/last token of a single sstable
are never overlapping with other sstables. This means that for a SELECT we will only have to look for the partition key
in a single sstable per level. Each level is 10x the size of the previous one and each sstable is 160MB by default. L0
is where sstables are streamed/flushed - no overlap guarantees are given here.</p>
<p>When picking compaction candidates we have to make sure that the compaction does not create overlap in the target level.
This is done by always including all overlapping sstables in the next level. For example if we select an sstable in L3,
we need to guarantee that we pick all overlapping sstables in L4 and make sure that no currently ongoing compactions
will create overlap if we start that compaction. We can start many parallel compactions in a level if we guarantee that
we wont create overlap. For L0 -&gt; L1 compactions we almost always need to include all L1 sstables since most L0 sstables
cover the full range. We also can’t compact all L0 sstables with all L1 sstables in a single compaction since that can
use too much memory.</p>
<p>When deciding which level to compact LCS checks the higher levels first (with LCS, a “higher” level is one with a higher
number, L0 being the lowest one) and if the level is behind a compaction will be started in that level.</p>
<div class="section" id="major-compaction">
<h2>Major compaction<a class="headerlink" href="#major-compaction" title="Permalink to this headline"></a></h2>
<p>It is possible to do a major compaction with LCS - it will currently start by filling out L1 and then once L1 is full,
it continues with L2 etc. This is sub optimal and will change to create all the sstables in a high level instead,
CASSANDRA-11817.</p>
</div>
<div class="section" id="bootstrapping">
<h2>Bootstrapping<a class="headerlink" href="#bootstrapping" title="Permalink to this headline"></a></h2>
<p>During bootstrap sstables are streamed from other nodes. The level of the remote sstable is kept to avoid many
compactions after the bootstrap is done. During bootstrap the new node also takes writes while it is streaming the data
from a remote node - these writes are flushed to L0 like all other writes and to avoid those sstables blocking the
remote sstables from going to the correct level, we only do STCS in L0 until the bootstrap is done.</p>
</div>
<div class="section" id="stcs-in-l0">
<h2>STCS in L0<a class="headerlink" href="#stcs-in-l0" title="Permalink to this headline"></a></h2>
<p>If LCS gets very many L0 sstables reads are going to hit all (or most) of the L0 sstables since they are likely to be
overlapping. To more quickly remedy this LCS does STCS compactions in L0 if there are more than 32 sstables there. This
should improve read performance more quickly compared to letting LCS do its L0 -&gt; L1 compactions. If you keep getting
too many sstables in L0 it is likely that LCS is not the best fit for your workload and STCS could work out better.</p>
</div>
<div class="section" id="starved-sstables">
<h2>Starved sstables<a class="headerlink" href="#starved-sstables" title="Permalink to this headline"></a></h2>
<p>If a node ends up with a leveling where there are a few very high level sstables that are not getting compacted they
might make it impossible for lower levels to drop tombstones etc. For example, if there are sstables in L6 but there is
only enough data to actually get a L4 on the node the left over sstables in L6 will get starved and not compacted. This
can happen if a user changes sstable_size_in_mb from 5MB to 160MB for example. To avoid this LCS tries to include
those starved high level sstables in other compactions if there has been 25 compaction rounds where the highest level
has not been involved.</p>
</div>
<div class="section" id="lcs-options">
<span id="id1"></span><h2>LCS options<a class="headerlink" href="#lcs-options" title="Permalink to this headline"></a></h2>
<dl class="docutils">
<dt><code class="docutils literal notranslate"><span class="pre">sstable_size_in_mb</span></code> (default: 160MB)</dt>
<dd>The target compressed (if using compression) sstable size - the sstables can end up being larger if there are very
large partitions on the node.</dd>
<dt><code class="docutils literal notranslate"><span class="pre">fanout_size</span></code> (default: 10)</dt>
<dd>The target size of levels increases by this fanout_size multiplier. You can reduce the space amplification by tuning
this option.</dd>
</dl>
<p>LCS also support the <code class="docutils literal notranslate"><span class="pre">cassandra.disable_stcs_in_l0</span></code> startup option (<code class="docutils literal notranslate"><span class="pre">-Dcassandra.disable_stcs_in_l0=true</span></code>) to avoid
doing STCS in L0.</p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<hr />
<footer>
<div class="container">
<div class="col-md-4 social-blk">
<span class="social">
<a href="https://twitter.com/cassandra"
class="twitter-follow-button"
data-show-count="false" data-size="large">Follow @cassandra</a>
<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script>
<a href="https://twitter.com/intent/tweet?button_hashtag=cassandra"
class="twitter-hashtag-button"
data-size="large"
data-related="ApacheCassandra">Tweet #cassandra</a>
<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script>
</span>
<a class="subscribe-rss icon-link" href="/feed.xml" title="Subscribe to Blog via RSS">
<span><i class="fa fa-rss"></i></span>
</a>
</div>
<div class="col-md-8 trademark">
<p>&copy; 2016 <a href="http://apache.org">The Apache Software Foundation</a>.
Apache, the Apache feather logo, and Apache Cassandra are trademarks of The Apache Software Foundation.
<p>
</div>
</div><!-- /.container -->
</footer>
<!-- Javascript. Placed here so pages load faster -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
<script src="./../../../../js/underscore-min.js"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" crossorigin="anonymous"></script>
<script src="./../../../../js/doctools.js"></script>
<script src="./../../../../js/searchtools.js"></script>
<script type="text/javascript"> var DOCUMENTATION_OPTIONS = { URL_ROOT: "", VERSION: "", COLLAPSE_INDEX: false, FILE_SUFFIX: ".html", HAS_SOURCE: false, SOURCELINK_SUFFIX: ".txt" }; </script>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
try {
var pageTracker = _gat._getTracker("UA-11583863-1");
pageTracker._trackPageview();
} catch(err) {}
</script>
</body>
</html>