blob: c9b8b0989c8e033ec82ef856d45f2b61f69ac292 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="The Apache Cassandra database is the right choice when you need scalability and high availability without compromising performance. Linear scalability and proven fault-tolerance on commodity hardware or cloud infrastructure make it the perfect platform for mission-critical data. Cassandra's support for replicating across multiple datacenters is best-in-class, providing lower latency for your users and the peace of mind of knowing that you can survive regional outages.
">
<meta name="keywords" content="cassandra, apache, apache cassandra, distributed storage, key value store, scalability, bigtable, dynamo" />
<meta name="robots" content="index,follow" />
<meta name="language" content="en" />
<title>Documentation</title>
<link rel="canonical" href="http://cassandra.apache.org/doc/latest/operating/compaction/twcs.html">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" crossorigin="anonymous">
<link rel="stylesheet" href="./../../../../css/style.css">
<link rel="stylesheet" href="./../../../../css/sphinx.css">
<link rel="top" title="Apache Cassandra Documentation v4.0-alpha5" href="../../index.html"/>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.2.0/css/all.css" integrity="sha384-hWVjflwFxL6sNzntih27bfxkr27PmbbK/iSvJ+a4+0owXq79v+lsFkW54bOGbiDQ" crossorigin="anonymous">
<link type="application/atom+xml" rel="alternate" href="http://cassandra.apache.org/feed.xml" title="Apache Cassandra Website" />
</head>
<body>
<!-- breadcrumbs -->
<div class="topnav">
<div class="container breadcrumb-container">
<ul class="breadcrumb">
<li>
<div class="dropdown">
<img class="asf-logo" src="./../../../../img/asf_feather.png" alt="" />
<a data-toggle="dropdown" href="#">Apache Software Foundation <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu" aria-labelledby="dLabel">
<li><a href="http://www.apache.org">Apache Homepage</a></li>
<li><a href="http://www.apache.org/licenses/">License</a></li>
<li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
<li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
<li><a href="http://www.apache.org/security/">Security</a></li>
</ul>
</div>
</li>
<li><a href="./../../../../">Apache Cassandra</a></li>
<li><a href="./../../../../doc/latest/">Documentation</a></li>
<li>Time Window CompactionStrategy</li>
</ul>
</div>
<!-- navbar -->
<nav class="navbar navbar-default navbar-static-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#cassandra-menu" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="./../../../../"><img src="./../../../../img/cassandra_logo.png" alt="Apache Cassandra logo" /></a>
</div><!-- /.navbar-header -->
<div id="cassandra-menu" class="collapse navbar-collapse">
<ul class="nav navbar-nav navbar-right">
<li><a href="./../../../../">Home</a></li>
<li><a href="./../../../../download/">Download</a></li>
<li><a href="./../../../../doc/latest/">Documentation</a></li>
<li><a href="./../../../../community/">Community</a></li>
<li>
<a href="./../../../../blog/">Blog</a>
</li>
</ul>
</div><!-- /#cassandra-menu -->
</div>
</nav><!-- /.navbar -->
</div><!-- /.topnav -->
<div class="container-fluid">
<div class="row">
<div class="col-md-3">
<div class="doc-navigation">
<div class="doc-menu" role="navigation">
<div class="navbar-header">
<button type="button" class="pull-left navbar-toggle" data-toggle="collapse" data-target=".sidebar-navbar-collapse">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
</div>
<div class="navbar-collapse collapse sidebar-navbar-collapse">
<form id="doc-search-form" class="navbar-form" action="../../search.html" method="get" role="search">
<div class="form-group">
<input type="text" size="30" class="form-control input-sm" name="q" placeholder="Search docs">
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</div>
</form>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../getting_started/index.html">Getting Started</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../new/index.html">New Features in Apache Cassandra 4.0</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../architecture/index.html">Architecture</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../cql/index.html">The Cassandra Query Language (CQL)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../data_modeling/index.html">Data Modeling</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../configuration/index.html">Configuring Cassandra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../index.html">Operating Cassandra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../tools/index.html">Cassandra Tools</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../troubleshooting/index.html">Troubleshooting</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../development/index.html">Contributing to Cassandra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../faq/index.html">Frequently Asked Questions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../plugins/index.html">Third-Party Plugins</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../bugs.html">Reporting Bugs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../contactus.html">Contact us</a></li>
</ul>
</div><!--/.nav-collapse -->
</div>
</div>
</div>
<div class="col-md-8">
<div class="content doc-content">
<div class="content-container">
<div class="section" id="time-window-compactionstrategy">
<span id="twcs"></span><h1>Time Window CompactionStrategy<a class="headerlink" href="#time-window-compactionstrategy" title="Permalink to this headline"></a></h1>
<p><code class="docutils literal notranslate"><span class="pre">TimeWindowCompactionStrategy</span></code> (TWCS) is designed specifically for workloads where it’s beneficial to have data on
disk grouped by the timestamp of the data, a common goal when the workload is time-series in nature or when all data is
written with a TTL. In an expiring/TTL workload, the contents of an entire SSTable likely expire at approximately the
same time, allowing them to be dropped completely, and space reclaimed much more reliably than when using
<code class="docutils literal notranslate"><span class="pre">SizeTieredCompactionStrategy</span></code> or <code class="docutils literal notranslate"><span class="pre">LeveledCompactionStrategy</span></code>. The basic concept is that
<code class="docutils literal notranslate"><span class="pre">TimeWindowCompactionStrategy</span></code> will create one SSTable per window, where a window is simply calculated
as the combination of two primary options:</p>
<dl class="docutils">
<dt><code class="docutils literal notranslate"><span class="pre">compaction_window_unit</span></code> (default: DAYS)</dt>
<dd>A Java TimeUnit (MINUTES, HOURS, or DAYS).</dd>
<dt><code class="docutils literal notranslate"><span class="pre">compaction_window_size</span></code> (default: 1)</dt>
<dd>The number of units that make up a window.</dd>
<dt><code class="docutils literal notranslate"><span class="pre">unsafe_aggressive_sstable_expiration</span></code> (default: false)</dt>
<dd>Expired sstables will be dropped without checking whether their data is shadowing other sstables. This is a potentially
risky option that can lead to data loss or deleted data re-appearing, going beyond what
<cite>unchecked_tombstone_compaction</cite> does for single sstable compaction. Due to the risk the jvm must also be
started with <cite>-Dcassandra.unsafe_aggressive_sstable_expiration=true</cite>.</dd>
</dl>
<p>Taken together, the operator can specify windows of virtually any size, and <cite>TimeWindowCompactionStrategy</cite> will work to
create a single sstable for writes within that window. For efficiency during writing, the newest window will be
compacted using <cite>SizeTieredCompactionStrategy</cite>.</p>
<p>Ideally, operators should select a <code class="docutils literal notranslate"><span class="pre">compaction_window_unit</span></code> and <code class="docutils literal notranslate"><span class="pre">compaction_window_size</span></code> pair that produces
approximately 20-30 windows - if writing with a 90 day TTL, for example, a 3 Day window would be a reasonable choice
(<code class="docutils literal notranslate"><span class="pre">'compaction_window_unit':'DAYS','compaction_window_size':3</span></code>).</p>
<div class="section" id="timewindowcompactionstrategy-operational-concerns">
<h2>TimeWindowCompactionStrategy Operational Concerns<a class="headerlink" href="#timewindowcompactionstrategy-operational-concerns" title="Permalink to this headline"></a></h2>
<p>The primary motivation for TWCS is to separate data on disk by timestamp and to allow fully expired SSTables to drop
more efficiently. One potential way this optimal behavior can be subverted is if data is written to SSTables out of
order, with new data and old data in the same SSTable. Out of order data can appear in two ways:</p>
<ul class="simple">
<li>If the user mixes old data and new data in the traditional write path, the data will be comingled in the memtables
and flushed into the same SSTable, where it will remain comingled.</li>
<li>If the user’s read requests for old data cause read repairs that pull old data into the current memtable, that data
will be comingled and flushed into the same SSTable.</li>
</ul>
<p>While TWCS tries to minimize the impact of comingled data, users should attempt to avoid this behavior. Specifically,
users should avoid queries that explicitly set the timestamp via CQL <code class="docutils literal notranslate"><span class="pre">USING</span> <span class="pre">TIMESTAMP</span></code>. Additionally, users should run
frequent repairs (which stream data in such a way that it does not become comingled).</p>
</div>
<div class="section" id="changing-timewindowcompactionstrategy-options">
<h2>Changing TimeWindowCompactionStrategy Options<a class="headerlink" href="#changing-timewindowcompactionstrategy-options" title="Permalink to this headline"></a></h2>
<p>Operators wishing to enable <code class="docutils literal notranslate"><span class="pre">TimeWindowCompactionStrategy</span></code> on existing data should consider running a major compaction
first, placing all existing data into a single (old) window. Subsequent newer writes will then create typical SSTables
as expected.</p>
<p>Operators wishing to change <code class="docutils literal notranslate"><span class="pre">compaction_window_unit</span></code> or <code class="docutils literal notranslate"><span class="pre">compaction_window_size</span></code> can do so, but may trigger
additional compactions as adjacent windows are joined together. If the window size is decreased (for example, from 24
hours to 12 hours), then the existing SSTables will not be modified - TWCS cannot split existing SSTables into multiple
windows.</p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<hr />
<footer>
<div class="container">
<div class="col-md-4 social-blk">
<span class="social">
<a href="https://twitter.com/cassandra"
class="twitter-follow-button"
data-show-count="false" data-size="large">Follow @cassandra</a>
<script>
// Loads Twitter's widgets.js once: if no element with the id "twitter-wjs"
// exists yet, a script element with that id is created and inserted before
// the first <script> element in the document.
(function (doc, tagName, scriptId) {
  var firstScript = doc.getElementsByTagName(tagName)[0];
  // Use plain http only when the page itself was loaded over http.
  var scheme = /^http:/.test(doc.location) ? 'http' : 'https';
  if (doc.getElementById(scriptId)) {
    return;
  }
  var widgetScript = doc.createElement(tagName);
  widgetScript.id = scriptId;
  widgetScript.src = scheme + '://platform.twitter.com/widgets.js';
  firstScript.parentNode.insertBefore(widgetScript, firstScript);
}(document, 'script', 'twitter-wjs'));
</script>
<a href="https://twitter.com/intent/tweet?button_hashtag=cassandra"
class="twitter-hashtag-button"
data-size="large"
data-related="ApacheCassandra">Tweet #cassandra</a>
<script>
// Loads Twitter's widgets.js unless an element with the id "twitter-wjs" is
// already present, in which case this snippet does nothing.
(function (doc, tagName, scriptId) {
  var firstScript = doc.getElementsByTagName(tagName)[0];
  // Use plain http only when the page itself was loaded over http.
  var scheme = /^http:/.test(doc.location) ? 'http' : 'https';
  if (doc.getElementById(scriptId)) {
    return;
  }
  var widgetScript = doc.createElement(tagName);
  widgetScript.id = scriptId;
  widgetScript.src = scheme + '://platform.twitter.com/widgets.js';
  firstScript.parentNode.insertBefore(widgetScript, firstScript);
}(document, 'script', 'twitter-wjs'));
</script>
</span>
<a class="subscribe-rss icon-link" href="/feed.xml" title="Subscribe to Blog via RSS">
<span><i class="fa fa-rss"></i></span>
</a>
</div>
<div class="col-md-8 trademark">
<p>&copy; 2016 <a href="http://apache.org">The Apache Software Foundation</a>.
Apache, the Apache feather logo, and Apache Cassandra are trademarks of The Apache Software Foundation.
</p>
</div>
</div><!-- /.container -->
</footer>
<!-- Javascript. Placed here so pages load faster -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
<script src="./../../../../js/underscore-min.js"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" crossorigin="anonymous"></script>
<script src="./../../../../js/doctools.js"></script>
<script src="./../../../../js/searchtools.js"></script>
<script type="text/javascript">
  // Global options object for the documentation scripts.
  // NOTE(review): presumably read by doctools.js / searchtools.js; confirm against those files.
  var DOCUMENTATION_OPTIONS = {
    URL_ROOT: "",
    VERSION: "",
    COLLAPSE_INDEX: false,
    FILE_SUFFIX: ".html",
    HAS_SOURCE: false,
    SOURCELINK_SUFFIX: ".txt"
  };
</script>
<!-- Google Analytics (legacy ga.js) loader: chooses the SSL or non-SSL host from the page
     protocol and writes a script tag for ga.js into the document. -->
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<!-- The tracking call is kept in its own script block on purpose: a script tag emitted through
     document.write is only executed after the script that wrote it has finished, so _gat does
     not exist until the loader block above has ended. With both parts in a single block the
     lookup of _gat throws and the empty catch hides the failure, so no pageview is recorded.
     The try/catch is retained so a blocked or failed ga.js load stays silent. -->
<script type="text/javascript">
try {
var pageTracker = _gat._getTracker("UA-11583863-1");
pageTracker._trackPageview();
} catch(err) {}
</script>
</body>
</html>