blob: d9691c349b687451be8d0f1e6ec0929f66b41b2b [file] [log] [blame]
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="The Apache Cassandra database is the right choice when you need scalability and high availability without compromising performance. Linear scalability and proven fault-tolerance on commodity hardware or cloud infrastructure make it the perfect platform for mission-critical data. Cassandra's support for replicating across multiple datacenters is best-in-class, providing lower latency for your users and the peace of mind of knowing that you can survive regional outages.
">
<meta name="keywords" content="cassandra, apache, apache cassandra, distributed storage, key value store, scalability, bigtable, dynamo" />
<meta name="robots" content="index,follow" />
<meta name="language" content="en" />
<title>Documentation</title>
<link rel="canonical" href="http://cassandra.apache.org/doc/3.11.5/operating/compression.html">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" crossorigin="anonymous">
<link rel="stylesheet" href="./../../../css/style.css">
<link rel="stylesheet" href="./../../../css/sphinx.css">
<link rel="top" title="Apache Cassandra Documentation v3.11.5" href="../index.html"/> <link rel="up" title="Operating Cassandra" href="index.html"/> <link rel="next" title="Change Data Capture" href="cdc.html"/> <link rel="prev" title="Bloom Filters" href="bloom_filters.html"/>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.2.0/css/all.css" integrity="sha384-hWVjflwFxL6sNzntih27bfxkr27PmbbK/iSvJ+a4+0owXq79v+lsFkW54bOGbiDQ" crossorigin="anonymous">
<link type="application/atom+xml" rel="alternate" href="http://cassandra.apache.org/feed.xml" title="Apache Cassandra Website" />
</head>
<body>
<!-- breadcrumbs -->
<div class="topnav">
<div class="container breadcrumb-container">
<ul class="breadcrumb">
<li>
<div class="dropdown">
<img class="asf-logo" src="./../../../img/asf_feather.png" />
<a data-toggle="dropdown" href="#">Apache Software Foundation <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu" aria-labelledby="dLabel">
<li><a href="http://www.apache.org">Apache Homepage</a></li>
<li><a href="http://www.apache.org/licenses/">License</a></li>
<li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
<li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
<li><a href="http://www.apache.org/security/">Security</a></li>
</ul>
</div>
</li>
<li><a href="./../../../">Apache Cassandra</a></li>
<li><a href="./../../../doc/latest/">Documentation</a></li>
<li><a href="./">Operating Cassandra</a></li>
<li>Compression</li>
</ul>
</div>
<!-- navbar -->
<nav class="navbar navbar-default navbar-static-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#cassandra-menu" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="./../../../"><img src="./../../../img/cassandra_logo.png" alt="Apache Cassandra logo" /></a>
</div><!-- /.navbar-header -->
<div id="cassandra-menu" class="collapse navbar-collapse">
<ul class="nav navbar-nav navbar-right">
<li><a href="./../../../">Home</a></li>
<li><a href="./../../../download/">Download</a></li>
<li><a href="./../../../doc/latest/">Documentation</a></li>
<li><a href="./../../../community/">Community</a></li>
<li>
<a href="./../../../blog/">Blog</a>
</li>
</ul>
</div><!-- /#cassandra-menu -->
</div>
</nav><!-- /.navbar -->
</div><!-- /.topnav -->
<div class="container-fluid">
<div class="row">
<div class="col-md-3">
<div class="doc-navigation">
<div class="doc-menu" role="navigation">
<div class="navbar-header">
<button type="button" class="pull-left navbar-toggle" data-toggle="collapse" data-target=".sidebar-navbar-collapse">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
</div>
<div class="navbar-collapse collapse sidebar-navbar-collapse">
<form id="doc-search-form" class="navbar-form" action="../search.html" method="get" role="search">
<div class="form-group">
<input type="text" size="30" class="form-control input-sm" name="q" placeholder="Search docs">
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</div>
</form>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../getting_started/index.html">Getting Started</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architecture/index.html">Architecture</a></li>
<li class="toctree-l1"><a class="reference internal" href="../data_modeling/index.html">Data Modeling</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cql/index.html">The Cassandra Query Language (CQL)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../configuration/index.html">Configuring Cassandra</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html">Operating Cassandra</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="snitch.html">Snitch</a></li>
<li class="toctree-l2"><a class="reference internal" href="topo_changes.html">Adding, replacing, moving and removing nodes</a></li>
<li class="toctree-l2"><a class="reference internal" href="repair.html">Repair</a></li>
<li class="toctree-l2"><a class="reference internal" href="read_repair.html">Read repair</a></li>
<li class="toctree-l2"><a class="reference internal" href="hints.html">Hints</a></li>
<li class="toctree-l2"><a class="reference internal" href="compaction.html">Compaction</a></li>
<li class="toctree-l2"><a class="reference internal" href="bloom_filters.html">Bloom Filters</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Compression</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#configuring-compression">Configuring Compression</a></li>
<li class="toctree-l3"><a class="reference internal" href="#benefits-and-uses">Benefits and Uses</a></li>
<li class="toctree-l3"><a class="reference internal" href="#operational-impact">Operational Impact</a></li>
<li class="toctree-l3"><a class="reference internal" href="#advanced-use">Advanced Use</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="cdc.html">Change Data Capture</a></li>
<li class="toctree-l2"><a class="reference internal" href="backups.html">Backups</a></li>
<li class="toctree-l2"><a class="reference internal" href="bulk_loading.html">Bulk Loading</a></li>
<li class="toctree-l2"><a class="reference internal" href="metrics.html">Monitoring</a></li>
<li class="toctree-l2"><a class="reference internal" href="security.html">Security</a></li>
<li class="toctree-l2"><a class="reference internal" href="hardware.html">Hardware Choices</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../tools/index.html">Cassandra Tools</a></li>
<li class="toctree-l1"><a class="reference internal" href="../troubleshooting/index.html">Troubleshooting</a></li>
<li class="toctree-l1"><a class="reference internal" href="../development/index.html">Cassandra Development</a></li>
<li class="toctree-l1"><a class="reference internal" href="../faq/index.html">Frequently Asked Questions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../bugs.html">Reporting Bugs and Contributing</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contactus.html">Contact us</a></li>
</ul>
</div><!--/.nav-collapse -->
</div>
</div>
</div>
<div class="col-md-8">
<div class="content doc-content">
<div class="content-container">
<div class="section" id="compression">
<h1>Compression<a class="headerlink" href="#compression" title="Permalink to this headline"></a></h1>
<p>Cassandra offers operators the ability to configure compression on a per-table basis. Compression reduces the size of
data on disk by compressing the SSTable in user-configurable compression <code class="docutils literal notranslate"><span class="pre">chunk_length_in_kb</span></code>. Because Cassandra
SSTables are immutable, the CPU cost of compressing is only necessary when the SSTable is written - subsequent updates
to data will land in different SSTables, so Cassandra will not need to decompress, overwrite, and recompress data when
UPDATE commands are issued. On reads, Cassandra will locate the relevant compressed chunks on disk, decompress the full
chunk, and then proceed with the remainder of the read path (merging data from disks and memtables, read repair, and so
on).</p>
<div class="section" id="configuring-compression">
<h2>Configuring Compression<a class="headerlink" href="#configuring-compression" title="Permalink to this headline"></a></h2>
<p>Compression is configured on a per-table basis as an optional argument to <code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> or <code class="docutils literal notranslate"><span class="pre">ALTER</span> <span class="pre">TABLE</span></code>. By
default, three options are relevant:</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">class</span></code> specifies the compression class - Cassandra provides three classes (<code class="docutils literal notranslate"><span class="pre">LZ4Compressor</span></code>,
<code class="docutils literal notranslate"><span class="pre">SnappyCompressor</span></code>, and <code class="docutils literal notranslate"><span class="pre">DeflateCompressor</span></code> ). The default is <code class="docutils literal notranslate"><span class="pre">LZ4Compressor</span></code>.</li>
<li><code class="docutils literal notranslate"><span class="pre">chunk_length_in_kb</span></code> specifies the number of kilobytes of data per compression chunk. The default is 64KB.</li>
<li><code class="docutils literal notranslate"><span class="pre">crc_check_chance</span></code> determines how likely Cassandra is to verify the checksum on each compression chunk during
reads. The default is 1.0.</li>
</ul>
<p>Users can set compression using the following syntax:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>CREATE TABLE keyspace.table (id int PRIMARY KEY) WITH compression = {&#39;class&#39;: &#39;LZ4Compressor&#39;};
</pre></div>
</div>
<p>Or</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>ALTER TABLE keyspace.table WITH compression = {&#39;class&#39;: &#39;SnappyCompressor&#39;, &#39;chunk_length_in_kb&#39;: 128, &#39;crc_check_chance&#39;: 0.5};
</pre></div>
</div>
<p>Once enabled, compression can be disabled with <code class="docutils literal notranslate"><span class="pre">ALTER</span> <span class="pre">TABLE</span></code> setting <code class="docutils literal notranslate"><span class="pre">enabled</span></code> to <code class="docutils literal notranslate"><span class="pre">false</span></code>:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>ALTER TABLE keyspace.table WITH compression = {&#39;enabled&#39;:&#39;false&#39;};
</pre></div>
</div>
<p>Operators should be aware, however, that changing compression is not immediate. The data is compressed when the SSTable
is written, and as SSTables are immutable, the compression will not be modified until the table is compacted. Upon
issuing a change to the compression options via <code class="docutils literal notranslate"><span class="pre">ALTER</span> <span class="pre">TABLE</span></code>, the existing SSTables will not be modified until they
are compacted - if an operator needs compression changes to take effect immediately, the operator can trigger an SSTable
rewrite using <code class="docutils literal notranslate"><span class="pre">nodetool</span> <span class="pre">scrub</span></code> or <code class="docutils literal notranslate"><span class="pre">nodetool</span> <span class="pre">upgradesstables</span> <span class="pre">-a</span></code>, both of which will rebuild the SSTables on disk,
re-compressing the data in the process.</p>
</div>
<div class="section" id="benefits-and-uses">
<h2>Benefits and Uses<a class="headerlink" href="#benefits-and-uses" title="Permalink to this headline"></a></h2>
<p>Compression’s primary benefit is that it reduces the amount of data written to disk. Not only does the reduced size save
in storage requirements, it often increases read and write throughput, as the CPU overhead of compressing data is faster
than the time it would take to read or write the larger volume of uncompressed data from disk.</p>
<p>Compression is most useful in tables comprised of many rows, where the rows are similar in nature. Tables containing
similar text columns (such as repeated JSON blobs) often compress very well.</p>
</div>
<div class="section" id="operational-impact">
<h2>Operational Impact<a class="headerlink" href="#operational-impact" title="Permalink to this headline"></a></h2>
<ul class="simple">
<li>Compression metadata is stored off-heap and scales with data on disk. This often requires 1-3GB of off-heap RAM per
terabyte of data on disk, though the exact usage varies with <code class="docutils literal notranslate"><span class="pre">chunk_length_in_kb</span></code> and compression ratios.</li>
<li>Streaming operations involve compressing and decompressing data on compressed tables - in some code paths (such as
non-vnode bootstrap), the CPU overhead of compression can be a limiting factor.</li>
<li>The compression path checksums data to ensure correctness - while the traditional Cassandra read path does not have a
way to ensure correctness of data on disk, compressed tables allow the user to set <code class="docutils literal notranslate"><span class="pre">crc_check_chance</span></code> (a float from
0.0 to 1.0) to allow Cassandra to probabilistically validate chunks on read to verify bits on disk are not corrupt.</li>
</ul>
</div>
<div class="section" id="advanced-use">
<h2>Advanced Use<a class="headerlink" href="#advanced-use" title="Permalink to this headline"></a></h2>
<p>Advanced users can provide their own compression class by implementing the interface at
<code class="docutils literal notranslate"><span class="pre">org.apache.cassandra.io.compress.ICompressor</span></code>.</p>
</div>
</div>
<div class="doc-prev-next-links" role="navigation" aria-label="footer navigation">
<a href="cdc.html" class="btn btn-default pull-right " role="button" title="Change Data Capture" accesskey="n">Next <span class="glyphicon glyphicon-circle-arrow-right" aria-hidden="true"></span></a>
<a href="bloom_filters.html" class="btn btn-default" role="button" title="Bloom Filters" accesskey="p"><span class="glyphicon glyphicon-circle-arrow-left" aria-hidden="true"></span> Previous</a>
</div>
</div>
</div>
</div>
</div>
</div>
<hr />
<footer>
<div class="container">
<div class="col-md-4 social-blk">
<span class="social">
<a href="https://twitter.com/cassandra"
class="twitter-follow-button"
data-show-count="false" data-size="large">Follow @cassandra</a>
<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script>
<a href="https://twitter.com/intent/tweet?button_hashtag=cassandra"
class="twitter-hashtag-button"
data-size="large"
data-related="ApacheCassandra">Tweet #cassandra</a>
<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script>
</span>
<a class="subscribe-rss icon-link" href="/feed.xml" title="Subscribe to Blog via RSS">
<span><i class="fa fa-rss"></i></span>
</a>
</div>
<div class="col-md-8 trademark">
<p>&copy; 2016 <a href="http://apache.org">The Apache Software Foundation</a>.
Apache, the Apache feather logo, and Apache Cassandra are trademarks of The Apache Software Foundation.
<p>
</div>
</div><!-- /.container -->
</footer>
<!-- Javascript. Placed here so pages load faster -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
<script src="./../../../js/underscore-min.js"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" crossorigin="anonymous"></script>
<script src="./../../../js/doctools.js"></script>
<script src="./../../../js/searchtools.js"></script>
<script type="text/javascript"> var DOCUMENTATION_OPTIONS = { URL_ROOT: "", VERSION: "", COLLAPSE_INDEX: false, FILE_SUFFIX: ".html", HAS_SOURCE: false, SOURCELINK_SUFFIX: ".txt" }; </script>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
try {
var pageTracker = _gat._getTracker("UA-11583863-1");
pageTracker._trackPageview();
} catch(err) {}
</script>
</body>
</html>