<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>Apache Cassandra | Apache Cassandra Documentation</title>
<link rel="stylesheet" href="../../assets/css/site.css">
<meta name="description" content="The Apache Cassandra Community">
<link rel="schema.dcterms" href="https://purl.org/dc/terms/">
<meta name="dcterms.subject" content="_">
<meta name="dcterms.identifier" content="master">
<meta name="generator" content="Antora 2.3.4">
<link rel="icon" href="../../assets/img/favicon.ico" type="image/x-icon">
<script>
const script = document.createElement("script");
const domain = window.location.hostname;
script.type = "text/javascript";
script.src = "https://plausible.cassandra.apache.org/js/plausible.js";
script.setAttribute("data-domain",domain);
script.setAttribute("defer",'true');
script.setAttribute("async",'true');
document.getElementsByTagName("head")[0].appendChild(script);
</script> </head>
<body class="single-post">
<div class="container mx-auto relative">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
<meta property="og:type" content="website" />
<meta property="og:description" content="" />
<meta property="og:url" content="/" />
<meta property="og:site_name" content="Apache Cassandra" />
<header id="top-nav">
<div class="inner relative">
<div class="header-social-icons text-right">
<a href="https://twitter.com/cassandra?lang=en" target="_blank" style="margin-left: 20px;"><img src="../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank" style="margin-left: 20px;"><img src="../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank" style="margin-left: 20px;"><img src="../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
<div class="cf">
<div class="logo left"><a href="/"><img src="../../assets/img/logo-white-r.png" alt="cassandra logo"></a></div>
<div class="mobile-nav-icon right">
<img class="toggle-icon" src="../../assets/img/hamburger-nav.svg" alt="Open navigation menu">
</div>
<ul class="main-nav nav-links right flex flex-vert-center flex-space-between">
<li>
<a class="nav-link hide-mobile">Get Started</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/cassandra-basics.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-basics.png" alt="cassandra basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra Basics
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/quickstart.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-rocket.png" alt="quickstart icon">
</div>
<div class="sub-nav-text teal py-small">
Quickstart
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/ecosystem.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-ecosystem.png" alt="ecosystem icon">
</div>
<div class="sub-nav-text teal py-small">
Ecosystem
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link" href="/doc/latest/">Documentation</a></li>
<li>
<a class="nav-link" href="/_/community.html">Community</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/community.html#code-of-conduct">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-welcome.png" alt="welcome icon">
</div>
<div class="sub-nav-text teal py-small">
Welcome
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#discussions">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-discussions.png" alt="discussions icon">
</div>
<div class="sub-nav-text teal py-small">
Discussions
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#project-governance">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-governance.png" alt="Governance icon">
</div>
<div class="sub-nav-text teal py-small">
Governance
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#how-to-contribute">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-contribute.png" alt="Contribute icon">
</div>
<div class="sub-nav-text teal py-small">
Contribute
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#meet-the-community">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-community.png" alt="Meet the Community icon">
</div>
<div class="sub-nav-text teal py-small">
Meet the Community
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/cassandra-catalyst-program.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-catalyst.png" alt="Catalyst icon">
</div>
<div class="sub-nav-text teal py-small">
Catalyst Program
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/events.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-events.png" alt="Events icon">
</div>
<div class="sub-nav-text teal py-small">
Events
</div>
</a>
</li>
</ul>
</li>
<li>
<a class="nav-link hide-mobile">Learn</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/Apache-Cassandra-5.0-Moving-Toward-an-AI-Driven-Future.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-basics.png" alt="Basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra 5.0
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/case-studies.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-case-study.png" alt="Case Studies icon">
</div>
<div class="sub-nav-text teal py-small">
Case Studies
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/resources.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-resources.png" alt="Resources icon">
</div>
<div class="sub-nav-text teal py-small">
Resources
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/blog.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-blog.png" alt="Blog icon">
</div>
<div class="sub-nav-text teal py-small">
Blog
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link btn btn--filled" href="/_/download.html">Download Now</a></li>
</ul>
</div>
</div>
</header>
<div class="hero hero--home grad">
<div class="eye"></div>
<div id="home-content" class="text-center flex flex-center flex-column relative z2 ma-xlarge">
<h1>Reaper: Anti-entropy Repair Made Easy</h1>
<h3>September 28, 2021 | Alexander Dejanovski</h3>
</div>
</div>
<div id="blog-post" class="flex-center py-large arrow">
<div class="blog-breadcrumb mb-medium">
<div class="inner inner--narrow">
<a href="/_/blog.html">« Back to the Apache Cassandra Blog</a>
</div>
</div>
<div class="post-content">
<div class="inner inner--narrow">
<div id="preamble">
<div class="sectionbody">
<div class="paragraph">
<p>Reaper is an open source tool written in Java and built with Dropwizard to schedule and orchestrate repairs of Apache Cassandra clusters. It was originally <a href="https://www.slideshare.net/planetcassandra/spotify-automating-cassandra-repairs" target="_blank" rel="noopener">designed and open-sourced by Spotify</a> in an attempt to automate repairs while applying best practices from their solid production experience.</p>
</div>
</div>
</div>
<div class="sect2">
<h3 id="repair-challenges"><a class="anchor" href="#repair-challenges"></a>Repair Challenges</h3>
<div class="paragraph">
<p>Anti-entropy repair is traditionally performed using the nodetool repair command. It can be performed in two ways, full or incremental, and configured to repair various token ranges: all, primary range, or sub-range. Add to this different validation compaction orchestration settings (sequential, parallel, and data center aware), the fact that anti-compaction may trigger in some cases, and you&#8217;re down a rabbit hole of complexity. All this for an operation that is mandatory and should be simple to run.
In the 1.x/2.x days of Cassandra (and probably after that), some operators simply gave up on repairing their clusters due to the difficulties involved in completing the operation successfully without impacting SLAs.
The main problems encountered during repairs were:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>A high number of pending compactions and SSTables on disk</p>
</li>
<li>
<p>Repairs taking longer than the tombstones GC grace period</p>
</li>
<li>
<p>High cluster load due to repair pressure</p>
</li>
<li>
<p>Blocked/never-ending repairs</p>
</li>
<li>
<p>A repair that isn&#8217;t resumable in case of failure</p>
</li>
<li>
<p>vnodes made the operation very long and challenging to perform</p>
</li>
</ul>
</div>
<div class="sect3">
<h4 id="reaper-performs-safe-repairs"><a class="anchor" href="#reaper-performs-safe-repairs"></a>Reaper performs safe repairs</h4>
<div class="paragraph">
<p>Reaper was built to address those issues and make repairs as safe and reliable as possible. It splits the repair operations into evenly sized subranges and schedules them so that:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>All nodes are kept busy repairing small units of data if possible</p>
</li>
<li>
<p>Only a single segment runs on a node at any given time</p>
</li>
<li>
<p>Segments lasting too long are terminated and re-scheduled</p>
</li>
<li>
<p>Failed segments get replayed in case of a transient failure</p>
</li>
<li>
<p>Pending compactions are monitored to pause segment scheduling, preventing overload</p>
</li>
<li>
<p>Repairs can be paused</p>
</li>
</ul>
</div>
<div class="paragraph">
<p>The tool also supports incremental repair, which should be safely usable starting with Cassandra 4.0. Since Cassandra 3.0, Reaper can create segments with several token ranges to reduce the overhead of vnodes on repairs. Such ranges will be repaired in a single job by Cassandra as segments will only contain ranges that are replicated on the same set of nodes.</p>
</div>
</div>
</div>
<div class="sect2">
<h3 id="reaper-features"><a class="anchor" href="#reaper-features"></a>Reaper Features</h3>
<div class="paragraph">
<p>Reaper ships with a <a href="http://cassandra-reaper.io/docs/api/" target="_blank" rel="noopener">REST API</a>, a command-line tool (spreaper) and a Web UI:</p>
</div>
<div class="imageblock">
<div class="content">
<img src="../_images/blog/blog-post-reaper-anti-entropy-repair/image5.png" alt="cluster view">
</div>
</div>
<div class="paragraph">
<p>It collects and displays runtime Cassandra metrics, running compactions and ongoing streaming sessions:</p>
</div>
<div class="imageblock">
<div class="content">
<img src="../_images/blog/blog-post-reaper-anti-entropy-repair/image6.png" alt="node metrics">
</div>
</div>
<div class="paragraph">
<p>Reaper ships with a scheduler for recurring repairs but can also perform on-demand one-off repairs:</p>
</div>
<div class="imageblock">
<div class="content">
<img src="../_images/blog/blog-post-reaper-anti-entropy-repair/image7.png" alt="scheduler">
</div>
</div>
<div class="imageblock">
<div class="content">
<img src="../_images/blog/blog-post-reaper-anti-entropy-repair/image4.png" alt="repair">
</div>
</div>
<div class="paragraph">
<p>It&#8217;s easy to install as a <a href="http://cassandra-reaper.io/docs/download/install/" target="_blank" rel="noopener">tarball</a>, a <a href="https://hub.docker.com/r/thelastpickle/cassandra-reaper/" target="_blank" rel="noopener">Docker container</a>, or <a href="https://cloudsmith.io/%7Ethelastpickle/repos/reaper/packages/" target="_blank" rel="noopener">deb/rpm packages</a> and can be deployed in various ways to accommodate your <a href="http://cassandra-reaper.io/docs/usage/multi_dc_distributed/" target="_blank" rel="noopener">cluster&#8217;s architecture</a>. A single Reaper instance is capable of managing repairs on dozens of Cassandra clusters but several instances can be deployed to provide high availability or adapt to JMX restrictions in your network:</p>
</div>
<div class="imageblock">
<div class="content">
<img src="../_images/blog/blog-post-reaper-anti-entropy-repair/image8.png" alt="singlereaper multidc">
</div>
</div>
<div class="imageblock">
<div class="content">
<img src="../_images/blog/blog-post-reaper-anti-entropy-repair/image3.png" alt="multireaper multidc">
</div>
</div>
<div class="imageblock">
<div class="content">
<img src="../_images/blog/blog-post-reaper-anti-entropy-repair/image2.png" alt="ha reaper setup">
</div>
</div>
<div class="paragraph">
<p>If JMX is restricted to local access, Reaper can even be deployed as a sidecar:</p>
</div>
<div class="imageblock">
<div class="content">
<img src="../_images/blog/blog-post-reaper-anti-entropy-repair/image1.png" alt="reaper sidecar">
</div>
</div>
<div class="paragraph">
<p>Reaper can also listen for the Diagnostic Events emitted by Cassandra and display them live.</p>
</div>
<div class="paragraph">
<p>In Cassandra 4.0, internal system “diagnostic events” have become available via the work done in <a href="https://issues.apache.org/jira/browse/CASSANDRA-12944" target="_blank" rel="noopener">CASSANDRA-12944</a>. These allow us to observe internal Cassandra events, for example, in unit tests, and with external tools. These diagnostic events provide operational monitoring and troubleshooting beyond logs and metrics.</p>
</div>
<div class="paragraph">
<p>Reaper can use Postgres and Cassandra itself as a storage backend for its data and can repair all Cassandra versions since 1.2 up to the latest 4.0.</p>
</div>
<div class="paragraph">
<p>To make Reaper more efficient, segment orchestration was recently revamped and modernized. These changes also enabled a long-awaited feature: fully concurrent repairs for different keyspaces and tables.</p>
</div>
<div class="paragraph">
<p>You can find more details on these changes in the <a href="https://thelastpickle.com/blog/2021/02/22/reaper-for-apache-cassandra-2-2-release.html" target="_blank" rel="noopener">2.2 release blog post</a>.</p>
</div>
<div class="paragraph">
<p>Note: the latest release is <a href="https://github.com/thelastpickle/cassandra-reaper/releases/tag/2.3.1" target="_blank" rel="noopener">2.3.1</a>.</p>
</div>
</div>
<div class="sect2">
<h3 id="eager-to-try-reaper"><a class="anchor" href="#eager-to-try-reaper"></a>Eager to try Reaper?</h3>
<div class="paragraph">
<p>Head over to the <a href="http://cassandra-reaper.io/" target="_blank" rel="noopener">cassandra-reaper.io</a> website, which contains all the information you&#8217;ll need to get started, install Reaper, and stop worrying about repairs!</p>
</div>
<div class="paragraph">
<p>This article by Alexander Dejanovski was previously published on <a href="https://jaxenter.com/reaper-anti-entropy-174878.html" target="_blank" rel="noopener">JAXEnter.com</a>.</p>
</div>
</div>
</div>
</div>
</div>
<footer class="grad grad--two flex-center pb-xlarge">
<div class="inner text-center z2 relative">
<h2 class="white py-small">Get started with Cassandra, fast.</h2>
<a id="footer-cta" href="/_/quickstart.html" class="btn btn--filled ma-medium">Quickstart Guide</a>
</div>
<div class="inner flex flex-distribute-items mt-xlarge z2 relative">
<div class="col-2">
<div id="footer-logo" class="logo logo--footer mb-medium"><img src="../../assets/img/logo-white-r.png" alt="Cassandra Logo"></div>
<p>Apache Cassandra<img src="../../assets/img/registered.svg" alt="®" style="width:18px;"> powers mission-critical deployments with improved performance and unparalleled levels of scale in the cloud.</p>
<div class="footer-social-icons">
<a href="https://twitter.com/cassandra?lang=en" target="_blank"><img src="../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank"><img src="../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank"><img src="../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
</div>
<div class="col-2 flex flex-center">
<ul class="columns-2">
<li class="mb-small"><a href="/">Home</a></li>
<li class="mb-small"><a href="/_/cassandra-basics.html">Cassandra Basics</a></li>
<li class="mb-small"><a href="/_/quickstart.html">Quickstart</a></li>
<li class="mb-small"><a href="/_/ecosystem.html">Ecosystem</a></li>
<li class="mb-small"><a href="/doc/latest/">Documentation</a></li>
<li class="mb-small"><a href="/_/community.html">Community</a></li>
<li class="mb-small"><a href="/_/case-studies.html">Case Studies</a></li>
<li class="mb-small"><a href="/_/resources.html">Resources</a></li>
<li class="mb-small"><a href="/_/blog.html">Blog</a></li>
</ul>
</div>
</div>
</footer>
<div class="lower-footer bg-white pa-medium">
<div class="flex flex-row flex-vert-center">
<div class="pr-medium"><img src="../../assets/img/feather-small.png" alt="ASF" width="20"></div>
<div class="pr-medium"><a href="https://www.apache.org/" target="_blank">Foundation</a></div>
<div class="pr-medium"><a href="https://www.apache.org/events/current-event.html" target="_blank">Events</a></div>
<div class="pr-medium"><a href="https://www.apache.org/licenses/" target="_blank">License</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/thanks" target="_blank">Thanks</a></div>
<div class="pr-medium"><a href="https://www.apache.org/security" target="_blank">Security</a></div>
<div class="pr-medium"><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank">Privacy</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></div>
</div>
<p class="my-medium">© 2009-<script>document.write(new Date().getFullYear())</script> <a href="https://apache.org" target="_blank">The Apache Software Foundation</a> under the terms of the Apache License 2.0. Apache, the Apache feather logo, Apache Cassandra, Cassandra, and the Cassandra logo, are either registered trademarks or trademarks of The Apache Software Foundation.</p>
</div>
<div id="fade" class="hidden"></div>
<div id="modal" class="hidden">
<div id="close-modal" class="cursor-pointer"><svg viewBox="0 0 24 24" width="24" height="24" stroke="currentColor" stroke-width="2" fill="none" stroke-linecap="round" stroke-linejoin="round" class="css-i6dzq1"><line x1="18" y1="6" x2="6" y2="18"></line><line x1="6" y1="6" x2="18" y2="18"></line></svg></div>
<div id="mod-content" class="vid-mod-content resp-container"></div>
</div>
<script>
jQuery(function(){
var windowW = $(window).width();
$(document)
.on('click','.mobile-nav-icon',function(){
$('.main-nav').fadeIn();
})
.on('click','.main-nav',function(){
if(windowW <= 1000){
$(this).fadeOut();
}
})
.on('click','#version-toggle',function(){
$(this).toggleClass('active');
$(this).next().fadeToggle();
})
.on('click','#mobile-docs-nav-burger', function(){
$(this).toggleClass('active');
$('.docs-nav').toggleClass('active');
});
var url = window.location.pathname;
var isQuickstart = url.includes('quickstart.html');
if(isQuickstart){
var footerCTA = document.getElementById('footer-cta');
footerCTA.innerHTML = 'Get latest updates';
footerCTA.setAttribute('href', '/_/blog.html');
}
});
</script>
</div>
</body>
<script>
jQuery(function(){
});
</script>
</html>