blob: bbbea1a0b083438d6b0b3711697e9e7bf9104ad1 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>Apache Cassandra | Apache Cassandra Documentation</title>
<link rel="stylesheet" href="../../assets/css/site.css">
<meta name="description" content="The Apache Cassandra Community">
<link rel="schema.dcterms" href="https://purl.org/dc/terms/">
<meta name="dcterms.subject" content="_">
<meta name="dcterms.identifier" content="master">
<meta name="generator" content="Antora 2.3.4">
<link rel="icon" href="../../assets/img/favicon.ico" type="image/x-icon">
<script>
const script = document.createElement("script");
const domain = window.location.hostname;
script.type = "text/javascript";
script.src = "https://plausible.cassandra.apache.org/js/plausible.js";
script.setAttribute("data-domain",domain);
script.setAttribute("defer",'true');
script.setAttribute("async",'true');
document.getElementsByTagName("head")[0].appendChild(script);
</script> </head>
<body class="single-post">
<div class="container mx-auto relative">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
<meta property="og:type" content="website" />
<meta property="og:description" content="" />
<meta property="og:url" content="/" />
<meta property="og:site_name" content="Apache Cassandra" />
<header id="top-nav">
<div class="inner relative">
<div class="header-social-icons text-right">
<a href="https://twitter.com/cassandra?lang=en" target="_blank" styles="margin-left: 20px;"><img src="../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank" styles="margin-left: 20px;"><img src="../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank" styles="margin-left: 20px;"><img src="../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
<div class="cf">
<div class="logo left"><a href="/"><img src="../../assets/img/logo-white-r.png" alt="cassandra logo"></a></div>
<div class="mobile-nav-icon right">
<img class="toggle-icon" src="../../assets/img/hamburger-nav.svg">
</div>
<ul class="main-nav nav-links right flex flex-vert-center flex-space-between">
<li>
<a class="nav-link hide-mobile">Get Started</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/cassandra-basics.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-basics.png" alt="cassandra basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra Basics
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/quickstart.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-rocket.png" alt="cassandra basics icon">
</div>
<div class="sub-nav-text teal py-small">
Quickstart
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/ecosystem.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-ecosystem.png" alt="cassandra basics icon">
</div>
<div class="sub-nav-text teal py-small">
Ecosystem
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link" href="/doc/latest/">Documentation</a></li>
<li>
<a class="nav-link" href="/_/community.html">Community</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/community.html#code-of-conduct">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-welcome.png" alt="welcome icon">
</div>
<div class="sub-nav-text teal py-small">
Welcome
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#discussions">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-discussions.png" alt="discussions icon">
</div>
<div class="sub-nav-text teal py-small">
Discussions
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#project-governance">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-governance.png" alt="Governance icon">
</div>
<div class="sub-nav-text teal py-small">
Governance
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#how-to-contribute">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-contribute.png" alt="Contribute icon">
</div>
<div class="sub-nav-text teal py-small">
Contribute
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#meet-the-community">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-community.png" alt="Meet the Community icon">
</div>
<div class="sub-nav-text teal py-small">
Meet the Community
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/cassandra-catalyst-program.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-catalyst.png" alt="Catalyst icon">
</div>
<div class="sub-nav-text teal py-small">
Catalyst Program
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/events.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-events.png" alt="Events icon">
</div>
<div class="sub-nav-text teal py-small">
Events
</div>
</a>
</li>
</ul>
</li>
<li>
<a class="nav-link hide-mobile">Learn</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/Apache-Cassandra-5.0-Moving-Toward-an-AI-Driven-Future.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-basics.png" alt="Basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra 5.0
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/case-studies.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-case-study.png" alt="Case Studies icon">
</div>
<div class="sub-nav-text teal py-small">
Case Studies
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/resources.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-resources.png" alt="Resources icon">
</div>
<div class="sub-nav-text teal py-small">
Resources
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/blog.html">
<div class="sub-nav-icon">
<img src="../../assets/img/sub-menu-blog.png" alt="Blog icon">
</div>
<div class="sub-nav-text teal py-small">
Blog
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link btn btn--filled" href="/_/download.html">Download Now</a></li>
</ul>
</div>
</div>
</header>
<div class="hero hero--home grad">
<div class="eye"></div>
<div id="home-content" class="text-center flex flex-center flex-column relative z2 ma-xlarge">
<h1>Finding Bugs in Cassandra&#x27;s Internals with Property-based Testing</h1>
<h3>October 17, 2018 | The Apache Cassandra Community</h3>
</div>
</div>
<div id="blog-post" class="flex-center py-large arrow">
<div class="blog-breadcrumb mb-medium">
<div class="inner inner--narrow">
<a href="/_/blog.html">« Back to the Apache Cassandra Blog</a>
</div>
</div>
<div class="post-content">
<div class="inner inner--narrow">
<div id="preamble">
<div class="sectionbody">
<div class="paragraph">
<p>As of September 1st, the Apache Cassandra community has shifted the focus of Cassandra 4.0 development from new feature work to testing, validation, and hardening, with the goal of releasing a stable 4.0 that every Cassandra user, from small deployments to large corporations, can deploy with confidence. There are several projects and methodologies that the community is undertaking to this end. One of these is the adoption of property-based testing, which was <a href="Testing-Apache-Cassandra-4.html" class="page" target="_blank" rel="noopener">previously introduced here</a>. This post will take a look at a specific use of this approach and how it found a bug in a new feature meant to ensure data integrity between the client and Cassandra.</p>
</div>
</div>
</div>
<div class="sect3">
<h4 id="detecting-corruption-is-a-property"><a class="anchor" href="#detecting-corruption-is-a-property"></a>Detecting Corruption is a Property</h4>
<div class="paragraph">
<p>In this post, we demonstrate property-based testing in Cassandra through the integration of the <a href="https://github.com/ncredinburgh/QuickTheories" target="_blank" rel="noopener">QuickTheories</a> library introduced as part of the work done for <a href="https://issues.apache.org/jira/browse/CASSANDRA-13304" target="_blank" rel="noopener">CASSANDRA-13304</a>.</p>
</div>
<div class="paragraph">
<p>This ticket modifies the framing of Cassandra’s native client protocol to include checksums in addition to the existing, optional compression. Clients can opt-in to this new feature to retain data integrity across the many hops between themselves and Cassandra. This is meant to address cases where hardware and protocol level checksums fail (due to underlying hardware issues) — a case that has been seen in production. A description of the protocol changes can be found in the ticket but for the purposes of this discussion the salient part is that two checksums are added: one that covers the length(s) of the data (if compressed there are two lengths), and one for the data itself. Before merging this feature, property-based testing using QuickTheories was used to uncover a bug in the calculation of the checksum over the lengths. This bug could have led to silent corruption at worst or unexpected errors during deserialization at best.</p>
</div>
<div class="paragraph">
<p>The test used to find this bug is shown below. This example tests the property that when a frame is corrupted, that corruption should be caught by checksum comparison. The test is wrapped inside of a standard JUnit test case but, once called by JUnit, execution is handed over to QuickTheories to generate and execute hundreds of examples. These examples are dictated by the types of input that should be generated (the arguments to <code>forAll</code>). The execution of each individual example is done by <code>checkAssert</code> and its argument, the <code>roundTripWithCorruption</code> function.</p>
</div>
<div class="listingblock">
<div class="content">
<pre>@Test
public void corruptionCausesFailure()
{
qt().withExamples(500)
.forAll(inputWithCorruptablePosition(),
integers().between(0, Byte.MAX_Value).map(Integer::byteValue),
compressors(),
checksumTypes())
.checkAssert(this::roundTripWithCorruption);
}</pre>
</div>
</div>
<div class="paragraph">
<p>The <code>roundTripWithCorruption</code> function is a generalization of a unit test that worked similarly but for a single case. It is given an input to transform and a position in the transformed output to insert corruption, as well as what byte to write to the corrupted position. The additional arguments (the compressor and checksum type) are used to ensure coverage of Cassandra’s various compression and checksumming implementations.</p>
</div>
<div class="listingblock">
<div class="content">
<pre>private void roundTripWithCorruption(Pair&lt;String, Integer&gt; inputAndCorruptablePosition,
byte corruptionValue,
Compressor compressor,
ChecksumType checksum) {
String input = inputAndCorruptablePosition.left;
ByteBuf expectedBuf = Unpooled.wrappedBuffer(input.getBytes());
int byteToCorrupt = inputAndCorruptablePosition.right;
ChecksummingTransformer transformer = new ChecksummingTransformer(checksum, DEFAULT_BLOCK_SIZE, compressor);
ByteBuf outbound = transformer.transformOutbound(expectedBuf);
// make sure we're actually expecting to produce some corruption
if (outbound.getByte(byteToCorrupt) == corruptionValue)
return;
if (byteToCorrupt &gt;= outbound.writerIndex())
return;
try {
int oldIndex = outbound.writerIndex();
outbound.writerIndex(byteToCorrupt);
outbound.writeByte(corruptionValue);
outbound.writerIndex(oldIndex);
ByteBuf inbound = transformer.transformInbound(outbound, FLAGS);
// verify that the content was actually corrupted
expectedBuf.readerIndex(0);
Assert.assertEquals(expectedBuf, inbound);
} catch(ProtocolException e) {
return;
}
}</pre>
</div>
</div>
<div class="paragraph">
<p>The remaining piece is how those arguments are generated — the arguments to <code>forAll</code> mentioned above. Each argument is a function that returns an input generator. For each example, an input is pulled from each generator and passed to <code>roundTripWithCorruption</code>. The <code>compressors()</code> and <code>checksums()</code> generators aren’t copied here. They can be found in the source and are based on built-in generator methods, provided by QuickTheories, that select a value from a list of values. The second argument, <code>integers().between(0, Byte.MAX_VALUE).map(Integer::byteValue)</code>, generates non-negative numbers that fit into a single byte. These numbers will be passed as the <code>corruptionValue</code> argument.</p>
</div>
<div class="paragraph">
<p>The <code>inputWithCorruptiblePosition</code> generator, copied below, generates strings to use as input to the transformation function and a position within the output byte stream to corrupt. Because compression prevents knowledge of the output size of the frame, the generator tries to choose a somewhat reasonable position to corrupt by limiting the choice to the size of the generated string (it’s uncommon for compression to generate a larger string and the implementation discards the compressed value if it does). It also avoids corrupting the first two bytes of the stream which are not covered by a checksum and therefore can be corrupted without being caught. The function above ensures that corruption is actually introduced and that corrupting a position larger than the size of the output does not occur.</p>
</div>
<div class="listingblock">
<div class="content">
<pre>private Gen&lt;Pair&lt;String, Integer&gt;&gt; inputWithCorruptablePosition()
{
return inputs().flatMap(s -&gt; integers().between(2, s.length() + 2)
.map(i -&gt; Pair.create(s, i)));
}</pre>
</div>
</div>
<div class="paragraph">
<p>With all those pieces in place, if the test were run before the bug were fixed, it would fail with the following output.</p>
</div>
<div class="listingblock">
<div class="content">
<pre>java.lang.AssertionError: Property falsified after 2 example(s)
Smallest found falsifying value(s) :-
{(c,3), 0, null, Adler32}
Cause was :-
java.lang.IndexOutOfBoundsException: readerIndex(10) + length(16711681) exceeds writerIndex(15): UnpooledHeapByteBuf(ridx: 10, widx: 15, cap: 54/54)
at io.netty.buffer.AbstractByteBuf.checkReadableBytes0(AbstractByteBuf.java:1401)
at io.netty.buffer.AbstractByteBuf.checkReadableBytes(AbstractByteBuf.java:1388)
at io.netty.buffer.AbstractByteBuf.readBytes(AbstractByteBuf.java:870)
at org.apache.cassandra.transport.frame.checksum.ChecksummingTransformer.transformInbound(ChecksummingTransformer.java:289)
at org.apache.cassandra.transport.frame.checksum.ChecksummingTransformerTest.roundTripWithCorruption(ChecksummingTransformerTest.java:106)
...
Other found falsifying value(s) :-
{(c,3), 0, null, CRC32}
{(c,3), 1, null, CRC32}
{(c,3), 9, null, CRC32}
{(c,3), 11, null, CRC32}
{(c,3), 36, null, CRC32}
{(c,3), 50, null, CRC32}
{(c,3), 74, null, CRC32}
{(c,3), 99, null, CRC32}
Seed was 179207634899674</pre>
</div>
</div>
<div class="paragraph">
<p>The output shows more than a single failing example. This is because QuickTheories, like most property-based testing libraries, comes with a shrinker, which performs the task of taking a failure and minimizing its inputs. This aids in debugging because there are multiple failing examples to look at often removing noise in the process. Additionally, a seed value is provided so the same series of tests and failures can be generated again — another useful feature when debugging. In this case, the library generated an example that contains a single byte of input, which will corrupt the fourth byte in the output stream by setting it to zero, using no compression, and using Adler32 for checksumming. It can be seen from the other failing examples that using CRC32 also fails. This is due to improper calculation of the checksum, regardless of the algorithm. In particular, the checksum was only calculated over the least significant byte of each length rather than all eight bytes. By corrupting the fourth byte of the output stream (the first length’s second-most significant byte not covered by the calculation), an invalid length is read and later used.</p>
</div>
</div>
<div class="sect3">
<h4 id="where-to-find-more"><a class="anchor" href="#where-to-find-more"></a>Where to Find More</h4>
<div class="paragraph">
<p>Property-based testing is a broad topic, much of which is not covered by this post. In addition to Cassandra, it has been used successfully in several places including <a href="https://arxiv.org/pdf/1703.06574.pdf" target="_blank" rel="noopener">car operating systems</a> and <a href="https://youtu.be/hXnS_Xjwk2Y?t=1023" target="_blank" rel="noopener">suppliers’ products</a>, <a href="https://dl.acm.org/citation.cfm?id=2034662" target="_blank" rel="noopener">GNOME Glib</a>, <a href="https://github.com/WesleyAC/raft/tree/master/src" target="_blank" rel="noopener">distributed consensus</a>, and other <a href="https://www.youtube.com/watch?v=x9mW54GJpG0" target="_blank" rel="noopener">distributed databases</a>. It can also be combined with other approaches such as fault-injection and memory leak detection. Stateful models can also be built to generate a series of commands instead of running each example on one generated set of inputs. Our goal is to evangelize this approach within the Cassandra developer community and encourage more testing of this kind as part of our work to deliver the most stable major release of Cassandra yet.</p>
</div>
</div>
</div>
</div>
</div>
<footer class="grad grad--two flex-center pb-xlarge">
<div class="inner text-center z2 relative">
<h2 class="white py-small">Get started with Cassandra, fast.</h2>
<a id="footer-cta" href="/_/quickstart.html" class="btn btn--filled ma-medium">Quickstart Guide</a>
</div>
<div class="inner flex flex-distribute-items mt-xlarge z2 relative">
<div class="col-2">
<div id="footer-logo" class="logo logo--footer mb-medium"><img src="../../assets/img/logo-white-r.png" alt="Cassandra Logo"></div>
<p>Apache Cassandra<img src="../../assets/img/registered.svg" alt="®" style="width:18px;"> powers mission-critical deployments with improved performance and unparalleled levels of scale in the cloud.</p>
<div class="footer-social-icons">
<a href="https://twitter.com/cassandra?lang=en" target="_blank"><img src="../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank"><img src="../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank"><img src="../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
</div>
<div class="col-2 flex flex-center">
<ul class="columns-2">
<li class="mb-small"><a href="/">Home</a></li>
<li class="mb-small"><a href="/_/cassandra-basics.html">Cassandra Basics</a></li>
<li class="mb-small"><a href="/_/quickstart.html">Quickstart</a></li>
<li class="mb-small"><a href="/_/ecosystem.html">Ecosystem</a></li>
<li class="mb-small"><a href="/doc/latest/">Documentation</a></li>
<li class="mb-small"><a href="/_/community.html">Community</a></li>
<li class="mb-small"><a href="/_/case-studies.html">Case Studies</a></li>
<li class="mb-small"><a href="/_/resources.html">Resources</a></li>
<li class="mb-small"><a href="/_/blog.html">Blog</a></li>
</ul>
</div>
</div>
</footer>
<div class="lower-footer bg-white pa-medium">
<div class="flex flex-row flex-vert-center">
<div class="pr-medium"><img src="../../assets/img//feather-small.png" alt="ASF" width="20"></div>
<div class="pr-medium"><a href="http://www.apache.org/" target="_blank">Foundation</a></div>
<div class="pr-medium"><a href="https://www.apache.org/events/current-event.html" target="_blank">Events</a></div>
<div class="pr-medium"><a href="https://www.apache.org/licenses/" target="_blank">License</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/thanks" target="_blank">Thanks</a></div>
<div class="pr-medium"><a href="https://www.apache.org/security" target="_blank">Security</a></div>
<div class="pr-medium"><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank">Privacy</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></div>
</div>
<p class="my-medium">© 2009-<script>document.write(new Date().getFullYear())</script> <a href="https://apache.org" target="_blank">The Apache Software Foundation</a> under the terms of the Apache License 2.0. Apache, the Apache feather logo, Apache Cassandra, Cassandra, and the Cassandra logo, are either registered trademarks or trademarks of The Apache Software Foundation.</p>
</div>
<div id="fade" class="hidden"></div>
<div id="modal" class="hidden">
<div id="close-modal" class="cursor-pointer"><svg viewBox="0 0 24 24" width="24" height="24" stroke="currentColor" stroke-width="2" fill="none" stroke-linecap="round" stroke-linejoin="round" class="css-i6dzq1"><line x1="18" y1="6" x2="6" y2="18"></line><line x1="6" y1="6" x2="18" y2="18"></line></svg></div>
<div id="mod-content" class="vid-mod-content resp-container"></div>
</div>
<script>
jQuery(function(){
var windowW = $(window).width();
$(document)
.on('click','.mobile-nav-icon',function(){
$('.main-nav').fadeIn();
})
.on('click','.main-nav',function(){
if(windowW <= 1000){
$(this).fadeOut();
}
})
.on('click','#version-toggle',function(){
$(this).toggleClass('active');
$(this).next().fadeToggle();
})
.on('click','#mobile-docs-nav-burger', function(){
$(this).toggleClass('active');
$('.docs-nav').toggleClass('active');
});
var url = window.location.pathname;
var isQuickstart = url.includes('quickstart.html');
if(isQuickstart){
var footerCTA = document.getElementById('footer-cta');
footerCTA.innerHTML = 'Get latest updates';
footerCTA.setAttribute('href', '/_/blog.html');
}
});
</script>
</div>
</body>
<script>
jQuery(function(){
});
</script>
</html>