content/_/blog/Harry-an-Open-Source-Fuzz-Testing-and-Verification-Tool-for-Apache-Cassandra.html - cassandra-website - Git at Google

 <!DOCTYPE html>
 <html lang="en">
   <head>
     <meta charset="utf-8">
     <meta name="viewport" content="width=device-width,initial-scale=1.0">
     <title>Apache Cassandra | Apache Cassandra Documentation</title>
     <link rel="stylesheet" href="../../assets/css/site.css">
     <meta name="description" content="The Apache Cassandra Community">
     <link rel="schema.dcterms" href="https://purl.org/dc/terms/">
     <meta name="dcterms.subject" content="_">
     <meta name="dcterms.identifier" content="master">
     <meta name="generator" content="Antora 2.3.4">
     <link rel="icon" href="../../assets/img/favicon.ico" type="image/x-icon">
 <script>
   const script = document.createElement("script");
   const domain = window.location.hostname;
   script.type = "text/javascript";
   script.src = "https://plausible.cassandra.apache.org/js/plausible.js";
   script.setAttribute("data-domain",domain);
   script.setAttribute("defer",'true');
   script.setAttribute("async",'true');
   document.getElementsByTagName("head")[0].appendChild(script);
 </script>  </head>
   <body class="single-post">
       <div class="container mx-auto relative">
         <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
     <meta property="og:type" content="website" />
 	<meta property="og:description" content="" />
 	<meta property="og:url" content="/" />
 	<meta property="og:site_name" content="Apache Cassandra" />

 <header id="top-nav">
     <div class="inner relative">
         <div class="header-social-icons text-right">
             <a href="https://twitter.com/cassandra?lang=en" target="_blank" styles="margin-left: 20px;"><img src="../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
             <a href="https://www.linkedin.com/company/apache-cassandra/"  target="_blank" styles="margin-left: 20px;"><img src="../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
             <a href="https://www.youtube.com/c/PlanetCassandra" target="_blank" styles="margin-left: 20px;"><img src="../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
         </div>
         <div class="cf">
             <div class="logo left"><a href="/"><img src="../../assets/img/logo-white-r.png" alt="cassandra logo"></a></div>
             <div class="mobile-nav-icon right">
                 <img class="toggle-icon" src="../../assets/img/hamburger-nav.svg">
             </div>
             <ul class="main-nav nav-links right flex flex-vert-center flex-space-between">
     <li>
         <a class="nav-link hide-mobile">Get Started</a>
         <ul class="sub-menu bg-white">
             <li class="pa-micro">
                 <a href="/_/cassandra-basics.html">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-basics.png" alt="cassandra basics icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Cassandra Basics
                     </div>
                 </a>
             </li>
             <li class="pa-micro">
                 <a href="/_/quickstart.html">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-rocket.png" alt="cassandra basics icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Quickstart
                     </div>
                 </a>
             </li>
             <li class="pa-micro">
                 <a href="/_/ecosystem.html">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-ecosystem.png" alt="cassandra basics icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Ecosystem
                     </div>
                 </a>
             </li>
         </ul>
     </li>
     <li><a class="nav-link" href="/doc/latest/">Documentation</a></li>
     <li>
         <a class="nav-link" href="/_/community.html">Community</a>
         <ul class="sub-menu bg-white">
             <li class="pa-micro">
                 <a href="/_/community.html#code-of-conduct">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-welcome.png" alt="welcome icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Welcome
                     </div>
                 </a>
             </li>
             <li class="pa-micro hide-mobile">
                 <a href="/_/community.html#discussions">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-discussions.png" alt="discussions icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Discussions
                     </div>
                 </a>
             </li>
             <li class="pa-micro hide-mobile">
                 <a href="/_/community.html#project-governance">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-governance.png" alt="Governance icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Governance
                     </div>
                 </a>
             </li>
             <li class="pa-micro hide-mobile">
                 <a href="/_/community.html#how-to-contribute">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-contribute.png" alt="Contribute icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Contribute
                     </div>
                 </a>
             </li>
             <li class="pa-micro hide-mobile">
                 <a href="/_/community.html#meet-the-community">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-community.png" alt="Meet the Community icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Meet the Community
                     </div>
                 </a>
             </li>
             <li class="pa-micro hide-mobile">
                 <a href="/_/cassandra-catalyst-program.html">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-catalyst.png" alt="Catalyst icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Catalyst Program
                     </div>
                 </a>
             </li>
             <li class="pa-micro hide-mobile">
                 <a href="/_/events.html">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-events.png" alt="Events icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Events
                     </div>
                 </a>
             </li>
         </ul>
     </li>
     <li>
         <a class="nav-link hide-mobile">Learn</a>
         <ul class="sub-menu bg-white">
             <li class="pa-micro">
                 <a href="/_/Apache-Cassandra-5.0-Moving-Toward-an-AI-Driven-Future.html">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-basics.png" alt="Basics icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Cassandra 5.0
                     </div>
                 </a>
             </li>
             <li class="pa-micro">
                 <a href="/_/case-studies.html">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-case-study.png" alt="Case Studies icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Case Studies
                     </div>
                 </a>
             </li>
             <li class="pa-micro">
                 <a href="/_/resources.html">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-resources.png" alt="Resources icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Resources
                     </div>
                 </a>
             </li>
             <li class="pa-micro">
                 <a href="/_/blog.html">
                     <div class="sub-nav-icon">
                         <img src="../../assets/img/sub-menu-blog.png" alt="Blog icon">
                     </div>
                     <div class="sub-nav-text teal py-small">
                         Blog
                     </div>
                 </a>
             </li>
         </ul>
     </li>
     <li><a class="nav-link btn btn--filled" href="/_/download.html">Download Now</a></li>
 </ul>
         </div>
     </div>
 </header>

         <div class="hero hero--home grad">
             <div class="eye"></div>
             <div id="home-content" class="text-center flex flex-center flex-column relative z2 ma-xlarge">
                 <h1>Harry, an Open Source Fuzz Testing and Verification Tool for Apache Cassandra</h1>
                 <h3>November 18, 2021 | Alex Petrov</h3>
             </div>
         </div>
         <div id="blog-post" class="flex-center py-large arrow">
             <div class="blog-breadcrumb mb-medium">
                 <div class="inner inner--narrow">
                     <a href="/_/blog.html">« Back to the Apache Cassandra Blog</a>
                 </div>
             </div>
             <div class="post-content">
                 <div class="inner inner--narrow">
                     <div id="preamble">
 <div class="sectionbody">
 <div class="paragraph">
 <p>Over the years working on Apache Cassandra while writing tests or
 trying to reproduce the issues, I’ve always found myself repeating the
 same procedure over and over again: creating schema, writing loops
 generating data, then either manually reconciling it to check the
 results, or comparing the result set against some predetermined
 expected result. Not only is this approach tedious and time-consuming,
 but it also does not scale: if some set of operations work for one
 schema, there’s no way to know if it will also work for any arbitrary
 schema, whether it will work if operations are executed in a different
 order, or if operations themselves are slightly different.</p>
 </div>
 <div class="paragraph">
 <p>While preparing Apache Cassandra for 4.0 release, we’ve made extensive
 progress in how we test. The new in-tree in-JVM distributed test
 framework enables us to easily write tests that exercise coordinated
 query execution code paths while giving us flexibility and control
 that was previously offered only by CQLTester, a tool for exercising
 node-local query paths. Many subsystems were audited and covered with
 tests. Cassandra users tried the new version out in their clusters and
 reported their findings. All of these things are useful and important,
 but we still needed a tool that would give us the same or higher
 degree of confidence for every commit so that we could know that the
 database is working as expected, not only for an exact set of
 operations that exercised by unit and integration tests, but
 potentially for any use-case and combination of operations under
 circumstances comparable to production.</p>
 </div>
 <div class="paragraph">
 <p>This all led us to develop Harry, a tool that can combine properties
 of stress- and integration-testing tools. Harry is a tool that can
 generate data for an arbitrary schema, execute data modification
 queries against the cluster, track the progress of operation
 execution, and make sure that responses to read queries are correct.</p>
 </div>
 <div class="paragraph">
 <p>After reading this post, you will understand:</p>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p>how Harry generates the data</p>
 </li>
 <li>
 <p>how Harry performs verification</p>
 </li>
 <li>
 <p>which properties values generated by  Harry make verification not only possible but also efficient.</p>
 </li>
 </ul>
 </div>
 <div class="paragraph">
 <p>The primary audience for this post is Cassandra contributors, so you
 will need to be familiar with Apache Cassandra and its tooling.</p>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="fuzz-testing"><a class="anchor" href="#fuzz-testing"></a>Fuzz testing</h2>
 <div class="sectionbody">
 <div class="paragraph">
 <p>Since most of the bugs are reproduced by taking a sequence of actions
 following some pattern, we need to specify what actions can be used to
 lead to a given state. However, there’s a lot of flexibility regarding
 which values exactly are written in specific columns.</p>
 </div>
 <div class="paragraph">
 <p>For example, if we look at
 <a href="https://issues.apache.org/jira/browse/CASSANDRA-16453" target="_blank" rel="noopener">CASSANDRA-16453</a>,
 which was reproduced using Harry. Code to reproduce the issue with
 in-JVM DTests looks something like this:</p>
 </div>
 <div class="listingblock">
 <div class="title">Repro.java</div>
 <div class="content">
 <pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">try (Cluster cluster = init(builder().withNodes(2).start()))
 {
     cluster.schemaChange(withKeyspace("CREATE TABLE distributed_test_keyspace.table_0 (pk0 bigint,ck0 bigint,regular0 bigint,regular1 bigint,regular2 bigint, PRIMARY KEY (pk0, ck0)) WITH  CLUSTERING ORDER BY (ck0 ASC);"));
     cluster.coordinator(1).execute("DELETE FROM distributed_test_keyspace.table_0 USING TIMESTAMP 1 WHERE pk0=1 AND ck0&gt;2;", ConsistencyLevel.ALL);
     cluster.get(2).executeInternal("DELETE FROM distributed_test_keyspace.table_0 USING TIMESTAMP 1 WHERE pk0=1;");
     cluster.coordinator(1).execute("SELECT * FROM distributed_test_keyspace.table_0 WHERE pk0=1 AND ck0&gt;=1 AND ck0&lt;3;",
                                    ConsistencyLevel.ALL, 1L, 1L, 3L);
 }</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>You can see that, at first glance, there are only three things that
 that are important to reproduce the issue:</p>
 </div>
 <div class="olist arabic">
 <ol class="arabic">
 <li>
 <p>The table has to have at least one clustering column</p>
 </li>
 <li>
 <p>Two actions are executed against the cluster: a range deletion, and a partition deletion</p>
 </li>
 <li>
 <p>Both operations have the same timestamp</p>
 </li>
 </ol>
 </div>
 <div class="paragraph">
 <p>The rest of the details do not matter: size of the cluster, number of
 replicas, clustering order, consistency level with which operations
 are executed, types of clustering keys and values written, and so on.</p>
 </div>
 <div class="paragraph">
 <p>The simplest way to cover a case like this with a test is to hardcode
 the schema and then execute a partition deletion and a range deletion
 hardcoding the values, much as we did above. This might work, but
 there’s still a chance that the proposed fix may not work for some
 other schema or some combination of values.</p>
 </div>
 <div class="paragraph">
 <p>To improve the situation, we can express the test in more abstract
 terms and, instead of writing a repro using specific statements, we
 can only use the constraints we’ve specified above:</p>
 </div>
 <div class="listingblock">
 <div class="title">HarryDsl.java</div>
 <div class="content">
 <pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">test(new SchemaGenerators.Builder("harry")
                          .partitionKeySpec(1, 5)
                          .clusteringKeySpec(1, 5)
                          .regularColumnSpec(1, 10)
                          .generator(),
      historyBuilder -&gt; {
          historyBuilder.nextPartition()
                        .simultaneously()
                        .randomOrder()
                        .partitionDeletion()
                        .rangeDeletion()
                        .finish();
      });</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>This spec can be used to generate clusters of different sizes,
 configured with different schemas, executing the given sequence of
 actions both in isolation and combined with other randomly generated
 ones, with failure-injection. Best of all, this test will <em>not only</em>
 ensure that such a sequence of actions does not produce an exception
 but also ensures that a cluster will respond with correct results to
 <em>any</em> allowed read query.</p>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="generating-data"><a class="anchor" href="#generating-data"></a>Generating data</h2>
 <div class="sectionbody">
 <div class="paragraph">
 <p>Generating random values and sequences of actions and reconciling them
 during verification is in itself not a difficult task. Making this
 process time- and memory-efficient is what makes it more interesting.</p>
 </div>
 <div class="paragraph">
 <p>For space efficiency, the log of actions generated using Harry is not
 kept in memory or saved anywhere on disk since any generated operation
 can be reproduced from its sequence number. In Harry, a sequence
 number consists of two parts: the logical timestamp (LTS, which has
 1-1 mapping to real-time timestamp), and the modification ID, which
 allows having multiple uniquely identifiable operations for each
 logical timestamp. For the sake of simplicity, we’ll just say that
 each operation is represented by its sequence number / LTS.</p>
 </div>
 <div class="paragraph">
 <p>In the example above, the operation order is determined by the seed
 for the given run. Let’s say that partition deletion is executed
 first. To produce a <code>DELETE</code> statement from it, we now need to
 generate a partition key and get a timestamp. Similarly, to generate a
 range deletion, we will need a partition key, two clustering keys that
 will serve as lower and higher bounds for the range tombstone, and a
 timestamp.</p>
 </div>
 <div class="paragraph">
 <p>Using the sequence number and knowing the operation type, we can now
 produce <em>descriptors</em> that are used as the compact internal
 representation of data in Harry. No matter how many parts it consists
 of, any partition key is represented by a single <code>long</code>. The same is
 true for the clustering keys: any clustering key, single-part or
 composite, is represented using a single <code>long</code> descriptor. If we were
 to generate an <code>INSERT</code> or <code>UPDATE</code> operation, each value for a
 regular or a static column would have its own descriptor since we
 would want to distinguish between two writes made by two different
 operations.</p>
 </div>
 <div class="paragraph">
 <p>To summarise, every operation has a sequence number, which determines
 everything that is required to fully reproduce this operation,
 including descriptors that we will later use to generate values
 themselves:</p>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p>partition deletion only has a partition descriptor</p>
 </li>
 <li>
 <p>range deletion has a partition descriptor and two clustering descriptors, specifying tombstone bounds</p>
 </li>
 <li>
 <p>insert or update operation has a partition descriptor, a clustering descriptor, and a set of value descriptors, one for each regular and static column.</p>
 </li>
 </ul>
 </div>
 <div class="paragraph">
 <p>Using descriptors rather than specific values for verification can be
 extremely useful for efficiency. Instead of comparing potentially
 large values, we could just compare two longs that uniquely identify
 them. This means that we have to have a way to not <em>only</em> generate a
 value from the descriptor, but <em>also</em> to compute a descriptor the
 value was generated from.</p>
 </div>
 <div class="paragraph">
 <p>In Harry, we call such a generator <code>Bijection&lt;T&gt;</code>, and every bijection
 can <em>inflate</em> a descriptor into the value of type <code>T</code>. Then <em>deflate</em>
 the value of type <code>T</code> back into the descriptor where it was originally
 generated.</p>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="validating-results"><a class="anchor" href="#validating-results"></a>Validating results</h2>
 <div class="sectionbody">
 <div class="paragraph">
 <p>Applying a predetermined sequence of operations against a single
 partition produces some partition state. Knowing the status of
 execution of each operation, we can deterministically determine the
 state of each node in the cluster and validate the results of
 execution of any <code>SELECT</code> query.</p>
 </div>
 <div class="paragraph">
 <p>Since we can represent any operation as a sequence of descriptors, we
 know the order of operations (since the timestamp determines it). We
 can assume we know the status of each operation (whether or not it has
 been executed against some node), and we can deterministically produce
 partition state for any given point in time. Partition state is
 nothing but a sorted map, where the key is a clustering descriptor,
 and value is a row state. Row state, in this case, holds value
 descriptors for each column, and timestamps where operations were
 executed:</p>
 </div>
 <div class="listingblock">
 <div class="title">PartitionState.java</div>
 <div class="content">
 <pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">public class PartitionState implements Iterable&lt;RowState&gt; {
     long partitionDescriptor;
     NavigableMap&lt;Long, RowState&gt; rowStates;
 }

 public static class RowState {
     long[] valueDescriptors;
     long[] logicalTimestamps;
 }</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>Similarly, since any value written to the database is generated using
 a bijection, we can produce the partition state from the result set by
 deflating every value returned by the database into the descriptor
 that it was generated from.</p>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="generating-descriptors"><a class="anchor" href="#generating-descriptors"></a>Generating Descriptors</h2>
 <div class="sectionbody">
 <div class="paragraph">
 <p>Reproducible operation sequences can be generated from a set of rules
 that determines what the sequence is going to look like. For example,
 we can specify probability distributions for each operation type or
 give operations relative weights, which can be turned into the
 distribution internally later. Configuration for an insert / update /
 delete workload with a probability of an insert operation (100/251)
 being twice as high as a probability of a row deletion (50/251), and
 ten times more probable than a partition deletion (1/251), would look
 like:</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre>INSERT: 100
 UPDATE: 100
 DELETE_ROW: 50
 DELETE_PARTITION: 1</pre>
 </div>
 </div>
 <div class="paragraph">
 <p>Since each operation is uniquely determined by its sequence number, we
 can deterministically compute its operation type by taking these
 probability distributions. One way to do this is by using PCG random
 number generator, which has some useful properties we’re going to use
 for generating our pseudorandom values.</p>
 </div>
 <div class="paragraph">
 <p>If you’d like to learn more about the mathematical underpinnings of
 PCG, you should read this paper
 (<a href="https://www.pcg-random.org/pdf/hmc-cs-2014-0905.pdf" target="_blank" rel="noopener">https://www.pcg-random.org/pdf/hmc-cs-2014-0905.pdf</a>). However,
 to be able to use PCG, it is not necessary to know any of the
 internals. We need a random number generator that will have the
 following properties:</p>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p>Long period: sequence of numbers it produces does not repeat
 frequently; ideally the period should be 2^64 when generating a
 random number from 64 bits of entropy</p>
 </li>
 <li>
 <p>Stream selection: the ability to produce different random
 sequences from the same seed, identified by some stream id.</p>
 </li>
 <li>
 <p>Addressability: any number produced by the generator can be
 reproduced from the seed and its sequence number. Ideally, we’d
 like to have methods such as <code>long randomNumber(long
 sequenceNumber, long stream)</code> and <code>long sequenceNumber(long
 randomNumber, long stream)</code>. In other words, we should be able to
 determine the sequence number of the random number in the given
 stream. Using this method, we can also determine <code>distance(long x,
 long y)</code> : how many random numbers we should skip to get <code>y</code> after
 seeing <code>x</code>.</p>
 </li>
 <li>
 <p>Walkability: the ability to produce a number immediately following
 <code>long next(long randomNumber, long stream)</code> or preceding <code>long
 prev(long randomNumber, long stream)</code> the given random number in
 the random sequence.</p>
 </li>
 </ul>
 </div>
 <div class="paragraph">
 <p>You might have noticed that there are two ways to achieve the same
 thing. We can get a pseudorandom number from some number known by the
 system by using <code>randomNumber(i, stream)</code> and by using <code>prev(i,
 stream)</code>. Both variants are valid, and both operations can be
 inverted. We have a slight preference toward using <code>prev</code>, since its
 inverse can be computed in constant time.</p>
 </div>
 <div class="paragraph">
 <p>These properties allow us to reproduce partition state from just
 configuration (i.e., known distributions, schema, size of the
 partition, etc) and a seed:</p>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p>Partition descriptor for <code>N</code> th operation can be picked as <code>M</code> th
 random number in the stream of partition descriptors, and the
 relation between <code>N</code> and <code>M</code> is determined by the chosen pattern
 for visiting partitions.</p>
 </li>
 <li>
 <p>Clustering descriptor for <code>N</code> th operation can be picked as <code>M</code> th
 random number in the stream of clustering descriptors <strong>for the
 given partition</strong>, where maximum <code>M</code> is determined by the maximum
 partition size, so there can be no more than <code>max(M)</code> rows in any
 generated partition.</p>
 </li>
 </ul>
 </div>
 <div class="paragraph">
 <p>One of the simplest useful ways to represent a pattern for picking a
 descriptor from the sequence is to use a sliding window. The sliding
 window begins with a preset number of items in it and allows to visit
 each item in the current window one or several times in a round-robin
 fashion. After this, it cycles one of the items out and adds a new one
 in its place.</p>
 </div>
 <div class="paragraph">
 <p>Once operation type, partition descriptor, and clustering descriptors
 are determined, all we have left to cover is how to generate value
 descriptors for <code>INSERT</code> and <code>UPDATE</code> operations. Value descriptor for
 a column is uniquely identified by its sequence number and is bound by
 partition descriptor, clustering descriptor, and column.</p>
 </div>
 <div class="paragraph">
 <p>To summarise, all operations in Harry are deterministic and are
 represented using their descriptors. Descriptors can be computed
 hierarchically using the following rules:</p>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p>Partition descriptor is picked from the <em>stream</em> of partition descriptors. Its position in that stream is determined by some rule (for example, a sliding window):</p>
 </li>
 </ul>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">long pd = rng.randomNumber(positionFor(sequenceNumber), PARTITION_DESCRIPTOR_STREAM_ID)</code></pre>
 </div>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p>Clustering descriptor is picked from the <em>stream</em> of clustering descriptors <strong>for the given partition</strong>.</p>
 </li>
 </ul>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">long cd = rng.prev(positionInPartition, pd);</code></pre>
 </div>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p>Value descriptor is picked from the <em>stream</em> of descriptors identified by which partition, clustering, and column the value belongs to:</p>
 </li>
 </ul>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">long vd = rng.randomNumber(sequenceNumber, pd ^ cd ^ col);</code></pre>
 </div>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="inflation-and-deflation"><a class="anchor" href="#inflation-and-deflation"></a>Inflation and Deflation</h2>
 <div class="sectionbody">
 <div class="paragraph">
 <p>We’ve mentioned before that one reason Harry state is so compact and
 can be validated so efficiently is because every value read from the
 database can be traced back to the descriptor it was generated
 from. To achieve this, we generate all values using order-preserving
 bijections. In other words, for any value generated from a descriptor,
 it should be possible to quickly find a descriptor this value was
 generated from, and two values generated from two distinct descriptors
 should sort the same as descriptors themselves.</p>
 </div>
 <div class="paragraph">
 <p>Implementing an order-preserving bijection for 64-bit longs is trivial
 and can be achieved by using an identity function. Essentially, any
 long descriptor <em>is</em> the value it represents:</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">long inflate(long descriptor) {
   return descriptor;
 }

 long deflate(long value) {
   return value;
 }</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>There are many ways to make a bijection for strings. One of the ways
 to do it is to have a set of 256 short strings of the same length in a
 sorted array. When inflating a 64-bit long descriptor into the string,
 we’ll be iterating over these 64 bits, taking 8 bits (one byte) at a
 time, using the value of this byte as an index in an array of 256
 strings.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">String inflate(long descriptor) {
     StringBuilder builder = new StringBuilder();

     for (int i = 0; i &lt; Long.BYTES; i++) {
             int idx = getByte(descriptor, i);
             builder.append(nibbles[idx]);
     }
     return builder.toString();
 }</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>One thing we should take into account here is that strings are
 compared byte-wise, while longs use signed comparison. To make sure
 generated strings have the same order as descriptors, we need to XOR
 the sign bit.</p>
 </div>
 <div class="paragraph">
 <p>Since any two strings produced by this generator will be unique, and
 we can produce at most 2^64 values using this generator, to generate
 longer strings we do not even need larger nibbles. We can append
 random data of arbitrary length to the end of the string. This does
 not change the order since it is determined by the prefix generated
 from nibbles that is unique to each value.</p>
 </div>
 <div class="paragraph">
 <p>Such simple bijections can represent data types used for regular and
 static columns. We’ve previously mentioned that partition and
 clustering keys are also represented using 64-bit
 descriptors. Partition and clustering keys are composite: they consist
 of multiple distinct parts. One way to implement bijection for a
 composite type is to “slice” 64 bits of entropy into smaller chunks,
 each chunk giving some entropy to generate a different part of the
 key. Each slice is then inflated using a bijection that corresponds to
 the part of the key it represents. To convert the value back to the
 descriptor, we must deflate each part of the key and then “stitch” the
 values back together into a 64-bit descriptor.</p>
 </div>
 <div class="paragraph">
 <p>To summarise, key generators are just bijections that can generate
 multiple values for a single 64-bit descriptor instead of one. A
 simplified and generalized version of such bijection may look
 something like this:</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">Object[] inflate(long descriptor) {
   long[] slices = slice(descriptor);
   Object[] key = new Object[slices.length];
   for (int i = 0; i &lt; slices.length; i++) {
      key[i] = children[i].inflate(slices[i]);
   }
   return key;
 }

 long deflate(Object[] value) {
   long[] slices = new long[value.length];
   for (int i = 0; i &lt; value.length; i++) {
      slices[i] = children[i].deflate(value[i]);
   }
   return stitch(slices);
 }</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>Values generated by key generators preserve the order of descriptors
 they were generated from, which allows efficiently checking the order
 of results, comparing clustering descriptors, and validating range
 deletions.</p>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="putting-it-all-together"><a class="anchor" href="#putting-it-all-together"></a>Putting it all together</h2>
 <div class="sectionbody">
 <div class="paragraph">
 <p>In this post, we’ve learned how the various parts of Harry work,
 starting with how to reproduce a sequence of operations up to how the
 values are generated. Using this information, we can create a
 quiescent model checker that can validate the state of the database in
 the absence of in-flight operations, assuming we know the state of all
 operations before this moment.</p>
 </div>
 <div class="paragraph">
 <p>As we’ve discussed, Harry is working with reproducible histories of
 operations, where the following information identifies each operation:</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">class Operation {
   long lts; // logical timestamp of the operation
   long pd;  // partition descriptor, derived from LTS
   long cd;  // clustering descriptor, derived from LTS and PD
   long OperationKind; // operation type, derived from LTS and PD
 }</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>Now, all we need to do is to produce a sequence of operations. For example, each operation with <code>INSERT</code> kind is going to be represented by:</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">class Insert {
    long lts;   // logical timestamp
    long pd;    // partition descriptor
    long cd;    // clustering descriptor
    long[] vds; // value descriptors for each column
 }</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>We can compile this information into an INSERT statement by inflating
 partition, clustering, and value descriptors, and deriving the
 real-time timestamp from the logical timestamp. This statement can
 then be executed against the system under test (our Cassandra
 cluster).</p>
 </div>
 <div class="paragraph">
 <p>We know from the history of operations which partitions were visited,
 and which operations were executed against them. To validate the state
 of any given partition, all we need to do is to query the database to
 retrieve partition state from the database, deflate every row returned
 in the results: deflate all clustering keys into clustering
 descriptors, and values into corresponding value descriptors,
 producing internal PartitionState.</p>
 </div>
 <div class="paragraph">
 <p>To verify that this partition state is also correct, we replay the
 sequence of operations again. But instead of going all the way to the
 generated INSERT statement, we operate with only descriptors and apply
 operations sequentially to the PartitionState following usual
 Cassandra reconciliation rules (i.e. last-write-wins, partition
 tombstone &gt; tombstone &gt; write), compute logical timestamps and value
 descriptors for each row.</p>
 </div>
 <div class="paragraph">
 <p>Now, having two partition states: one “deflated” from the result set
 returned by the system under test and one “inflated” from the logical
 timestamp, we can directly compare them. If there are any
 inconsistencies between the two sets, we can conclude there has been
 an error.</p>
 </div>
 <div class="paragraph">
 <p>Validating query results by reproducing the sequence of events is more
 efficient than working with full operation logs holding all values for
 every operation executed against the database state. This process can
 be made even more efficient by validating rows individually while
 having enough state in memory to validate a single row.</p>
 </div>
 <div class="paragraph">
 <p>Since implementing other Cassandra features, such as partition
 deletions, writes without primary key liveness, static columns, column
 deletions, etc., do not require any additional information, and
 follows the same rules, for the sake of brevity, it is not covered in
 this post.</p>
 </div>
 <div class="paragraph">
 <p>Quiescent checker is a very useful tool and can validate data sets
 generated by sequential or concurrent runners, as long as the state of
 each operation is known at the moment of validation. Since we simply
 replay the sequence of events to reproduce the state, we can not have
 any events whose state is unknown to the system.</p>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="closing-words"><a class="anchor" href="#closing-words"></a>Closing words</h2>
 <div class="sectionbody">
 <div class="paragraph">
 <p>Harry is a tool that allows the testing of databases in ways we
 weren’t able to before. Instead of creating test cases expressed in
 specific schemas and statements, it enables us to describe sequences
 of events we’d like tested and generate schemas and possible
 interleavings of statements corresponding to given
 specifications. Creating exhaustive test suites can take a lot of
 time, and we can have contributor creativity poured into patches and
 features, not into test examples. This approach also allows testing
 different features in combination and checking old behaviors in the
 presence of new ones without explicitly creating new tests.</p>
 </div>
 <div class="paragraph">
 <p>That said, integration testing is not always enough, and often we do
 not know which areas of the codebase can be problematic. Harry can be
 helpful here, too, since it will generate data, execute the workload
 against the system and validate the results. Contributors can focus on
 implementing test scenarios such as behavior in presence of unexpected
 failures, cluster expansions, or node replacements.</p>
 </div>
 <div class="paragraph">
 <p>Harry is a productivity tool that encapsulates the properties of a
 stress test and a model checker, allowing us to find issues and
 improve the quality of Cassandra in ways that were not possible
 before.</p>
 </div>
 </div>
 </div>
                 </div>
             </div>
         </div>
         <footer class="grad grad--two flex-center pb-xlarge">
     <div class="inner text-center z2 relative">
         <h2 class="white py-small">Get started with Cassandra, fast.</h2>
         <a id="footer-cta" href="/_/quickstart.html" class="btn btn--filled ma-medium">Quickstart Guide</a>
     </div>
     <div class="inner flex flex-distribute-items mt-xlarge z2 relative">
         <div class="col-2">
             <div id="footer-logo" class="logo logo--footer mb-medium"><img src="../../assets/img/logo-white-r.png" alt="Cassandra Logo"></div>
             <p>Apache Cassandra<img src="../../assets/img/registered.svg" alt="®" style="width:18px;"> powers mission-critical deployments with improved performance and unparalleled levels of scale in the cloud.</p>
             <div class="footer-social-icons">
                 <a href="https://twitter.com/cassandra?lang=en" target="_blank"><img src="../../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
                 <a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank"><img src="../../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
                  <a href="https://www.youtube.com/c/PlanetCassandra" target="_blank"><img src="../../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
             </div>
         </div>
         <div class="col-2 flex flex-center">
             <ul class="columns-2">
                 <li class="mb-small"><a href="/">Home</a></li>
                 <li class="mb-small"><a href="/_/cassandra-basics.html">Cassandra Basics</a></li>
                 <li class="mb-small"><a href="/_/quickstart.html">Quickstart</a></li>
                 <li class="mb-small"><a href="/_/ecosystem.html">Ecosystem</a></li>
                 <li class="mb-small"><a href="/doc/latest/">Documentation</a></li>
                 <li class="mb-small"><a href="/_/community.html">Community</a></li>
                 <li class="mb-small"><a href="/_/case-studies.html">Case Studies</a></li>
                 <li class="mb-small"><a href="/_/resources.html">Resources</a></li>
                 <li class="mb-small"><a href="/_/blog.html">Blog</a></li>
             </ul>
         </div>
     </div>
 </footer>
 <div class="lower-footer bg-white pa-medium">
     <div class="flex flex-row flex-vert-center">
         <div class="pr-medium"><img src="../../assets/img//feather-small.png" alt="ASF" width="20"></div>
         <div class="pr-medium"><a href="http://www.apache.org/" target="_blank">Foundation</a></div>
         <div class="pr-medium"><a href="https://www.apache.org/events/current-event.html" target="_blank">Events</a></div>
         <div class="pr-medium"><a href="https://www.apache.org/licenses/" target="_blank">License</a></div>
         <div class="pr-medium"><a href="https://www.apache.org/foundation/thanks" target="_blank">Thanks</a></div>
         <div class="pr-medium"><a href="https://www.apache.org/security" target="_blank">Security</a></div>
         <div class="pr-medium"><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank">Privacy</a></div>
         <div class="pr-medium"><a href="https://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></div>
     </div>
     <p class="my-medium">© 2009-<script>document.write(new Date().getFullYear())</script> <a href="https://apache.org" target="_blank">The Apache Software Foundation</a> under the terms of the Apache License 2.0.  Apache, the Apache feather logo, Apache Cassandra, Cassandra, and the Cassandra logo, are either registered trademarks or trademarks of The Apache Software Foundation.</p>
 </div>
 <div id="fade" class="hidden"></div>
 <div id="modal" class="hidden">
   <div id="close-modal" class="cursor-pointer"><svg viewBox="0 0 24 24" width="24" height="24" stroke="currentColor" stroke-width="2" fill="none" stroke-linecap="round" stroke-linejoin="round" class="css-i6dzq1"><line x1="18" y1="6" x2="6" y2="18"></line><line x1="6" y1="6" x2="18" y2="18"></line></svg></div>
   <div id="mod-content" class="vid-mod-content resp-container"></div>
 </div>
 <script>
 jQuery(function(){
     var windowW = $(window).width();
     $(document)
     .on('click','.mobile-nav-icon',function(){
         $('.main-nav').fadeIn();
     })
     .on('click','.main-nav',function(){
         if(windowW <= 1000){
             $(this).fadeOut();
         }
     })
     .on('click','#version-toggle',function(){
       $(this).toggleClass('active');
       $(this).next().fadeToggle();
     })
     .on('click','#mobile-docs-nav-burger', function(){
       $(this).toggleClass('active');
       $('.docs-nav').toggleClass('active');
     });
     var url = window.location.pathname;
     var isQuickstart = url.includes('quickstart.html');
     if(isQuickstart){
       var footerCTA = document.getElementById('footer-cta');
       footerCTA.innerHTML = 'Get latest updates';
       footerCTA.setAttribute('href', '/_/blog.html');
     }
 });
 </script>
       </div>
   </body>
 <script>
 jQuery(function(){

 });
 </script>
 </html>