blob: 306479397074fdd46356af5e7dd6a93d6b1589f2 [file] [log] [blame]
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="The Apache Cassandra database is the right choice when you need scalability and high availability without compromising performance. Linear scalability and proven fault-tolerance on commodity hardware or cloud infrastructure make it the perfect platform for mission-critical data. Cassandra's support for replicating across multiple datacenters is best-in-class, providing lower latency for your users and the peace of mind of knowing that you can survive regional outages.
">
<meta name="keywords" content="cassandra, apache, apache cassandra, distributed storage, key value store, scalability, bigtable, dynamo" />
<meta name="robots" content="index,follow" />
<meta name="language" content="en" />
<title>Documentation</title>
<link rel="canonical" href="http://cassandra.apache.org/doc/4.0-alpha3/architecture/dynamo.html">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" crossorigin="anonymous">
<link rel="stylesheet" href="./../../../css/style.css">
<link rel="stylesheet" href="./../../../css/sphinx.css">
<link rel="top" title="Apache Cassandra Documentation v4.0-alpha3" href="../index.html"/> <link rel="up" title="Architecture" href="index.html"/> <link rel="next" title="Storage Engine" href="storage_engine.html"/> <link rel="prev" title="Overview" href="overview.html"/>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.2.0/css/all.css" integrity="sha384-hWVjflwFxL6sNzntih27bfxkr27PmbbK/iSvJ+a4+0owXq79v+lsFkW54bOGbiDQ" crossorigin="anonymous">
<link type="application/atom+xml" rel="alternate" href="http://cassandra.apache.org/feed.xml" title="Apache Cassandra Website" />
</head>
<body>
<!-- breadcrumbs -->
<div class="topnav">
<div class="container breadcrumb-container">
<ul class="breadcrumb">
<li>
<div class="dropdown">
<img class="asf-logo" src="./../../../img/asf_feather.png" />
<a data-toggle="dropdown" href="#">Apache Software Foundation <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu" aria-labelledby="dLabel">
<li><a href="http://www.apache.org">Apache Homepage</a></li>
<li><a href="http://www.apache.org/licenses/">License</a></li>
<li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
<li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
<li><a href="http://www.apache.org/security/">Security</a></li>
</ul>
</div>
</li>
<li><a href="./../../../">Apache Cassandra</a></li>
<li><a href="./../../../doc/latest/">Documentation</a></li>
<li><a href="./">Architecture</a></li>
<li>Dynamo</li>
</ul>
</div>
<!-- navbar -->
<nav class="navbar navbar-default navbar-static-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#cassandra-menu" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="./../../../"><img src="./../../../img/cassandra_logo.png" alt="Apache Cassandra logo" /></a>
</div><!-- /.navbar-header -->
<div id="cassandra-menu" class="collapse navbar-collapse">
<ul class="nav navbar-nav navbar-right">
<li><a href="./../../../">Home</a></li>
<li><a href="./../../../download/">Download</a></li>
<li><a href="./../../../doc/latest/">Documentation</a></li>
<li><a href="./../../../community/">Community</a></li>
<li>
<a href="./../../../blog/">Blog</a>
</li>
</ul>
</div><!-- /#cassandra-menu -->
</div>
</nav><!-- /.navbar -->
</div><!-- /.topnav -->
<div class="container-fluid">
<div class="row">
<div class="col-md-3">
<div class="doc-navigation">
<div class="doc-menu" role="navigation">
<div class="navbar-header">
<button type="button" class="pull-left navbar-toggle" data-toggle="collapse" data-target=".sidebar-navbar-collapse">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
</div>
<div class="navbar-collapse collapse sidebar-navbar-collapse">
<form id="doc-search-form" class="navbar-form" action="../search.html" method="get" role="search">
<div class="form-group">
<input type="text" size="30" class="form-control input-sm" name="q" placeholder="Search docs">
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</div>
</form>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../getting_started/index.html">Getting Started</a></li>
<li class="toctree-l1"><a class="reference internal" href="../new/index.html">New Features in Apache Cassandra 4.0</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html">Architecture</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="overview.html">Overview</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Dynamo</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#gossip">Gossip</a></li>
<li class="toctree-l3"><a class="reference internal" href="#failure-detection">Failure Detection</a></li>
<li class="toctree-l3"><a class="reference internal" href="#token-ring-ranges">Token Ring/Ranges</a></li>
<li class="toctree-l3"><a class="reference internal" href="#replication">Replication</a></li>
<li class="toctree-l3"><a class="reference internal" href="#tunable-consistency">Tunable Consistency</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="storage_engine.html">Storage Engine</a></li>
<li class="toctree-l2"><a class="reference internal" href="guarantees.html">Guarantees</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../cql/index.html">The Cassandra Query Language (CQL)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../data_modeling/index.html">Data Modeling</a></li>
<li class="toctree-l1"><a class="reference internal" href="../configuration/index.html">Configuring Cassandra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../operating/index.html">Operating Cassandra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tools/index.html">Cassandra Tools</a></li>
<li class="toctree-l1"><a class="reference internal" href="../troubleshooting/index.html">Troubleshooting</a></li>
<li class="toctree-l1"><a class="reference internal" href="../development/index.html">Contributing to Cassandra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../faq/index.html">Frequently Asked Questions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../plugins/index.html">Third-Party Plugins</a></li>
<li class="toctree-l1"><a class="reference internal" href="../bugs.html">Reporting Bugs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contactus.html">Contact us</a></li>
</ul>
</div><!--/.nav-collapse -->
</div>
</div>
</div>
<div class="col-md-8">
<div class="content doc-content">
<div class="content-container">
<div class="section" id="dynamo">
<h1>Dynamo<a class="headerlink" href="#dynamo" title="Permalink to this headline"></a></h1>
<div class="section" id="gossip">
<span id="id1"></span><h2>Gossip<a class="headerlink" href="#gossip" title="Permalink to this headline"></a></h2>
<div class="admonition-todo admonition" id="index-0">
<p class="first admonition-title">Todo</p>
<p class="last">todo</p>
</div>
</div>
<div class="section" id="failure-detection">
<h2>Failure Detection<a class="headerlink" href="#failure-detection" title="Permalink to this headline"></a></h2>
<div class="admonition-todo admonition" id="index-1">
<p class="first admonition-title">Todo</p>
<p class="last">todo</p>
</div>
</div>
<div class="section" id="token-ring-ranges">
<h2>Token Ring/Ranges<a class="headerlink" href="#token-ring-ranges" title="Permalink to this headline"></a></h2>
<div class="admonition-todo admonition" id="index-2">
<p class="first admonition-title">Todo</p>
<p class="last">todo</p>
</div>
</div>
<div class="section" id="replication">
<span id="replication-strategy"></span><h2>Replication<a class="headerlink" href="#replication" title="Permalink to this headline"></a></h2>
<p>The replication strategy of a keyspace determines which nodes are replicas for a given token range. The two main
replication strategies are <a class="reference internal" href="#simple-strategy"><span class="std std-ref">SimpleStrategy</span></a> and <a class="reference internal" href="#network-topology-strategy"><span class="std std-ref">NetworkTopologyStrategy</span></a>.</p>
<div class="section" id="simplestrategy">
<span id="simple-strategy"></span><h3>SimpleStrategy<a class="headerlink" href="#simplestrategy" title="Permalink to this headline"></a></h3>
<p>SimpleStrategy allows a single integer <code class="docutils literal notranslate"><span class="pre">replication_factor</span></code> to be defined. This determines the number of nodes that
should contain a copy of each row. For example, if <code class="docutils literal notranslate"><span class="pre">replication_factor</span></code> is 3, then three different nodes should store
a copy of each row.</p>
<p>SimpleStrategy treats all nodes identically, ignoring any configured datacenters or racks. To determine the replicas
for a token range, Cassandra iterates through the tokens in the ring, starting with the token range of interest. For
each token, it checks whether the owning node has been added to the set of replicas, and if it has not, it is added to
the set. This process continues until <code class="docutils literal notranslate"><span class="pre">replication_factor</span></code> distinct nodes have been added to the set of replicas.</p>
</div>
<div class="section" id="networktopologystrategy">
<span id="network-topology-strategy"></span><h3>NetworkTopologyStrategy<a class="headerlink" href="#networktopologystrategy" title="Permalink to this headline"></a></h3>
<p>NetworkTopologyStrategy allows a replication factor to be specified for each datacenter in the cluster. Even if your
cluster only uses a single datacenter, NetworkTopologyStrategy should be prefered over SimpleStrategy to make it easier
to add new physical or virtual datacenters to the cluster later.</p>
<p>In addition to allowing the replication factor to be specified per-DC, NetworkTopologyStrategy also attempts to choose
replicas within a datacenter from different racks. If the number of racks is greater than or equal to the replication
factor for the DC, each replica will be chosen from a different rack. Otherwise, each rack will hold at least one
replica, but some racks may hold more than one. Note that this rack-aware behavior has some potentially <a class="reference external" href="https://issues.apache.org/jira/browse/CASSANDRA-3810">surprising
implications</a>. For example, if there are not an even number of
nodes in each rack, the data load on the smallest rack may be much higher. Similarly, if a single node is bootstrapped
into a new rack, it will be considered a replica for the entire ring. For this reason, many operators choose to
configure all nodes on a single “rack”.</p>
</div>
<div class="section" id="transient-replication">
<span id="id2"></span><h3>Transient Replication<a class="headerlink" href="#transient-replication" title="Permalink to this headline"></a></h3>
<p>Transient replication allows you to configure a subset of replicas to only replicate data that hasn’t been incrementally
repaired. This allows you to decouple data redundancy from availability. For instance, if you have a keyspace replicated
at rf 3, and alter it to rf 5 with 2 transient replicas, you go from being able to tolerate one failed replica to being
able to tolerate two, without corresponding increase in storage usage. This is because 3 nodes will replicate all the data
for a given token range, and the other 2 will only replicate data that hasn’t been incrementally repaired.</p>
<p>To use transient replication, you first need to enable it in <code class="docutils literal notranslate"><span class="pre">cassandra.yaml</span></code>. Once enabled, both SimpleStrategy and
NetworkTopologyStrategy can be configured to transiently replicate data. You configure it by specifying replication factor
as <code class="docutils literal notranslate"><span class="pre">&lt;total_replicas&gt;/&lt;transient_replicas</span></code> Both SimpleStrategy and NetworkTopologyStrategy support configuring transient
replication.</p>
<p>Transiently replicated keyspaces only support tables created with read_repair set to NONE and monotonic reads are not currently supported.
You also can’t use LWT, logged batches, and counters in 4.0. You will possibly never be able to use materialized views
with transiently replicated keyspaces and probably never be able to use 2i with them.</p>
<p>Transient replication is an experimental feature that may not be ready for production use. The expected audienced is experienced
users of Cassandra capable of fully validating a deployment of their particular application. That means being able check
that operations like reads, writes, decommission, remove, rebuild, repair, and replace all work with your queries, data,
configuration, operational practices, and availability requirements.</p>
<p>It is anticipated that 4.next will support monotonic reads with transient replication as well as LWT, logged batches, and
counters.</p>
</div>
</div>
<div class="section" id="tunable-consistency">
<h2>Tunable Consistency<a class="headerlink" href="#tunable-consistency" title="Permalink to this headline"></a></h2>
<p>Cassandra supports a per-operation tradeoff between consistency and availability through <em>Consistency Levels</em>.
Essentially, an operation’s consistency level specifies how many of the replicas need to respond to the coordinator in
order to consider the operation a success.</p>
<p>The following consistency levels are available:</p>
<dl class="docutils">
<dt><code class="docutils literal notranslate"><span class="pre">ONE</span></code></dt>
<dd>Only a single replica must respond.</dd>
<dt><code class="docutils literal notranslate"><span class="pre">TWO</span></code></dt>
<dd>Two replicas must respond.</dd>
<dt><code class="docutils literal notranslate"><span class="pre">THREE</span></code></dt>
<dd>Three replicas must respond.</dd>
<dt><code class="docutils literal notranslate"><span class="pre">QUORUM</span></code></dt>
<dd>A majority (n/2 + 1) of the replicas must respond.</dd>
<dt><code class="docutils literal notranslate"><span class="pre">ALL</span></code></dt>
<dd>All of the replicas must respond.</dd>
<dt><code class="docutils literal notranslate"><span class="pre">LOCAL_QUORUM</span></code></dt>
<dd>A majority of the replicas in the local datacenter (whichever datacenter the coordinator is in) must respond.</dd>
<dt><code class="docutils literal notranslate"><span class="pre">EACH_QUORUM</span></code></dt>
<dd>A majority of the replicas in each datacenter must respond.</dd>
<dt><code class="docutils literal notranslate"><span class="pre">LOCAL_ONE</span></code></dt>
<dd>Only a single replica must respond. In a multi-datacenter cluster, this also gaurantees that read requests are not
sent to replicas in a remote datacenter.</dd>
<dt><code class="docutils literal notranslate"><span class="pre">ANY</span></code></dt>
<dd>A single replica may respond, or the coordinator may store a hint. If a hint is stored, the coordinator will later
attempt to replay the hint and deliver the mutation to the replicas. This consistency level is only accepted for
write operations.</dd>
</dl>
<p>Write operations are always sent to all replicas, regardless of consistency level. The consistency level simply
controls how many responses the coordinator waits for before responding to the client.</p>
<p>For read operations, the coordinator generally only issues read commands to enough replicas to satisfy the consistency
level, with one exception. Speculative retry may issue a redundant read request to an extra replica if the other replicas
have not responded within a specified time window.</p>
<div class="section" id="picking-consistency-levels">
<h3>Picking Consistency Levels<a class="headerlink" href="#picking-consistency-levels" title="Permalink to this headline"></a></h3>
<p>It is common to pick read and write consistency levels that are high enough to overlap, resulting in “strong”
consistency. This is typically expressed as <code class="docutils literal notranslate"><span class="pre">W</span> <span class="pre">+</span> <span class="pre">R</span> <span class="pre">&gt;</span> <span class="pre">RF</span></code>, where <code class="docutils literal notranslate"><span class="pre">W</span></code> is the write consistency level, <code class="docutils literal notranslate"><span class="pre">R</span></code> is the
read consistency level, and <code class="docutils literal notranslate"><span class="pre">RF</span></code> is the replication factor. For example, if <code class="docutils literal notranslate"><span class="pre">RF</span> <span class="pre">=</span> <span class="pre">3</span></code>, a <code class="docutils literal notranslate"><span class="pre">QUORUM</span></code> request will
require responses from at least two of the three replicas. If <code class="docutils literal notranslate"><span class="pre">QUORUM</span></code> is used for both writes and reads, at least
one of the replicas is guaranteed to participate in <em>both</em> the write and the read request, which in turn guarantees that
the latest write will be read. In a multi-datacenter environment, <code class="docutils literal notranslate"><span class="pre">LOCAL_QUORUM</span></code> can be used to provide a weaker but
still useful guarantee: reads are guaranteed to see the latest write from within the same datacenter.</p>
<p>If this type of strong consistency isn’t required, lower consistency levels like <code class="docutils literal notranslate"><span class="pre">ONE</span></code> may be used to improve
throughput, latency, and availability.</p>
</div>
</div>
</div>
<div class="doc-prev-next-links" role="navigation" aria-label="footer navigation">
<a href="storage_engine.html" class="btn btn-default pull-right " role="button" title="Storage Engine" accesskey="n">Next <span class="glyphicon glyphicon-circle-arrow-right" aria-hidden="true"></span></a>
<a href="overview.html" class="btn btn-default" role="button" title="Overview" accesskey="p"><span class="glyphicon glyphicon-circle-arrow-left" aria-hidden="true"></span> Previous</a>
</div>
</div>
</div>
</div>
</div>
</div>
<hr />
<footer>
<div class="container">
<div class="col-md-4 social-blk">
<span class="social">
<a href="https://twitter.com/cassandra"
class="twitter-follow-button"
data-show-count="false" data-size="large">Follow @cassandra</a>
<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script>
<a href="https://twitter.com/intent/tweet?button_hashtag=cassandra"
class="twitter-hashtag-button"
data-size="large"
data-related="ApacheCassandra">Tweet #cassandra</a>
<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script>
</span>
<a class="subscribe-rss icon-link" href="/feed.xml" title="Subscribe to Blog via RSS">
<span><i class="fa fa-rss"></i></span>
</a>
</div>
<div class="col-md-8 trademark">
<p>&copy; 2016 <a href="http://apache.org">The Apache Software Foundation</a>.
Apache, the Apache feather logo, and Apache Cassandra are trademarks of The Apache Software Foundation.
<p>
</div>
</div><!-- /.container -->
</footer>
<!-- Javascript. Placed here so pages load faster -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
<script src="./../../../js/underscore-min.js"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" crossorigin="anonymous"></script>
<script src="./../../../js/doctools.js"></script>
<script src="./../../../js/searchtools.js"></script>
<script type="text/javascript"> var DOCUMENTATION_OPTIONS = { URL_ROOT: "", VERSION: "", COLLAPSE_INDEX: false, FILE_SUFFIX: ".html", HAS_SOURCE: false, SOURCELINK_SUFFIX: ".txt" }; </script>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
try {
var pageTracker = _gat._getTracker("UA-11583863-1");
pageTracker._trackPageview();
} catch(err) {}
</script>
</body>
</html>