blob: 1651f261a32897dc4b5753616fcf5d0ff2684d62 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>Apache Cassandra | Apache Cassandra Documentation</title>
<link rel="stylesheet" href="../assets/css/site.css">
<link rel="schema.dcterms" href="https://purl.org/dc/terms/">
<meta name="dcterms.subject" content="_">
<meta name="dcterms.identifier" content="master">
<meta name="generator" content="Antora 2.3.4">
<link rel="icon" href="../assets/img/favicon.ico" type="image/x-icon">
<script>
const script = document.createElement("script");
const domain = window.location.hostname;
script.type = "text/javascript";
script.src = "https://plausible.cassandra.apache.org/js/plausible.js";
script.setAttribute("data-domain",domain);
script.setAttribute("defer",'true');
script.setAttribute("async",'true');
document.getElementsByTagName("head")[0].appendChild(script);
</script> </head>
<body class="basic ">
<div class="container mx-auto relative">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
<meta property="og:type" content="website" />
<meta property="og:url" content="/" />
<meta property="og:site_name" content="Apache Cassandra" />
<header id="top-nav">
<div class="inner relative">
<div class="header-social-icons text-right">
<a href="https://twitter.com/cassandra?lang=en" target="_blank" styles="margin-left: 20px;"><img src="../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank" styles="margin-left: 20px;"><img src="../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank" styles="margin-left: 20px;"><img src="../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
<div class="cf">
<div class="logo left"><a href="/"><img src="../assets/img/logo-white-r.png" alt="cassandra logo"></a></div>
<div class="mobile-nav-icon right">
<img class="toggle-icon" src="../assets/img/hamburger-nav.svg">
</div>
<ul class="main-nav nav-links right flex flex-vert-center flex-space-between">
<li>
<a class="nav-link hide-mobile">Get Started</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/cassandra-basics.html">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-basics.png" alt="cassandra basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra Basics
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/quickstart.html">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-rocket.png" alt="cassandra basics icon">
</div>
<div class="sub-nav-text teal py-small">
Quickstart
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/ecosystem.html">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-ecosystem.png" alt="cassandra basics icon">
</div>
<div class="sub-nav-text teal py-small">
Ecosystem
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link" href="/doc/latest/">Documentation</a></li>
<li>
<a class="nav-link" href="/_/community.html">Community</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/community.html#code-of-conduct">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-welcome.png" alt="welcome icon">
</div>
<div class="sub-nav-text teal py-small">
Welcome
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#discussions">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-discussions.png" alt="discussions icon">
</div>
<div class="sub-nav-text teal py-small">
Discussions
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#project-governance">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-governance.png" alt="Governance icon">
</div>
<div class="sub-nav-text teal py-small">
Governance
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#how-to-contribute">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-contribute.png" alt="Contribute icon">
</div>
<div class="sub-nav-text teal py-small">
Contribute
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/community.html#meet-the-community">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-community.png" alt="Meet the Community icon">
</div>
<div class="sub-nav-text teal py-small">
Meet the Community
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/cassandra-catalyst-program.html">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-catalyst.png" alt="Catalyst icon">
</div>
<div class="sub-nav-text teal py-small">
Catalyst Program
</div>
</a>
</li>
<li class="pa-micro hide-mobile">
<a href="/_/events.html">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-events.png" alt="Events icon">
</div>
<div class="sub-nav-text teal py-small">
Events
</div>
</a>
</li>
</ul>
</li>
<li>
<a class="nav-link hide-mobile">Learn</a>
<ul class="sub-menu bg-white">
<li class="pa-micro">
<a href="/_/Apache-Cassandra-5.0-Moving-Toward-an-AI-Driven-Future.html">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-basics.png" alt="Basics icon">
</div>
<div class="sub-nav-text teal py-small">
Cassandra 5.0
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/case-studies.html">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-case-study.png" alt="Case Studies icon">
</div>
<div class="sub-nav-text teal py-small">
Case Studies
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/resources.html">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-resources.png" alt="Resources icon">
</div>
<div class="sub-nav-text teal py-small">
Resources
</div>
</a>
</li>
<li class="pa-micro">
<a href="/_/blog.html">
<div class="sub-nav-icon">
<img src="../assets/img/sub-menu-blog.png" alt="Blog icon">
</div>
<div class="sub-nav-text teal py-small">
Blog
</div>
</a>
</li>
</ul>
</li>
<li><a class="nav-link btn btn--filled" href="/_/download.html">Download Now</a></li>
</ul>
</div>
</div>
</header>
<div class="hero hero--home grad">
<div class="eye"></div>
<div id="home-content" class="text-center flex flex-center flex-column relative z2 ma-xlarge">
<h1>Cassandra Basics</h1>
</div>
</div>
<div class="flex-center py-large arrow">
<div class="inner inner--narrow">
<div class="openblock image-expand inner inner--narrow py-large cf">
<div class="content">
<h2 id="what-is-apache-cassandra" class="discrete">What is Apache Cassandra?</h2>
<div class="paragraph">
<p>Cassandra is a NoSQL distributed database. By design, NoSQL databases are lightweight, open-source, non-relational, and largely distributed. Counted among their strengths are horizontal scalability, distributed architectures, and a flexible approach to schema definition.</p>
</div>
<div class="paragraph">
<p>NoSQL databases enable rapid, ad-hoc organization and analysis of extremely high-volume, disparate data types. That’s become more important in recent years, with the advent of Big Data and the need to rapidly scale databases in the cloud. Cassandra is among the NoSQL databases that have addressed the constraints of previous data management technologies, such as SQL databases.</p>
</div>
<h3 id="distribution-provides-power-and-resilience" class="discrete">Distribution provides power and resilience</h3>
<div class="paragraph">
<p><span class="image right"><img src="_images/diagrams/apache-cassandra-diagrams-01.jpg" alt="NoSQL Distributed Database"></span>
One important Cassandra attribute is that its databases are distributed. That yields both technical and business advantages. Cassandra databases easily scale when an application is under high stress, and the distribution also prevents data loss from any given datacenter’s hardware failure. A distributed architecture also brings technical power; for example, a developer can tweak the throughput of read queries or write queries in isolation.</p>
</div>
<div class="paragraph">
<p>"Distributed" means that Cassandra can run on multiple machines while appearing to users as a unified whole. There is little point in running Cassandsra as a single node, although it is very helpful to do so to help you get up to speed on how it works. But to get the maximum benefit out of Cassandra, you would run it on multiple machines.</p>
</div>
<div class="paragraph">
<p>Since it is a distributed database, Cassandra can (and usually does) have multiple nodes. A node represents a single instance of Cassandra. These nodes communicate with one another through a protocol called gossip, which is a process of computer peer-to-peer communication. Cassandra also has a masterless architecture – any node in the database can provide the exact same functionality as any other node – contributing to Cassandra’s robustness and resilience. Multiple nodes can be organized logically into a cluster, or "ring". You can also have multiple datacenters.</p>
</div>
</div>
</div>
<div class="openblock image-expand inner inner--narrow py-large cf">
<div class="content">
<h3 id="want-more-power-add-more-nodes" class="discrete">Want more power? Add more nodes</h3>
<div class="paragraph">
<p><span class="image left"><img src="_images/diagrams/apache-cassandra-diagrams-02.jpg" alt="scale their databases dynamically"></span>
One reason for Cassandra’s popularity is that it enables developers to scale their databases dynamically, using off-the-shelf hardware, with no downtime. You can expand when you need to – and also shrink, if the application requirements suggest that path.</p>
</div>
<div class="paragraph">
<p>Perhaps you are used to Oracle or MySQL databases. If so, you know that extending them to support more users or storage capacity requires you to add more CPU power, RAM, or faster disks. Each of those costs a significant amount of money. And yet: Eventually you still encounter some ceilings and constraints.</p>
</div>
<div class="paragraph">
<p>In contrast, Cassandra makes it easy to increase the amount of data it can manage. Because it’s based on nodes, Cassandra scales horizontally (aka scale-out), using lower commodity hardware. To double your capacity or double your throughput, double the number of nodes. That’s all it takes. Need more power? Add more nodes – whether that’s 8 more or 8,000 – with no downtime. You also have the flexibility to scale back if you wish.</p>
</div>
<div class="paragraph">
<p>This linear scalability applies essentially indefinitely. This capability has become one of Cassandra’s key strengths.</p>
</div>
</div>
</div>
<div class="openblock image-expand inner inner--narrow py-large cf">
<div class="content">
<h3 id="introducing-partitions" class="discrete">Introducing partitions</h3>
<div class="paragraph">
<p><span class="image right"><img src="_images/diagrams/apache-cassandra-diagrams-03.jpg" alt="scales linearly"></span>
In Cassandra, the data itself is automatically distributed, with (positive) performance consequences. It accomplishes this using partitions. Each node owns a particular set of tokens, and Cassandra distributes data based on the ranges of these tokens across the cluster. The partition key is responsible for distributing data among nodes and is important for determining data locality. When data is inserted into the cluster, the first step is to apply a hash function to the partition key. The output is used to determine what node (based on the token range) will get the data.</p>
</div>
<div class="paragraph">
<p><span class="image left"><img src="_images/diagrams/apache-cassandra-diagrams-04.jpg" alt="Replication ensures reliability and fault tolerance"></span>
When data comes in, the database’s coordinator takes on the job of assigning to a given partition – let’s call it partition 59. Remember that any node in the cluster can take on the role as the coordinator. As we mentioned earlier, nodes gossip to one another; during which they communicate about which node is responsible for what ranges. So in our example, the coordinator does a lookup: Which node has the token 59? When it finds the right one, it forwards that data to that node. The node that owns the data for that range is called a replica node. One piece of data can be replicated to multiple (replica) nodes, ensuring reliability and fault tolerance. So far, our data has only been replicated to one replica. This represents a replication factor of one, or RF = 1.</p>
</div>
<div class="paragraph">
<p>The coordinator node isn’t a single location; the system would be fragile if it were. It’s simply the node that gets the request at that particular moment. Any node can act as the coordinator.</p>
</div>
</div>
</div>
<div class="openblock image-expand inner inner--narrow py-large cf">
<div class="content">
<h3 id="replication-ensures-reliability-and-fault-tolerance" class="discrete">Replication ensures reliability and fault tolerance</h3>
<div class="paragraph">
<p><span class="image right"><img src="_images/diagrams/apache-cassandra-diagrams-05.jpg" alt="multiple (replica) nodes"></span>
One piece of data can be replicated to multiple (replica) nodes, ensuring reliability and fault tolerance. Cassandra supports the notion of a replication factor (RF), which describes how many copies of your data should exist in the database. So far, our data has only been replicated to one replica (RF = 1). If we up this to a replication factor of two (RF = 2), the data needs to be stored on a second replica as well – and hence each node becomes responsible for a secondary range of tokens, in addition to its primary range. A replication factor of three ensures that there are three nodes (replicas) covering that particular token range, and the data is stored on yet another one.</p>
</div>
<div class="paragraph">
<p><span class="image left"><img src="_images/diagrams/apache-cassandra-diagrams-06.jpg" alt="distributed nature of Cassandra"></span>
The distributed nature of Cassandra makes it more resilient and performant. This really comes into play when we have multiple replicas for the same data. Doing so helps the system to be self-healing if something goes wrong, such as if a node goes down, a hard drive fails, or AWS resets an instance. Replication ensures that data isn’t lost. If a request comes in for data, even if one of our replicas has gone down, the other two are still available to fulfill the request. The coordinator stores a “hint” for that data as well, and when the downed replica comes back up, it will find out what it missed, and catch up to speed with the other two replicas. No manual action is required, this is done completely automatically.</p>
</div>
<div class="paragraph">
<p><span class="image right"><img src="_images/diagrams/apache-cassandra-diagrams-07.jpg" alt="The use of multiple replicas also has performance advantages"></span>
The use of multiple replicas also has performance advantages. Because we aren’t limited to a single instance We have three nodes (replicas) that can be accessed to provide data for our operations, which we can load balance amongst to achieve the best performance.</p>
</div>
<div class="paragraph">
<p>Cassandra automatically replicates that data around your different data centers. Your application can write data to a Cassandra node on the U.S. west coast, and that data is automatically available in data centers at nodes in Asia and Europe. That has positive performance advantages – especially if you support a worldwide user base. In a world dependent on cloud computing and fast data access, no user suffers from latency due to distance</p>
</div>
</div>
</div>
<div class="openblock image-expand inner inner--narrow py-large cf">
<div class="content">
<h3 id="tuning-your-consistency" class="discrete">Tuning your consistency</h3>
<div class="paragraph">
<p><span class="image left"><img src="_images/diagrams/apache-cassandra-diagrams-08.jpg" alt="Available Partition-tolerant) database"></span>
We’ve been talking a lot about distributed systems and availability. If you are familiar with CAP theorem, Cassandra is by default an AP (Available Partition-tolerant) database, hence it is “always on”. But you can indeed configure the consistency on a per-query basis. In this context, the consistency level represents the minimum number of Cassandra nodes that must acknowledge a read or write operation to the coordinator before the operation is considered successful. As a general rule, you will select your consistency level (CL) based on your replication factor.
<span class="image right"><img src="_images/diagrams/apache-cassandra-diagrams-09.jpg" alt="Available Partition-tolerant) database"></span> For the example below, our data is replicated out to three nodes. We have a CL=QUORUM (Quorum referring to majority, 2 replicas in this case or RF/2 +1) therefore the coordinator will need to get acknowledgement back from two of the replicas in order for the query to be considered a success.</p>
</div>
<div class="paragraph">
<p>As with other computing tasks, it can take some skill to learn to tune this feature for ideal performance, availability, and data integrity – but the fact that you can control it with such granularity means you can control deployments in great detail.</p>
</div>
<div class="paragraph">
<p><span class="image left"><img src="_images/diagrams/apache-cassandra-diagrams-10.jpg" alt="Cassandra is deployment agnostic"></span>
Ultimately, Cassandra is deployment agnostic. It doesn&#8217;t care where you put it – on prem, a cloud provider, multiple cloud providers. You can use a combination of those for a single database. That gives software developers the maximum amount of flexibility.</p>
</div>
</div>
</div>
</div>
</div>
<footer class="grad grad--two flex-center pb-xlarge">
<div class="inner text-center z2 relative">
<h2 class="white py-small">Get started with Cassandra, fast.</h2>
<a id="footer-cta" href="/_/quickstart.html" class="btn btn--filled ma-medium">Quickstart Guide</a>
</div>
<div class="inner flex flex-distribute-items mt-xlarge z2 relative">
<div class="col-2">
<div id="footer-logo" class="logo logo--footer mb-medium"><img src="../assets/img/logo-white-r.png" alt="Cassandra Logo"></div>
<p>Apache Cassandra<img src="../assets/img/registered.svg" alt="®" style="width:18px;"> powers mission-critical deployments with improved performance and unparalleled levels of scale in the cloud.</p>
<div class="footer-social-icons">
<a href="https://twitter.com/cassandra?lang=en" target="_blank"><img src="../assets/img/twitter-icon-circle-white.svg" alt="twitter icon" width="24"></a>
<a href="https://www.linkedin.com/company/apache-cassandra/" target="_blank"><img src="../assets/img/LI-In-Bug.png" alt="linked-in icon" width="24"></a>
<a href="https://www.youtube.com/c/PlanetCassandra" target="_blank"><img src="../assets/img/youtube-icon.png" alt="youtube icon" width="24"></a>
</div>
</div>
<div class="col-2 flex flex-center">
<ul class="columns-2">
<li class="mb-small"><a href="/">Home</a></li>
<li class="mb-small"><a href="/_/cassandra-basics.html">Cassandra Basics</a></li>
<li class="mb-small"><a href="/_/quickstart.html">Quickstart</a></li>
<li class="mb-small"><a href="/_/ecosystem.html">Ecosystem</a></li>
<li class="mb-small"><a href="/doc/latest/">Documentation</a></li>
<li class="mb-small"><a href="/_/community.html">Community</a></li>
<li class="mb-small"><a href="/_/case-studies.html">Case Studies</a></li>
<li class="mb-small"><a href="/_/resources.html">Resources</a></li>
<li class="mb-small"><a href="/_/blog.html">Blog</a></li>
</ul>
</div>
</div>
</footer>
<div class="lower-footer bg-white pa-medium">
<div class="flex flex-row flex-vert-center">
<div class="pr-medium"><img src="../assets/img//feather-small.png" alt="ASF" width="20"></div>
<div class="pr-medium"><a href="http://www.apache.org/" target="_blank">Foundation</a></div>
<div class="pr-medium"><a href="https://www.apache.org/events/current-event.html" target="_blank">Events</a></div>
<div class="pr-medium"><a href="https://www.apache.org/licenses/" target="_blank">License</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/thanks" target="_blank">Thanks</a></div>
<div class="pr-medium"><a href="https://www.apache.org/security" target="_blank">Security</a></div>
<div class="pr-medium"><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank">Privacy</a></div>
<div class="pr-medium"><a href="https://www.apache.org/foundation/sponsorship" target="_blank">Sponsorship</a></div>
</div>
<p class="my-medium">© 2009-<script>document.write(new Date().getFullYear())</script> <a href="https://apache.org" target="_blank">The Apache Software Foundation</a> under the terms of the Apache License 2.0. Apache, the Apache feather logo, Apache Cassandra, Cassandra, and the Cassandra logo, are either registered trademarks or trademarks of The Apache Software Foundation.</p>
</div>
<div id="fade" class="hidden"></div>
<div id="modal" class="hidden">
<div id="close-modal" class="cursor-pointer"><svg viewBox="0 0 24 24" width="24" height="24" stroke="currentColor" stroke-width="2" fill="none" stroke-linecap="round" stroke-linejoin="round" class="css-i6dzq1"><line x1="18" y1="6" x2="6" y2="18"></line><line x1="6" y1="6" x2="18" y2="18"></line></svg></div>
<div id="mod-content" class="vid-mod-content resp-container"></div>
</div>
<script>
jQuery(function(){
var windowW = $(window).width();
$(document)
.on('click','.mobile-nav-icon',function(){
$('.main-nav').fadeIn();
})
.on('click','.main-nav',function(){
if(windowW <= 1000){
$(this).fadeOut();
}
})
.on('click','#version-toggle',function(){
$(this).toggleClass('active');
$(this).next().fadeToggle();
})
.on('click','#mobile-docs-nav-burger', function(){
$(this).toggleClass('active');
$('.docs-nav').toggleClass('active');
});
var url = window.location.pathname;
var isQuickstart = url.includes('quickstart.html');
if(isQuickstart){
var footerCTA = document.getElementById('footer-cta');
footerCTA.innerHTML = 'Get latest updates';
footerCTA.setAttribute('href', '/_/blog.html');
}
});
</script>
</div>
</body>
<script>
jQuery(function(){
jQuery(document)
.on('click','.cassandra-cloud h3',function(){
var el = jQuery(this);
el.toggleClass('active');
el.next().slideToggle();
})
.on('click','.image-expand img', function(){
$(this).clone().appendTo('#mod-content');
$('#fade,#modal,#close-modal').fadeIn();
$('body,html').addClass('no-scroll');
})
.on('click','#fade,#close-modal', function(){
$('#fade,#modal,#close-modal').fadeOut();
$('body,html').removeClass('no-scroll');
$('#mod-content').html('');
});
});
</script>
</html>