blob: 4f56b8006885b9f8b37f9db69a28380e6dad5a41 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Document metadata: encoding, viewport, SEO descriptors, stylesheets and relational links. -->
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <!-- Description is kept on a single line so the attribute value carries no trailing newline. -->
  <meta name="description" content="The Apache Cassandra database is the right choice when you need scalability and high availability without compromising performance. Linear scalability and proven fault-tolerance on commodity hardware or cloud infrastructure make it the perfect platform for mission-critical data. Cassandra's support for replicating across multiple datacenters is best-in-class, providing lower latency for your users and the peace of mind of knowing that you can survive regional outages.">
  <meta name="keywords" content="cassandra, apache, apache cassandra, distributed storage, key value store, scalability, bigtable, dynamo">
  <meta name="robots" content="index,follow">
  <meta name="language" content="en">
  <!-- Page-specific title (page heading plus the documentation set named in the rel="top" link) so tabs, bookmarks and search results can tell pages apart. -->
  <title>Physical Data Modeling | Apache Cassandra Documentation v4.0-alpha4</title>
  <link rel="canonical" href="http://cassandra.apache.org/doc/4.0-alpha4/data_modeling/data_modeling_physical.html">

  <!-- Stylesheets: third-party files are pinned with Subresource Integrity hashes. -->
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" crossorigin="anonymous">
  <link rel="stylesheet" href="./../../../css/style.css">
  <link rel="stylesheet" href="./../../../css/sphinx.css">
  <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.2.0/css/all.css" integrity="sha384-hWVjflwFxL6sNzntih27bfxkr27PmbbK/iSvJ+a4+0owXq79v+lsFkW54bOGbiDQ" crossorigin="anonymous">

  <!-- Relational links between this page and its neighbours in the documentation tree. -->
  <link rel="top" title="Apache Cassandra Documentation v4.0-alpha4" href="../index.html">
  <link rel="up" title="Data Modeling" href="index.html">
  <link rel="next" title="Evaluating and Refining Data Models" href="data_modeling_refining.html">
  <link rel="prev" title="Logical Data Modeling" href="data_modeling_logical.html">

  <link type="application/atom+xml" rel="alternate" href="http://cassandra.apache.org/feed.xml" title="Apache Cassandra Website">
</head>
<body>
<!-- breadcrumbs -->
<div class="topnav">
  <div class="container breadcrumb-container">
    <ul class="breadcrumb">
      <li>
        <div class="dropdown">
          <!--
            The logo is decorative (the adjacent link text names the foundation), so it gets an empty alt.
            The trigger link carries id="dLabel" because the menu's aria-labelledby refers to that id.
          -->
          <img class="asf-logo" src="./../../../img/asf_feather.png" alt="">
          <a id="dLabel" data-toggle="dropdown" href="#" role="button" aria-haspopup="true" aria-expanded="false">Apache Software Foundation <span class="caret"></span></a>
          <ul class="dropdown-menu" role="menu" aria-labelledby="dLabel">
            <li><a href="http://www.apache.org">Apache Homepage</a></li>
            <li><a href="http://www.apache.org/licenses/">License</a></li>
            <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
            <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
            <li><a href="http://www.apache.org/security/">Security</a></li>
          </ul>
        </div>
      </li>
      <li><a href="./../../../">Apache Cassandra</a></li>
      <li><a href="./../../../doc/latest/">Documentation</a></li>
      <li><a href="./">Data Modeling</a></li>
      <li>Physical Data Modeling</li>
    </ul>
  </div>
  <!-- navbar -->
  <nav class="navbar navbar-default navbar-static-top" role="navigation">
    <div class="container">
      <div class="navbar-header">
        <!-- Collapse toggle for the site menu (#cassandra-menu). -->
        <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#cassandra-menu" aria-expanded="false">
          <span class="sr-only">Toggle navigation</span>
          <span class="icon-bar"></span>
          <span class="icon-bar"></span>
          <span class="icon-bar"></span>
        </button>
        <a class="navbar-brand" href="./../../../"><img src="./../../../img/cassandra_logo.png" alt="Apache Cassandra logo"></a>
      </div><!-- /.navbar-header -->
      <div id="cassandra-menu" class="collapse navbar-collapse">
        <ul class="nav navbar-nav navbar-right">
          <li><a href="./../../../">Home</a></li>
          <li><a href="./../../../download/">Download</a></li>
          <li><a href="./../../../doc/latest/">Documentation</a></li>
          <li><a href="./../../../community/">Community</a></li>
          <li>
            <a href="./../../../blog/">Blog</a>
          </li>
        </ul>
      </div><!-- /#cassandra-menu -->
    </div>
  </nav><!-- /.navbar -->
</div><!-- /.topnav -->
<div class="container-fluid">
<div class="row">
<div class="col-md-3">
  <!-- Sidebar: documentation search box plus the table-of-contents tree, collapsible via the toggle button. -->
  <div class="doc-navigation">
    <div class="doc-menu" role="navigation">
      <div class="navbar-header">
        <!-- aria-expanded starts as "false", matching the top navbar toggle. -->
        <button type="button" class="pull-left navbar-toggle" data-toggle="collapse" data-target=".sidebar-navbar-collapse" aria-expanded="false">
          <span class="sr-only">Toggle navigation</span>
          <span class="icon-bar"></span>
          <span class="icon-bar"></span>
          <span class="icon-bar"></span>
        </button>
      </div>
      <div class="navbar-collapse collapse sidebar-navbar-collapse">
        <form id="doc-search-form" class="navbar-form" action="../search.html" method="get" role="search">
          <div class="form-group">
            <!-- A placeholder is not an accessible name, so aria-label repeats it for assistive technology. -->
            <input type="text" size="30" class="form-control input-sm" name="q" placeholder="Search docs" aria-label="Search docs">
            <input type="hidden" name="check_keywords" value="yes">
            <input type="hidden" name="area" value="default">
          </div>
        </form>
        <!-- Table of contents; the "current" classes mark the path to this page. -->
        <ul class="current">
          <li class="toctree-l1"><a class="reference internal" href="../getting_started/index.html">Getting Started</a></li>
          <li class="toctree-l1"><a class="reference internal" href="../new/index.html">New Features in Apache Cassandra 4.0</a></li>
          <li class="toctree-l1"><a class="reference internal" href="../architecture/index.html">Architecture</a></li>
          <li class="toctree-l1"><a class="reference internal" href="../cql/index.html">The Cassandra Query Language (CQL)</a></li>
          <li class="toctree-l1 current"><a class="reference internal" href="index.html">Data Modeling</a><ul class="current">
              <li class="toctree-l2"><a class="reference internal" href="intro.html">Introduction</a></li>
              <li class="toctree-l2"><a class="reference internal" href="data_modeling_conceptual.html">Conceptual Data Modeling</a></li>
              <li class="toctree-l2"><a class="reference internal" href="data_modeling_rdbms.html">RDBMS Design</a></li>
              <li class="toctree-l2"><a class="reference internal" href="data_modeling_queries.html">Defining Application Queries</a></li>
              <li class="toctree-l2"><a class="reference internal" href="data_modeling_logical.html">Logical Data Modeling</a></li>
              <li class="toctree-l2 current"><a class="current reference internal" href="#">Physical Data Modeling</a><ul>
                  <li class="toctree-l3"><a class="reference internal" href="#hotel-physical-data-model">Hotel Physical Data Model</a></li>
                  <li class="toctree-l3"><a class="reference internal" href="#reservation-physical-data-model">Reservation Physical Data Model</a></li>
                </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="data_modeling_refining.html">Evaluating and Refining Data Models</a></li>
              <li class="toctree-l2"><a class="reference internal" href="data_modeling_schema.html">Defining Database Schema</a></li>
              <li class="toctree-l2"><a class="reference internal" href="data_modeling_tools.html">Cassandra Data Modeling Tools</a></li>
            </ul>
          </li>
          <li class="toctree-l1"><a class="reference internal" href="../configuration/index.html">Configuring Cassandra</a></li>
          <li class="toctree-l1"><a class="reference internal" href="../operating/index.html">Operating Cassandra</a></li>
          <li class="toctree-l1"><a class="reference internal" href="../tools/index.html">Cassandra Tools</a></li>
          <li class="toctree-l1"><a class="reference internal" href="../troubleshooting/index.html">Troubleshooting</a></li>
          <li class="toctree-l1"><a class="reference internal" href="../development/index.html">Contributing to Cassandra</a></li>
          <li class="toctree-l1"><a class="reference internal" href="../faq/index.html">Frequently Asked Questions</a></li>
          <li class="toctree-l1"><a class="reference internal" href="../plugins/index.html">Third-Party Plugins</a></li>
          <li class="toctree-l1"><a class="reference internal" href="../bugs.html">Reporting Bugs</a></li>
          <li class="toctree-l1"><a class="reference internal" href="../contactus.html">Contact us</a></li>
        </ul>
      </div><!--/.nav-collapse -->
    </div>
  </div>
</div>
<div class="col-md-8">
<div class="content doc-content">
<div class="content-container">
<div class="section" id="physical-data-modeling">
<h1>Physical Data Modeling<a class="headerlink" href="#physical-data-modeling" title="Permalink to this headline"></a></h1>
<p>Once you have a logical data model defined, creating the physical model
is a relatively simple process.</p>
<p>You walk through each of the logical model tables, assigning types to
each item. You can use any valid <a class="reference internal" href="../cql/types.html#data-types"><span class="std std-ref">CQL data type</span></a>,
including the basic types, collections, and user-defined types. You may
identify additional user-defined types that can be created to simplify
your design.</p>
<p>After you’ve assigned data types, you analyze the model by performing
size calculations and testing out how the model works. You may make some
adjustments based on your findings. Once again let’s cover the data
modeling process in more detail by working through an example.</p>
<p>Before getting started, let’s look at a few additions to the Chebotko
notation for physical data models. To draw physical models, you need to
be able to add the typing information for each column. This figure
shows the addition of a type for each column in a sample table.</p>
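<img alt="Sample table in extended Chebotko notation showing its keyspace, a data type for each column, and markers for collection, user-defined type, static, and secondary index columns" src="../_images/data_modeling_chebotko_physical.png" />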
<p>The figure includes a designation of the keyspace containing each table
and visual cues for columns represented using collections and
user-defined types. Note the designation of static columns and
secondary index columns. There is no restriction on assigning these as
part of a logical model, but they are typically more of a physical data
modeling concern.</p>
<div class="section" id="hotel-physical-data-model">
<h2>Hotel Physical Data Model<a class="headerlink" href="#hotel-physical-data-model" title="Permalink to this headline"></a></h2>
<p>Now let’s get to work on the physical model. First, you need keyspaces
to contain the tables. To keep the design relatively simple, create a
<code class="docutils literal notranslate"><span class="pre">hotel</span></code> keyspace to contain tables for hotel and availability
data, and a <code class="docutils literal notranslate"><span class="pre">reservation</span></code> keyspace to contain tables for reservation
and guest data. In a real system, you might divide the tables across even
more keyspaces in order to separate concerns.</p>
<p>For the <code class="docutils literal notranslate"><span class="pre">hotels</span></code> table, use Cassandra’s <code class="docutils literal notranslate"><span class="pre">text</span></code> type to
represent the hotel’s <code class="docutils literal notranslate"><span class="pre">id</span></code>. For the address, create an
<code class="docutils literal notranslate"><span class="pre">address</span></code> user-defined type. Use the <code class="docutils literal notranslate"><span class="pre">text</span></code> type to represent the
phone number, as there is considerable variance in the formatting of
numbers between countries.</p>
<p>While it would make sense to use the <code class="docutils literal notranslate"><span class="pre">uuid</span></code> type for attributes such
as the <code class="docutils literal notranslate"><span class="pre">hotel_id</span></code>, this document uses mostly <code class="docutils literal notranslate"><span class="pre">text</span></code> attributes as
identifiers, to keep the samples simple and readable. For example, a
common convention in the hospitality industry is to reference properties
by short codes like “AZ123” or “NY229”. This example uses these values
for <code class="docutils literal notranslate"><span class="pre">hotel_ids</span></code>, while acknowledging they are not necessarily globally
unique.</p>
<p>You’ll find that it’s often helpful to use unique IDs to uniquely
reference elements, and to use these <code class="docutils literal notranslate"><span class="pre">uuids</span></code> as references in tables
representing other entities. This helps to minimize coupling between
different entity types. This may prove especially effective if you are
using a microservice architectural style for your application, in which
there are separate services responsible for each entity type.</p>
<p>As you work to create physical representations of various tables in the
logical hotel data model, you use the same approach. The resulting design
is shown in this figure:</p>
<img alt="Physical data model for the hotel keyspace, including the hotels and hotels_by_poi tables and the address user-defined type" src="../_images/data_modeling_hotel_physical.png" />
<p>Note that the <code class="docutils literal notranslate"><span class="pre">address</span></code> type is also included in the design. It
is designated with an asterisk to denote that it is a user-defined type,
and has no primary key columns identified. This type is used in
the <code class="docutils literal notranslate"><span class="pre">hotels</span></code> and <code class="docutils literal notranslate"><span class="pre">hotels_by_poi</span></code> tables.</p>
<p>User-defined types are frequently used to help reduce duplication of
non-primary key columns, as was done with the <code class="docutils literal notranslate"><span class="pre">address</span></code>
user-defined type. This can reduce complexity in the design.</p>
<p>Remember that the scope of a UDT is the keyspace in which it is defined.
To use <code class="docutils literal notranslate"><span class="pre">address</span></code> in the <code class="docutils literal notranslate"><span class="pre">reservation</span></code> keyspace defined below,
you’ll have to declare it again. This is just one of the many
trade-offs you have to make in data model design.</p>
</div>
<div class="section" id="reservation-physical-data-model">
<h2>Reservation Physical Data Model<a class="headerlink" href="#reservation-physical-data-model" title="Permalink to this headline"></a></h2>
<p>Now, let’s examine reservation tables in the design.
Remember that the logical model contained three denormalized tables to
support queries for reservations by confirmation number, guest, and
hotel and date. For the first iteration of your physical data model
design, assume you’re going to manage this denormalization
manually. Note that this design could be revised to use Cassandra’s
(experimental) materialized view feature.</p>
<img alt="Physical data model for the reservation keyspace, including the reservation tables and the address user-defined type" src="../_images/data_modeling_reservation_physical.png" />
<p>Note that the <code class="docutils literal notranslate"><span class="pre">address</span></code> type is reproduced in this keyspace and
<code class="docutils literal notranslate"><span class="pre">guest_id</span></code> is modeled as a <code class="docutils literal notranslate"><span class="pre">uuid</span></code> type in all of the tables.</p>
<p><em>Material adapted from Cassandra, The Definitive Guide. Published by
O’Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt.
All rights reserved. Used with permission.</em></p>
</div>
</div>
<div class="doc-prev-next-links" role="navigation" aria-label="footer navigation">
  <!-- Link to the following chapter; carries Bootstrap's pull-right class and access key "n". -->
  <a class="btn btn-default pull-right "
     href="data_modeling_refining.html"
     title="Evaluating and Refining Data Models"
     role="button"
     accesskey="n">Next <span class="glyphicon glyphicon-circle-arrow-right" aria-hidden="true"></span></a>
  <!-- Link to the preceding chapter; access key "p". -->
  <a class="btn btn-default"
     href="data_modeling_logical.html"
     title="Logical Data Modeling"
     role="button"
     accesskey="p"><span class="glyphicon glyphicon-circle-arrow-left" aria-hidden="true"></span> Previous</a>
</div>
</div>
</div>
</div>
</div>
</div>
<hr />
<footer>
  <!-- Page footer: social links on the left, trademark notice on the right. -->
  <div class="container">
    <div class="col-md-4 social-blk">
      <span class="social">
        <a href="https://twitter.com/cassandra"
           class="twitter-follow-button"
           data-show-count="false" data-size="large">Follow @cassandra</a>
        <!--
          Injects Twitter's widgets.js once, guarded by the "twitter-wjs" element id.
          A second copy of this loader used to follow the hashtag link below; the id guard
          made it a no-op, so a single loader serves both links.
        -->
        <script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script>
        <a href="https://twitter.com/intent/tweet?button_hashtag=cassandra"
           class="twitter-hashtag-button"
           data-size="large"
           data-related="ApacheCassandra">Tweet #cassandra</a>
      </span>
      <!-- Icon-only link: aria-label names it, and the icon glyph itself is hidden from assistive technology. -->
      <a class="subscribe-rss icon-link" href="/feed.xml" title="Subscribe to Blog via RSS" aria-label="Subscribe to Blog via RSS">
        <span><i class="fa fa-rss" aria-hidden="true"></i></span>
      </a>
    </div>
    <div class="col-md-8 trademark">
      <p>&copy; 2016 <a href="http://apache.org">The Apache Software Foundation</a>.
      Apache, the Apache feather logo, and Apache Cassandra are trademarks of The Apache Software Foundation.
      </p>
    </div>
  </div><!-- /.container -->
</footer>
<!-- Javascript. Placed here so pages load faster -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
<script src="./../../../js/underscore-min.js"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" crossorigin="anonymous"></script>
<script src="./../../../js/doctools.js"></script>
<script src="./../../../js/searchtools.js"></script>
<!-- Global options object for the documentation scripts loaded above. -->
<script type="text/javascript"> var DOCUMENTATION_OPTIONS = { URL_ROOT: "", VERSION: "", COLLAPSE_INDEX: false, FILE_SUFFIX: ".html", HAS_SOURCE: false, SOURCELINK_SUFFIX: ".txt" }; </script>
<!--
  Google Analytics (legacy ga.js), step 1 of 2: write the loader tag.
  Markup emitted by document.write is parsed only after the current script element
  has finished, so ga.js (which defines _gat) cannot be used within this same block.
  The tracker call therefore sits in its own script element below.
  NOTE(review): ga.js is Google's legacy library; confirm this property still collects data.
-->
<script type="text/javascript">
  var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
  document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<!-- Google Analytics, step 2 of 2: runs after ga.js has loaded, then records the pageview. -->
<script type="text/javascript">
  try {
    var pageTracker = _gat._getTracker("UA-11583863-1");
    pageTracker._trackPageview();
  } catch(err) {
    // Best effort: a blocked or failed analytics load must not break the page.
  }
</script>
</body>
</html>