blob: 7d0bbd8adab39c54eb8e9969c092caee3b5c904c [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<title>Apache BookKeeper&trade; - BP-34: 128 bits support</title>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="/css/normalize.css">
<link rel="stylesheet" href="/css/tippy.css">
<link rel="stylesheet" href="/css/style.css">
<link rel="shortcut icon" href="/img/favicon.ico">
<script src="/js/tippy.min.js"></script>
<script type="text/javascript">
// Scroll the viewport up by 25 pixels from its current position.
// NOTE(review): presumably this keeps an anchored heading from sitting flush
// against the top of the window after a jump — confirm against the page CSS.
function shiftWindow() {
  window.scrollBy(0, -25);
}

// Apply the offset whenever the URL fragment changes and whenever the page is shown.
["hashchange", "pageshow"].forEach(function (eventName) {
  window.addEventListener(eventName, shiftWindow);
});

// Apply the offset once, but only when the current URL carries a fragment.
// NOTE(review): no caller of load() is visible in this part of the file.
function load() {
  if (!window.location.hash) {
    return;
  }
  shiftWindow();
}
</script>
</head>
<body class="body">
<main class="main">
<nav class="navbar bk-topnav">
<div class="navbar-brand">
<a class="navbar-item bk-brand" href="/">
Apache BookKeeper&trade;
</a>
<div class="navbar-burger burger" data-target="bkNav">
<span></span>
<span></span>
<span></span>
</div>
</div>
<div id="bkNav" class="navbar-menu">
<div class="navbar-start">
<div class="navbar-item has-dropdown is-hoverable">
<a class="navbar-link">Documentation</a>
<div class="navbar-dropdown is-boxed">
<a class="navbar-item" href="/docs/latest/overview/overview">
Version 4.15.0-SNAPSHOT
<span class="tag is-warning">Development</span>
</a>
<a class="navbar-item" href="/docs/latest/api/javadoc">
<span class="icon bk-javadoc-icon">
<img src="/img/java-icon.svg" alt="">
</span>
Javadoc
</a>
<hr class="dropdown-divider">
<a class="navbar-item" href="/docs/4.14.0/overview/overview">
Release 4.14.0
</a>
<a class="navbar-item" href="/docs/4.13.0/overview/overview">
Release 4.13.0
</a>
<a class="navbar-item" href="/docs/4.12.1/overview/overview">
Release 4.12.1
</a>
<a class="navbar-item" href="/docs/4.12.0/overview/overview">
Release 4.12.0
</a>
<a class="navbar-item" href="/docs/4.11.1/overview/overview">
Release 4.11.1
<span class="tag is-success">Stable</span>
</a>
<a class="navbar-item" href="/docs/4.11.0/overview/overview">
Release 4.11.0
</a>
<a class="navbar-item" href="/docs/4.10.0/overview/overview">
Release 4.10.0
</a>
<a class="navbar-item" href="/archives/docs/r4.9.2">
Release 4.9.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.9.1">
Release 4.9.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.9.0">
Release 4.9.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.8.2">
Release 4.8.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.8.1">
Release 4.8.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.8.0">
Release 4.8.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.7.3">
Release 4.7.3
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.7.2">
Release 4.7.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.7.1">
Release 4.7.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.7.0">
Release 4.7.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.6.2">
Release 4.6.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.6.1">
Release 4.6.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.6.0">
Release 4.6.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.5.1">
Release 4.5.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.5.0">
Release 4.5.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.4.0">
Release 4.4.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.3.2">
Release 4.3.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.3.1">
Release 4.3.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.3.0">
Release 4.3.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.2.4">
Release 4.2.4
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.2.3">
Release 4.2.3
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.2.2">
Release 4.2.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.2.1">
Release 4.2.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.2.0">
Release 4.2.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.1.0">
Release 4.1.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.0.0">
Release 4.0.0
<span class="tag is-warning">EOL</span>
</a>
</div>
</div>
<div class="navbar-item has-dropdown is-hoverable">
<a class="navbar-link">Community</a>
<div class="navbar-dropdown is-boxed">
<a class="navbar-item" href="/community/mailing-lists">Mailing lists</a>
<a class="navbar-item" href="/community/slack">Slack</a>
<a class="navbar-item" href="https://github.com/apache/bookkeeper/issues">Github Issues</a>
<a class="navbar-item" href="/community/releases">Release Management</a>
<a class="navbar-item" href="/community/meeting">Community Meetings</a>
<hr class="dropdown-divider">
<a class="navbar-item" href="/community/contributing">Contribution Guide</a>
<a class="navbar-item" href="/community/coding_guide">Coding Guide</a>
<a class="navbar-item" href="/community/testing">Testing Guide</a>
<a class="navbar-item" href="/community/issue-report">Issue Report Guide</a>
<a class="navbar-item" href="/community/release_guide">Release Guide</a>
<hr class="dropdown-divider">
<a class="navbar-item" href="/community/presentations">Presentations</a>
<a class="navbar-item" href="/community/bookkeeper_proposals">BookKeeper Proposals</a>
</div>
</div>
<div class="navbar-item has-dropdown is-hoverable">
<a class="navbar-link">Project</a>
<div class="navbar-dropdown is-boxed">
<a class="navbar-item" href="/project/who">Who are we?</a>
<a class="navbar-item" href="/project/bylaws">Bylaws</a>
<a class="navbar-item" href="https://www.apache.org/licenses/">License</a>
<hr class="dropdown-divider">
<a class="navbar-item" href="/project/privacy">Privacy policy</a>
<a class="navbar-item" href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
<a class="navbar-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<div class="field is-grouped">
<p class="control">
<a class="button bk-twitter" href="https://twitter.com/asfbookkeeper">
<span class="icon">
<i class="fa fa-twitter"></i>
</span>
<span>Twitter</span>
</a>
</p>
<p class="control">
<a class="button" href="https://github.com/apache/bookkeeper">
<span class="icon">
<i class="fa fa-github"></i>
</span>
<span>GitHub</span>
</a>
</p>
<p class="control">
<a class="button is-primary" href="/releases">
<span class="icon">
<i class="fa fa-download"></i>
</span>
<span>Download</span>
</a>
</p>
</div>
</div>
</div>
</div>
</nav>
<div class="bk-community-container">
<div class="columns">
<div class="column is-12">
<header class="docs-title">
<nav class="level">
<div class="level-left">
<div class="level-item">
<h1 class="title">BP-34: 128 bits support</h1>
</div>
</div>
</nav>
</header>
<hr />
<div class="content is-medium">
<section class="bk-community-content">
<h3 id="motivation">Motivation</h3>
<p>BookKeeper coordinates with a metadata store to generate a cluster wide <code class="highlighter-rouge">ledgerId</code>.
Currently this is a signed <code class="highlighter-rouge">64 bit</code> number (effectively 63 bits). This method works
great because we have a centralized metadata store for coordinating the id generation.
However this method may not scale as the cluster size and number of ledgers grow.</p>
<p><a href="https://en.wikipedia.org/wiki/Globally_unique_identifier">Universally unique identifier - Wikipedia</a>
is a preferred way to generate decentralized globally unique IDs and it takes <code class="highlighter-rouge">128 bits</code>.
This method can scale well as it doesn’t need a centralized coordination.</p>
<p>This BP proposes the changes for increasing ledger id from <code class="highlighter-rouge">63 bits</code> to <code class="highlighter-rouge">128 bits</code>.</p>
<h3 id="128-bits">128 bits</h3>
<p>Since there is no native support for <code class="highlighter-rouge">128 bits</code> in either Java or
<a href="https://github.com/google/protobuf/issues/2180">Protobuf</a>, we have to break <code class="highlighter-rouge">128 bits</code>
into 2 <code class="highlighter-rouge">64 bits</code> numbers for representing the <code class="highlighter-rouge">128 bits</code> id:</p>
<ul>
<li>ledger-id-msb: the most significant 64 bits, bit 64 - 127</li>
<li>ledger-id-lsb: the least significant 64 bits, bit 0 - 63</li>
</ul>
<p>For backward compatibility, the <code class="highlighter-rouge">ledger-id-lsb</code> is the current <code class="highlighter-rouge">64 bits</code> ledger-id.
The <code class="highlighter-rouge">ledger-id-msb</code> will be added as a new field in both API and protocol.</p>
<p>I am proposing calling <code class="highlighter-rouge">ledger-id-msb</code> as <code class="highlighter-rouge">ledger-scope-id</code>. So the current 64bits <code class="highlighter-rouge">ledgerId</code> and
the newly introduced 64bits <code class="highlighter-rouge">ledgerScopeId</code> together will be forming the new <code class="highlighter-rouge">128 bits</code> ledger id.</p>
<p>The default <code class="highlighter-rouge">ledgerScopeId</code> is <code class="highlighter-rouge">0</code>. That means any ledgers created prior to this change are allocated
under scope <code class="highlighter-rouge">0</code>. Hence it maintains backward compatibility during upgrade.</p>
<p>The combination of <code class="highlighter-rouge">ledgerScopeId</code> and <code class="highlighter-rouge">ledgerId</code> forms the <code class="highlighter-rouge">128 bits</code> ledger id. We can introduce a
hex representation of this <code class="highlighter-rouge">128 bits</code> ledger id - <code class="highlighter-rouge">ledgerQualifiedName</code> . This <code class="highlighter-rouge">ledgerQualifiedName</code> can
be useful for CLI tooling, REST API and troubleshooting purposes. The API internally can convert
<code class="highlighter-rouge">ledgerQualifiedName</code> to <code class="highlighter-rouge">ledgerScopeId</code> and <code class="highlighter-rouge">ledgerId</code>.</p>
<h3 id="public-interfaces">Public Interfaces</h3>
<h4 id="api-change">API Change</h4>
<p>The API will be introducing <code class="highlighter-rouge">ledgerScopeId</code> across the interfaces. This field will be optional and default to <code class="highlighter-rouge">0</code>.</p>
<h5 id="handle">Handle</h5>
<p>Introduce a new method <code class="highlighter-rouge">getScopeId</code> for representing the scope id (the most significant <code class="highlighter-rouge">64 bits</code> of the <code class="highlighter-rouge">128 bits</code> ledger id).</p>
<div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">Handle</span> <span class="kd">extends</span> <span class="nc">AutoCloseable</span> <span class="o">{</span>
<span class="o">...</span>
<span class="cm">/**
* Return the ledger scope id. The most significant 64 bits of 128 bits.
*/</span>
<span class="kt">long</span> <span class="nf">getScopeId</span><span class="o">();</span>
<span class="cm">/**
* Return the ledger id. The least significant 64 bits of 128 bits.
*/</span>
<span class="kt">long</span> <span class="nf">getId</span><span class="o">();</span>
<span class="o">...</span>
<span class="o">}</span>
</code></pre></div></div>
<h5 id="create-ledgeradv">Create LedgerAdv</h5>
<p>Introduce a new method <code class="highlighter-rouge">withLedgerScopeId</code> in <code class="highlighter-rouge">CreateAdvBuilder</code> for providing <code class="highlighter-rouge">scopeId</code>
(the most significant 64 bits for 128 bits ledger id) on creating a ledger.</p>
<div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">CreateAdvBuilder</span> <span class="kd">extends</span> <span class="nc">OpBuilder</span><span class="o">&lt;</span><span class="nc">WriteHandle</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="o">...</span>
<span class="cm">/**
* Set the scope id for the newly created ledger.
* If no explicit scopeId is passed, the new ledger
* will be created under scope `0`.
*/</span>
<span class="nc">CreateAdvBuilder</span> <span class="nf">withLedgerScopeId</span><span class="o">(</span><span class="kt">long</span> <span class="n">scopeId</span><span class="o">);</span>
<span class="o">...</span>
<span class="o">}</span>
</code></pre></div></div>
<h5 id="open-ledger">Open Ledger</h5>
<p>Introduce a new method <code class="highlighter-rouge">withLedgerScopeId</code> in <code class="highlighter-rouge">OpenBuilder</code> for providing <code class="highlighter-rouge">scopeId</code>
(the most significant 64 bits for 128 bits ledger id) on opening a ledger.</p>
<div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">OpenBuilder</span> <span class="kd">extends</span> <span class="nc">OpBuilder</span><span class="o">&lt;</span><span class="nc">ReadHandle</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="o">...</span>
<span class="cm">/**
* Set the scope id of the ledger to open.
*/</span>
<span class="nc">OpenBuilder</span> <span class="nf">withLedgerScopeId</span><span class="o">(</span><span class="kt">long</span> <span class="n">scopeId</span><span class="o">);</span>
<span class="o">...</span>
<span class="o">}</span>
</code></pre></div></div>
<h5 id="delete-ledger">Delete Ledger</h5>
<p>Introduce a new method <code class="highlighter-rouge">withLedgerScopeId</code> in <code class="highlighter-rouge">DeleteBuilder</code> for providing <code class="highlighter-rouge">scopeId</code>
(the most significant 64 bits for 128 bits ledger id) on deleting a ledger.</p>
<div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">DeleteBuilder</span> <span class="kd">extends</span> <span class="nc">OpBuilder</span><span class="o">&lt;</span><span class="nc">Void</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="o">...</span>
<span class="cm">/**
* Set the scope id of the ledger to delete.
*/</span>
<span class="nc">DeleteBuilder</span> <span class="nf">withLedgerScopeId</span><span class="o">(</span><span class="kt">long</span> <span class="n">scopeId</span><span class="o">);</span>
<span class="o">...</span>
<span class="o">}</span>
</code></pre></div></div>
<h4 id="cli">CLI</h4>
<p>All BookKeeper CLI tools will be updated with an additional option <code class="highlighter-rouge">--ledger-scope-id</code>.
Optionally we can add an option <code class="highlighter-rouge">--ledger-qualified-name</code> (the hex representation of 128 bits).
Internally all the CLI tools will convert ledger qualified name to <code class="highlighter-rouge">ledgerId</code> and <code class="highlighter-rouge">ledgerScopeId</code>.</p>
<h4 id="rest">REST</h4>
<ol>
<li>All ledger related endpoints will be adding a new parameter <code class="highlighter-rouge">ledger_scope_id</code>.</li>
<li><code class="highlighter-rouge">ListLedgerService</code> only supports listing ledgers under a given ledger scope id.
If <code class="highlighter-rouge">ledger_scope_id</code> is missing, it will be listing ledgers under scope <code class="highlighter-rouge">0</code>.</li>
</ol>
<h4 id="wire-protocol">Wire Protocol</h4>
<blockquote>
<p>There is no plan to support 128 bits in the v2 protocol, due to limitations of the v2 protocol.
So any operation in the v2 protocol with a scope id not equal to 0 will fail immediately with
an <code class="highlighter-rouge">ILLEGAL_OP</code> exception.</p>
</blockquote>
<p>All the request and response messages will be adding an optional field <code class="highlighter-rouge">optional int64 ledgerScopeId</code>.</p>
<h4 id="entry-format">Entry Format</h4>
<p>Currently all the entries written to bookies are encoded in a certain format, including <code class="highlighter-rouge">metadata</code>,
<code class="highlighter-rouge">digest code</code> and <code class="highlighter-rouge">payload</code>. The entry format is not <em>versioned</em>.</p>
<p>In order to support adding another field <code class="highlighter-rouge">ledgerScopeId</code> in the <code class="highlighter-rouge">metadata</code> section, we are introducing
<code class="highlighter-rouge">version</code> in the entry format.</p>
<h5 id="entry-format-v1">Entry Format V1</h5>
<div class="language-json highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="err">Entry</span><span class="w"> </span><span class="err">Format</span><span class="w"> </span><span class="err">V</span><span class="mi">1</span><span class="w">
</span><span class="err">===============</span><span class="w">
</span><span class="err">---</span><span class="w"> </span><span class="err">header</span><span class="w"> </span><span class="err">---</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">0</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">7</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Ledger</span><span class="w"> </span><span class="err">ID</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">8</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">15</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Entry</span><span class="w"> </span><span class="err">ID</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">16</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">23</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">LastAddConfirmed</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">24</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">31</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Length</span><span class="w">
</span><span class="err">---</span><span class="w"> </span><span class="err">digest</span><span class="w"> </span><span class="err">---</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">32</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="err">(</span><span class="mi">32</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="err">x</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">1</span><span class="err">))</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Digest</span><span class="w"> </span><span class="err">Code</span><span class="w"> </span><span class="err">(e.g.</span><span class="w"> </span><span class="err">CRC</span><span class="mi">32</span><span class="err">)</span><span class="w">
</span><span class="err">---</span><span class="w"> </span><span class="err">payload</span><span class="w"> </span><span class="err">---</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">((</span><span class="mi">32</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="err">x)</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Payload</span><span class="w">
</span></code></pre></div></div>
<blockquote>
<p><code class="highlighter-rouge">x</code> is the length of digest code.</p>
</blockquote>
<blockquote>
<p>Prior to introducing <code class="highlighter-rouge">ledgerScopeId</code>, ledgerId is assumed to be a positive value.</p>
</blockquote>
<h5 id="entry-format-v2">Entry Format V2</h5>
<div class="language-json highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="err">Entry</span><span class="w"> </span><span class="err">Format</span><span class="w"> </span><span class="err">V</span><span class="mi">2</span><span class="w">
</span><span class="err">===============</span><span class="w">
</span><span class="err">---</span><span class="w"> </span><span class="err">header</span><span class="w"> </span><span class="err">---</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">0</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">7</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Metadata</span><span class="w"> </span><span class="err">Flags</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">8</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">15</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Ledger</span><span class="w"> </span><span class="err">Scope</span><span class="w"> </span><span class="err">ID</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">16</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">23</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Ledger</span><span class="w"> </span><span class="err">ID</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">24</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">31</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Entry</span><span class="w"> </span><span class="err">ID</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">32</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">39</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">LastAddConfirmed</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">40</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">47</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Length</span><span class="w">
</span><span class="err">---</span><span class="w"> </span><span class="err">digest</span><span class="w"> </span><span class="err">---</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">48</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="err">(</span><span class="mi">48</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="err">x</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">1</span><span class="err">))</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Digest</span><span class="w"> </span><span class="err">Code</span><span class="w"> </span><span class="err">(e.g.</span><span class="w"> </span><span class="err">CRC</span><span class="mi">32</span><span class="err">)</span><span class="w">
</span><span class="err">---</span><span class="w"> </span><span class="err">payload</span><span class="w"> </span><span class="err">---</span><span class="w">
</span><span class="err">Bytes</span><span class="w"> </span><span class="err">((</span><span class="mi">48</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="err">x)</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Payload</span><span class="w">
</span></code></pre></div></div>
<blockquote>
<p><code class="highlighter-rouge">x</code> is the length of digest code.</p>
</blockquote>
<h6 id="metadata-flags">Metadata Flags</h6>
<div class="language-json highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="err">Metadata:</span><span class="w"> </span><span class="mi">1</span><span class="w"> </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(Long)</span><span class="w">
</span><span class="err">------------------------</span><span class="w">
</span><span class="mi">0</span><span class="err">x</span><span class="w"> </span><span class="mi">0</span><span class="w"> </span><span class="mi">0</span><span class="w">
</span><span class="err">|__|</span><span class="w">
</span><span class="err">|</span><span class="w">
</span><span class="err">version</span><span class="w">
</span><span class="err">----</span><span class="w">
</span><span class="err">Bit</span><span class="w"> </span><span class="mi">0</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">3</span><span class="err">:</span><span class="w"> </span><span class="err">digest</span><span class="w"> </span><span class="err">type</span><span class="w"> </span><span class="err">(e.g.</span><span class="w"> </span><span class="err">CRC</span><span class="mi">32</span><span class="err">,</span><span class="w"> </span><span class="err">CRC</span><span class="mi">32</span><span class="err">C</span><span class="w"> </span><span class="err">and</span><span class="w"> </span><span class="err">such)</span><span class="w">
</span><span class="err">Bit</span><span class="w"> </span><span class="mi">4</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">7</span><span class="err">:</span><span class="w"> </span><span class="err">version,</span><span class="w"> </span><span class="err">the</span><span class="w"> </span><span class="err">most</span><span class="w"> </span><span class="err">significant</span><span class="w"> </span><span class="err">bit</span><span class="w"> </span><span class="err">of</span><span class="w"> </span><span class="err">this</span><span class="w"> </span><span class="err">byte</span><span class="w"> </span><span class="err">will</span><span class="w"> </span><span class="err">be</span><span class="w"> </span><span class="err">always</span><span class="w"> </span><span class="err">set</span><span class="w"> </span><span class="err">to</span><span class="w"> </span><span class="mi">1</span><span class="err">.</span><span class="w">
</span><span class="err">it</span><span class="w"> </span><span class="err">will</span><span class="w"> </span><span class="err">be</span><span class="w"> </span><span class="err">used</span><span class="w"> </span><span class="err">for</span><span class="w"> </span><span class="err">differentiating</span><span class="w"> </span><span class="err">entry</span><span class="w"> </span><span class="err">format</span><span class="w"> </span><span class="err">v</span><span class="mi">1</span><span class="w"> </span><span class="err">and</span><span class="w"> </span><span class="err">v</span><span class="mi">2</span><span class="err">.</span><span class="w">
</span></code></pre></div></div>
<p>We are setting the most significant bit to be <code class="highlighter-rouge">1</code>. So the first byte in entry v2 will
be a negative value, which can be used for differentiating entry format v1 and v2.
The version will be encoded into the first byte. The version will be used for describing
the entry format.</p>
<h5 id="decoding-entry">Decoding Entry</h5>
<p>The pseudo code for decoding an entry is as follows:</p>
<div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code>
<span class="nc">ByteBuf</span> <span class="n">entry</span> <span class="o">=</span> <span class="o">...;</span>
<span class="kt">int</span> <span class="n">metadataFlags</span> <span class="o">=</span> <span class="n">entry</span><span class="o">.</span><span class="na">getByte</span><span class="o">();</span>
<span class="k">if</span> <span class="o">(</span><span class="n">metadataFlags</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="o">)</span> <span class="o">{</span> <span class="c1">// the entry is encoded in v1 format</span>
<span class="c1">// decoding the entry in v1 format</span>
<span class="o">...</span>
<span class="o">}</span> <span class="k">else</span> <span class="o">{</span>
<span class="c1">// decoding the entry in v2 format</span>
<span class="o">}</span>
</code></pre></div></div>
<h4 id="bookie-storage">Bookie Storage</h4>
<h5 id="journal">Journal</h5>
<p>A new method should be added in journal <code class="highlighter-rouge">WriteCallback</code> to handle <code class="highlighter-rouge">ledgerScopeId</code>.</p>
<div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">WriteCallback</span> <span class="o">{</span>
<span class="kt">void</span> <span class="nf">writeComplete</span><span class="o">(</span><span class="kt">int</span> <span class="n">rc</span><span class="o">,</span>
<span class="kt">long</span> <span class="n">ledgerScopeId</span><span class="o">,</span>
<span class="kt">long</span> <span class="n">ledgerId</span><span class="o">,</span>
<span class="kt">long</span> <span class="n">entryId</span><span class="o">,</span>
<span class="nc">BookieSocketAddress</span> <span class="n">addr</span><span class="o">,</span>
<span class="nc">Object</span> <span class="n">ctx</span><span class="o">);</span>
<span class="k">default</span> <span class="kt">void</span> <span class="nf">writeComplete</span><span class="o">(</span><span class="kt">int</span> <span class="n">rc</span><span class="o">,</span>
<span class="kt">long</span> <span class="n">ledgerId</span><span class="o">,</span>
<span class="kt">long</span> <span class="n">entryId</span><span class="o">,</span>
<span class="nc">BookieSocketAddress</span> <span class="n">addr</span><span class="o">,</span>
<span class="nc">Object</span> <span class="n">ctx</span><span class="o">)</span> <span class="o">{</span>
<span class="n">writeComplete</span><span class="o">(</span><span class="n">rc</span><span class="o">,</span> <span class="mi">0L</span><span class="o">,</span> <span class="n">ledgerId</span><span class="o">,</span> <span class="n">entryId</span><span class="o">,</span> <span class="n">addr</span><span class="o">,</span> <span class="n">ctx</span><span class="o">);</span>
<span class="o">}</span>
<span class="o">}</span>
</code></pre></div></div>
<p>The journal should be changed to be able to retrieve <code class="highlighter-rouge">ledgerScopeId</code> from the entry
payload based on <a href="#entry-format">Entry Format</a>.</p>
<h5 id="ledger-storage">Ledger Storage</h5>
<h6 id="entrylogger">EntryLogger</h6>
<ol>
<li>Methods in <code class="highlighter-rouge">EntryLogger</code> should be able to accept <code class="highlighter-rouge">ledgerScopeId</code> as a parameter.</li>
<li>EntryLogger should be updated to retrieve <code class="highlighter-rouge">ledgerScopeId</code> from the entry payload
based on <a href="#entry-format">Entry Format</a>.</li>
</ol>
<h6 id="entrymemtable">EntryMemTable</h6>
<p><code class="highlighter-rouge">ledgerScopeId</code> should be added as part of <code class="highlighter-rouge">EntryKey</code>.</p>
<h6 id="indexpersistencemgr">IndexPersistenceMgr</h6>
<p>Currently the ledger index files (64 bits) are stored in a 2-level hierarchy of
directories - <code class="highlighter-rouge">&lt;msb-32bits-hex&gt;/&lt;lsb-32bits-hex&gt;/&lt;ledger-id-hex&gt;.idx</code>.</p>
<p>If <code class="highlighter-rouge">ledgerScopeId</code> is 0, it will be using existing scheme for storing and retrieving
ledger index files.</p>
<p>If <code class="highlighter-rouge">ledgerScopeId</code> is not 0, that means the ledgers were produced by new clients that
support 128 bits; those ledgers will be stored in a 4-level hierarchy of
directories -
<code class="highlighter-rouge">&lt;msb-32bits-hex-ledger-scope-id&gt;/&lt;lsb-32bits-hex-ledger-scope-id&gt;/&lt;msb-32bits-hex-ledger-id&gt;/&lt;lsb-32bits-hex-ledger-id&gt;</code>.</p>
<p>All the file info caches should be updated to use <code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId&gt;</code>
as index keys.</p>
<h6 id="indexinmempagemgr">IndexInMemPageMgr</h6>
<p>The LRU pages map will be updated to use <code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId&gt;</code> as index
keys.</p>
<h6 id="dbledgerstorage">DBLedgerStorage</h6>
<p>Currently DBLedgerStorage uses <code class="highlighter-rouge">&lt;ledgerId, entryId&gt;</code> as the index key for indexing the
location of each entry.</p>
<p>Similar to <code class="highlighter-rouge">SortedLedgerStorage</code> and <code class="highlighter-rouge">InterleavedLedgerStorage</code>, ledgers whose
<code class="highlighter-rouge">ledgerScopeId</code> is 0 will use the existing scheme for storing their entry locations.</p>
<p>For ledgers whose <code class="highlighter-rouge">ledgerScopeId</code> is not 0, their entry locations will be stored in a new RocksDB database,
whose index key will be <code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId, entryId&gt;</code>.</p>
<h4 id="metadata-store">Metadata Store</h4>
<h5 id="ledgermanager">LedgerManager</h5>
<p>All the interfaces should be updated to accept <code class="highlighter-rouge">ledgerScopeId</code>.</p>
<p>The actual implementation should decide how to store metadata
for <code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId&gt;</code>.</p>
<h6 id="zookeeper-ledger-manager">ZooKeeper Ledger Manager</h6>
<p>We need to introduce a LongLongHierarchicalLedgerManager for storing metadata
indexed by <code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId&gt;</code>.</p>
<p>If <code class="highlighter-rouge">ledgerScopeId</code> is 0, then it will fall back to <code class="highlighter-rouge">LongHierarchicalLedgerManager</code>,
so no behavior is changed.</p>
<p>If <code class="highlighter-rouge">ledgerScopeId</code> is not 0, those ledgers will be indexed in a new hierarchy
(possibly under a different znode).</p>
<h6 id="ledger-id-generation">Ledger ID generation</h6>
<p>When upgrading from 64 bits to 128 bits, we probably don’t need any centralized mechanism
for generating ledger id. It can be implemented using UUID generation.</p>
<p>This is especially true since we are supporting 128 bits by introducing <code class="highlighter-rouge">ledgerScopeId</code>. That means
an application of bookkeeper can decide its own way of generating its <code class="highlighter-rouge">scopeId</code>.
An application or even the bookkeeper client can generate its ledgerId using UUID generation,
then break the 128-bit UUID into two parts: one serves as <code class="highlighter-rouge">ledgerScopeId</code> and the other
one serves as <code class="highlighter-rouge">ledgerId</code>.</p>
<h6 id="etcd">Etcd</h6>
<p>Since Etcd has a better key/value representation, we can basically just combine
<code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId&gt;</code> as the index key for storing ledger metadata in Etcd.
No special consideration is needed.</p>
<h3 id="performance-concerns">Performance Concerns</h3>
<p>There shouldn’t be any performance difference when not using 128 bit ledger id
(<code class="highlighter-rouge">ledgerScopeId</code> is omitted).</p>
<p>Performance concerns can arise in the following areas:</p>
<ul>
<li><strong>Wire Protocol</strong>: additional 9 bytes will be added per entry, one byte for version
and 8 bytes for the msb of 128 bit ledger id</li>
<li><strong>Journal</strong>: additional 9 bytes will be added per entry (same as wire protocol).</li>
<li><strong>EntryLogger</strong>: additional 9 bytes will be added per entry (same as wire protocol)</li>
<li><strong>Memtable</strong>: additional 8 bytes will be added per indexed entry.</li>
<li><strong>FileInfo</strong>: there is no change to the index file format itself.</li>
<li><strong>IndexPersistenceManager</strong>: Files will be organized in more directory hierarchy.
It shouldn’t be a big deal.</li>
<li><strong>IndexInMemoryManager (LedgerCache)</strong>: additional 8 bytes per index page.</li>
<li><strong>DbLedgerStorage</strong>: additional 8 bytes per entry for entry location.</li>
<li><strong>Metadata</strong>: on zookeeper, we need a 128 bit ledger manager, which means a deeper znode
hierarchy than the 64 bit ledger manager. An Etcd-like key/value metadata store is probably
preferable for a 128 bit ledger manager.</li>
</ul>
<p>However increasing ledger id from 64 bits to 128 bits can get rid of the only remaining
central point, since we don’t need to use zookeeper for ledger id generation. The id
generation can become decentralized.</p>
<h3 id="proposed-changes">Proposed Changes</h3>
<p>All the required changes are described above. In summary, the changes can
happen in the following 2 phases:</p>
<ol>
<li>Ensure all components have <code class="highlighter-rouge">ledgerScopeId</code> added (wire protocol, storage and such),
assuming <code class="highlighter-rouge">ledgerScopeId</code> will be 0. The changes can happen independently, and each
must remain backward compatible with old clients.</li>
<li>Add <code class="highlighter-rouge">ledgerScopeId</code> to the public API, so applications can start using <code class="highlighter-rouge">ledgerScopeId</code>.
After that, applications can use UUID to generate ledger id and break UUID into two parts,
one is <code class="highlighter-rouge">ledgerScopeId</code>, while the other one is <code class="highlighter-rouge">ledgerId</code>.</li>
</ol>
<h3 id="compatibility-deprecation-and-migration-plan">Compatibility, Deprecation, and Migration Plan</h3>
<p>All the changes are backward compatible, since we are doing the changes by adding an optional
field <code class="highlighter-rouge">ledgerScopeId</code>. Old clients can still operate in the mode of <code class="highlighter-rouge">ledgerScopeId == 0</code>.
New applications can activate the feature by starting to use <code class="highlighter-rouge">ledgerScopeId</code> in the new API.</p>
<h3 id="test-plan">Test Plan</h3>
<ol>
<li>Add unit tests for individual components on introducing <code class="highlighter-rouge">ledgerScopeId</code>.</li>
<li>Add backward compatibility tests for individual components.</li>
<li>Add end-to-end integration tests for introducing <code class="highlighter-rouge">ledgerScopeId</code>.</li>
<li>Add end-to-end backward compatibility tests.</li>
</ol>
<h3 id="rejected-alternatives">Rejected Alternatives</h3>
<p>N/A</p>
</section>
</div>
</div>
</div>
</div>
</main>
<footer class="footer">
<div class="container">
<div class="content has-text-centered">
<p>
Copyright &copy; 2016 - 2021 <a href="https://www.apache.org/">The Apache Software Foundation</a>,<br /> licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, version 2.0</a>.
</p>
<p>
Apache BookKeeper, BookKeeper®, Apache®, the Apache feature logo, and the Apache BookKeeper logo are either registered trademarks or trademarks of The Apache Software Foundation.
</p>
</div>
</div>
</footer>
</body>
<script src="/js/app.js"></script>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!--
Google Analytics (analytics.js) bootstrap snippet.
The inline function registers the global name "ga" as a command queue
(calls made before the library loads are buffered in ga.q), stores the
current time in ga.l, then creates an async script element pointing at
https://www.google-analytics.com/analytics.js and inserts it before the
first script element of the document.
The two calls that follow queue a "create" command with the ID
UA-104419626-1 and a "send" command for a "pageview".
-->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-104419626-1', 'auto');
ga('send', 'pageview');
</script>
</html>