| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <!-- Encoding is declared first so it precedes any non-ASCII text (the title contains a trademark sign). --> |
| <meta charset="utf-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| |
| <title>Apache BookKeeper™ - BP-34: 128 bits support</title> |
| |
| <link rel="stylesheet" href="/css/normalize.css"> |
| <link rel="stylesheet" href="/css/tippy.css"> |
| <link rel="stylesheet" href="/css/style.css"> |
| |
| <link rel="shortcut icon" href="/img/favicon.ico"> |
| |
| <script src="/js/tippy.min.js"></script> |
| |
| <!-- Anchor offset helper: nudges the viewport up by 25px after in-page navigation. --> |
| <!-- NOTE(review): presumably compensates for the top navbar covering the anchor target; confirm against the CSS. --> |
| <script> |
| // Scroll the window up by 25 pixels relative to its current position. |
| var shiftWindow = function() { scrollBy(0, -25); }; |
| // Re-apply the offset whenever the URL fragment changes or the page is shown. |
| window.addEventListener("hashchange", shiftWindow); |
| window.addEventListener("pageshow", shiftWindow); |
| // Applies the offset once if the URL already carries a fragment. |
| // NOTE(review): no caller of load() is visible in this part of the page; verify it is still used. |
| function load() { if (window.location.hash) shiftWindow(); } |
| </script> |
| </head> |
| <body class="body"> |
| <main class="main"> |
| |
| <nav class="navbar bk-topnav"> |
| <div class="navbar-brand"> |
| <a class="navbar-item bk-brand" href="/"> |
| Apache BookKeeper™ |
| </a> |
| |
| <!-- Burger toggle for the collapsed menu; data-target names the element with id "bkNav". --> |
| <!-- NOTE(review): this toggle is a plain div with no keyboard or screen reader semantics visible here; consider a button element once the toggling script and CSS are confirmed. --> |
| <div class="navbar-burger burger" data-target="bkNav"> |
| <span></span> |
| <span></span> |
| <span></span> |
| </div> |
| </div> |
| |
| <div id="bkNav" class="navbar-menu"> |
| <div class="navbar-start"> |
| <div class="navbar-item has-dropdown is-hoverable"> |
| <a class="navbar-link">Documentation</a> |
| <div class="navbar-dropdown is-boxed"> |
| <a class="navbar-item" href="/docs/latest/overview/overview"> |
| Version 4.15.0-SNAPSHOT |
| <span class="tag is-warning">Development</span> |
| </a> |
| <a class="navbar-item" href="/docs/latest/api/javadoc"> |
| <span class="icon bk-javadoc-icon"> |
| <!-- Decorative icon: the adjacent "Javadoc" text already names the link, so the alt text is empty. --> |
| <img src="/img/java-icon.svg" alt=""> |
| </span> |
| Javadoc |
| </a> |
| <hr class="dropdown-divider"> |
| |
| <a class="navbar-item" href="/docs/4.14.0/overview/overview"> |
| Release 4.14.0 |
| |
| </a> |
| |
| <a class="navbar-item" href="/docs/4.13.0/overview/overview"> |
| Release 4.13.0 |
| |
| </a> |
| |
| <a class="navbar-item" href="/docs/4.12.1/overview/overview"> |
| Release 4.12.1 |
| |
| </a> |
| |
| <a class="navbar-item" href="/docs/4.12.0/overview/overview"> |
| Release 4.12.0 |
| |
| </a> |
| |
| <a class="navbar-item" href="/docs/4.11.1/overview/overview"> |
| Release 4.11.1 |
| |
| <span class="tag is-success">Stable</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/docs/4.11.0/overview/overview"> |
| Release 4.11.0 |
| |
| </a> |
| |
| <a class="navbar-item" href="/docs/4.10.0/overview/overview"> |
| Release 4.10.0 |
| |
| </a> |
| |
| |
| <a class="navbar-item" href="/archives/docs/r4.9.2"> |
| Release 4.9.2 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.9.1"> |
| Release 4.9.1 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.9.0"> |
| Release 4.9.0 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.8.2"> |
| Release 4.8.2 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.8.1"> |
| Release 4.8.1 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.8.0"> |
| Release 4.8.0 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.7.3"> |
| Release 4.7.3 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.7.2"> |
| Release 4.7.2 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.7.1"> |
| Release 4.7.1 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.7.0"> |
| Release 4.7.0 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.6.2"> |
| Release 4.6.2 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.6.1"> |
| Release 4.6.1 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.6.0"> |
| Release 4.6.0 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.5.1"> |
| Release 4.5.1 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.5.0"> |
| Release 4.5.0 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.4.0"> |
| Release 4.4.0 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.3.2"> |
| Release 4.3.2 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.3.1"> |
| Release 4.3.1 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.3.0"> |
| Release 4.3.0 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.2.4"> |
| Release 4.2.4 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.2.3"> |
| Release 4.2.3 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.2.2"> |
| Release 4.2.2 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.2.1"> |
| Release 4.2.1 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.2.0"> |
| Release 4.2.0 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.1.0"> |
| Release 4.1.0 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| <a class="navbar-item" href="/archives/docs/r4.0.0"> |
| Release 4.0.0 |
| |
| <span class="tag is-warning">EOL</span> |
| |
| </a> |
| |
| </div> |
| </div> |
| |
| <div class="navbar-item has-dropdown is-hoverable"> |
| <a class="navbar-link">Community</a> |
| <div class="navbar-dropdown is-boxed"> |
| <a class="navbar-item" href="/community/mailing-lists">Mailing lists</a> |
| <a class="navbar-item" href="/community/slack">Slack</a> |
| <a class="navbar-item" href="https://github.com/apache/bookkeeper/issues">GitHub Issues</a> |
| <a class="navbar-item" href="/community/releases">Release Management</a> |
| <a class="navbar-item" href="/community/meeting">Community Meetings</a> |
| <hr class="dropdown-divider"> |
| <a class="navbar-item" href="/community/contributing">Contribution Guide</a> |
| <a class="navbar-item" href="/community/coding_guide">Coding Guide</a> |
| <a class="navbar-item" href="/community/testing">Testing Guide</a> |
| <a class="navbar-item" href="/community/issue-report">Issue Report Guide</a> |
| <a class="navbar-item" href="/community/release_guide">Release Guide</a> |
| <hr class="dropdown-divider"> |
| <a class="navbar-item" href="/community/presentations">Presentations</a> |
| <a class="navbar-item" href="/community/bookkeeper_proposals">BookKeeper Proposals</a> |
| </div> |
| </div> |
| |
| <!-- Project dropdown: governance, licensing and foundation links (external links use https). --> |
| <div class="navbar-item has-dropdown is-hoverable"> |
| <a class="navbar-link">Project</a> |
| <div class="navbar-dropdown is-boxed"> |
| <a class="navbar-item" href="/project/who">Who are we?</a> |
| <a class="navbar-item" href="/project/bylaws">Bylaws</a> |
| <a class="navbar-item" href="https://www.apache.org/licenses/">License</a> |
| <hr class="dropdown-divider"> |
| <a class="navbar-item" href="/project/privacy">Privacy policy</a> |
| <a class="navbar-item" href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a> |
| <a class="navbar-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a> |
| </div> |
| </div> |
| </div> |
| |
| <!-- Right-hand navbar actions. Each icon is decorative (the visible text labels the link), so it is hidden from assistive technology. --> |
| <div class="navbar-end"> |
| <div class="navbar-item"> |
| <div class="field is-grouped"> |
| <p class="control"> |
| <a class="button bk-twitter" href="https://twitter.com/asfbookkeeper"> |
| <span class="icon"> |
| <i class="fa fa-twitter" aria-hidden="true"></i> |
| </span> |
| <span>Twitter</span> |
| </a> |
| </p> |
| <p class="control"> |
| <a class="button" href="https://github.com/apache/bookkeeper"> |
| <span class="icon"> |
| <i class="fa fa-github" aria-hidden="true"></i> |
| </span> |
| <span>GitHub</span> |
| </a> |
| </p> |
| <p class="control"> |
| <a class="button is-primary" href="/releases"> |
| <span class="icon"> |
| <i class="fa fa-download" aria-hidden="true"></i> |
| </span> |
| <span>Download</span> |
| </a> |
| </p> |
| </div> |
| </div> |
| </div> |
| </div> |
| </nav> |
| |
| |
| <div class="bk-community-container"> |
| <div class="columns"> |
| <div class="column is-12"> |
| <header class="docs-title"> |
| <nav class="level"> |
| <div class="level-left"> |
| <div class="level-item"> |
| <h1 class="title">BP-34: 128 bits support</h1> |
| </div> |
| </div> |
| |
| </nav> |
| |
| |
| </header> |
| |
| <hr /> |
| |
| <div class="content is-medium"> |
| <section class="bk-community-content"> |
| <h3 id="motivation">Motivation</h3> |
| |
| <p>BookKeeper coordinates with a metadata store to generate a cluster wide <code class="highlighter-rouge">ledgerId</code>. |
| Currently this is a signed <code class="highlighter-rouge">64 bit</code> number (effectively 63 bits). This method works |
| great because we have a centralized metadata store for coordinating the id generation. |
| However this method may not scale as the cluster size and number of ledgers grow.</p> |
| |
| <p><a href="https://en.wikipedia.org/wiki/Globally_unique_identifier">Universally unique identifier - Wikipedia</a> |
| is a preferred way to generate decentralized globally unique IDs and it takes <code class="highlighter-rouge">128 bits</code>. |
| This method can scale well as it doesn’t need a centralized coordination.</p> |
| |
| <p>This BP proposes the changes for increasing ledger id from <code class="highlighter-rouge">63 bits</code> to <code class="highlighter-rouge">128 bits</code>.</p> |
| |
| <h3 id="128-bits">128 bits</h3> |
| |
| <p>Since there is no native support for <code class="highlighter-rouge">128 bits</code> in both Java and |
| <a href="https://github.com/google/protobuf/issues/2180">Protobuf</a>, we have to break <code class="highlighter-rouge">128 bits</code> |
| into 2 <code class="highlighter-rouge">64 bits</code> numbers for representing the <code class="highlighter-rouge">128 bits</code> id:</p> |
| |
| <ul> |
| <li>ledger-id-msb: the most significant 64 bits, bit 64 - 127</li> |
| <li>ledger-id-lsb: the least significant 64 bits, bit 0 - 63</li> |
| </ul> |
| |
| <p>For backward compatibility, the <code class="highlighter-rouge">ledger-id-lsb</code> is the current <code class="highlighter-rouge">64 bits</code> ledger-id. |
| The <code class="highlighter-rouge">ledger-id-msb</code> will be added as a new field in both API and protocol.</p> |
| |
| <p>I am proposing to call <code class="highlighter-rouge">ledger-id-msb</code> the <code class="highlighter-rouge">ledger-scope-id</code>. So the current 64 bits <code class="highlighter-rouge">ledgerId</code> and |
| the newly introduced 64 bits <code class="highlighter-rouge">ledgerScopeId</code> together will form the new <code class="highlighter-rouge">128 bits</code> ledger id.</p> |
| |
| <p>The default <code class="highlighter-rouge">ledgerScopeId</code> is <code class="highlighter-rouge">0</code>. That means any ledgers created prior to this change are allocated |
| under scope <code class="highlighter-rouge">0</code>. Hence it maintains backward compatibility during upgrade.</p> |
| |
| <p>The combination of <code class="highlighter-rouge">ledgerScopeId</code> and <code class="highlighter-rouge">ledgerId</code> forms the <code class="highlighter-rouge">128 bits</code> ledger id. We can introduce a |
| hex representation of this <code class="highlighter-rouge">128 bits</code> ledger id - <code class="highlighter-rouge">ledgerQualifiedName</code>. This <code class="highlighter-rouge">ledgerQualifiedName</code> can |
| be useful for CLI tooling, REST API and troubleshooting purposes. The API internally can convert |
| <code class="highlighter-rouge">ledgerQualifiedName</code> to <code class="highlighter-rouge">ledgerScopeId</code> and <code class="highlighter-rouge">ledgerId</code>.</p> |
| |
| <h3 id="public-interfaces">Public Interfaces</h3> |
| |
| <h4 id="api-change">API Change</h4> |
| |
| <p>The API will be introducing <code class="highlighter-rouge">ledgerScopeId</code> across the interfaces. This field will be optional and default to <code class="highlighter-rouge">0</code>.</p> |
| |
| <h5 id="handle">Handle</h5> |
| |
| <p>Introduce a new method <code class="highlighter-rouge">getScopeId</code> for representing the scope id (the most significant 64 bits of the <code class="highlighter-rouge">128 bits</code> ledger id).</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">Handle</span> <span class="kd">extends</span> <span class="nc">AutoCloseable</span> <span class="o">{</span> |
| |
| <span class="o">...</span> |
| |
| <span class="cm">/** |
| * Return the ledger scope id. The most significant 64 bits of 128 bits. |
| */</span> |
| <span class="kt">long</span> <span class="nf">getScopeId</span><span class="o">();</span> |
| |
| <span class="cm">/** |
| * Return the ledger id. The least significant 64 bits of 128 bits. |
| */</span> |
| <span class="kt">long</span> <span class="nf">getId</span><span class="o">();</span> |
| |
| <span class="o">...</span> |
| |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <h5 id="create-ledgeradv">Create LedgerAdv</h5> |
| |
| <p>Introduce a new method <code class="highlighter-rouge">withLedgerScopeId</code> in <code class="highlighter-rouge">CreateAdvBuilder</code> for providing <code class="highlighter-rouge">scopeId</code> |
| (the most significant 64 bits for 128 bits ledger id) on creating a ledger.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">CreateAdvBuilder</span> <span class="kd">extends</span> <span class="nc">OpBuilder</span><span class="o">&lt;</span><span class="nc">WriteHandle</span><span class="o">&gt;</span> <span class="o">{</span> |
| <span class="o">...</span> |
| |
| <span class="cm">/** |
| * Set the scope id for the newly created ledger. |
| * If no explicit scopeId is passed, the new ledger |
| * will be created under scope `0`. |
| */</span> |
| <span class="nc">CreateAdvBuilder</span> <span class="nf">withLedgerScopeId</span><span class="o">(</span><span class="kt">long</span> <span class="n">scopeId</span><span class="o">);</span> |
| |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <h5 id="open-ledger">Open Ledger</h5> |
| |
| <p>Introduce a new method <code class="highlighter-rouge">withLedgerScopeId</code> in <code class="highlighter-rouge">OpenBuilder</code> for providing <code class="highlighter-rouge">scopeId</code> |
| (the most significant 64 bits for 128 bits ledger id) on opening a ledger.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">OpenBuilder</span> <span class="kd">extends</span> <span class="nc">OpBuilder</span><span class="o">&lt;</span><span class="nc">ReadHandle</span><span class="o">&gt;</span> <span class="o">{</span> |
| <span class="o">...</span> |
| <span class="cm">/** |
| * Set the scope id of the ledger to open. |
| */</span> |
| <span class="nc">OpenBuilder</span> <span class="nf">withLedgerScopeId</span><span class="o">(</span><span class="kt">long</span> <span class="n">scopeId</span><span class="o">);</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <h5 id="delete-ledger">Delete Ledger</h5> |
| |
| <p>Introduce a new method <code class="highlighter-rouge">withLedgerScopeId</code> in <code class="highlighter-rouge">DeleteBuilder</code> for providing <code class="highlighter-rouge">scopeId</code> |
| (the most significant 64 bits for 128 bits ledger id) on deleting a ledger.</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">DeleteBuilder</span> <span class="kd">extends</span> <span class="nc">OpBuilder</span><span class="o">&lt;</span><span class="nc">Void</span><span class="o">&gt;</span> <span class="o">{</span> |
| <span class="o">...</span> |
| <span class="cm">/** |
| * Set the scope id of the ledger to delete. |
| */</span> |
| <span class="nc">DeleteBuilder</span> <span class="nf">withLedgerScopeId</span><span class="o">(</span><span class="kt">long</span> <span class="n">scopeId</span><span class="o">);</span> |
| <span class="o">...</span> |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <h4 id="cli">CLI</h4> |
| |
| <p>All BookKeeper CLI tools will be updated with an additional option <code class="highlighter-rouge">--ledger-scope-id</code>. |
| Optionally we can add an option <code class="highlighter-rouge">--ledger-qualified-name</code> (the hex representation of 128 bits). |
| Internally all the CLI tools will convert ledger qualified name to <code class="highlighter-rouge">ledgerId</code> and <code class="highlighter-rouge">ledgerScopeId</code>.</p> |
| |
| <h4 id="rest">REST</h4> |
| |
| <ol> |
| <li>All ledger related endpoints will be adding a new parameter <code class="highlighter-rouge">ledger_scope_id</code>.</li> |
| <li><code class="highlighter-rouge">ListLedgerService</code> only supports listing ledgers under a given ledger scope id. |
| If <code class="highlighter-rouge">ledger_scope_id</code> is missing, it will be listing ledgers under scope <code class="highlighter-rouge">0</code>.</li> |
| </ol> |
| |
| <h4 id="wire-protocol">Wire Protocol</h4> |
| |
| <blockquote> |
| <p>There is no plan to support 128 bits in the v2 protocol, due to limitations of that protocol. |
| So any operations in the v2 protocol with a scope id not equal to 0 will fail immediately with |
| <code class="highlighter-rouge">ILLEGAL_OP</code> exceptions.</p> |
| </blockquote> |
| |
| <p>All the request and response messages will be adding an optional field <code class="highlighter-rouge">optional int64 ledgerScopeId</code>.</p> |
| |
| <h4 id="entry-format">Entry Format</h4> |
| |
| <p>Currently all the entries written to bookies are encoded in a certain format, including <code class="highlighter-rouge">metadata</code>, |
| <code class="highlighter-rouge">digest code</code> and <code class="highlighter-rouge">payload</code>. The entry format is not <em>versioned</em>.</p> |
| |
| <p>In order to support adding another field <code class="highlighter-rouge">ledgerScopeId</code> in the <code class="highlighter-rouge">metadata</code> section, we are introducing |
| <code class="highlighter-rouge">version</code> in the entry format.</p> |
| |
| <h5 id="entry-format-v1">Entry Format V1</h5> |
| |
| <div class="language-json highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="err">Entry</span><span class="w"> </span><span class="err">Format</span><span class="w"> </span><span class="err">V</span><span class="mi">1</span><span class="w"> |
| </span><span class="err">===============</span><span class="w"> |
| </span><span class="err">---</span><span class="w"> </span><span class="err">header</span><span class="w"> </span><span class="err">---</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">0</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">7</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Ledger</span><span class="w"> </span><span class="err">ID</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">8</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">15</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Entry</span><span class="w"> </span><span class="err">ID</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">16</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">23</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">LastAddConfirmed</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">24</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">31</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Length</span><span class="w"> |
| </span><span class="err">---</span><span class="w"> </span><span class="err">digest</span><span class="w"> </span><span class="err">---</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">32</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="err">(</span><span class="mi">32</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="err">x</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">1</span><span class="err">))</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Digest</span><span class="w"> </span><span class="err">Code</span><span class="w"> </span><span class="err">(e.g.</span><span class="w"> </span><span class="err">CRC</span><span class="mi">32</span><span class="err">)</span><span class="w"> |
| </span><span class="err">---</span><span class="w"> </span><span class="err">payload</span><span class="w"> </span><span class="err">---</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">((</span><span class="mi">32</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="err">x)</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Payload</span><span class="w"> |
| </span></code></pre></div></div> |
| |
| <blockquote> |
| <p><code class="highlighter-rouge">x</code> is the length of digest code.</p> |
| </blockquote> |
| |
| <blockquote> |
| <p>Prior to introducing <code class="highlighter-rouge">ledgerScopeId</code>, ledgerId is assumed to be a positive value.</p> |
| </blockquote> |
| |
| <h5 id="entry-format-v2">Entry Format V2</h5> |
| |
| <div class="language-json highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="err">Entry</span><span class="w"> </span><span class="err">Format</span><span class="w"> </span><span class="err">V</span><span class="mi">2</span><span class="w"> |
| </span><span class="err">===============</span><span class="w"> |
| </span><span class="err">---</span><span class="w"> </span><span class="err">header</span><span class="w"> </span><span class="err">---</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">0</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">7</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Metadata</span><span class="w"> </span><span class="err">Flags</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">8</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">15</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Ledger</span><span class="w"> </span><span class="err">Scope</span><span class="w"> </span><span class="err">ID</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">16</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">23</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Ledger</span><span class="w"> </span><span class="err">ID</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">24</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">31</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Entry</span><span class="w"> </span><span class="err">ID</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">32</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">39</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">LastAddConfirmed</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">40</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">47</span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Length</span><span class="w"> |
| </span><span class="err">---</span><span class="w"> </span><span class="err">digest</span><span class="w"> </span><span class="err">---</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(</span><span class="mi">48</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="err">(</span><span class="mi">48</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="err">x</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">1</span><span class="err">))</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Digest</span><span class="w"> </span><span class="err">Code</span><span class="w"> </span><span class="err">(e.g.</span><span class="w"> </span><span class="err">CRC</span><span class="mi">32</span><span class="err">)</span><span class="w"> |
| </span><span class="err">---</span><span class="w"> </span><span class="err">payload</span><span class="w"> </span><span class="err">---</span><span class="w"> |
| </span><span class="err">Bytes</span><span class="w"> </span><span class="err">((</span><span class="mi">48</span><span class="w"> </span><span class="err">+</span><span class="w"> </span><span class="err">x)</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="err">)</span><span class="w"> </span><span class="err">:</span><span class="w"> </span><span class="err">Payload</span><span class="w"> |
| </span></code></pre></div></div> |
| |
| <blockquote> |
| <p><code class="highlighter-rouge">x</code> is the length of digest code.</p> |
| </blockquote> |
| |
| <h6 id="metadata-flags">Metadata Flags</h6> |
| |
| <div class="language-json highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="err">Metadata:</span><span class="w"> </span><span class="mi">1</span><span class="w"> </span><span class="err">Bytes</span><span class="w"> </span><span class="err">(Long)</span><span class="w"> |
| </span><span class="err">------------------------</span><span class="w"> |
| </span><span class="mi">0</span><span class="err">x</span><span class="w"> </span><span class="mi">0</span><span class="w"> </span><span class="mi">0</span><span class="w"> |
| </span><span class="err">|__|</span><span class="w"> |
| </span><span class="err">|</span><span class="w"> |
| </span><span class="err">version</span><span class="w"> |
| |
| </span><span class="err">----</span><span class="w"> |
| </span><span class="err">Bit</span><span class="w"> </span><span class="mi">0</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">3</span><span class="err">:</span><span class="w"> </span><span class="err">digest</span><span class="w"> </span><span class="err">type</span><span class="w"> </span><span class="err">(e.g.</span><span class="w"> </span><span class="err">CRC</span><span class="mi">32</span><span class="err">,</span><span class="w"> </span><span class="err">CRC</span><span class="mi">32</span><span class="err">C</span><span class="w"> </span><span class="err">and</span><span class="w"> </span><span class="err">such)</span><span class="w"> |
| </span><span class="err">Bit</span><span class="w"> </span><span class="mi">4</span><span class="w"> </span><span class="err">-</span><span class="w"> </span><span class="mi">7</span><span class="err">:</span><span class="w"> </span><span class="err">version,</span><span class="w"> </span><span class="err">the</span><span class="w"> </span><span class="err">most</span><span class="w"> </span><span class="err">significant</span><span class="w"> </span><span class="err">bit</span><span class="w"> </span><span class="err">of</span><span class="w"> </span><span class="err">this</span><span class="w"> </span><span class="err">byte</span><span class="w"> </span><span class="err">will</span><span class="w"> </span><span class="err">be</span><span class="w"> </span><span class="err">always</span><span class="w"> </span><span class="err">set</span><span class="w"> </span><span class="err">to</span><span class="w"> </span><span class="mi">1</span><span class="err">.</span><span class="w"> |
| </span><span class="err">it</span><span class="w"> </span><span class="err">will</span><span class="w"> </span><span class="err">be</span><span class="w"> </span><span class="err">used</span><span class="w"> </span><span class="err">for</span><span class="w"> </span><span class="err">differentiating</span><span class="w"> </span><span class="err">entry</span><span class="w"> </span><span class="err">format</span><span class="w"> </span><span class="err">v</span><span class="mi">1</span><span class="w"> </span><span class="err">and</span><span class="w"> </span><span class="err">v</span><span class="mi">2</span><span class="err">.</span><span class="w"> |
| |
| </span></code></pre></div></div> |
| |
| <p>We are setting the most significant bit to be <code class="highlighter-rouge">1</code>. So the first byte in entry v2 will |
| be a negative value, which can be used for differentiating entry format v1 and v2. |
| The version will be encoded into the first byte. The version will be used for describing |
| the entry format.</p> |
| |
| <h5 id="decoding-entry">Decoding Entry</h5> |
| |
| <p>The pseudo code for decoding an entry is as follows:</p> |
| |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code> |
| <span class="nc">ByteBuf</span> <span class="n">entry</span> <span class="o">=</span> <span class="o">...;</span> |
| |
| <span class="c1">// getByte() returns a signed byte: when the most significant bit is set (entry format v2)</span> |
| <span class="c1">// the value is negative, so a non-negative value identifies entry format v1</span> |
| <span class="kt">int</span> <span class="n">metadataFlags</span> <span class="o">=</span> <span class="n">entry</span><span class="o">.</span><span class="na">getByte</span><span class="o">();</span> |
| |
| <span class="k">if</span> <span class="o">(</span><span class="n">metadataFlags</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="o">)</span> <span class="o">{</span> <span class="c1">// the entry is encoded in v1 format</span> |
| <span class="c1">// decoding the entry in v1 format</span> |
| <span class="o">...</span> |
| <span class="o">}</span> <span class="k">else</span> <span class="o">{</span> |
| <span class="c1">// decoding the entry in v2 format</span> |
| <span class="o">}</span> |
| |
| </code></pre></div></div> |
| |
| <h4 id="bookie-storage">Bookie Storage</h4> |
| |
| <h5 id="journal">Journal</h5> |
| |
| <p>A new method should be added in journal <code class="highlighter-rouge">WriteCallback</code> to handle <code class="highlighter-rouge">ledgerScopeId</code>.</p> |
| |
| <!-- Java listing: the proposed journal WriteCallback interface. The five-argument writeComplete is a default method that delegates to the six-argument overload, passing 0L as ledgerScopeId. --> |
| <div class="language-java highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">WriteCallback</span> <span class="o">{</span> |
| |
| <span class="kt">void</span> <span class="nf">writeComplete</span><span class="o">(</span><span class="kt">int</span> <span class="n">rc</span><span class="o">,</span> |
| <span class="kt">long</span> <span class="n">ledgerScopeId</span><span class="o">,</span> |
| <span class="kt">long</span> <span class="n">ledgerId</span><span class="o">,</span> |
| <span class="kt">long</span> <span class="n">entryId</span><span class="o">,</span> |
| <span class="nc">BookieSocketAddress</span> <span class="n">addr</span><span class="o">,</span> |
| <span class="nc">Object</span> <span class="n">ctx</span><span class="o">);</span> |
| |
| <span class="k">default</span> <span class="kt">void</span> <span class="nf">writeComplete</span><span class="o">(</span><span class="kt">int</span> <span class="n">rc</span><span class="o">,</span> |
| <span class="kt">long</span> <span class="n">ledgerId</span><span class="o">,</span> |
| <span class="kt">long</span> <span class="n">entryId</span><span class="o">,</span> |
| <span class="nc">BookieSocketAddress</span> <span class="n">addr</span><span class="o">,</span> |
| <span class="nc">Object</span> <span class="n">ctx</span><span class="o">)</span> <span class="o">{</span> |
| <span class="n">writeComplete</span><span class="o">(</span><span class="n">rc</span><span class="o">,</span> <span class="mi">0L</span><span class="o">,</span> <span class="n">ledgerId</span><span class="o">,</span> <span class="n">entryId</span><span class="o">,</span> <span class="n">addr</span><span class="o">,</span> <span class="n">ctx</span><span class="o">);</span> |
| <span class="o">}</span> |
| |
| <span class="o">}</span> |
| </code></pre></div></div> |
| |
| <p>The journal should be changed to be able to retrieve <code class="highlighter-rouge">ledgerScopeId</code> from the entry |
| payload based on <a href="#entry-format">Entry Format</a>.</p> |
| |
| <h5 id="ledger-storage">Ledger Storage</h5> |
| |
| <h6 id="entrylogger">EntryLogger</h6> |
| |
| <ol> |
| <li>Methods in <code class="highlighter-rouge">EntryLogger</code> should be able to accept <code class="highlighter-rouge">ledgerScopeId</code> as a parameter.</li> |
| <li>EntryLogger should be updated to retrieve <code class="highlighter-rouge">ledgerScopeId</code> from the entry payload |
| based on <a href="#entry-format">Entry Format</a>.</li> |
| </ol> |
| |
| <h6 id="entrymemtable">EntryMemTable</h6> |
| |
| <p><code class="highlighter-rouge">ledgerScopeId</code> should be added as part of <code class="highlighter-rouge">EntryKey</code>.</p> |
| |
| <h6 id="indexpersistencemgr">IndexPersistenceMgr</h6> |
| |
| <p>Currently the ledger index files (64 bits) are stored in a 2-level hierarchy of |
| directories - <code class="highlighter-rouge">&lt;msb-32bits-hex&gt;/&lt;lsb-32bits-hex&gt;/&lt;ledger-id-hex&gt;.idx</code>.</p> |
| |
| <p>If <code class="highlighter-rouge">ledgerScopeId</code> is 0, the existing scheme will be used for storing and retrieving |
| ledger index files.</p> |
| |
| <p>If <code class="highlighter-rouge">ledgerScopeId</code> is not 0, the ledgers were produced by new clients that |
| support 128 bits; those ledgers will be stored in a 4-level hierarchy of |
| directories - |
| <code class="highlighter-rouge">&lt;msb-32bits-hex-ledger-scope-id&gt;/&lt;lsb-32bits-hex-ledger-scope-id&gt;/&lt;msb-32bits-hex-ledger-id&gt;/&lt;lsb-32bits-hex-ledger-id&gt;</code>.</p> |
| |
| <p>All the file info caches should be updated to use <code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId&gt;</code> |
| as index keys.</p> |
| |
| <h6 id="indexinmempagemgr">IndexInMemPageMgr</h6> |
| |
| <p>The LRU pages map will be updated to use <code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId&gt;</code> as index |
| keys.</p> |
| |
| <h6 id="dbledgerstorage">DBLedgerStorage</h6> |
| |
| <p>Currently DBLedgerStorage uses <code class="highlighter-rouge">&lt;ledgerId, entryId&gt;</code> as the index key for indexing the |
| location of each entry.</p> |
| |
| <p>Similar to <code class="highlighter-rouge">SortedLedgerStorage</code> and <code class="highlighter-rouge">InterleavedLedgerStorage</code>, ledgers whose |
| <code class="highlighter-rouge">ledgerScopeId</code> is 0 will use the existing scheme for storing their entry locations.</p> |
| |
| <p>For ledgers whose <code class="highlighter-rouge">ledgerScopeId</code> is not 0, their entry locations will be stored in a new RocksDB database, |
| whose index key will be <code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId, entryId&gt;</code>.</p> |
| |
| <h4 id="metadata-store">Metadata Store</h4> |
| |
| <h5 id="ledgermanager">LedgerManager</h5> |
| |
| <p>All the interfaces should be updated to accept <code class="highlighter-rouge">ledgerScopeId</code>.</p> |
| |
| <p>The actual implementation should decide how to store metadata |
| for <code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId&gt;</code>.</p> |
| |
| <h6 id="zookeeper-ledger-manager">ZooKeeper Ledger Manager</h6> |
| |
| <p>We need to introduce a LongLongHierarchicalLedgerManager for storing metadata |
| indexed by <code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId&gt;</code>.</p> |
| |
| <p>If <code class="highlighter-rouge">ledgerScopeId</code> is 0, then it will fall back to <code class="highlighter-rouge">LongHierarchicalLedgerManager</code>, |
| so no behavior is changed.</p> |
| |
| <p>If <code class="highlighter-rouge">ledgerScopeId</code> is not 0, those ledgers will be indexed in a new hierarchy |
| (possibly under a different znode).</p> |
| |
| <h6 id="ledger-id-generation">Ledger ID generation</h6> |
| |
| <p>When upgrading from 64 bits to 128 bits, we probably don’t need any centralized mechanism |
| for generating ledger id. It can be implemented using UUID generation.</p> |
| |
| <p>This is especially true since we are supporting 128 bits by introducing <code class="highlighter-rouge">ledgerScopeId</code>, which means that |
| applications of bookkeeper can decide their own way of generating their <code class="highlighter-rouge">scopeId</code>. |
| An application or even the bookkeeper client can generate its ledger id using UUID generation, |
| then break the 128-bit UUID into two parts: one serves as <code class="highlighter-rouge">ledgerScopeId</code> and the other |
| one serves as <code class="highlighter-rouge">ledgerId</code>.</p> |
| |
| <h6 id="etcd">Etcd</h6> |
| |
| <p>Since Etcd has a better key/value presentation, we can basically just combine |
| <code class="highlighter-rouge">&lt;ledgerScopeId, ledgerId&gt;</code> as the index key for storing ledger metadata in Etcd. |
| No special consideration is needed.</p> |
| |
| <h3 id="performance-concerns">Performance Concerns</h3> |
| |
| <p>There shouldn’t be any performance difference when not using 128 bit ledger id |
| (<code class="highlighter-rouge">ledgerScopeId</code> is omitted).</p> |
| |
| <p>Performance concerns may arise in the following areas:</p> |
| |
| <ul> |
| <li><strong>Wire Protocol</strong>: additional 9 bytes will be added per entry, one byte for version |
| and 8 bytes for the msb of 128 bit ledger id</li> |
| <li><strong>Journal</strong>: additional 9 bytes will be added per entry (same as wire protocol).</li> |
| <li><strong>EntryLogger</strong>: additional 9 bytes will be added per entry (same as wire protocol)</li> |
| <li><strong>Memtable</strong>: additional 8 bytes will be added per indexed entry.</li> |
| <li><strong>FileInfo</strong>: there is no change to the index file format itself.</li> |
| <li><strong>IndexPersistenceManager</strong>: Files will be organized in more directory hierarchy. |
| It shouldn’t be a big deal.</li> |
| <li><strong>IndexInMemoryManager (LedgerCache)</strong>: additional 8 bytes per index page.</li> |
| <li><strong>DbLedgerStorage</strong>: additional 8 bytes per entry for entry location.</li> |
| <li><strong>Metadata</strong>: on zookeeper, we need a 128 bit ledger manager, which means more znode |
| hierarchy than the 64 bit ledger manager. An Etcd-like key/value metadata store is probably |
| preferable for a 128 bit ledger manager.</li> |
| </ul> |
| |
| <p>However increasing ledger id from 64 bits to 128 bits can get rid of the only remaining |
| central point, since we don’t need to use zookeeper for ledger id generation. The id |
| generation can become decentralized.</p> |
| |
| <h3 id="proposed-changes">Proposed Changes</h3> |
| |
| <p>All the required changes are described above. In summary, the changes can |
| happen in the following 2 phases:</p> |
| |
| <ol> |
| <li>Ensure all components have <code class="highlighter-rouge">ledgerScopeId</code> added (wire protocol, storage and such), |
| assuming <code class="highlighter-rouge">ledgerScopeId</code> will be 0. The changes can happen independently and must be |
| backward compatible with old clients.</li> |
| <li>Add <code class="highlighter-rouge">ledgerScopeId</code> into the public API, so applications can start using <code class="highlighter-rouge">ledgerScopeId</code>. |
| After that, applications can use UUID to generate ledger id and break UUID into two parts, |
| one is <code class="highlighter-rouge">ledgerScopeId</code>, while the other one is <code class="highlighter-rouge">ledgerId</code>.</li> |
| </ol> |
| |
| <h3 id="compatibility-deprecation-and-migration-plan">Compatibility, Deprecation, and Migration Plan</h3> |
| |
| <p>All the changes are backward compatible, since we are doing the changes by adding an optional |
| field <code class="highlighter-rouge">ledgerScopeId</code>. Old clients can still operate in the mode of <code class="highlighter-rouge">ledgerScopeId == 0</code>. |
| The new application can activate the feature by starting to use <code class="highlighter-rouge">ledgerScopeId</code> in the new API.</p> |
| |
| <h3 id="test-plan">Test Plan</h3> |
| |
| <ol> |
| <li>Add unit tests for individual components on introducing <code class="highlighter-rouge">ledgerScopeId</code>.</li> |
| <li>Add backward compatibility tests for individual components.</li> |
| <li>Add end-to-end integration tests for introducing <code class="highlighter-rouge">ledgerScopeId</code>.</li> |
| <li>Add end-to-end backward compatibility tests.</li> |
| </ol> |
| |
| <h3 id="rejected-alternatives">Rejected Alternatives</h3> |
| |
| <p>N/A</p> |
| |
| </section> |
| |
| |
| </div> |
| </div> |
| </div> |
| </div> |
| </main> |
| |
| <footer class="footer"> |
| <div class="container"> |
| <div class="content has-text-centered"> |
| <p> |
| Copyright © 2016 - 2021 <a href="https://www.apache.org/">The Apache Software Foundation</a>,<br /> licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, version 2.0</a>. |
| </p> |
| <p> |
| Apache BookKeeper, BookKeeper®, Apache®, the Apache feature logo, and the Apache BookKeeper logo are either registered trademarks or trademarks of The Apache Software Foundation. |
| </p> |
| </div> |
| </div> |
| </footer> |
| |
| </body> |
| |
| <script src="/js/app.js"></script> |
| |
| |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| http://www.apache.org/licenses/LICENSE-2.0 |
| Unless required by applicable law or agreed to in writing, |
| software distributed under the License is distributed on an |
| "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| KIND, either express or implied. See the License for the |
| specific language governing permissions and limitations |
| under the License. |
| --> |
| <script> |
| // Google Analytics (analytics.js) bootstrap loader, kept in its vendor-supplied minified form. |
| // The call at the bottom binds i=window, s=document, o='script', g=the analytics.js URL, r='ga'; |
| // a and m receive no argument and are used as local variables. The function: |
| //  - stores the name 'ga' in window['GoogleAnalyticsObject'] |
| //  - defines window.ga, unless it already exists, as a stub that pushes each call's |
| //    arguments onto the window.ga.q array |
| //  - stores the current time as a number (1*new Date()) in window.ga.l |
| //  - creates a script element with async=1 and src=g and inserts it before the first |
| //    script element found in the document |
| (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
| (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), |
| m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
| })(window,document,'script','https://www.google-analytics.com/analytics.js','ga'); |
| |
| // Issue a 'create' command (presumably 'UA-104419626-1' is this site's tracking ID: confirm) |
| // followed by a 'send' 'pageview' command through the ga function set up above. |
| ga('create', 'UA-104419626-1', 'auto'); |
| ga('send', 'pageview'); |
| |
| </script> |
| |
| |
| </html> |