blob: f6926650f508e70b19caa61ece3eba30c8836f58 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<title>Apache BookKeeper&trade; - BP-41: Separate BookieId from Bookie Network Address</title>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="/css/normalize.css">
<link rel="stylesheet" href="/css/tippy.css">
<link rel="stylesheet" href="/css/style.css">
<link rel="shortcut icon" href="/img/favicon.ico">
<script src="/js/tippy.min.js"></script>
<script type="text/javascript">
// Scrolls the viewport up by 25px relative to its current position.
// NOTE(review): presumably compensates for the top navbar covering anchor
// targets — confirm against the site's CSS.
function shiftWindow() {
  scrollBy(0, -25);
}

// Apply the offset whenever the URL fragment changes or the page is shown.
["hashchange", "pageshow"].forEach(function (eventName) {
  window.addEventListener(eventName, shiftWindow);
});

// Applies the offset once, but only when the current URL carries a fragment.
function load() {
  if (!window.location.hash) {
    return;
  }
  shiftWindow();
}
</script>
</head>
<body class="body">
<main class="main">
<nav class="navbar bk-topnav">
<div class="navbar-brand">
<a class="navbar-item bk-brand" href="/">
Apache BookKeeper&trade;
</a>
<div class="navbar-burger burger" data-target="bkNav">
<span></span>
<span></span>
<span></span>
</div>
</div>
<div id="bkNav" class="navbar-menu">
<div class="navbar-start">
<div class="navbar-item has-dropdown is-hoverable">
<a class="navbar-link">Documentation</a>
<div class="navbar-dropdown is-boxed">
<a class="navbar-item" href="/docs/latest/overview/overview">
Version 4.14.0-SNAPSHOT
<span class="tag is-warning">Development</span>
</a>
<a class="navbar-item" href="/docs/latest/api/javadoc">
<span class="icon bk-javadoc-icon">
<img src="/img/java-icon.svg" alt="">
</span>
Javadoc
</a>
<hr class="dropdown-divider">
<a class="navbar-item" href="/docs/4.13.0/overview/overview">
Release 4.13.0
</a>
<a class="navbar-item" href="/docs/4.12.1/overview/overview">
Release 4.12.1
</a>
<a class="navbar-item" href="/docs/4.12.0/overview/overview">
Release 4.12.0
</a>
<a class="navbar-item" href="/docs/4.11.1/overview/overview">
Release 4.11.1
<span class="tag is-success">Stable</span>
</a>
<a class="navbar-item" href="/docs/4.11.0/overview/overview">
Release 4.11.0
</a>
<a class="navbar-item" href="/docs/4.10.0/overview/overview">
Release 4.10.0
</a>
<a class="navbar-item" href="/archives/docs/r4.9.2">
Release 4.9.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.9.1">
Release 4.9.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.9.0">
Release 4.9.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.8.2">
Release 4.8.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.8.1">
Release 4.8.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.8.0">
Release 4.8.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.7.3">
Release 4.7.3
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.7.2">
Release 4.7.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.7.1">
Release 4.7.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.7.0">
Release 4.7.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.6.2">
Release 4.6.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.6.1">
Release 4.6.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.6.0">
Release 4.6.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.5.1">
Release 4.5.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.5.0">
Release 4.5.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.4.0">
Release 4.4.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.3.2">
Release 4.3.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.3.1">
Release 4.3.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.3.0">
Release 4.3.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.2.4">
Release 4.2.4
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.2.3">
Release 4.2.3
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.2.2">
Release 4.2.2
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.2.1">
Release 4.2.1
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.2.0">
Release 4.2.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.1.0">
Release 4.1.0
<span class="tag is-warning">EOL</span>
</a>
<a class="navbar-item" href="/archives/docs/r4.0.0">
Release 4.0.0
<span class="tag is-warning">EOL</span>
</a>
</div>
</div>
<div class="navbar-item has-dropdown is-hoverable">
<a class="navbar-link">Community</a>
<div class="navbar-dropdown is-boxed">
<a class="navbar-item" href="/community/mailing-lists">Mailing lists</a>
<a class="navbar-item" href="/community/slack">Slack</a>
<a class="navbar-item" href="https://github.com/apache/bookkeeper/issues">Github Issues</a>
<a class="navbar-item" href="/community/releases">Release Management</a>
<a class="navbar-item" href="/community/meeting">Community Meetings</a>
<hr class="dropdown-divider">
<a class="navbar-item" href="/community/contributing">Contribution Guide</a>
<a class="navbar-item" href="/community/coding_guide">Coding Guide</a>
<a class="navbar-item" href="/community/testing">Testing Guide</a>
<a class="navbar-item" href="/community/issue-report">Issue Report Guide</a>
<a class="navbar-item" href="/community/release_guide">Release Guide</a>
<hr class="dropdown-divider">
<a class="navbar-item" href="/community/presentations">Presentations</a>
<a class="navbar-item" href="/community/bookkeeper_proposals">BookKeeper Proposals</a>
</div>
</div>
<div class="navbar-item has-dropdown is-hoverable">
<a class="navbar-link">Project</a>
<div class="navbar-dropdown is-boxed">
<a class="navbar-item" href="/project/who">Who are we?</a>
<a class="navbar-item" href="/project/bylaws">Bylaws</a>
<a class="navbar-item" href="http://www.apache.org/licenses/">License</a>
<hr class="dropdown-divider">
<a class="navbar-item" href="/project/privacy">Privacy policy</a>
<a class="navbar-item" href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
<a class="navbar-item" href="http://www.apache.org/foundation/thanks.html">Thanks</a>
</div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<div class="field is-grouped">
<p class="control">
<a class="button bk-twitter" href="https://twitter.com/asfbookkeeper">
<span class="icon">
<i class="fa fa-twitter"></i>
</span>
<span>Twitter</span>
</a>
</p>
<p class="control">
<a class="button" href="https://github.com/apache/bookkeeper">
<span class="icon">
<i class="fa fa-github"></i>
</span>
<span>GitHub</span>
</a>
</p>
<p class="control">
<a class="button is-primary" href="/releases">
<span class="icon">
<i class="fa fa-download"></i>
</span>
<span>Download</span>
</a>
</p>
</div>
</div>
</div>
</div>
</nav>
<div class="bk-community-container">
<div class="columns">
<div class="column is-12">
<header class="docs-title">
<nav class="level">
<div class="level-left">
<div class="level-item">
<h1 class="title">BP-41: Separate BookieId from Bookie Network Address</h1>
</div>
</div>
</nav>
</header>
<hr />
<div class="content is-medium">
<section class="bk-community-content">
<h3 id="motivation">Motivation</h3>
<p>We want to separate the concepts of <strong>BookieId</strong> from <strong>BookieSocketAddress</strong>.</p>
<p>Currently (up to 4.11.x) the ID of a Bookie (<strong>BookieId</strong>) is too tightly coupled to its network location (<strong>BookieSocketAddress</strong>).</p>
<p>The <strong>LedgerMetadata</strong> structure contains the location of the entries of a ledger, and it stores BookieSocketAddresses (simply a hostname:port or ip:port pair).
The client uses this information to connect to the bookies and retrieve ledger data.</p>
<p>So <em>the identity of a bookie is bound to the network address</em> of the primary endpoint of the Bookie: the ‘<strong>bookie-rpc</strong>’ endpoint in terms of <a href="../BP-38-bookie-endpoint-discovery/">BP-38</a></p>
<p>This coupling imposes several limits on the current version:</p>
<ul>
<li>You cannot easily change the network address of a Bookie: a manual intervention is needed.</li>
<li>The Bookie cannot have a <strong>dynamic network address</strong> or DNS name.</li>
<li>You cannot use a custom Id for the bookie, something that is <strong>meaningful</strong> in the context of the deployment of the bookie.</li>
<li>In case of future implementations that will open <strong>multiple endpoints</strong> on the bookie it is not possible to decide which endpoint should be used as BookieId.</li>
</ul>
<p>This proposal addresses these problems by proposing to separate the concept of <strong>BookieId</strong> from <strong>BookieSocketAddress</strong>.</p>
<p>We will have to introduce a little break in the Client API, in order to switch from using BookieSocketAddress to a more opaque BookieId.</p>
<p>Fortunately we will be able to keep compatibility with old clients and old bookies as long as the Bookie continues to use a BookieId that looks like a BookieSocketAddress.
See the paragraphs below for the details.</p>
<h3 id="public-interfaces">Public Interfaces</h3>
<p>We are introducing a new class BookieId that is simply a wrapper for a String.</p>
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>final class BookieId {
private final String bookieId;
public String toString() {
return bookieId;
}
public static BookieId parse(String bookieId) {
// validation omitted...
return new BookieId(bookieId);
}
}
</code></pre></div></div>
<p>Having a class instead of a simple String is better because it provides a strongly typed API.</p>
<p>The LedgerMetadata class will be changed to use BookieId instead of BookieSocketAddress.
This will break source and binary compatibility for Java clients, applications that use LedgerMetadata (usually for debug or for tools) will have to be recompiled.</p>
<p>The serialized representation of a BookieSocketAddress, both for LedgerMetadata and Bookie Registration, is a simple String on ZooKeeper: this change is not about the format of data stored on Metadata Service.</p>
<p>It is simply a pure refactor of Java interfaces.</p>
<p>We have to introduce an internal API, <strong>BookieAddressResolver</strong>, that maps a <em>BookieId</em> to a <em>BookieSocketAddress</em>: when the client connects to a Bookie it looks up the <strong>current network address</strong> using BookieAddressResolver.</p>
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>interface BookieAddressResolver {
BookieSocketAddress resolve(BookieId id);
}
</code></pre></div></div>
<p>Initially it is not expected that the user provides a custom implementation of BookieAddressResolver.</p>
<p>It is expected that the implementation of this interface coordinates with the BookieWatcher and the RegistrationDriver in order to:</p>
<ul>
<li>map BookieId to BookieSocketAddress using <code class="highlighter-rouge">getBookieServiceInfo(BookieId id)</code> API</li>
<li>cache this mapping efficiently so that it does not have a significant impact on the hot paths (reads and writes), and to save resources on the Metadata Service</li>
</ul>
<p>We provide a utility method BookieSocketAddress#toBookieId that is particularly helpful in test cases; this method simply returns a BookieId
built from the serialized representation of the BookieSocketAddress (hostname:port or ip:port).</p>
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>final class BookieSocketAddress {
....
BookieId toBookieId() {
return BookieId.parse(this.toString());
}
}
</code></pre></div></div>
<p>The RegistrationClient and RegistrationManager interfaces will be refactored to use BookieId instead of String and BookieSocketAddress.</p>
<p>The Bookie itself will expose an API to return the current BookieSocketAddress and current BookieId, this is useful for tests and for the RegistrationManager.</p>
<p>The EnsemblePlacementPolicy and the BookieWatcher will deal with BookieIds and not with BookieSocketAddresses.</p>
<p>The implementations of EnsemblePlacementPolicy that are aware of the network location of the Bookies will need to have access to the
BookieAddressResolver, in order to map a BookieId to the BookieSocketAddress and the BookieSocketAddress to the network location.</p>
<h3 id="details-on-the-proposed-changes">Details on the proposed Changes</h3>
<h4 id="bookieid-validation">BookieId validation</h4>
<p>The BookieId is a non empty string that can contain:</p>
<ul>
<li>ASCII digits and letters ([a-zA-Z0-9])</li>
<li>the colon character (‘:’)</li>
<li>the dash character (‘-’)</li>
<li>the dot character (‘.’)</li>
</ul>
<h4 id="bookkeeper-client-side-changes">BookKeeper Client Side Changes</h4>
<p>See the ‘Public interfaces’ section.</p>
<p>On the client side code it will be clearer when we are dealing with BookieId, and basically the client API won’t deal with network addresses anymore.
This change will be visible both on the legacy LedgerHandle API and on the new WriteHandle/ReadHandle API, basically because the new API is only a wrapper over the LedgerHandle API.</p>
<p>When the BookKeeper client connects to a bookie (see <strong>PerChannelBookieClient</strong>) it uses the BookieAddressResolver interface to get the current BookieSocketAddress of the Bookie.
The key of the Connection Pool inside the BookieClient will be BookieId and no more BookieSocketAddress.</p>
<h4 id="disabling-bookieaddressresolver">Disabling BookieAddressResolver</h4>
<p>Using the BookieServiceInfo abstraction needs additional accesses to the Metadata Service (usually ZooKeeper) and this comes with a cost especially during the bootstrap of the client, because you have to resolve the address of every Bookie you are going to write to or to read from.</p>
<p>We add a flag to disable the BookieAddressResolver; without this feature the client will only be able to connect to Bookies with the legacy BookieId.
In this case the BookieAddressResolver implementation will be a simple conversion from the BookieId, assuming the 4.11 format hostname:port.</p>
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>enableBookieAddressResolver=true
</code></pre></div></div>
<p>The <em>enableBookieAddressResolver</em> flag is used by the Client, by the Auditor and by all of the tools and it is enabled by default.</p>
<h4 id="handling-the-local-bookie-node-in-ensembleplacementpolicy">Handling the Local Bookie Node in EnsemblePlacementPolicy</h4>
<p>Unfortunately, throughout the codebase we sometimes used dummy BookieIds that are not mapped to real Bookies; this happens in the EnsemblePlacementPolicies, in which we create a BookieId for the ‘local node’ using TCP port 0. In this case we have to implement a fallback dummy resolution that creates a BookieSocketAddress without using the MetadataService.</p>
<h4 id="bookie-side-changes">Bookie Side Changes</h4>
<p>On the Bookie we introduce <strong>a configuration option</strong> (bookieid) to set a custom bookie id.
If you set this option then the Bookie will advertise itself on MetadataService using the configured id, and publish the ‘bookie-rpc’ endpoint as configured by
the <strong>advertisedAddress</strong> configuration option and the other network related options.
This BookieId will be present only in the configuration file and it is the key to lookup the <em>Cookie</em> on the MetadataService.
Inadvertently changing the BookieId will prevent the Bookie from booting, as it won’t find a matching Cookie.
There is no need to store the BookieId on the cookie or persist it on the local storage (ledgers, indexes or journal directories).</p>
<h4 id="auditor-and-replication-changes">Auditor and Replication Changes</h4>
<p>The Auditor deals with LedgerMetadata and so it will simply work with BookieIds and not with BookieSocketAddress.
When the Auditor needs to connect to a Bookie it will use the BookieAddressResolver to get the current address of the Bookie.</p>
<h4 id="bookie-garbage-collection-changes">Bookie Garbage Collection Changes</h4>
<p>The Bookie decides to reclaim space by looking into LedgerMetadata and checking that a given ledger does not exist anymore.
It will use its own local BookieId instead of the BookieSocketAddress as expected.</p>
<h4 id="tools-changes">Tools changes</h4>
<p>All of the tools that deal with LedgerMetadata will use BookieId instead of BookieSocketAddress; in general this will allow the use of free-form BookieIds,
instead of hostname:port pairs (we had validations on tools that helped the user to use always BookieIds in hostname:port form).</p>
<h4 id="rest-api-changes">REST API Changes</h4>
<p>In the REST API we will deal with BookieIds and not with BookieSocketAddresses anymore; the change will be straightforward and compatible with the current API.
When new custom BookieIDs will be used then they will appear on the REST API as well, but this will be expected by users.</p>
<h3 id="compatibility-deprecation-and-migration-plan">Compatibility, Deprecation, and Migration Plan</h3>
<p>The proposed change will be compatible with all existing clients and bookies as long as you still use BookieIds in the hostname:port form and do not use a custom BookieId.
The Bookie by default will continue to use as BookieID a compatible value computed exactly as in version 4.11.
Incompatibility will start as soon as you enable custom BookieIDs on the bookies, from that point clients and old Auditors won’t be able to deal with new bookies.
New clients will always be able to connect and use legacy bookies.</p>
<p>Custom EnsemblePlacementPolicies must be adapted to the new interfaces, but the change will usually be as simple as just replacing BookieSocketAddress with BookieId.
No need to change address to rack mapping scripts, as they will still deal with raw DNS hostnames and not with BookieIds.</p>
<h3 id="test-plan">Test Plan</h3>
<p>New unit tests will be added to cover all of the code changes.
No need for additional integration tests.</p>
<h3 id="rejected-alternatives">Rejected Alternatives</h3>
<h4 id="make-bookiesocketaddress-an-abstract-class">Make BookieSocketAddress an abstract class</h4>
<p>In order to preserve most of the binary compatibility in the Java client we could still keep BookieSocketAddress class in LedgerMetadata and have some “GenericBookieSocketAddress” and “PhysicalBookieSocketAddress” implementations.
But this way it won’t be easy to understand where we are using a “bookie id” and when we are referring to a network address.
The BookieAddressResolver interface would be needed anyway and it should deal with pure BookieIds and BookieSocketAddress instances that are already resolved to
a network address.</p>
<h4 id="force-a-specific-format-like-uuid-to-custom-bookieid">Force a specific format (like UUID) to custom BookieId</h4>
<p>There is no need to force the BookieId to use a fixed format, like a UUID or other form of standard ID scheme.
Probably new IDs will include the region/availability zone information in order to simplify EnsemblePlacement policies (no more need to pass from DNS to switch mappers) and we cannot know now all of the usages of this parameter.</p>
</section>
</div>
</div>
</div>
</div>
</main>
<footer class="footer">
<div class="container">
<div class="content has-text-centered">
<p>
Copyright &copy; 2016 - 2021 <a href="https://www.apache.org/">The Apache Software Foundation</a>,<br /> licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, version 2.0</a>.
</p>
<p>
Apache BookKeeper, BookKeeper®, Apache®, the Apache feature logo, and the Apache BookKeeper logo are either registered trademarks or trademarks of The Apache Software Foundation.
</p>
</div>
</div>
</footer>
</body>
<script src="/js/app.js"></script>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<script>
// Google Analytics bootstrap. The immediately-invoked function below:
//   - records the tracker's global name ('ga') under window['GoogleAnalyticsObject'];
//   - defines window.ga, unless it already exists, as a stub that appends each
//     call's arguments to the ga.q array;
//   - stores the current time (as a number) in ga.l;
//   - creates a <script> element with async set and src pointing at
//     analytics.js, and inserts it before the first <script> element in the
//     document.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Issue the 'create' command for property UA-104419626-1 with the 'auto' argument.
ga('create', 'UA-104419626-1', 'auto');
// Issue the 'send' command with a 'pageview' hit for this page load.
ga('send', 'pageview');
</script>
</html>