blob: f76cabfb2cd872dc987144812d33b6a40de4d3e7 [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-4.5.1 plugin-docs plugin-id-default docs-doc-id-getting-started/concepts">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.0">
<title data-rh="true">BookKeeper concepts and architecture | Apache BookKeeper</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://bookkeeper.apache.org/docs/4.5.1/getting-started/concepts"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="4.5.1"><meta data-rh="true" name="docusaurus_tag" content="docs-default-4.5.1"><meta data-rh="true" name="docsearch:version" content="4.5.1"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-4.5.1"><meta data-rh="true" property="og:title" content="BookKeeper concepts and architecture | Apache BookKeeper"><meta data-rh="true" name="description" content="BookKeeper is a service that provides persistent storage of streams of log entries---aka records---in sequences called ledgers. BookKeeper replicates stored entries across multiple servers."><meta data-rh="true" property="og:description" content="BookKeeper is a service that provides persistent storage of streams of log entries---aka records---in sequences called ledgers. BookKeeper replicates stored entries across multiple servers."><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://bookkeeper.apache.org/docs/4.5.1/getting-started/concepts"><link data-rh="true" rel="alternate" href="https://bookkeeper.apache.org/docs/4.5.1/getting-started/concepts" hreflang="en"><link data-rh="true" rel="alternate" href="https://bookkeeper.apache.org/docs/4.5.1/getting-started/concepts" hreflang="x-default"><link rel="stylesheet" href="/assets/css/styles.49914aab.css">
<link rel="preload" href="/assets/js/runtime~main.435d5f64.js" as="script">
<link rel="preload" href="/assets/js/main.0f234beb.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/bk-logo.svg" alt="Apache Bookkeeper" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/bk-logo.svg" alt="Apache Bookkeeper" class="themedImage_ToTc themedImage--dark_i4oU"></div><b class="navbar__title text--truncate">Apache BookKeeper</b></a><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs/4.5.1/overview/">Documentation</a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Community</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/community/mailing-lists">Mailing lists</a></li><li><a class="dropdown__link" href="/community/slack">Slack</a></li><li><a href="https://github.com/apache/bookkeeper/issues" target="_blank" rel="noopener noreferrer" class="dropdown__link">Github issues<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a class="dropdown__link" href="/community/releases">Release management</a></li><li><a class="dropdown__link" href="/community/meeting">Community meetings</a></li><li><a class="dropdown__link" 
href="/community/contributing">Contribution guide</a></li><li><a class="dropdown__link" href="/community/coding-guide">Coding guide</a></li><li><a class="dropdown__link" href="/community/testing">Testing guide</a></li><li><a class="dropdown__link" href="/community/issue-report">Issue report guide</a></li><li><a class="dropdown__link" href="/community/release-guide">Release guide</a></li><li><a class="dropdown__link" href="/community/presentations">Presentations</a></li><li><a class="dropdown__link" href="/community/bookkeeper-proposals">BookKeeper proposals (BP)</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Project</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/project/who">Who are we?</a></li><li><a class="dropdown__link" href="/project/bylaws">Bylaws</a></li><li><a href="https://apache.org/licenses" target="_blank" rel="noopener noreferrer" class="dropdown__link">License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a class="dropdown__link" href="/project/privacy">Privacy policy</a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path 
fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link" aria-haspopup="true" aria-expanded="false" role="button" href="/docs/4.5.1/overview/">4.5.1</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/getting-started/concepts">Next</a></li><li><a class="dropdown__link" href="/docs/getting-started/concepts">4.16.4</a></li><li><a class="dropdown__link" href="/docs/4.15.5/getting-started/concepts">4.15.5</a></li><li><a class="dropdown__link" href="/docs/4.14.8/getting-started/concepts">4.14.8</a></li><li><a class="dropdown__link" href="/docs/4.13.0/getting-started/concepts">4.13.0</a></li><li><a class="dropdown__link" href="/docs/4.12.1/getting-started/concepts">4.12.1</a></li><li><a class="dropdown__link" href="/docs/4.11.1/getting-started/concepts">4.11.1</a></li><li><a class="dropdown__link" href="/docs/4.10.0/getting-started/concepts">4.10.0</a></li><li><a class="dropdown__link" href="/docs/4.9.2/getting-started/concepts">4.9.2</a></li><li><a class="dropdown__link" href="/docs/4.8.2/getting-started/concepts">4.8.2</a></li><li><a class="dropdown__link" href="/docs/4.7.3/getting-started/concepts">4.7.3</a></li><li><a class="dropdown__link" href="/docs/4.6.2/getting-started/concepts">4.6.2</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/4.5.1/getting-started/concepts">4.5.1</a></li></ul></div><a class="navbar__item navbar__link" href="/releases">Download</a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type="button" disabled="" title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)" 
aria-live="polite"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_pyhR"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_wfgR"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="searchBox_ZlJk"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside 
class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/4.5.1/overview/">Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" aria-expanded="true" href="/docs/4.5.1/getting-started/installation">Getting started</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/4.5.1/getting-started/installation">Installation</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/4.5.1/getting-started/run-locally">Run bookies locally</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/4.5.1/getting-started/concepts">Concepts and architecture</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/4.5.1/deployment/manual">Deployment</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" 
aria-expanded="false" href="/docs/4.5.1/admin/bookies">Administration</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/4.5.1/api/overview">API</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/4.5.1/security/overview">Security</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/4.5.1/development/protocol">Development</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/4.5.1/reference/config">Reference</a></div></li></ul></nav></div></div></aside><main class="docMainContainer_gTbr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><span class="theme-doc-version-badge badge badge--secondary">Version: 4.5.1</span><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>BookKeeper concepts and architecture</h1></header><p>BookKeeper is a service that provides persistent storage of streams of log <a 
href="#entries">entries</a>---aka <em>records</em>---in sequences called <a href="#ledgers">ledgers</a>. BookKeeper replicates stored entries across multiple servers.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="basic-terms">Basic terms<a href="#basic-terms" class="hash-link" aria-label="Direct link to Basic terms" title="Direct link to Basic terms"></a></h2><p>In BookKeeper:</p><ul><li>each unit of a log is an <a href="#entries"><em>entry</em></a> (aka record)</li><li>streams of log entries are called <a href="#ledgers"><em>ledgers</em></a></li><li>individual servers storing ledgers of entries are called <a href="#bookies"><em>bookies</em></a></li></ul><p>BookKeeper is designed to be reliable and resilient to a wide variety of failures. Bookies can crash, corrupt data, or discard data, but as long as there are enough bookies behaving correctly in the ensemble the service as a whole will behave correctly.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="entries">Entries<a href="#entries" class="hash-link" aria-label="Direct link to Entries" title="Direct link to Entries"></a></h2><blockquote><p><strong>Entries</strong> contain the actual data written to ledgers, along with some important metadata.</p></blockquote><p>BookKeeper entries are sequences of bytes that are written to <a href="#ledgers">ledgers</a>. 
Each entry has the following fields:</p><table><thead><tr><th align="left">Field</th><th align="left">Java type</th><th align="left">Description</th></tr></thead><tbody><tr><td align="left">Ledger number</td><td align="left"><code>long</code></td><td align="left">The ID of the ledger to which the entry has been written</td></tr><tr><td align="left">Entry number</td><td align="left"><code>long</code></td><td align="left">The unique ID of the entry</td></tr><tr><td align="left">Last confirmed (LC)</td><td align="left"><code>long</code></td><td align="left">The ID of the last recorded entry</td></tr><tr><td align="left">Data</td><td align="left"><code>byte[]</code></td><td align="left">The entry&#x27;s data (written by the client application)</td></tr><tr><td align="left">Authentication code</td><td align="left"><code>byte[]</code></td><td align="left">The message auth code, which includes <em>all</em> other fields in the entry</td></tr></tbody></table><h2 class="anchor anchorWithStickyNavbar_LWe7" id="ledgers">Ledgers<a href="#ledgers" class="hash-link" aria-label="Direct link to Ledgers" title="Direct link to Ledgers"></a></h2><blockquote><p><strong>Ledgers</strong> are the basic unit of storage in BookKeeper.</p></blockquote><p>Ledgers are sequences of entries, while each entry is a sequence of bytes. Entries are written to a ledger:</p><ul><li>sequentially, and</li><li>at most once.</li></ul><p>This means that ledgers have <em>append-only</em> semantics. Entries cannot be modified once they&#x27;ve been written to a ledger. 
Determining the proper write order is the responsibility of <a href="#clients-and-apis">client applications</a>.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="clients-and-apis">Clients and APIs<a href="#clients-and-apis" class="hash-link" aria-label="Direct link to Clients and APIs" title="Direct link to Clients and APIs"></a></h2><blockquote><p>BookKeeper clients have two main roles: they create and delete ledgers, and they read entries from and write entries to ledgers.</p><p>BookKeeper provides both a lower-level and a higher-level API for ledger interaction.</p></blockquote><p>There are currently two APIs that can be used for interacting with BookKeeper:</p><ul><li>The <a href="/docs/4.5.1/api/ledger-api">ledger API</a> is a lower-level API that enables you to interact with ledgers directly.</li><li>The <a href="/docs/4.5.1/api/distributedlog-api">DistributedLog API</a> is a higher-level API that enables you to use BookKeeper without directly interacting with ledgers.</li></ul><p>In general, you should choose the API based on how much granular control you need over ledger semantics. The two APIs can also both be used within a single application.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="bookies">Bookies<a href="#bookies" class="hash-link" aria-label="Direct link to Bookies" title="Direct link to Bookies"></a></h2><blockquote><p><strong>Bookies</strong> are individual BookKeeper servers that handle ledgers (more specifically, fragments of ledgers). Bookies function as part of an ensemble.</p></blockquote><p>A bookie is an individual BookKeeper storage server. Individual bookies store fragments of ledgers, not entire ledgers (for the sake of performance). 
For any given ledger <strong>L</strong>, an <em>ensemble</em> is the group of bookies storing the entries in <strong>L</strong>.</p><p>Whenever entries are written to a ledger, those entries are striped across the ensemble (written to a sub-group of bookies rather than to all bookies).</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="motivation">Motivation<a href="#motivation" class="hash-link" aria-label="Direct link to Motivation" title="Direct link to Motivation"></a></h3><blockquote><p>BookKeeper was initially inspired by the NameNode server in HDFS but its uses now extend far beyond this.</p></blockquote><p>The initial motivation for BookKeeper comes from the <a href="http://hadoop.apache.org/" target="_blank" rel="noopener noreferrer">Hadoop</a> ecosystem. In the <a href="https://cwiki.apache.org/confluence/display/HADOOP2/HDFS" target="_blank" rel="noopener noreferrer">Hadoop Distributed File System</a> (HDFS), a special node called the <a href="https://cwiki.apache.org/confluence/display/HADOOP2/NameNode" target="_blank" rel="noopener noreferrer">NameNode</a> logs all operations in a reliable fashion, which ensures that recovery is possible in case of crashes.</p><p>The NameNode, however, served only as initial inspiration for BookKeeper. The applications for BookKeeper extend far beyond this and include essentially any application that requires an append-based storage system. 
BookKeeper provides a number of advantages for such applications:</p><ul><li>Highly efficient writes</li><li>High fault tolerance via replication of messages within ensembles of bookies</li><li>High throughput for write operations via striping (across as many bookies as you wish)</li></ul><h2 class="anchor anchorWithStickyNavbar_LWe7" id="metadata-storage">Metadata storage<a href="#metadata-storage" class="hash-link" aria-label="Direct link to Metadata storage" title="Direct link to Metadata storage"></a></h2><p>BookKeeper requires a metadata storage service to store information related to <a href="#ledgers">ledgers</a> and available bookies. BookKeeper currently uses <a href="https://zookeeper.apache.org" target="_blank" rel="noopener noreferrer">ZooKeeper</a> for this and other tasks.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="data-management-in-bookies">Data management in bookies<a href="#data-management-in-bookies" class="hash-link" aria-label="Direct link to Data management in bookies" title="Direct link to Data management in bookies"></a></h2><p>Bookies manage data in a <a href="https://en.wikipedia.org/wiki/Log-structured_file_system" target="_blank" rel="noopener noreferrer">log-structured</a> way, which is implemented using three types of files:</p><ul><li><a href="#journals">journals</a></li><li><a href="#entry-logs">entry logs</a></li><li><a href="#index-files">index files</a></li></ul><h3 class="anchor anchorWithStickyNavbar_LWe7" id="journals">Journals<a href="#journals" class="hash-link" aria-label="Direct link to Journals" title="Direct link to Journals"></a></h3><p>A journal file contains BookKeeper transaction logs. Before any update to a ledger takes place, the bookie ensures that a transaction describing the update is written to non-volatile storage. 
A new journal file is created once the bookie starts or the older journal file reaches the journal file size threshold.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="entry-logs">Entry logs<a href="#entry-logs" class="hash-link" aria-label="Direct link to Entry logs" title="Direct link to Entry logs"></a></h3><p>An entry log file manages the written entries received from BookKeeper clients. Entries from different ledgers are aggregated and written sequentially, while their offsets are kept as pointers in a <a href="#ledger-cache">ledger cache</a> for fast lookup.</p><p>A new entry log file is created once the bookie starts or the older entry log file reaches the entry log size threshold. Old entry log files are removed by the Garbage Collector Thread once they are not associated with any active ledger.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="index-files">Index files<a href="#index-files" class="hash-link" aria-label="Direct link to Index files" title="Direct link to Index files"></a></h3><p>An index file is created for each ledger, which comprises a header and several fixed-length index pages that record the offsets of data stored in entry log files.</p><p>Since updating index files would introduce random disk I/O, index files are updated lazily by a sync thread running in the background. This ensures speedy performance for updates. 
Before index pages are persisted to disk, they are gathered in a ledger cache for lookup.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="ledger-cache">Ledger cache<a href="#ledger-cache" class="hash-link" aria-label="Direct link to Ledger cache" title="Direct link to Ledger cache"></a></h3><p>Ledger index pages are cached in a memory pool, which allows for more efficient management of disk head scheduling.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="adding-entries">Adding entries<a href="#adding-entries" class="hash-link" aria-label="Direct link to Adding entries" title="Direct link to Adding entries"></a></h3><p>When a client instructs a bookie to write an entry to a ledger, the entry will go through the following steps to be persisted on disk:</p><ol><li>The entry is appended to an <a href="#entry-logs">entry log</a></li><li>The index of the entry is updated in the <a href="#ledger-cache">ledger cache</a></li><li>A transaction corresponding to this entry update is appended to the <a href="#journals">journal</a></li><li>A response is sent to the BookKeeper client</li></ol><blockquote><p>For performance reasons, the entry log buffers entries in memory and commits them in batches, while the ledger cache holds index pages in memory and flushes them lazily. This process is described in more detail in the <a href="#data-flush">Data flush</a> section below.</p></blockquote><h3 class="anchor anchorWithStickyNavbar_LWe7" id="data-flush">Data flush<a href="#data-flush" class="hash-link" aria-label="Direct link to Data flush" title="Direct link to Data flush"></a></h3><p>Ledger index pages are flushed to index files in the following two cases:</p><ul><li>The ledger cache memory limit is reached. There is no more space available to hold newer index pages. 
Dirty index pages will be evicted from the ledger cache and persisted to index files.</li><li>A background sync thread is responsible for flushing index pages from the ledger cache to index files periodically.</li></ul><p>Besides flushing index pages, the sync thread is responsible for rolling journal files in case journal files use too much disk space. The data flush flow in the sync thread is as follows:</p><ul><li><p>A <code>LastLogMark</code> is recorded in memory. The <code>LastLogMark</code> indicates that those entries before it have been persisted (to both index and entry log files) and contains two parts:</p><ol><li>A <code>txnLogId</code> (the file ID of a journal)</li><li>A <code>txnLogPos</code> (offset in a journal)</li></ol></li><li><p>Dirty index pages are flushed from the ledger cache to the index file, and entry log files are flushed to ensure that all buffered entries in entry log files are persisted to disk.</p><p> Ideally, a bookie only needs to flush index pages and entry log files that contain entries before <code>LastLogMark</code>. There is, however, no such information in the ledger and entry log mapping to journal files. Consequently, the thread flushes the ledger cache and entry log entirely here, and may flush entries after the <code>LastLogMark</code>. Flushing more is not a problem, though, just redundant.</p></li><li><p>The <code>LastLogMark</code> is persisted to disk, which means that the entry data and index pages of entries added before <code>LastLogMark</code> have also been persisted to disk. It is now time to safely remove journal files created earlier than <code>txnLogId</code>.</p></li></ul><p>If the bookie has crashed before persisting <code>LastLogMark</code> to disk, it still has journal files containing entries for which index pages may not have been persisted. 
Consequently, when this bookie restarts, it inspects journal files to restore those entries and data isn&#x27;t lost.</p><p>Using the above data flush mechanism, it is safe for the sync thread to skip data flushing when the bookie shuts down. However, in the entry logger it uses a buffered channel to write entries in batches and there might be data buffered in the buffered channel upon a shut down. The bookie needs to ensure that the entry log flushes its buffered data during shutdown. Otherwise, entry log files become corrupted with partial entries.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="data-compaction">Data compaction<a href="#data-compaction" class="hash-link" aria-label="Direct link to Data compaction" title="Direct link to Data compaction"></a></h3><p>On bookies, entries of different ledgers are interleaved in entry log files. A bookie runs a garbage collector thread to delete un-associated entry log files to reclaim disk space. If a given entry log file contains entries from a ledger that has not been deleted, then the entry log file would never be removed and the occupied disk space never reclaimed. In order to avoid such a case, a bookie server compacts entry log files in a garbage collector thread to reclaim disk space.</p><p>There are two kinds of compaction running with different frequency: minor compaction and major compaction. The difference between minor compaction and major compaction lies in their threshold value and compaction interval.</p><ul><li>The garbage collection threshold is the size percentage of an entry log file occupied by those undeleted ledgers. The default minor compaction threshold is 0.2, while the major compaction threshold is 0.8.</li><li>The garbage collection interval is how frequently to run the compaction. 
The default minor compaction interval is 1 hour, while the major compaction interval is 1 day.</li></ul><blockquote><p>If either the threshold or interval is set to less than or equal to zero, compaction is disabled.</p></blockquote><p>The data compaction flow in the garbage collector thread is as follows:</p><ul><li>The thread scans entry log files to get their entry log metadata, which records a list of ledgers comprising an entry log and their corresponding percentages.</li><li>With the normal garbage collection flow, once the bookie determines that a ledger has been deleted, the ledger will be removed from the entry log metadata and the size of the entry log reduced.</li><li>If the remaining size of an entry log file reaches a specified threshold, the entries of active ledgers in the entry log will be copied to a new entry log file.</li><li>Once all valid entries have been copied, the old entry log file is deleted.</li></ul><h2 class="anchor anchorWithStickyNavbar_LWe7" id="zookeeper-metadata">ZooKeeper metadata<a href="#zookeeper-metadata" class="hash-link" aria-label="Direct link to ZooKeeper metadata" title="Direct link to ZooKeeper metadata"></a></h2><p>BookKeeper requires a ZooKeeper installation for storing <a href="#ledgers">ledger</a> metadata. 
Whenever you construct a <a href="https://bookkeeper.apache.org//docs/latest/api/javadoc/org/apache/bookkeeper/client/BookKeeper" target="_blank" rel="noopener noreferrer"><code>BookKeeper</code></a> client object, you need to pass a list of ZooKeeper servers as a parameter to the constructor, like this:</p><div class="language-java codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-java codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">String zkConnectionString = &quot;127.0.0.1:2181&quot;;</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain">BookKeeper bkClient = new BookKeeper(zkConnectionString);</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg class="copyButtonIcon_y97N" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_LjdS" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><blockquote><p>For more info on using the BookKeeper Java client, see <a href="/docs/4.5.1/api/ledger-api#the-java-ledger-api-client">this guide</a>.</p></blockquote><h2 class="anchor anchorWithStickyNavbar_LWe7" id="ledger-manager">Ledger manager<a href="#ledger-manager" class="hash-link" aria-label="Direct link to Ledger manager" title="Direct link to Ledger manager"></a></h2><p>A <em>ledger manager</em> handles ledgers&#x27; metadata (which is stored in ZooKeeper). 
BookKeeper offers two types of ledger managers: the <a href="#flat-ledger-manager">flat ledger manager</a> and the <a href="#hierarchical-ledger-manager">hierarchical ledger manager</a>. Both ledger managers extend the <a href="https://bookkeeper.apache.org//docs/latest/api/javadoc/org/apache/bookkeeper/meta/AbstractZkLedgerManager" target="_blank" rel="noopener noreferrer"><code>AbstractZkLedgerManager</code></a> abstract class.</p><blockquote><h4 class="anchor anchorWithStickyNavbar_LWe7" id="use-the-flat-ledger-manager-in-most-cases">Use the flat ledger manager in most cases<a href="#use-the-flat-ledger-manager-in-most-cases" class="hash-link" aria-label="Direct link to Use the flat ledger manager in most cases" title="Direct link to Use the flat ledger manager in most cases"></a></h4><p>The flat ledger manager is the default and is recommended for nearly all use cases. The hierarchical ledger manager is better suited only for managing very large numbers of BookKeeper ledgers (&gt; 50,000).</p></blockquote><h3 class="anchor anchorWithStickyNavbar_LWe7" id="flat-ledger-manager">Flat ledger manager<a href="#flat-ledger-manager" class="hash-link" aria-label="Direct link to Flat ledger manager" title="Direct link to Flat ledger manager"></a></h3><p>The <em>flat ledger manager</em>, implemented in the <a href="https://bookkeeper.apache.org//docs/latest/api/javadoc/org/apache/bookkeeper/meta/FlatLedgerManager.html" target="_blank" rel="noopener noreferrer"><code>FlatLedgerManager</code></a> class, stores all ledgers&#x27; metadata in child nodes of a single ZooKeeper path. The flat ledger manager creates <a href="https://zookeeper.apache.org/doc/trunk/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming" target="_blank" rel="noopener noreferrer">sequential nodes</a> to ensure the uniqueness of the ledger ID and prefixes all nodes with <code>L</code>. 
Bookie servers manage their own active ledgers in a hash map so that it&#x27;s easy to find which ledgers have been deleted from ZooKeeper and then garbage collect them.</p><p>The flat ledger manager&#x27;s garbage collection flow proceeds as follows:</p><ul><li>All existing ledgers are fetched from ZooKeeper (<code>zkActiveLedgers</code>)</li><li>All ledgers currently active within the bookie are fetched (<code>bkActiveLedgers</code>)</li><li>The currently active ledgers are looped through to determine which ledgers don&#x27;t currently exist in ZooKeeper. Those are then garbage collected.</li><li>The <em>hierarchical ledger manager</em> stores ledgers&#x27; metadata in two-level <a href="https://zookeeper.apache.org/doc/current/zookeeperOver.html#Nodes+and+ephemeral+nodes" target="_blank" rel="noopener noreferrer">znodes</a>.</li></ul><h3 class="anchor anchorWithStickyNavbar_LWe7" id="hierarchical-ledger-manager">Hierarchical ledger manager<a href="#hierarchical-ledger-manager" class="hash-link" aria-label="Direct link to Hierarchical ledger manager" title="Direct link to Hierarchical ledger manager"></a></h3><p>The <em>hierarchical ledger manager</em>, implemented in the <a href="https://bookkeeper.apache.org//docs/latest/api/javadoc/org/apache/bookkeeper/meta/HierarchicalLedgerManager" target="_blank" rel="noopener noreferrer"><code>HierarchicalLedgerManager</code></a> class, first obtains a global unique ID from ZooKeeper using an <a href="https://zookeeper.apache.org/doc/current/api/org/apache/zookeeper/CreateMode.html#EPHEMERAL_SEQUENTIAL" target="_blank" rel="noopener noreferrer"><code>EPHEMERAL_SEQUENTIAL</code></a> znode. 
Since ZooKeeper&#x27;s sequence counter has a format of <code>%010d</code> (10 digits with 0 padding, for example <code>&lt;path&gt;0000000001</code>), the hierarchical ledger manager splits the generated ID into 3 parts:</p><div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain">level1 </span><span class="token punctuation" style="color:#393A34">(</span><span class="token number" style="color:#36acaa">2</span><span class="token plain"> digits</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain">level2 </span><span class="token punctuation" style="color:#393A34">(</span><span class="token number" style="color:#36acaa">4</span><span class="token plain"> digits</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain">level3 </span><span class="token punctuation" style="color:#393A34">(</span><span class="token number" style="color:#36acaa">4</span><span class="token plain"> digits</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg class="copyButtonIcon_y97N" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 
6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_LjdS" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>These three parts are used to form the actual ledger node path to store ledger metadata:</p><div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token punctuation" style="color:#393A34">{</span><span class="token plain">ledgers_root_path</span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">/</span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain">level1</span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">/</span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain">level2</span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain">/L</span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain">level3</span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg class="copyButtonIcon_y97N" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_LjdS" viewBox="0 0 24 24"><path 
d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>For example, ledger 0000000001 is split into three parts, 00, 0000, and 0001, and stored in znode <code>/{ledgers_root_path}/00/0000/L0001</code>. Each znode could have as many as 10,000 ledgers, which avoids the problem of the child list being larger than the maximum ZooKeeper packet size (which is the <a href="https://issues.apache.org/jira/browse/BOOKKEEPER-39" target="_blank" rel="noopener noreferrer">limitation</a> that initially prompted the creation of the hierarchical ledger manager).</p></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/4.5.1/getting-started/run-locally"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Run bookies locally</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/4.5.1/deployment/manual"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Manual deployment</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#basic-terms" class="table-of-contents__link toc-highlight">Basic terms</a></li><li><a href="#entries" class="table-of-contents__link toc-highlight">Entries</a></li><li><a href="#ledgers" class="table-of-contents__link toc-highlight">Ledgers</a></li><li><a href="#clients-and-apis" class="table-of-contents__link toc-highlight">Clients and APIs</a></li><li><a href="#bookies" class="table-of-contents__link toc-highlight">Bookies</a><ul><li><a href="#motivation" class="table-of-contents__link toc-highlight">Motivation</a></li></ul></li><li><a href="#metadata-storage" class="table-of-contents__link toc-highlight">Metadata storage</a></li><li><a 
href="#data-management-in-bookies" class="table-of-contents__link toc-highlight">Data management in bookies</a><ul><li><a href="#journals" class="table-of-contents__link toc-highlight">Journals</a></li><li><a href="#entry-logs" class="table-of-contents__link toc-highlight">Entry logs</a></li><li><a href="#index-files" class="table-of-contents__link toc-highlight">Index files</a></li><li><a href="#ledger-cache" class="table-of-contents__link toc-highlight">Ledger cache</a></li><li><a href="#adding-entries" class="table-of-contents__link toc-highlight">Adding entries</a></li><li><a href="#data-flush" class="table-of-contents__link toc-highlight">Data flush</a></li><li><a href="#data-compaction" class="table-of-contents__link toc-highlight">Data compaction</a></li></ul></li><li><a href="#zookeeper-metadata" class="table-of-contents__link toc-highlight">ZooKeeper metadata</a></li><li><a href="#ledger-manager" class="table-of-contents__link toc-highlight">Ledger manager</a><ul><li><a href="#flat-ledger-manager" class="table-of-contents__link toc-highlight">Flat ledger manager</a></li><li><a href="#hierarchical-ledger-manager" class="table-of-contents__link toc-highlight">Hierarchical ledger manager</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Documentation</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/docs/overview">Overview</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/getting-started/installation">Getting started</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/deployment/manual">Deployment</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/admin/bookies">Administration</a></li><li class="footer__item"><a class="footer__link-item" 
href="/docs/api/overview">API</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/security/overview">Security</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/development/protocol">Development</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/reference/config">Reference</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/community/mailing-lists">Mailing lists</a></li><li class="footer__item"><a class="footer__link-item" href="/community/slack">Slack</a></li><li class="footer__item"><a href="https://github.com/apache/bookkeeper" target="_blank" rel="noopener noreferrer" class="footer__link-item">Github<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://twitter.com/asfbookkeeper" target="_blank" rel="noopener noreferrer" class="footer__link-item">Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">Project</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/project/who">Who are we?</a></li><li class="footer__item"><a class="footer__link-item" href="/project/bylaws">Bylaws</a></li><li class="footer__item"><a href="https://apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" 
class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a class="footer__link-item" href="/project/privacy">Privacy policy</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright"><footer class="footer">
<div class="container">
<div class="content has-text-centered">
<p>
Copyright &copy; 2016 - 2024 <a href="https://www.apache.org/">The Apache Software Foundation</a>,<br> licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, version 2.0</a>.
</p>
<p>
Apache BookKeeper, BookKeeper®, Apache®, the Apache feature logo, and the Apache BookKeeper logo are either registered trademarks or trademarks of The Apache Software Foundation.
</p>
</div>
</div>
</footer>
</div></div></div></footer></div>
<script src="/assets/js/runtime~main.435d5f64.js"></script>
<script src="/assets/js/main.0f234beb.js"></script>
</body>
</html>