blob: f8795e3ad3e4c5f1411399ee3f9678eebbcc1ec6 [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-4.12.1 plugin-docs plugin-id-default docs-doc-id-admin/autorecovery">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v2.4.0">
<title data-rh="true">Using AutoRecovery | Apache BookKeeper</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://bookkeeper.apache.org/docs/4.12.1/admin/autorecovery"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="4.12.1"><meta data-rh="true" name="docusaurus_tag" content="docs-default-4.12.1"><meta data-rh="true" name="docsearch:version" content="4.12.1"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-4.12.1"><meta data-rh="true" property="og:title" content="Using AutoRecovery | Apache BookKeeper"><meta data-rh="true" name="description" content="When a bookie crashes, all ledgers on that bookie become under-replicated. In order to bring all ledgers in your BookKeeper cluster back to full replication, you&#x27;ll need to recover the data from any offline bookies. There are two ways to recover bookies&#x27; data:"><meta data-rh="true" property="og:description" content="When a bookie crashes, all ledgers on that bookie become under-replicated. In order to bring all ledgers in your BookKeeper cluster back to full replication, you&#x27;ll need to recover the data from any offline bookies. There are two ways to recover bookies&#x27; data:"><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://bookkeeper.apache.org/docs/4.12.1/admin/autorecovery"><link data-rh="true" rel="alternate" href="https://bookkeeper.apache.org/docs/4.12.1/admin/autorecovery" hreflang="en"><link data-rh="true" rel="alternate" href="https://bookkeeper.apache.org/docs/4.12.1/admin/autorecovery" hreflang="x-default"><link rel="stylesheet" href="/assets/css/styles.49914aab.css">
<link rel="preload" href="/assets/js/runtime~main.99a29ea0.js" as="script">
<link rel="preload" href="/assets/js/main.812b2dbb.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}return t}()||function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus">
<div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/bk-logo.svg" alt="Apache Bookkeeper" class="themedImage_ToTc themedImage--light_HNdA"><img src="/img/bk-logo.svg" alt="Apache Bookkeeper" class="themedImage_ToTc themedImage--dark_i4oU"></div><b class="navbar__title text--truncate">Apache BookKeeper</b></a><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs/4.12.1/overview/">Documentation</a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Community</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/community/mailing-lists">Mailing lists</a></li><li><a class="dropdown__link" href="/community/slack">Slack</a></li><li><a href="https://github.com/apache/bookkeeper/issues" target="_blank" rel="noopener noreferrer" class="dropdown__link">Github issues<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a class="dropdown__link" href="/community/releases">Release management</a></li><li><a class="dropdown__link" href="/community/meeting">Community meetings</a></li><li><a class="dropdown__link" href="/community/contributing">Contribution guide</a></li><li><a class="dropdown__link" href="/community/coding-guide">Coding guide</a></li><li><a class="dropdown__link" href="/community/testing">Testing guide</a></li><li><a class="dropdown__link" href="/community/issue-report">Issue report guide</a></li><li><a class="dropdown__link" href="/community/release-guide">Release guide</a></li><li><a class="dropdown__link" href="/community/presentations">Presentations</a></li><li><a class="dropdown__link" href="/community/bookkeeper-proposals">BookKeeper proposals (BP)</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Project</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/project/who">Who are we?</a></li><li><a class="dropdown__link" href="/project/bylaws">Bylaws</a></li><li><a href="https://apache.org/licenses" target="_blank" rel="noopener noreferrer" class="dropdown__link">License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a class="dropdown__link" href="/project/privacy">Privacy policy</a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link" aria-haspopup="true" aria-expanded="false" role="button" href="/docs/4.12.1/overview/">4.12.1</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/admin/autorecovery">Next</a></li><li><a class="dropdown__link" href="/docs/admin/autorecovery">4.16.5</a></li><li><a class="dropdown__link" href="/docs/4.15.5/admin/autorecovery">4.15.5</a></li><li><a class="dropdown__link" href="/docs/4.14.8/admin/autorecovery">4.14.8</a></li><li><a class="dropdown__link" href="/docs/4.13.0/admin/autorecovery">4.13.0</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/4.12.1/admin/autorecovery">4.12.1</a></li><li><a class="dropdown__link" href="/docs/4.11.1/admin/autorecovery">4.11.1</a></li><li><a class="dropdown__link" href="/docs/4.10.0/admin/autorecovery">4.10.0</a></li><li><a class="dropdown__link" href="/docs/4.9.2/admin/autorecovery">4.9.2</a></li><li><a class="dropdown__link" href="/docs/4.8.2/admin/autorecovery">4.8.2</a></li><li><a class="dropdown__link" href="/docs/4.7.3/admin/autorecovery">4.7.3</a></li><li><a class="dropdown__link" href="/docs/4.6.2/admin/autorecovery">4.6.2</a></li><li><a class="dropdown__link" href="/docs/4.5.1/admin/autorecovery">4.5.1</a></li></ul></div><a class="navbar__item navbar__link" href="/releases">Download</a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type="button" disabled="" title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)" aria-live="polite"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_pyhR"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_wfgR"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="searchBox_ZlJk"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0 docsWrapper_BCFX"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docPage__5DB"><aside class="theme-doc-sidebar-container docSidebarContainer_b6E3"><div class="sidebarViewport_Xe31"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/4.12.1/overview/">Overview</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/4.12.1/getting-started/installation">Getting started</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/4.12.1/deployment/manual">Deployment</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" aria-expanded="true" href="/docs/4.12.1/admin/bookies">Administration</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/4.12.1/admin/bookies">BookKeeper administration</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/4.12.1/admin/autorecovery">AutoRecovery</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/4.12.1/admin/metrics">Metrics collection</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/4.12.1/admin/upgrade">Upgrade</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/4.12.1/admin/http">Admin REST API</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/4.12.1/admin/decomission">Decommissioning Bookies</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/4.12.1/api/overview">API</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/4.12.1/security/overview">Security</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/4.12.1/development/protocol">Development</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/4.12.1/reference/config">Reference</a></div></li></ul></nav></div></div></aside><main class="docMainContainer_gTbr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><span class="theme-doc-version-badge badge badge--secondary">Version: 4.12.1</span><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Using AutoRecovery</h1></header><p>When a bookie crashes, all ledgers on that bookie become under-replicated. In order to bring all ledgers in your BookKeeper cluster back to full replication, you&#x27;ll need to <em>recover</em> the data from any offline bookies. There are two ways to recover bookies&#x27; data:</p><ol><li>Using <a href="#manual-recovery">manual recovery</a></li><li>Automatically, using <a href="#autorecovery"><em>AutoRecovery</em></a></li></ol><h2 class="anchor anchorWithStickyNavbar_LWe7" id="manual-recovery">Manual recovery<a href="#manual-recovery" class="hash-link" aria-label="Direct link to Manual recovery" title="Direct link to Manual recovery"></a></h2><p>You can manually recover failed bookies using the <a href="/docs/4.12.1/reference/cli"><code>bookkeeper</code></a> command-line tool. You need to specify:</p><ul><li>the <code>shell recover</code> option </li><li>the IP and port for the failed bookie</li></ul><p>Here&#x27;s an example:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">$ bin/bookkeeper shell recover </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">192.168</span><span class="token plain">.1.10:3181 </span><span class="token comment" style="color:#999988;font-style:italic"># IP and port for the failed bookie</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg class="copyButtonIcon_y97N" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_LjdS" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>If you wish, you can also specify which ledgers you&#x27;d like to recover. Here&#x27;s an example:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">$ bin/bookkeeper shell recover </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token number" style="color:#36acaa">192.168</span><span class="token plain">.1.10:3181 </span><span class="token punctuation" style="color:#393A34">\</span><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic"># IP and port for the failed bookie</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> --ledger ledgerID </span><span class="token comment" style="color:#999988;font-style:italic"># ledgerID which you want to recover </span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg class="copyButtonIcon_y97N" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_LjdS" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h3 class="anchor anchorWithStickyNavbar_LWe7" id="the-manual-recovery-process">The manual recovery process<a href="#the-manual-recovery-process" class="hash-link" aria-label="Direct link to The manual recovery process" title="Direct link to The manual recovery process"></a></h3><p>When you initiate a manual recovery process, the following happens:</p><ol><li>The client (the process running ) reads the metadata of active ledgers from ZooKeeper.</li><li>The ledgers that contain fragments from the failed bookie in their ensemble are selected.</li><li>A recovery process is initiated for each ledger in this list and the rereplication process is run for each ledger.</li><li>Once all the ledgers are marked as fully replicated, bookie recovery is finished.</li></ol><h2 class="anchor anchorWithStickyNavbar_LWe7" id="autorecovery">AutoRecovery<a href="#autorecovery" class="hash-link" aria-label="Direct link to AutoRecovery" title="Direct link to AutoRecovery"></a></h2><p>AutoRecovery is a process that:</p><ul><li>automatically detects when a bookie in your BookKeeper cluster has become unavailable and then</li><li>rereplicates all the ledgers that were stored on that bookie.</li></ul><p>AutoRecovery can be run in three ways:</p><ol><li>On dedicated nodes in your BookKeeper cluster</li><li>On the same machines on which your bookies are running</li><li>On a combination of autorecovery nodes and bookie nodes</li></ol><h2 class="anchor anchorWithStickyNavbar_LWe7" id="running-autorecovery">Running AutoRecovery<a href="#running-autorecovery" class="hash-link" aria-label="Direct link to Running AutoRecovery" title="Direct link to Running AutoRecovery"></a></h2><p>You can start up AutoRecovery using the <a href="/docs/4.12.1/reference/cli#bookkeeper-autorecovery"><code>autorecovery</code></a> command of the <a href="/docs/4.12.1/reference/cli"><code>bookkeeper</code></a> CLI tool.</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">$ bin/bookkeeper autorecovery</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg class="copyButtonIcon_y97N" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_LjdS" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><blockquote><p>The most important thing to ensure when starting up AutoRecovery is that the ZooKeeper connection string specified by the <a href="/docs/4.12.1/reference/config#zkServers"><code>zkServers</code></a> parameter points to the right ZooKeeper cluster.</p></blockquote><p>If you start up AutoRecovery on a machine that is already running a bookie, then the AutoRecovery process will run alongside the bookie on a separate thread.</p><p>You can also start up AutoRecovery on a fresh machine if you&#x27;d like to create a dedicated cluster of AutoRecovery nodes.</p><p>Note that if you <em>only</em> want the AutoRecovery process to run on your dedicated AutoRecovery nodes, you must set <code>autoRecoveryDaemonEnabled</code> to <code>false</code> in the <code>bookkeeper</code> configuration. Otherwise,
bookkeeper nodes will also handle rereplication work.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="configuration">Configuration<a href="#configuration" class="hash-link" aria-label="Direct link to Configuration" title="Direct link to Configuration"></a></h2><p>There are a handful of AutoRecovery-related configs in the <a href="/docs/4.12.1/reference/config"><code>bk_server.conf</code></a> configuration file. For a listing of those configs, see <a href="/docs/4.12.1/reference/config#autorecovery-settings">AutoRecovery settings</a>.</p><h2 class="anchor anchorWithStickyNavbar_LWe7" id="disable-autorecovery">Disable AutoRecovery<a href="#disable-autorecovery" class="hash-link" aria-label="Direct link to Disable AutoRecovery" title="Direct link to Disable AutoRecovery"></a></h2><p>You can disable AutoRecovery for the whole cluster at any time, for example during maintenance. Disabling AutoRecovery ensures that bookies&#x27; data isn&#x27;t unnecessarily rereplicated when the bookie is only taken down for a short period of time, for example when the bookie is being updated or the configuration if being changed.</p><p>You can disable AutoRecover for the whole cluster using the <a href="/docs/4.12.1/reference/cli#bookkeeper-shell-autorecovery"><code>bookkeeper</code></a> CLI tool:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">$ bin/bookkeeper shell autorecovery -disable</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg class="copyButtonIcon_y97N" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_LjdS" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><p>Once disabled, you can reenable AutoRecovery for the whole cluster using the <a href="/docs/4.12.1/reference/cli#bookkeeper-shell-autorecovery"><code>enable</code></a> shell command:</p><div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">$ bin/bookkeeper shell autorecovery -enable</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg class="copyButtonIcon_y97N" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_LjdS" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div><h2 class="anchor anchorWithStickyNavbar_LWe7" id="autorecovery-architecture">AutoRecovery architecture<a href="#autorecovery-architecture" class="hash-link" aria-label="Direct link to AutoRecovery architecture" title="Direct link to AutoRecovery architecture"></a></h2><p>AutoRecovery has two components:</p><ol><li>The <a href="#auditor"><strong>auditor</strong></a> (see the <a href="https://bookkeeper.apache.org//docs/latest/api/javadoc/org/apache/bookkeeper/replication/Auditor.html" target="_blank" rel="noopener noreferrer"><code>Auditor</code></a> class) is a singleton node that watches bookies to see if they fail and creates rereplication tasks for the ledgers on failed bookies.</li><li>The <a href="#replication-worker"><strong>replication worker</strong></a> (see the <a href="https://bookkeeper.apache.org//docs/latest/api/javadoc/org/apache/bookkeeper/replication/ReplicationWorker.html" target="_blank" rel="noopener noreferrer"><code>ReplicationWorker</code></a> class) runs on each bookie and executes rereplication tasks provided by the auditor.</li></ol><p>Both of these components run as threads in the <a href="https://bookkeeper.apache.org//docs/latest/api/javadoc/org/apache/bookkeeper/replication/AutoRecoveryMain" target="_blank" rel="noopener noreferrer"><code>AutoRecoveryMain</code></a> process, which runs on each bookie in the cluster. All recovery nodes participate in leader election---using ZooKeeper---to decide which node becomes the auditor. Nodes that fail to become the auditor watch the elected auditor and run an election process again if they see that the auditor node has failed.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="auditor">Auditor<a href="#auditor" class="hash-link" aria-label="Direct link to Auditor" title="Direct link to Auditor"></a></h3><p>The auditor watches all bookies in the cluster that are registered with ZooKeeper. Bookies register with ZooKeeper at startup. If the bookie crashes or is killed, the bookie&#x27;s registration in ZooKeeper disappears and the auditor is notified of the change in the list of registered bookies.</p><p>When the auditor sees that a bookie has disappeared, it immediately scans the complete ledger list to find ledgers that have data stored on the failed bookie. Once it has a list of ledgers for that bookie, the auditor will publish a rereplication task for each ledger under the <code>/underreplicated/</code> <a href="https://zookeeper.apache.org/doc/current/zookeeperOver.html" target="_blank" rel="noopener noreferrer">znode</a> in ZooKeeper.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="replication-worker">Replication Worker<a href="#replication-worker" class="hash-link" aria-label="Direct link to Replication Worker" title="Direct link to Replication Worker"></a></h3><p>Each replication worker watches for tasks being published by the auditor on the <code>/underreplicated/</code> znode in ZooKeeper. When a new task appears, the replication worker will try to get a lock on it. If it cannot acquire the lock, it will try the next entry. The locks are implemented using ZooKeeper ephemeral znodes.</p><p>The replication worker will scan through the rereplication task&#x27;s ledger for fragments of which its local bookie is not a member. When it finds fragments matching this criterion, it will replicate the entries of that fragment to the local bookie. If, after this process, the ledger is fully replicated, the ledgers entry under /underreplicated/ is deleted, and the lock is released. If there is a problem replicating, or there are still fragments in the ledger which are still underreplicated (due to the local bookie already being part of the ensemble for the fragment), then the lock is simply released.</p><p>If the replication worker finds a fragment which needs rereplication, but does not have a defined endpoint (i.e. the final fragment of a ledger currently being written to), it will wait for a grace period before attempting rereplication. If the fragment needing rereplication still does not have a defined endpoint, the ledger is fenced and rereplication then takes place.</p><p>This avoids the situation in which a client is writing to a ledger and one of the bookies goes down, but the client has not written an entry to that bookie before rereplication takes place. The client could continue writing to the old fragment, even though the ensemble for the fragment had changed. This could lead to data loss. Fencing prevents this scenario from happening. In the normal case, the client will try to write to the failed bookie within the grace period, and will have started a new fragment before rereplication starts.</p><p>You can configure this grace period using the <a href="/docs/4.12.1/reference/config#openLedgerRereplicationGracePeriod"><code>openLedgerRereplicationGracePeriod</code></a> parameter.</p><h3 class="anchor anchorWithStickyNavbar_LWe7" id="the-rereplication-process">The rereplication process<a href="#the-rereplication-process" class="hash-link" aria-label="Direct link to The rereplication process" title="Direct link to The rereplication process"></a></h3><p>The ledger rereplication process happens in these steps:</p><ol><li>The client goes through all ledger fragments in the ledger, selecting those that contain the failed bookie.</li><li>A recovery process is initiated for each ledger fragment in this list.<ol><li>The client selects a bookie to which all entries in the ledger fragment will be replicated; In the case of autorecovery, this will always be the local bookie.</li><li>The client reads entries that belong to the ledger fragment from other bookies in the ensemble and writes them to the selected bookie.</li><li>Once all entries have been replicated, the zookeeper metadata for the fragment is updated to reflect the new ensemble.</li><li>The fragment is marked as fully replicated in the recovery tool.</li></ol></li><li>Once all ledger fragments are marked as fully replicated, the ledger is marked as fully replicated.</li></ol></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/4.12.1/admin/bookies"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">BookKeeper administration</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/4.12.1/admin/metrics"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Metric collection</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#manual-recovery" class="table-of-contents__link toc-highlight">Manual recovery</a><ul><li><a href="#the-manual-recovery-process" class="table-of-contents__link toc-highlight">The manual recovery process</a></li></ul></li><li><a href="#autorecovery" class="table-of-contents__link toc-highlight">AutoRecovery</a></li><li><a href="#running-autorecovery" class="table-of-contents__link toc-highlight">Running AutoRecovery</a></li><li><a href="#configuration" class="table-of-contents__link toc-highlight">Configuration</a></li><li><a href="#disable-autorecovery" class="table-of-contents__link toc-highlight">Disable AutoRecovery</a></li><li><a href="#autorecovery-architecture" class="table-of-contents__link toc-highlight">AutoRecovery architecture</a><ul><li><a href="#auditor" class="table-of-contents__link toc-highlight">Auditor</a></li><li><a href="#replication-worker" class="table-of-contents__link toc-highlight">Replication Worker</a></li><li><a href="#the-rereplication-process" class="table-of-contents__link toc-highlight">The rereplication process</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Documentation</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/docs/overview">Overview</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/getting-started/installation">Getting started</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/deployment/manual">Deployment</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/admin/bookies">Administration</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/api/overview">API</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/security/overview">Security</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/development/protocol">Development</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/reference/config">Reference</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/community/mailing-lists">Mailing lists</a></li><li class="footer__item"><a class="footer__link-item" href="/community/slack">Slack</a></li><li class="footer__item"><a href="https://github.com/apache/bookkeeper" target="_blank" rel="noopener noreferrer" class="footer__link-item">Github<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://twitter.com/asfbookkeeper" target="_blank" rel="noopener noreferrer" class="footer__link-item">Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">Project</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/project/who">Who are we?</a></li><li class="footer__item"><a class="footer__link-item" href="/project/bylaws">Bylaws</a></li><li class="footer__item"><a href="https://apache.org/licenses" target="_blank" rel="noopener noreferrer" class="footer__link-item">License<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a class="footer__link-item" href="/project/privacy">Privacy policy</a></li><li class="footer__item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Sponsorship<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="footer__link-item">Thanks<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright"><footer class="footer">
<div class="container">
<div class="content has-text-centered">
<p>
Copyright &copy; 2016 - 2024 <a href="https://www.apache.org/">The Apache Software Foundation</a>,<br> licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, version 2.0</a>.
</p>
<p>
Apache BookKeeper, BookKeeper®, Apache®, the Apache feature logo, and the Apache BookKeeper logo are either registered trademarks or trademarks of The Apache Software Foundation.
</p>
</div>
</div>
</footer>
</div></div></div></footer></div>
<script src="/assets/js/runtime~main.99a29ea0.js"></script>
<script src="/assets/js/main.812b2dbb.js"></script>
</body>
</html>