<!-- blob: bfecd39bdc2a8934295c326196739a72631fdec3 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Pegasus | Zk Migration</title>
<link rel="stylesheet" href="/assets/css/app.css">
<link rel="shortcut icon" href="/assets/images/favicon.ico">
<link rel="stylesheet" href="/assets/css/utilities.min.css">
<link rel="stylesheet" href="/assets/css/docsearch.v3.css">
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<meta name="generator" content="Jekyll v4.3.3" />
<meta property="og:title" content="Zk Migration" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="Pegasus’s Meta Server uses Zookeeper to store metadata and leader election, so the instability of the Zookeeper service can cause instability in Pegasus. If necessary, Pegasus metadata needs to be migrated to other more stable or idle Zookeeper." />
<meta property="og:description" content="Pegasus’s Meta Server uses Zookeeper to store metadata and leader election, so the instability of the Zookeeper service can cause instability in Pegasus. If necessary, Pegasus metadata needs to be migrated to other more stable or idle Zookeeper." />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2024-04-22T13:02:52+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Zk Migration" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2024-04-22T13:02:52+00:00","datePublished":"2024-04-22T13:02:52+00:00","description":"Pegasus’s Meta Server uses Zookeeper to store metadata and leader election, so the instability of the Zookeeper service can cause instability in Pegasus. If necessary, Pegasus metadata needs to be migrated to other more stable or idle Zookeeper.","headline":"Zk Migration","mainEntityOfPage":{"@type":"WebPage","@id":"/administration/zk-migration"},"url":"/administration/zk-migration"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<div class="dashboard-panel-header has-text-centered">
<a href="/">
<img src="/assets/images/pegasus-logo-inv.png" style="width: 80%;" alt="Pegasus home">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">The Pegasus documentation</p>
<ul class="menu-list">
<li>
<a href="/docs/downloads"
class="">
Downloads
</a>
</li>
</ul>
<p class="menu-label">Building Pegasus</p>
<ul class="menu-list">
<li>
<a href="/docs/build/compile-by-docker"
class="">
Compile by docker (recommended)
</a>
</li>
<li>
<a href="/docs/build/compile-from-source"
class="">
Compile from source
</a>
</li>
</ul>
<p class="menu-label">Client Libs</p>
<ul class="menu-list">
<li>
<a href="/clients/java-client"
class="">
Java Client
</a>
</li>
<li>
<a href="/clients/cpp-client"
class="">
C++ Client
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang Client
</a>
</li>
<li>
<a href="/clients/python-client"
class="">
Python Client
</a>
</li>
<li>
<a href="/clients/node-client"
class="">
NodeJS Client
</a>
</li>
<li>
<a href="/clients/scala-client"
class="">
Scala Client
</a>
</li>
</ul>
<p class="menu-label">Tools</p>
<ul class="menu-list">
<li>
<a href="/docs/tools/shell"
class="">
Pegasus Shell
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
Admin CLI
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
Pegasus data access CLI
</a>
</li>
</ul>
<p class="menu-label">API</p>
<ul class="menu-list">
<li>
<a href="/api/ttl"
class="">
TTL(Time To Live)
</a>
</li>
<li>
<a href="/api/single-atomic"
class="">
Single-Atomic Operations
</a>
</li>
<li>
<a href="/api/redis"
class="">
Redis Adaption
</a>
</li>
<li>
<a href="/api/geo"
class="">
GEO Support
</a>
</li>
<li>
<a href="/api/http"
class="">
HTTP API
</a>
</li>
</ul>
<p class="menu-label">Admin</p>
<ul class="menu-list">
<li>
<a href="/administration/deployment"
class="">
Deployment
</a>
</li>
<li>
<a href="/administration/config"
class="">
Configurations
</a>
</li>
<li>
<a href="/administration/rebalance"
class="">
Rebalance
</a>
</li>
<li>
<a href="/administration/monitoring"
class="">
Monitoring
</a>
</li>
<li>
<a href="/administration/rolling-update"
class="">
Rolling Restart and Upgrade
</a>
</li>
<li>
<a href="/administration/scale-in-out"
class="">
Scale-in and Scale-out
</a>
</li>
<li>
<a href="/administration/resource-management"
class="">
Resource Management
</a>
</li>
<li>
<a href="/administration/cold-backup"
class="">
Cold Backup
</a>
</li>
<li>
<a href="/administration/meta-recovery"
class="">
Metadata Recovery
</a>
</li>
<li>
<a href="/administration/replica-recovery"
class="">
Replica Data Recovery
</a>
</li>
<li>
<a href="/administration/zk-migration"
class="is-active">
Zookeeper Migration
</a>
</li>
<li>
<a href="/administration/table-migration"
class="">
Table Migration
</a>
</li>
<li>
<a href="/administration/table-soft-delete"
class="">
Table Soft-Delete
</a>
</li>
<li>
<a href="/administration/table-env"
class="">
Table Environment Variables
</a>
</li>
<li>
<a href="/administration/remote-commands"
class="">
Remote Command
</a>
</li>
<li>
<a href="/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/administration/duplication"
class="">
Duplication
</a>
</li>
<li>
<a href="/administration/compression"
class="">
Data Compression
</a>
</li>
<li>
<a href="/administration/throttling"
class="">
Throttling
</a>
</li>
<li>
<a href="/administration/experiences"
class="">
Experiences
</a>
</li>
<li>
<a href="/administration/manual-compact"
class="">
Manual Compact
</a>
</li>
<li>
<a href="/administration/usage-scenario"
class="">
Usage Scenario
</a>
</li>
<li>
<a href="/administration/bad-disk"
class="">
Bad Disk Repair
</a>
</li>
<li>
<a href="/administration/whitelist"
class="">
Replica Server Whitelist
</a>
</li>
<li>
<a href="/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/administration/hotspot-detection"
class="">
Hotspot Detection
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/" class="navbar-item">
<!-- Pegasus Icon -->
<img src="/assets/images/pegasus-square.png" alt="Pegasus home">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<a class="button is-light is-outlined is-inverted" href="/zh/administration/zk-migration"><strong>中文</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
The Pegasus documentation
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/downloads"
class="navbar-item ">
Downloads
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Building Pegasus
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/build/compile-by-docker"
class="navbar-item ">
Compile by docker (recommended)
</a>
<a href="/docs/build/compile-from-source"
class="navbar-item ">
Compile from source
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Client Libs
</span>
</a>
<div class="navbar-dropdown">
<a href="/clients/java-client"
class="navbar-item ">
Java Client
</a>
<a href="/clients/cpp-client"
class="navbar-item ">
C++ Client
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang Client
</a>
<a href="/clients/python-client"
class="navbar-item ">
Python Client
</a>
<a href="/clients/node-client"
class="navbar-item ">
NodeJS Client
</a>
<a href="/clients/scala-client"
class="navbar-item ">
Scala Client
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Tools
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/tools/shell"
class="navbar-item ">
Pegasus Shell
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
Admin CLI
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
Pegasus data access CLI
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
API
</span>
</a>
<div class="navbar-dropdown">
<a href="/api/ttl"
class="navbar-item ">
TTL(Time To Live)
</a>
<a href="/api/single-atomic"
class="navbar-item ">
Single-Atomic Operations
</a>
<a href="/api/redis"
class="navbar-item ">
Redis Adaption
</a>
<a href="/api/geo"
class="navbar-item ">
GEO Support
</a>
<a href="/api/http"
class="navbar-item ">
HTTP API
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Admin
</span>
</a>
<div class="navbar-dropdown">
<a href="/administration/deployment"
class="navbar-item ">
Deployment
</a>
<a href="/administration/config"
class="navbar-item ">
Configurations
</a>
<a href="/administration/rebalance"
class="navbar-item ">
Rebalance
</a>
<a href="/administration/monitoring"
class="navbar-item ">
Monitoring
</a>
<a href="/administration/rolling-update"
class="navbar-item ">
Rolling Restart and Upgrade
</a>
<a href="/administration/scale-in-out"
class="navbar-item ">
Scale-in and Scale-out
</a>
<a href="/administration/resource-management"
class="navbar-item ">
Resource Management
</a>
<a href="/administration/cold-backup"
class="navbar-item ">
Cold Backup
</a>
<a href="/administration/meta-recovery"
class="navbar-item ">
Metadata Recovery
</a>
<a href="/administration/replica-recovery"
class="navbar-item ">
Replica Data Recovery
</a>
<a href="/administration/zk-migration"
class="navbar-item is-active">
Zookeeper Migration
</a>
<a href="/administration/table-migration"
class="navbar-item ">
Table Migration
</a>
<a href="/administration/table-soft-delete"
class="navbar-item ">
Table Soft-Delete
</a>
<a href="/administration/table-env"
class="navbar-item ">
Table Environment Variables
</a>
<a href="/administration/remote-commands"
class="navbar-item ">
Remote Command
</a>
<a href="/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/administration/duplication"
class="navbar-item ">
Duplication
</a>
<a href="/administration/compression"
class="navbar-item ">
Data Compression
</a>
<a href="/administration/throttling"
class="navbar-item ">
Throttling
</a>
<a href="/administration/experiences"
class="navbar-item ">
Experiences
</a>
<a href="/administration/manual-compact"
class="navbar-item ">
Manual Compact
</a>
<a href="/administration/usage-scenario"
class="navbar-item ">
Usage Scenario
</a>
<a href="/administration/bad-disk"
class="navbar-item ">
Bad Disk Repair
</a>
<a href="/administration/whitelist"
class="navbar-item ">
Replica Server Whitelist
</a>
<a href="/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/administration/hotspot-detection"
class="navbar-item ">
Hotspot Detection
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<a class="button is-light is-outlined is-inverted" href="/zh/administration/zk-migration"><strong>中文</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">Zookeeper Migration</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<p>Pegasus’s Meta Server uses Zookeeper to store metadata and to perform leader election, so instability of the Zookeeper service can cause instability in Pegasus. If necessary, the Pegasus metadata can be migrated to another Zookeeper cluster that is more stable or less loaded.</p>
<p>There are two ways to migrate Zookeeper metadata: through metadata recovery, or through the <code class="language-plaintext highlighter-rouge">zkcopy</code> tool.</p>
<h1 id="migration-through-metadata-recovery">Migration through metadata recovery</h1>
<p>Pegasus provides a <a href="meta-recovery">Metadata Recovery</a> function, which can also be used for Zookeeper migration. The basic idea is to configure a new Zookeeper host list and then perform metadata recovery with the <code class="language-plaintext highlighter-rouge">recover</code> command, so that the metadata is migrated to the new Zookeeper.</p>
<ol>
<li>
<p>Back up the table list</p>
<p>Use the <code class="language-plaintext highlighter-rouge">ls</code> command of the shell tools:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; ls -o apps.list
</code></pre></div> </div>
</li>
<li>
<p>Back up the node list</p>
<p>Use the <code class="language-plaintext highlighter-rouge">nodes</code> command of the shell tools:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; nodes -d -o nodes.list
</code></pre></div> </div>
<p>Generate the <code class="language-plaintext highlighter-rouge">recover_node_list</code> file required for metadata recovery:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nb">grep </span>ALIVE nodes.list | <span class="nb">awk</span> <span class="s1">'{print $1}'</span> <span class="o">&gt;</span> recover_node_list
</code></pre></div> </div>
</li>
<li>
<p>Stop all Meta Servers</p>
<p>Stop all Meta Servers, and wait for a period of time (30 seconds by default, depending on the configuration <code class="language-plaintext highlighter-rouge">[replication]config_sync_interval_ms</code>) to ensure that all Replica Servers enter the <code class="language-plaintext highlighter-rouge">INACTIVE</code> state due to the beacon timeout.</p>
</li>
<li>
<p>Modify the Meta Server configuration file</p>
<p>The modified content is as follows:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>[meta_server]
recover_from_replica_server = true
[zookeeper]
hosts_list = {new Zookeeper host list}
</code></pre></div> </div>
<p>They mean:</p>
<ul>
<li>Set <code class="language-plaintext highlighter-rouge">recover_from_replica_server</code> to <code class="language-plaintext highlighter-rouge">true</code> to enable recovering metadata from Replica Servers</li>
<li>Update Zookeeper configuration to the new service addresses</li>
</ul>
</li>
<li>
<p>Start a Meta Server</p>
<p>Start a Meta Server in the cluster; it will become the leader Meta Server of the cluster.</p>
</li>
<li>
<p>Use the <code class="language-plaintext highlighter-rouge">recover</code> command of the shell tools</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; recover -f recover_node_list
</code></pre></div> </div>
</li>
<li>
<p>Modify the configuration file and restart the Meta Server</p>
<p>After successful recovery, it is necessary to modify the configuration file of the Meta Server and reset to non-recovery state:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>[meta_server]
recover_from_replica_server = false
</code></pre></div> </div>
</li>
<li>
<p>Restart all Meta Servers; the cluster then enters the normal state.</p>
</li>
</ol>
<h2 id="sample-script">Sample script</h2>
<p>Refer to the main process in the sample script <a href="https://github.com/apache/incubator-pegasus/blob/master/scripts/pegasus_migrate_zookeeper.sh">pegasus_migrate_zookeeper.sh</a> for Zookeeper metadata migration.</p>
<h1 id="migration-through-the-zkcopy-tool">Migration through the <code class="language-plaintext highlighter-rouge">zkcopy</code> tool</h1>
<p>The basic idea is to use <a href="https://github.com/ksprojects/zkcopy">zkcopy tool</a> to copy the Pegasus metadata from the original Zookeeper to the target Zookeeper, modify the Meta Server configuration file, and restart the cluster.</p>
<ol>
<li>
<p>Stop all follower Meta Servers</p>
<p>In order to prevent follower Meta Servers from acquiring the lock and becoming the new leader when the leader Meta Server is restarted, which would cause metadata inconsistency, it is necessary to keep only the leader Meta Server alive and to stop all follower Meta Servers throughout the entire migration process.</p>
</li>
<li>
<p>Modify the leader Meta Server status to <code class="language-plaintext highlighter-rouge">blind</code></p>
<p>Set the leader Meta Server’s meta_level to <code class="language-plaintext highlighter-rouge">blind</code>, to prohibit any update operations on Zookeeper data and prevent metadata inconsistency during the migration process:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; set_meta_level blind
</code></pre></div> </div>
<blockquote>
<p>For an introduction to Meta Server’s meta_level, please refer to <a href="rebalance">Rebalance</a>.</p>
</blockquote>
</li>
<li>
<p>Use the <code class="language-plaintext highlighter-rouge">zkcopy</code> tool to copy Zookeeper metadata</p>
<p>Obtain the path <code class="language-plaintext highlighter-rouge">zookeeper_root</code> where Pegasus metadata is stored on the Zookeeper through the <code class="language-plaintext highlighter-rouge">cluster_info</code> command of the shell tools, and then use the <code class="language-plaintext highlighter-rouge">zkcopy</code> tool to copy all the data under this path to the new Zookeeper, making sure to copy recursively.</p>
</li>
<li>
<p>Modify configuration file</p>
<p>Modify the configuration file of the Meta Servers and change the <code class="language-plaintext highlighter-rouge">hosts_list</code> value to the new service addresses:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>[zookeeper]
hosts_list = {new Zookeeper host list}
</code></pre></div> </div>
</li>
<li>
<p>Restart the leader Meta Server</p>
<p>Restart the leader Meta Server and use shell tools to <a href="/administration/experiences#troubleshooting">check</a> that the cluster has entered the normal state.</p>
</li>
<li>
<p>Start all follower Meta Servers</p>
<p>Start all follower Meta Servers and check that the cluster has entered the normal state.</p>
</li>
<li>
<p>Clean up data on old Zookeepers</p>
<p>Use the <code class="language-plaintext highlighter-rouge">rmr</code> command of the <a href="https://github.com/openark/zookeepercli">zookeepercli tool</a> to clean up data on old Zookeepers.</p>
</li>
</ol>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="https://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="https://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
Table of contents
</p>
<ul class="menu-list">
<li><a href="#migration-through-metadata-recovery">Migration through metadata recovery</a>
<ul>
<li><a href="#sample-script">Sample script</a></li>
</ul>
</li>
<li><a href="#migration-through-the-zkcopy-tool">Migration through the zkcopy tool</a></li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
// Set up the DocSearch search box for this page.
(function () {
  var searchSettings = {
    // Selector of the placeholder element the search box is attached to.
    container: '#docsearch',
    // Algolia application, index and API key passed to DocSearch.
    appId: 'QRN30RBW0S',
    indexName: 'pegasus-apache',
    apiKey: 'd3a3252fa344359766707a106c4ed88f',
    // NOTE(review): debug is switched on; confirm this is intended for the published site.
    debug: true
  };

  docsearch(searchSettings);
})();
</script>
</body>
</html>