<!-- blob: b58f613942d98cf94357885992f8da9e2a8ca501 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="zh">
<head>
<!-- Document metadata: encoding, viewport, the single page title, styles and scripts. -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- HTML permits exactly one <title> per document; this is the one browsers were already using. -->
<title>Pegasus | Zk Migration</title>
<link rel="stylesheet" href="/zh/assets/css/app.css">
<link rel="shortcut icon" href="/zh/assets/images/favicon.ico">
<link rel="stylesheet" href="/zh/assets/css/utilities.min.css">
<link rel="stylesheet" href="/zh/assets/css/docsearch.v3.css">
<!-- These scripts load synchronously on purpose: the inline docsearch() call at the end of <body> needs them already defined. -->
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<meta name="generator" content="Jekyll v4.3.3" />
<meta property="og:title" content="Zk Migration" />
<!-- This page is the Chinese (/zh/) translation, so the Open Graph locale is zh_CN rather than en_US. -->
<meta property="og:locale" content="zh_CN" />
<meta name="description" content="由于 Pegasus 的 Meta Server 使用 Zookeeper 来存储元数据和选主,所以 Zookeeper 服务的不稳定会造成 Pegasus 服务不稳定,必要时需要迁移元数据到其他更稳定或者空闲的 Zookeeper 上。" />
<meta property="og:description" content="由于 Pegasus 的 Meta Server 使用 Zookeeper 来存储元数据和选主,所以 Zookeeper 服务的不稳定会造成 Pegasus 服务不稳定,必要时需要迁移元数据到其他更稳定或者空闲的 Zookeeper 上。" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2024-04-22T13:02:52+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Zk Migration" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2024-04-22T13:02:52+00:00","datePublished":"2024-04-22T13:02:52+00:00","description":"由于 Pegasus 的 Meta Server 使用 Zookeeper 来存储元数据和选主,所以 Zookeeper 服务的不稳定会造成 Pegasus 服务不稳定,必要时需要迁移元数据到其他更稳定或者空闲的 Zookeeper 上。","headline":"Zk Migration","mainEntityOfPage":{"@type":"WebPage","@id":"/administration/zk-migration"},"url":"/administration/zk-migration"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<!-- Sidebar header: the logo links back to the Chinese home page. -->
<div class="dashboard-panel-header has-text-centered">
<a href="/zh/">
<!-- The image is the link's only content, so its alt text supplies the link's accessible name. -->
<img src="/assets/images/pegasus-logo-inv.png" alt="Pegasus" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">Pegasus 产品文档</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/downloads"
class="">
下载
</a>
</li>
</ul>
<p class="menu-label">编译构建</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/build/compile-by-docker"
class="">
使用 Docker 完成编译(推荐)
</a>
</li>
<li>
<a href="/zh/docs/build/compile-from-source"
class="">
从源码编译
</a>
</li>
</ul>
<p class="menu-label">客户端库</p>
<ul class="menu-list">
<li>
<a href="/zh/clients/java-client"
class="">
Java 客户端
</a>
</li>
<li>
<a href="/zh/clients/cpp-client"
class="">
C++ 客户端
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang 客户端
</a>
</li>
<li>
<a href="/zh/clients/python-client"
class="">
Python 客户端
</a>
</li>
<li>
<a href="/zh/clients/node-client"
class="">
NodeJS 客户端
</a>
</li>
<li>
<a href="/zh/clients/scala-client"
class="">
Scala 客户端
</a>
</li>
</ul>
<p class="menu-label">生态工具</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/tools/shell"
class="">
Pegasus Shell 工具
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
集群管理命令行
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
数据访问命令行
</a>
</li>
</ul>
<p class="menu-label">用户接口</p>
<ul class="menu-list">
<li>
<a href="/zh/api/ttl"
class="">
TTL
</a>
</li>
<li>
<a href="/zh/api/single-atomic"
class="">
单行原子操作
</a>
</li>
<li>
<a href="/zh/api/redis"
class="">
Redis 适配
</a>
</li>
<li>
<a href="/zh/api/geo"
class="">
GEO 支持
</a>
</li>
<li>
<a href="/zh/api/http"
class="">
HTTP 接口
</a>
</li>
</ul>
<p class="menu-label">高效运维</p>
<ul class="menu-list">
<li>
<a href="/zh/administration/deployment"
class="">
集群部署
</a>
</li>
<li>
<a href="/zh/administration/config"
class="">
配置说明
</a>
</li>
<li>
<a href="/zh/administration/rebalance"
class="">
负载均衡
</a>
</li>
<li>
<a href="/zh/administration/monitoring"
class="">
可视化监控
</a>
</li>
<li>
<a href="/zh/administration/rolling-update"
class="">
集群重启和升级
</a>
</li>
<li>
<a href="/zh/administration/scale-in-out"
class="">
集群扩容缩容
</a>
</li>
<li>
<a href="/zh/administration/resource-management"
class="">
资源管理
</a>
</li>
<li>
<a href="/zh/administration/cold-backup"
class="">
冷备份
</a>
</li>
<li>
<a href="/zh/administration/meta-recovery"
class="">
元数据恢复
</a>
</li>
<li>
<a href="/zh/administration/replica-recovery"
class="">
Replica 数据恢复
</a>
</li>
<li>
<a href="/zh/administration/zk-migration"
class="is-active">
Zookeeper 迁移
</a>
</li>
<li>
<a href="/zh/administration/table-migration"
class="">
Table 迁移
</a>
</li>
<li>
<a href="/zh/administration/table-soft-delete"
class="">
Table 软删除
</a>
</li>
<li>
<a href="/zh/administration/table-env"
class="">
Table 环境变量
</a>
</li>
<li>
<a href="/zh/administration/remote-commands"
class="">
远程命令
</a>
</li>
<li>
<a href="/zh/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/zh/administration/duplication"
class="">
跨机房同步
</a>
</li>
<li>
<a href="/zh/administration/compression"
class="">
数据压缩
</a>
</li>
<li>
<a href="/zh/administration/throttling"
class="">
流量控制
</a>
</li>
<li>
<a href="/zh/administration/experiences"
class="">
运维经验
</a>
</li>
<li>
<a href="/zh/administration/manual-compact"
class="">
Manual Compact 功能
</a>
</li>
<li>
<a href="/zh/administration/usage-scenario"
class="">
Usage Scenario 功能
</a>
</li>
<li>
<a href="/zh/administration/bad-disk"
class="">
坏盘检修
</a>
</li>
<li>
<a href="/zh/administration/whitelist"
class="">
Replica Server 白名单
</a>
</li>
<li>
<a href="/zh/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/zh/administration/hotspot-detection"
class="">
热点检测
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/zh/" class="navbar-item">
<!-- Pegasus icon; the alt text names the link because the image is its only content. -->
<img src="/assets/images/pegasus-square.png" alt="Pegasus">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/zk-migration"><strong>En</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Pegasus 产品文档
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/downloads"
class="navbar-item ">
下载
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
编译构建
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/build/compile-by-docker"
class="navbar-item ">
使用 Docker 完成编译(推荐)
</a>
<a href="/zh/docs/build/compile-from-source"
class="navbar-item ">
从源码编译
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
客户端库
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/clients/java-client"
class="navbar-item ">
Java 客户端
</a>
<a href="/zh/clients/cpp-client"
class="navbar-item ">
C++ 客户端
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang 客户端
</a>
<a href="/zh/clients/python-client"
class="navbar-item ">
Python 客户端
</a>
<a href="/zh/clients/node-client"
class="navbar-item ">
NodeJS 客户端
</a>
<a href="/zh/clients/scala-client"
class="navbar-item ">
Scala 客户端
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
生态工具
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/tools/shell"
class="navbar-item ">
Pegasus Shell 工具
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
集群管理命令行
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
数据访问命令行
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
用户接口
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/api/ttl"
class="navbar-item ">
TTL
</a>
<a href="/zh/api/single-atomic"
class="navbar-item ">
单行原子操作
</a>
<a href="/zh/api/redis"
class="navbar-item ">
Redis 适配
</a>
<a href="/zh/api/geo"
class="navbar-item ">
GEO 支持
</a>
<a href="/zh/api/http"
class="navbar-item ">
HTTP 接口
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
高效运维
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/administration/deployment"
class="navbar-item ">
集群部署
</a>
<a href="/zh/administration/config"
class="navbar-item ">
配置说明
</a>
<a href="/zh/administration/rebalance"
class="navbar-item ">
负载均衡
</a>
<a href="/zh/administration/monitoring"
class="navbar-item ">
可视化监控
</a>
<a href="/zh/administration/rolling-update"
class="navbar-item ">
集群重启和升级
</a>
<a href="/zh/administration/scale-in-out"
class="navbar-item ">
集群扩容缩容
</a>
<a href="/zh/administration/resource-management"
class="navbar-item ">
资源管理
</a>
<a href="/zh/administration/cold-backup"
class="navbar-item ">
冷备份
</a>
<a href="/zh/administration/meta-recovery"
class="navbar-item ">
元数据恢复
</a>
<a href="/zh/administration/replica-recovery"
class="navbar-item ">
Replica 数据恢复
</a>
<a href="/zh/administration/zk-migration"
class="navbar-item is-active">
Zookeeper 迁移
</a>
<a href="/zh/administration/table-migration"
class="navbar-item ">
Table 迁移
</a>
<a href="/zh/administration/table-soft-delete"
class="navbar-item ">
Table 软删除
</a>
<a href="/zh/administration/table-env"
class="navbar-item ">
Table 环境变量
</a>
<a href="/zh/administration/remote-commands"
class="navbar-item ">
远程命令
</a>
<a href="/zh/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/zh/administration/duplication"
class="navbar-item ">
跨机房同步
</a>
<a href="/zh/administration/compression"
class="navbar-item ">
数据压缩
</a>
<a href="/zh/administration/throttling"
class="navbar-item ">
流量控制
</a>
<a href="/zh/administration/experiences"
class="navbar-item ">
运维经验
</a>
<a href="/zh/administration/manual-compact"
class="navbar-item ">
Manual Compact 功能
</a>
<a href="/zh/administration/usage-scenario"
class="navbar-item ">
Usage Scenario 功能
</a>
<a href="/zh/administration/bad-disk"
class="navbar-item ">
坏盘检修
</a>
<a href="/zh/administration/whitelist"
class="navbar-item ">
Replica Server 白名单
</a>
<a href="/zh/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/zh/administration/hotspot-detection"
class="navbar-item ">
热点检测
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/zk-migration"><strong>En</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">Zookeeper 迁移</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<p>由于 Pegasus 的 Meta Server 使用 Zookeeper 来存储元数据和选主,所以 Zookeeper 服务的不稳定会造成 Pegasus 服务不稳定,必要时需要迁移元数据到其他更稳定或者空闲的 Zookeeper 上。</p>
<p>Zookeeper 元数据迁移有两种方式:通过元数据恢复迁移,或通过 <code class="language-plaintext highlighter-rouge">zkcopy</code> 工具迁移。</p>
<h1 id="通过元数据恢复迁移">通过元数据恢复迁移</h1>
<p>Pegasus 提供了 <a href="meta-recovery">元数据恢复</a> 功能,这个功能也可用于 Zookeeper 迁移。基本思路是配置新的 Zookeeper 后,通过 <code class="language-plaintext highlighter-rouge">recover</code> 命令发起元数据恢复,这样元数据就写入新的 Zookeeper 上。</p>
<ol>
<li>
<p>备份 table 列表</p>
<p>使用 shell 工具的 <code class="language-plaintext highlighter-rouge">ls</code> 命令:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; ls -o apps.list
</code></pre></div> </div>
</li>
<li>
<p>备份 node 列表</p>
<p>使用 shell 工具的 <code class="language-plaintext highlighter-rouge">nodes</code> 命令:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; nodes -d -o nodes.list
</code></pre></div> </div>
<p>生成元数据恢复所需的 <code class="language-plaintext highlighter-rouge">recover_node_list</code> 文件:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nb">grep </span>ALIVE nodes.list | <span class="nb">awk</span> <span class="s1">'{print $1}'</span> <span class="o">&gt;</span> recover_node_list
</code></pre></div> </div>
</li>
<li>
<p>停掉所有 Meta Server</p>
<p>停掉所有 Meta Server,并等待一段时间(默认为 30 秒,取决于配置项 <code class="language-plaintext highlighter-rouge">[replication]config_sync_interval_ms</code>),以保证所有 Replica Server 因为心跳超时进入 <code class="language-plaintext highlighter-rouge">INACTIVE</code> 状态。</p>
</li>
<li>
<p>修改 Meta Server 配置文件</p>
<p>修改内容如下:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>[meta_server]
recover_from_replica_server = true
[zookeeper]
hosts_list = {new Zookeeper host list}
</code></pre></div> </div>
<p>即:</p>
<ul>
<li><code class="language-plaintext highlighter-rouge">recover_from_replica_server</code> 设置为 <code class="language-plaintext highlighter-rouge">true</code>,开启从 Replica Server 恢复元数据的开关</li>
<li>将 Zookeeper 配置更新为新的服务地址</li>
</ul>
</li>
<li>
<p>启动一个 Meta Server</p>
<p>启动集群中的一个 Meta Server,它将成为集群的主 Meta Server。</p>
</li>
<li>
<p>通过 shell 工具发送 <code class="language-plaintext highlighter-rouge">recover</code> 命令</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; recover -f recover_node_list
</code></pre></div> </div>
</li>
<li>
<p>修改配置文件并重启 Meta Server</p>
<p>恢复成功后,需要修改 Meta Server 的配置文件,重新改回非 recovery 模式:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>[meta_server]
recover_from_replica_server = false
</code></pre></div> </div>
</li>
<li>
<p>重新启动所有的 Meta Server,集群进入正常状态。</p>
</li>
</ol>
<h2 id="示例脚本">示例脚本</h2>
<p>可以参考 Zookeeper 元数据迁移的示例脚本 <a href="https://github.com/apache/incubator-pegasus/blob/master/scripts/pegasus_migrate_zookeeper.sh">pegasus_migrate_zookeeper.sh</a> 中的主要流程。</p>
<h1 id="通过-zkcopy-工具迁移">通过 <code class="language-plaintext highlighter-rouge">zkcopy</code> 工具迁移</h1>
<p>基本思路就是使用 <a href="https://github.com/ksprojects/zkcopy">zkcopy 工具</a> 将原始 Zookeeper 上的 Pegasus 元数据拷贝到目标 Zookeeper 上,修改 Meta Server 配置文件并重启。</p>
<ol>
<li>
<p>停掉所有的备 Meta Server</p>
<p>为了防止重启主 Meta Server 时,其他的备 Meta Server 抢到锁而成为新的主,造成元数据不一致的问题,需要在整个迁移过程中只保留主 Meta Server 为存活状态,其他的备 Meta Server 全部停掉。</p>
</li>
<li>
<p>修改主 Meta Server 状态为 <code class="language-plaintext highlighter-rouge">blind</code></p>
<p>将主 Meta Server 的 meta_level 设置为 <code class="language-plaintext highlighter-rouge">blind</code>,以禁止任何对 Zookeeper 数据的更新操作,防止在迁移过程中出现元数据不一致:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; set_meta_level blind
</code></pre></div> </div>
<blockquote>
<p>关于 Meta Server 的 meta_level 介绍请参见 <a href="rebalance#控制集群的负载均衡">负载均衡</a></p>
</blockquote>
</li>
<li>
<p>使用 zkcopy 工具拷贝 Zookeeper 元数据</p>
<p>通过 shell 工具的 <code class="language-plaintext highlighter-rouge">cluster_info</code> 命令获取 Pegasus 元数据存储在 Zookeeper 上的路径 <code class="language-plaintext highlighter-rouge">zookeeper_root</code>,然后使用 zkcopy 工具将该路径的数据全部拷贝到新 Zookeeper 上,注意需要递归拷贝。</p>
</li>
<li>
<p>修改配置文件</p>
<p>修改 Meta Server 的配置文件,将 <code class="language-plaintext highlighter-rouge">hosts_list</code> 配置值改为新的服务地址:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>[zookeeper]
hosts_list = {new Zookeeper host list}
</code></pre></div> </div>
</li>
<li>
<p>重启主 Meta Server</p>
<p>重新启动主 Meta Server,通过 shell 工具 <a href="/zh/administration/experiences#问题排查">检查</a> 集群是否进入正常状态。</p>
</li>
<li>
<p>启动所有备 Meta Server</p>
<p>启动所有备 Meta Server,集群进入正常状态。</p>
</li>
<li>
<p>清理旧 Zookeeper 上的数据</p>
<p>使用 <a href="https://github.com/openark/zookeepercli">zookeepercli 工具</a> 的 <code class="language-plaintext highlighter-rouge">rmr</code> 命令清理旧 Zookeeper 上的数据。</p>
</li>
</ol>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="http://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="http://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
本页导航
</p>
<ul class="menu-list">
<li><a href="#通过元数据恢复迁移">通过元数据恢复迁移</a>
<ul>
<li><a href="#示例脚本">示例脚本</a></li>
</ul>
</li>
<li><a href="#通过-zkcopy-工具迁移">通过 zkcopy 工具迁移</a></li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
  // Mount the Algolia DocSearch widget into the #docsearch placeholder in the desktop navbar.
  // NOTE(review): the apiKey below is presumably a search-only public key; confirm before rotating or reusing it.
  docsearch({
    container: '#docsearch',
    appId: 'QRN30RBW0S',
    indexName: 'pegasus-apache',
    apiKey: 'd3a3252fa344359766707a106c4ed88f',
    // Debug mode keeps the search modal open for inspection; it is a development aid and stays off on the published site.
    debug: false
  });
</script>
</body>
</html>