blob: 8e3a91a0b15a6cdc816342ed69ce1735fdc63cc1 [file] [log] [blame]
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Pegasus | Zk Migration</title>
<link rel="stylesheet" href="/zh/assets/css/app.css">
<link rel="shortcut icon" href="/zh/assets/images/favicon.ico">
<link rel="stylesheet" href="/zh/assets/css/utilities.min.css">
<link rel="stylesheet" href="/zh/assets/css/docsearch.v3.css">
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<title>Zk Migration | Pegasus</title>
<meta name="generator" content="Jekyll v4.3.2" />
<meta property="og:title" content="Zk Migration" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="由于Pegasus的meta server依赖Zookeeper存储元数据和抢主,所以Zookeeper服务的不稳定会造成Pegasus服务不稳定,有时就需要迁移到其他更稳定或者空闲的Zookeeper上。" />
<meta property="og:description" content="由于Pegasus的meta server依赖Zookeeper存储元数据和抢主,所以Zookeeper服务的不稳定会造成Pegasus服务不稳定,有时就需要迁移到其他更稳定或者空闲的Zookeeper上。" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2023-11-23T14:51:44+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Zk Migration" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2023-11-23T14:51:44+00:00","datePublished":"2023-11-23T14:51:44+00:00","description":"由于Pegasus的meta server依赖Zookeeper存储元数据和抢主,所以Zookeeper服务的不稳定会造成Pegasus服务不稳定,有时就需要迁移到其他更稳定或者空闲的Zookeeper上。","headline":"Zk Migration","mainEntityOfPage":{"@type":"WebPage","@id":"/administration/zk-migration"},"url":"/administration/zk-migration"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<div class="dashboard-panel-header has-text-centered">
<a href="/zh/">
<img src="/assets/images/pegasus-logo-inv.png" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">Pegasus产品文档</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/downloads"
class="">
下载
</a>
</li>
</ul>
<p class="menu-label">编译构建</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/build/compile-by-docker"
class="">
使用Docker完成编译(推荐)
</a>
</li>
<li>
<a href="/zh/docs/build/compile-from-source"
class="">
从源码编译
</a>
</li>
</ul>
<p class="menu-label">客户端库</p>
<ul class="menu-list">
<li>
<a href="/zh/clients/java-client"
class="">
Java客户端
</a>
</li>
<li>
<a href="/zh/clients/cpp-client"
class="">
C++客户端
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang客户端
</a>
</li>
<li>
<a href="/zh/clients/python2-client"
class="">
Python2客户端
</a>
</li>
<li>
<a href="/zh/clients/python3-client"
class="">
Python3客户端
</a>
</li>
<li>
<a href="/zh/clients/node-client"
class="">
NodeJS客户端
</a>
</li>
<li>
<a href="/zh/clients/scala-client"
class="">
Scala客户端
</a>
</li>
</ul>
<p class="menu-label">生态工具</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/tools/shell"
class="">
Pegasus Shell 工具
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
集群管理命令行
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
数据访问命令行
</a>
</li>
</ul>
<p class="menu-label">用户接口</p>
<ul class="menu-list">
<li>
<a href="/zh/api/ttl"
class="">
TTL
</a>
</li>
<li>
<a href="/zh/api/single-atomic"
class="">
单行原子操作
</a>
</li>
<li>
<a href="/zh/api/redis"
class="">
Redis适配
</a>
</li>
<li>
<a href="/zh/api/geo"
class="">
GEO支持
</a>
</li>
<li>
<a href="/zh/api/http"
class="">
HTTP接口
</a>
</li>
</ul>
<p class="menu-label">高效运维</p>
<ul class="menu-list">
<li>
<a href="/zh/administration/deployment"
class="">
集群部署
</a>
</li>
<li>
<a href="/zh/administration/config"
class="">
配置说明
</a>
</li>
<li>
<a href="/zh/administration/rebalance"
class="">
负载均衡
</a>
</li>
<li>
<a href="/zh/administration/monitoring"
class="">
可视化监控
</a>
</li>
<li>
<a href="/zh/administration/rolling-update"
class="">
集群升级
</a>
</li>
<li>
<a href="/zh/administration/scale-in-out"
class="">
集群扩容缩容
</a>
</li>
<li>
<a href="/zh/administration/resource-management"
class="">
资源管理
</a>
</li>
<li>
<a href="/zh/administration/cold-backup"
class="">
冷备份
</a>
</li>
<li>
<a href="/zh/administration/meta-recovery"
class="">
元数据恢复
</a>
</li>
<li>
<a href="/zh/administration/replica-recovery"
class="">
Replica数据恢复
</a>
</li>
<li>
<a href="/zh/administration/zk-migration"
class="is-active">
Zookeeper迁移
</a>
</li>
<li>
<a href="/zh/administration/table-migration"
class="">
Table迁移
</a>
</li>
<li>
<a href="/zh/administration/table-soft-delete"
class="">
Table软删除
</a>
</li>
<li>
<a href="/zh/administration/table-env"
class="">
Table环境变量
</a>
</li>
<li>
<a href="/zh/administration/remote-commands"
class="">
远程命令
</a>
</li>
<li>
<a href="/zh/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/zh/administration/duplication"
class="">
跨机房同步
</a>
</li>
<li>
<a href="/zh/administration/compression"
class="">
数据压缩
</a>
</li>
<li>
<a href="/zh/administration/throttling"
class="">
流量控制
</a>
</li>
<li>
<a href="/zh/administration/experiences"
class="">
运维经验
</a>
</li>
<li>
<a href="/zh/administration/manual-compact"
class="">
Manual Compact功能
</a>
</li>
<li>
<a href="/zh/administration/usage-scenario"
class="">
Usage Scenario功能
</a>
</li>
<li>
<a href="/zh/administration/bad-disk"
class="">
坏盘检修
</a>
</li>
<li>
<a href="/zh/administration/whitelist"
class="">
白名单
</a>
</li>
<li>
<a href="/zh/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/zh/administration/hotspot-detection"
class="">
热点检测
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/zh/" class="navbar-item">
<!-- Pegasus Icon -->
<img src="/assets/images/pegasus-square.png">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/zk-migration"><strong>En</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Pegasus产品文档
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/downloads"
class="navbar-item ">
下载
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
编译构建
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/build/compile-by-docker"
class="navbar-item ">
使用Docker完成编译(推荐)
</a>
<a href="/zh/docs/build/compile-from-source"
class="navbar-item ">
从源码编译
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
客户端库
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/clients/java-client"
class="navbar-item ">
Java客户端
</a>
<a href="/zh/clients/cpp-client"
class="navbar-item ">
C++客户端
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang客户端
</a>
<a href="/zh/clients/python2-client"
class="navbar-item ">
Python2客户端
</a>
<a href="/zh/clients/python3-client"
class="navbar-item ">
Python3客户端
</a>
<a href="/zh/clients/node-client"
class="navbar-item ">
NodeJS客户端
</a>
<a href="/zh/clients/scala-client"
class="navbar-item ">
Scala客户端
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
生态工具
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/tools/shell"
class="navbar-item ">
Pegasus Shell 工具
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
集群管理命令行
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
数据访问命令行
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
用户接口
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/api/ttl"
class="navbar-item ">
TTL
</a>
<a href="/zh/api/single-atomic"
class="navbar-item ">
单行原子操作
</a>
<a href="/zh/api/redis"
class="navbar-item ">
Redis适配
</a>
<a href="/zh/api/geo"
class="navbar-item ">
GEO支持
</a>
<a href="/zh/api/http"
class="navbar-item ">
HTTP接口
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
高效运维
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/administration/deployment"
class="navbar-item ">
集群部署
</a>
<a href="/zh/administration/config"
class="navbar-item ">
配置说明
</a>
<a href="/zh/administration/rebalance"
class="navbar-item ">
负载均衡
</a>
<a href="/zh/administration/monitoring"
class="navbar-item ">
可视化监控
</a>
<a href="/zh/administration/rolling-update"
class="navbar-item ">
集群升级
</a>
<a href="/zh/administration/scale-in-out"
class="navbar-item ">
集群扩容缩容
</a>
<a href="/zh/administration/resource-management"
class="navbar-item ">
资源管理
</a>
<a href="/zh/administration/cold-backup"
class="navbar-item ">
冷备份
</a>
<a href="/zh/administration/meta-recovery"
class="navbar-item ">
元数据恢复
</a>
<a href="/zh/administration/replica-recovery"
class="navbar-item ">
Replica数据恢复
</a>
<a href="/zh/administration/zk-migration"
class="navbar-item is-active">
Zookeeper迁移
</a>
<a href="/zh/administration/table-migration"
class="navbar-item ">
Table迁移
</a>
<a href="/zh/administration/table-soft-delete"
class="navbar-item ">
Table软删除
</a>
<a href="/zh/administration/table-env"
class="navbar-item ">
Table环境变量
</a>
<a href="/zh/administration/remote-commands"
class="navbar-item ">
远程命令
</a>
<a href="/zh/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/zh/administration/duplication"
class="navbar-item ">
跨机房同步
</a>
<a href="/zh/administration/compression"
class="navbar-item ">
数据压缩
</a>
<a href="/zh/administration/throttling"
class="navbar-item ">
流量控制
</a>
<a href="/zh/administration/experiences"
class="navbar-item ">
运维经验
</a>
<a href="/zh/administration/manual-compact"
class="navbar-item ">
Manual Compact功能
</a>
<a href="/zh/administration/usage-scenario"
class="navbar-item ">
Usage Scenario功能
</a>
<a href="/zh/administration/bad-disk"
class="navbar-item ">
坏盘检修
</a>
<a href="/zh/administration/whitelist"
class="navbar-item ">
白名单
</a>
<a href="/zh/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/zh/administration/hotspot-detection"
class="navbar-item ">
热点检测
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/zk-migration"><strong>En</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">Zookeeper迁移</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<p>由于Pegasus的meta server依赖Zookeeper存储元数据和抢主,所以Zookeeper服务的不稳定会造成Pegasus服务不稳定,有时就需要迁移到其他更稳定或者空闲的Zookeeper上。</p>
<p>Zookeeper迁移提供了两种办法:通过元数据恢复迁移;通过zkcopy工具迁移。</p>
<h1 id="通过元数据恢复迁移">通过元数据恢复迁移</h1>
<p>Pegasus提供了<a href="meta-recovery">元数据恢复</a>功能,这个功能也可用于Zookeeper迁移。基本思路就是配置新的Zookeeper后,通过recover命令发起元数据恢复,这样元数据就写入新的Zookeeper上。</p>
<ol>
<li>
<p>备份app列表</p>
<p>使用shell的<code class="language-plaintext highlighter-rouge">ls</code>命令:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; ls -o apps.list
</code></pre></div> </div>
</li>
<li>
<p>备份node列表</p>
<p>使用shell的<code class="language-plaintext highlighter-rouge">nodes</code>命令:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; nodes -d -o nodes.list
</code></pre></div> </div>
<p>生成元数据恢复所需的<code class="language-plaintext highlighter-rouge">recover_node_list</code>文件:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nb">grep </span>ALIVE nodes.list | <span class="nb">awk</span> <span class="s1">'{print $1}'</span> <span class="o">&gt;</span>recover_node_list
</code></pre></div> </div>
</li>
<li>
<p>停掉所有meta</p>
<p>停掉所有meta server,并等待30秒以上,以保证所有replica server因为心跳超时进入INACTIVE状态。</p>
</li>
<li>
<p>修改meta配置文件</p>
<p>修改meta server的配置文件,如下:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>[meta_server]
recover_from_replica_server = true
[zookeeper]
hosts_list = {new zookeeper host list}
</code></pre></div> </div>
<ul>
<li><code class="language-plaintext highlighter-rouge">recover_from_replica_server</code>设置为true</li>
<li>将zookeeper的<code class="language-plaintext highlighter-rouge">hosts_lists</code>改为新的服务地址</li>
</ul>
</li>
<li>
<p>启动一个meta</p>
<p>启动其中一个meta server。</p>
</li>
<li>
<p>通过shell发送recover命令</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; recover -f recover_node_list
</code></pre></div> </div>
<p>检查恢复结果,如果出错请参考<a href="meta-recovery#常见问题整理">常见问题整理</a>排查问题。</p>
</li>
<li>
<p>修改配置文件并重启meta</p>
<p>恢复成功后,需要修改配置文件,重新改回非recovery模式:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>[meta_server]
recover_from_replica_server = false
</code></pre></div> </div>
<p>重新启动所有的meta server,集群进入正常状态。</p>
</li>
</ol>
<p>注:<a href="https://github.com/apache/incubator-pegasus/blob/master/scripts/pegasus_migrate_zookeeper.sh">scripts/pegasus_migrate_zookeeper.sh</a>是我们在内部使用的迁移Zookeeper的脚本,虽然因为服务启停功能的兼容性不能直接使用,但是可以参考其中的流程,或者进行改造。</p>
<h1 id="通过zkcopy工具迁移">通过zkcopy工具迁移</h1>
<p>基本思路就是使用zkcopy工具将原始Zookeeper数据拷贝到目标Zookeeper上,修改meta server配置文件并重启。</p>
<ol>
<li>
<p>停掉所有的备meta server</p>
<p>为了防止重启主meta server时有其他的备meta server抢到锁,造成状态混乱,在整个迁移过程中只保留一个主meta server,其他的备meta server全部停掉。</p>
</li>
<li>
<p>修改主meta server状态为blind</p>
<p>将主meta server的level设置为blind(关于meta server的level介绍请参见<a href="rebalance#控制集群的负载均衡">负载均衡</a>),以禁止任何对Zookeeper数据的更新操作,防止在copy过程中出现不一致:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; set_meta_level blind
</code></pre></div> </div>
</li>
<li>
<p>使用zkcopy工具拷贝Zookeeper数据</p>
<p>通过shell的<code class="language-plaintext highlighter-rouge">cluster_info</code>命令获取Zookeeper元数据节点路径<code class="language-plaintext highlighter-rouge">zookeeper_root</code>,然后使用zkcopy工具将该节点的数据完全拷贝到新集群的节点上,注意需要递归拷贝。</p>
</li>
<li>
<p>修改配置文件</p>
<p>修改meta server的配置文件,将zookeeper的<code class="language-plaintext highlighter-rouge">hosts_lists</code>改为新的服务地址:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>[meta_server]
hosts_list = {new zookeeper host list}
</code></pre></div> </div>
</li>
<li>
<p>重启主meta server</p>
<p>重新启动主meta server,通过shell工具检查集群进入正常状态。</p>
</li>
<li>
<p>启动所有备meta server</p>
<p>启动所有备meta server,集群进入正常状态。</p>
</li>
<li>
<p>清理旧Zookeeper上的数据</p>
<p>使用zookeepercli工具的rmr命令清理旧zookeeper上的数据。</p>
</li>
</ol>
<p>注:上面使用到的<code class="language-plaintext highlighter-rouge">zkcopy</code><code class="language-plaintext highlighter-rouge">zookeepercli</code>工具以后会提供出来。</p>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="http://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="http://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
本页导航
</p>
<ul class="menu-list">
<li><a href="#通过元数据恢复迁移">通过元数据恢复迁移</a></li>
<li><a href="#通过zkcopy工具迁移">通过zkcopy工具迁移</a></li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
docsearch({
container: '#docsearch',
appId: 'QRN30RBW0S',
indexName: 'pegasus-apache',
apiKey: 'd3a3252fa344359766707a106c4ed88f',
debug: true
});
</script>
</body>
</html>