blob: be085d802780bed102081a0674a7d8ec01417d62 [file] [log] [blame]
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Pegasus | Replica Recovery</title>
<link rel="stylesheet" href="/zh/assets/css/app.css">
<link rel="shortcut icon" href="/zh/assets/images/favicon.ico">
<link rel="stylesheet" href="/zh/assets/css/utilities.min.css">
<link rel="stylesheet" href="/zh/assets/css/docsearch.v3.css">
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<title>Replica Recovery | Pegasus</title>
<meta name="generator" content="Jekyll v4.3.3" />
<meta property="og:title" content="Replica Recovery" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="原理" />
<meta property="og:description" content="原理" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2024-04-22T06:39:52+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Replica Recovery" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2024-04-22T06:39:52+00:00","datePublished":"2024-04-22T06:39:52+00:00","description":"原理","headline":"Replica Recovery","mainEntityOfPage":{"@type":"WebPage","@id":"/administration/replica-recovery"},"url":"/administration/replica-recovery"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<div class="dashboard-panel-header has-text-centered">
<a href="/zh/">
<img src="/assets/images/pegasus-logo-inv.png" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">Pegasus 产品文档</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/downloads"
class="">
下载
</a>
</li>
</ul>
<p class="menu-label">编译构建</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/build/compile-by-docker"
class="">
使用 Docker 完成编译(推荐)
</a>
</li>
<li>
<a href="/zh/docs/build/compile-from-source"
class="">
从源码编译
</a>
</li>
</ul>
<p class="menu-label">客户端库</p>
<ul class="menu-list">
<li>
<a href="/zh/clients/java-client"
class="">
Java 客户端
</a>
</li>
<li>
<a href="/zh/clients/cpp-client"
class="">
C++ 客户端
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang 客户端
</a>
</li>
<li>
<a href="/zh/clients/python-client"
class="">
Python 客户端
</a>
</li>
<li>
<a href="/zh/clients/node-client"
class="">
NodeJS 客户端
</a>
</li>
<li>
<a href="/zh/clients/scala-client"
class="">
Scala 客户端
</a>
</li>
</ul>
<p class="menu-label">生态工具</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/tools/shell"
class="">
Pegasus Shell 工具
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
集群管理命令行
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
数据访问命令行
</a>
</li>
</ul>
<p class="menu-label">用户接口</p>
<ul class="menu-list">
<li>
<a href="/zh/api/ttl"
class="">
TTL
</a>
</li>
<li>
<a href="/zh/api/single-atomic"
class="">
单行原子操作
</a>
</li>
<li>
<a href="/zh/api/redis"
class="">
Redis 适配
</a>
</li>
<li>
<a href="/zh/api/geo"
class="">
GEO 支持
</a>
</li>
<li>
<a href="/zh/api/http"
class="">
HTTP 接口
</a>
</li>
</ul>
<p class="menu-label">高效运维</p>
<ul class="menu-list">
<li>
<a href="/zh/administration/deployment"
class="">
集群部署
</a>
</li>
<li>
<a href="/zh/administration/config"
class="">
配置说明
</a>
</li>
<li>
<a href="/zh/administration/rebalance"
class="">
负载均衡
</a>
</li>
<li>
<a href="/zh/administration/monitoring"
class="">
可视化监控
</a>
</li>
<li>
<a href="/zh/administration/rolling-update"
class="">
集群重启和升级
</a>
</li>
<li>
<a href="/zh/administration/scale-in-out"
class="">
集群扩容缩容
</a>
</li>
<li>
<a href="/zh/administration/resource-management"
class="">
资源管理
</a>
</li>
<li>
<a href="/zh/administration/cold-backup"
class="">
冷备份
</a>
</li>
<li>
<a href="/zh/administration/meta-recovery"
class="">
元数据恢复
</a>
</li>
<li>
<a href="/zh/administration/replica-recovery"
class="is-active">
Replica 数据恢复
</a>
</li>
<li>
<a href="/zh/administration/zk-migration"
class="">
Zookeeper 迁移
</a>
</li>
<li>
<a href="/zh/administration/table-migration"
class="">
Table 迁移
</a>
</li>
<li>
<a href="/zh/administration/table-soft-delete"
class="">
Table 软删除
</a>
</li>
<li>
<a href="/zh/administration/table-env"
class="">
Table 环境变量
</a>
</li>
<li>
<a href="/zh/administration/remote-commands"
class="">
远程命令
</a>
</li>
<li>
<a href="/zh/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/zh/administration/duplication"
class="">
跨机房同步
</a>
</li>
<li>
<a href="/zh/administration/compression"
class="">
数据压缩
</a>
</li>
<li>
<a href="/zh/administration/throttling"
class="">
流量控制
</a>
</li>
<li>
<a href="/zh/administration/experiences"
class="">
运维经验
</a>
</li>
<li>
<a href="/zh/administration/manual-compact"
class="">
Manual Compact 功能
</a>
</li>
<li>
<a href="/zh/administration/usage-scenario"
class="">
Usage Scenario 功能
</a>
</li>
<li>
<a href="/zh/administration/bad-disk"
class="">
坏盘检修
</a>
</li>
<li>
<a href="/zh/administration/whitelist"
class="">
Replica Server 白名单
</a>
</li>
<li>
<a href="/zh/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/zh/administration/hotspot-detection"
class="">
热点检测
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/zh/" class="navbar-item">
<!-- Pegasus Icon -->
<img src="/assets/images/pegasus-square.png">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/replica-recovery"><strong>En</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Pegasus 产品文档
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/downloads"
class="navbar-item ">
下载
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
编译构建
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/build/compile-by-docker"
class="navbar-item ">
使用 Docker 完成编译(推荐)
</a>
<a href="/zh/docs/build/compile-from-source"
class="navbar-item ">
从源码编译
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
客户端库
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/clients/java-client"
class="navbar-item ">
Java 客户端
</a>
<a href="/zh/clients/cpp-client"
class="navbar-item ">
C++ 客户端
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang 客户端
</a>
<a href="/zh/clients/python-client"
class="navbar-item ">
Python 客户端
</a>
<a href="/zh/clients/node-client"
class="navbar-item ">
NodeJS 客户端
</a>
<a href="/zh/clients/scala-client"
class="navbar-item ">
Scala 客户端
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
生态工具
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/tools/shell"
class="navbar-item ">
Pegasus Shell 工具
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
集群管理命令行
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
数据访问命令行
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
用户接口
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/api/ttl"
class="navbar-item ">
TTL
</a>
<a href="/zh/api/single-atomic"
class="navbar-item ">
单行原子操作
</a>
<a href="/zh/api/redis"
class="navbar-item ">
Redis 适配
</a>
<a href="/zh/api/geo"
class="navbar-item ">
GEO 支持
</a>
<a href="/zh/api/http"
class="navbar-item ">
HTTP 接口
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
高效运维
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/administration/deployment"
class="navbar-item ">
集群部署
</a>
<a href="/zh/administration/config"
class="navbar-item ">
配置说明
</a>
<a href="/zh/administration/rebalance"
class="navbar-item ">
负载均衡
</a>
<a href="/zh/administration/monitoring"
class="navbar-item ">
可视化监控
</a>
<a href="/zh/administration/rolling-update"
class="navbar-item ">
集群重启和升级
</a>
<a href="/zh/administration/scale-in-out"
class="navbar-item ">
集群扩容缩容
</a>
<a href="/zh/administration/resource-management"
class="navbar-item ">
资源管理
</a>
<a href="/zh/administration/cold-backup"
class="navbar-item ">
冷备份
</a>
<a href="/zh/administration/meta-recovery"
class="navbar-item ">
元数据恢复
</a>
<a href="/zh/administration/replica-recovery"
class="navbar-item is-active">
Replica 数据恢复
</a>
<a href="/zh/administration/zk-migration"
class="navbar-item ">
Zookeeper 迁移
</a>
<a href="/zh/administration/table-migration"
class="navbar-item ">
Table 迁移
</a>
<a href="/zh/administration/table-soft-delete"
class="navbar-item ">
Table 软删除
</a>
<a href="/zh/administration/table-env"
class="navbar-item ">
Table 环境变量
</a>
<a href="/zh/administration/remote-commands"
class="navbar-item ">
远程命令
</a>
<a href="/zh/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/zh/administration/duplication"
class="navbar-item ">
跨机房同步
</a>
<a href="/zh/administration/compression"
class="navbar-item ">
数据压缩
</a>
<a href="/zh/administration/throttling"
class="navbar-item ">
流量控制
</a>
<a href="/zh/administration/experiences"
class="navbar-item ">
运维经验
</a>
<a href="/zh/administration/manual-compact"
class="navbar-item ">
Manual Compact 功能
</a>
<a href="/zh/administration/usage-scenario"
class="navbar-item ">
Usage Scenario 功能
</a>
<a href="/zh/administration/bad-disk"
class="navbar-item ">
坏盘检修
</a>
<a href="/zh/administration/whitelist"
class="navbar-item ">
Replica Server 白名单
</a>
<a href="/zh/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/zh/administration/hotspot-detection"
class="navbar-item ">
热点检测
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/replica-recovery"><strong>En</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">Replica 数据恢复</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<h1 id="原理">原理</h1>
<p>通常来说,Pegasus的数据会存储3个副本。对于每个partition,正常情况下应当都有一主两备3个replica提供服务。</p>
<p>但是,集群不可避免会发生节点宕机、网络异常、心跳失联等情况,造成副本丢失,对服务可用性产生影响。副本丢失的程度会影响读写的能力(在<a href="rebalance#概念篇">负载均衡</a>里也有介绍):</p>
<ul>
<li>一主两备都可用:partition完全健康,可以<strong>正常读和写</strong></li>
<li>一主一备可用:根据PacificA一致性协议,同样可以<strong>安全地读和写</strong></li>
<li>只有一主可用:此时<strong>不可写</strong>,但是<strong>可读</strong></li>
<li>全部不可用:此时<strong>不可读写</strong>。这种情况我们称之为<strong>DDD</strong>,即Dead-Dead-Dead的缩写,表示3个副本都不可用。</li>
</ul>
<p>以上情况中,除了<strong>全部不可用的DDD状态</strong>,其他情况下MetaServer都能自动补充副本,并最终恢复至完全健康状态。但是如果partition进入DDD状态,MetaServer就无法对其进行自动恢复,需要进行人工干预。</p>
<p><a href="https://github.com/XiaoMi/rdsn/issues/80">这个讨论</a>中给出了进入DDD状态的例子。实际上,<strong>只要某个partition进入DDD状态,且LastDrop的最后两个节点中有一个节点无法正常启动,就会进入需要人工干预的DDD状态</strong>。而在线上集群多个节点的起起停停过程中,这种情况是很容易出现的。</p>
<p>可以通过Shell工具的<code class="language-plaintext highlighter-rouge">ls -d</code>命令查看健康状况,如果<code class="language-plaintext highlighter-rouge">read_unhealthy</code>的个数大于0,就表示有partition进入了DDD状态。</p>
<h1 id="ddd诊断工具">DDD诊断工具</h1>
<p>从v1.11.0版本开始,Pegasus在Shell工具中提供了<code class="language-plaintext highlighter-rouge">ddd_diagnose</code>命令,以支持DDD自动诊断功能。</p>
<p>命令用法:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>ddd_diagnose [-g|--gpid appid|appid.pidx] [-d|--diagnose] [-a|--auto_diagnose]
[-s|--skip_prompt] [-o|--output file_name]
</code></pre></div></div>
<p>参数说明:</p>
<ul>
<li><code class="language-plaintext highlighter-rouge">-g</code>:指定app_id或者partition_id,譬如<code class="language-plaintext highlighter-rouge">-g 1</code>或者<code class="language-plaintext highlighter-rouge">-g 1.3</code>;如果不指定,则对所有表执行操作。</li>
<li><code class="language-plaintext highlighter-rouge">-d</code>:进入诊断模式;如果不指定,则只显示DDD情况,但不进行诊断。</li>
<li><code class="language-plaintext highlighter-rouge">-a</code>:开启自动诊断,即在保证数据一致性前提下,如果诊断工具能寻找到合适的备份作为该partition的主备份,则自动将其设置为primary,完成数据恢复,无需人工干预。</li>
<li><code class="language-plaintext highlighter-rouge">-s</code>:避免交互模式;如果不指定,则在进行诊断的过程中可能会要求用户输入信息,以完成选择、确认、或者信息补充。</li>
<li><code class="language-plaintext highlighter-rouge">-o</code>:将结果输出到指定文件。</li>
</ul>
<p>使用示例(如果看不清楚,请在单独的页面中打开图片):</p>
<p><img src="/assets/images/ddd-diagnose.png" alt="ddd-diagnose.png" class="img-responsive" /></p>
<p>上图是使用<code class="language-plaintext highlighter-rouge">ddd_diagnose</code>命令时的输出,我们通过红色的箭头标识依次进行说明:</p>
<ul>
<li>1:当前正在诊断的partition id。</li>
<li>2:该partition在zookeeper上持久化的<code class="language-plaintext highlighter-rouge">ballot</code><code class="language-plaintext highlighter-rouge">last_committed_decree</code>信息,但是由于持久化不是实时的,该值可能小于实际值。</li>
<li>3:dropped列表,列举曾经服务过该partition的节点的状态信息,重点关注:
<ul>
<li>alive:该节点是否可用。</li>
<li>ballot:该partition在该节点上replica的实际<code class="language-plaintext highlighter-rouge">ballot</code>;如果为-1,表示该partition在该节点上不存在数据。</li>
<li>last_committed:该partition在该节点上replica的实际<code class="language-plaintext highlighter-rouge">last_committed_decree</code></li>
<li>last_prepared:该partition在该节点上replica的实际<code class="language-plaintext highlighter-rouge">last_prepared_decree</code></li>
<li>最后如果有<code class="language-plaintext highlighter-rouge">&lt;==</code>,表示该节点是 <strong>最后一个变得不可用</strong>(the latest) 还是 <strong>倒数第二个变得不可用</strong>(the secondary latest)。</li>
</ul>
</li>
<li>4:last_drops列表,记录节点变得不可用的时间顺序。</li>
<li>5:ddd_reason,表示该partition变成DDD状态的原因。</li>
<li>6:recommanded_primary,诊断工具在保证数据一致性的前提下推荐的新primary;如果无法给出,则为<code class="language-plaintext highlighter-rouge">none</code></li>
<li>7:如果第6步给出了推荐节点,则提示用户采取下一步操作(如果指定了<code class="language-plaintext highlighter-rouge">-a</code>或者<code class="language-plaintext highlighter-rouge">-s</code>选项,则不会进入这一步,相当于总是自动选择y):
<ul>
<li>y:采用该推荐节点作为新的primary。</li>
<li>n:不采用该推荐节点,而是让用户选择其他的节点。</li>
<li>s:忽略对该partition的诊断。</li>
</ul>
</li>
<li>8:如果第6步没有给出推荐节点或者第7步选择了n,则提示用户输入新的节点作为primary。</li>
<li>9:生成propose命令,发送给MetaServer,将节点指定为新的primary,对该partition进行恢复。</li>
<li>10:收到propose命令的回复,<code class="language-plaintext highlighter-rouge">ERR_OK</code>表示执行成功。</li>
<li>11:显示当前已经完成的进度,分子为已经完成诊断的个数,分母为需要进行诊断的总个数。</li>
</ul>
<p><strong>推荐用法</strong></p>
<ul>
<li><code class="language-plaintext highlighter-rouge">ddd_diagnose -d -a</code>,即开启自动诊断,对于无法自动完成诊断的partition,通过与用户交互来获得人工干预。这是最简单省心的用法,在大部分情况下都能自动完成恢复过程,无需人工干预。</li>
</ul>
<p>在无法完成自动诊断的情况下,会进入上图中的第8步,需要用户输入新的节点作为primary。那么,在dropped列表的众多节点中,如何选择最合适的节点作为primary呢?我们的建议是:</p>
<ul>
<li><strong>在所有alive为true的节点中,选择<code class="language-plaintext highlighter-rouge">last_prepared</code>值最大的节点</strong>,因为这样能尽可能多地恢复数据,减少数据丢失的可能性。</li>
</ul>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="http://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="http://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
本页导航
</p>
<ul class="menu-list">
<li><a href="#原理">原理</a></li>
<li><a href="#ddd诊断工具">DDD诊断工具</a></li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
docsearch({
container: '#docsearch',
appId: 'QRN30RBW0S',
indexName: 'pegasus-apache',
apiKey: 'd3a3252fa344359766707a106c4ed88f',
debug: true
});
</script>
</body>
</html>