blob: 42d886d123d9194912088d743ca580da2894c63d [file] [log] [blame]
<!DOCTYPE html>
<!-- The page content is Simplified Chinese, so declare it for assistive technology and search engines. -->
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The only <title> of the document; the SEO tag block below no longer emits a second one. -->
<title>Pegasus | Duplication</title>
<link rel="stylesheet" href="/zh/assets/css/app.css">
<link rel="shortcut icon" href="/zh/assets/images/favicon.ico">
<link rel="stylesheet" href="/zh/assets/css/utilities.min.css">
<link rel="stylesheet" href="/zh/assets/css/docsearch.v3.css">
<!-- These scripts are intentionally not deferred: the inline docsearch() call at the end of <body> needs docsearch.v3.js to be loaded already. -->
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<meta name="generator" content="Jekyll v4.3.2" />
<meta property="og:title" content="Duplication" />
<!-- Locale matches the language of the description and body text. -->
<meta property="og:locale" content="zh_CN" />
<meta name="description" content="在 pegasus 中,跨机房同步又被称为 热备份,或 duplication,简称 dup。这一功能的主要目的是保证 数据中心级别的可用性。当业务需要保证服务与数据能够容忍机房故障时,可以考虑使用此功能。" />
<meta property="og:description" content="在 pegasus 中,跨机房同步又被称为 热备份,或 duplication,简称 dup。这一功能的主要目的是保证 数据中心级别的可用性。当业务需要保证服务与数据能够容忍机房故障时,可以考虑使用此功能。" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2023-11-23T14:51:44+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Duplication" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2023-11-23T14:51:44+00:00","datePublished":"2023-11-23T14:51:44+00:00","description":"在 pegasus 中,跨机房同步又被称为 热备份,或 duplication,简称 dup。这一功能的主要目的是保证 数据中心级别的可用性。当业务需要保证服务与数据能够容忍机房故障时,可以考虑使用此功能。","headline":"Duplication","mainEntityOfPage":{"@type":"WebPage","@id":"/administration/duplication"},"url":"/administration/duplication"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<div class="dashboard-panel-header has-text-centered">
<!-- Logo doubles as the home link; the alt text gives the link its accessible name. -->
<a href="/zh/">
<img src="/assets/images/pegasus-logo-inv.png" alt="Apache Pegasus 首页" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">Pegasus产品文档</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/downloads"
class="">
下载
</a>
</li>
</ul>
<p class="menu-label">编译构建</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/build/compile-by-docker"
class="">
使用Docker完成编译(推荐)
</a>
</li>
<li>
<a href="/zh/docs/build/compile-from-source"
class="">
从源码编译
</a>
</li>
</ul>
<p class="menu-label">客户端库</p>
<ul class="menu-list">
<li>
<a href="/zh/clients/java-client"
class="">
Java客户端
</a>
</li>
<li>
<a href="/zh/clients/cpp-client"
class="">
C++客户端
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang客户端
</a>
</li>
<li>
<a href="/zh/clients/python2-client"
class="">
Python2客户端
</a>
</li>
<li>
<a href="/zh/clients/python3-client"
class="">
Python3客户端
</a>
</li>
<li>
<a href="/zh/clients/node-client"
class="">
NodeJS客户端
</a>
</li>
<li>
<a href="/zh/clients/scala-client"
class="">
Scala客户端
</a>
</li>
</ul>
<p class="menu-label">生态工具</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/tools/shell"
class="">
Pegasus Shell 工具
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
集群管理命令行
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
数据访问命令行
</a>
</li>
</ul>
<p class="menu-label">用户接口</p>
<ul class="menu-list">
<li>
<a href="/zh/api/ttl"
class="">
TTL
</a>
</li>
<li>
<a href="/zh/api/single-atomic"
class="">
单行原子操作
</a>
</li>
<li>
<a href="/zh/api/redis"
class="">
Redis适配
</a>
</li>
<li>
<a href="/zh/api/geo"
class="">
GEO支持
</a>
</li>
<li>
<a href="/zh/api/http"
class="">
HTTP接口
</a>
</li>
</ul>
<p class="menu-label">高效运维</p>
<ul class="menu-list">
<li>
<a href="/zh/administration/deployment"
class="">
集群部署
</a>
</li>
<li>
<a href="/zh/administration/config"
class="">
配置说明
</a>
</li>
<li>
<a href="/zh/administration/rebalance"
class="">
负载均衡
</a>
</li>
<li>
<a href="/zh/administration/monitoring"
class="">
可视化监控
</a>
</li>
<li>
<a href="/zh/administration/rolling-update"
class="">
集群升级
</a>
</li>
<li>
<a href="/zh/administration/scale-in-out"
class="">
集群扩容缩容
</a>
</li>
<li>
<a href="/zh/administration/resource-management"
class="">
资源管理
</a>
</li>
<li>
<a href="/zh/administration/cold-backup"
class="">
冷备份
</a>
</li>
<li>
<a href="/zh/administration/meta-recovery"
class="">
元数据恢复
</a>
</li>
<li>
<a href="/zh/administration/replica-recovery"
class="">
Replica数据恢复
</a>
</li>
<li>
<a href="/zh/administration/zk-migration"
class="">
Zookeeper迁移
</a>
</li>
<li>
<a href="/zh/administration/table-migration"
class="">
Table迁移
</a>
</li>
<li>
<a href="/zh/administration/table-soft-delete"
class="">
Table软删除
</a>
</li>
<li>
<a href="/zh/administration/table-env"
class="">
Table环境变量
</a>
</li>
<li>
<a href="/zh/administration/remote-commands"
class="">
远程命令
</a>
</li>
<li>
<a href="/zh/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/zh/administration/duplication"
class="is-active">
跨机房同步
</a>
</li>
<li>
<a href="/zh/administration/compression"
class="">
数据压缩
</a>
</li>
<li>
<a href="/zh/administration/throttling"
class="">
流量控制
</a>
</li>
<li>
<a href="/zh/administration/experiences"
class="">
运维经验
</a>
</li>
<li>
<a href="/zh/administration/manual-compact"
class="">
Manual Compact功能
</a>
</li>
<li>
<a href="/zh/administration/usage-scenario"
class="">
Usage Scenario功能
</a>
</li>
<li>
<a href="/zh/administration/bad-disk"
class="">
坏盘检修
</a>
</li>
<li>
<a href="/zh/administration/whitelist"
class="">
白名单
</a>
</li>
<li>
<a href="/zh/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/zh/administration/hotspot-detection"
class="">
热点检测
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/zh/" class="navbar-item">
<!-- Pegasus Icon: the image is the only content of this home link, so alt supplies its accessible name -->
<img src="/assets/images/pegasus-square.png" alt="Apache Pegasus 首页">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/duplication"><strong>En</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Pegasus产品文档
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/downloads"
class="navbar-item ">
下载
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
编译构建
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/build/compile-by-docker"
class="navbar-item ">
使用Docker完成编译(推荐)
</a>
<a href="/zh/docs/build/compile-from-source"
class="navbar-item ">
从源码编译
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
客户端库
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/clients/java-client"
class="navbar-item ">
Java客户端
</a>
<a href="/zh/clients/cpp-client"
class="navbar-item ">
C++客户端
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang客户端
</a>
<a href="/zh/clients/python2-client"
class="navbar-item ">
Python2客户端
</a>
<a href="/zh/clients/python3-client"
class="navbar-item ">
Python3客户端
</a>
<a href="/zh/clients/node-client"
class="navbar-item ">
NodeJS客户端
</a>
<a href="/zh/clients/scala-client"
class="navbar-item ">
Scala客户端
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
生态工具
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/tools/shell"
class="navbar-item ">
Pegasus Shell 工具
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
集群管理命令行
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
数据访问命令行
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
用户接口
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/api/ttl"
class="navbar-item ">
TTL
</a>
<a href="/zh/api/single-atomic"
class="navbar-item ">
单行原子操作
</a>
<a href="/zh/api/redis"
class="navbar-item ">
Redis适配
</a>
<a href="/zh/api/geo"
class="navbar-item ">
GEO支持
</a>
<a href="/zh/api/http"
class="navbar-item ">
HTTP接口
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
高效运维
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/administration/deployment"
class="navbar-item ">
集群部署
</a>
<a href="/zh/administration/config"
class="navbar-item ">
配置说明
</a>
<a href="/zh/administration/rebalance"
class="navbar-item ">
负载均衡
</a>
<a href="/zh/administration/monitoring"
class="navbar-item ">
可视化监控
</a>
<a href="/zh/administration/rolling-update"
class="navbar-item ">
集群升级
</a>
<a href="/zh/administration/scale-in-out"
class="navbar-item ">
集群扩容缩容
</a>
<a href="/zh/administration/resource-management"
class="navbar-item ">
资源管理
</a>
<a href="/zh/administration/cold-backup"
class="navbar-item ">
冷备份
</a>
<a href="/zh/administration/meta-recovery"
class="navbar-item ">
元数据恢复
</a>
<a href="/zh/administration/replica-recovery"
class="navbar-item ">
Replica数据恢复
</a>
<a href="/zh/administration/zk-migration"
class="navbar-item ">
Zookeeper迁移
</a>
<a href="/zh/administration/table-migration"
class="navbar-item ">
Table迁移
</a>
<a href="/zh/administration/table-soft-delete"
class="navbar-item ">
Table软删除
</a>
<a href="/zh/administration/table-env"
class="navbar-item ">
Table环境变量
</a>
<a href="/zh/administration/remote-commands"
class="navbar-item ">
远程命令
</a>
<a href="/zh/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/zh/administration/duplication"
class="navbar-item is-active">
跨机房同步
</a>
<a href="/zh/administration/compression"
class="navbar-item ">
数据压缩
</a>
<a href="/zh/administration/throttling"
class="navbar-item ">
流量控制
</a>
<a href="/zh/administration/experiences"
class="navbar-item ">
运维经验
</a>
<a href="/zh/administration/manual-compact"
class="navbar-item ">
Manual Compact功能
</a>
<a href="/zh/administration/usage-scenario"
class="navbar-item ">
Usage Scenario功能
</a>
<a href="/zh/administration/bad-disk"
class="navbar-item ">
坏盘检修
</a>
<a href="/zh/administration/whitelist"
class="navbar-item ">
白名单
</a>
<a href="/zh/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/zh/administration/hotspot-detection"
class="navbar-item ">
热点检测
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/duplication"><strong>En</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">跨机房同步</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<p>在 pegasus 中,跨机房同步又被称为 <strong><em>热备份</em></strong>,或 <strong><em>duplication</em></strong>,简称 <strong><em>dup</em></strong>。这一功能的主要目的是保证 <strong>数据中心级别的可用性</strong>。当业务需要保证服务与数据能够容忍机房故障时,可以考虑使用此功能。</p>
<p>此外,当 Pegasus 客户端在多机房分布时,时常会遇到跨机房访问 Pegasus 服务带来的高延时问题,这时我们可以将 Pegasus 的服务与客户端部署在相同的机房内,客户端可以只读写本地机房的服务,然后由热备份功能将写同步到各个机房上。这种做法既能保证各个机房都有完整数据,又能避免跨机房的延时开销。</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code> client client client
+ + +
+---------v-------+ +--------v--------+ +------v-----------+
| | | | | |
| pegasus-beijing &lt;---&gt; pegasus-tianjin &lt;---&gt; pegasus-shanghai |
| | | | | |
+----------^------+ +-----------------+ +---------^--------+
| |
+------------------------------------------+
</code></pre></div></div>
<p>我们能够做到<strong>一主一备(single-master)</strong>,也能提供<strong>多机房多主(multi-master)</strong>,用户可以根据需要进行配置。</p>
<p>这里需要注意的是,跨机房同步是<strong>异步</strong>的数据复制,并非完全实时。与单机房不同,该功能不提供跨机房 <em>read-after-write</em> 的一致性保证。目前在跨机房网络健康的环境下,数据延时大概在 10s 左右,即 A 机房的写数据大概在 10s 后会写入 B 机房。</p>
<h2 id="操作上手">操作上手</h2>
<p>假设我们有两个 pegasus 集群 <em>bjsrv-account</em> 与 <em>tjsrv-account</em>,分别位于北京与天津的两个机房内,表 <code class="language-plaintext highlighter-rouge">account_xiaomi</code> 由于存储了极其关键的用户帐号数据,需要能够在双集群保证可用,所以我们为它实施热备份:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt; ./run.sh shell -n bjsrv-account
Type "help" for more information.
Type "Ctrl-D" or "Ctrl-C" to exit the shell.
The cluster name is: bjsrv-account
The cluster meta list is: ***
&gt;&gt;&gt; ls
app_id status app_name
12 AVAILABLE account_xiaomi
&gt;&gt;&gt; add_dup account_xiaomi tjsrv-account
Success for adding duplication [appid: 12, dupid: 1535008534]
&gt;&gt;&gt; query_dup account_xiaomi
duplications of app [account_xiaomi] are listed as below:
| dup_id | status | remote cluster | create time |
| 1535008534 | DS_START | tjsrv-account | 2018-08-23 15:15:34 |
</code></pre></div></div>
<p>通过 <code class="language-plaintext highlighter-rouge">add_dup</code> 命令,bjsrv-account 集群的表 account_xiaomi 将会近实时地把数据复制到 tjsrv-account 上,这意味着,每一条在北京机房的写入,最终都一定会复制到天津机房。</p>
<p>热备份使用日志异步复制的方式来实现跨集群的同步,可与 mysql 的 binlog 复制和 hbase replication 类比。</p>
<p>热备份功能<strong>以表为粒度</strong>,你可以只对集群内一部分表实施热备份。热备份的两集群的表名需要保持一致,但 partition 的个数不需要相同。例如用户可以建表如下:</p>
<div class="language-sh highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c">## bjsrv-account</span>
<span class="o">&gt;&gt;&gt;</span> create account_xiaomi <span class="nt">-p</span> 128
<span class="c">## tjsrv-account</span>
<span class="o">&gt;&gt;&gt;</span> create account_xiaomi <span class="nt">-p</span> 32
</code></pre></div></div>
<h2 id="线上表开启热备份">线上表开启热备份</h2>
<p>有时一个线上表可能在设计之初未考虑到跨机房同步的需求,而在服务一段时间后,才决定进行热备份。此时我们需要将源集群已有的全部数据复制到目的集群。因为是线上表,我们要求拷贝过程中:</p>
<ol>
<li><strong>不可以停止服务</strong></li>
<li>拷贝过程中的<strong>写增量数据不能丢失</strong></li>
</ol>
<p>面对这个需求,我们的操作思路是:</p>
<ol>
<li>首先源集群<strong>保留从此刻开始的所有写增量</strong>(即WAL日志)</li>
<li>将源集群的全量快照(冷备份)上传至 HDFS / xiaomi-FDS 等备份存储上。</li>
<li>然后恢复到目标集群。</li>
<li>此后源集群开启热备份,并复制此前堆积的写增量,复制到远端目标集群。</li>
</ol>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code> +-----Source Table------+
| |
| +---------+ |
2. Backup | | | |
+----------+ | | | |
| | | | RocksDB | +-----+ |
| snapshot +&lt;------+ Store | | | |
| | | | | | WAL +&lt;-------+ 1. No GC
+------+---+ | | | | | |
| | +---------+ +---+-+ |
| | | |
| +-----------------------+
| |
| | 4. Start duplication
| |
| +-----------------v----+
| | |
+--------&gt;+ |
3. Restore | |
+------Dest Table------+
</code></pre></div></div>
<h3 id="执行步骤1">执行步骤1</h3>
<p>如何保留从此刻开始的所有写增量?我们可以如此进行操作:</p>
<p>首先使用 <code class="language-plaintext highlighter-rouge">add_dup [--freezed/-f]</code> 表示不进行日志复制,它的原理就是阻止当前日志 GC(log compaction)。该操作 <strong>必须最先执行</strong>,否则无法保证数据完整性。</p>
<div class="language-sh highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c">## bjsrv-account</span>
<span class="o">&gt;&gt;&gt;</span> add_dup account_xiaomi tjsrv-account <span class="nt">--freezed</span>
</code></pre></div></div>
<p>接着每个分片都会记录<strong>当前确认点(confirmed_decree)</strong>,并持久化到 MetaServer 上。
注意需等待所有的分片都将当前确认点更新至MetaServer后,才可进行下一步操作,这是该功能正确性的前提。</p>
<p><code class="language-plaintext highlighter-rouge">confirmed_decree</code> 值为 -1 即表示该分片的确认点尚未同步。</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; query_dup -d account_xiaomi 1535008534
&gt;&gt;&gt; {"dupid":1548442533,"status":"DS_START","remote":"c4srv-feedhistory","create_ts":1548442533763,"progress":[{"pid":0,"confirmed":-1},{"pid":1,"confirmed":276444333},{"pid":2,"confirmed":-1},{"pid":3,"confirmed":-1},{"pid":4,"confirmed":-1},{"pid":5,"confirmed":-1},{"pid":6,"confirmed":-1},{"pid":7,"confirmed":279069949},{"pid":8,"confirmed":-1}]}
&gt;&gt;&gt; query_dup -d account_xiaomi 1535008534
&gt;&gt;&gt; {"dupid":1548442533,"status":"DS_START","remote":"c4srv-feedhistory","create_ts":1548442533763,"progress":[{"pid":0,"confirmed":276444111},{"pid":1,"confirmed":276444333},{"pid":2,"confirmed":276444332},{"pid":3,"confirmed":276444222},{"pid":4,"confirmed":276444111},{"pid":5,"confirmed":276444377},{"pid":6,"confirmed":276444388},{"pid":7,"confirmed":279069949},{"pid":8,"confirmed":276444399}]}
</code></pre></div></div>
<h3 id="执行步骤23">执行步骤2,3</h3>
<p>使用冷备份功能将数据快照上传至远端存储,再使用恢复功能在目标集群(tjsrv-account)恢复该表。示例命令如下:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code># 立刻对表(app_id = 12)进行冷备
./run.sh shell -n bjsrv-account
&gt;&gt;&gt; add_backup_policy -p dup_transfer -b fds_wq -a 12 -i 86400 -s 12:01 -c 1
# 耐心等待备份生成
&gt;&gt;&gt; query_backup_policy -p dup_transfer
policy_info:
name : dup_transfer
backup_provider_type : fds_wq
backup_interval : 86400s
app_ids : {12}
start_time : 12:01
status : enabled
backup_history_count : 1
backup_infos:
[1]
id : 1541649698875
start_time : 2018-11-08 12:01:38
end_time : 2018-11-08 12:03:51
app_ids : {12}
# 在天津机房恢复表
./run.sh shell -n tjsrv-account
&gt;&gt;&gt; restore_app -c bjsrv-account -p dup_transfer -a account_xiaomi -i 12 -t 1541649698875 -b fds_wq
</code></pre></div></div>
<h3 id="执行步骤4">执行步骤4</h3>
<p>现在我们启动热备份。</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code># 开启日志复制
&gt;&gt;&gt; start_dup account_xiaomi &lt;dupid&gt;
# 至此热备份已经完全可用。
</code></pre></div></div>
<p><code class="language-plaintext highlighter-rouge">start_dup</code> 时,热备份任务会从之前记录的确认点开始复制,这样我们就保证了写增量的完整性。</p>
<p>另外需注意的是,由于写增量的长时间堆积,一时可能有大量日志复制,热备份流量会突增,从而导致服务不稳定。因此,我们需要在远端机房设置<a href="/zh/administration/throttling">限流(write throttling)</a>:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; get_app_envs
get app envs succeed, count = 7
=================================
replica.write_throttling = 30000*delay*100,40000*reject*200
=================================
</code></pre></div></div>
<h2 id="热备份的可靠性">热备份的可靠性</h2>
<h3 id="自动故障处理">自动故障处理</h3>
<p>热备份是集成在 ReplicaServer 中的一个在线服务,因而我们对该功能的可靠性有较高的要求。
为应对在热备份过程中可能发生的各种故障,我们提供了几种故障处理的选项:</p>
<ul>
<li>
<p><strong><em>fail-slow</em></strong>:在这种故障处理模式下,热备份对任何故障都会<strong>无限地重试</strong>。我们的运维人员需要对一些关键监控项设置报警,从而可以获知故障的发生。这是Pegasus的<strong>默认故障处理模式</strong>。</p>
</li>
<li>
<p><strong><em>fail-skip</em></strong>:遇到故障时,重试多次仍不成功后,直接跳过对当前这批数据的热备份,从而复制下一批数据。这适合那些<strong>可容忍数据丢失</strong>的业务场景。该选项通过数据丢失换得更好的可用性。</p>
</li>
</ul>
<p>操作命令:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>set_dup_fail_mode &lt;app_name&gt; &lt;dupid&gt; &lt;slow|skip&gt;
</code></pre></div></div>
<h3 id="重要监控">重要监控</h3>
<p>在热备份的运维中,我们建议观察几个核心监控,以持续留意服务情况:</p>
<ul>
<li>
<p><code class="language-plaintext highlighter-rouge">collector*app.pegasus*app.stat.dup_failed_shipping_ops#&lt;app_name&gt;</code>:有多少写复制RPC遇到失败。失败往往意味着远端集群或跨集群网络存在不可用。</p>
</li>
<li>
<p><code class="language-plaintext highlighter-rouge">replica*app.pegasus*dup.time_lag_ms@&lt;app_name&gt;</code>:P99的数据复制延迟。即源集群的一条写过了多长时间才到达目的集群。</p>
</li>
<li>
<p><code class="language-plaintext highlighter-rouge">replica*app.pegasus*dup.lagging_writes@&lt;app_name&gt;</code>:当前有多少写花费了过长的时间才到达目的集群。我们可以配置一个阈值,耗时超过该阈值的一条复制会被记录一次:</p>
<div class="language-ini highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nn">[pegasus.server]</span>
<span class="py">dup_lagging_write_threshold_ms</span> <span class="p">=</span> <span class="s">10000</span>
</code></pre></div> </div>
</li>
<li>
<p><code class="language-plaintext highlighter-rouge">replica*eon.replica_stub*dup.pending_mutations_count</code>:当前有多少写堆积在源集群,且尚未复制。如果一切正常,该监控项会稳定维持在某个值上下。当热备份的某个环节出现故障时,往往会有大量的写堆积,该值会持续上涨。</p>
</li>
<li>
<p><code class="language-plaintext highlighter-rouge">replica*eon.replica_stub*dup.load_file_failed_count</code>:源集群读取日志文件的失败次数。日志文件的读取是热备份的关键环节,如果该环节因某种原因出现故障,则会导致热备份被阻塞。</p>
</li>
</ul>
<h2 id="热备份的元信息">热备份的元信息</h2>
<p>热备份的元信息会经由 MetaServer 持久化于 Zookeeper 上,其存储路径如下:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code> &lt;cluster_root&gt; &lt;app_id&gt; &lt;dupid&gt;
| | |
| | |
[zk: 127.0.0.1:22181(CONNECTED) 0] get /pegasus/bjsrv-account/0.0.x.x/apps/1/duplication/1537336970
{"remote":"tjsrv-account","status":"DS_START","create_timestamp_ms":1537336970483}
</code></pre></div></div>
<h2 id="完整配置项列表">完整配置项列表</h2>
<div class="language-ini highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nn">[replication]</span>
<span class="c"># 如果遇到紧急情况想要手动关闭热备份,可以将该项设置为 false,默认为 true。
</span> <span class="py">duplication_enabled</span> <span class="p">=</span> <span class="s">true</span>
<span class="nn">[pegasus.clusters]</span>
<span class="c"># 开启热备份的集群必须配置目的集群的具体地址:
</span> <span class="py">tjsrv-account</span> <span class="p">=</span> <span class="s">127.0.0.1:51601,127.0.0.1:51601</span>
<span class="nn">[pegasus.server]</span>
<span class="py">dup_lagging_write_threshold_ms</span> <span class="p">=</span> <span class="s">10000</span>
<span class="c"># 热备份的两个集群需要登记源集群和目的集群的“cluster_id”:
</span><span class="nn">[duplication-group]</span>
<span class="py">tjsrv-account</span> <span class="p">=</span> <span class="s">1</span>
<span class="py">bjsrv-account</span> <span class="p">=</span> <span class="s">2</span>
</code></pre></div></div>
<p>我们在每条数据前都会加上 <code class="language-plaintext highlighter-rouge">timestamp+cluster_id</code> 的前缀,timestamp 即数据写到 pegasus 的时间戳,cluster_id 即上面 duplication-group 中所配置的,tjsrv 的 cluster_id 为 1,bjsrv 的 cluster_id 为 2。</p>
<p>cluster_id 的作用是:一旦出现写冲突,例如 tjsrv 和 bjsrv 同时写 key <code class="language-plaintext highlighter-rouge">"user_1"</code>,系统首先会检查两次写的时间戳,以时间戳大的为最终值。当极罕见地遇到时间戳相同的情况时,以 cluster_id 大的为最终值。使用这种机制我们可以保证两集群的最终值一定相同。</p>
<h2 id="完整监控项列表">完整监控项列表</h2>
<table>
<thead>
<tr>
<th>监控项</th>
</tr>
</thead>
<tbody>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*eon.replica_stub*dup.log_read_bytes_rate</code> (XiaoMi/rdsn#393)</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*eon.replica_stub*dup.log_read_mutations_rate</code> (XiaoMi/rdsn#393)</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*eon.replica_stub*dup.shipped_bytes_rate</code> (XiaoMi/rdsn#393)</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*eon.replica_stub*dup.confirmed_rate</code> (XiaoMi/rdsn#393)</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*eon.replica_stub*dup.pending_mutations_count</code> (XiaoMi/rdsn#393)</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*eon.replica_stub*dup.time_lag(ms)</code> (XiaoMi/rdsn#393)</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*eon.replica_stub*dup.load_file_failed_count</code> (XiaoMi/rdsn#425)</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*eon.replica*dup.disabled_non_idempotent_write_count@&lt;app_name&gt;</code> (XiaoMi/rdsn#411)</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*app.pegasus*dup_shipped_ops@&lt;gpid&gt;</code> (#399)</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*app.pegasus*dup_failed_shipping_ops@&lt;gpid&gt;</code> (#399)</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*app.pegasus*dup.time_lag_ms@&lt;app_name&gt;</code> #526</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">replica*app.pegasus*dup.lagging_writes@&lt;app_name&gt;</code> #526</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">collector*app.pegasus*app.stat.duplicate_qps#&lt;app_name&gt;</code> #520</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">collector*app.pegasus*app.stat.dup_shipped_ops#&lt;app_name&gt;</code> #520</td>
</tr>
<tr>
<td><code class="language-plaintext highlighter-rouge">collector*app.pegasus*app.stat.dup_failed_shipping_ops#&lt;app_name&gt;</code> #520</td>
</tr>
</tbody>
</table>
<h2 id="完整-http-接口列表">完整 HTTP 接口列表</h2>
<ul>
<li>
<p><code class="language-plaintext highlighter-rouge">http://0.0.0.0:34602/meta/app/duplication?name=temp</code></p>
</li>
<li>
<p><code class="language-plaintext highlighter-rouge">http://0.0.0.0:34801/replica/duplication?appid=2</code></p>
</li>
</ul>
<h2 id="known-limitations">Known Limitations</h2>
<ul>
<li>热备份暂时不建议两机房同时写一份数据。在我们的业务经验看来,通常这是可以接受的。用户可以将数据均分在 tjsrv 和 bjsrv 两机房内,热备份能保证当任一机房宕机,只有数秒的数据丢失(假设机房之间网络稳定)。</li>
</ul>
</div>
</section>
<!-- Site footer: Apache Incubator logo, license notice, incubation disclaimer and trademark statement. -->
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<!-- All Apache links use https to avoid an insecure hop. -->
<a href="https://incubator.apache.org/">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="https://www.apache.org/">The Apache Software Foundation</a>.
Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
本页导航
</p>
<ul class="menu-list">
<li><a href="#操作上手">操作上手</a></li>
<li><a href="#线上表开启热备份">线上表开启热备份</a>
<ul>
<li><a href="#执行步骤1">执行步骤1</a></li>
<li><a href="#执行步骤23">执行步骤2,3</a></li>
<li><a href="#执行步骤4">执行步骤4</a></li>
</ul>
</li>
<li><a href="#热备份的可靠性">热备份的可靠性</a>
<ul>
<li><a href="#自动故障处理">自动故障处理</a></li>
<li><a href="#重要监控">重要监控</a></li>
</ul>
</li>
<li><a href="#热备份的元信息">热备份的元信息</a></li>
<li><a href="#完整配置项列表">完整配置项列表</a></li>
<li><a href="#完整监控项列表">完整监控项列表</a></li>
<li><a href="#完整-http-接口列表">完整 HTTP 接口列表</a></li>
<li><a href="#known-limitations">Known Limitations</a></li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
// Initialise the DocSearch search box inside the element with id "docsearch".
// NOTE(review): apiKey is presumably a public, search-only Algolia key — confirm it carries no write permissions.
docsearch({
container: '#docsearch',
appId: 'QRN30RBW0S',
indexName: 'pegasus-apache',
apiKey: 'd3a3252fa344359766707a106c4ed88f',
// Debug mode is meant for inspecting the search UI during setup; keep it off on the published site.
debug: false
});
</script>
</body>
</html>