blob: 3ba534d187b547b4c226040aceeddc2d06903559 [file] [log] [blame]
<!DOCTYPE html>
<!-- The page is served under /zh/ and its body text is Simplified Chinese; declaring the
language lets screen readers, translation tools and search engines treat it correctly. -->
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Pegasus | Partition Split</title>
<link rel="stylesheet" href="/zh/assets/css/app.css">
<link rel="shortcut icon" href="/zh/assets/images/favicon.ico">
<link rel="stylesheet" href="/zh/assets/css/utilities.min.css">
<link rel="stylesheet" href="/zh/assets/css/docsearch.v3.css">
<!-- These scripts are intentionally loaded synchronously: the inline script at the end of
the body calls docsearch() while the document is still being parsed. -->
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<!-- The SEO tag's own <title> is omitted: a document may contain only one <title>, and
browsers already use the first one declared above. -->
<meta name="generator" content="Jekyll v4.3.3" />
<meta property="og:title" content="Partition Split" />
<meta property="og:locale" content="zh_CN" />
<meta name="description" content="功能简介 在pegasus中,表的partition个数是在创建时指定的,并且不会动态改变,但随着数据量不断增大,可能出现partition数据过大的情况,这样可能会导致读写效率下降,因此需要人工增大partition个数来保障服务质量。" />
<meta property="og:description" content="功能简介 在pegasus中,表的partition个数是在创建时指定的,并且不会动态改变,但随着数据量不断增大,可能出现partition数据过大的情况,这样可能会导致读写效率下降,因此需要人工增大partition个数来保障服务质量。" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2024-04-22T13:02:52+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Partition Split" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2024-04-22T13:02:52+00:00","datePublished":"2024-04-22T13:02:52+00:00","description":"功能简介 在pegasus中,表的partition个数是在创建时指定的,并且不会动态改变,但随着数据量不断增大,可能出现partition数据过大的情况,这样可能会导致读写效率下降,因此需要人工增大partition个数来保障服务质量。","headline":"Partition Split","mainEntityOfPage":{"@type":"WebPage","@id":"/administration/partition-split"},"url":"/administration/partition-split"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<!-- Sidebar logo; links back to the Chinese home page. The alt text gives the link an
accessible name, since the image is its only content. -->
<div class="dashboard-panel-header has-text-centered">
<a href="/zh/">
<img src="/assets/images/pegasus-logo-inv.png" alt="Pegasus 首页" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">Pegasus 产品文档</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/downloads"
class="">
下载
</a>
</li>
</ul>
<p class="menu-label">编译构建</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/build/compile-by-docker"
class="">
使用 Docker 完成编译(推荐)
</a>
</li>
<li>
<a href="/zh/docs/build/compile-from-source"
class="">
从源码编译
</a>
</li>
</ul>
<p class="menu-label">客户端库</p>
<ul class="menu-list">
<li>
<a href="/zh/clients/java-client"
class="">
Java 客户端
</a>
</li>
<li>
<a href="/zh/clients/cpp-client"
class="">
C++ 客户端
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang 客户端
</a>
</li>
<li>
<a href="/zh/clients/python-client"
class="">
Python 客户端
</a>
</li>
<li>
<a href="/zh/clients/node-client"
class="">
NodeJS 客户端
</a>
</li>
<li>
<a href="/zh/clients/scala-client"
class="">
Scala 客户端
</a>
</li>
</ul>
<p class="menu-label">生态工具</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/tools/shell"
class="">
Pegasus Shell 工具
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
集群管理命令行
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
数据访问命令行
</a>
</li>
</ul>
<p class="menu-label">用户接口</p>
<ul class="menu-list">
<li>
<a href="/zh/api/ttl"
class="">
TTL
</a>
</li>
<li>
<a href="/zh/api/single-atomic"
class="">
单行原子操作
</a>
</li>
<li>
<a href="/zh/api/redis"
class="">
Redis 适配
</a>
</li>
<li>
<a href="/zh/api/geo"
class="">
GEO 支持
</a>
</li>
<li>
<a href="/zh/api/http"
class="">
HTTP 接口
</a>
</li>
</ul>
<p class="menu-label">高效运维</p>
<ul class="menu-list">
<li>
<a href="/zh/administration/deployment"
class="">
集群部署
</a>
</li>
<li>
<a href="/zh/administration/config"
class="">
配置说明
</a>
</li>
<li>
<a href="/zh/administration/rebalance"
class="">
负载均衡
</a>
</li>
<li>
<a href="/zh/administration/monitoring"
class="">
可视化监控
</a>
</li>
<li>
<a href="/zh/administration/rolling-update"
class="">
集群重启和升级
</a>
</li>
<li>
<a href="/zh/administration/scale-in-out"
class="">
集群扩容缩容
</a>
</li>
<li>
<a href="/zh/administration/resource-management"
class="">
资源管理
</a>
</li>
<li>
<a href="/zh/administration/cold-backup"
class="">
冷备份
</a>
</li>
<li>
<a href="/zh/administration/meta-recovery"
class="">
元数据恢复
</a>
</li>
<li>
<a href="/zh/administration/replica-recovery"
class="">
Replica 数据恢复
</a>
</li>
<li>
<a href="/zh/administration/zk-migration"
class="">
Zookeeper 迁移
</a>
</li>
<li>
<a href="/zh/administration/table-migration"
class="">
Table 迁移
</a>
</li>
<li>
<a href="/zh/administration/table-soft-delete"
class="">
Table 软删除
</a>
</li>
<li>
<a href="/zh/administration/table-env"
class="">
Table 环境变量
</a>
</li>
<li>
<a href="/zh/administration/remote-commands"
class="">
远程命令
</a>
</li>
<li>
<!-- Current page: aria-current exposes to assistive technology what is-active shows visually. -->
<a href="/zh/administration/partition-split"
class="is-active" aria-current="page">
Partition-Split
</a>
</li>
<li>
<a href="/zh/administration/duplication"
class="">
跨机房同步
</a>
</li>
<li>
<a href="/zh/administration/compression"
class="">
数据压缩
</a>
</li>
<li>
<a href="/zh/administration/throttling"
class="">
流量控制
</a>
</li>
<li>
<a href="/zh/administration/experiences"
class="">
运维经验
</a>
</li>
<li>
<a href="/zh/administration/manual-compact"
class="">
Manual Compact 功能
</a>
</li>
<li>
<a href="/zh/administration/usage-scenario"
class="">
Usage Scenario 功能
</a>
</li>
<li>
<a href="/zh/administration/bad-disk"
class="">
坏盘检修
</a>
</li>
<li>
<a href="/zh/administration/whitelist"
class="">
Replica Server 白名单
</a>
</li>
<li>
<a href="/zh/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/zh/administration/hotspot-detection"
class="">
热点检测
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/zh/" class="navbar-item">
<!-- Pegasus icon; the alt text names the link destination because the image is the link's only content. -->
<img src="/assets/images/pegasus-square.png" alt="Pegasus 首页">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/partition-split"><strong>En</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<!-- Dropdown heading only: no href, because an empty href reloads the page when tapped. -->
<a class="navbar-link">
<span>
Pegasus 产品文档
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/downloads"
class="navbar-item ">
下载
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<!-- Dropdown heading only: no href, because an empty href reloads the page when tapped. -->
<a class="navbar-link">
<span>
编译构建
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/build/compile-by-docker"
class="navbar-item ">
使用 Docker 完成编译(推荐)
</a>
<a href="/zh/docs/build/compile-from-source"
class="navbar-item ">
从源码编译
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<!-- Dropdown heading only: no href, because an empty href reloads the page when tapped. -->
<a class="navbar-link">
<span>
客户端库
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/clients/java-client"
class="navbar-item ">
Java 客户端
</a>
<a href="/zh/clients/cpp-client"
class="navbar-item ">
C++ 客户端
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang 客户端
</a>
<a href="/zh/clients/python-client"
class="navbar-item ">
Python 客户端
</a>
<a href="/zh/clients/node-client"
class="navbar-item ">
NodeJS 客户端
</a>
<a href="/zh/clients/scala-client"
class="navbar-item ">
Scala 客户端
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<!-- Dropdown heading only: no href, because an empty href reloads the page when tapped. -->
<a class="navbar-link">
<span>
生态工具
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/tools/shell"
class="navbar-item ">
Pegasus Shell 工具
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
集群管理命令行
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
数据访问命令行
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<!-- Dropdown heading only: no href, because an empty href reloads the page when tapped. -->
<a class="navbar-link">
<span>
用户接口
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/api/ttl"
class="navbar-item ">
TTL
</a>
<a href="/zh/api/single-atomic"
class="navbar-item ">
单行原子操作
</a>
<a href="/zh/api/redis"
class="navbar-item ">
Redis 适配
</a>
<a href="/zh/api/geo"
class="navbar-item ">
GEO 支持
</a>
<a href="/zh/api/http"
class="navbar-item ">
HTTP 接口
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<!-- Dropdown heading only: no href, because an empty href reloads the page when tapped. -->
<a class="navbar-link">
<span>
高效运维
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/administration/deployment"
class="navbar-item ">
集群部署
</a>
<a href="/zh/administration/config"
class="navbar-item ">
配置说明
</a>
<a href="/zh/administration/rebalance"
class="navbar-item ">
负载均衡
</a>
<a href="/zh/administration/monitoring"
class="navbar-item ">
可视化监控
</a>
<a href="/zh/administration/rolling-update"
class="navbar-item ">
集群重启和升级
</a>
<a href="/zh/administration/scale-in-out"
class="navbar-item ">
集群扩容缩容
</a>
<a href="/zh/administration/resource-management"
class="navbar-item ">
资源管理
</a>
<a href="/zh/administration/cold-backup"
class="navbar-item ">
冷备份
</a>
<a href="/zh/administration/meta-recovery"
class="navbar-item ">
元数据恢复
</a>
<a href="/zh/administration/replica-recovery"
class="navbar-item ">
Replica 数据恢复
</a>
<a href="/zh/administration/zk-migration"
class="navbar-item ">
Zookeeper 迁移
</a>
<a href="/zh/administration/table-migration"
class="navbar-item ">
Table 迁移
</a>
<a href="/zh/administration/table-soft-delete"
class="navbar-item ">
Table 软删除
</a>
<a href="/zh/administration/table-env"
class="navbar-item ">
Table 环境变量
</a>
<a href="/zh/administration/remote-commands"
class="navbar-item ">
远程命令
</a>
<!-- Current page: aria-current exposes to assistive technology what is-active shows visually. -->
<a href="/zh/administration/partition-split"
class="navbar-item is-active" aria-current="page">
Partition-Split
</a>
<a href="/zh/administration/duplication"
class="navbar-item ">
跨机房同步
</a>
<a href="/zh/administration/compression"
class="navbar-item ">
数据压缩
</a>
<a href="/zh/administration/throttling"
class="navbar-item ">
流量控制
</a>
<a href="/zh/administration/experiences"
class="navbar-item ">
运维经验
</a>
<a href="/zh/administration/manual-compact"
class="navbar-item ">
Manual Compact 功能
</a>
<a href="/zh/administration/usage-scenario"
class="navbar-item ">
Usage Scenario 功能
</a>
<a href="/zh/administration/bad-disk"
class="navbar-item ">
坏盘检修
</a>
<a href="/zh/administration/whitelist"
class="navbar-item ">
Replica Server 白名单
</a>
<a href="/zh/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/zh/administration/hotspot-detection"
class="navbar-item ">
热点检测
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/partition-split"><strong>En</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">Partition-Split</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<h1 id="功能简介">功能简介</h1>
<p>在pegasus中,表的partition个数是在创建时指定的,并且不会动态改变,但随着数据量不断增大,可能出现partition数据过大的情况,这样可能会导致读写效率下降,因此需要人工增大partition个数来保障服务质量。</p>
<p>在pegasus的设计中,partition个数为2的幂次,目前split功能会将partition个数翻倍,partition(i)将会被分裂为partition(i)和partition(i+original_count)。例如,原表有4个partition,在split之后将有8个partition,partition(0)将会分裂为partition(0)和partition(4),partition(1)将会分裂为partition(1)和partition(5)……以此类推。我们将partition(i)称为parent partition,partition(i+original_count)称为child partition。</p>
<h1 id="接口描述">接口描述</h1>
<p><code class="language-plaintext highlighter-rouge">partition_split &lt;app_name&gt; &lt;new_partition_count&gt;</code></p>
<ul>
<li>若当前表不可用,返回 ERR_APP_NOT_EXIST</li>
<li>若new_partition_count != old_partition_count*2,返回 ERR_INVALID_PARAMETERS</li>
<li>若表正在进行split,返回 ERR_BUSY</li>
<li>若split成功返回 ERR_OK</li>
</ul>
<h1 id="设计与实现">设计与实现</h1>
<h2 id="整体流程">整体流程</h2>
<p>partition split过程可分为以下几个步骤:</p>
<ol>
<li>client发送partition split请求给meta server。</li>
<li>replica server通过与meta server的config_sync发现表partition个数发生变化。</li>
<li>每个parent partition通过异步learn的方式复制自己所有数据得到child partition。</li>
<li>当一个group中的所有child partition就绪后,primary给meta server发送注册child partition的请求。</li>
<li>meta server注册child partition。</li>
<li>所有child partition被注册完成后,split过程结束。</li>
<li>清理无效数据,详见<a href="#如何删除无效数据">如何删除无效数据</a>。</li>
</ol>
<h2 id="partition-split过程中的读写">Partition-Split过程中的读写</h2>
<p>在partition split过程中,读写流程可保持正常进行,在注册child partition期间有短暂拒绝服务。</p>
<p>为了方便说明,假设app的partition个数为4,分裂后为8,而client希望访问的数据,在分裂前由partition(1)服务,分裂后由partition(5)服务。从split开始到partition进行异步learn,都仍将由partition(1)为client进行服务,但当primary给meta server发送注册child partition请求后,partition(1)将拒绝client的读写请求,直到meta server注册完成。当注册完成后,client并不知道将由partition(5)为其服务,仍将请求发送给partition(1),这时partition(1)会提示client更新访问路由表,而更新路由表是对用户透明的。总体来说服务不可用时间非常短。</p>
<h2 id="为什么需要指定partition个数">为什么需要指定partition个数</h2>
<p>由于partition split功能不能取消,且没有减小partition的功能,因此执行partition split需谨慎,虽然目前单次split只能使partition个数翻倍,但仍需指定partition count,这是为了防止client多次重试这个非幂等操作导致partition个数非预期增大。</p>
<h2 id="如何删除无效数据">如何删除无效数据</h2>
<p>执行partition
split前需要保证磁盘空间可用超过50%,内存可用充足,因为split操作首先需要对每个partition进行复制,在split完成后,pegasus会通过rocksdb提供的filter功能在后台删除split造成的无效数据。若磁盘资源紧张或者希望尽快删除无效数据,可在集群CPU空闲期间执行manual_compact功能,手动触发filter,命令详情参见<a href="#操作示例">操作示例</a></p>
<h2 id="partition-split与热点问题">Partition-Split与热点问题</h2>
<p>split功能主要是为了保障在数据量非预期增长情况下的服务质量,并不能完全解决单个partition过热的问题,pegasus的数据模式是hash分片,在split完成后流量并不能保障是被平分在两个partition上,这个是依赖于用户的hashkey决定的,只能说可以缓解热点问题,并且partition split是表级命令,暂时不支持针对单个partition的partition split。</p>
<h1 id="操作示例">操作示例</h1>
<h2 id="执行partition-split">执行Partition-Split</h2>
<p>在split前,建议先通过<code class="language-plaintext highlighter-rouge">app_stat</code>命令查看待split表的大小,再执行如下命令:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; partition_split split_table 8
split app split_table succeed
</code></pre></div></div>
<p>通过shell工具执行partition split,将split_table partition count从4设置为8</p>
<h2 id="partition-split过程中">Partition-Split过程中</h2>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; app split_table -d
[Parameters]
app_name: split_table
detailed: true
[Result]
app_name : split_table
app_id : 2
partition_count : 8
max_replica_count : 3
details :
pidx ballot replica_count primary secondaries
0 3 3/3 10.239.35.234:34802 [10.239.35.234:34803,10.239.35.234:34801]
1 3 3/3 10.239.35.234:34803 [10.239.35.234:34801,10.239.35.234:34802]
2 3 3/3 10.239.35.234:34801 [10.239.35.234:34803,10.239.35.234:34802]
3 3 3/3 10.239.35.234:34802 [10.239.35.234:34801,10.239.35.234:34803]
4 -1 0/0 - []
5 -1 0/0 - []
6 -1 0/0 - []
7 -1 0/0 - []
node primary secondary total
10.239.35.234:34801 1 3 4
10.239.35.234:34802 2 2 4
10.239.35.234:34803 1 3 4
4 8 12
fully_healthy_partition_count : 4
unhealthy_partition_count : 4
write_unhealthy_partition_count : 4
read_unhealthy_partition_count : 4
list app split_table succeed
</code></pre></div></div>
<p>通过<code class="language-plaintext highlighter-rouge">app &lt;table_name&gt; -d</code>命令查看当前表的详情,ballot=-1表示该partition还没有被meta server注册</p>
<h2 id="partition-split完成">Partition-Split完成</h2>
<p>同样通过<code class="language-plaintext highlighter-rouge">app &lt;table_name&gt; -d</code>查看表详情,当发现所有ballot都大于0时表示所有partition都被注册,若当前流量不大可以将meta server设置为lively状态,进行负载均衡,并且通过<code class="language-plaintext highlighter-rouge">app_stat</code>命令查看表的大小,应该是split前的2倍左右</p>
<h2 id="手动触发manual-compact">手动触发Manual-Compact</h2>
<p>关于Manual compact详情可参见<a href="manual-compact">Manual compact功能</a>,在集群CPU空闲时进行操作,建议命令示例如下:
<code class="language-plaintext highlighter-rouge">./scripts/pegasus_manual_compact.sh -c &lt;meta_list&gt; -a &lt;table_name&gt;</code></p>
</div>
</section>
<!-- Site footer: Apache Incubator logo, licence notice, incubation disclaimer and trademark
notice. External links use https so visitors are not sent over an unencrypted connection. -->
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="https://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="https://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
本页导航
</p>
<ul class="menu-list">
<li><a href="#功能简介">功能简介</a></li>
<li><a href="#接口描述">接口描述</a></li>
<li><a href="#设计与实现">设计与实现</a>
<ul>
<li><a href="#整体流程">整体流程</a></li>
<li><a href="#partition-split过程中的读写">Partition-Split过程中的读写</a></li>
<li><a href="#为什么需要指定partition个数">为什么需要指定partition个数</a></li>
<li><a href="#如何删除无效数据">如何删除无效数据</a></li>
<li><a href="#partition-split与热点问题">Partition-Split与热点问题</a></li>
</ul>
</li>
<li><a href="#操作示例">操作示例</a>
<ul>
<li><a href="#执行partition-split">执行Partition-Split</a></li>
<li><a href="#partition-split过程中">Partition-Split过程中</a></li>
<li><a href="#partition-split完成">Partition-Split完成</a></li>
<li><a href="#手动触发manual-compact">手动触发Manual-Compact</a></li>
</ul>
</li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
docsearch({
container: '#docsearch',
appId: 'QRN30RBW0S',
indexName: 'pegasus-apache',
apiKey: 'd3a3252fa344359766707a106c4ed88f',
debug: true
});
</script>
</body>
</html>