<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Pegasus | Manual Compact</title>
<link rel="stylesheet" href="/zh/assets/css/app.css">
<link rel="shortcut icon" href="/zh/assets/images/favicon.ico">
<link rel="stylesheet" href="/zh/assets/css/utilities.min.css">
<link rel="stylesheet" href="/zh/assets/css/docsearch.v3.css">
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<meta name="generator" content="Jekyll v4.3.3" />
<meta property="og:title" content="Manual Compact" />
<meta property="og:locale" content="zh_CN" />
<meta name="description" content="注:manual compact 功能从 v1.8.1 版本开始支持。 原理" />
<meta property="og:description" content="注:manual compact 功能从 v1.8.1 版本开始支持。 原理" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2024-04-22T13:02:52+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Manual Compact" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2024-04-22T13:02:52+00:00","datePublished":"2024-04-22T13:02:52+00:00","description":"注:manual compact 功能从 v1.8.1 版本开始支持。 原理","headline":"Manual Compact","mainEntityOfPage":{"@type":"WebPage","@id":"/administration/manual-compact"},"url":"/administration/manual-compact"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<div class="dashboard-panel-header has-text-centered">
<a href="/zh/">
<img src="/assets/images/pegasus-logo-inv.png" alt="Pegasus 首页" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">Pegasus 产品文档</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/downloads"
class="">
下载
</a>
</li>
</ul>
<p class="menu-label">编译构建</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/build/compile-by-docker"
class="">
使用 Docker 完成编译(推荐)
</a>
</li>
<li>
<a href="/zh/docs/build/compile-from-source"
class="">
从源码编译
</a>
</li>
</ul>
<p class="menu-label">客户端库</p>
<ul class="menu-list">
<li>
<a href="/zh/clients/java-client"
class="">
Java 客户端
</a>
</li>
<li>
<a href="/zh/clients/cpp-client"
class="">
C++ 客户端
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang 客户端
</a>
</li>
<li>
<a href="/zh/clients/python-client"
class="">
Python 客户端
</a>
</li>
<li>
<a href="/zh/clients/node-client"
class="">
NodeJS 客户端
</a>
</li>
<li>
<a href="/zh/clients/scala-client"
class="">
Scala 客户端
</a>
</li>
</ul>
<p class="menu-label">生态工具</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/tools/shell"
class="">
Pegasus Shell 工具
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
集群管理命令行
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
数据访问命令行
</a>
</li>
</ul>
<p class="menu-label">用户接口</p>
<ul class="menu-list">
<li>
<a href="/zh/api/ttl"
class="">
TTL
</a>
</li>
<li>
<a href="/zh/api/single-atomic"
class="">
单行原子操作
</a>
</li>
<li>
<a href="/zh/api/redis"
class="">
Redis 适配
</a>
</li>
<li>
<a href="/zh/api/geo"
class="">
GEO 支持
</a>
</li>
<li>
<a href="/zh/api/http"
class="">
HTTP 接口
</a>
</li>
</ul>
<p class="menu-label">高效运维</p>
<ul class="menu-list">
<li>
<a href="/zh/administration/deployment"
class="">
集群部署
</a>
</li>
<li>
<a href="/zh/administration/config"
class="">
配置说明
</a>
</li>
<li>
<a href="/zh/administration/rebalance"
class="">
负载均衡
</a>
</li>
<li>
<a href="/zh/administration/monitoring"
class="">
可视化监控
</a>
</li>
<li>
<a href="/zh/administration/rolling-update"
class="">
集群重启和升级
</a>
</li>
<li>
<a href="/zh/administration/scale-in-out"
class="">
集群扩容缩容
</a>
</li>
<li>
<a href="/zh/administration/resource-management"
class="">
资源管理
</a>
</li>
<li>
<a href="/zh/administration/cold-backup"
class="">
冷备份
</a>
</li>
<li>
<a href="/zh/administration/meta-recovery"
class="">
元数据恢复
</a>
</li>
<li>
<a href="/zh/administration/replica-recovery"
class="">
Replica 数据恢复
</a>
</li>
<li>
<a href="/zh/administration/zk-migration"
class="">
Zookeeper 迁移
</a>
</li>
<li>
<a href="/zh/administration/table-migration"
class="">
Table 迁移
</a>
</li>
<li>
<a href="/zh/administration/table-soft-delete"
class="">
Table 软删除
</a>
</li>
<li>
<a href="/zh/administration/table-env"
class="">
Table 环境变量
</a>
</li>
<li>
<a href="/zh/administration/remote-commands"
class="">
远程命令
</a>
</li>
<li>
<a href="/zh/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/zh/administration/duplication"
class="">
跨机房同步
</a>
</li>
<li>
<a href="/zh/administration/compression"
class="">
数据压缩
</a>
</li>
<li>
<a href="/zh/administration/throttling"
class="">
流量控制
</a>
</li>
<li>
<a href="/zh/administration/experiences"
class="">
运维经验
</a>
</li>
<li>
<a href="/zh/administration/manual-compact"
class="is-active">
Manual Compact 功能
</a>
</li>
<li>
<a href="/zh/administration/usage-scenario"
class="">
Usage Scenario 功能
</a>
</li>
<li>
<a href="/zh/administration/bad-disk"
class="">
坏盘检修
</a>
</li>
<li>
<a href="/zh/administration/whitelist"
class="">
Replica Server 白名单
</a>
</li>
<li>
<a href="/zh/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/zh/administration/hotspot-detection"
class="">
热点检测
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/zh/" class="navbar-item">
<!-- Pegasus Icon -->
<img src="/assets/images/pegasus-square.png" alt="Pegasus 首页">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/manual-compact"><strong>En</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Pegasus 产品文档
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/downloads"
class="navbar-item ">
下载
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
编译构建
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/build/compile-by-docker"
class="navbar-item ">
使用 Docker 完成编译(推荐)
</a>
<a href="/zh/docs/build/compile-from-source"
class="navbar-item ">
从源码编译
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
客户端库
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/clients/java-client"
class="navbar-item ">
Java 客户端
</a>
<a href="/zh/clients/cpp-client"
class="navbar-item ">
C++ 客户端
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang 客户端
</a>
<a href="/zh/clients/python-client"
class="navbar-item ">
Python 客户端
</a>
<a href="/zh/clients/node-client"
class="navbar-item ">
NodeJS 客户端
</a>
<a href="/zh/clients/scala-client"
class="navbar-item ">
Scala 客户端
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
生态工具
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/tools/shell"
class="navbar-item ">
Pegasus Shell 工具
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
集群管理命令行
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
数据访问命令行
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
用户接口
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/api/ttl"
class="navbar-item ">
TTL
</a>
<a href="/zh/api/single-atomic"
class="navbar-item ">
单行原子操作
</a>
<a href="/zh/api/redis"
class="navbar-item ">
Redis 适配
</a>
<a href="/zh/api/geo"
class="navbar-item ">
GEO 支持
</a>
<a href="/zh/api/http"
class="navbar-item ">
HTTP 接口
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
高效运维
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/administration/deployment"
class="navbar-item ">
集群部署
</a>
<a href="/zh/administration/config"
class="navbar-item ">
配置说明
</a>
<a href="/zh/administration/rebalance"
class="navbar-item ">
负载均衡
</a>
<a href="/zh/administration/monitoring"
class="navbar-item ">
可视化监控
</a>
<a href="/zh/administration/rolling-update"
class="navbar-item ">
集群重启和升级
</a>
<a href="/zh/administration/scale-in-out"
class="navbar-item ">
集群扩容缩容
</a>
<a href="/zh/administration/resource-management"
class="navbar-item ">
资源管理
</a>
<a href="/zh/administration/cold-backup"
class="navbar-item ">
冷备份
</a>
<a href="/zh/administration/meta-recovery"
class="navbar-item ">
元数据恢复
</a>
<a href="/zh/administration/replica-recovery"
class="navbar-item ">
Replica 数据恢复
</a>
<a href="/zh/administration/zk-migration"
class="navbar-item ">
Zookeeper 迁移
</a>
<a href="/zh/administration/table-migration"
class="navbar-item ">
Table 迁移
</a>
<a href="/zh/administration/table-soft-delete"
class="navbar-item ">
Table 软删除
</a>
<a href="/zh/administration/table-env"
class="navbar-item ">
Table 环境变量
</a>
<a href="/zh/administration/remote-commands"
class="navbar-item ">
远程命令
</a>
<a href="/zh/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/zh/administration/duplication"
class="navbar-item ">
跨机房同步
</a>
<a href="/zh/administration/compression"
class="navbar-item ">
数据压缩
</a>
<a href="/zh/administration/throttling"
class="navbar-item ">
流量控制
</a>
<a href="/zh/administration/experiences"
class="navbar-item ">
运维经验
</a>
<a href="/zh/administration/manual-compact"
class="navbar-item is-active">
Manual Compact 功能
</a>
<a href="/zh/administration/usage-scenario"
class="navbar-item ">
Usage Scenario 功能
</a>
<a href="/zh/administration/bad-disk"
class="navbar-item ">
坏盘检修
</a>
<a href="/zh/administration/whitelist"
class="navbar-item ">
Replica Server 白名单
</a>
<a href="/zh/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/zh/administration/hotspot-detection"
class="navbar-item ">
热点检测
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/manual-compact"><strong>En</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">Manual Compact 功能</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<p>注:manual compact 功能从 v1.8.1 版本开始支持。</p>
<h1 id="原理">原理</h1>
<p>RocksDB 除了根据需要自动触发 compaction 外,还能通过接口手动触发 compaction,这个功能称之为 <a href="https://github.com/facebook/rocksdb/wiki/Manual-Compaction">Manual Compaction</a>。其提供了 <code class="language-plaintext highlighter-rouge">CompactRange()</code> 接口,如下:</p>
<pre><code class="language-C++">// CompactRangeOptions is used by CompactRange() call.
struct CompactRangeOptions {
// If true, no other compaction will run at the same time as this
// manual compaction
bool exclusive_manual_compaction = true;
// If true, compacted files will be moved to the minimum level capable
// of holding the data or given level (specified non-negative target_level).
bool change_level = false;
// If change_level is true and target_level have non-negative value, compacted
// files will be moved to target_level.
int target_level = -1;
// Compaction outputs will be placed in options.db_paths[target_path_id].
// Behavior is undefined if target_path_id is out of range.
uint32_t target_path_id = 0;
// By default level based compaction will only compact the bottommost level
// if there is a compaction filter
BottommostLevelCompaction bottommost_level_compaction =
BottommostLevelCompaction::kIfHaveCompactionFilter;
};
// Compact the underlying storage for the key range [*begin,*end].
// The actual compaction interval might be superset of [*begin, *end].
// In particular, deleted and overwritten versions are discarded,
// and the data is rearranged to reduce the cost of operations
// needed to access the data. This operation should typically only
// be invoked by users who understand the underlying implementation.
//
// begin==nullptr is treated as a key before all keys in the database.
// end==nullptr is treated as a key after all keys in the database.
// Therefore the following call will compact the entire database:
// db-&gt;CompactRange(options, nullptr, nullptr);
// Note that after the entire database is compacted, all data are pushed
// down to the last level containing any data. If the total data size after
// compaction is reduced, that level might not be appropriate for hosting all
// the files. In this case, client could set options.change_level to true, to
// move the files back to the minimum level capable of holding the data set
// or a given level (specified by non-negative options.target_level).
virtual Status CompactRange(const CompactRangeOptions&amp; options,
ColumnFamilyHandle* column_family,
const Slice* begin, const Slice* end) = 0;
</code></pre>
<p>对应地,Pegasus 对该功能在上层进行了封装,提供了表级别的 Manual Compact 功能,其作用是:</p>
<ul>
<li>通过 compaction 去掉垃圾数据,减少数据量,降低文件层数,提升读操作的性能。</li>
<li>对最高层做 compaction,可以清理掉 Delete 标记数据。</li>
<li>配合 <a href="usage-scenario">Usage Scenario 功能</a> 中表的 bulk_load 模式,可以在灌数据完成后执行一次 Manual Compact,去除垃圾数据,整理数据和文件夹结构,提升读性能。</li>
</ul>
<p>实现方式:</p>
<ul>
<li>在 2.1 版本以前,Pegasus 扩展 RocksDB,并在 Manifest 中记录上一次执行 Manual Compact 的时间,并提供 <code class="language-plaintext highlighter-rouge">GetLastManualCompactFinishTime()</code> 方法来获取该时间。自 2.1 版本开始,Pegasus将该时间记录到了meta column family中,并提供了 <code class="language-plaintext highlighter-rouge">get_last_manual_compact_finish_time()</code>方法来获取。</li>
<li>利用 <a href="table-env">Table 环境变量</a>,来设置两类 Manual Compact 的环境变量:
<ul>
<li>单次 Manual Compact:
<ul>
<li><code class="language-plaintext highlighter-rouge">manual_compact.once.trigger_time</code>:格式为 Unix 时间戳的秒数,可通过 shell 命令 <code class="language-plaintext highlighter-rouge">date +%s</code> 获取当前时间戳。如果 LastManualCompactFinishTime 旧于该 trigger_time,就触发 Manual Compaction 的执行。</li>
<li><code class="language-plaintext highlighter-rouge">manual_compact.once.target_level</code>:用于设置 <code class="language-plaintext highlighter-rouge">CompactRangeOptions::target_level</code>。如果不设置,则使用默认值 <code class="language-plaintext highlighter-rouge">-1</code></li>
<li><code class="language-plaintext highlighter-rouge">manual_compact.once.bottommost_level_compaction</code>:可设置为 <code class="language-plaintext highlighter-rouge">skip</code> 或者 <code class="language-plaintext highlighter-rouge">force</code>。如果是 <code class="language-plaintext highlighter-rouge">skip</code>,则不对最高层做 compaction;如果是 <code class="language-plaintext highlighter-rouge">force</code>,则强制对最高层做 compaction。如果不设置,则默认为 <code class="language-plaintext highlighter-rouge">skip</code></li>
</ul>
</li>
<li>周期 Manual Compact:
<ul>
<li><code class="language-plaintext highlighter-rouge">manual_compact.periodic.trigger_time</code>:格式为逗号分隔的时钟,譬如 <code class="language-plaintext highlighter-rouge">3:00,21:00</code>,表示每一天的 3:00 和 21:00 都触发一次 Manual Compaction 的执行。</li>
<li><code class="language-plaintext highlighter-rouge">manual_compact.periodic.target_level</code>:用于设置 <code class="language-plaintext highlighter-rouge">CompactRangeOptions::target_level</code>。如果不设置,则使用默认值 <code class="language-plaintext highlighter-rouge">-1</code></li>
<li><code class="language-plaintext highlighter-rouge">manual_compact.periodic.bottommost_level_compaction</code>:可设置为 <code class="language-plaintext highlighter-rouge">skip</code> 或者 <code class="language-plaintext highlighter-rouge">force</code>。如果是 <code class="language-plaintext highlighter-rouge">skip</code>,则不对最高层做 compaction;如果是 <code class="language-plaintext highlighter-rouge">force</code>,则强制对最高层做 compaction。如果不设置,则默认为 <code class="language-plaintext highlighter-rouge">skip</code></li>
</ul>
</li>
<li>Manual Compact 总开关:
<ul>
<li><code class="language-plaintext highlighter-rouge">manual_compact.disabled</code>(从 v1.9.0 版本开始支持):如果为 true,则关闭 Manual Compact 功能,并且取消正在执行中的 Manual Compact 动作。如果不设置,默认为 false。</li>
<li><code class="language-plaintext highlighter-rouge">manual_compact.max_concurrent_running_count</code>(从 v1.11.3 版本开始支持):指定最大并发数。实际上,可执行的最大并发数由该参数和服务端 <code class="language-plaintext highlighter-rouge">THREAD_POOL_COMPACT</code> 的线程数共同决定,取两者的较小值。该参数是节点级别的,如果同一时间进行 manual compaction 的表太多,则很有可能达到该最大并发数,后续该节点上的 replica 会忽略本轮 manual compaction 请求,延后执行。在日志中可以看到 <code class="language-plaintext highlighter-rouge">xxx ignored compact because exceed max_concurrent_running_count</code>。</li>
</ul>
</li>
</ul>
</li>
</ul>
<p>注意:</p>
<ul>
<li>Manual Compact 功能是分派到独立的 Compact 线程池中执行的,每个线程同一时刻只能处理一个 replica 的 full compaction,因此并发处理量与 Compact 线程池的线程数量有关,可以通过配置文件的 <code class="language-plaintext highlighter-rouge">worker_count</code> 进行配置。如果使用 Manual Compact 比较频繁,建议调大线程数量(譬如设置为接近 CPU 核心数):
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>[threadpool.THREAD_POOL_COMPACT]
name = compact
partitioned = false
max_input_queue_length = 128
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 16
</code></pre></div> </div>
</li>
<li>Manual Compact 属于 CPU 和 IO 密集型操作,处理过程中会使 CPU 使用率长期处于高位,容易对集群的读写性能造成影响,所以 <strong>建议在流量低峰时段进行操作</strong>。如果启动后发现读写性能下降影响了业务,可以立即通过设置该表的环境变量 <code class="language-plaintext highlighter-rouge">manual_compact.disabled=true</code> 来中止。</li>
<li>Manual Compact 过程中可能需要较多的额外磁盘空间。因为 compaction 前后文件变化较大,而 Pegasus 又会保留最近多个版本的 checkpoint,所以需要的额外磁盘空间量大约等于执行 Manual Compact 的表的数据存储量。所以,在执行 Manual Compact 前需 <strong>确认集群有足够的存储空间</strong>,同时在执行过程中 <strong>关注磁盘空间使用情况</strong>,避免因为磁盘空间耗尽导致集群节点宕机,影响集群可用度。</li>
</ul>
<h1 id="如何设置">如何设置</h1>
<h2 id="通过-pegasus-shell-工具设置">通过 Pegasus shell 工具设置</h2>
<p>既然 Manual Compact 功能是利用 <a href="table-env">Table 环境变量</a> 触发的,那么可以直接通过 shell 工具的 <a href="/zh/overview/shell#set_app_envs">set_app_envs 命令</a> 来设置,需要设置的环境变量参照上面的描述。</p>
<blockquote>
<p>环境变量设置后不会立即生效,大约需要等几十秒后才能在所有 replica 上生效。</p>
</blockquote>
<p>由于需要设置的环境变量比较多,且对数据格式有要求,所以强烈建议不要自己直接设置,而是通过我们提供的脚本工具来设置,如下所示。</p>
<h2 id="通过脚本设置">通过脚本设置</h2>
<p>我们提供了一个脚本工具 <a href="https://github.com/apache/incubator-pegasus/blob/master/scripts/pegasus_manual_compact.sh">scripts/pegasus_manual_compact.sh</a> 来方便地设置,用法:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>$ ./scripts/pegasus_manual_compact.sh
This tool is for manual compact specified table(app).
USAGE: ./scripts/pegasus_manual_compact.sh -c cluster -a app-name [-t periodic|once] [-w] [-g trigger_time] [...]
Options:
-h|--help print help message
-c|--cluster &lt;str&gt; cluster meta server list, default is "127.0.0.1:34601,127.0.0.1:34602"
-a|--app_name &lt;str&gt; target table(app) name
-t|--type &lt;str&gt; manual compact type, should be periodic or once, default is once
-w|--wait_only this option is only used when the type is once!
not trigger but only wait the last once compact to finish
-g|--trigger_time &lt;str&gt; this option is only used when the type is periodic!
specify trigger time of periodic compact in 24-hour format,
e.g. "3:00,21:00" means 3:00 and 21:00 everyday
--target_level &lt;num&gt; number in range of [-1,num_levels], -1 means automatically, default is -1
--bottommost_level_compaction &lt;skip|force&gt;
skip or force, default is skip
more details: https://github.com/facebook/rocksdb/wiki/Manual-Compaction
--max_concurrent_running_count &lt;num&gt;
max concurrent running count limit, should be positive integer.
if not set, means no limit.
for example:
1) Start once type manual compact with default options:
./scripts/pegasus_manual_compact.sh -c 127.0.0.1:34601,127.0.0.1:34602 -a temp
2) Only wait last once type manual compact to finish:
./scripts/pegasus_manual_compact.sh -c 127.0.0.1:34601,127.0.0.1:34602 -a temp -w
3) Config periodic type manual compact with specified options:
./scripts/pegasus_manual_compact.sh -c 127.0.0.1:34601,127.0.0.1:34602 -a temp -t periodic -g 3:00,21:00 \
--target_level 2 --bottommost_level_compaction force
</code></pre></div></div>
<p>该工具不仅会调用 shell 命令设置 Table 环境变量,对于 once 类型还会等待所有的 replica 上的操作都执行完成,使用起来十分方便。</p>
<p>譬如,在 <a href="usage-scenario#支持场景">bulk load</a> 完成后执行 once manual compact 如下:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>$ ./scripts/pegasus_manual_compact.sh -c 127.0.0.1:34601,127.0.0.1:34602 -a temp
</code></pre></div></div>
<h2 id="通过-admin-cli-设置">通过 admin-cli 设置</h2>
<p><strong>2.4.0</strong> 之后的 Pegasus 版本还支持用 admin-cli 来启动 manual compaction,并且可以方便地查看执行进度。</p>
<h3 id="使用命令">使用命令</h3>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c"># 开始单次 manual compaction</span>
Pegasus-AdminCli-1.2.0 » manual-compaction start <span class="nt">-h</span>
start manual compaction <span class="k">for </span>a specific table
Usage:
start <span class="o">[</span>flags]
Flags:
<span class="nt">-b</span>, <span class="nt">--bottommostLevelCompaction</span> bottommost level files will be compacted or not, default value is <span class="nb">false</span>
<span class="nt">-h</span>, <span class="nt">--help</span> display <span class="nb">help</span>
<span class="nt">-c</span>, <span class="nt">--maxConcurrentRunningCount</span> int max concurrent running count, default value is 0, no limited <span class="o">(</span>default: 0<span class="o">)</span>
<span class="nt">-a</span>, <span class="nt">--tableName</span> string table name
<span class="nt">-l</span>, <span class="nt">--targetLevel</span> int compacted files move level, default value is <span class="nt">-1</span> <span class="o">(</span>default: <span class="nt">-1</span><span class="o">)</span>
<span class="c"># 查看 manual compaction 进度</span>
Pegasus-AdminCli-1.2.0 » manual-compaction query <span class="nt">-h</span>
query manual compaction progress <span class="k">for </span>a specific table
Usage:
query <span class="o">[</span>flags]
Flags:
<span class="nt">-h</span>, <span class="nt">--help</span> display <span class="nb">help</span>
<span class="nt">-a</span>, <span class="nt">--tableName</span> string table name
</code></pre></div></div>
<h2 id="补充说明">补充说明</h2>
<p>Manual compaction 可与 bulk load 功能配合使用,作为批量导入大量数据后的一种优化读写性能的方式。在需要进行 bulk load 操作的表中,可将 <a href="/zh/administration/usage-scenario">Usage Scenario</a> 参数设置为 <code class="language-plaintext highlighter-rouge">bulk_load</code> 模式,以便减小导入大量数据带来的性能损耗。</p>
<ul>
<li>在 Bulkload 的场景下,开启manual-compaction 一般来说要比引擎层的 compaction 更灵活,因为在选择集中进行 compact 的时间可以视 Pegasus 的使用低峰来设定,并且可以通过参数主动控制并发度。</li>
<li>Bulkload 开启后会将 Usage Scenario 参数变为<code class="language-plaintext highlighter-rouge">bulk_load</code>,在这种模式下,我们会禁止引擎层的 compaction,因为<code class="language-plaintext highlighter-rouge">bulk_load</code>模式下会在 level0 层堆积大量的 sst 文件,如果不关闭引擎 compact 会消耗大量 IO 并且对读非常不友好。</li>
<li>写延迟比较容易被磁盘 IO 瓶颈影响。compact 本质是对磁盘上的数据做归并排序,需要把数据先读到内存中进行排序,然后再写回磁盘,涉及两次 IO,是一个对磁盘 IO 负载很重的操作,因此会增加一定写延迟。但我们可以灵活地设置 manual-compaction 的并发度,逐个磁盘进行,将影响控制在可接受范围内。</li>
</ul>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="http://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="http://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
本页导航
</p>
<ul class="menu-list">
<li><a href="#原理">原理</a></li>
<li><a href="#如何设置">如何设置</a>
<ul>
<li><a href="#通过-pegasus-shell-工具设置">通过 Pegasus shell 工具设置</a></li>
<li><a href="#通过脚本设置">通过脚本设置</a></li>
<li><a href="#通过-admin-cli-设置">通过 admin-cli 设置</a>
<ul>
<li><a href="#使用命令">使用命令</a></li>
</ul>
</li>
<li><a href="#补充说明">补充说明</a></li>
</ul>
</li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
// Initialise the Algolia DocSearch widget inside the element with id "docsearch".
// appId / indexName / apiKey identify the Algolia application and index to query.
// NOTE(review): apiKey is shipped to every visitor — presumably a search-only key; confirm.
// The former `debug: true` flag was dropped: per the DocSearch docs it is a development
// aid for inspecting the search UI and should not stay on in a published page.
docsearch({
  container: '#docsearch',
  appId: 'QRN30RBW0S',
  indexName: 'pegasus-apache',
  apiKey: 'd3a3252fa344359766707a106c4ed88f'
});
</script>
</body>
</html>