blob: 86e8bbadeb804aadd54c59bd4ce0071ca82381d1 [file] [log] [blame]
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Pegasus | Compression</title>
<link rel="stylesheet" href="/zh/assets/css/app.css">
<link rel="shortcut icon" href="/zh/assets/images/favicon.ico">
<link rel="stylesheet" href="/zh/assets/css/utilities.min.css">
<link rel="stylesheet" href="/zh/assets/css/docsearch.v3.css">
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<title>Compression | Pegasus</title>
<meta name="generator" content="Jekyll v4.3.3" />
<meta property="og:title" content="Compression" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="客户端压缩" />
<meta property="og:description" content="客户端压缩" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2024-04-22T13:02:52+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Compression" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2024-04-22T13:02:52+00:00","datePublished":"2024-04-22T13:02:52+00:00","description":"客户端压缩","headline":"Compression","mainEntityOfPage":{"@type":"WebPage","@id":"/administration/compression"},"url":"/administration/compression"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<div class="dashboard-panel-header has-text-centered">
<a href="/zh/">
<img src="/assets/images/pegasus-logo-inv.png" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">Pegasus 产品文档</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/downloads"
class="">
下载
</a>
</li>
</ul>
<p class="menu-label">编译构建</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/build/compile-by-docker"
class="">
使用 Docker 完成编译(推荐)
</a>
</li>
<li>
<a href="/zh/docs/build/compile-from-source"
class="">
从源码编译
</a>
</li>
</ul>
<p class="menu-label">客户端库</p>
<ul class="menu-list">
<li>
<a href="/zh/clients/java-client"
class="">
Java 客户端
</a>
</li>
<li>
<a href="/zh/clients/cpp-client"
class="">
C++ 客户端
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang 客户端
</a>
</li>
<li>
<a href="/zh/clients/python-client"
class="">
Python 客户端
</a>
</li>
<li>
<a href="/zh/clients/node-client"
class="">
NodeJS 客户端
</a>
</li>
<li>
<a href="/zh/clients/scala-client"
class="">
Scala 客户端
</a>
</li>
</ul>
<p class="menu-label">生态工具</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/tools/shell"
class="">
Pegasus Shell 工具
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
集群管理命令行
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
数据访问命令行
</a>
</li>
</ul>
<p class="menu-label">用户接口</p>
<ul class="menu-list">
<li>
<a href="/zh/api/ttl"
class="">
TTL
</a>
</li>
<li>
<a href="/zh/api/single-atomic"
class="">
单行原子操作
</a>
</li>
<li>
<a href="/zh/api/redis"
class="">
Redis 适配
</a>
</li>
<li>
<a href="/zh/api/geo"
class="">
GEO 支持
</a>
</li>
<li>
<a href="/zh/api/http"
class="">
HTTP 接口
</a>
</li>
</ul>
<p class="menu-label">高效运维</p>
<ul class="menu-list">
<li>
<a href="/zh/administration/deployment"
class="">
集群部署
</a>
</li>
<li>
<a href="/zh/administration/config"
class="">
配置说明
</a>
</li>
<li>
<a href="/zh/administration/rebalance"
class="">
负载均衡
</a>
</li>
<li>
<a href="/zh/administration/monitoring"
class="">
可视化监控
</a>
</li>
<li>
<a href="/zh/administration/rolling-update"
class="">
集群重启和升级
</a>
</li>
<li>
<a href="/zh/administration/scale-in-out"
class="">
集群扩容缩容
</a>
</li>
<li>
<a href="/zh/administration/resource-management"
class="">
资源管理
</a>
</li>
<li>
<a href="/zh/administration/cold-backup"
class="">
冷备份
</a>
</li>
<li>
<a href="/zh/administration/meta-recovery"
class="">
元数据恢复
</a>
</li>
<li>
<a href="/zh/administration/replica-recovery"
class="">
Replica 数据恢复
</a>
</li>
<li>
<a href="/zh/administration/zk-migration"
class="">
Zookeeper 迁移
</a>
</li>
<li>
<a href="/zh/administration/table-migration"
class="">
Table 迁移
</a>
</li>
<li>
<a href="/zh/administration/table-soft-delete"
class="">
Table 软删除
</a>
</li>
<li>
<a href="/zh/administration/table-env"
class="">
Table 环境变量
</a>
</li>
<li>
<a href="/zh/administration/remote-commands"
class="">
远程命令
</a>
</li>
<li>
<a href="/zh/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/zh/administration/duplication"
class="">
跨机房同步
</a>
</li>
<li>
<a href="/zh/administration/compression"
class="is-active">
数据压缩
</a>
</li>
<li>
<a href="/zh/administration/throttling"
class="">
流量控制
</a>
</li>
<li>
<a href="/zh/administration/experiences"
class="">
运维经验
</a>
</li>
<li>
<a href="/zh/administration/manual-compact"
class="">
Manual Compact 功能
</a>
</li>
<li>
<a href="/zh/administration/usage-scenario"
class="">
Usage Scenario 功能
</a>
</li>
<li>
<a href="/zh/administration/bad-disk"
class="">
坏盘检修
</a>
</li>
<li>
<a href="/zh/administration/whitelist"
class="">
Replica Server 白名单
</a>
</li>
<li>
<a href="/zh/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/zh/administration/hotspot-detection"
class="">
热点检测
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/zh/" class="navbar-item">
<!-- Pegasus Icon -->
<img src="/assets/images/pegasus-square.png">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/compression"><strong>En</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Pegasus 产品文档
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/downloads"
class="navbar-item ">
下载
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
编译构建
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/build/compile-by-docker"
class="navbar-item ">
使用 Docker 完成编译(推荐)
</a>
<a href="/zh/docs/build/compile-from-source"
class="navbar-item ">
从源码编译
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
客户端库
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/clients/java-client"
class="navbar-item ">
Java 客户端
</a>
<a href="/zh/clients/cpp-client"
class="navbar-item ">
C++ 客户端
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang 客户端
</a>
<a href="/zh/clients/python-client"
class="navbar-item ">
Python 客户端
</a>
<a href="/zh/clients/node-client"
class="navbar-item ">
NodeJS 客户端
</a>
<a href="/zh/clients/scala-client"
class="navbar-item ">
Scala 客户端
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
生态工具
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/tools/shell"
class="navbar-item ">
Pegasus Shell 工具
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
集群管理命令行
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
数据访问命令行
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
用户接口
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/api/ttl"
class="navbar-item ">
TTL
</a>
<a href="/zh/api/single-atomic"
class="navbar-item ">
单行原子操作
</a>
<a href="/zh/api/redis"
class="navbar-item ">
Redis 适配
</a>
<a href="/zh/api/geo"
class="navbar-item ">
GEO 支持
</a>
<a href="/zh/api/http"
class="navbar-item ">
HTTP 接口
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
高效运维
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/administration/deployment"
class="navbar-item ">
集群部署
</a>
<a href="/zh/administration/config"
class="navbar-item ">
配置说明
</a>
<a href="/zh/administration/rebalance"
class="navbar-item ">
负载均衡
</a>
<a href="/zh/administration/monitoring"
class="navbar-item ">
可视化监控
</a>
<a href="/zh/administration/rolling-update"
class="navbar-item ">
集群重启和升级
</a>
<a href="/zh/administration/scale-in-out"
class="navbar-item ">
集群扩容缩容
</a>
<a href="/zh/administration/resource-management"
class="navbar-item ">
资源管理
</a>
<a href="/zh/administration/cold-backup"
class="navbar-item ">
冷备份
</a>
<a href="/zh/administration/meta-recovery"
class="navbar-item ">
元数据恢复
</a>
<a href="/zh/administration/replica-recovery"
class="navbar-item ">
Replica 数据恢复
</a>
<a href="/zh/administration/zk-migration"
class="navbar-item ">
Zookeeper 迁移
</a>
<a href="/zh/administration/table-migration"
class="navbar-item ">
Table 迁移
</a>
<a href="/zh/administration/table-soft-delete"
class="navbar-item ">
Table 软删除
</a>
<a href="/zh/administration/table-env"
class="navbar-item ">
Table 环境变量
</a>
<a href="/zh/administration/remote-commands"
class="navbar-item ">
远程命令
</a>
<a href="/zh/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/zh/administration/duplication"
class="navbar-item ">
跨机房同步
</a>
<a href="/zh/administration/compression"
class="navbar-item is-active">
数据压缩
</a>
<a href="/zh/administration/throttling"
class="navbar-item ">
流量控制
</a>
<a href="/zh/administration/experiences"
class="navbar-item ">
运维经验
</a>
<a href="/zh/administration/manual-compact"
class="navbar-item ">
Manual Compact 功能
</a>
<a href="/zh/administration/usage-scenario"
class="navbar-item ">
Usage Scenario 功能
</a>
<a href="/zh/administration/bad-disk"
class="navbar-item ">
坏盘检修
</a>
<a href="/zh/administration/whitelist"
class="navbar-item ">
Replica Server 白名单
</a>
<a href="/zh/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/zh/administration/hotspot-detection"
class="navbar-item ">
热点检测
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/compression"><strong>En</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">数据压缩</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<h1 id="客户端压缩">客户端压缩</h1>
<p>请参考 <a href="/zh/clients/java-client#数据序列化">Java 客户端 - 数据序列化</a><a href="/zh/clients/java-client#数据压缩">Java 客户端 - 数据压缩</a></p>
<h1 id="服务端压缩">服务端压缩</h1>
<p>建议:</p>
<ul>
<li>对于 CPU 比较空闲的服务器,采用压缩率最高的 <code class="language-plaintext highlighter-rouge">zstd</code> 算法。</li>
<li>对于 CPU 比较繁忙的服务器,采用压缩率和速度都比较好的 <code class="language-plaintext highlighter-rouge">lz4</code> 算法。</li>
</ul>
<p>Pegasus 服务端支持的压缩算法:</p>
<ul>
<li>snappy</li>
<li>lz4 (从 v1.11.2 版本开始支持)</li>
<li>zstd (从 v1.11.2 版本开始支持)</li>
</ul>
<p>通过 <a href="config">配置文件</a> 来配置压缩算法,例如:</p>
<div class="language-ini highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nn">[pegasus.server]</span>
<span class="py">rocksdb_compression_type</span> <span class="p">=</span> <span class="s">lz4</span>
</code></pre></div></div>
<p>不同压缩算法的比较(数据来自 <a href="https://facebook.github.io/zstd/">zstd 官方的 benchmark</a>):</p>
<table>
<thead>
<tr>
<th>Compressor name</th>
<th>Ratio</th>
<th>Compression (MB/s)</th>
<th>Decompress (MB/s)</th>
</tr>
</thead>
<tbody>
<tr>
<td>zstd 1.3.4 -1</td>
<td>2.877</td>
<td>470</td>
<td>1380</td>
</tr>
<tr>
<td>zlib 1.2.11 -1</td>
<td>2.743</td>
<td>110</td>
<td>400</td>
</tr>
<tr>
<td>brotli 1.0.2 -0</td>
<td>2.701</td>
<td>410</td>
<td>430</td>
</tr>
<tr>
<td>quicklz 1.5.0 -1</td>
<td>2.238</td>
<td>550</td>
<td>710</td>
</tr>
<tr>
<td>lzo1x 2.09 -1</td>
<td>2.108</td>
<td>650</td>
<td>830</td>
</tr>
<tr>
<td>lz4 1.8.1</td>
<td>2.101</td>
<td>750</td>
<td>3700</td>
</tr>
<tr>
<td>snappy 1.1.4</td>
<td>2.091</td>
<td>530</td>
<td>1800</td>
</tr>
<tr>
<td>lzf 3.6 -1</td>
<td>2.077</td>
<td>400</td>
<td>860</td>
</tr>
</tbody>
</table>
<p><img src="/assets/images/compression-comparation.png" alt="compression-comparation.png" class="img-responsive" /></p>
<p>这个结果与 <a href="https://github.com/lz4/lz4#benchmarks">lz4 官方的 benchmark</a> 是一致的。</p>
<p>附上 <a href="https://github.com/facebook/rocksdb/wiki/Compression">RocksDB 的压缩建议</a></p>
<blockquote>
<p>Use options.compression to specify the compression to use. By default it is Snappy. We believe LZ4 is almost always better than Snappy. We leave Snappy as default to avoid unexpected compatibility problems to previous users. LZ4/Snappy is lightweight compression so it usually strikes a good balance between space and CPU usage.</p>
</blockquote>
<blockquote>
<p>If you want to further reduce the in-memory and have some free CPU to use, you can try to set a heavy-weight compression in the latter by setting options.bottommost_compression. The bottommost level will be compressed using this compression style. Usually the bottommost level contains majority of the data, so users get almost optimal space setting, without paying CPU for compress all the data ever flowing to any level. We recommend ZSTD. If it is not available, Zlib is the second choice.</p>
</blockquote>
<blockquote>
<p>If you want have a lot of free CPU and want to reduce not just space but write amplification too, try to set options.compression to heavy weight compression type. We recommend ZSTD. Use Zlib if it is not available.</p>
</blockquote>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="http://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="http://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
本页导航
</p>
<ul class="menu-list">
<li><a href="#客户端压缩">客户端压缩</a></li>
<li><a href="#服务端压缩">服务端压缩</a></li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
docsearch({
container: '#docsearch',
appId: 'QRN30RBW0S',
indexName: 'pegasus-apache',
apiKey: 'd3a3252fa344359766707a106c4ed88f',
debug: true
});
</script>
</body>
</html>