blob: af362daefbb8d356e1a311ea43674565a283c17a [file] [log] [blame]
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Pegasus | Compression</title>
<link rel="stylesheet" href="/assets/css/app.css">
<link rel="shortcut icon" href="/assets/images/favicon.ico">
<link rel="stylesheet" href="/assets/css/utilities.min.css">
<link rel="stylesheet" href="/assets/css/docsearch.v3.css">
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<title>Compression | Pegasus</title>
<meta name="generator" content="Jekyll v4.3.3" />
<meta property="og:title" content="Compression" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="Compression on the client side" />
<meta property="og:description" content="Compression on the client side" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2024-03-19T12:53:22+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Compression" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2024-03-19T12:53:22+00:00","datePublished":"2024-03-19T12:53:22+00:00","description":"Compression on the client side","headline":"Compression","mainEntityOfPage":{"@type":"WebPage","@id":"/administration/compression"},"url":"/administration/compression"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<div class="dashboard-panel-header has-text-centered">
<a href="/">
<img src="/assets/images/pegasus-logo-inv.png" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">The Pegasus documentation</p>
<ul class="menu-list">
<li>
<a href="/docs/downloads"
class="">
Downloads
</a>
</li>
</ul>
<p class="menu-label">Building Pegasus</p>
<ul class="menu-list">
<li>
<a href="/docs/build/compile-by-docker"
class="">
Compile by docker (recommended)
</a>
</li>
<li>
<a href="/docs/build/compile-from-source"
class="">
Compile from source
</a>
</li>
</ul>
<p class="menu-label">Client Libs</p>
<ul class="menu-list">
<li>
<a href="/clients/java-client"
class="">
Java Client
</a>
</li>
<li>
<a href="/clients/cpp-client"
class="">
C++ Client
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang Client
</a>
</li>
<li>
<a href="/clients/python-client"
class="">
Python Client
</a>
</li>
<li>
<a href="/clients/node-client"
class="">
NodeJS Client
</a>
</li>
<li>
<a href="/clients/scala-client"
class="">
Scala Client
</a>
</li>
</ul>
<p class="menu-label">Tools</p>
<ul class="menu-list">
<li>
<a href="/docs/tools/shell"
class="">
Pegasus Shell
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
Admin CLI
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
Pegasus data access CLI
</a>
</li>
</ul>
<p class="menu-label">API</p>
<ul class="menu-list">
<li>
<a href="/api/ttl"
class="">
TTL(Time To Live)
</a>
</li>
<li>
<a href="/api/single-atomic"
class="">
Single-Atomic Operations
</a>
</li>
<li>
<a href="/api/redis"
class="">
Redis Adaption
</a>
</li>
<li>
<a href="/api/geo"
class="">
GEO Support
</a>
</li>
<li>
<a href="/api/http"
class="">
HTTP API
</a>
</li>
</ul>
<p class="menu-label">Admin</p>
<ul class="menu-list">
<li>
<a href="/administration/deployment"
class="">
Deployment
</a>
</li>
<li>
<a href="/administration/config"
class="">
Configurations
</a>
</li>
<li>
<a href="/administration/rebalance"
class="">
Rebalance
</a>
</li>
<li>
<a href="/administration/monitoring"
class="">
Monitoring
</a>
</li>
<li>
<a href="/administration/rolling-update"
class="">
Rolling Restart and Upgrade
</a>
</li>
<li>
<a href="/administration/scale-in-out"
class="">
Scale-in and Scale-out
</a>
</li>
<li>
<a href="/administration/resource-management"
class="">
Resource Management
</a>
</li>
<li>
<a href="/administration/cold-backup"
class="">
Cold Backup
</a>
</li>
<li>
<a href="/administration/meta-recovery"
class="">
Metadata Recovery
</a>
</li>
<li>
<a href="/administration/replica-recovery"
class="">
Replica Data Recovery
</a>
</li>
<li>
<a href="/administration/zk-migration"
class="">
Zookeeper Migration
</a>
</li>
<li>
<a href="/administration/table-migration"
class="">
Table Migration
</a>
</li>
<li>
<a href="/administration/table-soft-delete"
class="">
Table Soft-Delete
</a>
</li>
<li>
<a href="/administration/table-env"
class="">
Table Environment Variables
</a>
</li>
<li>
<a href="/administration/remote-commands"
class="">
Remote Command
</a>
</li>
<li>
<a href="/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/administration/duplication"
class="">
Duplication
</a>
</li>
<li>
<a href="/administration/compression"
class="is-active">
Data Compression
</a>
</li>
<li>
<a href="/administration/throttling"
class="">
Throttling
</a>
</li>
<li>
<a href="/administration/experiences"
class="">
Experiences
</a>
</li>
<li>
<a href="/administration/manual-compact"
class="">
Manual Compact
</a>
</li>
<li>
<a href="/administration/usage-scenario"
class="">
Usage Scenario
</a>
</li>
<li>
<a href="/administration/bad-disk"
class="">
Bad Disk Repair
</a>
</li>
<li>
<a href="/administration/whitelist"
class="">
Replica Server Whitelist
</a>
</li>
<li>
<a href="/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/administration/hotspot-detection"
class="">
Hotspot Detection
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/" class="navbar-item">
<!-- Pegasus Icon -->
<img src="/assets/images/pegasus-square.png">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<a class="button is-light is-outlined is-inverted" href="/zh/administration/compression"><strong></strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
The Pegasus documentation
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/downloads"
class="navbar-item ">
Downloads
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Building Pegasus
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/build/compile-by-docker"
class="navbar-item ">
Compile by docker (recommended)
</a>
<a href="/docs/build/compile-from-source"
class="navbar-item ">
Compile from source
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Client Libs
</span>
</a>
<div class="navbar-dropdown">
<a href="/clients/java-client"
class="navbar-item ">
Java Client
</a>
<a href="/clients/cpp-client"
class="navbar-item ">
C++ Client
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang Client
</a>
<a href="/clients/python-client"
class="navbar-item ">
Python Client
</a>
<a href="/clients/node-client"
class="navbar-item ">
NodeJS Client
</a>
<a href="/clients/scala-client"
class="navbar-item ">
Scala Client
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Tools
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/tools/shell"
class="navbar-item ">
Pegasus Shell
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
Admin CLI
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
Pegasus data access CLI
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
API
</span>
</a>
<div class="navbar-dropdown">
<a href="/api/ttl"
class="navbar-item ">
TTL(Time To Live)
</a>
<a href="/api/single-atomic"
class="navbar-item ">
Single-Atomic Operations
</a>
<a href="/api/redis"
class="navbar-item ">
Redis Adaption
</a>
<a href="/api/geo"
class="navbar-item ">
GEO Support
</a>
<a href="/api/http"
class="navbar-item ">
HTTP API
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Admin
</span>
</a>
<div class="navbar-dropdown">
<a href="/administration/deployment"
class="navbar-item ">
Deployment
</a>
<a href="/administration/config"
class="navbar-item ">
Configurations
</a>
<a href="/administration/rebalance"
class="navbar-item ">
Rebalance
</a>
<a href="/administration/monitoring"
class="navbar-item ">
Monitoring
</a>
<a href="/administration/rolling-update"
class="navbar-item ">
Rolling Restart and Upgrade
</a>
<a href="/administration/scale-in-out"
class="navbar-item ">
Scale-in and Scale-out
</a>
<a href="/administration/resource-management"
class="navbar-item ">
Resource Management
</a>
<a href="/administration/cold-backup"
class="navbar-item ">
Cold Backup
</a>
<a href="/administration/meta-recovery"
class="navbar-item ">
Metadata Recovery
</a>
<a href="/administration/replica-recovery"
class="navbar-item ">
Replica Data Recovery
</a>
<a href="/administration/zk-migration"
class="navbar-item ">
Zookeeper Migration
</a>
<a href="/administration/table-migration"
class="navbar-item ">
Table Migration
</a>
<a href="/administration/table-soft-delete"
class="navbar-item ">
Table Soft-Delete
</a>
<a href="/administration/table-env"
class="navbar-item ">
Table Environment Variables
</a>
<a href="/administration/remote-commands"
class="navbar-item ">
Remote Command
</a>
<a href="/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/administration/duplication"
class="navbar-item ">
Duplication
</a>
<a href="/administration/compression"
class="navbar-item is-active">
Data Compression
</a>
<a href="/administration/throttling"
class="navbar-item ">
Throttling
</a>
<a href="/administration/experiences"
class="navbar-item ">
Experiences
</a>
<a href="/administration/manual-compact"
class="navbar-item ">
Manual Compact
</a>
<a href="/administration/usage-scenario"
class="navbar-item ">
Usage Scenario
</a>
<a href="/administration/bad-disk"
class="navbar-item ">
Bad Disk Repair
</a>
<a href="/administration/whitelist"
class="navbar-item ">
Replica Server Whitelist
</a>
<a href="/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/administration/hotspot-detection"
class="navbar-item ">
Hotspot Detection
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<a class="button is-light is-outlined is-inverted" href="/zh/administration/compression"><strong></strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">Data Compression</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<h1 id="compression-on-the-client-side">Compression on the client side</h1>
<p>Please refer to the <a href="/clients/java-client#data-serialization">Java client - Data Serialization</a> and <a href="/clients/java-client#data-compression">Java client - Data Compression</a>.</p>
<h1 id="compression-on-the-server-side">Compression on the server side</h1>
<p>It’s recommended:</p>
<ul>
<li>For the server with low CPU load, use the <code class="language-plaintext highlighter-rouge">zstd</code> algorithm with the highest compression ratio.</li>
<li>For the server with high CPU load, use the <code class="language-plaintext highlighter-rouge">lz4</code> algorithm with good compression rate and speed.</li>
</ul>
<p>The compression algorithms supported by the Pegasus server:</p>
<ul>
<li>snappy</li>
<li>lz4 (Since Pegasus v1.11.2)</li>
<li>zstd (Since Pegasus v1.11.2)</li>
</ul>
<p>Configure the compression algorithm through <a href="config">Configurations</a>, for example:</p>
<div class="language-ini highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nn">[pegasus.server]</span>
<span class="py">rocksdb_compression_type</span> <span class="p">=</span> <span class="s">lz4</span>
</code></pre></div></div>
<p>Comparison of different compression algorithms (from <a href="https://facebook.github.io/zstd/">zstd official benchmark</a>):</p>
<table>
<thead>
<tr>
<th>Compressor name</th>
<th>Ratio</th>
<th>Compression (MB/s)</th>
<th>Decompress (MB/s)</th>
</tr>
</thead>
<tbody>
<tr>
<td>zstd 1.3.4 -1</td>
<td>2.877</td>
<td>470</td>
<td>1380</td>
</tr>
<tr>
<td>zlib 1.2.11 -1</td>
<td>2.743</td>
<td>110</td>
<td>400</td>
</tr>
<tr>
<td>brotli 1.0.2 -0</td>
<td>2.701</td>
<td>410</td>
<td>430</td>
</tr>
<tr>
<td>quicklz 1.5.0 -1</td>
<td>2.238</td>
<td>550</td>
<td>710</td>
</tr>
<tr>
<td>lzo1x 2.09 -1</td>
<td>2.108</td>
<td>650</td>
<td>830</td>
</tr>
<tr>
<td>lz4 1.8.1</td>
<td>2.101</td>
<td>750</td>
<td>3700</td>
</tr>
<tr>
<td>snappy 1.1.4</td>
<td>2.091</td>
<td>530</td>
<td>1800</td>
</tr>
<tr>
<td>lzf 3.6 -1</td>
<td>2.077</td>
<td>400</td>
<td>860</td>
</tr>
</tbody>
</table>
<p><img src="/assets/images/compression-comparation.png" alt="compression-comparation.png" class="img-responsive" /></p>
<p>This result is consistent with <a href="https://github.com/lz4/lz4#benchmarks">lz4 official benchmark</a>.</p>
<p><a href="https://github.com/facebook/rocksdb/wiki/Compression">Compression algorithm suggestions from RocksDB official</a>:</p>
<blockquote>
<p>Use options.compression to specify the compression to use. By default it is Snappy. We believe LZ4 is almost always better than Snappy. We leave Snappy as default to avoid unexpected compatibility problems to previous users. LZ4/Snappy is lightweight compression so it usually strikes a good balance between space and CPU usage.</p>
</blockquote>
<blockquote>
<p>If you want to further reduce the in-memory and have some free CPU to use, you can try to set a heavy-weight compression in the latter by setting options.bottommost_compression. The bottommost level will be compressed using this compression style. Usually the bottommost level contains majority of the data, so users get almost optimal space setting, without paying CPU for compress all the data ever flowing to any level. We recommend ZSTD. If it is not available, Zlib is the second choice.</p>
</blockquote>
<blockquote>
<p>If you want have a lot of free CPU and want to reduce not just space but write amplification too, try to set options.compression to heavy weight compression type. We recommend ZSTD. Use Zlib if it is not available.</p>
</blockquote>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="http://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="http://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
Table of contents
</p>
<ul class="menu-list">
<li><a href="#compression-on-the-client-side">Compression on the client side</a></li>
<li><a href="#compression-on-the-server-side">Compression on the server side</a></li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
docsearch({
container: '#docsearch',
appId: 'QRN30RBW0S',
indexName: 'pegasus-apache',
apiKey: 'd3a3252fa344359766707a106c4ed88f',
debug: true
});
</script>
</body>
</html>