blob: 13320a1e052ddfc4343eb2373822e4668bfedea0 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Pegasus | Scale In Out</title>
<link rel="stylesheet" href="/assets/css/app.css">
<link rel="shortcut icon" href="/assets/images/favicon.ico">
<link rel="stylesheet" href="/assets/css/utilities.min.css">
<link rel="stylesheet" href="/assets/css/docsearch.v3.css">
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<title>Scale In Out | Pegasus</title>
<meta name="generator" content="Jekyll v4.3.3" />
<meta property="og:title" content="Scale In Out" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="Design goals" />
<meta property="og:description" content="Design goals" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2024-04-22T06:39:52+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Scale In Out" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2024-04-22T06:39:52+00:00","datePublished":"2024-04-22T06:39:52+00:00","description":"Design goals","headline":"Scale In Out","mainEntityOfPage":{"@type":"WebPage","@id":"/administration/scale-in-out"},"url":"/administration/scale-in-out"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<div class="dashboard-panel-header has-text-centered">
<a href="/">
<img src="/assets/images/pegasus-logo-inv.png" alt="Pegasus home" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">The Pegasus documentation</p>
<ul class="menu-list">
<li>
<a href="/docs/downloads"
class="">
Downloads
</a>
</li>
</ul>
<p class="menu-label">Building Pegasus</p>
<ul class="menu-list">
<li>
<a href="/docs/build/compile-by-docker"
class="">
Compile by docker (recommended)
</a>
</li>
<li>
<a href="/docs/build/compile-from-source"
class="">
Compile from source
</a>
</li>
</ul>
<p class="menu-label">Client Libs</p>
<ul class="menu-list">
<li>
<a href="/clients/java-client"
class="">
Java Client
</a>
</li>
<li>
<a href="/clients/cpp-client"
class="">
C++ Client
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang Client
</a>
</li>
<li>
<a href="/clients/python-client"
class="">
Python Client
</a>
</li>
<li>
<a href="/clients/node-client"
class="">
NodeJS Client
</a>
</li>
<li>
<a href="/clients/scala-client"
class="">
Scala Client
</a>
</li>
</ul>
<p class="menu-label">Tools</p>
<ul class="menu-list">
<li>
<a href="/docs/tools/shell"
class="">
Pegasus Shell
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
Admin CLI
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
Pegasus data access CLI
</a>
</li>
</ul>
<p class="menu-label">API</p>
<ul class="menu-list">
<li>
<a href="/api/ttl"
class="">
TTL(Time To Live)
</a>
</li>
<li>
<a href="/api/single-atomic"
class="">
Single-Atomic Operations
</a>
</li>
<li>
<a href="/api/redis"
class="">
Redis Adaption
</a>
</li>
<li>
<a href="/api/geo"
class="">
GEO Support
</a>
</li>
<li>
<a href="/api/http"
class="">
HTTP API
</a>
</li>
</ul>
<p class="menu-label">Admin</p>
<ul class="menu-list">
<li>
<a href="/administration/deployment"
class="">
Deployment
</a>
</li>
<li>
<a href="/administration/config"
class="">
Configurations
</a>
</li>
<li>
<a href="/administration/rebalance"
class="">
Rebalance
</a>
</li>
<li>
<a href="/administration/monitoring"
class="">
Monitoring
</a>
</li>
<li>
<a href="/administration/rolling-update"
class="">
Rolling Restart and Upgrade
</a>
</li>
<li>
<a href="/administration/scale-in-out"
class="is-active">
Scale-in and Scale-out
</a>
</li>
<li>
<a href="/administration/resource-management"
class="">
Resource Management
</a>
</li>
<li>
<a href="/administration/cold-backup"
class="">
Cold Backup
</a>
</li>
<li>
<a href="/administration/meta-recovery"
class="">
Metadata Recovery
</a>
</li>
<li>
<a href="/administration/replica-recovery"
class="">
Replica Data Recovery
</a>
</li>
<li>
<a href="/administration/zk-migration"
class="">
Zookeeper Migration
</a>
</li>
<li>
<a href="/administration/table-migration"
class="">
Table Migration
</a>
</li>
<li>
<a href="/administration/table-soft-delete"
class="">
Table Soft-Delete
</a>
</li>
<li>
<a href="/administration/table-env"
class="">
Table Environment Variables
</a>
</li>
<li>
<a href="/administration/remote-commands"
class="">
Remote Command
</a>
</li>
<li>
<a href="/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/administration/duplication"
class="">
Duplication
</a>
</li>
<li>
<a href="/administration/compression"
class="">
Data Compression
</a>
</li>
<li>
<a href="/administration/throttling"
class="">
Throttling
</a>
</li>
<li>
<a href="/administration/experiences"
class="">
Experiences
</a>
</li>
<li>
<a href="/administration/manual-compact"
class="">
Manual Compact
</a>
</li>
<li>
<a href="/administration/usage-scenario"
class="">
Usage Scenario
</a>
</li>
<li>
<a href="/administration/bad-disk"
class="">
Bad Disk Repair
</a>
</li>
<li>
<a href="/administration/whitelist"
class="">
Replica Server Whitelist
</a>
</li>
<li>
<a href="/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/administration/hotspot-detection"
class="">
Hotspot Detection
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/" class="navbar-item">
<!-- Pegasus Icon -->
<img src="/assets/images/pegasus-square.png" alt="Pegasus home">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<a class="button is-light is-outlined is-inverted" href="/zh/administration/scale-in-out"><strong>中</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
The Pegasus documentation
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/downloads"
class="navbar-item ">
Downloads
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Building Pegasus
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/build/compile-by-docker"
class="navbar-item ">
Compile by docker (recommended)
</a>
<a href="/docs/build/compile-from-source"
class="navbar-item ">
Compile from source
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Client Libs
</span>
</a>
<div class="navbar-dropdown">
<a href="/clients/java-client"
class="navbar-item ">
Java Client
</a>
<a href="/clients/cpp-client"
class="navbar-item ">
C++ Client
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang Client
</a>
<a href="/clients/python-client"
class="navbar-item ">
Python Client
</a>
<a href="/clients/node-client"
class="navbar-item ">
NodeJS Client
</a>
<a href="/clients/scala-client"
class="navbar-item ">
Scala Client
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Tools
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/tools/shell"
class="navbar-item ">
Pegasus Shell
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
Admin CLI
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
Pegasus data access CLI
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
API
</span>
</a>
<div class="navbar-dropdown">
<a href="/api/ttl"
class="navbar-item ">
TTL(Time To Live)
</a>
<a href="/api/single-atomic"
class="navbar-item ">
Single-Atomic Operations
</a>
<a href="/api/redis"
class="navbar-item ">
Redis Adaption
</a>
<a href="/api/geo"
class="navbar-item ">
GEO Support
</a>
<a href="/api/http"
class="navbar-item ">
HTTP API
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Admin
</span>
</a>
<div class="navbar-dropdown">
<a href="/administration/deployment"
class="navbar-item ">
Deployment
</a>
<a href="/administration/config"
class="navbar-item ">
Configurations
</a>
<a href="/administration/rebalance"
class="navbar-item ">
Rebalance
</a>
<a href="/administration/monitoring"
class="navbar-item ">
Monitoring
</a>
<a href="/administration/rolling-update"
class="navbar-item ">
Rolling Restart and Upgrade
</a>
<a href="/administration/scale-in-out"
class="navbar-item is-active">
Scale-in and Scale-out
</a>
<a href="/administration/resource-management"
class="navbar-item ">
Resource Management
</a>
<a href="/administration/cold-backup"
class="navbar-item ">
Cold Backup
</a>
<a href="/administration/meta-recovery"
class="navbar-item ">
Metadata Recovery
</a>
<a href="/administration/replica-recovery"
class="navbar-item ">
Replica Data Recovery
</a>
<a href="/administration/zk-migration"
class="navbar-item ">
Zookeeper Migration
</a>
<a href="/administration/table-migration"
class="navbar-item ">
Table Migration
</a>
<a href="/administration/table-soft-delete"
class="navbar-item ">
Table Soft-Delete
</a>
<a href="/administration/table-env"
class="navbar-item ">
Table Environment Variables
</a>
<a href="/administration/remote-commands"
class="navbar-item ">
Remote Command
</a>
<a href="/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/administration/duplication"
class="navbar-item ">
Duplication
</a>
<a href="/administration/compression"
class="navbar-item ">
Data Compression
</a>
<a href="/administration/throttling"
class="navbar-item ">
Throttling
</a>
<a href="/administration/experiences"
class="navbar-item ">
Experiences
</a>
<a href="/administration/manual-compact"
class="navbar-item ">
Manual Compact
</a>
<a href="/administration/usage-scenario"
class="navbar-item ">
Usage Scenario
</a>
<a href="/administration/bad-disk"
class="navbar-item ">
Bad Disk Repair
</a>
<a href="/administration/whitelist"
class="navbar-item ">
Replica Server Whitelist
</a>
<a href="/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/administration/hotspot-detection"
class="navbar-item ">
Hotspot Detection
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<a class="button is-light is-outlined is-inverted" href="/zh/administration/scale-in-out"><strong>中</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">Scale-in and Scale-out</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<h1 id="design-goals">Design goals</h1>
<p>When the storage capacity of the cluster is insufficient or the read/write throughput is too high, it is necessary to scale out the capacity by adding more nodes. On the contrary, scaling in can be achieved by reducing the number of nodes.</p>
<blockquote>
<p>The scaling in and scaling out described in this document are for replica servers.</p>
</blockquote>
<p>When scaling the cluster out or in, keep the following goals in mind:</p>
<ul>
<li>Do not stop Pegasus service</li>
<li>Affect availability as little as possible</li>
<li>Minimize unnecessary data transmission</li>
</ul>
<h1 id="scale-out-steps">Scale out steps</h1>
<p>The scale out steps are relatively simple:</p>
<ul>
<li>To add multiple servers, start the replica server process on these newly added servers. After starting, the replica server will actively contact the meta server and join the node list.</li>
<li>When the meta level is <code class="language-plaintext highlighter-rouge">steady</code>, <a href="rebalance">load balancing</a> is not performed. Therefore, when using the <code class="language-plaintext highlighter-rouge">nodes -d</code> command in the shell tool, you can see that the newly added node is in <code class="language-plaintext highlighter-rouge">ALIVE</code> status, but the count of replicas served by the node is <code class="language-plaintext highlighter-rouge">0</code>.</li>
<li>Run <code class="language-plaintext highlighter-rouge">set_meta_level lively</code> in the shell tool to start load balancing, and the meta server will gradually migrate some replicas to the newly added node.</li>
<li>Observe the servicing replicas through the <code class="language-plaintext highlighter-rouge">nodes -d</code> command of the shell tool. Once a balanced state is reached, use <code class="language-plaintext highlighter-rouge">set_meta_level steady</code> to turn off load balancing and complete the scale out process.</li>
</ul>
<h1 id="scale-in-steps">Scale in steps</h1>
<p>There are more factors to consider when scaling in compared to scaling out, mainly including:</p>
<ul>
<li>If multiple nodes need to be removed from the cluster at the same time, they need to be removed one by one, and wait for one to be removed completely before removing another to avoid affecting the availability of the cluster and data integrity.</li>
<li>If multiple nodes need to be removed from the cluster, then while one node is being removed the meta server should avoid assigning replicas to other nodes that are about to be removed when curing replicas. Otherwise, the replicas have to be cured again when those other nodes are removed, resulting in unnecessary cross-node data transmission. <a href="/administration/rebalance#assign_secondary_black_list">Black_list</a> is provided for this purpose.</li>
</ul>
<blockquote>
<p>Note: When a node has been removed, its status on the meta server changes to <code class="language-plaintext highlighter-rouge">UNALIVE</code>, which may cause the proportion of <code class="language-plaintext highlighter-rouge">ALIVE</code> nodes to drop below the configured value of <code class="language-plaintext highlighter-rouge">node_live_percentage_threshold_for_update</code>. In that case the meta server automatically downgrades to the <code class="language-plaintext highlighter-rouge">freezed</code> state, in which no <code class="language-plaintext highlighter-rouge">reconfiguration</code> operations (i.e. replica reassignment operations) can be performed, so the scaling in process cannot proceed. Therefore, before scaling in, it is necessary to calculate whether this situation would occur. If so, modify the configuration of the meta server and set <code class="language-plaintext highlighter-rouge">node_live_percentage_threshold_for_update</code> to a value low enough to ensure that the meta server does not automatically downgrade to the <code class="language-plaintext highlighter-rouge">freezed</code> state during the scaling in process.</p>
</blockquote>
<h2 id="recommended-scaling-in-steps">Recommended scaling in steps</h2>
<ul>
<li>Calculate the proportion of <code class="language-plaintext highlighter-rouge">ALIVE</code> nodes after scaling in. If it is lower than the configured value of <code class="language-plaintext highlighter-rouge">node_live_percentage_threshold_for_update</code>, use <a href="/administration/remote-commands">remote commands</a> to lower the value sufficiently.
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; remote_command -t meta-server meta.live_percentage $percentage
</code></pre></div> </div>
<p><code class="language-plaintext highlighter-rouge">percentage</code> is an integer with a value range of [0, 100].</p>
</li>
<li>Use the shell tool command <code class="language-plaintext highlighter-rouge">set_meta_level</code> to set the cluster to <code class="language-plaintext highlighter-rouge">steady</code> mode, which disables <a href="rebalance">rebalance</a> and avoids unnecessary replica migration.
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; set_meta_level steady
</code></pre></div> </div>
</li>
<li>Use shell tools to send <a href="remote-commands#meta-server">remote commands</a> to the meta server to update <code class="language-plaintext highlighter-rouge">assign_secondary_black_list</code>:
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; remote_command -t meta-server meta.lb.assign_secondary_black_list $address_list
</code></pre></div> </div>
<p><code class="language-plaintext highlighter-rouge">address_list</code> is the <code class="language-plaintext highlighter-rouge">ip:port</code> list of nodes to be removed, separated by commas.</p>
</li>
<li>Use shell tools to set <code class="language-plaintext highlighter-rouge">assign_delay_ms</code> to 10, to make it possible to cure replicas immediately on other alive nodes after the node has been removed:
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; remote_command -t meta-server meta.lb.assign_delay_ms 10
</code></pre></div> </div>
</li>
<li>Remove replica servers one by one. The removing steps for a single replica server:
<ul>
<li>Kill the replica server process that you want to remove.</li>
<li>Use the shell tool command <code class="language-plaintext highlighter-rouge">ls -d</code> to check the cluster status, and wait until all partitions have fully recovered to a healthy status (all tables have 0 unhealthy partitions).</li>
<li>Clean up the data on this node to free up disk space.</li>
</ul>
</li>
<li>Restart the meta server:
<ul>
<li>Restarting the meta server clears the records of the removed nodes (i.e. removed nodes are no longer displayed by the <code class="language-plaintext highlighter-rouge">nodes -d</code> command of the shell tools) and resets the modified configuration items mentioned above.</li>
</ul>
</li>
</ul>
<h2 id="script">Script</h2>
<p>The above steps are completed by the script <a href="https://github.com/apache/incubator-pegasus/blob/master/scripts/pegasus_offline_node_list.sh">scripts/pegasus_offline_node_list.sh</a>.</p>
<blockquote>
<p>However, this script cannot be used directly because it relies on the <a href="https://github.com/XiaoMi/minos">minos deployment tool</a>.</p>
</blockquote>
<h1 id="nodes-migration">Nodes migration</h1>
<p>The nodes migration of the cluster can be achieved by first scaling out and then scaling in. To minimize unnecessary data transmission, it is recommended to follow the following steps:</p>
<ul>
<li>Scaling out: Add the new servers to the cluster, but temporarily do not perform <a href="/administration/rebalance">rebalance</a> after joining.</li>
<li>Scaling in: Remove the old servers through the <a href="#scale-in-steps">Scale in steps</a> above.</li>
<li>Perform <a href="/administration/rebalance">rebalance</a>.</li>
</ul>
<h1 id="other-configurations">Other configurations</h1>
<ul>
<li>Limit the migration speed. It can be achieved by limiting the read and write bandwidth per disk to avoid the performance impact caused by high disk IO throughput.
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&gt;&gt;&gt; remote_command -t replica-server nfs.max_send_rate_megabytes_per_disk $rate
&gt;&gt;&gt; remote_command -t replica-server nfs.max_copy_rate_megabytes_per_disk $rate
</code></pre></div> </div>
<p>The unit of <code class="language-plaintext highlighter-rouge">rate</code> is <code class="language-plaintext highlighter-rouge">MB/s</code>.</p>
</li>
</ul>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="http://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="http://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
Table of contents
</p>
<ul class="menu-list">
<li><a href="#design-goals">Design goals</a></li>
<li><a href="#scale-out-steps">Scale out steps</a></li>
<li><a href="#scale-in-steps">Scale in steps</a>
<ul>
<li><a href="#recommended-scaling-in-steps">Recommended scaling in steps</a></li>
<li><a href="#script">Script</a></li>
</ul>
</li>
<li><a href="#nodes-migration">Nodes migration</a></li>
<li><a href="#other-configurations">Other configurations</a></li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
// Mount the Algolia DocSearch search box into the #docsearch placeholder
// element rendered in the desktop navbar of this page.
docsearch({
  container: '#docsearch',
  appId: 'QRN30RBW0S',
  indexName: 'pegasus-apache',
  // NOTE(review): presumably a public, search-only Algolia key — confirm
  // before rotating or reusing it elsewhere.
  apiKey: 'd3a3252fa344359766707a106c4ed88f',
  // Debug mode is intended for inspecting the search modal during
  // development; keep it off on the published site.
  debug: false
});
</script>
</body>
</html>