blob: 2315505d332e8e472edf6f646a9d9da131db2b76 [file] [log] [blame]
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Pegasus | Bad Disk</title>
<link rel="stylesheet" href="/zh/assets/css/app.css">
<link rel="shortcut icon" href="/zh/assets/images/favicon.ico">
<link rel="stylesheet" href="/zh/assets/css/utilities.min.css">
<link rel="stylesheet" href="/zh/assets/css/docsearch.v3.css">
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<title>Bad Disk | Pegasus</title>
<meta name="generator" content="Jekyll v4.3.2" />
<meta property="og:title" content="Bad Disk" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="磁盘故障时有发生,通常有下列检查方式:" />
<meta property="og:description" content="磁盘故障时有发生,通常有下列检查方式:" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2023-11-23T14:51:44+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Bad Disk" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2023-11-23T14:51:44+00:00","datePublished":"2023-11-23T14:51:44+00:00","description":"磁盘故障时有发生,通常有下列检查方式:","headline":"Bad Disk","mainEntityOfPage":{"@type":"WebPage","@id":"/administration/bad-disk"},"url":"/administration/bad-disk"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<div class="dashboard-panel-header has-text-centered">
<a href="/zh/">
<img src="/assets/images/pegasus-logo-inv.png" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">Pegasus产品文档</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/downloads"
class="">
下载
</a>
</li>
</ul>
<p class="menu-label">编译构建</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/build/compile-by-docker"
class="">
使用Docker完成编译(推荐)
</a>
</li>
<li>
<a href="/zh/docs/build/compile-from-source"
class="">
从源码编译
</a>
</li>
</ul>
<p class="menu-label">客户端库</p>
<ul class="menu-list">
<li>
<a href="/zh/clients/java-client"
class="">
Java客户端
</a>
</li>
<li>
<a href="/zh/clients/cpp-client"
class="">
C++客户端
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang客户端
</a>
</li>
<li>
<a href="/zh/clients/python2-client"
class="">
Python2客户端
</a>
</li>
<li>
<a href="/zh/clients/python3-client"
class="">
Python3客户端
</a>
</li>
<li>
<a href="/zh/clients/node-client"
class="">
NodeJS客户端
</a>
</li>
<li>
<a href="/zh/clients/scala-client"
class="">
Scala客户端
</a>
</li>
</ul>
<p class="menu-label">生态工具</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/tools/shell"
class="">
Pegasus Shell 工具
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
集群管理命令行
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
数据访问命令行
</a>
</li>
</ul>
<p class="menu-label">用户接口</p>
<ul class="menu-list">
<li>
<a href="/zh/api/ttl"
class="">
TTL
</a>
</li>
<li>
<a href="/zh/api/single-atomic"
class="">
单行原子操作
</a>
</li>
<li>
<a href="/zh/api/redis"
class="">
Redis适配
</a>
</li>
<li>
<a href="/zh/api/geo"
class="">
GEO支持
</a>
</li>
<li>
<a href="/zh/api/http"
class="">
HTTP接口
</a>
</li>
</ul>
<p class="menu-label">高效运维</p>
<ul class="menu-list">
<li>
<a href="/zh/administration/deployment"
class="">
集群部署
</a>
</li>
<li>
<a href="/zh/administration/config"
class="">
配置说明
</a>
</li>
<li>
<a href="/zh/administration/rebalance"
class="">
负载均衡
</a>
</li>
<li>
<a href="/zh/administration/monitoring"
class="">
可视化监控
</a>
</li>
<li>
<a href="/zh/administration/rolling-update"
class="">
集群升级
</a>
</li>
<li>
<a href="/zh/administration/scale-in-out"
class="">
集群扩容缩容
</a>
</li>
<li>
<a href="/zh/administration/resource-management"
class="">
资源管理
</a>
</li>
<li>
<a href="/zh/administration/cold-backup"
class="">
冷备份
</a>
</li>
<li>
<a href="/zh/administration/meta-recovery"
class="">
元数据恢复
</a>
</li>
<li>
<a href="/zh/administration/replica-recovery"
class="">
Replica数据恢复
</a>
</li>
<li>
<a href="/zh/administration/zk-migration"
class="">
Zookeeper迁移
</a>
</li>
<li>
<a href="/zh/administration/table-migration"
class="">
Table迁移
</a>
</li>
<li>
<a href="/zh/administration/table-soft-delete"
class="">
Table软删除
</a>
</li>
<li>
<a href="/zh/administration/table-env"
class="">
Table环境变量
</a>
</li>
<li>
<a href="/zh/administration/remote-commands"
class="">
远程命令
</a>
</li>
<li>
<a href="/zh/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/zh/administration/duplication"
class="">
跨机房同步
</a>
</li>
<li>
<a href="/zh/administration/compression"
class="">
数据压缩
</a>
</li>
<li>
<a href="/zh/administration/throttling"
class="">
流量控制
</a>
</li>
<li>
<a href="/zh/administration/experiences"
class="">
运维经验
</a>
</li>
<li>
<a href="/zh/administration/manual-compact"
class="">
Manual Compact功能
</a>
</li>
<li>
<a href="/zh/administration/usage-scenario"
class="">
Usage Scenario功能
</a>
</li>
<li>
<a href="/zh/administration/bad-disk"
class="is-active">
坏盘检修
</a>
</li>
<li>
<a href="/zh/administration/whitelist"
class="">
白名单
</a>
</li>
<li>
<a href="/zh/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/zh/administration/hotspot-detection"
class="">
热点检测
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/zh/" class="navbar-item">
<!-- Pegasus Icon -->
<img src="/assets/images/pegasus-square.png">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/bad-disk"><strong>En</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Pegasus产品文档
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/downloads"
class="navbar-item ">
下载
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
编译构建
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/build/compile-by-docker"
class="navbar-item ">
使用Docker完成编译(推荐)
</a>
<a href="/zh/docs/build/compile-from-source"
class="navbar-item ">
从源码编译
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
客户端库
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/clients/java-client"
class="navbar-item ">
Java客户端
</a>
<a href="/zh/clients/cpp-client"
class="navbar-item ">
C++客户端
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang客户端
</a>
<a href="/zh/clients/python2-client"
class="navbar-item ">
Python2客户端
</a>
<a href="/zh/clients/python3-client"
class="navbar-item ">
Python3客户端
</a>
<a href="/zh/clients/node-client"
class="navbar-item ">
NodeJS客户端
</a>
<a href="/zh/clients/scala-client"
class="navbar-item ">
Scala客户端
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
生态工具
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/tools/shell"
class="navbar-item ">
Pegasus Shell 工具
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
集群管理命令行
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
数据访问命令行
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
用户接口
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/api/ttl"
class="navbar-item ">
TTL
</a>
<a href="/zh/api/single-atomic"
class="navbar-item ">
单行原子操作
</a>
<a href="/zh/api/redis"
class="navbar-item ">
Redis适配
</a>
<a href="/zh/api/geo"
class="navbar-item ">
GEO支持
</a>
<a href="/zh/api/http"
class="navbar-item ">
HTTP接口
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
高效运维
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/administration/deployment"
class="navbar-item ">
集群部署
</a>
<a href="/zh/administration/config"
class="navbar-item ">
配置说明
</a>
<a href="/zh/administration/rebalance"
class="navbar-item ">
负载均衡
</a>
<a href="/zh/administration/monitoring"
class="navbar-item ">
可视化监控
</a>
<a href="/zh/administration/rolling-update"
class="navbar-item ">
集群升级
</a>
<a href="/zh/administration/scale-in-out"
class="navbar-item ">
集群扩容缩容
</a>
<a href="/zh/administration/resource-management"
class="navbar-item ">
资源管理
</a>
<a href="/zh/administration/cold-backup"
class="navbar-item ">
冷备份
</a>
<a href="/zh/administration/meta-recovery"
class="navbar-item ">
元数据恢复
</a>
<a href="/zh/administration/replica-recovery"
class="navbar-item ">
Replica数据恢复
</a>
<a href="/zh/administration/zk-migration"
class="navbar-item ">
Zookeeper迁移
</a>
<a href="/zh/administration/table-migration"
class="navbar-item ">
Table迁移
</a>
<a href="/zh/administration/table-soft-delete"
class="navbar-item ">
Table软删除
</a>
<a href="/zh/administration/table-env"
class="navbar-item ">
Table环境变量
</a>
<a href="/zh/administration/remote-commands"
class="navbar-item ">
远程命令
</a>
<a href="/zh/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/zh/administration/duplication"
class="navbar-item ">
跨机房同步
</a>
<a href="/zh/administration/compression"
class="navbar-item ">
数据压缩
</a>
<a href="/zh/administration/throttling"
class="navbar-item ">
流量控制
</a>
<a href="/zh/administration/experiences"
class="navbar-item ">
运维经验
</a>
<a href="/zh/administration/manual-compact"
class="navbar-item ">
Manual Compact功能
</a>
<a href="/zh/administration/usage-scenario"
class="navbar-item ">
Usage Scenario功能
</a>
<a href="/zh/administration/bad-disk"
class="navbar-item is-active">
坏盘检修
</a>
<a href="/zh/administration/whitelist"
class="navbar-item ">
白名单
</a>
<a href="/zh/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/zh/administration/hotspot-detection"
class="navbar-item ">
热点检测
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /administration/bad-disk"><strong>En</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">坏盘检修</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<p>磁盘故障时有发生,通常有下列检查方式:</p>
<ul>
<li>
<p>可能是某节点的延迟明显高于其他节点,追其原因,如果看到某个 SSD 的 IO await 明显较高,
那基本说明该磁盘是“慢盘”。</p>
</li>
<li>
<p>平时的周期运维检修也容易发现潜在的磁盘故障。</p>
</li>
</ul>
<p>在Pegasus中,我们如何进行坏盘维修的操作?</p>
<h2 id="坏盘黑名单">坏盘黑名单</h2>
<p>Pegasus 支持磁盘黑名单,如果你要下线某块磁盘,首先要把它定义在其所在 Replica 节点的黑名单文件中,
黑名单文件的所在路径依据配置:</p>
<div class="language-ini highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nn">[replication]</span>
<span class="py">data_dirs_black_list_file</span> <span class="p">=</span> <span class="s">/home/work/.pegasus_data_dirs_black_list</span>
</code></pre></div></div>
<p>接着你登录对应节点,编辑 /home/work/.pegasus_data_dirs_black_list:</p>
<pre><code class="language-txt">/home/work/ssd2
/home/work/ssd3
</code></pre>
<p>上面标注磁盘 ssd2 与 ssd3 需要下线。</p>
<h2 id="重启节点">重启节点</h2>
<p>在你标注好坏盘名单后,你可以通过 <a href="rolling-update">高可用升级</a> 单独重启对应节点的服务进程。
通常你在程序日志中能够看到下列记录,表示黑名单内的磁盘的确被忽略了:</p>
<pre><code class="language-log">D2019-07-10 21:54:28.879 (1562766868879176673 9e8d) replica.default0.00009e5b00010001: replication_common.cpp:177:initialize(): data_dirs_black_list_file[/home/work/.pegasus_data_dirs_black_list] found, apply it
D2019-07-10 21:54:28.879 (1562766868879300907 9e8d) replica.default0.00009e5b00010001: replication_common.cpp:194:initialize(): black_list[1] = [/home/work/ssd2/]
D2019-07-10 21:54:28.879 (1562766868879312394 9e8d) replica.default0.00009e5b00010001: replication_common.cpp:194:initialize(): black_list[2] = [/home/work/ssd3/]
W2019-07-10 21:54:28.879 (1562766868879404635 9e8d) replica.default0.00009e5b00010001: replication_common.cpp:218:initialize(): replica data dir /home/work/ssd2/pegasus/c3tst-dup2/replica is in black list, ignore it
W2019-07-10 21:54:28.879 (1562766868879411121 9e8d) replica.default0.00009e5b00010001: replication_common.cpp:218:initialize(): replica data dir /home/work/ssd3/pegasus/c3tst-dup2/replica is in black list, ignore it
D2019-07-10 21:54:28.879 (1562766868879415865 9e8d) replica.default0.00009e5b00010001: replication_common.cpp:220:initialize(): data_dirs[0] = /home/work/ssd4/pegasus/c3tst-dup2/replica, tag = ssd4
D2019-07-10 21:54:28.879 (1562766868879422843 9e8d) replica.default0.00009e5b00010001: replication_common.cpp:220:initialize(): data_dirs[1] = /home/work/ssd5/pegasus/c3tst-dup2/replica, tag = ssd5
D2019-07-10 21:54:28.879 (1562766868879428846 9e8d) replica.default0.00009e5b00010001: replication_common.cpp:220:initialize(): data_dirs[2] = /home/work/ssd6/pegasus/c3tst-dup2/replica, tag = ssd6
</code></pre>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="http://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="http://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
本页导航
</p>
<ul class="menu-list">
<li><a href="#坏盘黑名单">坏盘黑名单</a></li>
<li><a href="#重启节点">重启节点</a></li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
docsearch({
container: '#docsearch',
appId: 'QRN30RBW0S',
indexName: 'pegasus-apache',
apiKey: 'd3a3252fa344359766707a106c4ed88f',
debug: true
});
</script>
</body>
</html>