blob: 69b95e1a118e65b3b37d310d71255a9439e9238b [file] [log] [blame]
<%--
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
--%>
<%@ page contentType="text/html;charset=UTF-8"
import="java.time.Instant"
import="java.time.ZoneId"
import="java.util.Date"
import="java.util.List"
import="java.util.Map"
import="java.util.stream.Collectors"
import="java.time.ZonedDateTime"
import="java.time.format.DateTimeFormatter"
%>
<%@ page import="org.apache.hadoop.fs.Path" %>
<%@ page import="org.apache.hadoop.hbase.client.RegionInfo" %>
<%@ page import="org.apache.hadoop.hbase.master.HbckChore" %>
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
<%@ page import="org.apache.hadoop.hbase.master.ServerManager" %>
<%@ page import="org.apache.hadoop.hbase.ServerName" %>
<%@ page import="org.apache.hadoop.hbase.util.Bytes" %>
<%@ page import="org.apache.hadoop.hbase.util.Pair" %>
<%@ page import="org.apache.hadoop.hbase.master.janitor.CatalogJanitor" %>
<%@ page import="org.apache.hadoop.hbase.master.janitor.Report" %>
<%
  // Page setup: optionally regenerate both reports, then gather everything the
  // template below renders.

  // Any value that Boolean.parseBoolean() reads as true (i.e. "?cache=true")
  // skips regeneration and shows the last cached reports.
  final String cacheParameterValue = request.getParameter("cache");
  final HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
  pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
  if (!Boolean.parseBoolean(cacheParameterValue)) {
    // Run the two reporters inline w/ drawing of the page. If exception, will show in page draw.
    try {
      master.getMasterRpcServices().runHbckChore(null, null);
    } catch (org.apache.hbase.thirdparty.com.google.protobuf.ServiceException se) {
      out.write("Failed generating a new hbck_chore report; using cache; try again or run hbck_chore_run in the shell: " + se.getMessage() + "\n");
    }
    try {
      master.getMasterRpcServices().runCatalogScan(null, null);
    } catch (org.apache.hbase.thirdparty.com.google.protobuf.ServiceException se) {
      out.write("Failed generating a new catalogjanitor report; using cache; try again or run catalogjanitor_run in the shell: " + se.getMessage() + "\n");
    }
  }

  // HBCK chore results. Everything stays null/0 when the Master exposes no chore.
  HbckChore hbckChore = master.getHbckChore();
  Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null;
  Map<String, ServerName> orphanRegionsOnRS = null;
  Map<String, Path> orphanRegionsOnFS = null;
  long startTimestamp = 0;
  long endTimestamp = 0;
  if (hbckChore != null) {
    inconsistentRegions = hbckChore.getInconsistentRegions();
    orphanRegionsOnRS = hbckChore.getOrphanRegionsOnRS();
    orphanRegionsOnFS = hbckChore.getOrphanRegionsOnFS();
    startTimestamp = hbckChore.getCheckingStartTimestamp();
    endTimestamp = hbckChore.getCheckingEndTimestamp();
  }

  // Render both timestamps as ISO-8601 with offset in the JVM default zone;
  // "-1" stands in for a timestamp that has not been set (still 0).
  // NOTE: zdt is reassigned by a later scriptlet on this page, so it must stay declared here.
  ZonedDateTime zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(startTimestamp),
    ZoneId.systemDefault());
  String iso8601start = startTimestamp == 0 ? "-1" : zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
  zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(endTimestamp),
    ZoneId.systemDefault());
  // Guard on the END timestamp: the end label must reflect whether the end time is set,
  // independently of the start time.
  String iso8601end = endTimestamp == 0 ? "-1" : zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);

  // CatalogJanitor results; report is null when there is no janitor.
  CatalogJanitor cj = master.getCatalogJanitor();
  Report report = cj == null ? null : cj.getLastReport();
  final ServerManager serverManager = master.getServerManager();
%>
<jsp:include page="header.jsp">
<jsp:param name="pageTitle" value="${pageTitle}"/>
</jsp:include>
<div class="container-fluid content">
<% if (!master.isInitialized()) { %>
<div class="row">
<div class="page-header">
<h1>Master is not initialized</h1>
</div>
</div>
<jsp:include page="redirect.jsp" />
<% } else { %>
<div class="row">
<div class="page-header">
<p><span>This page displays two reports: the <em>HBCK Chore Report</em> and
the <em>CatalogJanitor Consistency Issues</em> report. Only report titles
show if there are no problems to list. Note some conditions are
<strong>transitory</strong> as regions migrate. Reports are generated
when you invoke this page unless you add <em>?cache=true</em> to the URL. Then
we display the reports cached from the last time the reports were run.
Reports are run by Chores that are hosted by the Master on a cadence.
You can also run them on demand from the hbase shell: invoke <em>catalogjanitor_run</em>
and/or <em>hbck_chore_run</em>.
ServerNames will be links if server is live, italic if dead, and plain if unknown.</span></p>
</div>
</div>
<div class="row">
  <div class="page-header">
    <h1>HBCK Chore Report</h1>
    <p>
      <%-- Status line for the HBCK chore. hbckChore is nullable on this page, so it is
           null-checked before use; with no chore both timestamps are 0 and the
           "No report created." branch is rendered instead of failing the page. --%>
      <% if (hbckChore != null && hbckChore.isDisabled()) { %>
      <span>HBCK chore is currently disabled. Set hbase.master.hbck.chore.interval > 0 in the config & do a rolling-restart to enable it.</span>
      <% } else if (startTimestamp == 0 && endTimestamp == 0){ %>
      <span>No report created.</span>
      <% } else if (startTimestamp > 0 && endTimestamp == 0){ %>
      <span>Checking started at <%= iso8601start %>. Please wait for checking to generate a new sub-report.</span>
      <% } else { %>
      <span>Checking started at <%= iso8601start %> and generated report at <%= iso8601end %>.</span>
      <% } %>
    </p>
  </div>
</div>
<% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
<div class="row">
<div class="page-header">
<h2>Inconsistent Regions</h2>
</div>
</div>
<p>
<span>
There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use assign
command); 2. Master thought this region opened on Server1, but regionserver reported Server2 (Fix:
need to check the server still exists. If not, schedule <em>ServerCrashProcedure</em> for it. If exists,
restart Server2 and Server1);
3. More than one regionserver reports opened this region (Fix: restart the RegionServers).
Note: the reported online regionservers may not be up-to-date when there are regions in transition.
</span>
</p>
<table class="table table-striped">
<tr>
<th>Region Name</th>
<th>Location in META</th>
<th>Reported Online RegionServers</th>
</tr>
<% for (Map.Entry<String, Pair<ServerName, List<ServerName>>> entry : inconsistentRegions.entrySet()) {%>
<tr>
<td><%= entry.getKey() %></td>
<td><%= formatServerName(master, serverManager, entry.getValue().getFirst()) %></td>
<td><%= entry.getValue().getSecond().stream().map(s -> formatServerName(master, serverManager, s)).
collect(Collectors.joining(", ")) %></td>
</tr>
<% } %>
<p><%= inconsistentRegions.size() %> region(s) in set.</p>
</table>
<% } %>
<% if (orphanRegionsOnRS != null && orphanRegionsOnRS.size() > 0) { %>
<div class="row">
<div class="page-header">
<h2>Orphan Regions on RegionServer</h2>
</div>
</div>
<table class="table table-striped">
<tr>
<th>Region Name</th>
<th>Reported Online RegionServer</th>
</tr>
<% for (Map.Entry<String, ServerName> entry : orphanRegionsOnRS.entrySet()) { %>
<tr>
<td><%= entry.getKey() %></td>
<td><%= formatServerName(master, serverManager, entry.getValue()) %></td>
</tr>
<% } %>
<p><%= orphanRegionsOnRS.size() %> region(s) in set.</p>
</table>
<% } %>
<% if (orphanRegionsOnFS != null && orphanRegionsOnFS.size() > 0) { %>
<div class="row">
<div class="page-header">
<h2>Orphan Regions on FileSystem</h2>
</div>
</div>
<p>
<span>
The below are Regions we've lost account of. To be safe, run bulk load of any data found under these Region orphan directories to have the
cluster re-adopt data.
First make sure <em>hbase:meta</em> is in a healthy state, that there are no holes, overlaps or inconsistencies (else bulk load may fail);
run <em>hbck2 fixMeta</em>. Once this is done, per Region below, run a bulk
load -- <em>$ hbase completebulkload REGION_DIR_PATH TABLE_NAME</em> -- and then delete the desiccated directory content (HFiles are removed upon
successful load; all that is left are empty directories and occasionally a seqid marking file).
</span>
</p>
<table class="table table-striped">
<tr>
<th>Region Encoded Name</th>
<th>FileSystem Path</th>
</tr>
<% for (Map.Entry<String, Path> entry : orphanRegionsOnFS.entrySet()) { %>
<tr>
<td><%= entry.getKey() %></td>
<td><%= entry.getValue() %></td>
</tr>
<% } %>
<p><%= orphanRegionsOnFS.size() %> region(s) in set.</p>
</table>
<% } %>
<%
  // Timestamps shown in the CatalogJanitor header, formatted as ISO-8601 with
  // offset in the JVM default time zone.
  final ZoneId cjZone = ZoneId.systemDefault();
  final DateTimeFormatter cjFormatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME;

  // Current wall-clock time, for comparison against the report's creation time.
  zdt = Instant.ofEpochMilli(System.currentTimeMillis()).atZone(cjZone);
  String iso8601Now = cjFormatter.format(zdt);

  // "-1" is kept as the placeholder when there is no report.
  String iso8601reportTime = "-1";
  if (report != null) {
    zdt = Instant.ofEpochMilli(report.getCreateTime()).atZone(cjZone);
    iso8601reportTime = cjFormatter.format(zdt);
  }
%>
<div class="row inner_header">
  <div class="page-header">
    <h1>CatalogJanitor <em>hbase:meta</em> Consistency Issues</h1>
    <%-- The paragraph is closed after the if/else so that both branches emit balanced markup. --%>
    <p>
      <% if (report != null) { %>
      <span>Report created: <%= iso8601reportTime %> (now=<%= iso8601Now %>).</span>
      <% } else { %>
      <span>No report created.</span>
      <% } %>
    </p>
  </div>
</div>
<% if (report != null && !report.isEmpty()) { %>
<% if (!report.getHoles().isEmpty()) { %>
<div class="row inner_header">
<div class="page-header">
<h2>Holes</h2>
</div>
</div>
<table class="table table-striped">
<tr>
<th>RegionInfo</th>
<th>RegionInfo</th>
</tr>
<% for (Pair<RegionInfo, RegionInfo> p : report.getHoles()) { %>
<tr>
<td><span title="<%= p.getFirst() %>"><%= p.getFirst().getRegionNameAsString() %></span></td>
<td><span title="<%= p.getSecond() %>"><%= p.getSecond().getRegionNameAsString() %></span></td>
</tr>
<% } %>
<p><%= report.getHoles().size() %> hole(s).</p>
</table>
<% } %>
<% if (!report.getOverlaps().isEmpty()) { %>
<div class="row inner_header">
<div class="page-header">
<h2>Overlaps</h2>
<p>
<span>
Regions highlighted in <font color="blue">blue</font> are recently merged regions, HBase is still doing cleanup for them. Overlaps involving these regions cannot be fixed by <em>hbck2 fixMeta</em> at this moment.
Please wait some time, run <i>catalogjanitor_run</i> in hbase shell, refresh ‘HBCK Report’ page, make sure these regions are not highlighted to start the fix.
</span>
</p>
</div>
</div>
<table class="table table-striped">
<tr>
<th>RegionInfo</th>
<th>Other RegionInfo</th>
</tr>
<% for (Pair<RegionInfo, RegionInfo> p : report.getOverlaps()) { %>
<tr>
<% if (report.getMergedRegions().containsKey(p.getFirst())) { %>
<td><span style="color:blue;" title="<%= p.getFirst() %>"><%= p.getFirst().getRegionNameAsString() %></span></td>
<% } else { %>
<td><span title="<%= p.getFirst() %>"><%= p.getFirst().getRegionNameAsString() %></span></td>
<% } %>
<% if (report.getMergedRegions().containsKey(p.getSecond())) { %>
<td><span style="color:blue;" title="<%= p.getSecond() %>"><%= p.getSecond().getRegionNameAsString() %></span></td>
<% } else { %>
<td><span title="<%= p.getSecond() %>"><%= p.getSecond().getRegionNameAsString() %></span></td>
<% } %>
</tr>
<% } %>
<p><%= report.getOverlaps().size() %> overlap(s).</p>
</table>
<% } %>
<% if (!report.getUnknownServers().isEmpty()) { %>
<div class="row inner_header">
<div class="page-header">
<h2>Unknown Servers</h2>
</div>
</div>
<p>
<span>The below are servers mentioned in the hbase:meta table that are no longer 'live' or known 'dead'.
The server likely belongs to an older cluster epoch since replaced by a new instance because of a restart/crash.
To clear 'Unknown Servers', run 'hbck2 scheduleRecoveries UNKNOWN_SERVERNAME'. This will schedule a ServerCrashProcedure.
It will clear out 'Unknown Server' references and schedule reassigns of any Regions that were associated with this host.
But first!, be sure the referenced Region is not currently stuck looping trying to OPEN. Does it show as a Region-In-Transition on the
Master home page? Is it mentioned in the 'Procedures and Locks' Procedures list? If so, perhaps it is stuck in a loop
trying to OPEN but unable to because of a missing reference or file.
Read the Master log looking for the most recent
mentions of the associated Region name. Try and address any such complaint first. If successful, a side-effect
should be the clean up of the 'Unknown Servers' list. It may take a while. OPENs are retried forever but the interval
between retries grows. The 'Unknown Server' may be cleared because it is just the last RegionServer the Region was
successfully opened on; on the next open, the 'Unknown Server' will be purged.
</span>
</p>
<table class="table table-striped">
<tr>
<th>RegionInfo</th>
<th>ServerName</th>
</tr>
<% for (Pair<RegionInfo, ServerName> p: report.getUnknownServers()) { %>
<tr>
<td><span title="<%= p.getFirst() %>"><%= p.getFirst().getRegionNameAsString() %></span></td>
<td><%= p.getSecond() %></td>
</tr>
<% } %>
<p><%= report.getUnknownServers().size() %> unknown server(s).</p>
</table>
<% } %>
<% if (!report.getEmptyRegionInfo().isEmpty()) { %>
<div class="row inner_header">
<div class="page-header">
<h2>Empty <em>info:regioninfo</em></h2>
</div>
</div>
<table class="table table-striped">
<tr>
<th>Row</th>
</tr>
<% for (byte [] row: report.getEmptyRegionInfo()) { %>
<tr>
<td><%= Bytes.toStringBinary(row) %></td>
</tr>
<% } %>
<p><%= report.getEmptyRegionInfo().size() %> emptyRegionInfo(s).</p>
</table>
<% } %>
<% } %>
<% } %>
</div>
<jsp:include page="footer.jsp"/>
<%!
  /**
   * Render a ServerName as an HTML fragment for the report tables.
   * <ul>
   *   <li>Online with a positive info port: a link to that server's rs-status page.</li>
   *   <li>Online without a positive info port: bold.</li>
   *   <li>Not online but reported dead by the ServerManager: italic.</li>
   *   <li>Otherwise (unknown): plain text.</li>
   * </ul>
   *
   * @param master used to look up the RegionServer info port
   * @param serverManager used to decide whether the server is online or dead
   * @param serverName the server to render
   * @return the HTML fragment described above
   */
  private static String formatServerName(HMaster master,
      ServerManager serverManager, ServerName serverName) {
    final String label = serverName.toString();

    // Not online: italic when known dead, otherwise leave the name undecorated.
    if (!serverManager.isServerOnline(serverName)) {
      return serverManager.isServerDead(serverName) ? "<i>" + label + "</i>" : label;
    }

    // Online but no usable info port: nothing to link to, so just emphasize it.
    final int infoPort = master.getRegionServerInfoPort(serverName);
    if (infoPort <= 0) {
      return "<b>" + label + "</b>";
    }

    // Online with an info port: scheme-relative link to the server's status page.
    final StringBuilder link = new StringBuilder("<a href=//");
    link.append(serverName.getHostname())
        .append(":")
        .append(infoPort)
        .append("/rs-status>")
        .append(label)
        .append("</a>");
    return link.toString();
  }
%>