blob: 4a5aa0a1b8e1a285e4e3b19ac732c73dbacffb13 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.janitor;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap;
import org.apache.hbase.thirdparty.com.google.common.collect.ListMultimap;
/**
* Server-side fixing of bad or inconsistent state in hbase:meta.
* Distinct from MetaTableAccessor because {@link MetaTableAccessor} is about low-level
* manipulations driven by the Master. This class MetaFixer is
* employed by the Master and it 'knows' about holes and orphans
* and encapsulates their fixing on behalf of the Master.
*/
@InterfaceAudience.Private
public class MetaFixer {
/** Logger shared by all instances of this class. */
private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class);
/** Configuration key read by the constructor to size {@link #maxMergeCount}. */
private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count";
/** Value used for {@link #MAX_MERGE_COUNT_KEY} when the configuration does not set it. */
private static final int MAX_MERGE_COUNT_DEFAULT = 64;
/** The Master this fixer works on behalf of; every fix goes through it. */
private final MasterServices masterServices;
/**
 * Maximum number of regions to merge at a time.
 */
private final int maxMergeCount;
/**
 * Creates a fixer that operates through the given Master. The per-merge region cap is read
 * once, here, from the Master's configuration.
 * @param masterServices the Master services this fixer delegates to
 */
public MetaFixer(MasterServices masterServices) {
  this.masterServices = masterServices;
  this.maxMergeCount =
    masterServices.getConfiguration().getInt(MAX_MERGE_COUNT_KEY, MAX_MERGE_COUNT_DEFAULT);
}
/**
 * Applies all fixes to the problems listed in the most recent CatalogJanitor report: holes
 * first, then overlaps, and finally a run of the ReplicationBarrierCleaner. When no report
 * has been generated yet, this only logs a hint and returns.
 * @throws IOException if fixing overlaps fails
 */
public void fix() throws IOException {
  final Report lastReport = this.masterServices.getCatalogJanitor().getLastReport();
  if (lastReport != null) {
    fixHoles(lastReport);
    fixOverlaps(lastReport);
    // The ReplicationBarrierCleaner may clear out rep_barrier rows, which can help with
    // cleaning up a damaged hbase:meta, so run it as the last step.
    this.masterServices.runReplicationBarrierCleaner();
    return;
  }
  LOG.info("CatalogJanitor has not generated a report yet; run 'catalogjanitor_run' in " +
    "shell or wait until CatalogJanitor chore runs.");
}
/**
 * Papers over each hole listed in the report: builds a covering {@link RegionInfo} per hole,
 * adds the corresponding entries to hbase:meta, and then submits round-robin assign
 * procedures for every region whose hbase:meta entry was created.
 * @param report the CatalogJanitor report whose holes are to be fixed
 */
void fixHoles(Report report) {
  final List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles();
  if (holes.isEmpty()) {
    LOG.info("CatalogJanitor Report contains no holes to fix. Skipping.");
    return;
  }
  LOG.info("Identified {} region holes to fix. Detailed fixup progress logged at DEBUG.",
    holes.size());
  // Only regions that actually made it into hbase:meta are handed on for assignment.
  final List<RegionInfo> regionsInMeta =
    createMetaEntries(masterServices, createRegionInfosForHoles(holes));
  final TransitRegionStateProcedure[] procedures =
    masterServices.getAssignmentManager().createRoundRobinAssignProcedures(regionsInMeta);
  masterServices.getMasterProcedureExecutor().submitProcedures(procedures);
  LOG.info(
    "Scheduled {}/{} new regions for assignment.", procedures.length, holes.size());
}
/**
 * Builds one new {@link RegionInfo} for every "hole" pair that a cover can be computed for.
 * Holes without a computable cover are left out of the result.
 * @param holes pairs of regions with a gap between them
 * @return the covering regions, in the order of the holes they belong to
 */
private static List<RegionInfo> createRegionInfosForHoles(
  final List<Pair<RegionInfo, RegionInfo>> holes) {
  final List<RegionInfo> covers = new ArrayList<>();
  for (Pair<RegionInfo, RegionInfo> hole : holes) {
    getHoleCover(hole).ifPresent(covers::add);
  }
  LOG.debug("Constructed {}/{} RegionInfo descriptors corresponding to identified holes.",
    covers.size(), holes.size());
  return covers;
}
/**
 * Tries to work out a new {@link RegionInfo} spanning the key range described by
 * {@code hole}.
 * <ul>
 * <li>Both sides in the same table: the cover runs from the left end key to the right start
 * key, provided the end key sorts strictly before the start key.</li>
 * <li>Sides in different tables, left side UNDEFINED or last in its table: the cover runs
 * from the empty start row to the right start key, in the right side's table.</li>
 * <li>Sides in different tables, right side UNDEFINED or first in its table: the cover runs
 * from the left end key to the empty end row, in the left side's table.</li>
 * </ul>
 * Any other combination is logged at WARN and skipped.
 * @param hole the regions on the left and right side of the hole
 * @return the covering region, or an empty Optional when no cover could be determined
 */
private static Optional<RegionInfo> getHoleCover(Pair<RegionInfo, RegionInfo> hole) {
  final RegionInfo before = hole.getFirst();
  final RegionInfo after = hole.getSecond();

  if (before.getTable().equals(after.getTable())) {
    // Simple case: the gap lies between two regions of one table.
    if (Bytes.compareTo(before.getEndKey(), after.getStartKey()) < 0) {
      return Optional.of(
        buildRegionInfo(before.getTable(), before.getEndKey(), after.getStartKey()));
    }
    LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey;"
      + " left=<{}>, right=<{}>", before, after);
    return Optional.empty();
  }

  // The two sides are in different tables, so the hole sits at a table boundary.
  final boolean beforeUndefined = before.equals(RegionInfoBuilder.UNDEFINED);
  final boolean afterUndefined = after.equals(RegionInfoBuilder.UNDEFINED);
  final boolean beforeIsLast = before.isLast();
  final boolean afterIsFirst = after.isFirst();

  if (beforeUndefined && afterUndefined) {
    LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " +
      "UNDEFINED; left=<{}>, right=<{}>", before, after);
    return Optional.empty();
  }

  final RegionInfo cover;
  if (beforeUndefined || beforeIsLast) {
    // Missing the head of the right-hand table.
    cover = buildRegionInfo(after.getTable(), HConstants.EMPTY_START_ROW, after.getStartKey());
  } else if (afterUndefined || afterIsFirst) {
    // Missing the tail of the left-hand table.
    cover = buildRegionInfo(before.getTable(), before.getEndKey(), HConstants.EMPTY_END_ROW);
  } else {
    LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>", before,
      after);
    return Optional.empty();
  }
  return Optional.of(cover);
}
/**
 * Builds a {@link RegionInfo} for table {@code tn} with the given start and end keys.
 * @param tn the table the region belongs to
 * @param start the region start key
 * @param end the region end key
 * @return the newly built region descriptor
 */
private static RegionInfo buildRegionInfo(TableName tn, byte [] start, byte [] end) {
  return RegionInfoBuilder.newBuilder(tn)
    .setStartKey(start)
    .setEndKey(end)
    .build();
}
/**
 * Creates {@code hbase:meta} entries for each provided {@link RegionInfo}, together with its
 * region replicas where the table asks for them. Best effort: a failure for one region is
 * recorded and logged, and the remaining regions are still attempted.
 * @param masterServices used to connect to {@code hbase:meta}
 * @param newRegionInfos the new {@link RegionInfo} entries to add to {@code hbase:meta}
 * @return the {@link RegionInfo} entries (replicas included) for which {@code hbase:meta}
 *         entries were successfully created
 */
private static List<RegionInfo> createMetaEntries(final MasterServices masterServices,
  final List<RegionInfo> newRegionInfos) {
  final List<Either<List<RegionInfo>, IOException>> outcomes = newRegionInfos.stream()
    .map(regionInfo -> addRegionWithReplicasToMeta(masterServices, regionInfo))
    .collect(Collectors.toList());

  // Split the per-region outcomes into what was added and what went wrong.
  final List<RegionInfo> added = new ArrayList<>();
  final List<IOException> failures = new ArrayList<>();
  for (Either<List<RegionInfo>, IOException> outcome : outcomes) {
    if (outcome.hasLeft()) {
      added.addAll(outcome.getLeft());
    }
    if (outcome.hasRight()) {
      failures.add(outcome.getRight());
    }
  }

  LOG.debug("Added {}/{} entries to hbase:meta", added.size(), newRegionInfos.size());
  if (!failures.isEmpty()) {
    LOG.warn("Failed to create entries in hbase:meta for {}/{} RegionInfo descriptors. First"
        + " failure message included; full list of failures with accompanying stack traces is"
        + " available at log level DEBUG. message={}", failures.size(), outcomes.size(),
      failures.get(0).getMessage());
    if (LOG.isDebugEnabled()) {
      for (IOException failure : failures) {
        LOG.debug("Attempt to fix region hole in hbase:meta failed.", failure);
      }
    }
  }
  return added;
}

/**
 * Adds a single region, plus its replicas when the table has region replication configured,
 * to {@code hbase:meta}.
 * @return a left value holding the regions that were added, or a right value holding the
 *         exception that stopped the attempt
 */
private static Either<List<RegionInfo>, IOException> addRegionWithReplicasToMeta(
  final MasterServices masterServices, final RegionInfo regionInfo) {
  try {
    final TableDescriptor descriptor =
      masterServices.getTableDescriptors().get(regionInfo.getTable());
    final int regionReplication = descriptor.getRegionReplication();
    // Add replicas if needed; replica ids are created starting from 1.
    final List<RegionInfo> withReplicas = RegionReplicaUtil
      .addReplicas(Collections.singletonList(regionInfo), 1, regionReplication);
    // Add regions to META
    MetaTableAccessor.addRegionsToMeta(masterServices.getConnection(), withReplicas,
      regionReplication);
    // Setup replication for region replicas if needed
    if (regionReplication > 1) {
      ServerRegionReplicaUtil.setupRegionReplicaReplication(masterServices);
    }
    return Either.<List<RegionInfo>, IOException> ofLeft(withReplicas);
  } catch (IOException e) {
    return Either.<List<RegionInfo>, IOException> ofRight(e);
  } catch (ReplicationException e) {
    return Either.<List<RegionInfo>, IOException> ofRight(new HBaseIOException(e));
  }
}
/**
 * Fixes the overlaps noted in the CatalogJanitor consistency report by asking the Master to
 * merge each calculated group of overlapping regions. A group whose merge request is rejected
 * with a {@link MergeRegionException} is logged at WARN and skipped.
 * @param report the CatalogJanitor report whose overlaps are to be fixed
 * @return the values returned by the Master for the merge requests that were accepted
 * @throws IOException if a merge request fails for a reason other than
 *         {@link MergeRegionException}
 */
List<Long> fixOverlaps(Report report) throws IOException {
  final List<Long> submittedPids = new ArrayList<>();
  final List<SortedSet<RegionInfo>> mergeSets =
    calculateMerges(maxMergeCount, report.getOverlaps());
  for (Set<RegionInfo> mergeSet : mergeSets) {
    final RegionInfo[] toMerge = mergeSet.toArray(new RegionInfo[0]);
    try {
      submittedPids.add(this.masterServices
        .mergeRegions(toMerge, true, HConstants.NO_NONCE, HConstants.NO_NONCE));
    } catch (MergeRegionException mre) {
      LOG.warn("Failed overlap fix of {}", toMerge, mre);
    }
  }
  return submittedPids;
}
/**
 * Runs through <code>overlaps</code> and returns the list of merges to run.
 * Overlaps are first grouped by the table of their first region, and merges are then
 * calculated table by table. Presumes overlaps are ordered (which they are coming out of the
 * CatalogJanitor consistency report).
 * @param maxMergeCount Maximum regions to merge at a time (avoid merging
 *   100k regions in one go!)
 * @param overlaps pairs of overlapping regions
 * @return the sets of regions to merge; empty when there are no overlaps
 */
static List<SortedSet<RegionInfo>> calculateMerges(int maxMergeCount,
  List<Pair<RegionInfo, RegionInfo>> overlaps) {
  if (overlaps.isEmpty()) {
    LOG.debug("No overlaps.");
    return Collections.emptyList();
  }
  // Bucket the overlap pairs by table, keeping their relative order within each table.
  final ListMultimap<TableName, Pair<RegionInfo, RegionInfo>> overlapsByTable =
    ArrayListMultimap.create();
  overlaps.forEach(overlap -> overlapsByTable.put(overlap.getFirst().getTable(), overlap));

  final List<SortedSet<RegionInfo>> merges = new ArrayList<>();
  final Map<TableName, Collection<Pair<RegionInfo, RegionInfo>>> perTable =
    overlapsByTable.asMap();
  for (Collection<Pair<RegionInfo, RegionInfo>> tableOverlaps : perTable.values()) {
    calculateTableMerges(maxMergeCount, merges, tableOverlaps);
  }
  return merges;
}
/**
 * Calculates the merges for the overlaps of a single table and appends them to
 * {@code merges}.
 * <p>
 * Walks the overlap pairs in order, accumulating regions into the current merge set for as
 * long as each new pair overlaps the region with the largest end key seen so far and the set
 * is below {@code maxMergeCount}. When either condition breaks, the current set is closed
 * and a new one is started. A region is only ever placed in one merge set. Sets holding
 * fewer than two regions are never emitted, because there is nothing to merge in them; this
 * applies to the final set as well as to sets closed inside the loop.
 * @param maxMergeCount maximum number of regions to put in one merge set
 * @param merges output list that the calculated merge sets are appended to
 * @param overlaps the overlap pairs of one table, presumed ordered
 */
private static void calculateTableMerges(int maxMergeCount, List<SortedSet<RegionInfo>> merges,
  Collection<Pair<RegionInfo, RegionInfo>> overlaps) {
  SortedSet<RegionInfo> currentMergeSet = new TreeSet<>();
  final Set<RegionInfo> regionsInMergeSet = new HashSet<>();
  RegionInfo regionInfoWithLargestEndKey = null;
  for (Pair<RegionInfo, RegionInfo> pair : overlaps) {
    if (regionInfoWithLargestEndKey != null) {
      if (!isOverlap(regionInfoWithLargestEndKey, pair) ||
        currentMergeSet.size() >= maxMergeCount) {
        // Log when we cut-off-merge because we hit the configured maximum merge limit.
        if (currentMergeSet.size() >= maxMergeCount) {
          LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount);
        }
        // A merge set with one region, or none, needs no merge request as no merge is going
        // to happen. Release its regions and reuse the set in that case.
        if (currentMergeSet.size() <= 1) {
          regionsInMergeSet.removeAll(currentMergeSet);
          currentMergeSet.clear();
        } else {
          merges.add(currentMergeSet);
          currentMergeSet = new TreeSet<>();
        }
      }
    }
    // Do not add the same region into multiple merge sets; this would fail
    // the second merge request.
    if (!regionsInMergeSet.contains(pair.getFirst())) {
      currentMergeSet.add(pair.getFirst());
      regionsInMergeSet.add(pair.getFirst());
    }
    if (!regionsInMergeSet.contains(pair.getSecond())) {
      currentMergeSet.add(pair.getSecond());
      regionsInMergeSet.add(pair.getSecond());
    }
    regionInfoWithLargestEndKey = getRegionInfoWithLargestEndKey(
      getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()),
      regionInfoWithLargestEndKey);
  }
  // Apply the same rule to the trailing set as to sets closed inside the loop: after a
  // cut-off it can be empty or hold a single region, and neither can be merged.
  if (currentMergeSet.size() > 1) {
    merges.add(currentMergeSet);
  }
}
/**
 * Picks whichever of <code>a</code> and <code>b</code> has the end key that is furthest
 * along in the table. On a tie, <code>a</code> wins.
 * @param a first candidate; may be null
 * @param b second candidate; may be null
 * @return the non-null argument when exactly one is null, null when both are null,
 *         <code>b</code> when the two belong to different tables, otherwise the region with
 *         the larger end key
 */
static RegionInfo getRegionInfoWithLargestEndKey(RegionInfo a, RegionInfo b) {
  if (a == null || b == null) {
    // With one side missing the other side is the answer; this is null when both are null.
    return a == null ? b : a;
  }
  if (!a.getTable().equals(b.getTable())) {
    // This is an odd one. This should be the right answer.
    return b;
  }
  // A table's last region reaches the end of the table, so it beats any other end key.
  if (a.isLast()) {
    return a;
  }
  if (b.isLast()) {
    return b;
  }
  return Bytes.compareTo(a.getEndKey(), b.getEndKey()) >= 0 ? a : b;
}
/**
 * Tells whether <code>ri</code> overlaps either member of <code>pair</code>. Does NOT check
 * whether the members of the pair overlap each other.
 * @param ri the region to test; may be null
 * @param pair the pair of regions to test against; may be null
 * @return true if <code>ri</code> overlaps the first or the second region of the pair; false
 *         otherwise, including when <code>ri</code> or <code>pair</code> is null
 */
static boolean isOverlap(RegionInfo ri, Pair<RegionInfo, RegionInfo> pair) {
  if (ri != null && pair != null) {
    return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond());
  }
  // Can't be an overlap when either argument is missing.
  return false;
}
/**
 * A union over {@code L} and {@code R}: a value that carries either a left or a right.
 * Presence is decided by a null check, so a side created from {@code null} counts as absent.
 * @param <L> type of the left value
 * @param <R> type of the right value
 */
private static class Either<L, R> {
  private final L left;
  private final R right;

  Either(L left, R right) {
    this.left = left;
    this.right = right;
  }

  /** @return an instance carrying {@code left} and no right value */
  public static <L, R> Either<L, R> ofLeft(L left) {
    return new Either<>(left, null);
  }

  /** @return an instance carrying {@code right} and no left value */
  public static <L, R> Either<L, R> ofRight(R right) {
    return new Either<>(null, right);
  }

  /** @return true if a left value is present */
  public boolean hasLeft() {
    return left != null;
  }

  /** @return true if a right value is present */
  public boolean hasRight() {
    return right != null;
  }

  /**
   * @return the left value
   * @throws IllegalStateException if no left value is present
   */
  public L getLeft() {
    if (left == null) {
      throw new IllegalStateException("Either contains no left.");
    }
    return left;
  }

  /**
   * @return the right value
   * @throws IllegalStateException if no right value is present
   */
  public R getRight() {
    if (right == null) {
      throw new IllegalStateException("Either contains no right.");
    }
    return right;
  }
}
}