| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.tephra.hbase.txprune; |
| |
| import com.google.common.annotations.VisibleForTesting; |
| import com.google.common.collect.Iterables; |
| import com.google.common.collect.MinMaxPriorityQueue; |
| import com.google.common.collect.Sets; |
| import com.google.gson.Gson; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.hbase.HBaseConfiguration; |
| import org.apache.hadoop.hbase.TableName; |
| import org.apache.hadoop.hbase.client.Connection; |
| import org.apache.hadoop.hbase.client.ConnectionFactory; |
| import org.apache.hadoop.hbase.client.Table; |
| import org.apache.hadoop.hbase.util.Bytes; |
| import org.apache.tephra.TxConstants; |
| import org.apache.tephra.txprune.RegionPruneInfo; |
| import org.apache.tephra.txprune.hbase.InvalidListPruningDebug; |
| import org.apache.tephra.txprune.hbase.RegionsAtTime; |
| import org.apache.tephra.util.TimeMathParser; |
| import org.apache.tephra.util.TxUtils; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import java.io.IOException; |
| import java.io.PrintWriter; |
| import java.text.SimpleDateFormat; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Objects; |
| import java.util.Set; |
| import java.util.SortedSet; |
| import java.util.TreeSet; |
| import java.util.concurrent.TimeUnit; |
| import javax.annotation.Nullable; |
| |
| /** |
| * Invalid List Pruning Debug Tool. |
| */ |
| public class InvalidListPruningDebugTool implements InvalidListPruningDebug { |
| private static final Logger LOG = LoggerFactory.getLogger(InvalidListPruningDebugTool.class); |
| private static final Gson GSON = new Gson(); |
| private static final String NOW = "now"; |
| @VisibleForTesting |
| static final String DATE_FORMAT = "d-MMM-yyyy HH:mm:ss z"; |
| |
| private DataJanitorState dataJanitorState; |
| private Connection connection; |
| private TableName tableName; |
| |
| /** |
| * Initialize the Invalid List Debug Tool. |
| * @param conf {@link Configuration} |
| * @throws IOException when not able to create an HBase connection |
| */ |
| @Override |
| @SuppressWarnings("WeakerAccess") |
| public void initialize(final Configuration conf) throws IOException { |
| LOG.debug("InvalidListPruningDebugMain : initialize method called"); |
| connection = ConnectionFactory.createConnection(conf); |
| tableName = TableName.valueOf(conf.get(TxConstants.TransactionPruning.PRUNE_STATE_TABLE, |
| TxConstants.TransactionPruning.DEFAULT_PRUNE_STATE_TABLE)); |
| dataJanitorState = new DataJanitorState(new DataJanitorState.TableSupplier() { |
| @Override |
| public Table get() throws IOException { |
| return connection.getTable(tableName); |
| } |
| }); |
| } |
| |
| @Override |
| @SuppressWarnings("WeakerAccess") |
| public void destroy() throws IOException { |
| if (connection != null) { |
| connection.close(); |
| } |
| } |
| |
| /** |
| * Returns a set of regions that are live but are not empty nor have a prune upper bound recorded. These regions |
| * will stop the progress of pruning. |
| * <p/> |
| * Note that this can return false positives in the following case - |
| * At time 't' empty regions were recorded, and time 't+1' prune iteration was invoked. |
| * Since a new set of regions was recorded at time 't+1', all regions recorded as empty before time 't + 1' will |
| * now be reported as blocking the pruning, even though they are empty. This is because we cannot tell if those |
| * regions got any new data between time 't' and 't + 1'. |
| * |
| * @param numRegions number of regions |
| * @param time time in milliseconds or relative time, regions recorded before the given time are returned |
| * @return {@link Set} of regions that needs to be compacted and flushed |
| */ |
| @Override |
| @SuppressWarnings("WeakerAccess") |
| public Set<String> getRegionsToBeCompacted(Integer numRegions, String time) throws IOException { |
| // Fetch the live regions at the given time |
| RegionsAtTime timeRegion = getRegionsOnOrBeforeTime(time); |
| if (timeRegion.getRegions().isEmpty()) { |
| return Collections.emptySet(); |
| } |
| |
| Long timestamp = timeRegion.getTime(); |
| SortedSet<String> regions = timeRegion.getRegions(); |
| |
| // Get the live regions |
| SortedSet<String> liveRegions = getRegionsOnOrBeforeTime(NOW).getRegions(); |
| // Retain only the live regions |
| regions = Sets.newTreeSet(Sets.intersection(liveRegions, regions)); |
| |
| SortedSet<byte[]> emptyRegions = dataJanitorState.getEmptyRegionsAfterTime(timestamp, null); |
| SortedSet<String> emptyRegionNames = new TreeSet<>(); |
| Iterable<String> regionStrings = Iterables.transform(emptyRegions, TimeRegions.BYTE_ARR_TO_STRING_FN); |
| for (String regionString : regionStrings) { |
| emptyRegionNames.add(regionString); |
| } |
| |
| Set<String> nonEmptyRegions = Sets.newHashSet(Sets.difference(regions, emptyRegionNames)); |
| |
| // Get all pruned regions for the current time and remove them from the nonEmptyRegions, |
| // resulting in a set of regions that are not empty and have not been registered prune upper bound |
| List<RegionPruneInfo> prunedRegions = dataJanitorState.getPruneInfoForRegions(null); |
| for (RegionPruneInfo prunedRegion : prunedRegions) { |
| if (nonEmptyRegions.contains(prunedRegion.getRegionNameAsString())) { |
| nonEmptyRegions.remove(prunedRegion.getRegionNameAsString()); |
| } |
| } |
| |
| if ((numRegions < 0) || (numRegions >= nonEmptyRegions.size())) { |
| return nonEmptyRegions; |
| } |
| |
| Set<String> subsetRegions = new HashSet<>(numRegions); |
| for (String regionName : nonEmptyRegions) { |
| if (subsetRegions.size() == numRegions) { |
| break; |
| } |
| subsetRegions.add(regionName); |
| } |
| return subsetRegions; |
| } |
| |
| /** |
| * Return a list of RegionPruneInfo. These regions are the ones that have the lowest prune upper bounds. |
| * If -1 is passed in, all the regions and their prune upper bound will be returned. Note that only the regions |
| * that are known to be live will be returned. |
| * |
| * @param numRegions number of regions |
| * @param time time in milliseconds or relative time, regions recorded before the given time are returned |
| * @return Map of region name and its prune upper bound |
| */ |
| @Override |
| @SuppressWarnings("WeakerAccess") |
| public SortedSet<RegionPruneInfoPretty> getIdleRegions(Integer numRegions, String time) throws IOException { |
| List<RegionPruneInfo> regionPruneInfos = dataJanitorState.getPruneInfoForRegions(null); |
| if (regionPruneInfos.isEmpty()) { |
| return new TreeSet<>(); |
| } |
| |
| // Create a set with region names |
| Set<String> pruneRegionNameSet = new HashSet<>(); |
| for (RegionPruneInfo regionPruneInfo : regionPruneInfos) { |
| pruneRegionNameSet.add(regionPruneInfo.getRegionNameAsString()); |
| } |
| |
| // Fetch the latest live regions |
| RegionsAtTime latestRegions = getRegionsOnOrBeforeTime(NOW); |
| |
| // Fetch the regions at the given time |
| RegionsAtTime timeRegions = getRegionsOnOrBeforeTime(time); |
| Set<String> liveRegions = Sets.intersection(latestRegions.getRegions(), timeRegions.getRegions()); |
| Set<String> liveRegionsWithPruneInfo = Sets.intersection(liveRegions, pruneRegionNameSet); |
| List<RegionPruneInfo> liveRegionWithPruneInfoList = new ArrayList<>(); |
| for (RegionPruneInfo regionPruneInfo : regionPruneInfos) { |
| if (liveRegionsWithPruneInfo.contains(regionPruneInfo.getRegionNameAsString())) { |
| liveRegionWithPruneInfoList.add(regionPruneInfo); |
| } |
| |
| // Use the subset of live regions and prune regions |
| regionPruneInfos = liveRegionWithPruneInfoList; |
| } |
| |
| if (numRegions < 0) { |
| numRegions = regionPruneInfos.size(); |
| } |
| |
| Comparator<RegionPruneInfo> comparator = new Comparator<RegionPruneInfo>() { |
| @Override |
| public int compare(RegionPruneInfo o1, RegionPruneInfo o2) { |
| int result = Long.compare(o1.getPruneUpperBound(), o2.getPruneUpperBound()); |
| if (result == 0) { |
| return o1.getRegionNameAsString().compareTo(o2.getRegionNameAsString()); |
| } |
| return result; |
| } |
| }; |
| MinMaxPriorityQueue<RegionPruneInfoPretty> lowestPrunes = |
| MinMaxPriorityQueue.orderedBy(comparator).maximumSize(numRegions).create(); |
| |
| for (RegionPruneInfo pruneInfo : regionPruneInfos) { |
| lowestPrunes.add(new RegionPruneInfoPretty(pruneInfo)); |
| } |
| |
| SortedSet<RegionPruneInfoPretty> regions = new TreeSet<>(comparator); |
| regions.addAll(lowestPrunes); |
| return regions; |
| } |
| |
| /** |
| * Return the prune upper bound value of a given region. If no prune upper bound has been written for this region yet, |
| * it will return a null. |
| * |
| * @param regionId region id |
| * @return {@link RegionPruneInfo} of the region |
| * @throws IOException if there are any errors while trying to fetch the {@link RegionPruneInfo} |
| */ |
| @Override |
| @SuppressWarnings("WeakerAccess") |
| @Nullable |
| public RegionPruneInfoPretty getRegionPruneInfo(String regionId) throws IOException { |
| RegionPruneInfo pruneInfo = dataJanitorState.getPruneInfoForRegion(Bytes.toBytesBinary(regionId)); |
| return pruneInfo == null ? null : new RegionPruneInfoPretty(pruneInfo); |
| } |
| |
| /** |
| * |
| * @param timeString Given a time, provide the {@link TimeRegions} at or before that time. |
| * Time can be in milliseconds or relative time. |
| * @return transactional regions that are present at or before the given time |
| * @throws IOException if there are any errors while trying to fetch the {@link TimeRegions} |
| */ |
| @Override |
| @SuppressWarnings("WeakerAccess") |
| public RegionsAtTime getRegionsOnOrBeforeTime(String timeString) throws IOException { |
| long time = TimeMathParser.parseTime(timeString, TimeUnit.MILLISECONDS); |
| SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT); |
| TimeRegions timeRegions = dataJanitorState.getRegionsOnOrBeforeTime(time); |
| if (timeRegions == null) { |
| return new RegionsAtTime(time, new TreeSet<String>(), dateFormat); |
| } |
| SortedSet<String> regionNames = new TreeSet<>(); |
| Iterable<String> regionStrings = Iterables.transform(timeRegions.getRegions(), TimeRegions.BYTE_ARR_TO_STRING_FN); |
| for (String regionString : regionStrings) { |
| regionNames.add(regionString); |
| } |
| return new RegionsAtTime(timeRegions.getTime(), regionNames, dateFormat); |
| } |
| |
| private void printUsage(PrintWriter pw) { |
| pw.println(); |
| pw.println("Usage : org.apache.tephra.hbase.txprune.InvalidListPruning <command> <parameters>"); |
| pw.println(); |
| pw.println("Available commands"); |
| pw.println("------------------"); |
| pw.println("to-compact-regions limit [time]"); |
| pw.println("Desc: Prints out the regions that are active, but not empty, " + |
| "and have not registered a prune upper bound."); |
| pw.println(); |
| pw.println("idle-regions limit [time]"); |
| pw.println("Desc: Prints out the regions that have the lowest prune upper bounds."); |
| pw.println(); |
| pw.println("prune-info region-name-as-string"); |
| pw.println("Desc: Prints the prune upper bound and the time it was recorded for the given region."); |
| pw.println(); |
| pw.println("time-region [time]"); |
| pw.println("Desc: Prints out the transactional regions present in HBase recorded at or before the given time."); |
| pw.println(); |
| pw.println("Parameters"); |
| pw.println("----------"); |
| pw.println(" * limit - used to limit the number of regions returned, -1 to apply no limit"); |
| pw.println(" * time - if time is not provided, the current time is used. "); |
| pw.println(" When provided, the data recorded on or before the given time is returned."); |
| pw.println(" Time can be provided in milliseconds, or can be provided as a relative time."); |
| pw.println(" Examples for relative time -"); |
| pw.println(" now = current time,"); |
| pw.println(" now-1d = current time - 1 day,"); |
| pw.println(" now-1d+4h = 20 hours before now,"); |
| pw.println(" now+5s = current time + 5 seconds"); |
| pw.println(); |
| } |
| |
| @VisibleForTesting |
| boolean execute(String[] args, PrintWriter out) throws IOException { |
| if (args.length < 1) { |
| printUsage(out); |
| return false; |
| } |
| |
| String command = args[0]; |
| switch (command) { |
| case "time-region": |
| if (args.length <= 2) { |
| String time = args.length == 2 ? args[1] : NOW; |
| RegionsAtTime timeRegion = getRegionsOnOrBeforeTime(time); |
| out.println(GSON.toJson(timeRegion)); |
| return true; |
| } |
| break; |
| case "idle-regions": |
| if (args.length <= 3) { |
| Integer numRegions = Integer.parseInt(args[1]); |
| String time = args.length == 3 ? args[2] : NOW; |
| SortedSet<RegionPruneInfoPretty> regionPruneInfos = getIdleRegions(numRegions, time); |
| out.println(GSON.toJson(regionPruneInfos)); |
| return true; |
| } |
| break; |
| case "prune-info": |
| if (args.length == 2) { |
| String regionName = args[1]; |
| RegionPruneInfo regionPruneInfo = getRegionPruneInfo(regionName); |
| if (regionPruneInfo != null) { |
| out.println(GSON.toJson(regionPruneInfo)); |
| } else { |
| out.println(String.format("No prune info found for the region %s.", regionName)); |
| } |
| return true; |
| } |
| break; |
| case "to-compact-regions": |
| if (args.length <= 3) { |
| Integer numRegions = Integer.parseInt(args[1]); |
| String time = args.length == 3 ? args[2] : NOW; |
| Set<String> toBeCompactedRegions = getRegionsToBeCompacted(numRegions, time); |
| out.println(GSON.toJson(toBeCompactedRegions)); |
| return true; |
| } |
| break; |
| } |
| |
| printUsage(out); |
| return false; |
| } |
| |
| public static void main(String[] args) { |
| Configuration hConf = HBaseConfiguration.create(); |
| InvalidListPruningDebugTool pruningDebug = new InvalidListPruningDebugTool(); |
| try (PrintWriter out = new PrintWriter(System.out)) { |
| pruningDebug.initialize(hConf); |
| boolean success = pruningDebug.execute(args, out); |
| pruningDebug.destroy(); |
| if (!success) { |
| System.exit(1); |
| } |
| } catch (IOException ex) { |
| LOG.error("Received an exception while trying to execute the debug tool. ", ex); |
| } |
| } |
| |
| /** |
| * Wrapper class around {@link RegionPruneInfo} to print human readable dates for timestamps. |
| */ |
| @SuppressWarnings({"WeakerAccess", "unused"}) |
| public static class RegionPruneInfoPretty extends RegionPruneInfo { |
| private final transient SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT); |
| private final String pruneUpperBoundAsString; |
| private final String pruneRecordTimeAsString; |
| |
| public RegionPruneInfoPretty(RegionPruneInfo regionPruneInfo) { |
| this(regionPruneInfo.getRegionName(), regionPruneInfo.getRegionNameAsString(), |
| regionPruneInfo.getPruneUpperBound(), regionPruneInfo.getPruneRecordTime()); |
| } |
| |
| public RegionPruneInfoPretty(byte[] regionName, String regionNameAsString, |
| long pruneUpperBound, long pruneRecordTime) { |
| super(regionName, regionNameAsString, pruneUpperBound, pruneRecordTime); |
| pruneUpperBoundAsString = dateFormat.format(TxUtils.getTimestamp(pruneUpperBound)); |
| pruneRecordTimeAsString = dateFormat.format(pruneRecordTime); |
| } |
| |
| public String getPruneUpperBoundAsString() { |
| return pruneUpperBoundAsString; |
| } |
| |
| public String getPruneRecordTimeAsString() { |
| return pruneRecordTimeAsString; |
| } |
| |
| @Override |
| public boolean equals(Object o) { |
| if (this == o) { |
| return true; |
| } |
| if (o == null || getClass() != o.getClass()) { |
| return false; |
| } |
| if (!super.equals(o)) { |
| return false; |
| } |
| RegionPruneInfoPretty that = (RegionPruneInfoPretty) o; |
| return Objects.equals(pruneUpperBoundAsString, that.pruneUpperBoundAsString) && |
| Objects.equals(pruneRecordTimeAsString, that.pruneRecordTimeAsString); |
| } |
| |
| @Override |
| public int hashCode() { |
| return Objects.hash(super.hashCode(), pruneUpperBoundAsString, pruneRecordTimeAsString); |
| } |
| |
| @Override |
| public String toString() { |
| return "RegionPruneInfoPretty{" + |
| ", pruneUpperBoundAsString='" + pruneUpperBoundAsString + '\'' + |
| ", pruneRecordTimeAsString='" + pruneRecordTimeAsString + '\'' + |
| "} " + super.toString(); |
| } |
| } |
| |
| } |