/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import static org.apache.hadoop.hbase.HConstants.REPLICATION_SCOPE_LOCAL;
import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAJOR_COMPACTION_KEY;
import static org.apache.hadoop.hbase.util.ConcurrentMapUtils.computeIfAbsent;
import edu.umd.cs.findbugs.annotations.Nullable;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.reflect.Constructor;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Optional;
import java.util.RandomAccess;
import java.util.Set;
import java.util.TreeMap;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.LongAdder;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.CompoundConfiguration;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.DroppedSnapshotException;
import org.apache.hadoop.hbase.ExtendedCellBuilderFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MetaCellComparator;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.RegionTooBusyException;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagUtil;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.client.Append;
import org.apache.hadoop.hbase.client.CheckAndMutate;
import org.apache.hadoop.hbase.client.CheckAndMutateResult;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.CompactionState;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.IsolationLevel;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.PackagePrivateFieldAccessor;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Row;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.conf.ConfigurationManager;
import org.apache.hadoop.hbase.conf.PropagatingConfigurationObserver;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
import org.apache.hadoop.hbase.exceptions.TimeoutIOException;
import org.apache.hadoop.hbase.exceptions.UnknownProtocolException;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterWrapper;
import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.ipc.CallerDisconnectedException;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
import org.apache.hadoop.hbase.ipc.RpcCall;
import org.apache.hadoop.hbase.ipc.RpcServer;
import org.apache.hadoop.hbase.mob.MobFileCache;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.quotas.RegionServerSpaceQuotaManager;
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry;
import org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope;
import org.apache.hadoop.hbase.regionserver.ScannerContext.NextState;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker;
import org.apache.hadoop.hbase.regionserver.compactions.ForbidMajorCompactionChecker;
import org.apache.hadoop.hbase.regionserver.throttle.CompactionThroughputControllerFactory;
import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
import org.apache.hadoop.hbase.regionserver.throttle.StoreHotnessProtector;
import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController;
import org.apache.hadoop.hbase.regionserver.wal.WALUtil;
import org.apache.hadoop.hbase.replication.ReplicationUtils;
import org.apache.hadoop.hbase.replication.regionserver.ReplicationObserver;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HashedBytes;
import org.apache.hadoop.hbase.util.NonceKey;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.apache.hadoop.hbase.util.TableDescriptorChecker;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.hbase.wal.WALKeyImpl;
import org.apache.hadoop.hbase.wal.WALSplitUtil;
import org.apache.hadoop.hbase.wal.WALSplitUtil.MutationReplay;
import org.apache.hadoop.util.StringUtils;
import org.apache.htrace.core.TraceScope;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors.MethodDescriptor;
import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors.ServiceDescriptor;
import org.apache.hbase.thirdparty.com.google.protobuf.Message;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcCallback;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
import org.apache.hbase.thirdparty.com.google.protobuf.Service;
import org.apache.hbase.thirdparty.com.google.protobuf.TextFormat;
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.CoprocessorServiceCall;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.RegionLoad;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.StoreSequenceId;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor.FlushAction;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor.StoreFlushDescriptor;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor.EventType;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor;
/**
* A Region stores data for a certain region of a table. It stores all columns
* for each row. A given table consists of one or more Regions.
*
* <p>A Region is defined by its table and its key extent.
*
* <p>Locking at the Region level serves only one purpose: preventing the
* region from being closed (and consequently split) while other operations
* are ongoing. Each row level operation obtains both a row lock and a region
* read lock for the duration of the operation. While a scanner is being
* constructed, getScanner holds a read lock. If the scanner is successfully
* constructed, it holds a read lock until it is closed. A close takes out a
* write lock and consequently will block until ongoing operations complete, and
* will block new operations from starting while the close is in progress.
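*
* <p>A rough usage sketch only; the exact {@code openHRegion} overload and the surrounding
* bookkeeping vary by version and caller:
* <pre>{@code
* HRegion region = HRegion.openHRegion(conf, fs, rootDir, regionInfo, tableDescriptor, wal);
* try {
*   region.put(new Put(row).addColumn(family, qualifier, value));
*   Result result = region.get(new Get(row));
* } finally {
*   region.close(); // takes the region write lock; waits for in-flight operations to finish
* }
* }</pre>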
*/
@SuppressWarnings("deprecation")
@InterfaceAudience.Private
public class HRegion implements HeapSize, PropagatingConfigurationObserver, Region {
private static final Logger LOG = LoggerFactory.getLogger(HRegion.class);
public static final String LOAD_CFS_ON_DEMAND_CONFIG_KEY =
"hbase.hregion.scan.loadColumnFamiliesOnDemand";
public static final String HBASE_MAX_CELL_SIZE_KEY = "hbase.server.keyvalue.maxsize";
public static final int DEFAULT_MAX_CELL_SIZE = 10485760;
public static final String HBASE_REGIONSERVER_MINIBATCH_SIZE =
"hbase.regionserver.minibatch.size";
public static final int DEFAULT_HBASE_REGIONSERVER_MINIBATCH_SIZE = 20000;
public static final String WAL_HSYNC_CONF_KEY = "hbase.wal.hsync";
public static final boolean DEFAULT_WAL_HSYNC = false;
/**
* This is for using HRegion as local storage, where we may put the recovered edits in a
* special place. Once this is set, we will only replay the recovered edits under this directory
* and ignore the original replay directory configs.
*/
public static final String SPECIAL_RECOVERED_EDITS_DIR =
"hbase.hregion.special.recovered.edits.dir";
/**
* Whether to use {@link MetaCellComparator} even if this is not a meta region. Used when
* creating the master local region.
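*
* <p>A minimal sketch, assuming a plain {@link Configuration} is being prepared before the
* region is constructed (illustrative only):
* <pre>{@code
* Configuration conf = HBaseConfiguration.create();
* conf.setBoolean(HRegion.USE_META_CELL_COMPARATOR, true);
* }</pre>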
*/
public static final String USE_META_CELL_COMPARATOR = "hbase.region.use.meta.cell.comparator";
public static final boolean DEFAULT_USE_META_CELL_COMPARATOR = false;
final AtomicBoolean closed = new AtomicBoolean(false);
/* Closing can take some time; use the closing flag if there is stuff we don't
* want to do while in the closing state, e.g. offering this region up to the
* master as a region to close if the carrying regionserver is overloaded.
* Once set, it is never cleared.
*/
final AtomicBoolean closing = new AtomicBoolean(false);
/**
* The max sequence id of flushed data on this region. There is no edit in memory that is
* less than this sequence id.
*/
private volatile long maxFlushedSeqId = HConstants.NO_SEQNUM;
/**
* Record the sequence id of last flush operation. Can be in advance of
* {@link #maxFlushedSeqId} when flushing a single column family. In this case,
* {@link #maxFlushedSeqId} will be older than the oldest edit in memory.
*/
private volatile long lastFlushOpSeqId = HConstants.NO_SEQNUM;
/**
* The sequence id of the last replayed open region event from the primary region. This is used
* to skip earlier entries, since replayed edits may arrive out of order from
* replication.
*/
protected volatile long lastReplayedOpenRegionSeqId = -1L;
protected volatile long lastReplayedCompactionSeqId = -1L;
//////////////////////////////////////////////////////////////////////////////
// Members
//////////////////////////////////////////////////////////////////////////////
// map from a locked row to the context for that lock including:
// - CountDownLatch for threads waiting on that row
// - the thread that owns the lock (allow reentrancy)
// - reference count of (reentrant) locks held by the thread
// - the row itself
private final ConcurrentHashMap<HashedBytes, RowLockContext> lockedRows =
new ConcurrentHashMap<>();
protected final Map<byte[], HStore> stores =
new ConcurrentSkipListMap<>(Bytes.BYTES_RAWCOMPARATOR);
// TODO: account for each registered handler in HeapSize computation
private Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
// Track data size in all memstores
private final MemStoreSizing memStoreSizing = new ThreadSafeMemStoreSizing();
RegionServicesForStores regionServicesForStores;
// Debug possible data loss due to WAL off
final LongAdder numMutationsWithoutWAL = new LongAdder();
final LongAdder dataInMemoryWithoutWAL = new LongAdder();
// Debug why CAS operations are taking a while.
final LongAdder checkAndMutateChecksPassed = new LongAdder();
final LongAdder checkAndMutateChecksFailed = new LongAdder();
// Number of requests
// Count rows for scan
final LongAdder readRequestsCount = new LongAdder();
final LongAdder cpRequestsCount = new LongAdder();
final LongAdder filteredReadRequestsCount = new LongAdder();
// Count rows for multi row mutations
final LongAdder writeRequestsCount = new LongAdder();
// Number of requests blocked by memstore size.
private final LongAdder blockedRequestsCount = new LongAdder();
// Compaction LongAdders
final LongAdder compactionsFinished = new LongAdder();
final LongAdder compactionsFailed = new LongAdder();
final LongAdder compactionNumFilesCompacted = new LongAdder();
final LongAdder compactionNumBytesCompacted = new LongAdder();
final LongAdder compactionsQueued = new LongAdder();
final LongAdder flushesQueued = new LongAdder();
private BlockCache blockCache;
private MobFileCache mobFileCache;
private final WAL wal;
private final HRegionFileSystem fs;
protected final Configuration conf;
private final Configuration baseConf;
private final int rowLockWaitDuration;
static final int DEFAULT_ROWLOCK_WAIT_DURATION = 30000;
private Path regionDir;
private FileSystem walFS;
// set to true if the region is restored from snapshot
private boolean isRestoredRegion = false;
public void setRestoredRegion(boolean restoredRegion) {
isRestoredRegion = restoredRegion;
}
// The internal wait duration to acquire a lock before read/update
// from the region. It is not per row. The purpose of this wait time
// is to avoid waiting a long time while the region is busy, so that
// we can release the IPC handler soon enough to improve the
// availability of the region server. It can be adjusted by
// tuning configuration "hbase.busy.wait.duration".
final long busyWaitDuration;
static final long DEFAULT_BUSY_WAIT_DURATION = HConstants.DEFAULT_HBASE_RPC_TIMEOUT;
// If updating multiple rows in one call, wait longer,
// i.e. waiting for busyWaitDuration * # of rows. However,
// we can limit the max multiplier.
final int maxBusyWaitMultiplier;
// Max busy wait duration. There is no point to wait longer than the RPC
// purge timeout, when a RPC call will be terminated by the RPC engine.
final long maxBusyWaitDuration;
// Max cell size. If nonzero, the maximum allowed size for any given cell
// in bytes
final long maxCellSize;
// Number of mutations for minibatch processing.
private final int miniBatchSize;
// negative number indicates infinite timeout
static final long DEFAULT_ROW_PROCESSOR_TIMEOUT = 60 * 1000L;
final ExecutorService rowProcessorExecutor = Executors.newCachedThreadPool();
private final ConcurrentHashMap<RegionScanner, Long> scannerReadPoints;
/**
* The sequence ID that was encountered when this region was opened.
*/
private long openSeqNum = HConstants.NO_SEQNUM;
/**
* The default setting for whether to enable on-demand CF loading for
* scan requests to this region. Requests can override it.
*/
private boolean isLoadingCfsOnDemandDefault = false;
private final AtomicInteger majorInProgress = new AtomicInteger(0);
private final AtomicInteger minorInProgress = new AtomicInteger(0);
//
// Context: During replay we want to ensure that we do not lose any data. So, we
// have to be conservative in how we replay wals. For each store, we calculate
// the maxSeqId up to which the store was flushed. And, skip the edits which
// are equal to or lower than maxSeqId for each store.
// The following map is populated when opening the region
Map<byte[], Long> maxSeqIdInStores = new TreeMap<>(Bytes.BYTES_COMPARATOR);
/** Saved state from replaying prepare flush cache */
private PrepareFlushResult prepareFlushResult = null;
private volatile ConfigurationManager configurationManager;
// Used for testing.
private volatile Long timeoutForWriteLock = null;
private final CellComparator cellComparator;
/**
* @return The smallest mvcc readPoint across all the scanners in this
* region. Writes older than this readPoint are included in every
* read operation.
*/
public long getSmallestReadPoint() {
long minimumReadPoint;
// We need to ensure that while we are calculating the smallestReadPoint
// no new RegionScanners can grab a readPoint that we are unaware of.
// We achieve this by synchronizing on the scannerReadPoints object.
synchronized (scannerReadPoints) {
minimumReadPoint = mvcc.getReadPoint();
for (Long readPoint : this.scannerReadPoints.values()) {
if (readPoint < minimumReadPoint) {
minimumReadPoint = readPoint;
}
}
}
return minimumReadPoint;
}
/*
* Data structure of write state flags used for coordinating flushes,
* compactions and closes.
*/
static class WriteState {
// Set while a memstore flush is happening.
volatile boolean flushing = false;
// Set when a flush has been requested.
volatile boolean flushRequested = false;
// Number of compactions running.
AtomicInteger compacting = new AtomicInteger(0);
// Gets set in close. If set, cannot compact or flush again.
volatile boolean writesEnabled = true;
// Set if region is read-only
volatile boolean readOnly = false;
// whether reads are enabled. This is different from readOnly, because readOnly is
// static in the lifetime of the region, while readsEnabled is dynamic
volatile boolean readsEnabled = true;
/**
* Set flags that make this region read-only.
*
* @param onOff flip value for region r/o setting
*/
synchronized void setReadOnly(final boolean onOff) {
this.writesEnabled = !onOff;
this.readOnly = onOff;
}
boolean isReadOnly() {
return this.readOnly;
}
boolean isFlushRequested() {
return this.flushRequested;
}
void setReadsEnabled(boolean readsEnabled) {
this.readsEnabled = readsEnabled;
}
static final long HEAP_SIZE = ClassSize.align(
ClassSize.OBJECT + 5 * Bytes.SIZEOF_BOOLEAN);
}
/**
* Objects from this class are created when flushing to describe all the different states that
* the flush can end up in. The Result enum describes those states. The sequence id should only
* be specified if the flush was successful, and the failure message should only be specified
* if it didn't flush.
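*
* <p>A hedged sketch of how a caller might interpret a result; {@code region.flush(true)} below
* is illustrative and the actual flush entry point depends on the caller:
* <pre>{@code
* FlushResult flushResult = region.flush(true);
* if (flushResult.isFlushSucceeded()) {
*   // the memstores were written out to new store files
* }
* if (flushResult.isCompactionNeeded()) {
*   // ask the compaction machinery to look at this region
* }
* }</pre>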
*/
public static class FlushResultImpl implements FlushResult {
final Result result;
final String failureReason;
final long flushSequenceId;
final boolean wroteFlushWalMarker;
/**
* Convenience constructor to use when the flush is successful; the failure message is set to
* null.
* @param result Expecting FLUSHED_NO_COMPACTION_NEEDED or FLUSHED_COMPACTION_NEEDED.
* @param flushSequenceId Generated sequence id that comes right after the edits in the
* memstores.
*/
FlushResultImpl(Result result, long flushSequenceId) {
this(result, flushSequenceId, null, false);
assert result == Result.FLUSHED_NO_COMPACTION_NEEDED || result == Result
.FLUSHED_COMPACTION_NEEDED;
}
/**
* Convenience constructor to use when we cannot flush.
* @param result Expecting CANNOT_FLUSH_MEMSTORE_EMPTY or CANNOT_FLUSH.
* @param failureReason Reason why we couldn't flush.
*/
FlushResultImpl(Result result, String failureReason, boolean wroteFlushMarker) {
this(result, -1, failureReason, wroteFlushMarker);
assert result == Result.CANNOT_FLUSH_MEMSTORE_EMPTY || result == Result.CANNOT_FLUSH;
}
/**
* Constructor with all the parameters.
* @param result Any of the Result.
* @param flushSequenceId Generated sequence id if the memstores were flushed else -1.
* @param failureReason Reason why we couldn't flush, or null.
*/
FlushResultImpl(Result result, long flushSequenceId, String failureReason,
boolean wroteFlushMarker) {
this.result = result;
this.flushSequenceId = flushSequenceId;
this.failureReason = failureReason;
this.wroteFlushWalMarker = wroteFlushMarker;
}
/**
* Convenience method, the equivalent of checking if result is
* FLUSHED_NO_COMPACTION_NEEDED or FLUSHED_COMPACTION_NEEDED.
* @return true if the memstores were flushed, else false.
*/
@Override
public boolean isFlushSucceeded() {
return result == Result.FLUSHED_NO_COMPACTION_NEEDED || result == Result
.FLUSHED_COMPACTION_NEEDED;
}
/**
* Convenience method, the equivalent of checking if result is FLUSHED_COMPACTION_NEEDED.
* @return True if the flush requested a compaction, else false (doesn't even mean it flushed).
*/
@Override
public boolean isCompactionNeeded() {
return result == Result.FLUSHED_COMPACTION_NEEDED;
}
@Override
public String toString() {
return new StringBuilder()
.append("flush result:").append(result).append(", ")
.append("failureReason:").append(failureReason).append(",")
.append("flush seq id").append(flushSequenceId).toString();
}
@Override
public Result getResult() {
return result;
}
}
/** A result object from prepare flush cache stage */
static class PrepareFlushResult {
final FlushResultImpl result; // indicating a failure result from prepare
final TreeMap<byte[], StoreFlushContext> storeFlushCtxs;
final TreeMap<byte[], List<Path>> committedFiles;
final TreeMap<byte[], MemStoreSize> storeFlushableSize;
final long startTime;
final long flushOpSeqId;
final long flushedSeqId;
final MemStoreSizing totalFlushableSize;
/** Constructs an early exit case */
PrepareFlushResult(FlushResultImpl result, long flushSeqId) {
this(result, null, null, null, Math.max(0, flushSeqId), 0, 0, MemStoreSizing.DUD);
}
/** Constructs a successful prepare flush result */
PrepareFlushResult(
TreeMap<byte[], StoreFlushContext> storeFlushCtxs,
TreeMap<byte[], List<Path>> committedFiles,
TreeMap<byte[], MemStoreSize> storeFlushableSize, long startTime, long flushSeqId,
long flushedSeqId, MemStoreSizing totalFlushableSize) {
this(null, storeFlushCtxs, committedFiles, storeFlushableSize, startTime,
flushSeqId, flushedSeqId, totalFlushableSize);
}
private PrepareFlushResult(
FlushResultImpl result,
TreeMap<byte[], StoreFlushContext> storeFlushCtxs,
TreeMap<byte[], List<Path>> committedFiles,
TreeMap<byte[], MemStoreSize> storeFlushableSize, long startTime, long flushSeqId,
long flushedSeqId, MemStoreSizing totalFlushableSize) {
this.result = result;
this.storeFlushCtxs = storeFlushCtxs;
this.committedFiles = committedFiles;
this.storeFlushableSize = storeFlushableSize;
this.startTime = startTime;
this.flushOpSeqId = flushSeqId;
this.flushedSeqId = flushedSeqId;
this.totalFlushableSize = totalFlushableSize;
}
public FlushResult getResult() {
return this.result;
}
}
/**
* A class that tracks exceptions that have been observed in one batch. Not thread safe.
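*
* <p>Sketch of the intended log-once-per-batch pattern (illustrative only):
* <pre>{@code
* ObservedExceptionsInBatch observedExceptions = new ObservedExceptionsInBatch();
* if (!observedExceptions.hasSeenWrongRegion()) {
*   LOG.warn("Batch mutation had a row that does not belong to this region", e);
*   observedExceptions.sawWrongRegion();
* }
* }</pre>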
*/
static class ObservedExceptionsInBatch {
private boolean wrongRegion = false;
private boolean failedSanityCheck = false;
private boolean wrongFamily = false;
/**
* @return True if a {@link WrongRegionException} has been observed.
*/
boolean hasSeenWrongRegion() {
return wrongRegion;
}
/**
* Records that a {@link WrongRegionException} has been observed.
*/
void sawWrongRegion() {
wrongRegion = true;
}
/**
* @return True if a {@link FailedSanityCheckException} has been observed.
*/
boolean hasSeenFailedSanityCheck() {
return failedSanityCheck;
}
/**
* Records that a {@link FailedSanityCheckException} has been observed.
*/
void sawFailedSanityCheck() {
failedSanityCheck = true;
}
/**
* @return True if a {@link NoSuchColumnFamilyException} has been observed.
*/
boolean hasSeenNoSuchFamily() {
return wrongFamily;
}
/**
* Records that a {@link NoSuchColumnFamilyException} has been observed.
*/
void sawNoSuchFamily() {
wrongFamily = true;
}
}
final WriteState writestate = new WriteState();
long memstoreFlushSize;
final long timestampSlop;
final long rowProcessorTimeout;
// Last flush time for each Store. Useful when we are flushing per column family.
private final ConcurrentMap<HStore, Long> lastStoreFlushTimeMap = new ConcurrentHashMap<>();
protected RegionServerServices rsServices;
private RegionServerAccounting rsAccounting;
private long flushCheckInterval;
// flushPerChanges is to prevent too many changes in memstore
private long flushPerChanges;
private long blockingMemStoreSize;
// Used to guard closes
final ReentrantReadWriteLock lock;
// Used to track interruptible holders of the region lock. Currently that is only RPC handler
// threads. Boolean value in map determines if lock holder can be interrupted, normally true,
// but may be false when thread is transiting a critical section.
final ConcurrentHashMap<Thread, Boolean> regionLockHolders;
// Stop updates lock
private final ReentrantReadWriteLock updatesLock = new ReentrantReadWriteLock();
private final MultiVersionConcurrencyControl mvcc;
// Coprocessor host
private RegionCoprocessorHost coprocessorHost;
private TableDescriptor htableDescriptor = null;
private RegionSplitPolicy splitPolicy;
private FlushPolicy flushPolicy;
private final MetricsRegion metricsRegion;
private final MetricsRegionWrapperImpl metricsRegionWrapper;
private final Durability regionDurability;
private final boolean regionStatsEnabled;
// Stores the replication scope of the various column families of the table
// that have non-default scope
private final NavigableMap<byte[], Integer> replicationScope = new TreeMap<>(
Bytes.BYTES_COMPARATOR);
private final StoreHotnessProtector storeHotnessProtector;
/**
* HRegion constructor. This constructor should only be used for testing and
* extensions. Instances of HRegion should be instantiated with the
* {@link HRegion#createHRegion} or {@link HRegion#openHRegion} method.
*
* @param tableDir qualified path of directory where region should be located,
* usually the table directory.
* @param wal The WAL is the outbound log for any updates to the HRegion.
* The wal file is a logfile from the previous execution that's
* custom-computed for this HRegion. The HRegionServer computes and sorts the
* appropriate wal info for this HRegion. If there is a previous wal file
* (implying that the HRegion has been written-to before), then read it from
* the supplied path.
* @param fs is the filesystem.
* @param confParam is global configuration settings.
* @param regionInfo - RegionInfo that describes the region
* @param htd the table descriptor
* @param rsServices reference to {@link RegionServerServices} or null
* @deprecated Use other constructors.
*/
@Deprecated
public HRegion(final Path tableDir, final WAL wal, final FileSystem fs,
final Configuration confParam, final RegionInfo regionInfo,
final TableDescriptor htd, final RegionServerServices rsServices) {
this(new HRegionFileSystem(confParam, fs, tableDir, regionInfo),
wal, confParam, htd, rsServices);
}
/**
* HRegion constructor. This constructor should only be used for testing and
* extensions. Instances of HRegion should be instantiated with the
* {@link HRegion#createHRegion} or {@link HRegion#openHRegion} method.
*
* @param fs is the filesystem.
* @param wal The WAL is the outbound log for any updates to the HRegion.
* The wal file is a logfile from the previous execution that's
* custom-computed for this HRegion. The HRegionServer computes and sorts the
* appropriate wal info for this HRegion. If there is a previous wal file
* (implying that the HRegion has been written-to before), then read it from
* the supplied path.
* @param confParam is global configuration settings.
* @param htd the table descriptor
* @param rsServices reference to {@link RegionServerServices} or null
*/
public HRegion(final HRegionFileSystem fs, final WAL wal, final Configuration confParam,
final TableDescriptor htd, final RegionServerServices rsServices) {
if (htd == null) {
throw new IllegalArgumentException("Need table descriptor");
}
if (confParam instanceof CompoundConfiguration) {
throw new IllegalArgumentException("Need original base configuration");
}
this.wal = wal;
this.fs = fs;
this.mvcc = new MultiVersionConcurrencyControl(getRegionInfo().getShortNameToLog());
// 'conf' renamed to 'confParam' b/c we use this.conf in the constructor
this.baseConf = confParam;
this.conf = new CompoundConfiguration().add(confParam).addBytesMap(htd.getValues());
this.cellComparator = htd.isMetaTable() ||
conf.getBoolean(USE_META_CELL_COMPARATOR, DEFAULT_USE_META_CELL_COMPARATOR) ?
MetaCellComparator.META_COMPARATOR : CellComparatorImpl.COMPARATOR;
this.lock = new ReentrantReadWriteLock(conf.getBoolean(FAIR_REENTRANT_CLOSE_LOCK,
DEFAULT_FAIR_REENTRANT_CLOSE_LOCK));
this.regionLockHolders = new ConcurrentHashMap<>();
this.flushCheckInterval = conf.getInt(MEMSTORE_PERIODIC_FLUSH_INTERVAL,
DEFAULT_CACHE_FLUSH_INTERVAL);
this.flushPerChanges = conf.getLong(MEMSTORE_FLUSH_PER_CHANGES, DEFAULT_FLUSH_PER_CHANGES);
if (this.flushPerChanges > MAX_FLUSH_PER_CHANGES) {
throw new IllegalArgumentException(MEMSTORE_FLUSH_PER_CHANGES + " can not exceed "
+ MAX_FLUSH_PER_CHANGES);
}
int tmpRowLockDuration = conf.getInt("hbase.rowlock.wait.duration",
DEFAULT_ROWLOCK_WAIT_DURATION);
if (tmpRowLockDuration <= 0) {
LOG.info("Found hbase.rowlock.wait.duration set to {}. values <= 0 will cause all row " +
"locking to fail. Treating it as 1ms to avoid region failure.", tmpRowLockDuration);
tmpRowLockDuration = 1;
}
this.rowLockWaitDuration = tmpRowLockDuration;
this.isLoadingCfsOnDemandDefault = conf.getBoolean(LOAD_CFS_ON_DEMAND_CONFIG_KEY, true);
this.htableDescriptor = htd;
Set<byte[]> families = this.htableDescriptor.getColumnFamilyNames();
for (byte[] family : families) {
if (!replicationScope.containsKey(family)) {
int scope = htd.getColumnFamily(family).getScope();
// Only store those families that have NON-DEFAULT scope
if (scope != REPLICATION_SCOPE_LOCAL) {
// Do a copy before storing it here.
replicationScope.put(Bytes.copy(family), scope);
}
}
}
this.rsServices = rsServices;
if (this.rsServices != null) {
this.blockCache = rsServices.getBlockCache().orElse(null);
this.mobFileCache = rsServices.getMobFileCache().orElse(null);
}
this.regionServicesForStores = new RegionServicesForStores(this, rsServices);
setHTableSpecificConf();
this.scannerReadPoints = new ConcurrentHashMap<>();
this.busyWaitDuration = conf.getLong(
"hbase.busy.wait.duration", DEFAULT_BUSY_WAIT_DURATION);
this.maxBusyWaitMultiplier = conf.getInt("hbase.busy.wait.multiplier.max", 2);
if (busyWaitDuration * maxBusyWaitMultiplier <= 0L) {
throw new IllegalArgumentException("Invalid hbase.busy.wait.duration ("
+ busyWaitDuration + ") or hbase.busy.wait.multiplier.max ("
+ maxBusyWaitMultiplier + "). Their product should be positive");
}
this.maxBusyWaitDuration = conf.getLong("hbase.ipc.client.call.purge.timeout",
2 * HConstants.DEFAULT_HBASE_RPC_TIMEOUT);
/*
* timestamp.slop provides a server-side constraint on the timestamp. This
* assumes that you base your TS around currentTimeMillis(). In this case,
* throw an error to the user if the user-specified TS is newer than now +
* slop. A value of LATEST_TIMESTAMP disables this check.
*/
this.timestampSlop = conf.getLong(
"hbase.hregion.keyvalue.timestamp.slop.millisecs",
HConstants.LATEST_TIMESTAMP);
/**
* Timeout for the process time in processRowsWithLocks().
* Use -1 to switch off time bound.
*/
this.rowProcessorTimeout = conf.getLong(
"hbase.hregion.row.processor.timeout", DEFAULT_ROW_PROCESSOR_TIMEOUT);
this.storeHotnessProtector = new StoreHotnessProtector(this, conf);
boolean forceSync = conf.getBoolean(WAL_HSYNC_CONF_KEY, DEFAULT_WAL_HSYNC);
/**
* This is the global default value for durability. All tables/mutations not defining a
* durability or using USE_DEFAULT will default to this value.
*/
Durability defaultDurability = forceSync ? Durability.FSYNC_WAL : Durability.SYNC_WAL;
this.regionDurability =
this.htableDescriptor.getDurability() == Durability.USE_DEFAULT ? defaultDurability :
this.htableDescriptor.getDurability();
decorateRegionConfiguration(conf);
if (rsServices != null) {
this.rsAccounting = this.rsServices.getRegionServerAccounting();
// don't initialize coprocessors if not running within a regionserver
// TODO: revisit if coprocessors should load in other cases
this.coprocessorHost = new RegionCoprocessorHost(this, rsServices, conf);
this.metricsRegionWrapper = new MetricsRegionWrapperImpl(this);
this.metricsRegion = new MetricsRegion(this.metricsRegionWrapper, conf);
} else {
this.metricsRegionWrapper = null;
this.metricsRegion = null;
}
if (LOG.isDebugEnabled()) {
// Write out region name, its encoded name and storeHotnessProtector as string.
LOG.debug("Instantiated " + this +"; "+ storeHotnessProtector.toString());
}
configurationManager = null;
// disable stats tracking for system tables, but check the config for everything else
this.regionStatsEnabled = htd.getTableName().getNamespaceAsString().equals(
NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR) ?
false :
conf.getBoolean(HConstants.ENABLE_CLIENT_BACKPRESSURE,
HConstants.DEFAULT_ENABLE_CLIENT_BACKPRESSURE);
this.maxCellSize = conf.getLong(HBASE_MAX_CELL_SIZE_KEY, DEFAULT_MAX_CELL_SIZE);
this.miniBatchSize = conf.getInt(HBASE_REGIONSERVER_MINIBATCH_SIZE,
DEFAULT_HBASE_REGIONSERVER_MINIBATCH_SIZE);
// recover the metrics of read and write requests count if they were retained
if (rsServices != null && rsServices.getRegionServerAccounting() != null) {
Pair<Long, Long> retainedRWRequestsCnt = rsServices.getRegionServerAccounting()
.getRetainedRegionRWRequestsCnt().get(getRegionInfo().getEncodedName());
if (retainedRWRequestsCnt != null) {
this.setReadRequestsCount(retainedRWRequestsCnt.getFirst());
this.setWriteRequestsCount(retainedRWRequestsCnt.getSecond());
// remove them since they won't be used again
rsServices.getRegionServerAccounting().getRetainedRegionRWRequestsCnt()
.remove(getRegionInfo().getEncodedName());
}
}
}
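/**
* Derives per-table memstore sizing from the table descriptor, falling back to the
* {@code hbase.hregion.memstore.flush.size} configuration. As an illustration only: with the
* commonly shipped defaults of a 128 MB flush size and a block multiplier of 4, writes to a
* region start blocking once its memstores reach roughly 512 MB; the actual values depend on
* the HBase version and site configuration.
*/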
void setHTableSpecificConf() {
if (this.htableDescriptor == null) return;
long flushSize = this.htableDescriptor.getMemStoreFlushSize();
if (flushSize <= 0) {
flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE,
TableDescriptorBuilder.DEFAULT_MEMSTORE_FLUSH_SIZE);
}
this.memstoreFlushSize = flushSize;
long mult = conf.getLong(HConstants.HREGION_MEMSTORE_BLOCK_MULTIPLIER,
HConstants.DEFAULT_HREGION_MEMSTORE_BLOCK_MULTIPLIER);
this.blockingMemStoreSize = this.memstoreFlushSize * mult;
}
/**
* Initialize this region.
* Used only by tests and SplitTransaction to reopen the region.
* You should use createHRegion() or openHRegion()
* @return What the next sequence (edit) id should be.
* @throws IOException e
* @deprecated use HRegion.createHRegion() or HRegion.openHRegion()
*/
@Deprecated
public long initialize() throws IOException {
return initialize(null);
}
/**
* Initialize this region.
*
* @param reporter Tickle every so often if initialize is taking a while.
* @return What the next sequence (edit) id should be.
* @throws IOException e
*/
long initialize(final CancelableProgressable reporter) throws IOException {
//Refuse to open the region if there is no column family in the table
if (htableDescriptor.getColumnFamilyCount() == 0) {
throw new DoNotRetryIOException("Table " + htableDescriptor.getTableName().getNameAsString()+
" should have at least one column family.");
}
MonitoredTask status = TaskMonitor.get().createStatus("Initializing region " + this);
status.enableStatusJournal(true);
long nextSeqId = -1;
try {
nextSeqId = initializeRegionInternals(reporter, status);
return nextSeqId;
} catch (IOException e) {
LOG.warn("Failed initialize of region= {}, starting to roll back memstore",
getRegionInfo().getRegionNameAsString(), e);
// global memstore size will be decreased when dropping memstore
try {
//drop the memory used by memstore if open region fails
dropMemStoreContents();
} catch (IOException ioE) {
if (conf.getBoolean(MemStoreLAB.USEMSLAB_KEY, MemStoreLAB.USEMSLAB_DEFAULT)) {
LOG.warn("Failed drop memstore of region= {}, "
+ "some chunks may not released forever since MSLAB is enabled",
getRegionInfo().getRegionNameAsString());
}
}
throw e;
} finally {
// nextSeqid will be -1 if the initialization fails.
// At least it will be 0 otherwise.
if (nextSeqId == -1) {
status.abort("Exception during region " + getRegionInfo().getRegionNameAsString() +
" initialization.");
}
if (LOG.isDebugEnabled()) {
LOG.debug("Region open journal for {}:\n{}", this.getRegionInfo().getEncodedName(),
status.prettyPrintJournal());
}
status.cleanup();
}
}
private long initializeRegionInternals(final CancelableProgressable reporter,
final MonitoredTask status) throws IOException {
if (coprocessorHost != null) {
status.setStatus("Running coprocessor pre-open hook");
coprocessorHost.preOpen();
}
// Write HRI to a file in case we need to recover hbase:meta
// Only the primary replica should write .regioninfo
if (this.getRegionInfo().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
status.setStatus("Writing region info on filesystem");
fs.checkRegionInfoOnFilesystem();
}
// Initialize all the HStores
status.setStatus("Initializing all the Stores");
long maxSeqId = initializeStores(reporter, status);
this.mvcc.advanceTo(maxSeqId);
if (!isRestoredRegion && ServerRegionReplicaUtil.shouldReplayRecoveredEdits(this)) {
Collection<HStore> stores = this.stores.values();
try {
// update the stores that we are replaying
LOG.debug("replaying wal for " + this.getRegionInfo().getEncodedName());
stores.forEach(HStore::startReplayingFromWAL);
// Recover any edits if available.
maxSeqId = Math.max(maxSeqId,
replayRecoveredEditsIfAny(maxSeqIdInStores, reporter, status));
// Recover any hfiles if available
maxSeqId = Math.max(maxSeqId, loadRecoveredHFilesIfAny(stores));
// Make sure mvcc is up to max.
this.mvcc.advanceTo(maxSeqId);
} finally {
LOG.debug("stopping wal replay for " + this.getRegionInfo().getEncodedName());
// update the stores that we are done replaying
stores.forEach(HStore::stopReplayingFromWAL);
}
}
this.lastReplayedOpenRegionSeqId = maxSeqId;
this.writestate.setReadOnly(ServerRegionReplicaUtil.isReadOnly(this));
this.writestate.flushRequested = false;
this.writestate.compacting.set(0);
if (this.writestate.writesEnabled) {
LOG.debug("Cleaning up temporary data for " + this.getRegionInfo().getEncodedName());
// Remove temporary data left over from old regions
status.setStatus("Cleaning up temporary data from old regions");
fs.cleanupTempDir();
}
if (this.writestate.writesEnabled) {
status.setStatus("Cleaning up detritus from prior splits");
// Get rid of any splits or merges that were lost in-progress. Clean out
// these directories here on open. We may be opening a region that was
// being split but we crashed in the middle of it all.
LOG.debug("Cleaning up detritus for " + this.getRegionInfo().getEncodedName());
fs.cleanupAnySplitDetritus();
fs.cleanupMergesDir();
}
// Initialize split policy
this.splitPolicy = RegionSplitPolicy.create(this, conf);
// Initialize flush policy
this.flushPolicy = FlushPolicyFactory.create(this, conf);
long lastFlushTime = EnvironmentEdgeManager.currentTime();
for (HStore store: stores.values()) {
this.lastStoreFlushTimeMap.put(store, lastFlushTime);
}
// Use maximum of log sequenceid or that which was found in stores
// (particularly if no recovered edits, seqid will be -1).
long nextSeqId = maxSeqId + 1;
if (!isRestoredRegion) {
// always get openSeqNum from the default replica, even if we are secondary replicas
long maxSeqIdFromFile = WALSplitUtil.getMaxRegionSequenceId(conf,
RegionReplicaUtil.getRegionInfoForDefaultReplica(getRegionInfo()), this::getFilesystem,
this::getWalFileSystem);
nextSeqId = Math.max(maxSeqId, maxSeqIdFromFile) + 1;
// The openSeqNum will always be increased even for a read only region, as we rely on it to
// determine whether a region has been successfully reopened, so here we always need to update
// the max sequence id file.
if (RegionReplicaUtil.isDefaultReplica(getRegionInfo())) {
LOG.debug("writing seq id for {}", this.getRegionInfo().getEncodedName());
WALSplitUtil.writeRegionSequenceIdFile(getWalFileSystem(), getWALRegionDir(),
nextSeqId - 1);
// This means we have replayed all the recovered edits and also written out the max sequence
// id file, let's delete the wrong directories introduced in HBASE-20734, see HBASE-22617
// for more details.
Path wrongRegionWALDir = CommonFSUtils.getWrongWALRegionDir(conf,
getRegionInfo().getTable(), getRegionInfo().getEncodedName());
FileSystem walFs = getWalFileSystem();
if (walFs.exists(wrongRegionWALDir)) {
if (!walFs.delete(wrongRegionWALDir, true)) {
LOG.debug("Failed to clean up wrong region WAL directory {}", wrongRegionWALDir);
}
}
}
}
LOG.info("Opened {}; next sequenceid={}", this.getRegionInfo().getShortNameToLog(), nextSeqId);
// A region can be reopened if failed a split; reset flags
this.closing.set(false);
this.closed.set(false);
if (coprocessorHost != null) {
LOG.debug("Running coprocessor post-open hooks for " + this.getRegionInfo().getEncodedName());
status.setStatus("Running coprocessor post-open hooks");
coprocessorHost.postOpen();
}
status.markComplete("Region opened successfully");
return nextSeqId;
}
/**
* Open all Stores.
* @param reporter
* @param status
* @return Highest sequenceId found in a Store.
* @throws IOException
*/
private long initializeStores(CancelableProgressable reporter, MonitoredTask status)
throws IOException {
return initializeStores(reporter, status, false);
}
private long initializeStores(CancelableProgressable reporter, MonitoredTask status,
boolean warmup) throws IOException {
// Load in all the HStores.
long maxSeqId = -1;
// initialized to -1 so that we pick up MemstoreTS from column families
long maxMemstoreTS = -1;
if (htableDescriptor.getColumnFamilyCount() != 0) {
// initialize the thread pool for opening stores in parallel.
ThreadPoolExecutor storeOpenerThreadPool =
getStoreOpenAndCloseThreadPool("StoreOpener-" + this.getRegionInfo().getShortNameToLog());
CompletionService<HStore> completionService = new ExecutorCompletionService<>(storeOpenerThreadPool);
// initialize each store in parallel
for (final ColumnFamilyDescriptor family : htableDescriptor.getColumnFamilies()) {
status.setStatus("Instantiating store for column family " + family);
completionService.submit(new Callable<HStore>() {
@Override
public HStore call() throws IOException {
return instantiateHStore(family, warmup);
}
});
}
boolean allStoresOpened = false;
boolean hasSloppyStores = false;
try {
for (int i = 0; i < htableDescriptor.getColumnFamilyCount(); i++) {
Future<HStore> future = completionService.take();
HStore store = future.get();
this.stores.put(store.getColumnFamilyDescriptor().getName(), store);
if (store.isSloppyMemStore()) {
hasSloppyStores = true;
}
long storeMaxSequenceId = store.getMaxSequenceId().orElse(0L);
maxSeqIdInStores.put(Bytes.toBytes(store.getColumnFamilyName()),
storeMaxSequenceId);
if (maxSeqId == -1 || storeMaxSequenceId > maxSeqId) {
maxSeqId = storeMaxSequenceId;
}
long maxStoreMemstoreTS = store.getMaxMemStoreTS().orElse(0L);
if (maxStoreMemstoreTS > maxMemstoreTS) {
maxMemstoreTS = maxStoreMemstoreTS;
}
}
allStoresOpened = true;
if(hasSloppyStores) {
htableDescriptor = TableDescriptorBuilder.newBuilder(htableDescriptor)
.setFlushPolicyClassName(FlushNonSloppyStoresFirstPolicy.class.getName())
.build();
LOG.info("Setting FlushNonSloppyStoresFirstPolicy for the region=" + this);
}
} catch (InterruptedException e) {
throw throwOnInterrupt(e);
} catch (ExecutionException e) {
throw new IOException(e.getCause());
} finally {
storeOpenerThreadPool.shutdownNow();
if (!allStoresOpened) {
// something went wrong, close all opened stores
LOG.error("Could not initialize all stores for the region=" + this);
for (HStore store : this.stores.values()) {
try {
store.close();
} catch (IOException e) {
LOG.warn("close store {} failed in region {}", store.toString(), this, e);
}
}
}
}
}
return Math.max(maxSeqId, maxMemstoreTS + 1);
}
private void initializeWarmup(final CancelableProgressable reporter) throws IOException {
MonitoredTask status = TaskMonitor.get().createStatus("Initializing region " + this);
// Initialize all the HStores
status.setStatus("Warmup all stores of " + this.getRegionInfo().getRegionNameAsString());
try {
initializeStores(reporter, status, true);
} finally {
status.markComplete("Warmed up " + this.getRegionInfo().getRegionNameAsString());
}
}
/**
* @return Map of StoreFiles by column family
*/
private NavigableMap<byte[], List<Path>> getStoreFiles() {
NavigableMap<byte[], List<Path>> allStoreFiles = new TreeMap<>(Bytes.BYTES_COMPARATOR);
for (HStore store : stores.values()) {
Collection<HStoreFile> storeFiles = store.getStorefiles();
if (storeFiles == null) {
continue;
}
List<Path> storeFileNames = new ArrayList<>();
for (HStoreFile storeFile : storeFiles) {
storeFileNames.add(storeFile.getPath());
}
allStoreFiles.put(store.getColumnFamilyDescriptor().getName(), storeFileNames);
}
return allStoreFiles;
}
protected void writeRegionOpenMarker(WAL wal, long openSeqId) throws IOException {
Map<byte[], List<Path>> storeFiles = getStoreFiles();
RegionEventDescriptor regionOpenDesc = ProtobufUtil.toRegionEventDescriptor(
RegionEventDescriptor.EventType.REGION_OPEN, getRegionInfo(), openSeqId,
getRegionServerServices().getServerName(), storeFiles);
WALUtil.writeRegionEventMarker(wal, getReplicationScope(), getRegionInfo(), regionOpenDesc,
mvcc);
}
private void writeRegionCloseMarker(WAL wal) throws IOException {
Map<byte[], List<Path>> storeFiles = getStoreFiles();
RegionEventDescriptor regionEventDesc = ProtobufUtil.toRegionEventDescriptor(
RegionEventDescriptor.EventType.REGION_CLOSE, getRegionInfo(), mvcc.getReadPoint(),
getRegionServerServices().getServerName(), storeFiles);
WALUtil.writeRegionEventMarker(wal, getReplicationScope(), getRegionInfo(), regionEventDesc,
mvcc);
// Store SeqId in WAL FileSystem when a region closes
// the check that the region folder exists is needed because many tests delete the table
// folder while a table is still online
if (getWalFileSystem().exists(getWALRegionDir())) {
WALSplitUtil.writeRegionSequenceIdFile(getWalFileSystem(), getWALRegionDir(),
mvcc.getReadPoint());
}
}
/**
* @return True if this region has references.
*/
public boolean hasReferences() {
return stores.values().stream().anyMatch(HStore::hasReferences);
}
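/**
* Takes the updates write lock, blocking new mutations to this region until
* {@link #unblockUpdates()} is called.
*/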
public void blockUpdates() {
this.updatesLock.writeLock().lock();
}
public void unblockUpdates() {
this.updatesLock.writeLock().unlock();
}
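/**
* @return the HDFS blocks distribution aggregated across all store files currently open in
* this region's stores.
*/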
public HDFSBlocksDistribution getHDFSBlocksDistribution() {
HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
stores.values().stream().filter(s -> s.getStorefiles() != null)
.flatMap(s -> s.getStorefiles().stream()).map(HStoreFile::getHDFSBlockDistribution)
.forEachOrdered(hdfsBlocksDistribution::add);
return hdfsBlocksDistribution;
}
/**
* This is a helper function to compute HDFS block distribution on demand
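*
* <p>Rough usage sketch (the {@code serverHostname} argument below is illustrative):
* <pre>{@code
* HDFSBlocksDistribution distribution =
*   HRegion.computeHDFSBlocksDistribution(conf, tableDescriptor, regionInfo);
* float locality = distribution.getBlockLocalityIndex(serverHostname);
* }</pre>
*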
* @param conf configuration
* @param tableDescriptor TableDescriptor of the table
* @param regionInfo RegionInfo describing the region
* @return The HDFS blocks distribution for the given region.
*/
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(Configuration conf,
TableDescriptor tableDescriptor, RegionInfo regionInfo) throws IOException {
Path tablePath =
CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), tableDescriptor.getTableName());
return computeHDFSBlocksDistribution(conf, tableDescriptor, regionInfo, tablePath);
}
/**
* This is a helper function to compute HDFS block distribution on demand
* @param conf configuration
* @param tableDescriptor TableDescriptor of the table
* @param regionInfo RegionInfo describing the region
* @param tablePath the table directory
* @return The HDFS blocks distribution for the given region.
* @throws IOException
*/
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(Configuration conf,
TableDescriptor tableDescriptor, RegionInfo regionInfo, Path tablePath) throws IOException {
HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
FileSystem fs = tablePath.getFileSystem(conf);
HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tablePath, regionInfo);
for (ColumnFamilyDescriptor family : tableDescriptor.getColumnFamilies()) {
List<LocatedFileStatus> locatedFileStatusList = HRegionFileSystem
.getStoreFilesLocatedStatus(regionFs, family.getNameAsString(), true);
if (locatedFileStatusList == null) {
continue;
}
for (LocatedFileStatus status : locatedFileStatusList) {
Path p = status.getPath();
if (StoreFileInfo.isReference(p) || HFileLink.isHFileLink(p)) {
// Only construct a StoreFileInfo object if it's not an HFile, to save object
// creation
StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, status);
hdfsBlocksDistribution.add(storeFileInfo
.computeHDFSBlocksDistribution(fs));
} else if (StoreFileInfo.isHFile(p)) {
// If it's an HFile, then let's just add it to the block distribution;
// let's not create more objects here, not even another HDFSBlocksDistribution
FSUtils.addToHDFSBlocksDistribution(hdfsBlocksDistribution,
status.getBlockLocations());
} else {
throw new IOException("path=" + p
+ " doesn't look like a valid StoreFile");
}
}
}
return hdfsBlocksDistribution;
}
/**
* Increase the size of mem store in this region and the size of global mem
* store
*/
void incMemStoreSize(MemStoreSize mss) {
incMemStoreSize(mss.getDataSize(), mss.getHeapSize(), mss.getOffHeapSize(),
mss.getCellsCount());
}
void incMemStoreSize(long dataSizeDelta, long heapSizeDelta, long offHeapSizeDelta,
int cellsCountDelta) {
if (this.rsAccounting != null) {
rsAccounting.incGlobalMemStoreSize(dataSizeDelta, heapSizeDelta, offHeapSizeDelta);
}
long dataSize = this.memStoreSizing.incMemStoreSize(dataSizeDelta, heapSizeDelta,
offHeapSizeDelta, cellsCountDelta);
checkNegativeMemStoreDataSize(dataSize, dataSizeDelta);
}
void decrMemStoreSize(MemStoreSize mss) {
decrMemStoreSize(mss.getDataSize(), mss.getHeapSize(), mss.getOffHeapSize(),
mss.getCellsCount());
}
void decrMemStoreSize(long dataSizeDelta, long heapSizeDelta, long offHeapSizeDelta,
int cellsCountDelta) {
if (this.rsAccounting != null) {
rsAccounting.decGlobalMemStoreSize(dataSizeDelta, heapSizeDelta, offHeapSizeDelta);
}
long dataSize = this.memStoreSizing.decMemStoreSize(dataSizeDelta, heapSizeDelta,
offHeapSizeDelta, cellsCountDelta);
checkNegativeMemStoreDataSize(dataSize, -dataSizeDelta);
}
private void checkNegativeMemStoreDataSize(long memStoreDataSize, long delta) {
// This is extremely bad if we make memStoreSizing negative. Log as much info on the offending
// caller as possible. (memStoreSizing might be a negative value already -- freeing memory)
if (memStoreDataSize < 0) {
LOG.error("Asked to modify this region's (" + this.toString()
+ ") memStoreSizing to a negative value which is incorrect. Current memStoreSizing="
+ (memStoreDataSize - delta) + ", delta=" + delta, new Exception());
}
}
@Override
public RegionInfo getRegionInfo() {
return this.fs.getRegionInfo();
}
/**
* @return Instance of {@link RegionServerServices} used by this HRegion.
* Can be null.
*/
RegionServerServices getRegionServerServices() {
return this.rsServices;
}
@Override
public long getReadRequestsCount() {
return readRequestsCount.sum();
}
@Override
public long getCpRequestsCount() {
return cpRequestsCount.sum();
}
@Override
public long getFilteredReadRequestsCount() {
return filteredReadRequestsCount.sum();
}
@Override
public long getWriteRequestsCount() {
return writeRequestsCount.sum();
}
@Override
public long getMemStoreDataSize() {
return memStoreSizing.getDataSize();
}
@Override
public long getMemStoreHeapSize() {
return memStoreSizing.getHeapSize();
}
@Override
public long getMemStoreOffHeapSize() {
return memStoreSizing.getOffHeapSize();
}
/** @return store services for this region, used to access services required at the store level */
public RegionServicesForStores getRegionServicesForStores() {
return regionServicesForStores;
}
@Override
public long getNumMutationsWithoutWAL() {
return numMutationsWithoutWAL.sum();
}
@Override
public long getDataInMemoryWithoutWAL() {
return dataInMemoryWithoutWAL.sum();
}
@Override
public long getBlockedRequestsCount() {
return blockedRequestsCount.sum();
}
@Override
public long getCheckAndMutateChecksPassed() {
return checkAndMutateChecksPassed.sum();
}
@Override
public long getCheckAndMutateChecksFailed() {
return checkAndMutateChecksFailed.sum();
}
// TODO Needs to check whether we should expose our metrics system to CPs. If CPs themselves are
// doing the op and bypassing the core, this might be needed. Should we stop supporting the
// bypass feature?
public MetricsRegion getMetrics() {
return metricsRegion;
}
@Override
public boolean isClosed() {
return this.closed.get();
}
@Override
public boolean isClosing() {
return this.closing.get();
}
@Override
public boolean isReadOnly() {
return this.writestate.isReadOnly();
}
@Override
public boolean isAvailable() {
return !isClosed() && !isClosing();
}
@Override
public boolean isSplittable() {
return splitPolicy.canSplit();
}
@Override
public boolean isMergeable() {
if (!isAvailable()) {
LOG.debug("Region " + this
+ " is not mergeable because it is closing or closed");
return false;
}
if (hasReferences()) {
LOG.debug("Region " + this
+ " is not mergeable because it has references");
return false;
}
return true;
}
public boolean areWritesEnabled() {
synchronized(this.writestate) {
return this.writestate.writesEnabled;
}
}
public MultiVersionConcurrencyControl getMVCC() {
return mvcc;
}
@Override
public long getMaxFlushedSeqId() {
return maxFlushedSeqId;
}
/**
* @return readpoint considering given IsolationLevel. Pass {@code null} for default
*/
public long getReadPoint(IsolationLevel isolationLevel) {
if (isolationLevel != null && isolationLevel == IsolationLevel.READ_UNCOMMITTED) {
// This scan can read even uncommitted transactions
return Long.MAX_VALUE;
}
return mvcc.getReadPoint();
}
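// Illustrative sketch (not part of the original source): how a caller holding a reference to an
// open region might query the read point. The variable name 'region' is hypothetical.
//
//   HRegion region = ...; // an open region
//   // Default isolation: pass null to read up to the current mvcc read point.
//   long readPoint = region.getReadPoint(null);
//   // READ_UNCOMMITTED returns Long.MAX_VALUE so the scan can see uncommitted edits.
//   long uncommitted = region.getReadPoint(IsolationLevel.READ_UNCOMMITTED);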
public boolean isLoadingCfsOnDemandDefault() {
return this.isLoadingCfsOnDemandDefault;
}
/**
* Close down this HRegion. Flush the cache, shut down each HStore, don't
* service any more calls.
*
* <p>This method could take some time to execute, so don't call it from a
* time-sensitive thread.
*
* @return Map of the column family name to the list of StoreFiles that the HRegion's component
* HStores make use of. Returns an empty map if already closed and null if it is judged that it
* should not close.
*
* @throws IOException e
* @throws DroppedSnapshotException Thrown when replay of wal is required
* because a Snapshot was not properly persisted. The region is put in closing mode, and the
* caller MUST abort after this.
*/
public Map<byte[], List<HStoreFile>> close() throws IOException {
return close(false);
}
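// Illustrative sketch (not part of the original source): how a caller such as a region server
// shutdown path might drive close(). The variable name 'region' is hypothetical.
//
//   HRegion region = ...; // an open region
//   try {
//     Map<byte[], List<HStoreFile>> closedFiles = region.close();
//     if (closedFiles == null) {
//       // close() judged that the region should not close right now
//     }
//   } catch (DroppedSnapshotException dse) {
//     // A snapshot was not properly persisted; WAL replay is required and the
//     // caller MUST abort the server (see the javadoc above).
//   }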
private final Object closeLock = new Object();
/** Conf key for fair locking policy */
public static final String FAIR_REENTRANT_CLOSE_LOCK =
"hbase.regionserver.fair.region.close.lock";
public static final boolean DEFAULT_FAIR_REENTRANT_CLOSE_LOCK = true;
/** Conf key for the periodic flush interval */
public static final String MEMSTORE_PERIODIC_FLUSH_INTERVAL =
"hbase.regionserver.optionalcacheflushinterval";
/** Default interval for the memstore flush */
public static final int DEFAULT_CACHE_FLUSH_INTERVAL = 3600000;
/** Default interval for System tables memstore flush */
public static final int SYSTEM_CACHE_FLUSH_INTERVAL = 300000; // 5 minutes
/** Conf key to force a flush if there are already enough changes for one region in memstore */
public static final String MEMSTORE_FLUSH_PER_CHANGES =
"hbase.regionserver.flush.per.changes";
public static final long DEFAULT_FLUSH_PER_CHANGES = 30000000; // 30 million
/**
* The following MAX_FLUSH_PER_CHANGES is large enough because each KeyValue has 20+ bytes
* overhead. Therefore, even 1G empty KVs occupy at least 20GB memstore size for a single region
*/
public static final long MAX_FLUSH_PER_CHANGES = 1000000000; // 1G
public static final String CLOSE_WAIT_ABORT = "hbase.regionserver.close.wait.abort";
public static final boolean DEFAULT_CLOSE_WAIT_ABORT = true;
public static final String CLOSE_WAIT_TIME = "hbase.regionserver.close.wait.time.ms";
public static final long DEFAULT_CLOSE_WAIT_TIME = 60000; // 1 minute
public static final String CLOSE_WAIT_INTERVAL = "hbase.regionserver.close.wait.interval.ms";
public static final long DEFAULT_CLOSE_WAIT_INTERVAL = 10000; // 10 seconds
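// Illustrative sketch (not part of the original source): tuning the close/flush related keys above
// through a Configuration before opening regions. The values shown are arbitrary examples.
//
//   Configuration conf = HBaseConfiguration.create();
//   conf.setBoolean(HRegion.CLOSE_WAIT_ABORT, true);   // abort the RS if the close lock cannot be taken
//   conf.setLong(HRegion.CLOSE_WAIT_TIME, 30000);      // total close wait budget, in ms
//   conf.setLong(HRegion.CLOSE_WAIT_INTERVAL, 5000);   // per-attempt tryLock interval, in ms
//   conf.setInt(HRegion.MEMSTORE_PERIODIC_FLUSH_INTERVAL, HRegion.DEFAULT_CACHE_FLUSH_INTERVAL);
//   conf.setLong(HRegion.MEMSTORE_FLUSH_PER_CHANGES, HRegion.DEFAULT_FLUSH_PER_CHANGES);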
public Map<byte[], List<HStoreFile>> close(boolean abort) throws IOException {
return close(abort, false);
}
/**
* Close down this HRegion. Flush the cache unless the abort parameter is true,
* shut down each HStore, and don't service any more calls.
*
* This method could take some time to execute, so don't call it from a
* time-sensitive thread.
*
* @param abort true if server is aborting (only during testing)
* @param ignoreStatus true if the status should be ignored (won't be shown on the task list)
* @return Map of the column family name to the list of StoreFiles that the HRegion's component
* HStores make use of. Can be null if we are not to close at this time or we are already
* closed.
*
* @throws IOException e
* @throws DroppedSnapshotException Thrown when replay of wal is required
* because a Snapshot was not properly persisted. The region is put in closing mode, and the
* caller MUST abort after this.
*/
public Map<byte[], List<HStoreFile>> close(boolean abort, boolean ignoreStatus)
throws IOException {
// Only allow one thread to close at a time. Serialize them so dual
// threads attempting to close will run up against each other.
MonitoredTask status = TaskMonitor.get().createStatus(
"Closing region " + this.getRegionInfo().getEncodedName() +
(abort ? " due to abort" : ""), ignoreStatus);
status.enableStatusJournal(true);
status.setStatus("Waiting for close lock");
try {
synchronized (closeLock) {
return doClose(abort, status);
}
} finally {
if (LOG.isDebugEnabled()) {
LOG.debug("Region close journal for {}:\n{}", this.getRegionInfo().getEncodedName(),
status.prettyPrintJournal());
}
status.cleanup();
}
}
/**
* Exposed for some very specific unit tests.
*/
public void setClosing(boolean closing) {
this.closing.set(closing);
}
/**
* {@link HRegion#doClose} can block forever while waiting for the region write lock, for example
* when a unit test deliberately provokes a deadlock. If a timeout is set, {@link HRegion#doClose}
* throws an exception instead of blocking.
* @param timeoutForWriteLock the time, in seconds, to wait for the write lock in
* {@link HRegion#doClose}
*/
public void setTimeoutForWriteLock(long timeoutForWriteLock) {
assert timeoutForWriteLock >= 0;
this.timeoutForWriteLock = timeoutForWriteLock;
}
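// Illustrative sketch (not part of the original source): a unit test that provokes a close while a
// write lock is held can bound the wait instead of blocking forever. Names are hypothetical.
//
//   region.setTimeoutForWriteLock(10); // legacy seconds-based value, converted to ms in doClose()
//   // A subsequent region.close() will now fail with an IOException (or abort, if configured)
//   // rather than waiting indefinitely for the region write lock.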
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="UL_UNRELEASED_LOCK_EXCEPTION_PATH",
justification="I think FindBugs is confused")
private Map<byte[], List<HStoreFile>> doClose(boolean abort, MonitoredTask status)
throws IOException {
if (isClosed()) {
LOG.warn("Region " + this + " already closed");
return null;
}
if (coprocessorHost != null) {
status.setStatus("Running coprocessor pre-close hooks");
this.coprocessorHost.preClose(abort);
}
status.setStatus("Disabling compacts and flushes for region");
boolean canFlush = true;
synchronized (writestate) {
// Disable compacting and flushing by background threads for this
// region.
canFlush = !writestate.readOnly;
writestate.writesEnabled = false;
LOG.debug("Closing {}, disabling compactions & flushes",
this.getRegionInfo().getEncodedName());
waitForFlushesAndCompactions();
}
// If we were not just flushing, is it worth doing a preflush...one
// that will clear out the bulk of the memstore before we put up
// the close flag?
if (!abort && worthPreFlushing() && canFlush) {
status.setStatus("Pre-flushing region before close");
LOG.info("Running close preflush of {}", this.getRegionInfo().getEncodedName());
try {
internalFlushcache(status);
} catch (IOException ioe) {
// Failed to flush the region. Keep going.
status.setStatus("Failed pre-flush " + this + "; " + ioe.getMessage());
}
}
// Set the closing flag
// From this point new arrivals at the region lock will get NSRE.
this.closing.set(true);
LOG.info("Closing region {}", this);
// Acquire the close lock
// The configuration parameter CLOSE_WAIT_ABORT is overloaded to enable both
// the new regionserver abort condition and interrupts for running requests.
// If CLOSE_WAIT_ABORT is not enabled there is no change from earlier behavior,
// we will not attempt to interrupt threads servicing requests nor crash out
// the regionserver if something remains stubborn.
final boolean canAbort = conf.getBoolean(CLOSE_WAIT_ABORT, DEFAULT_CLOSE_WAIT_ABORT);
boolean useTimedWait = false;
if (timeoutForWriteLock != null && timeoutForWriteLock != Long.MAX_VALUE) {
// convert legacy use of timeoutForWriteLock in seconds to new use in millis
timeoutForWriteLock = TimeUnit.SECONDS.toMillis(timeoutForWriteLock);
useTimedWait = true;
} else if (canAbort) {
timeoutForWriteLock = conf.getLong(CLOSE_WAIT_TIME, DEFAULT_CLOSE_WAIT_TIME);
useTimedWait = true;
}
if (LOG.isDebugEnabled()) {
LOG.debug((useTimedWait ? "Time limited wait" : "Waiting without time limit") +
" for close lock on " + this);
}
final long closeWaitInterval = conf.getLong(CLOSE_WAIT_INTERVAL, DEFAULT_CLOSE_WAIT_INTERVAL);
long elapsedWaitTime = 0;
if (useTimedWait) {
// Sanity check configuration
long remainingWaitTime = timeoutForWriteLock;
if (remainingWaitTime < closeWaitInterval) {
LOG.warn("Time limit for close wait of " + timeoutForWriteLock +
" ms is less than the configured lock acquisition wait interval " +
closeWaitInterval + " ms, using wait interval as time limit");
remainingWaitTime = closeWaitInterval;
}
boolean acquired = false;
do {
long start = EnvironmentEdgeManager.currentTime();
try {
acquired = lock.writeLock().tryLock(Math.min(remainingWaitTime, closeWaitInterval),
TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
// Interrupted waiting for close lock. More likely the server is shutting down, not
// normal operation, so aborting upon interrupt while waiting on this lock would not
// provide much value. Throw an IOE (as IIOE) like we would in the case where we
// fail to acquire the lock.
String msg = "Interrupted while waiting for close lock on " + this;
LOG.warn(msg, e);
throw (InterruptedIOException) new InterruptedIOException(msg).initCause(e);
}
long elapsed = EnvironmentEdgeManager.currentTime() - start;
elapsedWaitTime += elapsed;
remainingWaitTime -= elapsed;
if (canAbort && !acquired && remainingWaitTime > 0) {
// Before we loop to wait again, interrupt all region operations that might
// still be in progress, to encourage them to break out of waiting states or
// inner loops, throw an exception to clients, and release the read lock via
// endRegionOperation.
if (LOG.isDebugEnabled()) {
LOG.debug("Interrupting region operations after waiting for close lock for " +
elapsedWaitTime + " ms on " + this + ", " + remainingWaitTime +
" ms remaining");
}
interruptRegionOperations();
}
} while (!acquired && remainingWaitTime > 0);
// If we fail to acquire the lock, trigger an abort if we can; otherwise throw an IOE
// to let the caller know we could not proceed with the close.
if (!acquired) {
String msg = "Failed to acquire close lock on " + this + " after waiting " +
elapsedWaitTime + " ms";
LOG.error(msg);
if (canAbort) {
// If we failed to acquire the write lock, abort the server
rsServices.abort(msg, null);
}
throw new IOException(msg);
}
} else {
long start = EnvironmentEdgeManager.currentTime();
lock.writeLock().lock();
elapsedWaitTime = EnvironmentEdgeManager.currentTime() - start;
}
if (LOG.isDebugEnabled()) {
LOG.debug("Acquired close lock on " + this + " after waiting " +
elapsedWaitTime + " ms");
}
status.setStatus("Disabling writes for close");
try {
if (this.isClosed()) {
status.abort("Already got closed by another process");
// SplitTransaction handles the null
return null;
}
LOG.debug("Updates disabled for region " + this);
// Don't flush the cache if we are aborting
if (!abort && canFlush) {
int failedFlushCount = 0;
int flushCount = 0;
long tmp = 0;
long remainingSize = this.memStoreSizing.getDataSize();
while (remainingSize > 0) {
try {
internalFlushcache(status);
if (flushCount > 0) {
LOG.info("Running extra flush, " + flushCount +
" (carrying snapshot?) " + this);
}
flushCount++;
tmp = this.memStoreSizing.getDataSize();
if (tmp >= remainingSize) {
failedFlushCount++;
}
remainingSize = tmp;
if (failedFlushCount > 5) {
// If we failed 5 times and are unable to clear memory, abort
// so we do not lose data
throw new DroppedSnapshotException("Failed clearing memory after " +
flushCount + " attempts on region: " +
Bytes.toStringBinary(getRegionInfo().getRegionName()));
}
} catch (IOException ioe) {
status.setStatus("Failed flush " + this + ", putting online again");
synchronized (writestate) {
writestate.writesEnabled = true;
}
// Have to throw to upper layers. I can't abort server from here.
throw ioe;
}
}
}
Map<byte[], List<HStoreFile>> result = new TreeMap<>(Bytes.BYTES_COMPARATOR);
if (!stores.isEmpty()) {
// initialize the thread pool for closing stores in parallel.
ThreadPoolExecutor storeCloserThreadPool =
getStoreOpenAndCloseThreadPool("StoreCloser-" +
getRegionInfo().getRegionNameAsString());
CompletionService<Pair<byte[], Collection<HStoreFile>>> completionService =
new ExecutorCompletionService<>(storeCloserThreadPool);
// close each store in parallel
for (HStore store : stores.values()) {
MemStoreSize mss = store.getFlushableSize();
if (!(abort || mss.getDataSize() == 0 || writestate.readOnly)) {
if (getRegionServerServices() != null) {
getRegionServerServices().abort("Assertion failed while closing store "
+ getRegionInfo().getRegionNameAsString() + " " + store
+ ". flushableSize expected=0, actual={" + mss
+ "}. Current memStoreSize=" + this.memStoreSizing.getMemStoreSize() +
". Maybe a coprocessor "
+ "operation failed and left the memstore in a partially updated state.", null);
}
}
completionService
.submit(new Callable<Pair<byte[], Collection<HStoreFile>>>() {
@Override
public Pair<byte[], Collection<HStoreFile>> call() throws IOException {
return new Pair<>(store.getColumnFamilyDescriptor().getName(), store.close());
}
});
}
try {
for (int i = 0; i < stores.size(); i++) {
Future<Pair<byte[], Collection<HStoreFile>>> future = completionService.take();
Pair<byte[], Collection<HStoreFile>> storeFiles = future.get();
List<HStoreFile> familyFiles = result.get(storeFiles.getFirst());
if (familyFiles == null) {
familyFiles = new ArrayList<>();
result.put(storeFiles.getFirst(), familyFiles);
}
familyFiles.addAll(storeFiles.getSecond());
}
} catch (InterruptedException e) {
throw throwOnInterrupt(e);
} catch (ExecutionException e) {
Throwable cause = e.getCause();
if (cause instanceof IOException) {
throw (IOException) cause;
}
throw new IOException(cause);
} finally {
storeCloserThreadPool.shutdownNow();
}
}
status.setStatus("Writing region close event to WAL");
// Always write close marker to wal even for read only table. This is not a big problem as we
// do not write any data into the region; it is just a meta edit in the WAL file.
if (!abort && wal != null && getRegionServerServices() != null &&
RegionReplicaUtil.isDefaultReplica(getRegionInfo())) {
writeRegionCloseMarker(wal);
}
this.closed.set(true);
if (!canFlush) {
decrMemStoreSize(this.memStoreSizing.getMemStoreSize());
} else if (this.memStoreSizing.getDataSize() != 0) {
LOG.error("Memstore data size is {} in region {}", this.memStoreSizing.getDataSize(), this);
}
if (coprocessorHost != null) {
status.setStatus("Running coprocessor post-close hooks");
this.coprocessorHost.postClose(abort);
}
if (this.metricsRegion != null) {
this.metricsRegion.close();
}
if (this.metricsRegionWrapper != null) {
Closeables.close(this.metricsRegionWrapper, true);
}
status.markComplete("Closed");
LOG.info("Closed {}", this);
return result;
} finally {
lock.writeLock().unlock();
}
}
/** Wait for all current flushes and compactions of the region to complete */
// TODO HBASE-18906. Check the usage (if any) in Phoenix and expose this or give alternate way for
// Phoenix needs.
public void waitForFlushesAndCompactions() {
synchronized (writestate) {
if (this.writestate.readOnly) {
// we should not wait for replayed flushes if we are read only (for example in case the
// region is a secondary replica).
return;
}
boolean interrupted = false;
try {
while (writestate.compacting.get() > 0 || writestate.flushing) {
LOG.debug("waiting for " + writestate.compacting + " compactions"
+ (writestate.flushing ? " & cache flush" : "") + " to complete for region " + this);
try {
writestate.wait();
} catch (InterruptedException iex) {
// essentially ignore and propagate the interrupt back up
LOG.warn("Interrupted while waiting in region {}", this);
interrupted = true;
break;
}
}
} finally {
if (interrupted) {
Thread.currentThread().interrupt();
}
}
}
}
/**
* Wait for all current flushes of the region to complete
*/
public void waitForFlushes() {
waitForFlushes(0);// Unbound wait
}
@Override
public boolean waitForFlushes(long timeout) {
synchronized (writestate) {
if (this.writestate.readOnly) {
// we should not wait for replayed flushes if we are read only (for example in case the
// region is a secondary replica).
return true;
}
if (!writestate.flushing) return true;
long start = System.currentTimeMillis();
long duration = 0;
boolean interrupted = false;
LOG.debug("waiting for cache flush to complete for region " + this);
try {
while (writestate.flushing) {
if (timeout > 0 && duration >= timeout) break;
try {
long toWait = timeout == 0 ? 0 : (timeout - duration);
writestate.wait(toWait);
} catch (InterruptedException iex) {
// essentially ignore and propagate the interrupt back up
LOG.warn("Interrupted while waiting in region {}", this);
interrupted = true;
break;
} finally {
duration = System.currentTimeMillis() - start;
}
}
} finally {
if (interrupted) {
Thread.currentThread().interrupt();
}
}
LOG.debug("Waited {} ms for region {} flush to complete", duration, this);
return !(writestate.flushing);
}
}
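// Illustrative sketch (not part of the original source): waiting a bounded amount of time for an
// in-flight flush before proceeding. The timeout value is arbitrary.
//
//   boolean flushDone = region.waitForFlushes(TimeUnit.SECONDS.toMillis(30));
//   if (!flushDone) {
//     // a flush was still running when the timeout elapsed
//   }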
protected ThreadPoolExecutor getStoreOpenAndCloseThreadPool(
final String threadNamePrefix) {
int numStores = Math.max(1, this.htableDescriptor.getColumnFamilyCount());
int maxThreads = Math.min(numStores,
conf.getInt(HConstants.HSTORE_OPEN_AND_CLOSE_THREADS_MAX,
HConstants.DEFAULT_HSTORE_OPEN_AND_CLOSE_THREADS_MAX));
return getOpenAndCloseThreadPool(maxThreads, threadNamePrefix);
}
protected ThreadPoolExecutor getStoreFileOpenAndCloseThreadPool(
final String threadNamePrefix) {
int numStores = Math.max(1, this.htableDescriptor.getColumnFamilyCount());
int maxThreads = Math.max(1,
conf.getInt(HConstants.HSTORE_OPEN_AND_CLOSE_THREADS_MAX,
HConstants.DEFAULT_HSTORE_OPEN_AND_CLOSE_THREADS_MAX)
/ numStores);
return getOpenAndCloseThreadPool(maxThreads, threadNamePrefix);
}
static ThreadPoolExecutor getOpenAndCloseThreadPool(int maxThreads,
final String threadNamePrefix) {
return Threads.getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS,
new ThreadFactory() {
private int count = 1;
@Override
public Thread newThread(Runnable r) {
return new Thread(r, threadNamePrefix + "-" + count++);
}
});
}
/**
* @return True if its worth doing a flush before we put up the close flag.
*/
private boolean worthPreFlushing() {
return this.memStoreSizing.getDataSize() >
this.conf.getLong("hbase.hregion.preclose.flush.size", 1024 * 1024 * 5);
}
//////////////////////////////////////////////////////////////////////////////
// HRegion accessors
//////////////////////////////////////////////////////////////////////////////
@Override
public TableDescriptor getTableDescriptor() {
return this.htableDescriptor;
}
public void setTableDescriptor(TableDescriptor desc) {
htableDescriptor = desc;
}
/** @return WAL in use for this region */
public WAL getWAL() {
return this.wal;
}
public BlockCache getBlockCache() {
return this.blockCache;
}
/**
* Only used for unit tests which don't start a region server.
*/
public void setBlockCache(BlockCache blockCache) {
this.blockCache = blockCache;
}
public MobFileCache getMobFileCache() {
return this.mobFileCache;
}
/**
* Only used for unit tests which don't start a region server.
*/
public void setMobFileCache(MobFileCache mobFileCache) {
this.mobFileCache = mobFileCache;
}
/**
* @return split policy for this region.
*/
RegionSplitPolicy getSplitPolicy() {
return this.splitPolicy;
}
/**
* A split takes the config from the parent region & passes it to the daughter
* region's constructor. If 'conf' was passed, you would end up using the HTD
* of the parent region in addition to the new daughter HTD. Pass 'baseConf'
* to the daughter regions to avoid this tricky dedupe problem.
* @return Configuration object
*/
Configuration getBaseConf() {
return this.baseConf;
}
/** @return {@link FileSystem} being used by this region */
public FileSystem getFilesystem() {
return fs.getFileSystem();
}
/** @return the {@link HRegionFileSystem} used by this region */
public HRegionFileSystem getRegionFileSystem() {
return this.fs;
}
/** @return the WAL {@link HRegionFileSystem} used by this region */
HRegionWALFileSystem getRegionWALFileSystem() throws IOException {
return new HRegionWALFileSystem(conf, getWalFileSystem(),
CommonFSUtils.getWALTableDir(conf, htableDescriptor.getTableName()), fs.getRegionInfo());
}
/** @return the WAL {@link FileSystem} being used by this region */
FileSystem getWalFileSystem() throws IOException {
if (walFS == null) {
walFS = CommonFSUtils.getWALFileSystem(conf);
}
return walFS;
}
/**
* @return the Region directory under WALRootDirectory
* @throws IOException if there is an error getting WALRootDir
*/
public Path getWALRegionDir() throws IOException {
if (regionDir == null) {
regionDir = CommonFSUtils.getWALRegionDir(conf, getRegionInfo().getTable(),
getRegionInfo().getEncodedName());
}
return regionDir;
}
@Override
public long getEarliestFlushTimeForAllStores() {
return Collections.min(lastStoreFlushTimeMap.values());
}
@Override
public long getOldestHfileTs(boolean majorCompactionOnly) throws IOException {
long result = Long.MAX_VALUE;
for (HStore store : stores.values()) {
Collection<HStoreFile> storeFiles = store.getStorefiles();
if (storeFiles == null) {
continue;
}
for (HStoreFile file : storeFiles) {
StoreFileReader sfReader = file.getReader();
if (sfReader == null) {
continue;
}
HFile.Reader reader = sfReader.getHFileReader();
if (reader == null) {
continue;
}
if (majorCompactionOnly) {
byte[] val = reader.getHFileInfo().get(MAJOR_COMPACTION_KEY);
if (val == null || !Bytes.toBoolean(val)) {
continue;
}
}
result = Math.min(result, reader.getFileContext().getFileCreateTime());
}
}
return result == Long.MAX_VALUE ? 0 : result;
}
RegionLoad.Builder setCompleteSequenceId(RegionLoad.Builder regionLoadBldr) {
long lastFlushOpSeqIdLocal = this.lastFlushOpSeqId;
byte[] encodedRegionName = this.getRegionInfo().getEncodedNameAsBytes();
regionLoadBldr.clearStoreCompleteSequenceId();
for (byte[] familyName : this.stores.keySet()) {
long earliest = this.wal.getEarliestMemStoreSeqNum(encodedRegionName, familyName);
// Subtract 1 to go earlier than the current oldest, unflushed edit in memstore; this will
// give us a sequence id that is for sure flushed. We want edit replay to start after this
// sequence id in this region. If NO_SEQNUM, use the region's maximum flush id.
long csid = (earliest == HConstants.NO_SEQNUM)? lastFlushOpSeqIdLocal: earliest - 1;
regionLoadBldr.addStoreCompleteSequenceId(StoreSequenceId.newBuilder()
.setFamilyName(UnsafeByteOperations.unsafeWrap(familyName)).setSequenceId(csid).build());
}
return regionLoadBldr.setCompleteSequenceId(getMaxFlushedSeqId());
}
//////////////////////////////////////////////////////////////////////////////
// HRegion maintenance.
//
// These methods are meant to be called periodically by the HRegionServer for
// upkeep.
//////////////////////////////////////////////////////////////////////////////
/**
* Do preparation for pending compaction.
* @throws IOException
*/
protected void doRegionCompactionPrep() throws IOException {
}
/**
* Synchronously compact all stores in the region.
* <p>This operation could block for a long time, so don't call it from a
* time-sensitive thread.
* <p>Note that no locks are taken to prevent possible conflicts between
* compaction and splitting activities. The regionserver does not normally compact
* and split in parallel. However by calling this method you may introduce
* unexpected and unhandled concurrency. Don't do this unless you know what
* you are doing.
*
* @param majorCompaction True to force a major compaction regardless of thresholds
* @throws IOException
*/
public void compact(boolean majorCompaction) throws IOException {
if (majorCompaction) {
stores.values().forEach(HStore::triggerMajorCompaction);
}
for (HStore s : stores.values()) {
Optional<CompactionContext> compaction = s.requestCompaction();
if (compaction.isPresent()) {
ThroughputController controller = null;
if (rsServices != null) {
controller = CompactionThroughputControllerFactory.create(rsServices, conf);
}
if (controller == null) {
controller = NoLimitThroughputController.INSTANCE;
}
compact(compaction.get(), s, controller, null);
}
}
}
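// Illustrative sketch (not part of the original source): forcing a synchronous major compaction of
// every store in the region, e.g. from a utility or test. See the warning in the javadoc above
// about running this concurrently with splits.
//
//   region.compact(true);   // trigger and run a major compaction of all stores
//   region.compact(false);  // only compact stores that the compaction policy selects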
/**
* This is a helper function that compacts all the stores synchronously.
* <p>
* It is used by utilities and testing
*/
public void compactStores() throws IOException {
for (HStore s : stores.values()) {
Optional<CompactionContext> compaction = s.requestCompaction();
if (compaction.isPresent()) {
compact(compaction.get(), s, NoLimitThroughputController.INSTANCE, null);
}
}
}
/**
* This is a helper function that compacts the given store.
* <p>
* It is used by utilities and testing
*/
void compactStore(byte[] family, ThroughputController throughputController) throws IOException {
HStore s = getStore(family);
Optional<CompactionContext> compaction = s.requestCompaction();
if (compaction.isPresent()) {
compact(compaction.get(), s, throughputController, null);
}
}
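// Illustrative sketch (not part of the original source): compacting a single column family with an
// explicit throughput controller. The family name "cf" is hypothetical.
//
//   region.compactStore(Bytes.toBytes("cf"), NoLimitThroughputController.INSTANCE);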
/**
* Called by compaction thread and after region is opened to compact the
* HStores if necessary.
*
* <p>This operation could block for a long time, so don't call it from a
* time-sensitive thread.
*
* Note that no locking is necessary at this level because compaction only
* conflicts with a region split, and that cannot happen because the region
* server does them sequentially and not in parallel.
*
* @param compaction Compaction details, obtained by requestCompaction()
* @param throughputController
* @return whether the compaction completed
*/
public boolean compact(CompactionContext compaction, HStore store,
ThroughputController throughputController) throws IOException {
return compact(compaction, store, throughputController, null);
}
private boolean shouldForbidMajorCompaction() {
if (rsServices != null && rsServices.getReplicationSourceService() != null) {
return rsServices.getReplicationSourceService().getSyncReplicationPeerInfoProvider()
.checkState(getRegionInfo().getTable(), ForbidMajorCompactionChecker.get());
}
return false;
}
/**
* We are trying to remove / relax the region read lock for compaction.
* Let's see what the potential race conditions are among the operations (user scan,
* region split, region close and region bulk load).
*
* user scan ---> region read lock
* region split --> region close first --> region write lock
* region close --> region write lock
* region bulk load --> region write lock
*
* read lock is compatible with read lock. ---> no problem with user scan/read
* region bulk load does not cause a problem for compaction (no consistency problem, store lock
* will help the store file accounting).
* They can run almost concurrently at the region level.
*
* The only remaining race condition is between the region close and compaction.
* So we will evaluate, below, how region close interferes with compaction if compaction does
* not acquire region read lock.
*
* Here are the steps for compaction:
* 1. obtain list of StoreFile's
* 2. create StoreFileScanner's based on list from #1
* 3. perform compaction and save resulting files under tmp dir
* 4. swap in compacted files
*
* #1 is guarded by store lock. This patch does not change this --> no worse or better
* For #2, we obtain smallest read point (for region) across all the Scanners (for both default
* compactor and stripe compactor).
* The read points are for user scans. Region keeps the read points for all currently open
* user scanners.
* Compaction needs to know the smallest read point so that during re-write of the hfiles,
* it can remove the mvcc points for the cells if their mvccs are older than the smallest
* since they are not needed anymore.
* This will not conflict with compaction.
* For #3, it can be performed in parallel to other operations.
* For #4 bulk load and compaction don't conflict with each other on the region level
* (for multi-family atomicity).
* Region close and compaction are guarded pretty well by the 'writestate'.
* In HRegion#doClose(), we have :
* synchronized (writestate) {
* // Disable compacting and flushing by background threads for this
* // region.
* canFlush = !writestate.readOnly;
* writestate.writesEnabled = false;
* LOG.debug("Closing " + this + ": disabling compactions & flushes");
* waitForFlushesAndCompactions();
* }
* waitForFlushesAndCompactions() would wait for writestate.compacting to come down to 0.
* and in HRegion.compact()
* try {
* synchronized (writestate) {
* if (writestate.writesEnabled) {
* wasStateSet = true;
* ++writestate.compacting;
* } else {
* String msg = "NOT compacting region " + this + ". Writes disabled.";
* LOG.info(msg);
* status.abort(msg);
* return false;
* }
* }
* Also in compactor.performCompaction():
* check periodically to see if a system stop is requested
* if (closeChecker != null && closeChecker.isTimeLimit(store, now)) {
* progress.cancel();
* return false;
* }
* if (closeChecker != null && closeChecker.isSizeLimit(store, len)) {
* progress.cancel();
* return false;
* }
*/
public boolean compact(CompactionContext compaction, HStore store,
ThroughputController throughputController, User user) throws IOException {
assert compaction != null && compaction.hasSelection();
assert !compaction.getRequest().getFiles().isEmpty();
if (this.closing.get() || this.closed.get()) {
LOG.debug("Skipping compaction on " + this + " because closing/closed");
store.cancelRequestedCompaction(compaction);
return false;
}
if (compaction.getRequest().isAllFiles() && shouldForbidMajorCompaction()) {
LOG.warn("Skipping major compaction on " + this
+ " because this cluster is transiting sync replication state"
+ " from STANDBY to DOWNGRADE_ACTIVE");
store.cancelRequestedCompaction(compaction);
return false;
}
MonitoredTask status = null;
boolean requestNeedsCancellation = true;
try {
byte[] cf = Bytes.toBytes(store.getColumnFamilyName());
if (stores.get(cf) != store) {
LOG.warn("Store " + store.getColumnFamilyName() + " on region " + this
+ " has been re-instantiated, cancel this compaction request. "
+ " It may be caused by the roll back of split transaction");
return false;
}
status = TaskMonitor.get().createStatus("Compacting " + store + " in " + this);
status.enableStatusJournal(false);
if (this.closed.get()) {
String msg = "Skipping compaction on " + this + " because closed";
LOG.debug(msg);
status.abort(msg);
return false;
}
boolean wasStateSet = false;
try {
synchronized (writestate) {
if (writestate.writesEnabled) {
wasStateSet = true;
writestate.compacting.incrementAndGet();
} else {
String msg = "NOT compacting region " + this + ". Writes disabled.";
LOG.info(msg);
status.abort(msg);
return false;
}
}
LOG.info("Starting compaction of {} in {}{}", store, this,
(compaction.getRequest().isOffPeak()?" as an off-peak compaction":""));
doRegionCompactionPrep();
try {
status.setStatus("Compacting store " + store);
// We no longer need to cancel the request on the way out of this
// method because Store#compact will clean up unconditionally
requestNeedsCancellation = false;
store.compact(compaction, throughputController, user);
} catch (InterruptedIOException iioe) {
String msg = "region " + this + " compaction interrupted";
LOG.info(msg, iioe);
status.abort(msg);
return false;
}
} finally {
if (wasStateSet) {
synchronized (writestate) {
writestate.compacting.decrementAndGet();
if (writestate.compacting.get() <= 0) {
writestate.notifyAll();
}
}
}
}
status.markComplete("Compaction complete");
return true;
} finally {
if (requestNeedsCancellation) store.cancelRequestedCompaction(compaction);
if (status != null) {
LOG.debug("Compaction status journal for {}:\n{}", this.getRegionInfo().getEncodedName(),
status.prettyPrintJournal());
status.cleanup();
}
}
}
/**
* Flush the cache.
*
* <p>When this method is called the cache will be flushed unless:
* <ol>
* <li>the cache is empty</li>
* <li>the region is closed.</li>
* <li>a flush is already in progress</li>
* <li>writes are disabled</li>
* </ol>
*
* <p>This method may block for some time, so it should not be called from a
* time-sensitive thread.
* @param flushAllStores whether we want to force a flush of all stores
* @return FlushResult indicating whether the flush was successful or not and if
* the region needs compacting
*
* @throws IOException general io exceptions
* @throws DroppedSnapshotException Thrown when replay of wal is required
* because a snapshot was not properly persisted. The region is put in closing mode, and the
* caller MUST abort after this.
*/
// TODO HBASE-18905. We might have to expose a requestFlush API for CPs
public FlushResult flush(boolean flushAllStores) throws IOException {
return flushcache(flushAllStores, false, FlushLifeCycleTracker.DUMMY);
}
public interface FlushResult {
enum Result {
FLUSHED_NO_COMPACTION_NEEDED,
FLUSHED_COMPACTION_NEEDED,
// Special case where a flush didn't run because there's nothing in the memstores. Used when
// bulk loading to know when we can still load even if a flush didn't happen.
CANNOT_FLUSH_MEMSTORE_EMPTY,
CANNOT_FLUSH
}
/** @return the detailed result code */
Result getResult();
/** @return true if the memstores were flushed, else false */
boolean isFlushSucceeded();
/** @return True if the flush requested a compaction, else false */
boolean isCompactionNeeded();
}
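// Illustrative sketch (not part of the original source): requesting a flush of all stores and
// interpreting the FlushResult defined above. The variable name 'region' is hypothetical.
//
//   FlushResult result = region.flush(true);
//   if (result.isFlushSucceeded() && result.isCompactionNeeded()) {
//     // the flush produced enough files that a compaction should be requested
//   } else if (result.getResult() == FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY) {
//     // nothing was in the memstores; e.g. a bulk load can proceed without a flush
//   }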
public FlushResultImpl flushcache(boolean flushAllStores, boolean writeFlushRequestWalMarker,
FlushLifeCycleTracker tracker) throws IOException {
List<byte[]> families = null;
if (flushAllStores) {
families = new ArrayList<>();
families.addAll(this.getTableDescriptor().getColumnFamilyNames());
}
return this.flushcache(families, writeFlushRequestWalMarker, tracker);
}
/**
* Flush the cache.
*
* When this method is called the cache will be flushed unless:
* <ol>
* <li>the cache is empty</li>
* <li>the region is closed.</li>
* <li>a flush is already in progress</li>
* <li>writes are disabled</li>
* </ol>
*
* <p>This method may block for some time, so it should not be called from a
* time-sensitive thread.
* @param families stores of region to flush.
* @param writeFlushRequestWalMarker whether to write the flush request marker to WAL
* @param tracker used to track the life cycle of this flush
* @return whether the flush is success and whether the region needs compacting
*
* @throws IOException general io exceptions
* @throws DroppedSnapshotException Thrown when replay of wal is required
* because a Snapshot was not properly persisted. The region is put in closing mode, and the
* caller MUST abort after this.
*/
public FlushResultImpl flushcache(List<byte[]> families,
boolean writeFlushRequestWalMarker, FlushLifeCycleTracker tracker) throws IOException {
// fail-fast instead of waiting on the lock
if (this.closing.get()) {
String msg = "Skipping flush on " + this + " because closing";
LOG.debug(msg);
return new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, msg, false);
}
MonitoredTask status = TaskMonitor.get().createStatus("Flushing " + this);
status.enableStatusJournal(false);
status.setStatus("Acquiring readlock on region");
// block waiting for the lock for flushing cache
lock.readLock().lock();
boolean flushed = true;
try {
if (this.closed.get()) {
String msg = "Skipping flush on " + this + " because closed";
LOG.debug(msg);
status.abort(msg);
flushed = false;
return new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, msg, false);
}
if (coprocessorHost != null) {
status.setStatus("Running coprocessor pre-flush hooks");
coprocessorHost.preFlush(tracker);
}
// TODO: this should be managed within memstore with the snapshot, updated only after flush
// successful
if (numMutationsWithoutWAL.sum() > 0) {
numMutationsWithoutWAL.reset();
dataInMemoryWithoutWAL.reset();
}
synchronized (writestate) {
if (!writestate.flushing && writestate.writesEnabled) {
this.writestate.flushing = true;
} else {
String msg = "NOT flushing " + this + " as " + (writestate.flushing ? "already flushing"
: "writes are not enabled");
LOG.debug(msg);
status.abort(msg);
flushed = false;
return new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, msg, false);
}
}
try {
// The reason that we do not always use flushPolicy is that, when the flush is
// caused by logRoller, we should select stores which must be flushed
// rather than stores which could be flushed.
Collection<HStore> specificStoresToFlush = null;
if (families != null) {
specificStoresToFlush = getSpecificStores(families);
} else {
specificStoresToFlush = flushPolicy.selectStoresToFlush();
}
FlushResultImpl fs =
internalFlushcache(specificStoresToFlush, status, writeFlushRequestWalMarker, tracker);
if (coprocessorHost != null) {
status.setStatus("Running post-flush coprocessor hooks");
coprocessorHost.postFlush(tracker);
}
if(fs.isFlushSucceeded()) {
flushesQueued.reset();
}
status.markComplete("Flush successful " + fs.toString());
return fs;
} finally {
synchronized (writestate) {
writestate.flushing = false;
this.writestate.flushRequested = false;
writestate.notifyAll();
}
}
} finally {
lock.readLock().unlock();
if (flushed) {
// Don't log this journal stuff if no flush -- confusing.
LOG.debug("Flush status journal for {}:\n{}", this.getRegionInfo().getEncodedName(),
status.prettyPrintJournal());
}
status.cleanup();
}
}
/**
* get stores which matches the specified families
*
* @return the stores need to be flushed.
*/
private Collection<HStore> getSpecificStores(List<byte[]> families) {
Collection<HStore> specificStoresToFlush = new ArrayList<>();
for (byte[] family : families) {
specificStoresToFlush.add(stores.get(family));
}
return specificStoresToFlush;
}
/**
* Should the store be flushed because it is old enough.
* <p>
* Every FlushPolicy should call this to determine whether a store is old enough to flush (except
* for a policy that always flushes all stores). Otherwise the method will always
* return true, which will make a lot of flush requests.
*/
boolean shouldFlushStore(HStore store) {
long earliest = this.wal.getEarliestMemStoreSeqNum(getRegionInfo().getEncodedNameAsBytes(),
store.getColumnFamilyDescriptor().getName()) - 1;
if (earliest > 0 && earliest + flushPerChanges < mvcc.getReadPoint()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Flush column family " + store.getColumnFamilyName() + " of " +
getRegionInfo().getEncodedName() + " because unflushed sequenceid=" + earliest +
" is > " + this.flushPerChanges + " from current=" + mvcc.getReadPoint());
}
return true;
}
if (this.flushCheckInterval <= 0) {
return false;
}
long now = EnvironmentEdgeManager.currentTime();
if (store.timeOfOldestEdit() < now - this.flushCheckInterval) {
if (LOG.isDebugEnabled()) {
LOG.debug("Flush column family: " + store.getColumnFamilyName() + " of " +
getRegionInfo().getEncodedName() + " because time of oldest edit=" +
store.timeOfOldestEdit() + " is > " + this.flushCheckInterval + " from now =" + now);
}
return true;
}
return false;
}
/**
* Should the memstore be flushed now
*/
boolean shouldFlush(final StringBuilder whyFlush) {
whyFlush.setLength(0);
// This is a rough measure.
if (this.maxFlushedSeqId > 0
&& (this.maxFlushedSeqId + this.flushPerChanges < this.mvcc.getReadPoint())) {
whyFlush.append("more than max edits, " + this.flushPerChanges + ", since last flush");
return true;
}
long modifiedFlushCheckInterval = flushCheckInterval;
if (getRegionInfo().getTable().isSystemTable() &&
getRegionInfo().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
modifiedFlushCheckInterval = SYSTEM_CACHE_FLUSH_INTERVAL;
}
if (modifiedFlushCheckInterval <= 0) { //disabled
return false;
}
long now = EnvironmentEdgeManager.currentTime();
//if we flushed in the recent past, we don't need to do again now
if ((now - getEarliestFlushTimeForAllStores() < modifiedFlushCheckInterval)) {
return false;
}
//since we didn't flush in the recent past, flush now if certain conditions
//are met. Return true on first such memstore hit.
for (HStore s : stores.values()) {
if (s.timeOfOldestEdit() < now - modifiedFlushCheckInterval) {
// we have an old enough edit in the memstore, flush
whyFlush.append(s.toString() + " has an old edit so flush to free WALs");
return true;
}
}
return false;
}
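// Illustrative sketch (not part of the original source): how a periodic chore might use
// shouldFlush to decide whether to request a flush, logging the reason it reports.
//
//   StringBuilder whyFlush = new StringBuilder();
//   if (region.shouldFlush(whyFlush)) {
//     LOG.info("Requesting flush of " + region + " because " + whyFlush);
//     // ... ask the flush requester to queue a flush for this region ...
//   }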
/**
* Flushing all stores.
* @see #internalFlushcache(Collection, MonitoredTask, boolean, FlushLifeCycleTracker)
*/
private FlushResult internalFlushcache(MonitoredTask status) throws IOException {
return internalFlushcache(stores.values(), status, false, FlushLifeCycleTracker.DUMMY);
}
/**
* Flushing given stores.
* @see #internalFlushcache(WAL, long, Collection, MonitoredTask, boolean, FlushLifeCycleTracker)
*/
private FlushResultImpl internalFlushcache(Collection<HStore> storesToFlush, MonitoredTask status,
boolean writeFlushWalMarker, FlushLifeCycleTracker tracker) throws IOException {
return internalFlushcache(this.wal, HConstants.NO_SEQNUM, storesToFlush, status,
writeFlushWalMarker, tracker);
}
/**
* Flush the memstore. Flushing the memstore is a little tricky. We have a lot of updates in the
* memstore, all of which have also been written to the wal. We need to write those updates in the
* memstore out to disk, while being able to process reads/writes as much as possible during the
* flush operation.
* <p>
* This method may block for some time. Every time you call it, we up the region's sequence id even
* if we don't flush; i.e. the returned sequence id will be at least one larger than the last edit
* applied to this region. The returned id does not refer to an actual edit. The returned id can
* be used for say installing a bulk loaded file just ahead of the last hfile that was the result
* of this flush, etc.
* @param wal Null if we're NOT to go via wal.
* @param myseqid The seqid to use if <code>wal</code> is null writing out flush file.
* @param storesToFlush The list of stores to flush.
* @return object describing the flush's state
* @throws IOException general io exceptions
* @throws DroppedSnapshotException Thrown when replay of WAL is required.
*/
protected FlushResultImpl internalFlushcache(WAL wal, long myseqid,
Collection<HStore> storesToFlush, MonitoredTask status, boolean writeFlushWalMarker,
FlushLifeCycleTracker tracker) throws IOException {
PrepareFlushResult result =
internalPrepareFlushCache(wal, myseqid, storesToFlush, status, writeFlushWalMarker, tracker);
if (result.result == null) {
return internalFlushCacheAndCommit(wal, status, result, storesToFlush);
} else {
return result.result; // early exit due to failure from prepare stage
}
}
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="DLS_DEAD_LOCAL_STORE",
justification="FindBugs seems confused about trxId")
protected PrepareFlushResult internalPrepareFlushCache(WAL wal, long myseqid,
Collection<HStore> storesToFlush, MonitoredTask status, boolean writeFlushWalMarker,
FlushLifeCycleTracker tracker) throws IOException {
if (this.rsServices != null && this.rsServices.isAborted()) {
// Don't flush when server aborting, it's unsafe
throw new IOException("Aborting flush because server is aborted...");
}
final long startTime = EnvironmentEdgeManager.currentTime();
// If nothing to flush, return, but return with a valid unused sequenceId.
// It's needed by bulk upload IIRC. It flushes until there are no edits in memory so it can insert
// a bulk loaded file between memory and existing hfiles. It wants a good sequenceId that belongs
// to no other edit that it can use to associate with the bulk load. Hence this little dance below
// to go get one.
if (this.memStoreSizing.getDataSize() <= 0) {
// Take an update lock so no edits can come into memory just yet.
this.updatesLock.writeLock().lock();
WriteEntry writeEntry = null;
try {
if (this.memStoreSizing.getDataSize() <= 0) {
// Presume that if there are still no edits in the memstore, then there are no edits for
// this region out in the WAL subsystem so no need to do any trickery clearing out
// edits in the WAL sub-system. Up the sequence number so the resulting flush id is for
// sure just beyond the last appended region edit and not associated with any edit
// (useful as marker when bulk loading, etc.).
if (wal != null) {
writeEntry = mvcc.begin();
long flushOpSeqId = writeEntry.getWriteNumber();
FlushResultImpl flushResult =
new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, flushOpSeqId,
"Nothing to flush", writeFlushRequestMarkerToWAL(wal, writeFlushWalMarker));
mvcc.completeAndWait(writeEntry);
// Set to null so we don't complete it again down in finally block.
writeEntry = null;
return new PrepareFlushResult(flushResult, myseqid);
} else {
return new PrepareFlushResult(new FlushResultImpl(
FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, "Nothing to flush", false), myseqid);
}
}
} finally {
if (writeEntry != null) {
// If writeEntry is non-null, this operation failed; the mvcc transaction failed...
// but complete it anyway so it doesn't block the mvcc queue.
mvcc.complete(writeEntry);
}
this.updatesLock.writeLock().unlock();
}
}
logFatLineOnFlush(storesToFlush, myseqid);
// Stop updates while we snapshot the memstore of all of these regions' stores. We only have
// to do this for a moment. It is quick. We also set the memstore size to zero here before we
// allow updates again so its value will represent the size of the updates received
// during flush
// We have to take an update lock during snapshot, or else a write could end up in both snapshot
// and memstore (makes it difficult to do atomic rows then)
status.setStatus("Obtaining lock to block concurrent updates");
// block waiting for the lock for internal flush
this.updatesLock.writeLock().lock();
status.setStatus("Preparing flush snapshotting stores in " + getRegionInfo().getEncodedName());
MemStoreSizing totalSizeOfFlushableStores = new NonThreadSafeMemStoreSizing();
Map<byte[], Long> flushedFamilyNamesToSeq = new HashMap<>();
for (HStore store : storesToFlush) {
flushedFamilyNamesToSeq.put(store.getColumnFamilyDescriptor().getName(),
store.preFlushSeqIDEstimation());
}
TreeMap<byte[], StoreFlushContext> storeFlushCtxs = new TreeMap<>(Bytes.BYTES_COMPARATOR);
TreeMap<byte[], List<Path>> committedFiles = new TreeMap<>(Bytes.BYTES_COMPARATOR);
TreeMap<byte[], MemStoreSize> storeFlushableSize = new TreeMap<>(Bytes.BYTES_COMPARATOR);
// The sequence id of this flush operation which is used to log FlushMarker and pass to
// createFlushContext to use as the store file's sequence id. It can be in advance of edits
// still in the memstore, edits that are in other column families yet to be flushed.
long flushOpSeqId = HConstants.NO_SEQNUM;
// The max flushed sequence id after this flush operation completes. All edits in memstore
// will be in advance of this sequence id.
long flushedSeqId = HConstants.NO_SEQNUM;
byte[] encodedRegionName = getRegionInfo().getEncodedNameAsBytes();
try {
if (wal != null) {
Long earliestUnflushedSequenceIdForTheRegion =
wal.startCacheFlush(encodedRegionName, flushedFamilyNamesToSeq);
if (earliestUnflushedSequenceIdForTheRegion == null) {
// This should never happen. This is how startCacheFlush signals flush cannot proceed.
String msg = this.getRegionInfo().getEncodedName() + " flush aborted; WAL closing.";
status.setStatus(msg);
return new PrepareFlushResult(
new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, msg, false),
myseqid);
}
flushOpSeqId = getNextSequenceId(wal);
// Back up 1, minus 1 from oldest sequence id in memstore to get last 'flushed' edit
flushedSeqId =
earliestUnflushedSequenceIdForTheRegion.longValue() == HConstants.NO_SEQNUM?
flushOpSeqId: earliestUnflushedSequenceIdForTheRegion.longValue() - 1;
} else {
// use the provided sequence Id as WAL is not being used for this flush.
flushedSeqId = flushOpSeqId = myseqid;
}
for (HStore s : storesToFlush) {
storeFlushCtxs.put(s.getColumnFamilyDescriptor().getName(),
s.createFlushContext(flushOpSeqId, tracker));
// for writing stores to WAL
committedFiles.put(s.getColumnFamilyDescriptor().getName(), null);
}
// write the snapshot start to WAL
if (wal != null && !writestate.readOnly) {
FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.START_FLUSH,
getRegionInfo(), flushOpSeqId, committedFiles);
// No sync. Sync is below where no updates lock and we do FlushAction.COMMIT_FLUSH
WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, false,
mvcc);
}
// Prepare flush (take a snapshot)
storeFlushCtxs.forEach((name, flush) -> {
MemStoreSize snapshotSize = flush.prepare();
totalSizeOfFlushableStores.incMemStoreSize(snapshotSize);
storeFlushableSize.put(name, snapshotSize);
});
} catch (IOException ex) {
doAbortFlushToWAL(wal, flushOpSeqId, committedFiles);
throw ex;
} finally {
this.updatesLock.writeLock().unlock();
}
String s = "Finished memstore snapshotting " + this + ", syncing WAL and waiting on mvcc, " +
"flushsize=" + totalSizeOfFlushableStores;
status.setStatus(s);
doSyncOfUnflushedWALChanges(wal, getRegionInfo());
return new PrepareFlushResult(storeFlushCtxs, committedFiles, storeFlushableSize, startTime,
flushOpSeqId, flushedSeqId, totalSizeOfFlushableStores);
}
/**
* Utility method broken out of internalPrepareFlushCache so that method is smaller.
*/
private void logFatLineOnFlush(Collection<HStore> storesToFlush, long sequenceId) {
if (!LOG.isInfoEnabled()) {
return;
}
// Log a fat line detailing what is being flushed.
StringBuilder perCfExtras = null;
if (!isAllFamilies(storesToFlush)) {
perCfExtras = new StringBuilder();
for (HStore store: storesToFlush) {
MemStoreSize mss = store.getFlushableSize();
perCfExtras.append("; ").append(store.getColumnFamilyName());
perCfExtras.append("={dataSize=")
.append(StringUtils.byteDesc(mss.getDataSize()));
perCfExtras.append(", heapSize=")
.append(StringUtils.byteDesc(mss.getHeapSize()));
perCfExtras.append(", offHeapSize=")
.append(StringUtils.byteDesc(mss.getOffHeapSize()));
perCfExtras.append("}");
}
}
MemStoreSize mss = this.memStoreSizing.getMemStoreSize();
LOG.info("Flushing " + this.getRegionInfo().getEncodedName() + " " +
storesToFlush.size() + "/" + stores.size() + " column families," +
" dataSize=" + StringUtils.byteDesc(mss.getDataSize()) +
" heapSize=" + StringUtils.byteDesc(mss.getHeapSize()) +
((perCfExtras != null && perCfExtras.length() > 0)? perCfExtras.toString(): "") +
((wal != null) ? "" : "; WAL is null, using passed sequenceid=" + sequenceId));
}
private void doAbortFlushToWAL(final WAL wal, final long flushOpSeqId,
final Map<byte[], List<Path>> committedFiles) {
if (wal == null) return;
try {
FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.ABORT_FLUSH,
getRegionInfo(), flushOpSeqId, committedFiles);
WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, false,
mvcc);
} catch (Throwable t) {
LOG.warn("Received unexpected exception trying to write ABORT_FLUSH marker to WAL: {} in "
+ " region {}", StringUtils.stringifyException(t), this);
// ignore this since we will be aborting the RS with DSE.
}
// we have called wal.startCacheFlush(), now we have to abort it
wal.abortCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
}
/**
* Sync unflushed WAL changes. See HBASE-8208 for details
*/
private static void doSyncOfUnflushedWALChanges(final WAL wal, final RegionInfo hri)
throws IOException {
if (wal == null) {
return;
}
try {
wal.sync(); // ensure that flush marker is sync'ed
} catch (IOException ioe) {
wal.abortCacheFlush(hri.getEncodedNameAsBytes());
throw ioe;
}
}
/**
* @return True if passed Set is all families in the region.
*/
private boolean isAllFamilies(Collection<HStore> families) {
return families == null || this.stores.size() == families.size();
}
/**
* Writes a marker to WAL indicating a flush is requested but cannot be completed due to various
* reasons. Ignores exceptions from WAL. Returns whether the write succeeded.
* @param wal the WAL to write the marker to
* @return whether the WAL write was successful
*/
private boolean writeFlushRequestMarkerToWAL(WAL wal, boolean writeFlushWalMarker) {
if (writeFlushWalMarker && wal != null && !writestate.readOnly) {
FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.CANNOT_FLUSH,
getRegionInfo(), -1, new TreeMap<>(Bytes.BYTES_COMPARATOR));
try {
WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, true,
mvcc);
return true;
} catch (IOException e) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Received exception while trying to write the flush request to wal", e);
}
}
return false;
}
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NN_NAKED_NOTIFY",
justification="Intentional; notify is about completed flush")
protected FlushResultImpl internalFlushCacheAndCommit(WAL wal, MonitoredTask status,
PrepareFlushResult prepareResult, Collection<HStore> storesToFlush) throws IOException {
// prepare flush context is carried via PrepareFlushResult
TreeMap<byte[], StoreFlushContext> storeFlushCtxs = prepareResult.storeFlushCtxs;
TreeMap<byte[], List<Path>> committedFiles = prepareResult.committedFiles;
long startTime = prepareResult.startTime;
long flushOpSeqId = prepareResult.flushOpSeqId;
long flushedSeqId = prepareResult.flushedSeqId;
String s = "Flushing stores of " + this;
status.setStatus(s);
if (LOG.isTraceEnabled()) LOG.trace(s);
// Any failure from here on out will be catastrophic requiring server
// restart so wal content can be replayed and put back into the memstore.
// Otherwise, the snapshot content, while backed up in the wal, will not
// be part of the currently running server's state.
boolean compactionRequested = false;
long flushedOutputFileSize = 0;
try {
// A. Flush memstore to all the HStores.
// Keep running vector of all store files that includes both old and the
// just-made new flush store file. The new flushed file is still in the
// tmp directory.
for (StoreFlushContext flush : storeFlushCtxs.values()) {
flush.flushCache(status);
}
// Switch snapshot (in memstore) -> new hfile (thus causing
// all the store scanners to reset/reseek).
for (Map.Entry<byte[], StoreFlushContext> flushEntry : storeFlushCtxs.entrySet()) {
StoreFlushContext sfc = flushEntry.getValue();
boolean needsCompaction = sfc.commit(status);
if (needsCompaction) {
compactionRequested = true;
}
byte[] storeName = flushEntry.getKey();
List<Path> storeCommittedFiles = sfc.getCommittedFiles();
committedFiles.put(storeName, storeCommittedFiles);
// The flush committed no files, indicating the flush was empty or was canceled
if (storeCommittedFiles == null || storeCommittedFiles.isEmpty()) {
MemStoreSize storeFlushableSize = prepareResult.storeFlushableSize.get(storeName);
prepareResult.totalFlushableSize.decMemStoreSize(storeFlushableSize);
}
flushedOutputFileSize += sfc.getOutputFileSize();
}
storeFlushCtxs.clear();
// Set down the memstore size by amount of flush.
MemStoreSize mss = prepareResult.totalFlushableSize.getMemStoreSize();
this.decrMemStoreSize(mss);
// Increase the size of this Region for the purposes of quota. Noop if quotas are disabled.
// During startup, quota manager may not be initialized yet.
if (rsServices != null) {
RegionServerSpaceQuotaManager quotaManager = rsServices.getRegionServerSpaceQuotaManager();
if (quotaManager != null) {
quotaManager.getRegionSizeStore().incrementRegionSize(
this.getRegionInfo(), flushedOutputFileSize);
}
}
if (wal != null) {
// write flush marker to WAL. If fail, we should throw DroppedSnapshotException
FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.COMMIT_FLUSH,
getRegionInfo(), flushOpSeqId, committedFiles);
WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, true,
mvcc);
}
} catch (Throwable t) {
// An exception here means that the snapshot was not persisted.
// The wal needs to be replayed so its content is restored to memstore.
// Currently, only a server restart will do this.
// We used to only catch IOEs but its possible that we'd get other
// exceptions -- e.g. HBASE-659 was about an NPE -- so now we catch
// all and sundry.
if (wal != null) {
try {
FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.ABORT_FLUSH,
getRegionInfo(), flushOpSeqId, committedFiles);
WALUtil.writeFlushMarker(wal, this.replicationScope, getRegionInfo(), desc, false, mvcc);
} catch (Throwable ex) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "failed writing ABORT_FLUSH marker to WAL", ex);
// ignore this since we will be aborting the RS with DSE.
}
wal.abortCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
}
DroppedSnapshotException dse = new DroppedSnapshotException("region: " +
Bytes.toStringBinary(getRegionInfo().getRegionName()), t);
status.abort("Flush failed: " + StringUtils.stringifyException(t));
// Callers for flushcache() should catch DroppedSnapshotException and abort the region server.
// However, since we may have the region read lock, we cannot call close(true) here since
// we cannot promote to a write lock. Instead we are setting closing so that all other region
// operations except for close will be rejected.
this.closing.set(true);
if (rsServices != null) {
// This is a safeguard against the case where the caller fails to explicitly handle aborting
rsServices.abort("Replay of WAL required. Forcing server shutdown", dse);
}
throw dse;
}
// If we get to here, the HStores have been written.
if (wal != null) {
wal.completeCacheFlush(this.getRegionInfo().getEncodedNameAsBytes(), flushedSeqId);
}
// Record latest flush time
for (HStore store: storesToFlush) {
this.lastStoreFlushTimeMap.put(store, startTime);
}
this.maxFlushedSeqId = flushedSeqId;
this.lastFlushOpSeqId = flushOpSeqId;
// C. Finally notify anyone waiting on memstore to clear:
// e.g. checkResources().
synchronized (this) {
notifyAll(); // FindBugs NN_NAKED_NOTIFY
}
long time = EnvironmentEdgeManager.currentTime() - startTime;
MemStoreSize mss = prepareResult.totalFlushableSize.getMemStoreSize();
long memstoresize = this.memStoreSizing.getMemStoreSize().getDataSize();
String msg = "Finished flush of"
+ " dataSize ~" + StringUtils.byteDesc(mss.getDataSize()) + "/" + mss.getDataSize()
+ ", heapSize ~" + StringUtils.byteDesc(mss.getHeapSize()) + "/" + mss.getHeapSize()
+ ", currentSize=" + StringUtils.byteDesc(memstoresize) + "/" + memstoresize
+ " for " + this.getRegionInfo().getEncodedName() + " in " + time + "ms, sequenceid="
+ flushOpSeqId + ", compaction requested=" + compactionRequested
+ ((wal == null) ? "; wal=null" : "");
LOG.info(msg);
status.setStatus(msg);
if (rsServices != null && rsServices.getMetrics() != null) {
rsServices.getMetrics().updateFlush(getTableDescriptor().getTableName().getNameAsString(),
time,
mss.getDataSize(), flushedOutputFileSize);
}
return new FlushResultImpl(compactionRequested ?
FlushResult.Result.FLUSHED_COMPACTION_NEEDED :
FlushResult.Result.FLUSHED_NO_COMPACTION_NEEDED, flushOpSeqId);
}
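/*
* A minimal sketch of how the returned FlushResult is typically consumed. The flush entry point
* shown here (a public flush(boolean) on HRegion) and the getResult() accessor are assumptions,
* not shown in this section; "region" is a hypothetical HRegion reference.
*
*   FlushResult res = region.flush(true);
*   if (res.getResult() == FlushResult.Result.FLUSHED_COMPACTION_NEEDED) {
*     // the caller may choose to request a compaction for this region
*   }
*/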
/**
* Method to safely get the next sequence number.
* @return Next sequence number unassociated with any actual edit.
* @throws IOException
*/
protected long getNextSequenceId(final WAL wal) throws IOException {
WriteEntry we = mvcc.begin();
mvcc.completeAndWait(we);
return we.getWriteNumber();
}
//////////////////////////////////////////////////////////////////////////////
// get() methods for client use.
//////////////////////////////////////////////////////////////////////////////
@Override
public RegionScannerImpl getScanner(Scan scan) throws IOException {
return getScanner(scan, null);
}
@Override
public RegionScannerImpl getScanner(Scan scan, List<KeyValueScanner> additionalScanners)
throws IOException {
return getScanner(scan, additionalScanners, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
private RegionScannerImpl getScanner(Scan scan, List<KeyValueScanner> additionalScanners,
long nonceGroup, long nonce) throws IOException {
startRegionOperation(Operation.SCAN);
try {
// Verify families are all valid
if (!scan.hasFamilies()) {
// Adding all families to scanner
for (byte[] family : this.htableDescriptor.getColumnFamilyNames()) {
scan.addFamily(family);
}
} else {
for (byte[] family : scan.getFamilyMap().keySet()) {
checkFamily(family);
}
}
return instantiateRegionScanner(scan, additionalScanners, nonceGroup, nonce);
} finally {
closeRegionOperation(Operation.SCAN);
}
}
protected RegionScanner instantiateRegionScanner(Scan scan,
List<KeyValueScanner> additionalScanners) throws IOException {
return instantiateRegionScanner(scan, additionalScanners, HConstants.NO_NONCE,
HConstants.NO_NONCE);
}
protected RegionScannerImpl instantiateRegionScanner(Scan scan,
List<KeyValueScanner> additionalScanners, long nonceGroup, long nonce) throws IOException {
if (scan.isReversed()) {
if (scan.getFilter() != null) {
scan.getFilter().setReversed(true);
}
return new ReversedRegionScannerImpl(scan, additionalScanners, this);
}
return new RegionScannerImpl(scan, additionalScanners, this, nonceGroup, nonce);
}
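/*
* A minimal usage sketch of the scanner entry points above, assuming a caller that already holds
* this HRegion as "region". When the Scan names no families, every column family of the table is
* scanned; a reversed Scan is served by ReversedRegionScannerImpl.
*
*   Scan scan = new Scan().setReversed(true);
*   try (RegionScanner scanner = region.getScanner(scan)) {
*     List<Cell> cells = new ArrayList<>();
*     boolean moreRows;
*     do {
*       cells.clear();
*       moreRows = scanner.next(cells);
*       // process the cells of one row here
*     } while (moreRows);
*   }
*/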
/**
* Prepare a delete for a row mutation processor
* @param delete The passed delete is modified by this method. WARNING!
* @throws IOException
*/
public void prepareDelete(Delete delete) throws IOException {
// Check to see if this is a whole-row delete (no families specified)
if(delete.getFamilyCellMap().isEmpty()){
for(byte [] family : this.htableDescriptor.getColumnFamilyNames()){
// Don't eat the timestamp
delete.addFamily(family, delete.getTimestamp());
}
} else {
for(byte [] family : delete.getFamilyCellMap().keySet()) {
if(family == null) {
throw new NoSuchColumnFamilyException("Empty family is invalid");
}
checkFamily(family, delete.getDurability());
}
}
}
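/*
* A minimal sketch of the whole-row case handled above, with hypothetical row bytes: a Delete
* that names no families is expanded to one family delete marker per column family of the table,
* preserving the Delete's timestamp. Callers normally just invoke delete(Delete) below, which
* runs this preparation as part of batch mutation checks.
*
*   Delete d = new Delete(Bytes.toBytes("row1"));   // no addFamily()/addColumn() calls
*   region.prepareDelete(d);                        // now carries a family delete marker per CF
*   region.delete(d);
*/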
@Override
public void delete(Delete delete) throws IOException {
checkReadOnly();
checkResources();
startRegionOperation(Operation.DELETE);
try {
// All edits for the given row (across all column families) must happen atomically.
doBatchMutate(delete);
} finally {
closeRegionOperation(Operation.DELETE);
}
}
/**
* Row needed by the method below.
*/
private static final byte [] FOR_UNIT_TESTS_ONLY = Bytes.toBytes("ForUnitTestsOnly");
/**
* This is used only by unit tests. Not required to be a public API.
* @param familyMap map of family to edits for the given family.
* @throws IOException
*/
void delete(NavigableMap<byte[], List<Cell>> familyMap,
Durability durability) throws IOException {
Delete delete = new Delete(FOR_UNIT_TESTS_ONLY, HConstants.LATEST_TIMESTAMP, familyMap);
delete.setDurability(durability);
doBatchMutate(delete);
}
/**
* Set up correct timestamps in the KVs in Delete object.
* <p>Caller should have the row and region locks.
* @param mutation
* @param familyMap
* @param byteNow
* @throws IOException
*/
public void prepareDeleteTimestamps(Mutation mutation, Map<byte[], List<Cell>> familyMap,
byte[] byteNow) throws IOException {
for (Map.Entry<byte[], List<Cell>> e : familyMap.entrySet()) {
byte[] family = e.getKey();
List<Cell> cells = e.getValue();
assert cells instanceof RandomAccess;
Map<byte[], Integer> kvCount = new TreeMap<>(Bytes.BYTES_COMPARATOR);
int listSize = cells.size();
for (int i=0; i < listSize; i++) {
Cell cell = cells.get(i);
// Check if time is LATEST, change to time of most recent addition if so
// This is expensive.
if (cell.getTimestamp() == HConstants.LATEST_TIMESTAMP
&& PrivateCellUtil.isDeleteType(cell)) {
byte[] qual = CellUtil.cloneQualifier(cell);
Integer count = kvCount.get(qual);
if (count == null) {
kvCount.put(qual, 1);
} else {
kvCount.put(qual, count + 1);
}
count = kvCount.get(qual);
Get get = new Get(CellUtil.cloneRow(cell));
get.readVersions(count);
get.addColumn(family, qual);
if (coprocessorHost != null) {
if (!coprocessorHost.prePrepareTimeStampForDeleteVersion(mutation, cell,
byteNow, get)) {
updateDeleteLatestVersionTimestamp(cell, get, count, byteNow);
}
} else {
updateDeleteLatestVersionTimestamp(cell, get, count, byteNow);
}
} else {
PrivateCellUtil.updateLatestStamp(cell, byteNow);
}
}
}
}
void updateDeleteLatestVersionTimestamp(Cell cell, Get get, int count, byte[] byteNow)
throws IOException {
List<Cell> result = get(get, false);
if (result.size() < count) {
// Nothing to delete
PrivateCellUtil.updateLatestStamp(cell, byteNow);
return;
}
if (result.size() > count) {
throw new RuntimeException("Unexpected size: " + result.size());
}
Cell getCell = result.get(count - 1);
PrivateCellUtil.setTimestamp(cell, getCell.getTimestamp());
}
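/*
* A worked example of the LATEST_TIMESTAMP handling above, with hypothetical names: suppose row
* "r", family "cf", qualifier "q" has existing versions at timestamps 5 and 9.
*
*   Delete d = new Delete(Bytes.toBytes("r"));
*   d.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"));   // no timestamp -> LATEST_TIMESTAMP
*
* The Get issued in prepareDeleteTimestamps() reads the newest version (count == 1) and the
* delete cell's timestamp is rewritten to 9 here, so only the most recent version is removed
* rather than everything at or below "now".
*/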
@Override
public void put(Put put) throws IOException {
checkReadOnly();
// Do a rough check that we have resources to accept a write. The check is
// 'rough' in that between the resource check and the call to obtain a
// read lock, resources may run out. For now, the thought is that this
// will be extremely rare; we'll deal with it when it happens.
checkResources();
startRegionOperation(Operation.PUT);
try {
// All edits for the given row (across all column families) must happen atomically.
doBatchMutate(put);
} finally {
closeRegionOperation(Operation.PUT);
}
}
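/*
* A minimal usage sketch, assuming the caller holds this HRegion as "region"; the row, family,
* qualifier and value bytes are hypothetical. All cells of the Put are applied atomically via
* doBatchMutate() above.
*
*   Put p = new Put(Bytes.toBytes("row1"));
*   p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("value"));
*   region.put(p);
*/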
/**
* Class that tracks the progress of a batch of operations, accumulating status codes and tracking
* the index at which processing is proceeding. These batch operations may get split into
* mini-batches for processing.
*/
private abstract static class BatchOperation<T> {
protected final T[] operations;
protected final OperationStatus[] retCodeDetails;
protected final WALEdit[] walEditsFromCoprocessors;
// reference family cell maps directly so coprocessors can mutate them if desired
protected final Map<byte[], List<Cell>>[] familyCellMaps;
// For Increment/Append operations
protected final Result[] results;
// For nonce operations
protected final boolean[] canProceed;
protected final HRegion region;
protected int nextIndexToProcess = 0;
protected final ObservedExceptionsInBatch observedExceptions;
// Durability of the batch (highest durability of all operations)
protected Durability durability;
protected boolean atomic = false;
public BatchOperation(final HRegion region, T[] operations) {
this.operations = operations;
this.retCodeDetails = new OperationStatus[operations.length];
Arrays.fill(this.retCodeDetails, OperationStatus.NOT_RUN);
this.walEditsFromCoprocessors = new WALEdit[operations.length];
familyCellMaps = new Map[operations.length];
this.results = new Result[operations.length];
this.canProceed = new boolean[operations.length];
this.region = region;
observedExceptions = new ObservedExceptionsInBatch();
durability = Durability.USE_DEFAULT;
}
/**
* Visitor interface for batch operations
*/
@FunctionalInterface
public interface Visitor {
/**
* @param index operation index
* @return true to continue visiting the remaining entries, false to stop (break out) early
*/
boolean visit(int index) throws IOException;
}
/**
* Helper method for visiting pending/all batch operations
*/
public void visitBatchOperations(boolean pendingOnly, int lastIndexExclusive, Visitor visitor)
throws IOException {
assert lastIndexExclusive <= this.size();
for (int i = nextIndexToProcess; i < lastIndexExclusive; i++) {
if (!pendingOnly || isOperationPending(i)) {
if (!visitor.visit(i)) {
break;
}
}
}
}
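/*
* A minimal sketch of a Visitor, matching how the lambdas elsewhere in this class use the helper
* above: return true to keep visiting, false to stop early. The per-operation work is elided.
*
*   visitBatchOperations(false, size(), (int index) -> {
*     if (!isOperationPending(index)) {
*       return true;            // this slot already has a status, keep going
*     }
*     // ... per-operation work ...
*     return true;
*   });
*/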
public abstract Mutation getMutation(int index);
public abstract long getNonceGroup(int index);
public abstract long getNonce(int index);
/**
* This method is potentially expensive and is useful mostly for the non-replay CP path.
*/
public abstract Mutation[] getMutationsForCoprocs();
public abstract boolean isInReplay();
public abstract long getOrigLogSeqNum();
public abstract void startRegionOperation() throws IOException;
public abstract void closeRegionOperation() throws IOException;
/**
* Validates each mutation and prepares a batch for write. If necessary (non-replay case), runs
* CP prePut()/preDelete()/preIncrement()/preAppend() hooks for all mutations in a batch. This
* is intended to operate on the entire batch and is called from outside this class to check
* and prepare the batch. This can be implemented by calling the helper method
* {@link #checkAndPrepareMutation(int, long)} in a 'for' loop over mutations.
*/
public abstract void checkAndPrepare() throws IOException;
/**
* Implement any Put request specific check and prepare logic here. Please refer to
* {@link #checkAndPrepareMutation(Mutation, long)} for how it is used.
*/
protected abstract void checkAndPreparePut(final Put p) throws IOException;
/**
* If necessary, calls preBatchMutate() CP hook for a mini-batch and updates metrics, cell
* count, tags and timestamp for all cells of all operations in a mini-batch.
*/
public abstract void prepareMiniBatchOperations(MiniBatchOperationInProgress<Mutation>
miniBatchOp, long timestamp, final List<RowLock> acquiredRowLocks) throws IOException;
/**
* Write mini-batch operations to MemStore
*/
public abstract WriteEntry writeMiniBatchOperationsToMemStore(
final MiniBatchOperationInProgress<Mutation> miniBatchOp, final WriteEntry writeEntry)
throws IOException;
protected void writeMiniBatchOperationsToMemStore(
final MiniBatchOperationInProgress<Mutation> miniBatchOp, final long writeNumber)
throws IOException {
MemStoreSizing memStoreAccounting = new NonThreadSafeMemStoreSizing();
visitBatchOperations(true, miniBatchOp.getLastIndexExclusive(), (int index) -> {
// We need to update the sequence id for the following reasons:
// 1) If the op is in replay mode, FSWALEntry#stampRegionSequenceId won't stamp the sequence id.
// 2) If there is no WAL, FSWALEntry won't be used.
// We use the durability of the original mutation for the mutation passed by CP.
if (isInReplay() || getMutation(index).getDurability() == Durability.SKIP_WAL) {
region.updateSequenceId(familyCellMaps[index].values(), writeNumber);
}
applyFamilyMapToMemStore(familyCellMaps[index], memStoreAccounting);
return true;
});
// update memStore size
region.incMemStoreSize(memStoreAccounting.getDataSize(), memStoreAccounting.getHeapSize(),
memStoreAccounting.getOffHeapSize(), memStoreAccounting.getCellsCount());
}
public boolean isDone() {
return nextIndexToProcess == operations.length;
}
public int size() {
return operations.length;
}
public boolean isOperationPending(int index) {
return retCodeDetails[index].getOperationStatusCode() == OperationStatusCode.NOT_RUN;
}
public List<UUID> getClusterIds() {
assert size() != 0;
return getMutation(0).getClusterIds();
}
boolean isAtomic() {
return atomic;
}
/**
* Helper method that checks and prepares only one mutation. This can be used to implement
* {@link #checkAndPrepare()} for entire Batch.
* NOTE: As CP prePut()/preDelete()/preIncrement()/preAppend() hooks may modify mutations,
* this method should be called after prePut()/preDelete()/preIncrement()/preAppend() CP hooks
* are run for the mutation
*/
protected void checkAndPrepareMutation(Mutation mutation, final long timestamp)
throws IOException {
region.checkRow(mutation.getRow(), "batchMutate");
if (mutation instanceof Put) {
// Check the families in the put. If bad, skip this one.
checkAndPreparePut((Put) mutation);
region.checkTimestamps(mutation.getFamilyCellMap(), timestamp);
} else if (mutation instanceof Delete) {
region.prepareDelete((Delete) mutation);
} else if (mutation instanceof Increment || mutation instanceof Append) {
region.checkFamilies(mutation.getFamilyCellMap().keySet(), mutation.getDurability());
}
}
protected void checkAndPrepareMutation(int index, long timestamp) throws IOException {
Mutation mutation = getMutation(index);
try {
this.checkAndPrepareMutation(mutation, timestamp);
// store the family map reference to allow for mutations
familyCellMaps[index] = mutation.getFamilyCellMap();
// store durability for the batch (highest durability of all operations in the batch)
Durability tmpDur = region.getEffectiveDurability(mutation.getDurability());
if (tmpDur.ordinal() > durability.ordinal()) {
durability = tmpDur;
}
} catch (NoSuchColumnFamilyException nscfe) {
final String msg = "No such column family in batch mutation in region " + this;
if (observedExceptions.hasSeenNoSuchFamily()) {
LOG.warn(msg + nscfe.getMessage());
} else {
LOG.warn(msg, nscfe);
observedExceptions.sawNoSuchFamily();
}
retCodeDetails[index] = new OperationStatus(
OperationStatusCode.BAD_FAMILY, nscfe.getMessage());
if (isAtomic()) { // fail, atomic means all or none
throw nscfe;
}
} catch (FailedSanityCheckException fsce) {
final String msg = "Batch Mutation did not pass sanity check in region " + this;
if (observedExceptions.hasSeenFailedSanityCheck()) {
LOG.warn(msg + fsce.getMessage());
} else {
LOG.warn(msg, fsce);
observedExceptions.sawFailedSanityCheck();
}
retCodeDetails[index] = new OperationStatus(
OperationStatusCode.SANITY_CHECK_FAILURE, fsce.getMessage());
if (isAtomic()) {
throw fsce;
}
} catch (WrongRegionException we) {
final String msg = "Batch mutation had a row that does not belong to this region " + this;
if (observedExceptions.hasSeenWrongRegion()) {
LOG.warn(msg + we.getMessage());
} else {
LOG.warn(msg, we);
observedExceptions.sawWrongRegion();
}
retCodeDetails[index] = new OperationStatus(
OperationStatusCode.SANITY_CHECK_FAILURE, we.getMessage());
if (isAtomic()) {
throw we;
}
}
}
/**
* Creates Mini-batch of all operations [nextIndexToProcess, lastIndexExclusive) for which
* a row lock can be acquired. All mutations with locked rows are considered to be
* In-progress operations and hence the name {@link MiniBatchOperationInProgress}. Mini batch
* is a window over {@link BatchOperation} and contains contiguous pending operations.
*
* @param acquiredRowLocks keeps track of rowLocks acquired.
*/
public MiniBatchOperationInProgress<Mutation> lockRowsAndBuildMiniBatch(
List<RowLock> acquiredRowLocks) throws IOException {
int readyToWriteCount = 0;
int lastIndexExclusive = 0;
RowLock prevRowLock = null;
for (; lastIndexExclusive < size(); lastIndexExclusive++) {
// If we have reached the miniBatchSize, stop here and process the miniBatch.
// This only applies to non-atomic batch operations.
if (!isAtomic() && (readyToWriteCount == region.miniBatchSize)) {
break;
}
if (!isOperationPending(lastIndexExclusive)) {
continue;
}
// HBASE-19389 Limit concurrency of put with dense (hundreds) columns to avoid exhausting
// RS handlers, covering both MutationBatchOperation and ReplayBatchOperation
// The BAD_FAMILY/SANITY_CHECK_FAILURE cases are handled in checkAndPrepare phase and won't
// pass the isOperationPending check
Map<byte[], List<Cell>> curFamilyCellMap =
getMutation(lastIndexExclusive).getFamilyCellMap();
try {
// Start the protector before acquiring the row lock for performance reasons; we finish it
// below if an exception is encountered.
region.storeHotnessProtector.start(curFamilyCellMap);
} catch (RegionTooBusyException rtbe) {
region.storeHotnessProtector.finish(curFamilyCellMap);
if (isAtomic()) {
throw rtbe;
}
retCodeDetails[lastIndexExclusive] =
new OperationStatus(OperationStatusCode.STORE_TOO_BUSY, rtbe.getMessage());
continue;
}
Mutation mutation = getMutation(lastIndexExclusive);
// If we haven't got any rows in our batch, we should block to get the next one.
RowLock rowLock = null;
boolean throwException = false;
try {
// if atomic then get exclusive lock, else shared lock
rowLock = region.getRowLockInternal(mutation.getRow(), !isAtomic(), prevRowLock);
} catch (TimeoutIOException | InterruptedIOException e) {
// NOTE: We will retry on other exceptions, but we should stop if we receive a
// TimeoutIOException or an InterruptedIOException, as the operation has timed out or been
// interrupted respectively.
throwException = true;
throw e;
} catch (IOException ioe) {
LOG.warn("Failed getting lock, row={}, in region {}",
Bytes.toStringBinary(mutation.getRow()), this, ioe);
if (isAtomic()) { // fail, atomic means all or none
throwException = true;
throw ioe;
}
} catch (Throwable throwable) {
throwException = true;
throw throwable;
} finally {
if (throwException) {
region.storeHotnessProtector.finish(curFamilyCellMap);
}
}
if (rowLock == null) {
// We failed to grab another lock
if (isAtomic()) {
region.storeHotnessProtector.finish(curFamilyCellMap);
throw new IOException("Can't apply all operations atomically!");
}
break; // Stop acquiring more rows for this batch
} else {
if (rowLock != prevRowLock) {
// It is a different row now, add this to the acquiredRowLocks and
// set prevRowLock to the new returned rowLock
acquiredRowLocks.add(rowLock);
prevRowLock = rowLock;
}
}
readyToWriteCount++;
}
return createMiniBatch(lastIndexExclusive, readyToWriteCount);
}
protected MiniBatchOperationInProgress<Mutation> createMiniBatch(final int lastIndexExclusive,
final int readyToWriteCount) {
return new MiniBatchOperationInProgress<>(getMutationsForCoprocs(), retCodeDetails,
walEditsFromCoprocessors, nextIndexToProcess, lastIndexExclusive, readyToWriteCount);
}
/**
* Builds separate WALEdit per nonce by applying input mutations. If WALEdits from CP are
* present, they are merged into the resulting WALEdit.
*/
public List<Pair<NonceKey, WALEdit>> buildWALEdits(
final MiniBatchOperationInProgress<Mutation> miniBatchOp) throws IOException {
List<Pair<NonceKey, WALEdit>> walEdits = new ArrayList<>();
visitBatchOperations(true, nextIndexToProcess + miniBatchOp.size(), new Visitor() {
private Pair<NonceKey, WALEdit> curWALEditForNonce;
@Override
public boolean visit(int index) throws IOException {
Mutation m = getMutation(index);
// we use durability of the original mutation for the mutation passed by CP.
if (region.getEffectiveDurability(m.getDurability()) == Durability.SKIP_WAL) {
region.recordMutationWithoutWal(m.getFamilyCellMap());
return true;
}
// the batch may contain multiple nonce keys (replay case). If so, write WALEdit for each.
// Given how nonce keys are originally written, these should be contiguous.
// They don't have to be; it will still work, we just write more WALEdits than needed.
long nonceGroup = getNonceGroup(index);
long nonce = getNonce(index);
if (curWALEditForNonce == null ||
curWALEditForNonce.getFirst().getNonceGroup() != nonceGroup ||
curWALEditForNonce.getFirst().getNonce() != nonce) {
curWALEditForNonce = new Pair<>(new NonceKey(nonceGroup, nonce),
new WALEdit(miniBatchOp.getCellCount(), isInReplay()));
walEdits.add(curWALEditForNonce);
}
WALEdit walEdit = curWALEditForNonce.getSecond();
// Add WAL edits from CPs.
WALEdit fromCP = walEditsFromCoprocessors[index];
if (fromCP != null) {
for (Cell cell : fromCP.getCells()) {
walEdit.add(cell);
}
}
walEdit.add(familyCellMaps[index]);
return true;
}
});
return walEdits;
}
/**
* This method completes mini-batch operations by calling postBatchMutate() CP hook (if
* required) and completing mvcc.
*/
public void completeMiniBatchOperations(
final MiniBatchOperationInProgress<Mutation> miniBatchOp, final WriteEntry writeEntry)
throws IOException {
if (writeEntry != null) {
region.mvcc.completeAndWait(writeEntry);
}
}
public void doPostOpCleanupForMiniBatch(
final MiniBatchOperationInProgress<Mutation> miniBatchOp, final WALEdit walEdit,
boolean success) throws IOException {
doFinishHotnessProtector(miniBatchOp);
}
private void doFinishHotnessProtector(
final MiniBatchOperationInProgress<Mutation> miniBatchOp) {
// check and return if the protector is not enabled
if (!region.storeHotnessProtector.isEnable()) {
return;
}
// miniBatchOp is null if and only if lockRowsAndBuildMiniBatch threw an exception.
// That case has already been handled.
if (miniBatchOp == null) {
return;
}
final int finalLastIndexExclusive = miniBatchOp.getLastIndexExclusive();
for (int i = nextIndexToProcess; i < finalLastIndexExclusive; i++) {
switch (retCodeDetails[i].getOperationStatusCode()) {
case SUCCESS:
case FAILURE:
region.storeHotnessProtector.finish(getMutation(i).getFamilyCellMap());
break;
default:
// do nothing
// We won't start the protector for NOT_RUN/BAD_FAMILY/SANITY_CHECK_FAILURE and the
// STORE_TOO_BUSY case is handled in StoreHotnessProtector#start
break;
}
}
}
/**
* Atomically apply the given map of family->edits to the memstore.
* This handles the consistency control on its own, but the caller
* should already have locked updatesLock.readLock(). This also does
* <b>not</b> check the families for validity.
*
* @param familyMap Map of Cells by family
*/
protected void applyFamilyMapToMemStore(Map<byte[], List<Cell>> familyMap,
MemStoreSizing memstoreAccounting) throws IOException {
for (Map.Entry<byte[], List<Cell>> e : familyMap.entrySet()) {
byte[] family = e.getKey();
List<Cell> cells = e.getValue();
assert cells instanceof RandomAccess;
region.applyToMemStore(region.getStore(family), cells, false, memstoreAccounting);
}
}
}
/**
* Batch of mutation operations. Base class is shared with {@link ReplayBatchOperation} as most
* of the logic is the same.
*/
static class MutationBatchOperation extends BatchOperation<Mutation> {
private long nonceGroup;
private long nonce;
public MutationBatchOperation(final HRegion region, Mutation[] operations, boolean atomic,
long nonceGroup, long nonce) {
super(region, operations);
this.atomic = atomic;
this.nonceGroup = nonceGroup;
this.nonce = nonce;
}
@Override
public Mutation getMutation(int index) {
return this.operations[index];
}
@Override
public long getNonceGroup(int index) {
return nonceGroup;
}
@Override
public long getNonce(int index) {
return nonce;
}
@Override
public Mutation[] getMutationsForCoprocs() {
return this.operations;
}
@Override
public boolean isInReplay() {
return false;
}
@Override
public long getOrigLogSeqNum() {
return SequenceId.NO_SEQUENCE_ID;
}
@Override
public void startRegionOperation() throws IOException {
region.startRegionOperation(Operation.BATCH_MUTATE);
}
@Override
public void closeRegionOperation() throws IOException {
region.closeRegionOperation(Operation.BATCH_MUTATE);
}
@Override
public void checkAndPreparePut(Put p) throws IOException {
region.checkFamilies(p.getFamilyCellMap().keySet(), p.getDurability());
}
@Override
public void checkAndPrepare() throws IOException {
// index 0: puts, index 1: deletes, index 2: increments, index 3: appends
final int[] metrics = {0, 0, 0, 0};
visitBatchOperations(true, this.size(), new Visitor() {
private long now = EnvironmentEdgeManager.currentTime();
private WALEdit walEdit;
@Override
public boolean visit(int index) throws IOException {
// Run coprocessor pre hook outside of locks to avoid deadlock
if (region.coprocessorHost != null) {
if (walEdit == null) {
walEdit = new WALEdit();
}
callPreMutateCPHook(index, walEdit, metrics);
if (!walEdit.isEmpty()) {
walEditsFromCoprocessors[index] = walEdit;
walEdit = null;
}
}
if (isOperationPending(index)) {
// TODO: Currently validation is done with current time before acquiring locks and
// updates are done with different timestamps after acquiring locks. This behavior is
// inherited from the code prior to this change. Can this be changed?
checkAndPrepareMutation(index, now);
}
return true;
}
});
// FIXME: we may update metrics twice! Here for all operations bypassed by a CP, and later in
// normal processing.
// Update metrics in the same way as is done when we go the normal processing route (we now
// update general metrics even though a Coprocessor did the work).
if (region.metricsRegion != null) {
if (metrics[0] > 0) {
// There were some Puts in the batch.
region.metricsRegion.updatePut();
}
if (metrics[1] > 0) {
// There were some Deletes in the batch.
region.metricsRegion.updateDelete();
}
if (metrics[2] > 0) {
// There were some Increments in the batch.
region.metricsRegion.updateIncrement();
}
if (metrics[3] > 0) {
// There were some Appends in the batch.
region.metricsRegion.updateAppend();
}
}
}
@Override
public void prepareMiniBatchOperations(MiniBatchOperationInProgress<Mutation> miniBatchOp,
long timestamp, final List<RowLock> acquiredRowLocks) throws IOException {
visitBatchOperations(true, miniBatchOp.getLastIndexExclusive(), (int index) -> {
Mutation mutation = getMutation(index);
if (mutation instanceof Put) {
HRegion.updateCellTimestamps(familyCellMaps[index].values(), Bytes.toBytes(timestamp));
miniBatchOp.incrementNumOfPuts();
} else if (mutation instanceof Delete) {
region.prepareDeleteTimestamps(mutation, familyCellMaps[index],
Bytes.toBytes(timestamp));
miniBatchOp.incrementNumOfDeletes();
} else if (mutation instanceof Increment || mutation instanceof Append) {
// For nonce operations
canProceed[index] = startNonceOperation(nonceGroup, nonce);
if (!canProceed[index]) {
// convert duplicate increment/append to get
List<Cell> results = region.get(toGet(mutation), false, nonceGroup, nonce);
retCodeDetails[index] = new OperationStatus(OperationStatusCode.SUCCESS,
Result.create(results));
return true;
}
boolean returnResults;
if (mutation instanceof Increment) {
returnResults = ((Increment) mutation).isReturnResults();
miniBatchOp.incrementNumOfIncrements();
} else {
returnResults = ((Append) mutation).isReturnResults();
miniBatchOp.incrementNumOfAppends();
}
Result result = doCoprocessorPreCallAfterRowLock(mutation);
if (result != null) {
retCodeDetails[index] = new OperationStatus(OperationStatusCode.SUCCESS,
returnResults ? result : Result.EMPTY_RESULT);
return true;
}
List<Cell> results = returnResults ? new ArrayList<>(mutation.size()) : null;
familyCellMaps[index] = reckonDeltas(mutation, results, timestamp);
this.results[index] = results != null ? Result.create(results): Result.EMPTY_RESULT;
}
region.rewriteCellTags(familyCellMaps[index], mutation);
// update cell count
if (region.getEffectiveDurability(mutation.getDurability()) != Durability.SKIP_WAL) {
for (List<Cell> cells : mutation.getFamilyCellMap().values()) {
miniBatchOp.addCellCount(cells.size());
}
}
WALEdit fromCP = walEditsFromCoprocessors[index];
if (fromCP != null) {
miniBatchOp.addCellCount(fromCP.size());
}
return true;
});
if (region.coprocessorHost != null) {
// calling the pre CP hook for batch mutation
region.coprocessorHost.preBatchMutate(miniBatchOp);
checkAndMergeCPMutations(miniBatchOp, acquiredRowLocks, timestamp);
}
}
/**
* Starts the nonce operation for a mutation, if needed.
* @param nonceGroup Nonce group from the request.
* @param nonce Nonce.
* @return whether to proceed this mutation.
*/
private boolean startNonceOperation(long nonceGroup, long nonce) throws IOException {
if (region.rsServices == null || region.rsServices.getNonceManager() == null
|| nonce == HConstants.NO_NONCE) {
return true;
}
boolean canProceed;
try {
canProceed = region.rsServices.getNonceManager()
.startOperation(nonceGroup, nonce, region.rsServices);
} catch (InterruptedException ex) {
throw new InterruptedIOException("Nonce start operation interrupted");
}
return canProceed;
}
/**
* Ends nonce operation for a mutation, if needed.
* @param nonceGroup Nonce group from the request. Always 0 in initial implementation.
* @param nonce Nonce.
* @param success Whether the operation for this nonce has succeeded.
*/
private void endNonceOperation(long nonceGroup, long nonce, boolean success) {
if (region.rsServices != null && region.rsServices.getNonceManager() != null
&& nonce != HConstants.NO_NONCE) {
region.rsServices.getNonceManager().endOperation(nonceGroup, nonce, success);
}
}
private static Get toGet(final Mutation mutation) throws IOException {
assert mutation instanceof Increment || mutation instanceof Append;
Get get = new Get(mutation.getRow());
CellScanner cellScanner = mutation.cellScanner();
while (cellScanner.advance()) {
Cell cell = cellScanner.current();
get.addColumn(CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell));
}
if (mutation instanceof Increment) {
// Increment
Increment increment = (Increment) mutation;
get.setTimeRange(increment.getTimeRange().getMin(), increment.getTimeRange().getMax());
} else {
// Append
Append append = (Append) mutation;
get.setTimeRange(append.getTimeRange().getMin(), append.getTimeRange().getMax());
}
for (Entry<String, byte[]> entry : mutation.getAttributesMap().entrySet()) {
get.setAttribute(entry.getKey(), entry.getValue());
}
return get;
}
/**
* Do the coprocessor pre-increment or pre-append call after the row lock is acquired.
* @return Result returned by the coprocessor, meaning all further processing should be bypassed
* and the returned Result used instead; or null, meaning proceed as normal.
*/
private Result doCoprocessorPreCallAfterRowLock(Mutation mutation) throws IOException {
assert mutation instanceof Increment || mutation instanceof Append;
Result result = null;
if (region.coprocessorHost != null) {
if (mutation instanceof Increment) {
result = region.coprocessorHost.preIncrementAfterRowLock((Increment) mutation);
} else {
result = region.coprocessorHost.preAppendAfterRowLock((Append) mutation);
}
}
return result;
}
private Map<byte[], List<Cell>> reckonDeltas(Mutation mutation, List<Cell> results,
long now) throws IOException {
assert mutation instanceof Increment || mutation instanceof Append;
Map<byte[], List<Cell>> ret = new HashMap<>();
// Process a Store/family at a time.
for (Map.Entry<byte [], List<Cell>> entry: mutation.getFamilyCellMap().entrySet()) {
final byte[] columnFamilyName = entry.getKey();
List<Cell> deltas = entry.getValue();
// Reckon for the Store what to apply to WAL and MemStore.
List<Cell> toApply = reckonDeltasByStore(region.stores.get(columnFamilyName), mutation,
now, deltas, results);
if (!toApply.isEmpty()) {
for (Cell cell : toApply) {
HStore store = region.getStore(cell);
if (store == null) {
region.checkFamily(CellUtil.cloneFamily(cell));
} else {
ret.computeIfAbsent(store.getColumnFamilyDescriptor().getName(),
key -> new ArrayList<>()).add(cell);
}
}
}
}
return ret;
}
/**
* Reckon the Cells to apply to the WAL, the memstore, and to return to the Client for the passed
* column family/Store.
*
* Does a Get of the current value and then adds the passed-in deltas for this Store, returning
* the result.
*
* @param mutation The encompassing Mutation object
* @param deltas Changes to apply to this Store; either increment amount or data to append
* @param results In here we accumulate all the Cells we are to return to the client. If null,
* client doesn't want results returned.
* @return Resulting Cells after <code>deltas</code> have been applied to current
* values. Side effect is our filling out of the <code>results</code> List.
*/
private List<Cell> reckonDeltasByStore(HStore store, Mutation mutation, long now,
List<Cell> deltas, List<Cell> results) throws IOException {
assert mutation instanceof Increment || mutation instanceof Append;
byte[] columnFamily = store.getColumnFamilyDescriptor().getName();
List<Pair<Cell, Cell>> cellPairs = new ArrayList<>(deltas.size());
// Get previous values for all columns in this family.
TimeRange tr;
if (mutation instanceof Increment) {
tr = ((Increment) mutation).getTimeRange();
} else {
tr = ((Append) mutation).getTimeRange();
}
List<Cell> currentValues = get(mutation, store, deltas, tr);
// Iterate the input columns and update existing values if they were found, otherwise
// add new column initialized to the delta amount
int currentValuesIndex = 0;
for (int i = 0; i < deltas.size(); i++) {
Cell delta = deltas.get(i);
Cell currentValue = null;
if (currentValuesIndex < currentValues.size() &&
CellUtil.matchingQualifier(currentValues.get(currentValuesIndex), delta)) {
currentValue = currentValues.get(currentValuesIndex);
if (i < (deltas.size() - 1) && !CellUtil.matchingQualifier(delta, deltas.get(i + 1))) {
currentValuesIndex++;
}
}
// Switch on whether this is an increment or an append, building the new Cell to apply.
Cell newCell;
if (mutation instanceof Increment) {
long deltaAmount = getLongValue(delta);
final long newValue = currentValue == null ?
deltaAmount : getLongValue(currentValue) + deltaAmount;
newCell = reckonDelta(delta, currentValue, columnFamily, now, mutation,
(oldCell) -> Bytes.toBytes(newValue));
} else {
newCell = reckonDelta(delta, currentValue, columnFamily, now, mutation,
(oldCell) ->
ByteBuffer.wrap(new byte[delta.getValueLength() + oldCell.getValueLength()])
.put(oldCell.getValueArray(), oldCell.getValueOffset(), oldCell.getValueLength())
.put(delta.getValueArray(), delta.getValueOffset(), delta.getValueLength())
.array()
);
}
if (region.maxCellSize > 0) {
int newCellSize = PrivateCellUtil.estimatedSerializedSizeOf(newCell);
if (newCellSize > region.maxCellSize) {
String msg = "Cell with size " + newCellSize + " exceeds limit of "
+ region.maxCellSize + " bytes in region " + this;
LOG.debug(msg);
throw new DoNotRetryIOException(msg);
}
}
cellPairs.add(new Pair<>(currentValue, newCell));
// Add to results to be returned to the client. If null, the client does not want results.
if (results != null) {
results.add(newCell);
}
}
// Give coprocessors a chance to update the new cells before they are applied to the WAL or memstore
if (region.coprocessorHost != null) {
// Here the operation must be increment or append.
cellPairs = mutation instanceof Increment ?
region.coprocessorHost.postIncrementBeforeWAL(mutation, cellPairs) :
region.coprocessorHost.postAppendBeforeWAL(mutation, cellPairs);
}
return cellPairs.stream().map(Pair::getSecond).collect(Collectors.toList());
}
private static Cell reckonDelta(final Cell delta, final Cell currentCell,
final byte[] columnFamily, final long now, Mutation mutation,
Function<Cell, byte[]> supplier) throws IOException {
// Forward any tags found on the delta.
List<Tag> tags = TagUtil.carryForwardTags(delta);
tags = TagUtil.carryForwardTTLTag(tags, mutation.getTTL());
if (currentCell != null) {
tags = TagUtil.carryForwardTags(tags, currentCell);
byte[] newValue = supplier.apply(currentCell);
return ExtendedCellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
.setRow(mutation.getRow(), 0, mutation.getRow().length)
.setFamily(columnFamily, 0, columnFamily.length)
// copy the qualifier if the cell is located in shared memory.
.setQualifier(CellUtil.cloneQualifier(delta))
.setTimestamp(Math.max(currentCell.getTimestamp() + 1, now))
.setType(KeyValue.Type.Put.getCode())
.setValue(newValue, 0, newValue.length)
.setTags(TagUtil.fromList(tags))
.build();
} else {
PrivateCellUtil.updateLatestStamp(delta, now);
return CollectionUtils.isEmpty(tags) ? delta : PrivateCellUtil.createCell(delta, tags);
}
}
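/*
* A worked instance of the timestamp rule above: with now = 100, an existing cell at timestamp
* 40 yields a new cell at max(40 + 1, 100) = 100, while an existing cell already at 100 (same
* millisecond, or clock skew) yields max(101, 100) = 101, so the reckoned cell always sorts as
* newer than the value it replaces.
*/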
/**
* @return Get the long out of the passed in Cell
*/
private static long getLongValue(final Cell cell) throws DoNotRetryIOException {
int len = cell.getValueLength();
if (len != Bytes.SIZEOF_LONG) {
// throw DoNotRetryIOException instead of IllegalArgumentException
throw new DoNotRetryIOException("Field is not a long, it's " + len + " bytes wide");
}
return PrivateCellUtil.getValueAsLong(cell);
}
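/*
* An illustration of the 8-byte requirement above, with hypothetical row/family/qualifier bytes:
* increments only apply to values that were written as serialized longs.
*
*   byte[] row = Bytes.toBytes("r"), cf = Bytes.toBytes("cf"), q = Bytes.toBytes("q");
*   region.put(new Put(row).addColumn(cf, q, Bytes.toBytes(1L)));   // 8 bytes, incrementable
*   // Bytes.toBytes("1") would be a single byte and fail with DoNotRetryIOException
*   Increment inc = new Increment(row).addColumn(cf, q, 2L);
*   region.batchMutate(new Mutation[] { inc });                     // cell now holds 3L
*/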
/**
* Do a specific Get on passed <code>columnFamily</code> and column qualifiers.
* @param mutation Mutation we are doing this Get for.
* @param store Which column family on the row (TODO: do all Gets in one go)
* @param coordinates Cells from <code>mutation</code> used as coordinates applied to Get.
* @return Return list of Cells found.
*/
private List<Cell> get(Mutation mutation, HStore store, List<Cell> coordinates,
TimeRange tr) throws IOException {
// Sort the cells so that they match the order that they appear in the Get results.
// Otherwise, we won't be able to find the existing values if the cells are not specified
// in order by the client since cells are in an array list.
// TODO: I don't get why we are sorting. St.Ack 20150107
sort(coordinates, store.getComparator());
Get get = new Get(mutation.getRow());
for (Cell cell: coordinates) {
get.addColumn(store.getColumnFamilyDescriptor().getName(), CellUtil.cloneQualifier(cell));
}
// Increments carry time range. If an Increment instance, put it on the Get.
if (tr != null) {
get.setTimeRange(tr.getMin(), tr.getMax());
}
return region.get(get, false);
}
@Override
public List<Pair<NonceKey, WALEdit>> buildWALEdits(final MiniBatchOperationInProgress<Mutation>
miniBatchOp) throws IOException {
List<Pair<NonceKey, WALEdit>> walEdits = super.buildWALEdits(miniBatchOp);
// for MutationBatchOperation, more than one nonce is not allowed
if (walEdits.size() > 1) {
throw new IOException("Found multiple nonce keys per batch!");
}
return walEdits;
}
@Override
public WriteEntry writeMiniBatchOperationsToMemStore(
final MiniBatchOperationInProgress<Mutation> miniBatchOp, @Nullable WriteEntry writeEntry)
throws IOException {
if (writeEntry == null) {
writeEntry = region.mvcc.begin();
}
super.writeMiniBatchOperationsToMemStore(miniBatchOp, writeEntry.getWriteNumber());
return writeEntry;
}
@Override
public void completeMiniBatchOperations(
final MiniBatchOperationInProgress<Mutation> miniBatchOp, final WriteEntry writeEntry)
throws IOException {
// TODO: can it be done after completing mvcc?
// calling the post CP hook for batch mutation
if (region.coprocessorHost != null) {
region.coprocessorHost.postBatchMutate(miniBatchOp);
}
super.completeMiniBatchOperations(miniBatchOp, writeEntry);
if (nonce != HConstants.NO_NONCE) {
if (region.rsServices != null && region.rsServices.getNonceManager() != null) {
region.rsServices.getNonceManager()
.addMvccToOperationContext(nonceGroup, nonce, writeEntry.getWriteNumber());
}
}
}
@Override
public void doPostOpCleanupForMiniBatch(MiniBatchOperationInProgress<Mutation> miniBatchOp,
final WALEdit walEdit, boolean success) throws IOException {
super.doPostOpCleanupForMiniBatch(miniBatchOp, walEdit, success);
if (miniBatchOp != null) {
// synced so that the coprocessor contract is adhered to.
if (region.coprocessorHost != null) {
visitBatchOperations(false, miniBatchOp.getLastIndexExclusive(), (int i) -> {
// only for successful puts/deletes/increments/appends
if (retCodeDetails[i].getOperationStatusCode() == OperationStatusCode.SUCCESS) {
Mutation m = getMutation(i);
if (m instanceof Put) {
region.coprocessorHost.postPut((Put) m, walEdit, m.getDurability());
} else if (m instanceof Delete) {
region.coprocessorHost.postDelete((Delete) m, walEdit, m.getDurability());
} else if (m instanceof Increment) {
Result result = region.getCoprocessorHost().postIncrement((Increment) m,
results[i]);
if (result != results[i]) {
retCodeDetails[i] =
new OperationStatus(retCodeDetails[i].getOperationStatusCode(), result);
}
} else if (m instanceof Append) {
Result result = region.getCoprocessorHost().postAppend((Append) m, results[i]);
if (result != results[i]) {
retCodeDetails[i] =
new OperationStatus(retCodeDetails[i].getOperationStatusCode(), result);
}
}
}
return true;
});
}
// For nonce operations
visitBatchOperations(false, miniBatchOp.getLastIndexExclusive(), (int i) -> {
if (canProceed[i]) {
endNonceOperation(nonceGroup, nonce,
retCodeDetails[i].getOperationStatusCode() == OperationStatusCode.SUCCESS);
}
return true;
});
// See if the column families were consistent through the whole thing.
// If they were, then keep them. If they were not, then pass a null.
// A null will be treated as unknown.
// The total time taken might involve Puts, Deletes, Increments and Appends.
// Split the time for puts and deletes based on the total number of Puts, Deletes,
// Increments and Appends.
if (region.metricsRegion != null) {
if (miniBatchOp.getNumOfPuts() > 0) {
// There were some Puts in the batch.
region.metricsRegion.updatePut();
}
if (miniBatchOp.getNumOfDeletes() > 0) {
// There were some Deletes in the batch.
region.metricsRegion.updateDelete();
}
if (miniBatchOp.getNumOfIncrements() > 0) {
// There were some Increments in the batch.
region.metricsRegion.updateIncrement();
}
if (miniBatchOp.getNumOfAppends() > 0) {
// There were some Appends in the batch.
region.metricsRegion.updateAppend();
}
}
}
if (region.coprocessorHost != null) {
// call the coprocessor hook to do any finalization steps after the put is done
region.coprocessorHost.postBatchMutateIndispensably(
miniBatchOp != null ? miniBatchOp : createMiniBatch(size(), 0), success);
}
}
/**
* Runs prePut/preDelete/preIncrement/preAppend coprocessor hook for input mutation in a batch
* @param metrics Array of 4 ints: index 0 counts puts, index 1 counts deletes, index 2 counts
* increments and index 3 counts appends
*/
private void callPreMutateCPHook(int index, final WALEdit walEdit, final int[] metrics)
throws IOException {
Mutation m = getMutation(index);
if (m instanceof Put) {
if (region.coprocessorHost.prePut((Put) m, walEdit, m.getDurability())) {
// pre hook says skip this Put
// mark as success and skip in doMiniBatchMutation
metrics[0]++;
retCodeDetails[index] = OperationStatus.SUCCESS;
}
} else if (m instanceof Delete) {
Delete curDel = (Delete) m;
if (curDel.getFamilyCellMap().isEmpty()) {
// handle deleting a row case
// TODO: prepareDelete() has been called twice, before and after preDelete() CP hook.
// Can this be avoided?
region.prepareDelete(curDel);
}
if (region.coprocessorHost.preDelete(curDel, walEdit, m.getDurability())) {
// pre hook says skip this Delete
// mark as success and skip in doMiniBatchMutation
metrics[1]++;
retCodeDetails[index] = OperationStatus.SUCCESS;
}
} else if (m instanceof Increment) {
Increment increment = (Increment) m;
Result result = region.coprocessorHost.preIncrement(increment);
if (result != null) {
// pre hook says skip this Increment
// mark as success and skip in doMiniBatchMutation
metrics[2]++;
retCodeDetails[index] = new OperationStatus(OperationStatusCode.SUCCESS, result);
}
} else if (m instanceof Append) {
Append append = (Append) m;
Result result = region.coprocessorHost.preAppend(append);
if (result != null) {
// pre hook says skip this Append
// mark as success and skip in doMiniBatchMutation
metrics[3]++;
retCodeDetails[index] = new OperationStatus(OperationStatusCode.SUCCESS, result);
}
} else {
String msg = "Put/Delete/Increment/Append mutations only supported in a batch";
retCodeDetails[index] = new OperationStatus(OperationStatusCode.FAILURE, msg);
if (isAtomic()) { // fail, atomic means all or none
throw new IOException(msg);
}
}
}
private void checkAndMergeCPMutations(final MiniBatchOperationInProgress<Mutation> miniBatchOp,
final List<RowLock> acquiredRowLocks, final long timestamp) throws IOException {
visitBatchOperations(true, nextIndexToProcess + miniBatchOp.size(), (int i) -> {
// we pass (i - nextIndexToProcess) below since the call expects a relative index
Mutation[] cpMutations = miniBatchOp.getOperationsFromCoprocessors(i - nextIndexToProcess);
if (cpMutations == null) {
return true;
}
// Else Coprocessor added more Mutations corresponding to the Mutation at this index.
Mutation mutation = getMutation(i);
for (Mutation cpMutation : cpMutations) {
this.checkAndPrepareMutation(cpMutation, timestamp);
// Acquire row locks. If not, the whole batch will fail.
acquiredRowLocks.add(region.getRowLockInternal(cpMutation.getRow(), true, null));
// Returned mutations from coprocessor correspond to the Mutation at index i. We can
// directly add the cells from those mutations to the familyMaps of this mutation.
Map<byte[], List<Cell>> cpFamilyMap = cpMutation.getFamilyCellMap();
region.rewriteCellTags(cpFamilyMap, mutation);
// will get added to the memStore later
mergeFamilyMaps(familyCellMaps[i], cpFamilyMap);
// The durability of returned mutation is replaced by the corresponding mutation.
// If the corresponding mutation contains the SKIP_WAL, we shouldn't count the
// cells of returned mutation.
if (region.getEffectiveDurability(mutation.getDurability()) != Durability.SKIP_WAL) {
for (List<Cell> cells : cpFamilyMap.values()) {
miniBatchOp.addCellCount(cells.size());
}
}
}
return true;
});
}
private void mergeFamilyMaps(Map<byte[], List<Cell>> familyMap,
Map<byte[], List<Cell>> toBeMerged) {
for (Map.Entry<byte[], List<Cell>> entry : toBeMerged.entrySet()) {
List<Cell> cells = familyMap.get(entry.getKey());
if (cells == null) {
familyMap.put(entry.getKey(), entry.getValue());
} else {
cells.addAll(entry.getValue());
}
}
}
}
/**
* Batch of mutations for replay. Base class is shared with {@link MutationBatchOperation} as most
* of the logic is the same.
*/
static class ReplayBatchOperation extends BatchOperation<MutationReplay> {
private long origLogSeqNum = 0;
public ReplayBatchOperation(final HRegion region, MutationReplay[] operations,
long origLogSeqNum) {
super(region, operations);
this.origLogSeqNum = origLogSeqNum;
}
@Override
public Mutation getMutation(int index) {
return this.operations[index].mutation;
}
@Override
public long getNonceGroup(int index) {
return this.operations[index].nonceGroup;
}
@Override
public long getNonce(int index) {
return this.operations[index].nonce;
}
@Override
public Mutation[] getMutationsForCoprocs() {
return null;
}
@Override
public boolean isInReplay() {
return true;
}
@Override
public long getOrigLogSeqNum() {
return this.origLogSeqNum;
}
@Override
public void startRegionOperation() throws IOException {
region.startRegionOperation(Operation.REPLAY_BATCH_MUTATE);
}
@Override
public void closeRegionOperation() throws IOException {
region.closeRegionOperation(Operation.REPLAY_BATCH_MUTATE);
}
/**
* During replay, there could exist column families that were removed between the region server
* failure and the replay.
*/
@Override
protected void checkAndPreparePut(Put p) throws IOException {
Map<byte[], List<Cell>> familyCellMap = p.getFamilyCellMap();
List<byte[]> nonExistentList = null;
for (byte[] family : familyCellMap.keySet()) {
if (!region.htableDescriptor.hasColumnFamily(family)) {
if (nonExistentList == null) {
nonExistentList = new ArrayList<>();
}
nonExistentList.add(family);
}
}
if (nonExistentList != null) {
for (byte[] family : nonExistentList) {
// Perhaps schema was changed between crash and replay
LOG.info("No family for {}, omitting from replay in region {}.", Bytes.toString(family), this);
familyCellMap.remove(family);
}
}
}
@Override
public void checkAndPrepare() throws IOException {
long now = EnvironmentEdgeManager.currentTime();
visitBatchOperations(true, this.size(), (int index) -> {
checkAndPrepareMutation(index, now);
return true;
});
}
@Override
public void prepareMiniBatchOperations(MiniBatchOperationInProgress<Mutation> miniBatchOp,
long timestamp, final List<RowLock> acquiredRowLocks) throws IOException {
visitBatchOperations(true, miniBatchOp.getLastIndexExclusive(), (int index) -> {
// update cell count
for (List<Cell> cells : getMutation(index).getFamilyCellMap().values()) {
miniBatchOp.addCellCount(cells.size());
}
return true;
});
}
@Override
public WriteEntry writeMiniBatchOperationsToMemStore(
final MiniBatchOperationInProgress<Mutation> miniBatchOp, final WriteEntry writeEntry)
throws IOException {
super.writeMiniBatchOperationsToMemStore(miniBatchOp, getOrigLogSeqNum());
return writeEntry;
}
@Override
public void completeMiniBatchOperations(
final MiniBatchOperationInProgress<Mutation> miniBatchOp, final WriteEntry writeEntry)
throws IOException {
super.completeMiniBatchOperations(miniBatchOp, writeEntry);
region.mvcc.advanceTo(getOrigLogSeqNum());
}
}
public OperationStatus[] batchMutate(Mutation[] mutations, boolean atomic, long nonceGroup,
long nonce) throws IOException {
// As it stands, this is used for 3 things:
// * batchMutate with a single mutation - put/delete/increment/append
// * checkAndMutate
// * coprocessor calls (see ex. BulkDeleteEndpoint).
// So nonces are not really ever used by HBase. They could be by coprocs, and checkAnd...
return batchMutate(new MutationBatchOperation(this, mutations, atomic, nonceGroup, nonce));
}
@Override
public OperationStatus[] batchMutate(Mutation[] mutations) throws IOException {
// If the mutations has any Increment/Append operations, we need to do batchMutate atomically
boolean atomic = Arrays.stream(mutations)
.anyMatch(m -> m instanceof Increment || m instanceof Append);
return batchMutate(mutations, atomic, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
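/*
* A minimal usage sketch with hypothetical row/family/qualifier bytes: a batch that mixes a Put
* with an Increment is forced atomic by the check above, and per-operation outcomes come back in
* the returned OperationStatus array.
*
*   Mutation[] batch = new Mutation[] {
*     new Put(row1).addColumn(cf, q, Bytes.toBytes("v")),
*     new Increment(row2).addColumn(cf, q, 1L)
*   };
*   OperationStatus[] statuses = region.batchMutate(batch);
*   boolean allOk = Arrays.stream(statuses)
*       .allMatch(s -> s.getOperationStatusCode() == OperationStatusCode.SUCCESS);
*/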
public OperationStatus[] batchReplay(MutationReplay[] mutations, long replaySeqId)
throws IOException {
if (!RegionReplicaUtil.isDefaultReplica(getRegionInfo())
&& replaySeqId < lastReplayedOpenRegionSeqId) {
// if it is a secondary replica we should ignore these entries silently
// since they are coming out of order
if (LOG.isTraceEnabled()) {
LOG.trace(getRegionInfo().getEncodedName() + " : "
+ "Skipping " + mutations.length + " mutations with replaySeqId=" + replaySeqId
+ " which is < than lastReplayedOpenRegionSeqId=" + lastReplayedOpenRegionSeqId);
for (MutationReplay mut : mutations) {
LOG.trace(getRegionInfo().getEncodedName() + " : Skipping : " + mut.mutation);
}
}
OperationStatus[] statuses = new OperationStatus[mutations.length];
for (int i = 0; i < statuses.length; i++) {
statuses[i] = OperationStatus.SUCCESS;
}
return statuses;
}
return batchMutate(new ReplayBatchOperation(this, mutations, replaySeqId));
}
/**
* Perform a batch of mutations.
*
* It supports Put, Delete, Increment, Append mutations and will ignore other types passed.
* Operations in a batch are stored with the highest durability specified for any operation in the
* batch, except for {@link Durability#SKIP_WAL}.
*
* <p>This function is called from {@link #batchReplay(WALSplitUtil.MutationReplay[], long)} with
* {@link ReplayBatchOperation} instance and {@link #batchMutate(Mutation[])} with
* {@link MutationBatchOperation} instance as an argument. As the processing of replay batch
* and mutation batch is very similar, a lot of code is shared by providing generic methods in the
* base class {@link BatchOperation}. The logic for this method and
* {@link #doMiniBatchMutate(BatchOperation)} is implemented using methods in base class which
* are overridden by derived classes to implement special behavior.
*
* @param batchOp contains the list of mutations
* @return an array of OperationStatus which internally contains the
* OperationStatusCode and the exceptionMessage if any.
* @throws IOException if an IO problem is encountered
*/
OperationStatus[] batchMutate(BatchOperation<?> batchOp) throws IOException {
boolean initialized = false;
batchOp.startRegionOperation();
try {
while (!batchOp.isDone()) {
if (!batchOp.isInReplay()) {
checkReadOnly();
}
checkResources();
if (!initialized) {
this.writeRequestsCount.add(batchOp.size());
// validate and prepare batch for write, for MutationBatchOperation it also calls CP
// prePut()/preDelete()/preIncrement()/preAppend() hooks
batchOp.checkAndPrepare();
initialized = true;
}
doMiniBatchMutate(batchOp);
requestFlushIfNeeded();
}
} finally {
if (rsServices != null && rsServices.getMetrics() != null) {
rsServices.getMetrics().updateWriteQueryMeter(this.htableDescriptor.
getTableName(), batchOp.size());
}
batchOp.closeRegionOperation();
}
return batchOp.retCodeDetails;
}
/**
* Called to do a piece of the batch that came in to {@link #batchMutate(Mutation[])}.
* In here we also handle replay of edits on region recovery. Also records the change in size brought
* about by applying {@code batchOp}.
*/
private void doMiniBatchMutate(BatchOperation<?> batchOp) throws IOException {
boolean success = false;
WALEdit walEdit = null;
WriteEntry writeEntry = null;
boolean locked = false;
// We try to set up a batch in the range [batchOp.nextIndexToProcess,lastIndexExclusive)
MiniBatchOperationInProgress<Mutation> miniBatchOp = null;
/** Keep track of the locks we hold so we can release them in finally clause */
List<RowLock> acquiredRowLocks = Lists.newArrayListWithCapacity(batchOp.size());
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation.
checkInterrupt();
try {
// STEP 1. Try to acquire as many locks as we can and build mini-batch of operations with
// locked rows
miniBatchOp = batchOp.lockRowsAndBuildMiniBatch(acquiredRowLocks);
// We've now grabbed as many mutations off the list as we can
// Ensure we acquire at least one.
if (miniBatchOp.getReadyToWriteCount() <= 0) {
// Nothing to put/delete/increment/append -- an exception in the above such as
// NoSuchColumnFamily?
return;
}
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation. Do it before we take the lock and disable interrupts for
// the WAL append.
checkInterrupt();
lock(this.updatesLock.readLock(), miniBatchOp.getReadyToWriteCount());
locked = true;
// From this point until memstore update this operation should not be interrupted.
disableInterrupts();
// STEP 2. Update mini batch of all operations in progress with LATEST_TIMESTAMP timestamp
// We should record the timestamp only after we have acquired the rowLock,
// otherwise, newer puts/deletes/increment/append are not guaranteed to have a newer
// timestamp
long now = EnvironmentEdgeManager.currentTime();
batchOp.prepareMiniBatchOperations(miniBatchOp, now, acquiredRowLocks);
// STEP 3. Build WAL edit
List<Pair<NonceKey, WALEdit>> walEdits = batchOp.buildWALEdits(miniBatchOp);
// STEP 4. Append the WALEdits to WAL and sync.
for(Iterator<Pair<NonceKey, WALEdit>> it = walEdits.iterator(); it.hasNext();) {
Pair<NonceKey, WALEdit> nonceKeyWALEditPair = it.next();
walEdit = nonceKeyWALEditPair.getSecond();
NonceKey nonceKey = nonceKeyWALEditPair.getFirst();
if (walEdit != null && !walEdit.isEmpty()) {
writeEntry = doWALAppend(walEdit, batchOp.durability, batchOp.getClusterIds(), now,
nonceKey.getNonceGroup(), nonceKey.getNonce(), batchOp.getOrigLogSeqNum());
}
// Complete mvcc for all but last writeEntry (for replay case)
if (it.hasNext() && writeEntry != null) {
mvcc.complete(writeEntry);
writeEntry = null;
}
}
// STEP 5. Write back to memStore
// NOTE: writeEntry can be null here
writeEntry = batchOp.writeMiniBatchOperationsToMemStore(miniBatchOp, writeEntry);
// STEP 6. Complete MiniBatchOperations: If required calls postBatchMutate() CP hook and
// complete mvcc for last writeEntry
batchOp.completeMiniBatchOperations(miniBatchOp, writeEntry);
writeEntry = null;
success = true;
} finally {
// Call complete rather than completeAndWait because we probably had error if walKey != null
if (writeEntry != null) mvcc.complete(writeEntry);
if (locked) {
this.updatesLock.readLock().unlock();
}
releaseRowLocks(acquiredRowLocks);
enableInterrupts();
final int finalLastIndexExclusive =
miniBatchOp != null ? miniBatchOp.getLastIndexExclusive() : batchOp.size();
final boolean finalSuccess = success;
batchOp.visitBatchOperations(true, finalLastIndexExclusive,
(int i) -> {
Mutation mutation = batchOp.getMutation(i);
if (mutation instanceof Increment || mutation instanceof Append) {
if (finalSuccess) {
batchOp.retCodeDetails[i] = new OperationStatus(OperationStatusCode.SUCCESS,
batchOp.results[i]);
} else {
batchOp.retCodeDetails[i] = OperationStatus.FAILURE;
}
} else {
batchOp.retCodeDetails[i] =
finalSuccess ? OperationStatus.SUCCESS : OperationStatus.FAILURE;
}
return true;
});
batchOp.doPostOpCleanupForMiniBatch(miniBatchOp, walEdit, finalSuccess);
batchOp.nextIndexToProcess = finalLastIndexExclusive;
}
}
/**
* Returns effective durability from the passed durability and
* the table descriptor.
*/
protected Durability getEffectiveDurability(Durability d) {
return d == Durability.USE_DEFAULT ? this.regionDurability : d;
}
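/*
* A small sketch of the resolution above, with a hypothetical Put: a mutation left at
* Durability.USE_DEFAULT resolves to the table's configured durability, while an explicit
* setting is honored as-is.
*
*   Put p = new Put(row);                                               // durability is USE_DEFAULT
*   Durability d1 = region.getEffectiveDurability(p.getDurability());   // table default
*   p.setDurability(Durability.SKIP_WAL);
*   Durability d2 = region.getEffectiveDurability(p.getDurability());   // SKIP_WAL
*/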
@Override
@Deprecated
public boolean checkAndMutate(byte[] row, byte[] family, byte[] qualifier, CompareOperator op,
ByteArrayComparable comparator, TimeRange timeRange, Mutation mutation) throws IOException {
CheckAndMutate checkAndMutate;
try {
CheckAndMutate.Builder builder = CheckAndMutate.newBuilder(row)
.ifMatches(family, qualifier, op, comparator.getValue()).timeRange(timeRange);
if (mutation instanceof Put) {
checkAndMutate = builder.build((Put) mutation);
} else if (mutation instanceof Delete) {
checkAndMutate = builder.build((Delete) mutation);
} else {
throw new DoNotRetryIOException("Unsupported mutate type: " + mutation.getClass()
.getSimpleName().toUpperCase());
}
} catch (IllegalArgumentException e) {
throw new DoNotRetryIOException(e.getMessage());
}
return checkAndMutate(checkAndMutate).isSuccess();
}
@Override
@Deprecated
public boolean checkAndMutate(byte[] row, Filter filter, TimeRange timeRange, Mutation mutation)
throws IOException {
CheckAndMutate checkAndMutate;
try {
CheckAndMutate.Builder builder = CheckAndMutate.newBuilder(row).ifMatches(filter)
.timeRange(timeRange);
if (mutation instanceof Put) {
checkAndMutate = builder.build((Put) mutation);
} else if (mutation instanceof Delete) {
checkAndMutate = builder.build((Delete) mutation);
} else {
throw new DoNotRetryIOException("Unsupported mutate type: " + mutation.getClass()
.getSimpleName().toUpperCase());
}
} catch (IllegalArgumentException e) {
throw new DoNotRetryIOException(e.getMessage());
}
return checkAndMutate(checkAndMutate).isSuccess();
}
@Override
@Deprecated
public boolean checkAndRowMutate(byte[] row, byte[] family, byte[] qualifier, CompareOperator op,
ByteArrayComparable comparator, TimeRange timeRange, RowMutations rm) throws IOException {
CheckAndMutate checkAndMutate;
try {
checkAndMutate = CheckAndMutate.newBuilder(row)
.ifMatches(family, qualifier, op, comparator.getValue()).timeRange(timeRange).build(rm);
} catch (IllegalArgumentException e) {
throw new DoNotRetryIOException(e.getMessage());
}
return checkAndMutate(checkAndMutate).isSuccess();
}
@Override
@Deprecated
public boolean checkAndRowMutate(byte[] row, Filter filter, TimeRange timeRange, RowMutations rm)
throws IOException {
CheckAndMutate checkAndMutate;
try {
checkAndMutate = CheckAndMutate.newBuilder(row).ifMatches(filter).timeRange(timeRange)
.build(rm);
} catch (IllegalArgumentException e) {
throw new DoNotRetryIOException(e.getMessage());
}
return checkAndMutate(checkAndMutate).isSuccess();
}
@Override
public CheckAndMutateResult checkAndMutate(CheckAndMutate checkAndMutate) throws IOException {
byte[] row = checkAndMutate.getRow();
Filter filter = null;
byte[] family = null;
byte[] qualifier = null;
CompareOperator op = null;
ByteArrayComparable comparator = null;
if (checkAndMutate.hasFilter()) {
filter = checkAndMutate.getFilter();
} else {
family = checkAndMutate.getFamily();
qualifier = checkAndMutate.getQualifier();
op = checkAndMutate.getCompareOp();
comparator = new BinaryComparator(checkAndMutate.getValue());
}
TimeRange timeRange = checkAndMutate.getTimeRange();
Mutation mutation = null;
RowMutations rowMutations = null;
if (checkAndMutate.getAction() instanceof Mutation) {
mutation = (Mutation) checkAndMutate.getAction();
} else {
rowMutations = (RowMutations) checkAndMutate.getAction();
}
if (mutation != null) {
checkMutationType(mutation);
checkRow(mutation, row);
} else {
checkRow(rowMutations, row);
}
checkReadOnly();
// TODO: add check for value length; also move this check to the client side
checkResources();
startRegionOperation();
try {
Get get = new Get(row);
if (family != null) {
checkFamily(family);
get.addColumn(family, qualifier);
}
if (filter != null) {
get.setFilter(filter);
}
if (timeRange != null) {
get.setTimeRange(timeRange.getMin(), timeRange.getMax());
}
// Lock row - note that doBatchMutate will relock this row if called
checkRow(row, "doCheckAndRowMutate");
RowLock rowLock = getRowLockInternal(get.getRow(), false, null);
try {
if (this.getCoprocessorHost() != null) {
CheckAndMutateResult result =
getCoprocessorHost().preCheckAndMutateAfterRowLock(checkAndMutate);
if (result != null) {
return result;
}
}
// NOTE: We used to wait here until mvcc caught up: mvcc.await();
// Supposition is that now all changes are done under row locks, so when we go to read,
// we'll get the latest on this row.
List<Cell> result = get(get, false);
boolean matches = false;
long cellTs = 0;
if (filter != null) {
if (!result.isEmpty()) {
matches = true;
cellTs = result.get(0).getTimestamp();
}
} else {
boolean valueIsNull = comparator.getValue() == null || comparator.getValue().length == 0;
if (result.isEmpty() && valueIsNull) {
matches = true;
} else if (result.size() > 0 && result.get(0).getValueLength() == 0 && valueIsNull) {
matches = true;
cellTs = result.get(0).getTimestamp();
} else if (result.size() == 1 && !valueIsNull) {
Cell kv = result.get(0);
cellTs = kv.getTimestamp();
int compareResult = PrivateCellUtil.compareValue(kv, comparator);
matches = matches(op, compareResult);
}
}
// If matches, perform the mutation or the rowMutations
if (matches) {
// We have acquired the row lock already. If the system clock is NOT monotonically
// non-decreasing (see HBASE-14070) we should make sure that the mutation has a
// larger timestamp than what was observed via Get. doBatchMutate already does this, but
// there is no way to pass the cellTs. See HBASE-14054.
long now = EnvironmentEdgeManager.currentTime();
long ts = Math.max(now, cellTs); // ensure write is not eclipsed
byte[] byteTs = Bytes.toBytes(ts);
if (mutation != null) {
if (mutation instanceof Put) {
updateCellTimestamps(mutation.getFamilyCellMap().values(), byteTs);
}
// And else 'delete' is not needed since it already does a second get, and sets the
// timestamp from get (see prepareDeleteTimestamps).
} else {
for (Mutation m: rowMutations.getMutations()) {
if (m instanceof Put) {
updateCellTimestamps(m.getFamilyCellMap().values(), byteTs);
}
}
// And else 'delete' is not needed since it already does a second get, and sets the
// timestamp from get (see prepareDeleteTimestamps).
}
// All edits for the given row (across all column families) must happen atomically.
Result r = null;
if (mutation != null) {
r = doBatchMutate(mutation, true).getResult();
} else {
mutateRow(rowMutations);
}
this.checkAndMutateChecksPassed.increment();
return new CheckAndMutateResult(true, r);
}
this.checkAndMutateChecksFailed.increment();
return new CheckAndMutateResult(false, null);
} finally {
rowLock.release();
}
} finally {
closeRegionOperation();
}
}
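// Client-side usage sketch (illustrative, not part of this class; assumes an open
// Connection 'conn' and a table named "t1"): the CheckAndMutate path above is what the
// newer client API ends up calling.
//   try (Table table = conn.getTable(TableName.valueOf("t1"))) {
//     CheckAndMutate cam = CheckAndMutate.newBuilder(row)
//         .ifEquals(family, qualifier, expectedValue)
//         .build(new Put(row).addColumn(family, qualifier, newValue));
//     CheckAndMutateResult result = table.checkAndMutate(cam);
//     boolean applied = result.isSuccess();
//   }
// The deprecated checkAndMutate/checkAndRowMutate overloads above simply translate the old
// row/family/qualifier/comparator arguments into this same CheckAndMutate form.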
private void checkMutationType(final Mutation mutation)
throws DoNotRetryIOException {
if (!(mutation instanceof Put) && !(mutation instanceof Delete) &&
!(mutation instanceof Increment) && !(mutation instanceof Append)) {
throw new org.apache.hadoop.hbase.DoNotRetryIOException(
"Action must be Put or Delete or Increment or Delete");
}
}
private void checkRow(final Row action, final byte[] row)
throws DoNotRetryIOException {
if (!Bytes.equals(row, action.getRow())) {
throw new org.apache.hadoop.hbase.DoNotRetryIOException("Action's getRow must match");
}
}
private boolean matches(final CompareOperator op, final int compareResult) {
boolean matches = false;
switch (op) {
case LESS:
matches = compareResult < 0;
break;
case LESS_OR_EQUAL:
matches = compareResult <= 0;
break;
case EQUAL:
matches = compareResult == 0;
break;
case NOT_EQUAL:
matches = compareResult != 0;
break;
case GREATER_OR_EQUAL:
matches = compareResult >= 0;
break;
case GREATER:
matches = compareResult > 0;
break;
default:
throw new RuntimeException("Unknown Compare op " + op.name());
}
return matches;
}
private OperationStatus doBatchMutate(Mutation mutation) throws IOException {
return doBatchMutate(mutation, false);
}
private OperationStatus doBatchMutate(Mutation mutation, boolean atomic) throws IOException {
return doBatchMutate(mutation, atomic, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
private OperationStatus doBatchMutate(Mutation mutation, boolean atomic, long nonceGroup,
long nonce) throws IOException {
OperationStatus[] batchMutate = this.batchMutate(new Mutation[]{mutation}, atomic,
nonceGroup, nonce);
if (batchMutate[0].getOperationStatusCode().equals(OperationStatusCode.SANITY_CHECK_FAILURE)) {
throw new FailedSanityCheckException(batchMutate[0].getExceptionMsg());
} else if (batchMutate[0].getOperationStatusCode().equals(OperationStatusCode.BAD_FAMILY)) {
throw new NoSuchColumnFamilyException(batchMutate[0].getExceptionMsg());
} else if (batchMutate[0].getOperationStatusCode().equals(
OperationStatusCode.STORE_TOO_BUSY)) {
throw new RegionTooBusyException(batchMutate[0].getExceptionMsg());
}
return batchMutate[0];
}
/**
* Complete taking the snapshot on the region. Writes the region info and adds references to the
* working snapshot directory.
*
* TODO for api consistency, consider adding another version with no {@link ForeignExceptionSnare}
* arg. (In the future other cancellable HRegion methods could eventually add a
* {@link ForeignExceptionSnare}, or we could do something fancier).
*
* @param desc snapshot description object
* @param exnSnare ForeignExceptionSnare that captures external exceptions in case we need to
* bail out. This is allowed to be null and will just be ignored in that case.
* @throws IOException if there is an external or internal error causing the snapshot to fail
*/
public void addRegionToSnapshot(SnapshotDescription desc,
ForeignExceptionSnare exnSnare) throws IOException {
Path rootDir = CommonFSUtils.getRootDir(conf);
Path snapshotDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(desc, rootDir, conf);
SnapshotManifest manifest = SnapshotManifest.create(conf, getFilesystem(),
snapshotDir, desc, exnSnare);
manifest.addRegion(this);
}
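// Note (descriptive, not normative): the working snapshot directory used above normally
// lives under the cluster root dir (by default something like
// <rootdir>/.hbase-snapshot/.tmp/<snapshot name>, though the exact location is
// configurable). The manifest written there records this region's RegionInfo and
// references to its store files; no store file data is copied.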
private void updateSequenceId(final Iterable<List<Cell>> cellItr, final long sequenceId)
throws IOException {
for (List<Cell> cells: cellItr) {
if (cells == null) return;
for (Cell cell : cells) {
PrivateCellUtil.setSequenceId(cell, sequenceId);
}
}
}
/**
* Replace any cell timestamps set to {@link org.apache.hadoop.hbase.HConstants#LATEST_TIMESTAMP}
* with the provided current timestamp.
* @param cellItr iterable of per-family cell lists to update
* @param now current timestamp as a byte array
*/
private static void updateCellTimestamps(final Iterable<List<Cell>> cellItr, final byte[] now)
throws IOException {
for (List<Cell> cells: cellItr) {
if (cells == null) continue;
// Optimization: 'foreach' loop is not used. See:
// HBASE-12023 HRegion.applyFamilyMapToMemstore creates too many iterator objects
assert cells instanceof RandomAccess;
int listSize = cells.size();
for (int i = 0; i < listSize; i++) {
PrivateCellUtil.updateLatestStamp(cells.get(i), now);
}
}
}
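// Illustrative sketch (client side, not executed here): cells added without an explicit
// timestamp carry HConstants.LATEST_TIMESTAMP until rewritten above, e.g.
//   Put p = new Put(row);
//   p.addColumn(family, qualifier, value);   // cell ts == HConstants.LATEST_TIMESTAMP
// After updateCellTimestamps(...) runs server side, such cells carry the passed-in 'now',
// while cells that already had an explicit timestamp are left untouched.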
/**
* Possibly rewrite incoming cell tags.
*/
void rewriteCellTags(Map<byte[], List<Cell>> familyMap, final Mutation m) {
// Check if we have any work to do and early out otherwise
// Update these checks as more logic is added here
if (m.getTTL() == Long.MAX_VALUE) {
return;
}
// From this point we know we have some work to do
for (Map.Entry<byte[], List<Cell>> e: familyMap.entrySet()) {
List<Cell> cells = e.getValue();
assert cells instanceof RandomAccess;
int listSize = cells.size();
for (int i = 0; i < listSize; i++) {
Cell cell = cells.get(i);
List<Tag> newTags = TagUtil.carryForwardTags(null, cell);
newTags = TagUtil.carryForwardTTLTag(newTags, m.getTTL());
// Rewrite the cell with the updated set of tags
cells.set(i, PrivateCellUtil.createCell(cell, newTags));
}
}
}
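// Illustrative sketch: the only rewrite currently done here is carrying a per-mutation TTL
// forward as a cell tag. For a client mutation like
//   Put p = new Put(row).addColumn(family, qualifier, value);
//   p.setTTL(3_600_000L);   // TTL in ms; Long.MAX_VALUE means "no per-mutation TTL"
// each of p's cells gets a TTL tag appended before the write, so the cell can expire
// independently of the column family TTL. (Sketch only; setTTL is on Mutation.)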
/*
* Check if we have the resources to support an update.
*
* We throw RegionTooBusyException if we are above the memstore limit
* and expect the client to retry using some kind of backoff
*/
void checkResources() throws RegionTooBusyException {
// If catalog region, do not impose resource constraints or block updates.
if (this.getRegionInfo().isMetaRegion()) return;
MemStoreSize mss = this.memStoreSizing.getMemStoreSize();
if (mss.getHeapSize() + mss.getOffHeapSize() > this.blockingMemStoreSize) {
blockedRequestsCount.increment();
requestFlush();
// Don't print current limit because it will vary too much. The message is used as a key
// over in RetriesExhaustedWithDetailsException processing.
final String regionName =
this.getRegionInfo() == null ? "unknown" : this.getRegionInfo().getEncodedName();
final String serverName = this.getRegionServerServices() == null ?
"unknown" : (this.getRegionServerServices().getServerName() == null ? "unknown" :
this.getRegionServerServices().getServerName().toString());
RegionTooBusyException rtbe = new RegionTooBusyException(
"Over memstore limit=" + org.apache.hadoop.hbase.procedure2.util.StringUtils
.humanSize(this.blockingMemStoreSize) + ", regionName=" + regionName + ", server="
+ serverName);
LOG.warn("Region is too busy due to exceeding memstore size limit.", rtbe);
throw rtbe;
}
}
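// Client-side sketch (illustrative, not part of this class; exception handling elided):
// callers are expected to treat RegionTooBusyException as retryable and back off, roughly
//   for (int attempt = 0; attempt < maxAttempts; attempt++) {
//     try {
//       table.put(put);
//       break;
//     } catch (RegionTooBusyException e) {
//       Thread.sleep(baseBackoffMillis << attempt);   // exponential backoff before retry
//     }
//   }
// In practice the standard HBase client applies its own retry/backoff policy for this.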
/**
* @throws IOException Throws exception if region is in read-only mode.
*/
protected void checkReadOnly() throws IOException {
if (isReadOnly()) {
throw new DoNotRetryIOException("region is read only");
}
}
protected void checkReadsEnabled() throws IOException {
if (!this.writestate.readsEnabled) {
throw new IOException(getRegionInfo().getEncodedName()
+ ": The region's reads are disabled. Cannot serve the request");
}
}
public void setReadsEnabled(boolean readsEnabled) {
if (readsEnabled && !this.writestate.readsEnabled) {
LOG.info("Enabling reads for {}", getRegionInfo().getEncodedName());
}
this.writestate.setReadsEnabled(readsEnabled);
}
/**
* Add updates first to the wal and then add values to memstore.
* <p>
* Warning: Assumption is caller has lock on passed in row.
* @param edits Cell updates by column
*/
void put(final byte[] row, byte[] family, List<Cell> edits) throws IOException {
NavigableMap<byte[], List<Cell>> familyMap;
familyMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
familyMap.put(family, edits);
Put p = new Put(row, HConstants.LATEST_TIMESTAMP, familyMap);
doBatchMutate(p);
}
/**
* @param delta If we are doing delta changes -- e.g. increment/append -- then this flag will be
* set; when set we will run operations that make sense in the increment/append scenario
* but that do not make sense otherwise.
* @see #applyToMemStore(HStore, Cell, MemStoreSizing)
*/
private void applyToMemStore(HStore store, List<Cell> cells, boolean delta,
MemStoreSizing memstoreAccounting) throws IOException {
// Any change in how we update Store/MemStore needs to also be done in other applyToMemStore!!!!
boolean upsert = delta && store.getColumnFamilyDescriptor().getMaxVersions() == 1;
if (upsert) {
store.upsert(cells, getSmallestReadPoint(), memstoreAccounting);
} else {
store.add(cells, memstoreAccounting);
}
}
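// Design note (sketch, client side): the upsert path applies when a family keeps only one
// version, e.g. a family declared with
//   ColumnFamilyDescriptorBuilder.newBuilder(family).setMaxVersions(1).build();
// Increments/appends on such a family can replace the existing cell in the memstore rather
// than stacking new versions, avoiding version accumulation between flushes.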
/**
* @see #applyToMemStore(HStore, List, boolean, MemStoreSizing)
*/
private void applyToMemStore(HStore store, Cell cell, MemStoreSizing memstoreAccounting)
throws IOException {
// Any change in how we update Store/MemStore needs to also be done in other applyToMemStore!!!!
if (store == null) {
checkFamily(CellUtil.cloneFamily(cell));
// Unreachable because checkFamily will throw exception
}
store.add(cell, memstoreAccounting);
}
private void checkFamilies(Collection<byte[]> families, Durability durability)
throws NoSuchColumnFamilyException, InvalidMutationDurabilityException {
for (byte[] family : families) {
checkFamily(family, durability);
}
}
private void checkFamily(final byte[] family, Durability durability)
throws NoSuchColumnFamilyException, InvalidMutationDurabilityException {
checkFamily(family);
if (durability.equals(Durability.SKIP_WAL)
&& htableDescriptor.getColumnFamily(family).getScope()
!= HConstants.REPLICATION_SCOPE_LOCAL) {
throw new InvalidMutationDurabilityException(
"Mutation's durability is SKIP_WAL but table's column family " + Bytes.toString(family)
+ " need replication");
}
}
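// Illustrative failure case: for a column family whose replication scope is not
// REPLICATION_SCOPE_LOCAL (i.e. the family is replicated), a client mutation such as
//   Put p = new Put(row).addColumn(family, qualifier, value);
//   p.setDurability(Durability.SKIP_WAL);
// is rejected here with InvalidMutationDurabilityException, because skipping the WAL
// would silently drop the edit from replication.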
void checkFamily(final byte[] family) throws NoSuchColumnFamilyException {
if (!this.htableDescriptor.hasColumnFamily(family)) {
throw new NoSuchColumnFamilyException(
"Column family " + Bytes.toString(family) + " does not exist in region " + this
+ " in table " + this.htableDescriptor);
}
}
/**
* Check the collection of families for valid timestamps
* @param familyMap map of column family name to list of cells to check
* @param now current timestamp
* @throws FailedSanityCheckException if any cell timestamp is too far in the future
*/
public void checkTimestamps(final Map<byte[], List<Cell>> familyMap, long now)
throws FailedSanityCheckException {
if (timestampSlop == HConstants.LATEST_TIMESTAMP) {
return;
}
long maxTs = now + timestampSlop;
for (List<Cell> kvs : familyMap.values()) {
// Optimization: 'foreach' loop is not used. See:
// HBASE-12023 HRegion.applyFamilyMapToMemstore creates too many iterator objects
assert kvs instanceof RandomAccess;
int listSize = kvs.size();
for (int i=0; i < listSize; i++) {
Cell cell = kvs.get(i);
// see if the user-side TS is out of range. latest = server-side
long ts = cell.getTimestamp();
if (ts != HConstants.LATEST_TIMESTAMP && ts > maxTs) {
throw new FailedSanityCheckException("Timestamp for KV out of range "
+ cell + " (too.new=" + timestampSlop + ")");
}
}
}
}
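// Illustrative failure case (assuming a timestamp slop has been configured, commonly via
// "hbase.hregion.keyvalue.timestamp.slop.millisecs"; when the slop is left at
// LATEST_TIMESTAMP the early return above skips the check entirely): a client cell stamped
// far in the future, e.g.
//   Put p = new Put(row);
//   p.addColumn(family, qualifier, System.currentTimeMillis() + tooFarMillis, value);
// fails this sanity check once tooFarMillis exceeds the configured slop, while cells using
// HConstants.LATEST_TIMESTAMP are always accepted.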
/*
* @param size
* @return True if size is over the flush threshold
*/
private boolean isFlushSize(MemStoreSize size) {
return size.getHeapSize() + size.getOffHeapSize() > getMemStoreFlushSize();
}
private void deleteRecoveredEdits(FileSystem fs, Iterable<Path> files) throws IOException {
for (Path file : files) {
if (!fs.delete(file, false)) {
LOG.error("Failed delete of {}", file);
} else {
LOG.debug("Deleted recovered.edits file={}", file);
}
}
}
/**
* Read the edits put under this region by wal splitting process. Put
* the recovered edits back up into this region.
*
* <p>We can ignore any wal message that has a sequence ID that's equal to or
* lower than minSeqId. (Because we know such messages are already
* reflected in the HFiles.)
*
* <p>While this is running we are putting pressure on memory yet we are
* outside of our usual accounting because we are not yet an onlined region
* (this stuff is being run as part of Region initialization). This means
* that if we're up against global memory limits, we'll not be flagged to flush
* because we are not online. We can't be flushed by usual mechanisms anyways;
* we're not yet online so our relative sequenceids are not yet aligned with
* WAL sequenceids -- not till we come up online, post processing of split
* edits.
*
* <p>But to help relieve memory pressure, we at least manage our own heap size by
* flushing if we are in excess of per-region limits. When flushing, though, we have
* to be careful and avoid using the regionserver/wal sequenceid. It is running
* on a different timeline from what is going on here in this region context, so if we
* crashed replaying these edits, but in the midst had a flush that used the
* regionserver wal with a sequenceid in excess of what is going on here
* in this region and its split editlogs, then we could miss edits the
* next time we go to recover. So, we have to flush inline, using seqids that
* make sense in this single-region context only -- until we come online.
*
* @param maxSeqIdInStores Any edit found in split editlogs needs to be in excess of
* the maxSeqId for the store to be applied, else it is skipped.
* @return the sequence id of the last edit added to this region out of the
* recovered edits log or <code>minSeqId</code> if nothing added from editlogs.
*/
long replayRecoveredEditsIfAny(Map<byte[], Long> maxSeqIdInStores,
final CancelableProgressable reporter, final MonitoredTask status) throws IOException {
long minSeqIdForTheRegion = -1;
for (Long maxSeqIdInStore : maxSeqIdInStores.values()) {
if (maxSeqIdInStore < minSeqIdForTheRegion || minSeqIdForTheRegion == -1) {
minSeqIdForTheRegion = maxSeqIdInStore;
}
}
long seqId = minSeqIdForTheRegion;
String specialRecoveredEditsDirStr = conf.get(SPECIAL_RECOVERED_EDITS_DIR);
if (org.apache.commons.lang3.StringUtils.isBlank(specialRecoveredEditsDirStr)) {
FileSystem walFS = getWalFileSystem();
FileSystem rootFS = getFilesystem();
Path wrongRegionWALDir = CommonFSUtils.getWrongWALRegionDir(conf, getRegionInfo().getTable(),
getRegionInfo().getEncodedName());
Path regionWALDir = getWALRegionDir();
Path regionDir =
FSUtils.getRegionDirFromRootDir(CommonFSUtils.getRootDir(conf), getRegionInfo());
// We made a mistake in HBASE-20734 so we need to do this dirty hack...
NavigableSet<Path> filesUnderWrongRegionWALDir =
WALSplitUtil.getSplitEditFilesSorted(walFS, wrongRegionWALDir);
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
filesUnderWrongRegionWALDir, reporter, regionDir));
// This is to ensure backwards compatibility with HBASE-20723 where recovered edits can appear
// under the root dir even if walDir is set.
NavigableSet<Path> filesUnderRootDir = Collections.emptyNavigableSet();
if (!regionWALDir.equals(regionDir)) {
filesUnderRootDir = WALSplitUtil.getSplitEditFilesSorted(rootFS, regionDir);
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, rootFS,
filesUnderRootDir, reporter, regionDir));
}
NavigableSet<Path> files = WALSplitUtil.getSplitEditFilesSorted(walFS, regionWALDir);
seqId = Math.max(seqId,
replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS, files, reporter, regionWALDir));
if (seqId > minSeqIdForTheRegion) {
// Then we added some edits to memory. Flush and cleanup split edit files.
internalFlushcache(null, seqId, stores.values(), status, false,
FlushLifeCycleTracker.DUMMY);
}
// Now delete the content of recovered edits. We're done w/ them.
if (files.size() > 0 && this.conf.getBoolean("hbase.region.archive.recovered.edits", false)) {
// For debugging data loss issues!
// If this flag is set, make use of the hfile archiving by making recovered.edits a fake
// column family. Have to fake out file type too by casting our recovered.edits as
// storefiles
String fakeFamilyName = WALSplitUtil.getRegionDirRecoveredEditsDir(regionWALDir).getName();
Set<HStoreFile> fakeStoreFiles = new HashSet<>(files.size());
for (Path file : files) {
fakeStoreFiles.add(new HStoreFile(walFS, file, this.conf, null, null, true));
}
getRegionWALFileSystem().archiveRecoveredEdits(fakeFamilyName, fakeStoreFiles);
} else {
deleteRecoveredEdits(walFS, Iterables.concat(files, filesUnderWrongRegionWALDir));
deleteRecoveredEdits(rootFS, filesUnderRootDir);
}
} else {
Path recoveredEditsDir = new Path(specialRecoveredEditsDirStr);
FileSystem fs = recoveredEditsDir.getFileSystem(conf);
FileStatus[] files = fs.listStatus(recoveredEditsDir);
LOG.debug("Found {} recovered edits file(s) under {}", files == null ? 0 : files.length,
recoveredEditsDir);
if (files != null) {
for (FileStatus file : files) {
// it is safe to trust the zero-length in this case because we've been through rename and
// lease recovery in the above.
if (isZeroLengthThenDelete(fs, file, file.getPath())) {
continue;
}
seqId =
Math.max(seqId, replayRecoveredEdits(file.getPath(), maxSeqIdInStores, reporter, fs));
}
}
if (seqId > minSeqIdForTheRegion) {
// Then we added some edits to memory. Flush and cleanup split edit files.
internalFlushcache(null, seqId, stores.values(), status, false,
FlushLifeCycleTracker.DUMMY);
}
deleteRecoveredEdits(fs,
Stream.of(files).map(FileStatus::getPath).collect(Collectors.toList()));
}
return seqId;
}
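// Layout sketch (illustrative): the recovered edits picked up above live in per-region
// directories produced by WAL splitting and are conventionally named by the maximum
// sequence id they contain, e.g. something like
//   <walDir>/data/<ns>/<table>/<encoded-region>/recovered.edits/0000000000000012345
// which is why replayRecoveredEditsForPaths below can parse the file name as a sequence id
// and skip whole files whose max seqid is already covered by the stores.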
private long replayRecoveredEditsForPaths(long minSeqIdForTheRegion, FileSystem fs,
final NavigableSet<Path> files, final CancelableProgressable reporter, final Path regionDir)
throws IOException {
long seqid = minSeqIdForTheRegion;
if (LOG.isDebugEnabled()) {
LOG.debug("Found " + (files == null ? 0 : files.size())
+ " recovered edits file(s) under " + regionDir);
}
if (files == null || files.isEmpty()) {
return minSeqIdForTheRegion;
}
for (Path edits: files) {
if (edits == null || !fs.exists(edits)) {
LOG.warn("Null or non-existent edits file: " + edits);
continue;
}
if (isZeroLengthThenDelete(fs, fs.getFileStatus(edits), edits)) {
continue;
}
long maxSeqId;
String fileName = edits.getName();
maxSeqId = Math.abs(Long.parseLong(fileName));
if (maxSeqId <= minSeqIdForTheRegion) {
if (LOG.isDebugEnabled()) {
String msg = "Maximum sequenceid for this wal is " + maxSeqId
+ " and minimum sequenceid for the region " + this + " is " + minSeqIdForTheRegion
+ ", skipped the whole file, path=" + edits;
LOG.debug(msg);
}
continue;
}
try {
// replay the edits. Replay can return -1 if everything is skipped, only update
// if seqId is greater
seqid = Math.max(seqid, replayRecoveredEdits(edits, maxSeqIdInStores, reporter, fs));
} catch (IOException e) {
handleException(fs, edits, e);
}
}
return seqid;
}
private void handleException(FileSystem fs, Path edits, IOException e) throws IOException {
boolean skipErrors = conf.getBoolean(HConstants.HREGION_EDITS_REPLAY_SKIP_ERRORS,
conf.getBoolean("hbase.skip.errors", HConstants.DEFAULT_HREGION_EDITS_REPLAY_SKIP_ERRORS));
if (conf.get("hbase.skip.errors") != null) {
LOG.warn("The property 'hbase.skip.errors' has been deprecated. Please use "
+ HConstants.HREGION_EDITS_REPLAY_SKIP_ERRORS + " instead.");
}
if (skipErrors) {
Path p = WALSplitUtil.moveAsideBadEditsFile(fs, edits);
LOG.error(HConstants.HREGION_EDITS_REPLAY_SKIP_ERRORS + "=true so continuing. Renamed "
+ edits + " as " + p,
e);
} else {
throw e;
}
}
/**
* @param edits File of recovered edits.
* @param maxSeqIdInStores Maximum sequenceid found in each store. Edits in wal must be larger
* than this to be replayed for each store.
* @return the sequence id of the last edit added to this region out of the recovered edits log or
* <code>minSeqId</code> if nothing added from editlogs.
*/
private long replayRecoveredEdits(final Path edits, Map<byte[], Long> maxSeqIdInStores,
final CancelableProgressable reporter, FileSystem fs) throws IOException {
String msg = "Replaying edits from " + edits;
LOG.info(msg);
MonitoredTask status = TaskMonitor.get().createStatus(msg);
status.setStatus("Opening recovered edits");
WAL.Reader reader = null;
try {
reader = WALFactory.createReader(fs, edits, conf);
long currentEditSeqId = -1;
long currentReplaySeqId = -1;
long firstSeqIdInLog = -1;
long skippedEdits = 0;
long editsCount = 0;
long intervalEdits = 0;
WAL.Entry entry;
HStore store = null;
boolean reported_once = false;
ServerNonceManager ng = this.rsServices == null ? null : this.rsServices.getNonceManager();
try {
// How many edits seen before we check elapsed time
int interval = this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
// How often to send a progress report (default 1/2 master timeout)
int period = this.conf.getInt("hbase.hstore.report.period", 300000);
long lastReport = EnvironmentEdgeManager.currentTime();
if (coprocessorHost != null) {
coprocessorHost.preReplayWALs(this.getRegionInfo(), edits);
}
while ((entry = reader.next()) != null) {
WALKey key = entry.getKey();
WALEdit val = entry.getEdit();
if (ng != null) { // some test, or nonces disabled
ng.reportOperationFromWal(key.getNonceGroup(), key.getNonce(), key.getWriteTime());
}
if (reporter != null) {
intervalEdits += val.size();
if (intervalEdits >= interval) {
// Number of edits interval reached
intervalEdits = 0;
long cur = EnvironmentEdgeManager.currentTime();
if (lastReport + period <= cur) {
status.setStatus("Replaying edits..." +
" skipped=" + skippedEdits +
" edits=" + editsCount);
// Timeout reached
if(!reporter.progress()) {
msg = "Progressable reporter failed, stopping replay for region " + this;
LOG.warn(msg);
status.abort(msg);
throw new IOException(msg);
}
reported_once = true;
lastReport = cur;
}
}
}
if (firstSeqIdInLog == -1) {
firstSeqIdInLog = key.getSequenceId();
}
if (currentEditSeqId > key.getSequenceId()) {
// when this condition is true, it means we have a serious defect because we need to
// maintain increasing SeqId for WAL edits per region
LOG.error(getRegionInfo().getEncodedName() + " : "
+ "Found decreasing SeqId. PreId=" + currentEditSeqId + " key=" + key
+ "; edit=" + val);
} else {
currentEditSeqId = key.getSequenceId();
}
currentReplaySeqId = (key.getOrigLogSeqNum() > 0) ?
key.getOrigLogSeqNum() : currentEditSeqId;
// Start coprocessor replay here. The coprocessor is for each WALEdit
// instead of a KeyValue.
if (coprocessorHost != null) {
status.setStatus("Running pre-WAL-restore hook in coprocessors");
if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
// if bypass this wal entry, ignore it ...
continue;
}
}
boolean checkRowWithinBoundary = false;
// Check this edit is for this region.
if (!Bytes.equals(key.getEncodedRegionName(),
this.getRegionInfo().getEncodedNameAsBytes())) {
checkRowWithinBoundary = true;
}
boolean flush = false;
MemStoreSizing memStoreSizing = new NonThreadSafeMemStoreSizing();
for (Cell cell: val.getCells()) {
// Check this edit is for me. Also, guard against writing the special
// METACOLUMN info such as HBASE::CACHEFLUSH entries
if (WALEdit.isMetaEditFamily(cell)) {
// if region names don't match, skip replaying the compaction marker
if (!checkRowWithinBoundary) {
//this is a special edit, we should handle it
CompactionDescriptor compaction = WALEdit.getCompaction(cell);
if (compaction != null) {
//replay the compaction
replayWALCompactionMarker(compaction, false, true, Long.MAX_VALUE);
}
}
skippedEdits++;
continue;
}
// Figure which store the edit is meant for.
if (store == null || !CellUtil.matchingFamily(cell,
store.getColumnFamilyDescriptor().getName())) {
store = getStore(cell);
}
if (store == null) {
// This should never happen. Perhaps schema was changed between
// crash and redeploy?
LOG.warn("No family for cell {} in region {}", cell, this);
skippedEdits++;
continue;
}
if (checkRowWithinBoundary && !rowIsInRange(this.getRegionInfo(),
cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())) {
LOG.warn("Row of {} is not within region boundary for region {}", cell, this);
skippedEdits++;
continue;
}
// Now, figure if we should skip this edit.
if (key.getSequenceId() <= maxSeqIdInStores.get(store.getColumnFamilyDescriptor()
.getName())) {
skippedEdits++;
continue;
}
PrivateCellUtil.setSequenceId(cell, currentReplaySeqId);
restoreEdit(store, cell, memStoreSizing);
editsCount++;
}
MemStoreSize mss = memStoreSizing.getMemStoreSize();
incMemStoreSize(mss);
flush = isFlushSize(this.memStoreSizing.getMemStoreSize());
if (flush) {
internalFlushcache(null, currentEditSeqId, stores.values(), status, false,
FlushLifeCycleTracker.DUMMY);
}
if (coprocessorHost != null) {
coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
}
}
if (coprocessorHost != null) {
coprocessorHost.postReplayWALs(this.getRegionInfo(), edits);
}
} catch (EOFException eof) {
Path p = WALSplitUtil.moveAsideBadEditsFile(walFS, edits);
msg = "EnLongAddered EOF. Most likely due to Master failure during "
+ "wal splitting, so we have this data in another edit. Continuing, but renaming "
+ edits + " as " + p + " for region " + this;
LOG.warn(msg, eof);
status.abort(msg);
} catch (IOException ioe) {
// If the IOE resulted from bad file format,
// then this problem is idempotent and retrying won't help
if (ioe.getCause() instanceof ParseException) {
Path p = WALSplitUtil.moveAsideBadEditsFile(walFS, edits);
msg = "File corruption enLongAddered! " +
"Continuing, but renaming " + edits + " as " + p;
LOG.warn(msg, ioe);
status.setStatus(msg);
} else {
status.abort(StringUtils.stringifyException(ioe));
// other IO errors may be transient (bad network connection,
// checksum exception on one datanode, etc). throw & retry
throw ioe;
}
}
if (reporter != null && !reported_once) {
reporter.progress();
}
msg = "Applied " + editsCount + ", skipped " + skippedEdits +
", firstSequenceIdInLog=" + firstSeqIdInLog +
", maxSequenceIdInLog=" + currentEditSeqId + ", path=" + edits;
status.markComplete(msg);
LOG.debug(msg);
return currentEditSeqId;
} finally {
status.cleanup();
if (reader != null) {
reader.close();
}
}
}
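// Standalone reading sketch (illustrative; assumes 'fs', 'path' and 'conf' refer to an
// existing recovered-edits or WAL file): the read loop above boils down to
//   try (WAL.Reader r = WALFactory.createReader(fs, path, conf)) {
//     WAL.Entry entry;
//     while ((entry = r.next()) != null) {
//       WALKey key = entry.getKey();      // region, sequence id, write time, nonces
//       WALEdit edit = entry.getEdit();   // the cells (and possibly meta markers)
//     }
//   }
// with the extra bookkeeping here (progress reporting, per-store seqid filtering,
// coprocessor hooks, flush-on-pressure) layered on top.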
/**
* Call to complete a compaction. It is for the case where we find in the WAL a compaction
* that was not finished. We could find one recovering a WAL after a regionserver crash.
* See HBASE-2331.
*/
void replayWALCompactionMarker(CompactionDescriptor compaction, boolean pickCompactionFiles,
boolean removeFiles, long replaySeqId)
throws IOException {
try {
checkTargetRegion(compaction.getEncodedRegionName().toByteArray(),
"Compaction marker from WAL ", compaction);
} catch (WrongRegionException wre) {
if (RegionReplicaUtil.isDefaultReplica(this.getRegionInfo())) {
// skip the compaction marker since it is not for this region
return;
}
throw wre;
}
synchronized (writestate) {
if (replaySeqId < lastReplayedOpenRegionSeqId) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Skipping replaying compaction event :" + TextFormat.shortDebugString(compaction)
+ " because its sequence id " + replaySeqId + " is smaller than this regions "
+ "lastReplayedOpenRegionSeqId of " + lastReplayedOpenRegionSeqId);
return;
}
if (replaySeqId < lastReplayedCompactionSeqId) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Skipping replaying compaction event :" + TextFormat.shortDebugString(compaction)
+ " because its sequence id " + replaySeqId + " is smaller than this regions "
+ "lastReplayedCompactionSeqId of " + lastReplayedCompactionSeqId);
return;
} else {
lastReplayedCompactionSeqId = replaySeqId;
}
if (LOG.isDebugEnabled()) {
LOG.debug(getRegionInfo().getEncodedName() + " : "
+ "Replaying compaction marker " + TextFormat.shortDebugString(compaction)
+ " with seqId=" + replaySeqId + " and lastReplayedOpenRegionSeqId="
+ lastReplayedOpenRegionSeqId);
}
startRegionOperation(Operation.REPLAY_EVENT);
try {
HStore store = this.getStore(compaction.getFamilyName().toByteArray());
if (store == null) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Found Compaction WAL edit for deleted family:"
+ Bytes.toString(compaction.getFamilyName().toByteArray()));
return;
}
store.replayCompactionMarker(compaction, pickCompactionFiles, removeFiles);
logRegionFiles();
} catch (FileNotFoundException ex) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "At least one of the store files in compaction: "
+ TextFormat.shortDebugString(compaction)
+ " doesn't exist any more. Skip loading the file(s)", ex);
} finally {
closeRegionOperation(Operation.REPLAY_EVENT);
}
}
}
void replayWALFlushMarker(FlushDescriptor flush, long replaySeqId) throws IOException {
checkTargetRegion(flush.getEncodedRegionName().toByteArray(),
"Flush marker from WAL ", flush);
if (ServerRegionReplicaUtil.isDefaultReplica(this.getRegionInfo())) {
return; // if primary nothing to do
}
if (LOG.isDebugEnabled()) {
LOG.debug(getRegionInfo().getEncodedName() + " : "
+ "Replaying flush marker " + TextFormat.shortDebugString(flush));
}
startRegionOperation(Operation.REPLAY_EVENT); // use region close lock to guard against close
try {
FlushAction action = flush.getAction();
switch (action) {
case START_FLUSH:
replayWALFlushStartMarker(flush);
break;
case COMMIT_FLUSH:
replayWALFlushCommitMarker(flush);
break;
case ABORT_FLUSH:
replayWALFlushAbortMarker(flush);
break;
case CANNOT_FLUSH:
replayWALFlushCannotFlushMarker(flush, replaySeqId);
break;
default:
LOG.warn(getRegionInfo().getEncodedName() + " : " +
"Received a flush event with unknown action, ignoring. " +
TextFormat.shortDebugString(flush));
break;
}
logRegionFiles();
} finally {
closeRegionOperation(Operation.REPLAY_EVENT);
}
}
/** Replay the flush marker from primary region by creating a corresponding snapshot of
* the store memstores, only if the memstores do not have a higher seqId from an earlier wal
* edit (because the events may be coming out of order).
*/
PrepareFlushResult replayWALFlushStartMarker(FlushDescriptor flush) throws IOException {
long flushSeqId = flush.getFlushSequenceNumber();
HashSet<HStore> storesToFlush = new HashSet<>();
for (StoreFlushDescriptor storeFlush : flush.getStoreFlushesList()) {
byte[] family = storeFlush.getFamilyName().toByteArray();
HStore store = getStore(family);
if (store == null) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Received a flush start marker from primary, but the family is not found. Ignoring"
+ " StoreFlushDescriptor:" + TextFormat.shortDebugString(storeFlush));
continue;
}
storesToFlush.add(store);
}
MonitoredTask status = TaskMonitor.get().createStatus("Preparing flush " + this);
// we will use writestate as a coarse-grain lock for all the replay events
// (flush, compaction, region open etc)
synchronized (writestate) {
try {
if (flush.getFlushSequenceNumber() < lastReplayedOpenRegionSeqId) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Skipping replaying flush event :" + TextFormat.shortDebugString(flush)
+ " because its sequence id is smaller than this regions lastReplayedOpenRegionSeqId "
+ " of " + lastReplayedOpenRegionSeqId);
return null;
}
if (numMutationsWithoutWAL.sum() > 0) {
numMutationsWithoutWAL.reset();
dataInMemoryWithoutWAL.reset();
}
if (!writestate.flushing) {
// we do not have an active snapshot and corresponding this.prepareResult. This means
// we can just snapshot our memstores and continue as normal.
// invoke prepareFlushCache. Send null as wal since we do not want the flush events in wal
PrepareFlushResult prepareResult = internalPrepareFlushCache(null, flushSeqId,
storesToFlush, status, false, FlushLifeCycleTracker.DUMMY);
if (prepareResult.result == null) {
// save the PrepareFlushResult so that we can use it later from commit flush
this.writestate.flushing = true;
this.prepareFlushResult = prepareResult;
status.markComplete("Flush prepare successful");
if (LOG.isDebugEnabled()) {
LOG.debug(getRegionInfo().getEncodedName() + " : "
+ " Prepared flush with seqId:" + flush.getFlushSequenceNumber());
}
} else {
// special case empty memstore. We will still save the flush result in this case, since
// our memstore is empty, but the primary is still flushing
if (prepareResult.getResult().getResult() ==
FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY) {
this.writestate.flushing = true;
this.prepareFlushResult = prepareResult;
if (LOG.isDebugEnabled()) {
LOG.debug(getRegionInfo().getEncodedName() + " : "
+ " Prepared empty flush with seqId:" + flush.getFlushSequenceNumber());
}
}
status.abort("Flush prepare failed with " + prepareResult.result);
// Nothing much to do; the prepare flush failed for some reason.
}
return prepareResult;
} else {
// we already have an active snapshot.
if (flush.getFlushSequenceNumber() == this.prepareFlushResult.flushOpSeqId) {
// They define the same flush. Log and continue.
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Received a flush prepare marker with the same seqId: " +
+ flush.getFlushSequenceNumber() + " before clearing the previous one with seqId: "
+ prepareFlushResult.flushOpSeqId + ". Ignoring");
// ignore
} else if (flush.getFlushSequenceNumber() < this.prepareFlushResult.flushOpSeqId) {
// We received a flush with a smaller seqNum than what we have prepared. We can only
// ignore this prepare flush request.
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Received a flush prepare marker with a smaller seqId: " +
+ flush.getFlushSequenceNumber() + " before clearing the previous one with seqId: "
+ prepareFlushResult.flushOpSeqId + ". Ignoring");
// ignore
} else {
// We received a flush with a larger seqNum than what we have prepared
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Received a flush prepare marker with a larger seqId: " +
+ flush.getFlushSequenceNumber() + " before clearing the previous one with seqId: "
+ prepareFlushResult.flushOpSeqId + ". Ignoring");
// We do not have multiple active snapshots in the memstore or a way to merge current
// memstore snapshot with the contents and resnapshot for now. We cannot take
// another snapshot and drop the previous one because that will cause temporary
// data loss in the secondary. So we ignore this for now, deferring the resolution
// to happen when we see the corresponding flush commit marker. If we have a memstore
// snapshot with x, and later received another prepare snapshot with y (where x < y),
// when we see flush commit for y, we will drop snapshot for x, and can also drop all
// the memstore edits if everything in memstore is < y. This is the usual case for
// RS crash + recovery where we might see consecutive prepare flush wal markers.
// Otherwise, this will cause more memory to be used in secondary replica until a
// further prepare + commit flush is seen and replayed.
}
}
} finally {
status.cleanup();
writestate.notifyAll();
}
}
return null;
}
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NN_NAKED_NOTIFY",
justification="Intentional; post memstore flush")
void replayWALFlushCommitMarker(FlushDescriptor flush) throws IOException {
MonitoredTask status = TaskMonitor.get().createStatus("Committing flush " + this);
// check whether we have the memstore snapshot with the corresponding seqId. Replay to
// secondary region replicas are in order, except for when the region moves or when the
// region server crashes. In those cases, we may receive replay requests out of order from
// the original seqIds.
synchronized (writestate) {
try {
if (flush.getFlushSequenceNumber() < lastReplayedOpenRegionSeqId) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Skipping replaying flush event :" + TextFormat.shortDebugString(flush)
+ " because its sequence id is smaller than this regions lastReplayedOpenRegionSeqId "
+ " of " + lastReplayedOpenRegionSeqId);
return;
}
if (writestate.flushing) {
PrepareFlushResult prepareFlushResult = this.prepareFlushResult;
if (flush.getFlushSequenceNumber() == prepareFlushResult.flushOpSeqId) {
if (LOG.isDebugEnabled()) {
LOG.debug(getRegionInfo().getEncodedName() + " : "
+ "Received a flush commit marker with seqId:" + flush.getFlushSequenceNumber()
+ " and a previous prepared snapshot was found");
}
// This is the regular case where we received commit flush after prepare flush
// corresponding to the same seqId.
replayFlushInStores(flush, prepareFlushResult, true);
// Set down the memstore size by amount of flush.
this.decrMemStoreSize(prepareFlushResult.totalFlushableSize.getMemStoreSize());
this.prepareFlushResult = null;
writestate.flushing = false;
} else if (flush.getFlushSequenceNumber() < prepareFlushResult.flushOpSeqId) {
// This should not happen normally. However, let's be safe and guard against these cases
// we received a flush commit with a smaller seqId than what we have prepared
// we will pick the flush file up from this commit (if we have not seen it), but we
// will not drop the memstore
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Received a flush commit marker with smaller seqId: "
+ flush.getFlushSequenceNumber() + " than what we have prepared with seqId: "
+ prepareFlushResult.flushOpSeqId + ". Picking up new file, but not dropping"
+" prepared memstore snapshot");
replayFlushInStores(flush, prepareFlushResult, false);
// snapshot is not dropped, so memstore sizes should not be decremented
// we still have the prepared snapshot, flushing should still be true
} else {
// This should not happen normally. However, let's be safe and guard against these cases
// we received a flush commit with a larger seqId than what we have prepared
// we will pick the flush file for this. We will also obtain the updates lock and
// look for contents of the memstore to see whether we have edits after this seqId.
// If not, we will drop all the memstore edits and the snapshot as well.
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Received a flush commit marker with larger seqId: "
+ flush.getFlushSequenceNumber() + " than what we have prepared with seqId: " +
prepareFlushResult.flushOpSeqId + ". Picking up new file and dropping prepared"
+" memstore snapshot");
replayFlushInStores(flush, prepareFlushResult, true);
// Set down the memstore size by amount of flush.
this.decrMemStoreSize(prepareFlushResult.totalFlushableSize.getMemStoreSize());
// Inspect the memstore contents to see whether the memstore contains only edits
// with seqId smaller than the flush seqId. If so, we can discard those edits.
dropMemStoreContentsForSeqId(flush.getFlushSequenceNumber(), null);
this.prepareFlushResult = null;
writestate.flushing = false;
}
// If we were waiting for observing a flush or region opening event for not showing
// partial data after a secondary region crash, we can allow reads now. We can only make
// sure that we are not showing partial data (for example skipping some previous edits)
// until we observe a full flush start and flush commit. So if we were not able to find
// a previous flush we will not enable reads now.
this.setReadsEnabled(true);
} else {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Received a flush commit marker with seqId:" + flush.getFlushSequenceNumber()
+ ", but no previous prepared snapshot was found");
// There is no corresponding prepare snapshot from before.
// We will pick up the new flushed file
replayFlushInStores(flush, null, false);
// Inspect the memstore contents to see whether the memstore contains only edits
// with seqId smaller than the flush seqId. If so, we can discard those edits.
dropMemStoreContentsForSeqId(flush.getFlushSequenceNumber(), null);
}
status.markComplete("Flush commit successful");
// Update the last flushed sequence id for region.
this.maxFlushedSeqId = flush.getFlushSequenceNumber();
// advance the mvcc read point so that the new flushed file is visible.
mvcc.advanceTo(flush.getFlushSequenceNumber());
} catch (FileNotFoundException ex) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "At least one of the store files in flush: " + TextFormat.shortDebugString(flush)
+ " doesn't exist any more. Skip loading the file(s)", ex);
}
finally {
status.cleanup();
writestate.notifyAll();
}
}
// C. Finally notify anyone waiting on memstore to clear:
// e.g. checkResources().
synchronized (this) {
notifyAll(); // FindBugs NN_NAKED_NOTIFY
}
}
/**
* Replays the given flush descriptor by opening the flush files in stores and dropping the
* memstore snapshots if requested.
* @param flush flush descriptor replayed from the WAL
* @param prepareFlushResult result of the corresponding prepare flush, if any
* @param dropMemstoreSnapshot whether to drop the prepared memstore snapshot after replaying
* @throws IOException if the flushed files cannot be opened in the stores
*/
private void replayFlushInStores(FlushDescriptor flush, PrepareFlushResult prepareFlushResult,
boolean dropMemstoreSnapshot)
throws IOException {
for (StoreFlushDescriptor storeFlush : flush.getStoreFlushesList()) {
byte[] family = storeFlush.getFamilyName().toByteArray();
HStore store = getStore(family);
if (store == null) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Received a flush commit marker from primary, but the family is not found."
+ "Ignoring StoreFlushDescriptor:" + storeFlush);
continue;
}
List<String> flushFiles = storeFlush.getFlushOutputList();
StoreFlushContext ctx = null;
long startTime = EnvironmentEdgeManager.currentTime();
if (prepareFlushResult == null || prepareFlushResult.storeFlushCtxs == null) {
ctx = store.createFlushContext(flush.getFlushSequenceNumber(), FlushLifeCycleTracker.DUMMY);
} else {
ctx = prepareFlushResult.storeFlushCtxs.get(family);
startTime = prepareFlushResult.startTime;
}
if (ctx == null) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Unexpected: flush commit marker received from store "
+ Bytes.toString(family) + " but no associated flush context. Ignoring");
continue;
}
ctx.replayFlush(flushFiles, dropMemstoreSnapshot); // replay the flush
// Record latest flush time
this.lastStoreFlushTimeMap.put(store, startTime);
}
}
private long loadRecoveredHFilesIfAny(Collection<HStore> stores) throws IOException {
Path regionDir = fs.getRegionDir();
long maxSeqId = -1;
for (HStore store : stores) {
String familyName = store.getColumnFamilyName();
FileStatus[] files =
WALSplitUtil.getRecoveredHFiles(fs.getFileSystem(), regionDir, familyName);
if (files != null && files.length != 0) {
for (FileStatus file : files) {
Path filePath = file.getPath();
// If file length is zero then delete it
if (isZeroLengthThenDelete(fs.getFileSystem(), file, filePath)) {
continue;
}
try {
HStoreFile storefile = store.tryCommitRecoveredHFile(file.getPath());
maxSeqId = Math.max(maxSeqId, storefile.getReader().getSequenceID());
} catch (IOException e) {
handleException(fs.getFileSystem(), filePath, e);
continue;
}
}
if (this.rsServices != null && store.needsCompaction()) {
this.rsServices.getCompactionRequestor()
.requestCompaction(this, store, "load recovered hfiles request compaction",
Store.PRIORITY_USER + 1, CompactionLifeCycleTracker.DUMMY, null);
}
}
}
return maxSeqId;
}
/**
* Be careful, this method will drop all data in the memstore of this region.
* Currently, this method is used to drop memstore to prevent memory leak
* when replaying recovered.edits while opening region.
*/
public MemStoreSize dropMemStoreContents() throws IOException {
MemStoreSizing totalFreedSize = new NonThreadSafeMemStoreSizing();
this.updatesLock.writeLock().lock();
try {
for (HStore s : stores.values()) {
MemStoreSize memStoreSize = doDropStoreMemStoreContentsForSeqId(s, HConstants.NO_SEQNUM);
LOG.info("Drop memstore for Store " + s.getColumnFamilyName() + " in region "
+ this.getRegionInfo().getRegionNameAsString()
+ " , dropped memstoresize: [" + memStoreSize + " }");
totalFreedSize.incMemStoreSize(memStoreSize);
}
return totalFreedSize.getMemStoreSize();
} finally {
this.updatesLock.writeLock().unlock();
}
}
/**
* Drops the memstore contents after replaying a flush descriptor or region open event replay
* if the memstore edits have seqNums smaller than the given seq id
* @throws IOException
*/
private MemStoreSize dropMemStoreContentsForSeqId(long seqId, HStore store) throws IOException {
MemStoreSizing totalFreedSize = new NonThreadSafeMemStoreSizing();
this.updatesLock.writeLock().lock();
try {
long currentSeqId = mvcc.getReadPoint();
if (seqId >= currentSeqId) {
// then we can drop the memstore contents since everything is below this seqId
LOG.info(getRegionInfo().getEncodedName() + " : "
+ "Dropping memstore contents as well since replayed flush seqId: "
+ seqId + " is greater than current seqId:" + currentSeqId);
// Prepare flush (take a snapshot) and then abort (drop the snapshot)
if (store == null) {
for (HStore s : stores.values()) {
totalFreedSize.incMemStoreSize(doDropStoreMemStoreContentsForSeqId(s, currentSeqId));
}
} else {
totalFreedSize.incMemStoreSize(doDropStoreMemStoreContentsForSeqId(store, currentSeqId));
}
} else {
LOG.info(getRegionInfo().getEncodedName() + " : "
+ "Not dropping memstore contents since replayed flush seqId: "
+ seqId + " is smaller than current seqId:" + currentSeqId);
}
} finally {
this.updatesLock.writeLock().unlock();
}
return totalFreedSize.getMemStoreSize();
}
private MemStoreSize doDropStoreMemStoreContentsForSeqId(HStore s, long currentSeqId)
throws IOException {
MemStoreSize flushableSize = s.getFlushableSize();
this.decrMemStoreSize(flushableSize);
StoreFlushContext ctx = s.createFlushContext(currentSeqId, FlushLifeCycleTracker.DUMMY);
ctx.prepare();
ctx.abort();
return flushableSize;
}
private void replayWALFlushAbortMarker(FlushDescriptor flush) {
// nothing to do for now. A flush abort will cause a RS abort which means that the region
// will be opened somewhere else later. We will see the region open event soon, and replaying
// that will drop the snapshot
}
private void replayWALFlushCannotFlushMarker(FlushDescriptor flush, long replaySeqId) {
synchronized (writestate) {
if (this.lastReplayedOpenRegionSeqId > replaySeqId) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Skipping replaying flush event :" + TextFormat.shortDebugString(flush)
+ " because its sequence id " + replaySeqId + " is smaller than this regions "
+ "lastReplayedOpenRegionSeqId of " + lastReplayedOpenRegionSeqId);
return;
}
// If we were waiting for observing a flush or region opening event for not showing partial
// data after a secondary region crash, we can allow reads now. This event means that the
// primary was not able to flush because memstore is empty when we requested flush. By the
// time we observe this, we are guaranteed to have up to date seqId with our previous
// assignment.
this.setReadsEnabled(true);
}
}
PrepareFlushResult getPrepareFlushResult() {
return prepareFlushResult;
}
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NN_NAKED_NOTIFY",
justification="Intentional; cleared the memstore")
void replayWALRegionEventMarker(RegionEventDescriptor regionEvent) throws IOException {
checkTargetRegion(regionEvent.getEncodedRegionName().toByteArray(),
"RegionEvent marker from WAL ", regionEvent);
startRegionOperation(Operation.REPLAY_EVENT);
try {
if (ServerRegionReplicaUtil.isDefaultReplica(this.getRegionInfo())) {
return; // if primary nothing to do
}
if (regionEvent.getEventType() == EventType.REGION_CLOSE) {
// nothing to do on REGION_CLOSE for now.
return;
}
if (regionEvent.getEventType() != EventType.REGION_OPEN) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Unknown region event received, ignoring :"
+ TextFormat.shortDebugString(regionEvent));
return;
}
if (LOG.isDebugEnabled()) {
LOG.debug(getRegionInfo().getEncodedName() + " : "
+ "Replaying region open event marker " + TextFormat.shortDebugString(regionEvent));
}
// we will use writestate as a coarse-grain lock for all the replay events
synchronized (writestate) {
// Replication can deliver events out of order when primary region moves or the region
// server crashes, since there is no coordination between replication of different wal files
// belonging to different region servers. We have to safeguard against this case by using
// region open event's seqid. Since this is the first event that the region puts (after
// possibly flushing recovered.edits), after seeing this event, we can ignore every edit
// smaller than this seqId
if (this.lastReplayedOpenRegionSeqId <= regionEvent.getLogSequenceNumber()) {
this.lastReplayedOpenRegionSeqId = regionEvent.getLogSequenceNumber();
} else {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Skipping replaying region event :" + TextFormat.shortDebugString(regionEvent)
+ " because its sequence id is smaller than this regions lastReplayedOpenRegionSeqId "
+ " of " + lastReplayedOpenRegionSeqId);
return;
}
// region open lists all the files that the region has at the time of the opening. Just pick
// all the files and drop prepared flushes and empty memstores
for (StoreDescriptor storeDescriptor : regionEvent.getStoresList()) {
// stores of primary may be different now
byte[] family = storeDescriptor.getFamilyName().toByteArray();
HStore store = getStore(family);
if (store == null) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Received a region open marker from primary, but the family is not found. "
+ "Ignoring. StoreDescriptor:" + storeDescriptor);
continue;
}
long storeSeqId = store.getMaxSequenceId().orElse(0L);
List<String> storeFiles = storeDescriptor.getStoreFileList();
try {
store.refreshStoreFiles(storeFiles); // replace the files with the new ones
} catch (FileNotFoundException ex) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "At least one of the store files: " + storeFiles
+ " doesn't exist any more. Skip loading the file(s)", ex);
continue;
}
if (store.getMaxSequenceId().orElse(0L) != storeSeqId) {
// Record latest flush time if we picked up new files
lastStoreFlushTimeMap.put(store, EnvironmentEdgeManager.currentTime());
}
if (writestate.flushing) {
// only drop memstore snapshots if they are smaller than last flush for the store
if (this.prepareFlushResult.flushOpSeqId <= regionEvent.getLogSequenceNumber()) {
StoreFlushContext ctx = this.prepareFlushResult.storeFlushCtxs == null ?
null : this.prepareFlushResult.storeFlushCtxs.get(family);
if (ctx != null) {
MemStoreSize mss = store.getFlushableSize();
ctx.abort();
this.decrMemStoreSize(mss);
this.prepareFlushResult.storeFlushCtxs.remove(family);
}
}
}
// Drop the memstore contents if they are now smaller than the latest seen flushed file
dropMemStoreContentsForSeqId(regionEvent.getLogSequenceNumber(), store);
if (storeSeqId > this.maxFlushedSeqId) {
this.maxFlushedSeqId = storeSeqId;
}
}
// if all stores ended up dropping their snapshots, we can safely drop the
// prepareFlushResult
dropPrepareFlushIfPossible();
// advance the mvcc read point so that the new flushed file is visible.
mvcc.await();
// If we were waiting for observing a flush or region opening event for not showing partial
// data after a secondary region crash, we can allow reads now.
this.setReadsEnabled(true);
// C. Finally notify anyone waiting on memstore to clear:
// e.g. checkResources().
synchronized (this) {
notifyAll(); // FindBugs NN_NAKED_NOTIFY
}
}
logRegionFiles();
} finally {
closeRegionOperation(Operation.REPLAY_EVENT);
}
}
void replayWALBulkLoadEventMarker(WALProtos.BulkLoadDescriptor bulkLoadEvent) throws IOException {
checkTargetRegion(bulkLoadEvent.getEncodedRegionName().toByteArray(),
"BulkLoad marker from WAL ", bulkLoadEvent);
if (ServerRegionReplicaUtil.isDefaultReplica(this.getRegionInfo())) {
return; // if primary nothing to do
}
if (LOG.isDebugEnabled()) {
LOG.debug(getRegionInfo().getEncodedName() + " : "
+ "Replaying bulkload event marker " + TextFormat.shortDebugString(bulkLoadEvent));
}
// check if multiple families involved
boolean multipleFamilies = false;
byte[] family = null;
for (StoreDescriptor storeDescriptor : bulkLoadEvent.getStoresList()) {
byte[] fam = storeDescriptor.getFamilyName().toByteArray();
if (family == null) {
family = fam;
} else if (!Bytes.equals(family, fam)) {
multipleFamilies = true;
break;
}
}
startBulkRegionOperation(multipleFamilies);
try {
// we will use writestate as a coarse-grained lock for all the replay events
synchronized (writestate) {
// Replication can deliver events out of order when the primary region moves or the region
// server crashes, since there is no coordination between replication of different wal files
// belonging to different region servers. We have to safeguard against this case by using the
// region open event's seqid. Since this is the first event that the region puts (after
// possibly flushing recovered.edits), after seeing this event we can ignore every edit
// smaller than this seqId.
if (bulkLoadEvent.getBulkloadSeqNum() >= 0
&& this.lastReplayedOpenRegionSeqId >= bulkLoadEvent.getBulkloadSeqNum()) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Skipping replaying bulkload event :"
+ TextFormat.shortDebugString(bulkLoadEvent)
+ " because its sequence id is smaller than this region's lastReplayedOpenRegionSeqId"
+ " =" + lastReplayedOpenRegionSeqId);
return;
}
for (StoreDescriptor storeDescriptor : bulkLoadEvent.getStoresList()) {
// stores of primary may be different now
family = storeDescriptor.getFamilyName().toByteArray();
HStore store = getStore(family);
if (store == null) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ "Received a bulk load marker from primary, but the family is not found. "
+ "Ignoring. StoreDescriptor:" + storeDescriptor);
continue;
}
List<String> storeFiles = storeDescriptor.getStoreFileList();
for (String storeFile : storeFiles) {
StoreFileInfo storeFileInfo = null;
try {
storeFileInfo = fs.getStoreFileInfo(Bytes.toString(family), storeFile);
store.bulkLoadHFile(storeFileInfo);
} catch(FileNotFoundException ex) {
LOG.warn(getRegionInfo().getEncodedName() + " : "
+ ((storeFileInfo != null) ? storeFileInfo.toString() :
(new Path(Bytes.toString(family), storeFile)).toString())
+ " doesn't exist any more. Skip loading the file");
}
}
}
}
if (bulkLoadEvent.getBulkloadSeqNum() > 0) {
mvcc.advanceTo(bulkLoadEvent.getBulkloadSeqNum());
}
} finally {
closeBulkRegionOperation();
}
}
/**
* If all stores ended up dropping their snapshots, we can safely drop the prepareFlushResult
*/
private void dropPrepareFlushIfPossible() {
if (writestate.flushing) {
boolean canDrop = true;
if (prepareFlushResult.storeFlushCtxs != null) {
for (Entry<byte[], StoreFlushContext> entry : prepareFlushResult.storeFlushCtxs
.entrySet()) {
HStore store = getStore(entry.getKey());
if (store == null) {
continue;
}
if (store.getSnapshotSize().getDataSize() > 0) {
canDrop = false;
break;
}
}
}
// this means that all the stores in the region have finished flushing, but the WAL marker
// may not have been written or we did not receive it yet.
if (canDrop) {
writestate.flushing = false;
this.prepareFlushResult = null;
}
}
}
@Override
public boolean refreshStoreFiles() throws IOException {
return refreshStoreFiles(false);
}
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NN_NAKED_NOTIFY",
justification = "Notify is about post replay. Intentional")
protected boolean refreshStoreFiles(boolean force) throws IOException {
if (!force && ServerRegionReplicaUtil.isDefaultReplica(this.getRegionInfo())) {
return false; // if primary nothing to do
}
if (LOG.isDebugEnabled()) {
LOG.debug(getRegionInfo().getEncodedName() + " : "
+ "Refreshing store files to see whether we can free up memstore");
}
long totalFreedDataSize = 0;
long smallestSeqIdInStores = Long.MAX_VALUE;
startRegionOperation(); // obtain region close lock
try {
Map<HStore, Long> map = new HashMap<>();
synchronized (writestate) {
for (HStore store : stores.values()) {
// TODO: some stores might see new data from flush, while others do not, which
// MIGHT break atomic edits across column families.
long maxSeqIdBefore = store.getMaxSequenceId().orElse(0L);
// refresh the store files. This is similar to observing a region open wal marker.
store.refreshStoreFiles();
long storeSeqId = store.getMaxSequenceId().orElse(0L);
if (storeSeqId < smallestSeqIdInStores) {
smallestSeqIdInStores = storeSeqId;
}
// see whether we can drop the memstore or the snapshot
if (storeSeqId > maxSeqIdBefore) {
if (writestate.flushing) {
// only drop memstore snapshots if they are smaller than last flush for the store
if (this.prepareFlushResult.flushOpSeqId <= storeSeqId) {
StoreFlushContext ctx = this.prepareFlushResult.storeFlushCtxs == null ?
null : this.prepareFlushResult.storeFlushCtxs.get(
store.getColumnFamilyDescriptor().getName());
if (ctx != null) {
MemStoreSize mss = store.getFlushableSize();
ctx.abort();
this.decrMemStoreSize(mss);
this.prepareFlushResult.storeFlushCtxs.
remove(store.getColumnFamilyDescriptor().getName());
totalFreedDataSize += mss.getDataSize();
}
}
}
map.put(store, storeSeqId);
}
}
// if all stores ended up dropping their snapshots, we can safely drop the
// prepareFlushResult
dropPrepareFlushIfPossible();
// advance the mvcc read point so that the new flushed files are visible.
// either greater than flush seq number or they were already picked up via flush.
for (HStore s : stores.values()) {
mvcc.advanceTo(s.getMaxMemStoreTS().orElse(0L));
}
// smallestSeqIdInStores is the seqId that we have a corresponding hfile for. We can safely
// skip all edits that are to be replayed in the future and have a smaller seqId than this.
// We are updating lastReplayedOpenRegionSeqId so that we can skip all edits that we have
// already picked up the flush files for.
if (this.lastReplayedOpenRegionSeqId < smallestSeqIdInStores) {
this.lastReplayedOpenRegionSeqId = smallestSeqIdInStores;
}
}
if (!map.isEmpty()) {
for (Map.Entry<HStore, Long> entry : map.entrySet()) {
// Drop the memstore contents if they are now smaller than the latest seen flushed file
totalFreedDataSize += dropMemStoreContentsForSeqId(entry.getValue(), entry.getKey())
.getDataSize();
}
}
// C. Finally notify anyone waiting on memstore to clear:
// e.g. checkResources().
synchronized (this) {
notifyAll(); // FindBugs NN_NAKED_NOTIFY
}
return totalFreedDataSize > 0;
} finally {
closeRegionOperation();
}
}
private void logRegionFiles() {
if (LOG.isTraceEnabled()) {
LOG.trace(getRegionInfo().getEncodedName() + " : Store files for region: ");
stores.values().stream().filter(s -> s.getStorefiles() != null)
.flatMap(s -> s.getStorefiles().stream())
.forEachOrdered(sf -> LOG.trace(getRegionInfo().getEncodedName() + " : " + sf));
}
}
/** Checks whether the given regionName is either equal to our region's name, or is the name
* of the primary region corresponding to this secondary replica's range.
*/
private void checkTargetRegion(byte[] encodedRegionName, String exceptionMsg, Object payload)
throws WrongRegionException {
if (Bytes.equals(this.getRegionInfo().getEncodedNameAsBytes(), encodedRegionName)) {
return;
}
if (!RegionReplicaUtil.isDefaultReplica(this.getRegionInfo()) &&
Bytes.equals(encodedRegionName,
this.fs.getRegionInfoForFS().getEncodedNameAsBytes())) {
return;
}
throw new WrongRegionException(exceptionMsg + payload
+ " targeted for region " + Bytes.toStringBinary(encodedRegionName)
+ " does not match this region: " + this.getRegionInfo());
}
/**
* Used by tests.
* @param s Store to add the edit to.
* @param cell Cell to add.
*/
protected void restoreEdit(HStore s, Cell cell, MemStoreSizing memstoreAccounting) {
s.add(cell, memstoreAccounting);
}
/**
* Make sure we have been through lease recovery before getting the file status, so the file
* length can be trusted.
* @param p File to check.
* @return True if the file was zero-length (and if so, we delete it in here).
* @throws IOException
*/
private static boolean isZeroLengthThenDelete(final FileSystem fs, final FileStatus stat,
final Path p) throws IOException {
if (stat.getLen() > 0) {
return false;
}
LOG.warn("File " + p + " is zero-length, deleting.");
fs.delete(p, false);
return true;
}
protected HStore instantiateHStore(final ColumnFamilyDescriptor family, boolean warmup)
throws IOException {
if (family.isMobEnabled()) {
if (HFile.getFormatVersion(this.conf) < HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
throw new IOException("A minimum HFile version of " + HFile.MIN_FORMAT_VERSION_WITH_TAGS +
" is required for MOB feature. Consider setting " + HFile.FORMAT_VERSION_KEY +
" accordingly.");
}
return new HMobStore(this, family, this.conf, warmup);
}
return new HStore(this, family, this.conf, warmup);
}
@Override
public HStore getStore(byte[] column) {
return this.stores.get(column);
}
/**
* Return the HStore instance. Does not do any copy: as the number of stores is limited, we
* iterate over the list.
*/
private HStore getStore(Cell cell) {
return stores.entrySet().stream().filter(e -> CellUtil.matchingFamily(cell, e.getKey()))
.map(e -> e.getValue()).findFirst().orElse(null);
}
@Override
public List<HStore> getStores() {
return new ArrayList<>(stores.values());
}
@Override
public List<String> getStoreFileList(byte[][] columns) throws IllegalArgumentException {
List<String> storeFileNames = new ArrayList<>();
synchronized (closeLock) {
for (byte[] column : columns) {
HStore store = this.stores.get(column);
if (store == null) {
throw new IllegalArgumentException(
"No column family : " + new String(column, StandardCharsets.UTF_8) + " available");
}
Collection<HStoreFile> storeFiles = store.getStorefiles();
if (storeFiles == null) {
continue;
}
for (HStoreFile storeFile : storeFiles) {
storeFileNames.add(storeFile.getPath().toString());
}
logRegionFiles();
}
}
return storeFileNames;
}
//////////////////////////////////////////////////////////////////////////////
// Support code
//////////////////////////////////////////////////////////////////////////////
/** Make sure this is a valid row for the HRegion */
void checkRow(byte[] row, String op) throws IOException {
if (!rowIsInRange(getRegionInfo(), row)) {
throw new WrongRegionException("Requested row out of range for " +
op + " on HRegion " + this + ", startKey='" +
Bytes.toStringBinary(getRegionInfo().getStartKey()) + "', getEndKey()='" +
Bytes.toStringBinary(getRegionInfo().getEndKey()) + "', row='" +
Bytes.toStringBinary(row) + "'");
}
}
/**
* Get an exclusive (write) lock on a given row.
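* <p>A minimal caller-side sketch (hedged: {@code region} is a hypothetical reference to this
* HRegion instance, and the surrounding method is assumed to handle IOException):
* <pre>{@code
* RowLock lock = region.getRowLock(Bytes.toBytes("row-1"));
* try {
*   // mutate the row while holding the exclusive (write) lock
* } finally {
*   lock.release();
* }
* }</pre>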
* @param row Which row to lock.
* @return A locked RowLock. The lock is exclusive and already acquired.
* @throws IOException
*/
public RowLock getRowLock(byte[] row) throws IOException {
return getRowLock(row, false);
}
@Override
public RowLock getRowLock(byte[] row, boolean readLock) throws IOException {
checkRow(row, "row lock");
return getRowLockInternal(row, readLock, null);
}
protected RowLock getRowLockInternal(byte[] row, boolean readLock, final RowLock prevRowLock)
throws IOException {
// create an object to use as a key in the row lock map
HashedBytes rowKey = new HashedBytes(row);
RowLockContext rowLockContext = null;
RowLockImpl result = null;
boolean success = false;
try (TraceScope scope = TraceUtil.createTrace("HRegion.getRowLock")) {
TraceUtil.addTimelineAnnotation("Getting a " + (readLock?"readLock":"writeLock"));
// Keep trying until we have a lock or error out.
// TODO: do we need to add a time component here?
while (result == null) {
rowLockContext = computeIfAbsent(lockedRows, rowKey, () -> new RowLockContext(rowKey));
// Now try and get the lock.
// This can fail if the lock context was cleaned up by a concurrent thread (newReadLock/
// newWriteLock then return null), in which case we loop and fetch or create a fresh context.
if (readLock) {
// For read lock, if the caller has locked the same row previously, it will not try
// to acquire the same read lock. It simply returns the previous row lock.
RowLockImpl prevRowLockImpl = (RowLockImpl)prevRowLock;
if ((prevRowLockImpl != null) && (prevRowLockImpl.getLock() ==
rowLockContext.readWriteLock.readLock())) {
success = true;
return prevRowLock;
}
result = rowLockContext.newReadLock();
} else {
result = rowLockContext.newWriteLock();
}
}
int timeout = rowLockWaitDuration;
boolean reachDeadlineFirst = false;
Optional<RpcCall> call = RpcServer.getCurrentCall();
if (call.isPresent()) {
long deadline = call.get().getDeadline();
if (deadline < Long.MAX_VALUE) {
int timeToDeadline = (int) (deadline - System.currentTimeMillis());
if (timeToDeadline <= this.rowLockWaitDuration) {
reachDeadlineFirst = true;
timeout = timeToDeadline;
}
}
}
if (timeout <= 0 || !result.getLock().tryLock(timeout, TimeUnit.MILLISECONDS)) {
TraceUtil.addTimelineAnnotation("Failed to get row lock");
String message = "Timed out waiting for lock for row: " + rowKey + " in region "
+ getRegionInfo().getEncodedName();
if (reachDeadlineFirst) {
throw new TimeoutIOException(message);
} else {
// If timeToDeadline is larger than rowLockWaitDuration, we can not drop the request.
throw new IOException(message);
}
}
rowLockContext.setThreadName(Thread.currentThread().getName());
success = true;
return result;
} catch (InterruptedException ie) {
if (LOG.isDebugEnabled()) {
LOG.debug("Thread interrupted waiting for lock on row: {}, in region {}", rowKey,
getRegionInfo().getRegionNameAsString());
}
TraceUtil.addTimelineAnnotation("Interrupted exception getting row lock");
throw throwOnInterrupt(ie);
} catch (Error error) {
// The maximum hold count for a read lock is 64K (hardcoded); when this maximum count
// is reached, an Error is thrown. Catch it and rethrow as an IOException so the caller
// can handle the failed lock acquisition and go ahead processing the minibatch with the
// locks it did acquire.
LOG.warn("Error to get row lock for {}, in region {}, cause: {}", Bytes.toStringBinary(row),
getRegionInfo().getRegionNameAsString(), error);
IOException ioe = new IOException(error);
TraceUtil.addTimelineAnnotation("Error getting row lock");
throw ioe;
} finally {
// Clean up the counts just in case this was the thing keeping the context alive.
if (!success && rowLockContext != null) {
rowLockContext.cleanUp();
}
}
}
private void releaseRowLocks(List<RowLock> rowLocks) {
if (rowLocks != null) {
for (RowLock rowLock : rowLocks) {
rowLock.release();
}
rowLocks.clear();
}
}
public int getReadLockCount() {
return lock.getReadLockCount();
}
public ConcurrentHashMap<HashedBytes, RowLockContext> getLockedRows() {
return lockedRows;
}
class RowLockContext {
private final HashedBytes row;
final ReadWriteLock readWriteLock = new ReentrantReadWriteLock(true);
final AtomicBoolean usable = new AtomicBoolean(true);
final AtomicInteger count = new AtomicInteger(0);
final Object lock = new Object();
private String threadName;
RowLockContext(HashedBytes row) {
this.row = row;
}
RowLockImpl newWriteLock() {
Lock l = readWriteLock.writeLock();
return getRowLock(l);
}
RowLockImpl newReadLock() {
Lock l = readWriteLock.readLock();
return getRowLock(l);
}
private RowLockImpl getRowLock(Lock l) {
count.incrementAndGet();
synchronized (lock) {
if (usable.get()) {
return new RowLockImpl(this, l);
} else {
return null;
}
}
}
void cleanUp() {
long c = count.decrementAndGet();
if (c <= 0) {
synchronized (lock) {
if (count.get() <= 0 && usable.get()){ // Don't attempt to remove row if already removed
usable.set(false);
RowLockContext removed = lockedRows.remove(row);
assert removed == this: "we should never remove a different context";
}
}
}
}
public void setThreadName(String threadName) {
this.threadName = threadName;
}
@Override
public String toString() {
return "RowLockContext{" +
"row=" + row +
", readWriteLock=" + readWriteLock +
", count=" + count +
", threadName=" + threadName +
'}';
}
}
/**
* Class used to represent a lock on a row.
*/
public static class RowLockImpl implements RowLock {
private final RowLockContext context;
private final Lock lock;
public RowLockImpl(RowLockContext context, Lock lock) {
this.context = context;
this.lock = lock;
}
public Lock getLock() {
return lock;
}
public RowLockContext getContext() {
return context;
}
@Override
public void release() {
lock.unlock();
context.cleanUp();
}
@Override
public String toString() {
return "RowLockImpl{" +
"context=" + context +
", lock=" + lock +
'}';
}
}
/**
* Determines whether multiple column families are present.
* Precondition: familyPaths is not null.
*
* @param familyPaths List of (column family, hfilePath)
*/
private static boolean hasMultipleColumnFamilies(Collection<Pair<byte[], String>> familyPaths) {
boolean multipleFamilies = false;
byte[] family = null;
for (Pair<byte[], String> pair : familyPaths) {
byte[] fam = pair.getFirst();
if (family == null) {
family = fam;
} else if (!Bytes.equals(family, fam)) {
multipleFamilies = true;
break;
}
}
return multipleFamilies;
}
/**
* Attempts to atomically load a group of hfiles. This is critical for loading
* rows with multiple column families atomically.
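* <p>A minimal caller-side sketch (the staging path and the {@code region} reference are
* hypothetical; passing a null listener skips the pre/post hooks):
* <pre>{@code
* List<Pair<byte[], String>> familyPaths = new ArrayList<>();
* familyPaths.add(new Pair<>(Bytes.toBytes("cf"), "/staging/cf/hfile1"));
* Map<byte[], List<Path>> loaded = region.bulkLoadHFiles(familyPaths, true, null);
* if (loaded == null) {
*   // recoverable failure, e.g. a file no longer fits this region after a split; retry upstream
* }
* }</pre>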
*
* @param familyPaths List of Pair&lt;byte[] column family, String hfilePath&gt;
* @param bulkLoadListener Internal hooks enabling massaging/preparation of a
* file about to be bulk loaded
* @param assignSeqId true to flush the memstore first and assign the resulting sequence id to
* the bulk loaded files
* @return Map from family to List of store file paths if successful, null if failed recoverably
* @throws IOException if failed unrecoverably.
*/
public Map<byte[], List<Path>> bulkLoadHFiles(Collection<Pair<byte[], String>> familyPaths, boolean assignSeqId,
BulkLoadListener bulkLoadListener) throws IOException {
return bulkLoadHFiles(familyPaths, assignSeqId, bulkLoadListener, false,
null, true);
}
/**
* Listener class to enable callers of
* bulkLoadHFile() to perform any necessary
* pre/post processing of a given bulkload call
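* <p>A minimal pass-through implementation sketch (illustrative only; real listeners usually
* stage or copy the file in prepareBulkLoad and clean up in the done/failed callbacks):
* <pre>{@code
* BulkLoadListener listener = new BulkLoadListener() {
*   public String prepareBulkLoad(byte[] family, String srcPath, boolean copyFile) {
*     return srcPath; // load the file from where it already sits
*   }
*   public void doneBulkLoad(byte[] family, String srcPath) {}
*   public void failedBulkLoad(byte[] family, String srcPath) {}
* };
* }</pre>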
*/
public interface BulkLoadListener {
/**
* Called before an HFile is actually loaded
* @param family family being loaded to
* @param srcPath path of HFile
* @return final path to be used for actual loading
* @throws IOException
*/
String prepareBulkLoad(byte[] family, String srcPath, boolean copyFile)
throws IOException;
/**
* Called after a successful HFile load
* @param family family being loaded to
* @param srcPath path of HFile
* @throws IOException
*/
void doneBulkLoad(byte[] family, String srcPath) throws IOException;
/**
* Called after a failed HFile load
* @param family family being loaded to
* @param srcPath path of HFile
* @throws IOException
*/
void failedBulkLoad(byte[] family, String srcPath) throws IOException;
}
/**
* Attempts to atomically load a group of hfiles. This is critical for loading
* rows with multiple column families atomically.
*
* @param familyPaths List of Pair&lt;byte[] column family, String hfilePath&gt;
* @param assignSeqId true to flush the memstore first and assign the resulting sequence id to
* the bulk loaded files
* @param bulkLoadListener Internal hooks enabling massaging/preparation of a
* file about to be bulk loaded
* @param copyFile always copy hfiles if true
* @param clusterIds ids from clusters that had already handled the given bulkload event.
* @return Map from family to List of store file paths if successful, null if failed recoverably
* @throws IOException if failed unrecoverably.
*/
public Map<byte[], List<Path>> bulkLoadHFiles(Collection<Pair<byte[], String>> familyPaths,
boolean assignSeqId, BulkLoadListener bulkLoadListener, boolean copyFile,
List<String> clusterIds, boolean replicate) throws IOException {
long seqId = -1;
Map<byte[], List<Path>> storeFiles = new TreeMap<>(Bytes.BYTES_COMPARATOR);
Map<String, Long> storeFilesSizes = new HashMap<>();
Preconditions.checkNotNull(familyPaths);
// we need writeLock for multi-family bulk load
startBulkRegionOperation(hasMultipleColumnFamilies(familyPaths));
boolean isSuccessful = false;
try {
this.writeRequestsCount.increment();
// A split may have happened between when the split keys were gathered and when the
// HRegion's write lock was taken. We need to validate that each HFile still fits this
// region before attempting to bulk load any of them.
IOException ioException = null;
List<Pair<byte[], String>> failures = new ArrayList<>();
for (Pair<byte[], String> p : familyPaths) {
byte[] familyName = p.getFirst();
String path = p.getSecond();
HStore store = getStore(familyName);
if (store == null) {
ioException = new org.apache.hadoop.hbase.DoNotRetryIOException(
"No such column family " + Bytes.toStringBinary(familyName));
} else {
try {
store.assertBulkLoadHFileOk(new Path(path));
} catch (WrongRegionException wre) {
// recoverable (file doesn't fit in region)
failures.add(p);
} catch (IOException ioe) {
// unrecoverable (hdfs problem)
ioException = ioe;
}
}
// validation failed because of some sort of IO problem.
if (ioException != null) {
LOG.error("There was IO error when checking if the bulk load is ok in region {}.", this,
ioException);
throw ioException;
}
}
// validation failed, bail out before doing anything permanent.
if (failures.size() != 0) {
StringBuilder list = new StringBuilder();
for (Pair<byte[], String> p : failures) {
list.append("\n").append(Bytes.toString(p.getFirst())).append(" : ")
.append(p.getSecond());
}
// problem when validating
LOG.warn("There was a recoverable bulk load failure likely due to a split. These (family,"
+ " HFile) pairs were not loaded: {}, in region {}", list.toString(), this);
return null;
}
// We need to assign a sequential ID that's in between two memstores in order to preserve
// the guarantee that all the edits lower than the highest sequential ID from all the
// HFiles are flushed on disk. See HBASE-10958. The sequence id returned when we flush is
// guaranteed to be one beyond the file made when we flushed (or if nothing to flush, it is
// a sequence id that we can be sure is beyond the last hfile written).
if (assignSeqId) {
FlushResult fs = flushcache(true, false, FlushLifeCycleTracker.DUMMY);
if (fs.isFlushSucceeded()) {
seqId = ((FlushResultImpl)fs).flushSequenceId;
} else if (fs.getResult() == FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY) {
seqId = ((FlushResultImpl)fs).flushSequenceId;
} else if (fs.getResult() == FlushResult.Result.CANNOT_FLUSH) {
// CANNOT_FLUSH may mean that a flush is already on-going
// we need to wait for that flush to complete
waitForFlushes();
} else {
throw new IOException("Could not bulk load with an assigned sequential ID because the "+
"flush didn't run. Reason for not flushing: " + ((FlushResultImpl)fs).failureReason);
}
}
Map<byte[], List<Pair<Path, Path>>> familyWithFinalPath =
new TreeMap<>(Bytes.BYTES_COMPARATOR);
for (Pair<byte[], String> p : familyPaths) {
byte[] familyName = p.getFirst();
String path = p.getSecond();
HStore store = getStore(familyName);
if (!familyWithFinalPath.containsKey(familyName)) {
familyWithFinalPath.put(familyName, new ArrayList<>());
}
List<Pair<Path, Path>> lst = familyWithFinalPath.get(familyName);
try {
String finalPath = path;
if (bulkLoadListener != null) {
finalPath = bulkLoadListener.prepareBulkLoad(familyName, path, copyFile);
}
Pair<Path, Path> pair = store.preBulkLoadHFile(finalPath, seqId);
lst.add(pair);
} catch (IOException ioe) {
// A failure here can cause an atomicity violation that we currently
// cannot recover from since it is likely a failed HDFS operation.
LOG.error("There was a partial failure due to IO when attempting to" +
" load " + Bytes.toString(p.getFirst()) + " : " + p.getSecond(), ioe);
if (bulkLoadListener != null) {
try {
bulkLoadListener.failedBulkLoad(familyName, path);
} catch (Exception ex) {
LOG.error("Error while calling failedBulkLoad for family " +
Bytes.toString(familyName) + " with path " + path, ex);
}
}
throw ioe;
}
}
if (this.getCoprocessorHost() != null) {
for (Map.Entry<byte[], List<Pair<Path, Path>>> entry : familyWithFinalPath.entrySet()) {
this.getCoprocessorHost().preCommitStoreFile(entry.getKey(), entry.getValue());
}
}
for (Map.Entry<byte[], List<Pair<Path, Path>>> entry : familyWithFinalPath.entrySet()) {
byte[] familyName = entry.getKey();
for (Pair<Path, Path> p : entry.getValue()) {
String path = p.getFirst().toString();
Path commitedStoreFile = p.getSecond();
HStore store = getStore(familyName);
try {
store.bulkLoadHFile(familyName, path, commitedStoreFile);
// Note the size of the store file
try {
FileSystem fs = commitedStoreFile.getFileSystem(baseConf);
storeFilesSizes.put(commitedStoreFile.getName(), fs.getFileStatus(commitedStoreFile)
.getLen());
} catch (IOException e) {
LOG.warn("Failed to find the size of hfile " + commitedStoreFile, e);
storeFilesSizes.put(commitedStoreFile.getName(), 0L);
}
if(storeFiles.containsKey(familyName)) {
storeFiles.get(familyName).add(commitedStoreFile);
} else {
List<Path> storeFileNames = new ArrayList<>();
storeFileNames.add(commitedStoreFile);
storeFiles.put(familyName, storeFileNames);
}
if (bulkLoadListener != null) {
bulkLoadListener.doneBulkLoad(familyName, path);
}
} catch (IOException ioe) {
// A failure here can cause an atomicity violation that we currently
// cannot recover from since it is likely a failed HDFS operation.
// TODO Need a better story for reverting partial failures due to HDFS.
LOG.error("There was a partial failure due to IO when attempting to" +
" load " + Bytes.toString(familyName) + " : " + p.getSecond(), ioe);
if (bulkLoadListener != null) {
try {
bulkLoadListener.failedBulkLoad(familyName, path);
} catch (Exception ex) {
LOG.error("Error while calling failedBulkLoad for family " +
Bytes.toString(familyName) + " with path " + path, ex);
}
}
throw ioe;
}
}
}
isSuccessful = true;
//request compaction
familyWithFinalPath.keySet().forEach(family -> {
HStore store = getStore(family);
try {
if (this.rsServices != null && store.needsCompaction()) {
this.rsServices.getCompactionRequestor().requestCompaction(this, store,
"bulkload hfiles request compaction", Store.PRIORITY_USER + 1,
CompactionLifeCycleTracker.DUMMY, null);
}
} catch (IOException e) {
LOG.error("bulkload hfiles request compaction error ", e);
}
});
} finally {
if (wal != null && !storeFiles.isEmpty()) {
// Write a bulk load event for hfiles that are loaded
try {
WALProtos.BulkLoadDescriptor loadDescriptor =
ProtobufUtil.toBulkLoadDescriptor(this.getRegionInfo().getTable(),
UnsafeByteOperations.unsafeWrap(this.getRegionInfo().getEncodedNameAsBytes()),
storeFiles, storeFilesSizes, seqId, clusterIds, replicate);
WALUtil.writeBulkLoadMarkerAndSync(this.wal, this.getReplicationScope(), getRegionInfo(),
loadDescriptor, mvcc);
} catch (IOException ioe) {
if (this.rsServices != null) {
// Have to abort the region server because some hfiles have been loaded but we can't
// write the event into the WAL
isSuccessful = false;
this.rsServices.abort("Failed to write bulk load event into WAL.", ioe);
}
}
}
closeBulkRegionOperation();
}
return isSuccessful ? storeFiles : null;
}
@Override
public boolean equals(Object o) {
return o instanceof HRegion && Bytes.equals(getRegionInfo().getRegionName(),
((HRegion) o).getRegionInfo().getRegionName());
}
@Override
public int hashCode() {
return Bytes.hashCode(getRegionInfo().getRegionName());
}
@Override
public String toString() {
return getRegionInfo().getRegionNameAsString();
}
/**
* RegionScannerImpl is used to combine scanners from multiple Stores (aka column families).
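* <p>A minimal caller-side scan sketch (hedged: scanners are normally obtained through
* {@code getScanner(Scan)} on the enclosing region rather than constructed directly, and the
* {@code region} reference here is a hypothetical placeholder):
* <pre>{@code
* try (RegionScanner scanner = region.getScanner(new Scan())) {
*   List<Cell> cells = new ArrayList<>();
*   boolean more;
*   do {
*     more = scanner.next(cells); // fills one row, subject to batch/size limits
*     // process cells ...
*     cells.clear();
*   } while (more);
* }
* }</pre>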
*/
class RegionScannerImpl
implements RegionScanner, Shipper, org.apache.hadoop.hbase.ipc.RpcCallback {
// Package local for testability
KeyValueHeap storeHeap = null;
/** Heap of key-values that are not essential for the provided filters and are thus read
* on demand, if on-demand column family loading is enabled.*/
KeyValueHeap joinedHeap = null;
/**
* If the joined heap data gathering is interrupted due to scan limits, this will
* contain the row for which we are populating the values.*/
protected Cell joinedContinuationRow = null;
private boolean filterClosed = false;
protected final byte[] stopRow;
protected final boolean includeStopRow;
protected final HRegion region;
protected final CellComparator comparator;
private final long readPt;
private final long maxResultSize;
private final ScannerContext defaultScannerContext;
private final FilterWrapper filter;
@Override
public RegionInfo getRegionInfo() {
return region.getRegionInfo();
}
RegionScannerImpl(Scan scan, List<KeyValueScanner> additionalScanners, HRegion region)
throws IOException {
this(scan, additionalScanners, region, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
RegionScannerImpl(Scan scan, List<KeyValueScanner> additionalScanners, HRegion region,
long nonceGroup, long nonce) throws IOException {
this.region = region;
this.maxResultSize = scan.getMaxResultSize();
if (scan.hasFilter()) {
this.filter = new FilterWrapper(scan.getFilter());
} else {
this.filter = null;
}
this.comparator = region.getCellComparator();
/**
* By default, calls to next/nextRaw must enforce the batch limit. Thus, construct a default
* scanner context that can be used to enforce the batch limit in the event that a
* ScannerContext is not specified during an invocation of next/nextRaw
*/
defaultScannerContext = ScannerContext.newBuilder()
.setBatchLimit(scan.getBatch()).build();
this.stopRow = scan.getStopRow();
this.includeStopRow = scan.includeStopRow();
// synchronize on scannerReadPoints so that nobody calculates
// getSmallestReadPoint before scannerReadPoints is updated.
IsolationLevel isolationLevel = scan.getIsolationLevel();
long mvccReadPoint = PackagePrivateFieldAccessor.getMvccReadPoint(scan);
synchronized (scannerReadPoints) {
if (mvccReadPoint > 0) {
this.readPt = mvccReadPoint;
} else if (nonce == HConstants.NO_NONCE || rsServices == null
|| rsServices.getNonceManager() == null) {
this.readPt = getReadPoint(isolationLevel);
} else {
this.readPt = rsServices.getNonceManager().getMvccFromOperationContext(nonceGroup, nonce);
}
scannerReadPoints.put(this, this.readPt);
}
initializeScanners(scan, additionalScanners);
}
protected void initializeScanners(Scan scan, List<KeyValueScanner> additionalScanners)
throws IOException {
// Here we separate all scanners into two lists - scanners that provide data required
// by the filter to operate (scanners list) and all others (joinedScanners list).
List<KeyValueScanner> scanners = new ArrayList<>(scan.getFamilyMap().size());
List<KeyValueScanner> joinedScanners = new ArrayList<>(scan.getFamilyMap().size());
// Store all already instantiated scanners for exception handling
List<KeyValueScanner> instantiatedScanners = new ArrayList<>();
// handle additionalScanners
if (additionalScanners != null && !additionalScanners.isEmpty()) {
scanners.addAll(additionalScanners);
instantiatedScanners.addAll(additionalScanners);
}
try {
for (Map.Entry<byte[], NavigableSet<byte[]>> entry : scan.getFamilyMap().entrySet()) {
HStore store = stores.get(entry.getKey());
KeyValueScanner scanner = store.getScanner(scan, entry.getValue(), this.readPt);
instantiatedScanners.add(scanner);
if (this.filter == null || !scan.doLoadColumnFamiliesOnDemand()
|| this.filter.isFamilyEssential(entry.getKey())) {
scanners.add(scanner);
} else {
joinedScanners.add(scanner);
}
}
initializeKVHeap(scanners, joinedScanners, region);
} catch (Throwable t) {
throw handleException(instantiatedScanners, t);
}
}
protected void initializeKVHeap(List<KeyValueScanner> scanners,
List<KeyValueScanner> joinedScanners, HRegion region)
throws IOException {
this.storeHeap = new KeyValueHeap(scanners, comparator);
if (!joinedScanners.isEmpty()) {
this.joinedHeap = new KeyValueHeap(joinedScanners, comparator);
}
}
private IOException handleException(List<KeyValueScanner> instantiatedScanners,
Throwable t) {
// remove the scanner read point before throwing the exception
scannerReadPoints.remove(this);
if (storeHeap != null) {
storeHeap.close();
storeHeap = null;
if (joinedHeap != null) {
joinedHeap.close();
joinedHeap = null;
}
} else {
// close all already instantiated scanners before throwing the exception
for (KeyValueScanner scanner : instantiatedScanners) {
scanner.close();
}
}
return t instanceof IOException ? (IOException) t : new IOException(t);
}
@Override
public long getMaxResultSize() {
return maxResultSize;
}
@Override
public long getMvccReadPoint() {
return this.readPt;
}
@Override
public int getBatch() {
return this.defaultScannerContext.getBatchLimit();
}
/**
* Reset both the filter and the old filter.
*
* @throws IOException in case a filter raises an I/O exception.
*/
protected void resetFilters() throws IOException {
if (filter != null) {
filter.reset();
}
}
@Override
public boolean next(List<Cell> outResults)
throws IOException {
// apply the batching limit by default
return next(outResults, defaultScannerContext);
}
@Override
public synchronized boolean next(List<Cell> outResults, ScannerContext scannerContext)
throws IOException {
if (this.filterClosed) {
throw new UnknownScannerException("Scanner was closed (timed out?) " +
"after we renewed it. Could be caused by a very slow scanner " +
"or a lengthy garbage collection");
}
startRegionOperation(Operation.SCAN);
try {
return nextRaw(outResults, scannerContext);
} finally {
closeRegionOperation(Operation.SCAN);
}
}
@Override
public boolean nextRaw(List<Cell> outResults) throws IOException {
// Use the RegionScanner's context by default
return nextRaw(outResults, defaultScannerContext);
}
@Override
public boolean nextRaw(List<Cell> outResults, ScannerContext scannerContext)
throws IOException {
if (storeHeap == null) {
// scanner is closed
throw new UnknownScannerException("Scanner was closed");
}
boolean moreValues = false;
if (outResults.isEmpty()) {
// Usually outResults is empty. This is true when next is called
// to handle scan or get operation.
moreValues = nextInternal(outResults, scannerContext);
} else {
List<Cell> tmpList = new ArrayList<>();
moreValues = nextInternal(tmpList, scannerContext);
outResults.addAll(tmpList);
}
if (!outResults.isEmpty()) {
readRequestsCount.increment();
if (metricsRegion != null) {
metricsRegion.updateReadRequestCount();
}
}
if (rsServices != null && rsServices.getMetrics() != null) {
rsServices.getMetrics().updateReadQueryMeter(getRegionInfo().getTable());
}
// If the size limit was reached it means a partial Result is being returned. Returning a
// partial Result means that we should not reset the filters; filters should only be reset in
// between rows
if (!scannerContext.mayHaveMoreCellsInRow()) {
resetFilters();
}
if (isFilterDoneInternal()) {
moreValues = false;
}
return moreValues;
}
/**
* @return true if more cells exist after this batch, false if scanner is done
*/
private boolean populateFromJoinedHeap(List<Cell> results, ScannerContext scannerContext)
throws IOException {
assert joinedContinuationRow != null;
boolean moreValues = populateResult(results, this.joinedHeap, scannerContext,
joinedContinuationRow);
if (!scannerContext.checkAnyLimitReached(LimitScope.BETWEEN_CELLS)) {
// We are done with this row, reset the continuation.
joinedContinuationRow = null;
}
// As the data is obtained from two independent heaps, we need to
// ensure that result list is sorted, because Result relies on that.
sort(results, comparator);
return moreValues;
}
/**
* Fetches records with currentRow into the results list, until the next row, the batchLimit
* (if not -1), or the remainingResultSize (if not -1) is reached.
* @param heap KeyValueHeap to fetch data from. It must be positioned on the correct row
* before the call.
* @param scannerContext
* @param currentRowCell
* @return state of last call to {@link KeyValueHeap#next()}
*/
private boolean populateResult(List<Cell> results, KeyValueHeap heap,
ScannerContext scannerContext, Cell currentRowCell) throws IOException {
Cell nextKv;
boolean moreCellsInRow = false;
boolean tmpKeepProgress = scannerContext.getKeepProgress();
// Scanning between column families and thus the scope is between cells
LimitScope limitScope = LimitScope.BETWEEN_CELLS;
do {
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation.
checkInterrupt();
// We want to maintain any progress that is made towards the limits while scanning across
// different column families. To do this, we toggle the keep progress flag on during calls
// to the StoreScanner to ensure that any progress made thus far is not wiped away.
scannerContext.setKeepProgress(true);
heap.next(results, scannerContext);
scannerContext.setKeepProgress(tmpKeepProgress);
nextKv = heap.peek();
moreCellsInRow = moreCellsInRow(nextKv, currentRowCell);
if (!moreCellsInRow) incrementCountOfRowsScannedMetric(scannerContext);
if (moreCellsInRow && scannerContext.checkBatchLimit(limitScope)) {
return scannerContext.setScannerState(NextState.BATCH_LIMIT_REACHED).hasMoreValues();
} else if (scannerContext.checkSizeLimit(limitScope)) {
ScannerContext.NextState state =
moreCellsInRow ? NextState.SIZE_LIMIT_REACHED_MID_ROW : NextState.SIZE_LIMIT_REACHED;
return scannerContext.setScannerState(state).hasMoreValues();
} else if (scannerContext.checkTimeLimit(limitScope)) {
ScannerContext.NextState state =
moreCellsInRow ? NextState.TIME_LIMIT_REACHED_MID_ROW : NextState.TIME_LIMIT_REACHED;
return scannerContext.setScannerState(state).hasMoreValues();
}
} while (moreCellsInRow);
return nextKv != null;
}
/**
* Based on the nextKv in the heap, and the current row, decide whether or not there are more
* cells to be read in the heap. If the row of the nextKv in the heap matches the current row
* then there are more cells to be read in the row.
* @param nextKv
* @param currentRowCell
* @return true When there are more cells in the row to be read
*/
private boolean moreCellsInRow(final Cell nextKv, Cell currentRowCell) {
return nextKv != null && CellUtil.matchingRows(nextKv, currentRowCell);
}
/*
* @return True if a filter rules that the scanner is done.
*/
@Override
public synchronized boolean isFilterDone() throws IOException {
return isFilterDoneInternal();
}
private boolean isFilterDoneInternal() throws IOException {
return this.filter != null && this.filter.filterAllRemaining();
}
private boolean nextInternal(List<Cell> results, ScannerContext scannerContext)
throws IOException {
if (!results.isEmpty()) {
throw new IllegalArgumentException("First parameter should be an empty list");
}
if (scannerContext == null) {
throw new IllegalArgumentException("Scanner context cannot be null");
}
Optional<RpcCall> rpcCall = RpcServer.getCurrentCall();
// Save the initial progress from the Scanner context in these local variables. The progress
// may need to be reset a few times if rows are being filtered out so we save the initial
// progress.
int initialBatchProgress = scannerContext.getBatchProgress();
long initialSizeProgress = scannerContext.getDataSizeProgress();
long initialHeapSizeProgress = scannerContext.getHeapSizeProgress();
// Used to check time limit
LimitScope limitScope = LimitScope.BETWEEN_CELLS;
// The loop here is used only when, at some point during the next, we determine that due to
// the effects of filters or otherwise we have an empty row in the result. Then we loop and
// try again. Otherwise we must get out on the first iteration via return: "true" if there's
// more data to read, "false" if there isn't (storeHeap is at a stop row, and joinedHeap has
// no more data to read for the last row, joinedContinuationRow, if set).
while (true) {
// Starting to scan a new row. Reset the scanner progress according to whether or not
// progress should be kept.
if (scannerContext.getKeepProgress()) {
// Progress should be kept. Reset to initial values seen at start of method invocation.
scannerContext.setProgress(initialBatchProgress, initialSizeProgress,
initialHeapSizeProgress);
} else {
scannerContext.clearProgress();
}
if (rpcCall.isPresent()) {
// If a user specifies a too-restrictive or too-slow scanner, the
// client might time out and disconnect while the server side
// is still processing the request. We should abort aggressively
// in that case.
long afterTime = rpcCall.get().disconnectSince();
if (afterTime >= 0) {
throw new CallerDisconnectedException(
"Aborting on region " + getRegionInfo().getRegionNameAsString() + ", call " +
this + " after " + afterTime + " ms, since " +
"caller disconnected");
}
}
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation.
checkInterrupt();
// Let's see what we have in the storeHeap.
Cell current = this.storeHeap.peek();
boolean shouldStop = shouldStop(current);
// When hasFilterRow is true it means that all the cells for a particular row must be
// read before a filtering decision can be made. This means that filters where hasFilterRow
// is true run the risk of encountering out of memory errors in the case that they are
// applied to a table that has very large rows.
boolean hasFilterRow = this.filter != null && this.filter.hasFilterRow();
// If filter#hasFilterRow is true, partial results are not allowed since allowing them
// would prevent the filters from being evaluated. Thus, if it is true, change the
// scope of any limits that could potentially create partial results to
// LimitScope.BETWEEN_ROWS so that those limits are not reached mid-row
if (hasFilterRow) {
if (LOG.isTraceEnabled()) {
LOG.trace("filter#hasFilterRow is true which prevents partial results from being "
+ " formed. Changing scope of limits that may create partials");
}
scannerContext.setSizeLimitScope(LimitScope.BETWEEN_ROWS);
scannerContext.setTimeLimitScope(LimitScope.BETWEEN_ROWS);
limitScope = LimitScope.BETWEEN_ROWS;
}
if (scannerContext.checkTimeLimit(LimitScope.BETWEEN_CELLS)) {
if (hasFilterRow) {
throw new IncompatibleFilterException(
"Filter whose hasFilterRow() returns true is incompatible with scans that must " +
" stop mid-row because of a limit. ScannerContext:" + scannerContext);
}
return true;
}
// Check if we were getting data from the joinedHeap and hit the limit.
// If not, then it's main path - getting results from storeHeap.
if (joinedContinuationRow == null) {
// First, check if we are at a stop row. If so, there are no more results.
if (shouldStop) {
if (hasFilterRow) {
filter.filterRowCells(results);
}
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
// Check if rowkey filter wants to exclude this row. If so, loop to next.
// Technically, if we hit limits before on this row, we don't need this call.
if (filterRowKey(current)) {
incrementCountOfRowsFilteredMetric(scannerContext);
// early check, see HBASE-16296
if (isFilterDoneInternal()) {
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
// Typically the count of rows scanned is incremented inside #populateResult. However,
// here we are filtering a row based purely on its row key, preventing us from calling
// #populateResult. Thus, perform the necessary increment here to rows scanned metric
incrementCountOfRowsScannedMetric(scannerContext);
boolean moreRows = nextRow(scannerContext, current);
if (!moreRows) {
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
results.clear();
// Read nothing as the rowkey was filtered, but still need to check time limit
if (scannerContext.checkTimeLimit(limitScope)) {
return true;
}
continue;
}
// Ok, we are good, let's try to get some results from the main heap.
populateResult(results, this.storeHeap, scannerContext, current);
if (scannerContext.checkAnyLimitReached(LimitScope.BETWEEN_CELLS)) {
if (hasFilterRow) {
throw new IncompatibleFilterException(
"Filter whose hasFilterRow() returns true is incompatible with scans that must "
+ " stop mid-row because of a limit. ScannerContext:" + scannerContext);
}
return true;
}
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation.
checkInterrupt();
Cell nextKv = this.storeHeap.peek();
shouldStop = shouldStop(nextKv);
// save that the row was empty before filters were applied to it.
final boolean isEmptyRow = results.isEmpty();
// We have the part of the row necessary for filtering (all of it, usually).
// First filter with the filterRow(List).
FilterWrapper.FilterRowRetCode ret = FilterWrapper.FilterRowRetCode.NOT_CALLED;
if (hasFilterRow) {
ret = filter.filterRowCellsWithRet(results);
// We don't know how the results have changed after being filtered. Must set progress
// according to contents of results now.
if (scannerContext.getKeepProgress()) {
scannerContext.setProgress(initialBatchProgress, initialSizeProgress,
initialHeapSizeProgress);
} else {
scannerContext.clearProgress();
}
scannerContext.incrementBatchProgress(results.size());
for (Cell cell : results) {
scannerContext.incrementSizeProgress(PrivateCellUtil.estimatedSerializedSizeOf(cell),
cell.heapSize());
}
}
if (isEmptyRow || ret == FilterWrapper.FilterRowRetCode.EXCLUDE || filterRow()) {
incrementCountOfRowsFilteredMetric(scannerContext);
results.clear();
boolean moreRows = nextRow(scannerContext, current);
if (!moreRows) {
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
// This row was totally filtered out, if this is NOT the last row,
// we should continue on. Otherwise, nothing else to do.
if (!shouldStop) {
// Read nothing as the cells were filtered, but still need to check the time limit
if (scannerContext.checkTimeLimit(limitScope)) {
return true;
}
continue;
}
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
// Ok, we are done with storeHeap for this row.
// Now we may need to fetch additional, non-essential data into row.
// These values are not needed for filter to work, so we postpone their
// fetch to (possibly) reduce amount of data loads from disk.
if (this.joinedHeap != null) {
boolean mayHaveData = joinedHeapMayHaveData(current);
if (mayHaveData) {
joinedContinuationRow = current;
populateFromJoinedHeap(results, scannerContext);
if (scannerContext.checkAnyLimitReached(LimitScope.BETWEEN_CELLS)) {
return true;
}
}
}
} else {
// Populating from the joined heap was stopped by limits, populate some more.
populateFromJoinedHeap(results, scannerContext);
if (scannerContext.checkAnyLimitReached(LimitScope.BETWEEN_CELLS)) {
return true;
}
}
// We may have just called populateFromJoinedHeap and hit the limits. If that is
// the case, we need to call it again on the next next() invocation.
if (joinedContinuationRow != null) {
return scannerContext.setScannerState(NextState.MORE_VALUES).hasMoreValues();
}
// Finally, we are done with both joinedHeap and storeHeap.
// Double check to prevent empty rows from appearing in result. It could be
// the case when SingleColumnValueExcludeFilter is used.
if (results.isEmpty()) {
incrementCountOfRowsFilteredMetric(scannerContext);
boolean moreRows = nextRow(scannerContext, current);
if (!moreRows) {
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
if (!shouldStop) continue;
}
if (shouldStop) {
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
} else {
return scannerContext.setScannerState(NextState.MORE_VALUES).hasMoreValues();
}
}
}
protected void incrementCountOfRowsFilteredMetric(ScannerContext scannerContext) {
filteredReadRequestsCount.increment();
if (metricsRegion != null) {
metricsRegion.updateFilteredRecords();
}
if (scannerContext == null || !scannerContext.isTrackingMetrics()) return;
scannerContext.getMetrics().countOfRowsFiltered.incrementAndGet();
}
protected void incrementCountOfRowsScannedMetric(ScannerContext scannerContext) {
if (scannerContext == null || !scannerContext.isTrackingMetrics()) return;
scannerContext.getMetrics().countOfRowsScanned.incrementAndGet();
}
/**
* @param currentRowCell
* @return true when the joined heap may have data for the current row
* @throws IOException
*/
private boolean joinedHeapMayHaveData(Cell currentRowCell)
throws IOException {
Cell nextJoinedKv = joinedHeap.peek();
boolean matchCurrentRow =
nextJoinedKv != null && CellUtil.matchingRows(nextJoinedKv, currentRowCell);
boolean matchAfterSeek = false;
// If the next value in the joined heap does not match the current row, try to seek to the
// correct row
if (!matchCurrentRow) {
Cell firstOnCurrentRow = PrivateCellUtil.createFirstOnRow(currentRowCell);
boolean seekSuccessful = this.joinedHeap.requestSeek(firstOnCurrentRow, true, true);
matchAfterSeek =
seekSuccessful && joinedHeap.peek() != null
&& CellUtil.matchingRows(joinedHeap.peek(), currentRowCell);
}
return matchCurrentRow || matchAfterSeek;
}
/**
* This function is to maintain backward compatibility for 0.94 filters. HBASE-6429 combines
* both the filterRow and filterRow({@code List<KeyValue> kvs}) functions. Code written for
* 0.94 or older may not implement hasFilterRow as HBASE-6429 expects, because in 0.94
* hasFilterRow() only returns true when filterRow({@code List<KeyValue> kvs}) is overridden,
* not filterRow(). In that case filterRow() would be skipped.
*/
private boolean filterRow() throws IOException {
// when hasFilterRow returns true, filter.filterRow() will be called automatically inside
// filterRowCells(List<Cell> kvs) so we skip that scenario here.
return filter != null && (!filter.hasFilterRow())
&& filter.filterRow();
}
private boolean filterRowKey(Cell current) throws IOException {
return filter != null && filter.filterRowKey(current);
}
protected boolean nextRow(ScannerContext scannerContext, Cell curRowCell) throws IOException {
assert this.joinedContinuationRow == null: "Trying to go to next row during joinedHeap read.";
Cell next;
while ((next = this.storeHeap.peek()) != null &&
CellUtil.matchingRows(next, curRowCell)) {
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation.
checkInterrupt();
this.storeHeap.next(MOCKED_LIST);
}
resetFilters();
// Calling the hook in CP which allows it to do a fast forward
return this.region.getCoprocessorHost() == null
|| this.region.getCoprocessorHost()
.postScannerFilterRow(this, curRowCell);
}
protected boolean shouldStop(Cell currentRowCell) {
if (currentRowCell == null) {
return true;
}
if (stopRow == null || Bytes.equals(stopRow, HConstants.EMPTY_END_ROW)) {
return false;
}
int c = comparator.compareRows(currentRowCell, stopRow, 0, stopRow.length);
return c > 0 || (c == 0 && !includeStopRow);
}
@Override
public synchronized void close() {
if (storeHeap != null) {
storeHeap.close();
storeHeap = null;
}
if (joinedHeap != null) {
joinedHeap.close();
joinedHeap = null;
}
// no need to synchronize here.
scannerReadPoints.remove(this);
this.filterClosed = true;
}
KeyValueHeap getStoreHeapForTesting() {
return storeHeap;
}
@Override
public synchronized boolean reseek(byte[] row) throws IOException {
if (row == null) {
throw new IllegalArgumentException("Row cannot be null.");
}
boolean result = false;
startRegionOperation();
Cell kv = PrivateCellUtil.createFirstOnRow(row, 0, (short) row.length);
try {
// use request seek to make use of the lazy seek option. See HBASE-5520
result = this.storeHeap.requestSeek(kv, true, true);
if (this.joinedHeap != null) {
result = this.joinedHeap.requestSeek(kv, true, true) || result;
}
} finally {
closeRegionOperation();
}
return result;
}
@Override
public void shipped() throws IOException {
if (storeHeap != null) {
storeHeap.shipped();
}
if (joinedHeap != null) {
joinedHeap.shipped();
}
}
@Override
public void run() throws IOException {
// This is the RPC callback method executed. We do the close of the scanner in this
// callback.
this.close();
}
}
// Utility methods
/**
* A utility method to create new instances of HRegion based on the {@link HConstants#REGION_IMPL}
* configuration property.
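* <p>A minimal configuration sketch for plugging in a custom region implementation (hedged:
* {@code MyRegion} is a hypothetical HRegion subclass that must declare the constructor this
* factory reflects on):
* <pre>{@code
* Configuration conf = HBaseConfiguration.create();
* conf.setClass(HConstants.REGION_IMPL, MyRegion.class, HRegion.class);
* // later calls to newHRegion/openHRegion on this conf will instantiate MyRegion
* }</pre>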
* @param tableDir qualified path of directory where region should be located, usually the table
* directory.
* @param wal The WAL is the outbound log for any updates to the HRegion. The wal file is a logfile
* from the previous execution that's custom-computed for this HRegion. The HRegionServer
* computes and sorts the appropriate wal info for this HRegion. If there is a previous
* file (implying that the HRegion has been written-to before), then read it from the
* supplied path.
* @param fs is the filesystem.
* @param conf is global configuration settings.
* @param regionInfo RegionInfo that describes the region to instantiate.
* @param htd the table descriptor
* @return the new instance
*/
public static HRegion newHRegion(Path tableDir, WAL wal, FileSystem fs,
Configuration conf, RegionInfo regionInfo, final TableDescriptor htd,
RegionServerServices rsServices) {
try {
@SuppressWarnings("unchecked")
Class<? extends HRegion> regionClass =
(Class<? extends HRegion>) conf.getClass(HConstants.REGION_IMPL, HRegion.class);
Constructor<? extends HRegion> c =
regionClass.getConstructor(Path.class, WAL.class, FileSystem.class,
Configuration.class, RegionInfo.class, TableDescriptor.class,
RegionServerServices.class);
return c.newInstance(tableDir, wal, fs, conf, regionInfo, htd, rsServices);
} catch (Throwable e) {
// todo: what should I throw here?
throw new IllegalStateException("Could not instantiate a region instance.", e);
}
}
/**
* Convenience method creating new HRegions. Used by createTable.
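* <p>A minimal usage sketch (hedged: assumes {@code conf}, {@code rootDir}, {@code htd} and a
* shared {@code wal} are already set up by the caller):
* <pre>{@code
* RegionInfo info = RegionInfoBuilder.newBuilder(htd.getTableName()).build();
* HRegion region = HRegion.createHRegion(info, rootDir, conf, htd, wal, true);
* }</pre>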
* @param info Info for region to create.
* @param rootDir Root directory for HBase instance
* @param wal shared WAL
* @param initialize - true to initialize the region
* @return new HRegion
*/
public static HRegion createHRegion(final RegionInfo info, final Path rootDir,
final Configuration conf, final TableDescriptor hTableDescriptor, final WAL wal,
final boolean initialize) throws IOException {
return createHRegion(info, rootDir, conf, hTableDescriptor, wal, initialize, null);
}
/**
* Convenience method creating new HRegions. Used by createTable.
* @param info Info for region to create.
* @param rootDir Root directory for HBase instance
* @param wal shared WAL
* @param initialize - true to initialize the region
* @param rsRpcServices An interface we can request flushes against.
* @return new HRegion
*/
public static HRegion createHRegion(final RegionInfo info, final Path rootDir,
final Configuration conf, final TableDescriptor hTableDescriptor, final WAL wal,
final boolean initialize, RegionServerServices rsRpcServices) throws IOException {
LOG.info("creating " + info + ", tableDescriptor="
+ (hTableDescriptor == null ? "null" : hTableDescriptor) + ", regionDir=" + rootDir);
createRegionDir(conf, info, rootDir);
FileSystem fs = rootDir.getFileSystem(conf);
Path tableDir = CommonFSUtils.getTableDir(rootDir, info.getTable());
HRegion region =
HRegion.newHRegion(tableDir, wal, fs, conf, info, hTableDescriptor, rsRpcServices);
if (initialize) {
region.initialize(null);
}
return region;
}
/**
* Create a region under the given table directory.
*/
public static HRegion createHRegion(Configuration conf, RegionInfo regionInfo, FileSystem fs,
Path tableDir, TableDescriptor tableDesc) throws IOException {
LOG.info("Creating {}, tableDescriptor={}, under table dir {}", regionInfo, tableDesc,
tableDir);
HRegionFileSystem.createRegionOnFileSystem(conf, fs, tableDir, regionInfo);
HRegion region = HRegion.newHRegion(tableDir, null, fs, conf, regionInfo, tableDesc, null);
return region;
}
/**
* Create the region directory in the filesystem.
*/
public static HRegionFileSystem createRegionDir(Configuration configuration, RegionInfo ri,
Path rootDir)
throws IOException {
FileSystem fs = rootDir.getFileSystem(configuration);
Path tableDir = CommonFSUtils.getTableDir(rootDir, ri.getTable());
// If directory already exists, will log warning and keep going. Will try to create
// .regioninfo. If one exists, will overwrite.
return HRegionFileSystem.createRegionOnFileSystem(configuration, fs, tableDir, ri);
}
public static HRegion createHRegion(final RegionInfo info, final Path rootDir,
final Configuration conf,
final TableDescriptor hTableDescriptor,
final WAL wal)
throws IOException {
return createHRegion(info, rootDir, conf, hTableDescriptor, wal, true);
}
/**
* Open a Region.
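* <p>A minimal open/close lifecycle sketch (hedged: assumes {@code info}, {@code htd},
* {@code wal} and {@code conf} are already available to the caller):
* <pre>{@code
* HRegion region = HRegion.openHRegion(info, htd, wal, conf);
* try {
*   // serve reads and writes against the region
* } finally {
*   region.close();
* }
* }</pre>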
* @param info Info for region to be opened.
* @param wal WAL for region to use. This method will call
* WAL#setSequenceNumber(long) passing the result of the call to
* HRegion#getMinSequenceId() to ensure the wal id is properly kept
* up. HRegionStore does this every time it opens a new region.
* @return new HRegion
*
* @throws IOException
*/
public static HRegion openHRegion(final RegionInfo info,
final TableDescriptor htd, final WAL wal,
final Configuration conf)
throws IOException {
return openHRegion(info, htd, wal, conf, null, null);
}
/**
* Open a Region.
* @param info Info for region to be opened
* @param htd the table descriptor
* @param wal WAL for region to use. This method will call
* WAL#setSequenceNumber(long) passing the result of the call to
* HRegion#getMinSequenceId() to ensure the wal id is properly kept
* up. HRegionStore does this every time it opens a new region.
* @param conf The Configuration object to use.
* @param rsServices An interface we can request flushes against.
* @param reporter An interface we can report progress against.
* @return new HRegion
*
* @throws IOException
*/
public static HRegion openHRegion(final RegionInfo info,
final TableDescriptor htd, final WAL wal, final Configuration conf,
final RegionServerServices rsServices,
final CancelableProgressable reporter)
throws IOException {
return openHRegion(CommonFSUtils.getRootDir(conf), info, htd, wal, conf, rsServices, reporter);
}
/**
* Open a Region.
* @param rootDir Root directory for HBase instance
* @param info Info for region to be opened.
* @param htd the table descriptor
* @param wal WAL for region to use. This method will call
* WAL#setSequenceNumber(long) passing the result of the call to
* HRegion#getMinSequenceId() to ensure the WAL sequence id is kept
* up to date. HRegionServer does this every time it opens a new region.
* @param conf The Configuration object to use.
* @return new HRegion
* @throws IOException
*/
public static HRegion openHRegion(Path rootDir, final RegionInfo info,
final TableDescriptor htd, final WAL wal, final Configuration conf)
throws IOException {
return openHRegion(rootDir, info, htd, wal, conf, null, null);
}
/**
* Open a Region.
* @param rootDir Root directory for HBase instance
* @param info Info for region to be opened.
* @param htd the table descriptor
* @param wal WAL for region to use. This method will call
* WAL#setSequenceNumber(long) passing the result of the call to
* HRegion#getMinSequenceId() to ensure the WAL sequence id is kept
* up to date. HRegionServer does this every time it opens a new region.
* @param conf The Configuration object to use.
* @param rsServices An interface we can request flushes against.
* @param reporter An interface we can report progress against.
* @return new HRegion
* @throws IOException
*/
public static HRegion openHRegion(final Path rootDir, final RegionInfo info,
final TableDescriptor htd, final WAL wal, final Configuration conf,
final RegionServerServices rsServices,
final CancelableProgressable reporter)
throws IOException {
FileSystem fs = null;
if (rsServices != null) {
fs = rsServices.getFileSystem();
}
if (fs == null) {
fs = rootDir.getFileSystem(conf);
}
return openHRegion(conf, fs, rootDir, info, htd, wal, rsServices, reporter);
}
/**
* Open a Region.
* @param conf The Configuration object to use.
* @param fs Filesystem to use
* @param rootDir Root directory for HBase instance
* @param info Info for region to be opened.
* @param htd the table descriptor
* @param wal WAL for region to use. This method will call
* WAL#setSequenceNumber(long) passing the result of the call to
* HRegion#getMinSequenceId() to ensure the WAL sequence id is kept
* up to date. HRegionServer does this every time it opens a new region.
* @return new HRegion
*/
public static HRegion openHRegion(final Configuration conf, final FileSystem fs,
final Path rootDir, final RegionInfo info, final TableDescriptor htd, final WAL wal)
throws IOException {
return openHRegion(conf, fs, rootDir, info, htd, wal, null, null);
}
/**
* Open a Region.
* @param conf The Configuration object to use.
* @param fs Filesystem to use
* @param rootDir Root directory for HBase instance
* @param info Info for region to be opened.
* @param htd the table descriptor
* @param wal WAL for region to use. This method will call
* WAL#setSequenceNumber(long) passing the result of the call to
* HRegion#getMinSequenceId() to ensure the WAL sequence id is kept
* up to date. HRegionServer does this every time it opens a new region.
* @param rsServices An interface we can request flushes against.
* @param reporter An interface we can report progress against.
* @return new HRegion
*/
public static HRegion openHRegion(final Configuration conf, final FileSystem fs,
final Path rootDir, final RegionInfo info, final TableDescriptor htd, final WAL wal,
final RegionServerServices rsServices, final CancelableProgressable reporter)
throws IOException {
Path tableDir = CommonFSUtils.getTableDir(rootDir, info.getTable());
return openHRegionFromTableDir(conf, fs, tableDir, info, htd, wal, rsServices, reporter);
}
/**
* Open a Region.
* @param conf The Configuration object to use.
* @param fs Filesystem to use
* @param tableDir Table directory to open the region under
* @param info Info for region to be opened.
* @param htd the table descriptor
* @param wal WAL for region to use. This method will call
* WAL#setSequenceNumber(long) passing the result of the call to
* HRegion#getMinSequenceId() to ensure the wal id is properly kept
* up. HRegionStore does this every time it opens a new region.
* @param rsServices An interface we can request flushes against.
* @param reporter An interface we can report progress against.
* @return new HRegion
* @throws NullPointerException if {@code info} is {@code null}
*/
public static HRegion openHRegionFromTableDir(final Configuration conf, final FileSystem fs,
final Path tableDir, final RegionInfo info, final TableDescriptor htd, final WAL wal,
final RegionServerServices rsServices, final CancelableProgressable reporter)
throws IOException {
Objects.requireNonNull(info, "RegionInfo cannot be null");
LOG.debug("Opening region: {}", info);
HRegion r = HRegion.newHRegion(tableDir, wal, fs, conf, info, htd, rsServices);
return r.openHRegion(reporter);
}
public NavigableMap<byte[], Integer> getReplicationScope() {
return this.replicationScope;
}
/**
* Useful when reopening a closed region (normally for unit tests)
* @param other original object
* @param reporter An interface we can report progress against.
* @return new HRegion
*/
public static HRegion openHRegion(final HRegion other, final CancelableProgressable reporter)
throws IOException {
HRegionFileSystem regionFs = other.getRegionFileSystem();
HRegion r = newHRegion(regionFs.getTableDir(), other.getWAL(), regionFs.getFileSystem(),
other.baseConf, other.getRegionInfo(), other.getTableDescriptor(), null);
return r.openHRegion(reporter);
}
public static Region openHRegion(final Region other, final CancelableProgressable reporter)
throws IOException {
return openHRegion((HRegion)other, reporter);
}
/**
* Open HRegion.
* Calls initialize and sets sequenceId.
* @return Returns <code>this</code>
*/
protected HRegion openHRegion(final CancelableProgressable reporter)
throws IOException {
try {
// Refuse to open the region if we are missing local compression support
TableDescriptorChecker.checkCompression(htableDescriptor);
// Refuse to open the region if encryption configuration is incorrect or
// codec support is missing
LOG.debug("checking encryption for " + this.getRegionInfo().getEncodedName());
TableDescriptorChecker.checkEncryption(conf, htableDescriptor);
// Refuse to open the region if a required class cannot be loaded
LOG.debug("checking classloading for " + this.getRegionInfo().getEncodedName());
TableDescriptorChecker.checkClassLoading(conf, htableDescriptor);
this.openSeqNum = initialize(reporter);
this.mvcc.advanceTo(openSeqNum);
// The openSeqNum must be increased every time a region is assigned, as we rely on it to
// determine whether a region has been successfully reopened. So here we always write open
// marker, even if the table is read only.
if (wal != null && getRegionServerServices() != null &&
RegionReplicaUtil.isDefaultReplica(getRegionInfo())) {
writeRegionOpenMarker(wal, openSeqNum);
}
} catch (Throwable t) {
// If the open fails (for example because a coprocessor threw), the
// MetricsRegionWrapperImpl has already been initialized but never closed,
// so close the region here to release those resources.
try {
this.close();
} catch (Throwable e) {
LOG.warn("Open region: {} failed. Try close region but got exception ", this.getRegionInfo(),
e);
}
throw t;
}
return this;
}
/**
* Open a Region on a read-only file-system (like hdfs snapshots)
* @param conf The Configuration object to use.
* @param fs Filesystem to use
* @param info Info for region to be opened.
* @param htd the table descriptor
* @return new HRegion
* @throws NullPointerException if {@code info} is {@code null}
*/
public static HRegion openReadOnlyFileSystemHRegion(final Configuration conf, final FileSystem fs,
final Path tableDir, RegionInfo info, final TableDescriptor htd) throws IOException {
Objects.requireNonNull(info, "RegionInfo cannot be null");
if (LOG.isDebugEnabled()) {
LOG.debug("Opening region (readOnly filesystem): " + info);
}
if (info.getReplicaId() <= 0) {
info = RegionReplicaUtil.getRegionInfoForReplica(info, 1);
}
HRegion r = HRegion.newHRegion(tableDir, null, fs, conf, info, htd, null);
r.writestate.setReadOnly(true);
return r.openHRegion(null);
}
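// Usage sketch (illustrative): scanning a region directory on a read-only filesystem such as
// an hdfs snapshot. The names conf, fs, tableDir, ri and htd are assumed to be in scope.
//
//   HRegion region = HRegion.openReadOnlyFileSystemHRegion(conf, fs, tableDir, ri, htd);
//   try (RegionScanner scanner = region.getScanner(new Scan())) {
//     List<Cell> cells = new ArrayList<>();
//     boolean more;
//     do {
//       cells.clear();
//       more = scanner.next(cells);
//       // ... consume cells ...
//     } while (more);
//   }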
public static void warmupHRegion(final RegionInfo info,
final TableDescriptor htd, final WAL wal, final Configuration conf,
final RegionServerServices rsServices,
final CancelableProgressable reporter)
throws IOException {
Objects.requireNonNull(info, "RegionInfo cannot be null");
LOG.debug("Warmup {}", info);
Path rootDir = CommonFSUtils.getRootDir(conf);
Path tableDir = CommonFSUtils.getTableDir(rootDir, info.getTable());
FileSystem fs = null;
if (rsServices != null) {
fs = rsServices.getFileSystem();
}
if (fs == null) {
fs = rootDir.getFileSystem(conf);
}
HRegion r = HRegion.newHRegion(tableDir, wal, fs, conf, info, htd, null);
r.initializeWarmup(reporter);
}
/**
* Computes the Path of the HRegion
*
* @param tabledir qualified path for table
* @param name ENCODED region name
* @return Path of HRegion directory
* @deprecated For tests only; to be removed.
*/
@Deprecated
public static Path getRegionDir(final Path tabledir, final String name) {
return new Path(tabledir, name);
}
/**
* Determines whether the specified row is within the row range of the
* given RegionInfo.
*
* @param info RegionInfo that specifies the row range
* @param row row to be checked
* @return true if the row is within the range specified by the RegionInfo
*/
public static boolean rowIsInRange(RegionInfo info, final byte [] row) {
return ((info.getStartKey().length == 0) ||
(Bytes.compareTo(info.getStartKey(), row) <= 0)) &&
((info.getEndKey().length == 0) ||
(Bytes.compareTo(info.getEndKey(), row) > 0));
}
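// Example of the boundary semantics above: the start key is inclusive, the end key exclusive.
// For a RegionInfo ri with startKey="b" and endKey="d" (illustrative values):
//
//   rowIsInRange(ri, Bytes.toBytes("a")); // false - before the start key
//   rowIsInRange(ri, Bytes.toBytes("b")); // true  - start key is inclusive
//   rowIsInRange(ri, Bytes.toBytes("c")); // true
//   rowIsInRange(ri, Bytes.toBytes("d")); // false - end key is exclusive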
public static boolean rowIsInRange(RegionInfo info, final byte [] row, final int offset,
final short length) {
return ((info.getStartKey().length == 0) ||
(Bytes.compareTo(info.getStartKey(), 0, info.getStartKey().length,
row, offset, length) <= 0)) &&
((info.getEndKey().length == 0) ||
(Bytes.compareTo(info.getEndKey(), 0, info.getEndKey().length, row, offset, length) > 0));
}
@Override
public Result get(final Get get) throws IOException {
prepareGet(get);
List<Cell> results = get(get, true);
boolean stale = this.getRegionInfo().getReplicaId() != 0;
return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale);
}
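// Usage sketch (illustrative; region is an open HRegion, FAMILY and QUALIFIER are placeholder
// byte[] constants):
//
//   Get get = new Get(Bytes.toBytes("row1")).addColumn(FAMILY, QUALIFIER);
//   Result result = region.get(get);
//   byte[] value = result.getValue(FAMILY, QUALIFIER);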
void prepareGet(final Get get) throws IOException {
checkRow(get.getRow(), "Get");
// Verify families are all valid
if (get.hasFamilies()) {
for (byte[] family : get.familySet()) {
checkFamily(family);
}
} else { // Adding all families to scanner
for (byte[] family : this.htableDescriptor.getColumnFamilyNames()) {
get.addFamily(family);
}
}
}
@Override
public List<Cell> get(Get get, boolean withCoprocessor) throws IOException {
return get(get, withCoprocessor, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
public List<Cell> get(Get get, boolean withCoprocessor, long nonceGroup, long nonce)
throws IOException {
List<Cell> results = new ArrayList<>();
long before = EnvironmentEdgeManager.currentTime();
// pre-get CP hook
if (withCoprocessor && (coprocessorHost != null)) {
if (coprocessorHost.preGet(get, results)) {
metricsUpdateForGet(results, before);
return results;
}
}
Scan scan = new Scan(get);
if (scan.getLoadColumnFamiliesOnDemandValue() == null) {
scan.setLoadColumnFamiliesOnDemand(isLoadingCfsOnDemandDefault());
}
RegionScanner scanner = null;
try {
scanner = getScanner(scan, null, nonceGroup, nonce);
scanner.next(results);
} finally {
if (scanner != null)
scanner.close();
}
// post-get CP hook
if (withCoprocessor && (coprocessorHost != null)) {
coprocessorHost.postGet(get, results);
}
metricsUpdateForGet(results, before);
return results;
}
void metricsUpdateForGet(List<Cell> results, long before) {
if (this.metricsRegion != null) {
this.metricsRegion.updateGet(EnvironmentEdgeManager.currentTime() - before);
}
}
@Override
public void mutateRow(RowMutations rm) throws IOException {
// Don't need nonces here - RowMutations only supports puts and deletes
final List<Mutation> m = rm.getMutations();
batchMutate(m.toArray(new Mutation[m.size()]), true, HConstants.NO_NONCE,
HConstants.NO_NONCE);
}
/**
* Perform atomic (all or none) mutations within the region.
* @param mutations The list of mutations to perform.
* <code>mutations</code> can contain operations for multiple rows.
* Caller has to ensure that all rows are contained in this region.
* @param rowsToLock Rows to lock. If multiple rows are locked, care should be taken that
* <code>rowsToLock</code> is sorted in order to avoid deadlocks.
* @param nonceGroup Optional nonce group of the operation (client Id)
* @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
* @throws IOException
*/
@Override
public void mutateRowsWithLocks(Collection<Mutation> mutations,
Collection<byte[]> rowsToLock, long nonceGroup, long nonce) throws IOException {
batchMutate(new MutationBatchOperation(this, mutations.toArray(new Mutation[mutations.size()]),
true, nonceGroup, nonce) {
@Override
public MiniBatchOperationInProgress<Mutation> lockRowsAndBuildMiniBatch(
List<RowLock> acquiredRowLocks) throws IOException {
RowLock prevRowLock = null;
for (byte[] row : rowsToLock) {
try {
RowLock rowLock = region.getRowLockInternal(row, false, prevRowLock); // write lock
if (rowLock != prevRowLock) {
acquiredRowLocks.add(rowLock);
prevRowLock = rowLock;
}
} catch (IOException ioe) {
LOG.warn("Failed getting lock, row={}, in region {}", Bytes.toStringBinary(row), this,
ioe);
throw ioe;
}
}
return createMiniBatch(size(), size());
}
});
}
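// Usage sketch (illustrative; region, FAMILY, QUALIFIER and the row/value byte arrays are
// assumed, with row1 sorting before row2). As noted above, rowsToLock should be kept sorted
// to avoid deadlocks.
//
//   Put p1 = new Put(row1).addColumn(FAMILY, QUALIFIER, value1);
//   Put p2 = new Put(row2).addColumn(FAMILY, QUALIFIER, value2);
//   region.mutateRowsWithLocks(Arrays.<Mutation>asList(p1, p2), Arrays.asList(row1, row2),
//       HConstants.NO_NONCE, HConstants.NO_NONCE);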
/**
* @return statistics about the current load of the region
*/
public ClientProtos.RegionLoadStats getLoadStatistics() {
if (!regionStatsEnabled) {
return null;
}
ClientProtos.RegionLoadStats.Builder stats = ClientProtos.RegionLoadStats.newBuilder();
stats.setMemStoreLoad((int) (Math.min(100,
(this.memStoreSizing.getMemStoreSize().getHeapSize() * 100) / this.memstoreFlushSize)));
if (rsServices.getHeapMemoryManager() != null) {
// The HeapMemoryManager uses -0.0 to signal a problem asking the JVM,
// so we could just do the calculation below and we'd get a 0.
// Instead, treat it as a special case analogous to having no HMM so that it can be
// programmatically treated differently from using <1% of heap.
final float occupancy = rsServices.getHeapMemoryManager().getHeapOccupancyPercent();
if (occupancy != HeapMemoryManager.HEAP_OCCUPANCY_ERROR_VALUE) {
stats.setHeapOccupancy((int)(occupancy * 100));
}
}
stats.setCompactionPressure((int) (rsServices.getCompactionPressure() * 100 > 100 ? 100
: rsServices.getCompactionPressure() * 100));
return stats.build();
}
@Override
public void processRowsWithLocks(RowProcessor<?,?> processor) throws IOException {
processRowsWithLocks(processor, rowProcessorTimeout, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
@Override
public void processRowsWithLocks(RowProcessor<?,?> processor, long nonceGroup, long nonce)
throws IOException {
processRowsWithLocks(processor, rowProcessorTimeout, nonceGroup, nonce);
}
@Override
public void processRowsWithLocks(RowProcessor<?,?> processor, long timeout,
long nonceGroup, long nonce) throws IOException {
for (byte[] row : processor.getRowsToLock()) {
checkRow(row, "processRowsWithLocks");
}
if (!processor.readOnly()) {
checkReadOnly();
}
checkResources();
startRegionOperation();
WALEdit walEdit = new WALEdit();
// STEP 1. Run pre-process hook
preProcess(processor, walEdit);
// Short circuit the read only case
if (processor.readOnly()) {
try {
long now = EnvironmentEdgeManager.currentTime();
doProcessRowWithTimeout(processor, now, this, null, null, timeout);
processor.postProcess(this, walEdit, true);
} finally {
closeRegionOperation();
}
return;
}
boolean locked = false;
List<RowLock> acquiredRowLocks = null;
List<Mutation> mutations = new ArrayList<>();
Collection<byte[]> rowsToLock = processor.getRowsToLock();
// This is assigned by mvcc either explicitly below or in the guts of the WAL append
// when it assigns the edit a sequence id (a.k.a. the mvcc write number).
WriteEntry writeEntry = null;
MemStoreSizing memstoreAccounting = new NonThreadSafeMemStoreSizing();
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation.
checkInterrupt();
try {
boolean success = false;
try {
// STEP 2. Acquire the row lock(s)
acquiredRowLocks = new ArrayList<>(rowsToLock.size());
RowLock prevRowLock = null;
for (byte[] row : rowsToLock) {
// Attempt to lock all involved rows, throw if any lock times out
// use a writer lock for mixed reads and writes
RowLock rowLock = getRowLockInternal(row, false, prevRowLock);
if (rowLock != prevRowLock) {
acquiredRowLocks.add(rowLock);
prevRowLock = rowLock;
}
}
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation. Do it before we take the lock and disable interrupts for
// the WAL append.
checkInterrupt();
// STEP 3. Region lock
lock(this.updatesLock.readLock(), acquiredRowLocks.isEmpty() ? 1 : acquiredRowLocks.size());
locked = true;
// From this point until memstore update this operation should not be interrupted.
disableInterrupts();
long now = EnvironmentEdgeManager.currentTime();
// STEP 4. Let the processor scan the rows, generate mutations and add waledits
doProcessRowWithTimeout(processor, now, this, mutations, walEdit, timeout);
if (!mutations.isEmpty()) {
writeRequestsCount.add(mutations.size());
// STEP 5. Call the preBatchMutate hook
processor.preBatchMutate(this, walEdit);
// STEP 6. Append and sync if walEdit has data to write out.
if (!walEdit.isEmpty()) {
writeEntry = doWALAppend(walEdit, getEffectiveDurability(processor.useDurability()),
processor.getClusterIds(), now, nonceGroup, nonce);
} else {
// We are here if WAL is being skipped.
writeEntry = this.mvcc.begin();
}
// STEP 7. Apply to memstore
long sequenceId = writeEntry.getWriteNumber();
for (Mutation m : mutations) {
// Handle any tag based cell features.
// TODO: Do we need to call rewriteCellTags down in applyToMemStore()? Why not before
// so tags go into WAL?
rewriteCellTags(m.getFamilyCellMap(), m);
for (CellScanner cellScanner = m.cellScanner(); cellScanner.advance();) {
Cell cell = cellScanner.current();
if (walEdit.isEmpty()) {
// If walEdit is empty, we put nothing in WAL. WAL stamps Cells with sequence id.
// If no WAL, need to stamp it here.
PrivateCellUtil.setSequenceId(cell, sequenceId);
}
applyToMemStore(getStore(cell), cell, memstoreAccounting);
}
}
// STEP 8. call postBatchMutate hook
processor.postBatchMutate(this);
// STEP 9. Complete mvcc.
mvcc.completeAndWait(writeEntry);
writeEntry = null;
// STEP 10. Release region lock
if (locked) {
this.updatesLock.readLock().unlock();
locked = false;
}
// STEP 11. Release row lock(s)
releaseRowLocks(acquiredRowLocks);
if (rsServices != null && rsServices.getMetrics() != null) {
rsServices.getMetrics().updateWriteQueryMeter(this.htableDescriptor.
getTableName(), mutations.size());
}
}
success = true;
} finally {
// Call complete rather than completeAndWait because we probably had an error if writeEntry is still non-null here
if (writeEntry != null) mvcc.complete(writeEntry);
if (locked) {
this.updatesLock.readLock().unlock();
}
// release locks if some were acquired but another timed out
releaseRowLocks(acquiredRowLocks);
enableInterrupts();
}
// 12. Run post-process hook
processor.postProcess(this, walEdit, success);
} finally {
closeRegionOperation();
if (!mutations.isEmpty()) {
this.incMemStoreSize(memstoreAccounting.getMemStoreSize());
requestFlushIfNeeded();
}
}
}
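// Usage sketch (illustrative): proc is a hypothetical RowProcessor implementation, and
// nonceGroup/nonce are assumed. The overloads above differ only in timeout and nonce handling.
//
//   region.processRowsWithLocks(proc);                              // default rowProcessorTimeout
//   region.processRowsWithLocks(proc, 60000L, nonceGroup, nonce);   // explicit 60s timeout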
private void preProcess(final RowProcessor<?,?> processor, final WALEdit walEdit)
throws IOException {
try {
processor.preProcess(this, walEdit);
} catch (IOException e) {
closeRegionOperation();
throw e;
}
}
private void doProcessRowWithTimeout(final RowProcessor<?,?> processor,
final long now,
final HRegion region,
final List<Mutation> mutations,
final WALEdit walEdit,
final long timeout) throws IOException {
// Short circuit the no time bound case.
if (timeout < 0) {
try {
processor.process(now, region, mutations, walEdit);
} catch (IOException e) {
String row = processor.getRowsToLock().isEmpty() ? "" :
" on row(s):" + Bytes.toStringBinary(processor.getRowsToLock().iterator().next()) + "...";
LOG.warn("RowProcessor: {}, in region {}, throws Exception {}",
processor.getClass().getName(), getRegionInfo().getRegionNameAsString(), row, e);
throw e;
}
return;
}
// Case with time bound
FutureTask<Void> task = new FutureTask<>(new Callable<Void>() {
@Override
public Void call() throws IOException {
try {
processor.process(now, region, mutations, walEdit);
return null;
} catch (IOException e) {
String row = processor.getRowsToLock().isEmpty() ? "" :
" on row(s):" + Bytes.toStringBinary(processor.getRowsToLock().iterator().next()) + "...";
LOG.warn("RowProcessor: {}, in region {}, throws Exception {}",
processor.getClass().getName(), getRegionInfo().getRegionNameAsString(), row, e);
throw e;
}
}
});
rowProcessorExecutor.execute(task);
try {
task.get(timeout, TimeUnit.MILLISECONDS);
} catch (InterruptedException ie) {
throw throwOnInterrupt(ie);
} catch (TimeoutException te) {
String row = processor.getRowsToLock().isEmpty() ? "" :
" on row(s):" + Bytes.toStringBinary(processor.getRowsToLock().iterator().next()) + "...";
LOG.error("RowProcessor timeout: {} ms, in region {}, {}", timeout,
getRegionInfo().getRegionNameAsString(), row);
throw new IOException(te);
} catch (Exception e) {
throw new IOException(e);
}
}
@Override
public Result append(Append append) throws IOException {
return append(append, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
public Result append(Append append, long nonceGroup, long nonce) throws IOException {
checkReadOnly();
checkResources();
startRegionOperation(Operation.APPEND);
try {
// All edits for the given row (across all column families) must happen atomically.
return doBatchMutate(append, true, nonceGroup, nonce).getResult();
} finally {
closeRegionOperation(Operation.APPEND);
}
}
@Override
public Result increment(Increment increment) throws IOException {
return increment(increment, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
public Result increment(Increment increment, long nonceGroup, long nonce) throws IOException {
checkReadOnly();
checkResources();
startRegionOperation(Operation.INCREMENT);
try {
// All edits for the given row (across all column families) must happen atomically.
return doBatchMutate(increment, true, nonceGroup, nonce).getResult();
} finally {
closeRegionOperation(Operation.INCREMENT);
}
}
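// Usage sketch (illustrative; region, FAMILY and QUALIFIER are assumed):
//
//   Increment inc = new Increment(Bytes.toBytes("row1")).addColumn(FAMILY, QUALIFIER, 1L);
//   Result r = region.increment(inc);
//   long updated = Bytes.toLong(r.getValue(FAMILY, QUALIFIER));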
private WriteEntry doWALAppend(WALEdit walEdit, Durability durability, List<UUID> clusterIds,
long now, long nonceGroup, long nonce) throws IOException {
return doWALAppend(walEdit, durability, clusterIds, now, nonceGroup, nonce,
SequenceId.NO_SEQUENCE_ID);
}
/**
* @return writeEntry associated with this append
*/
private WriteEntry doWALAppend(WALEdit walEdit, Durability durability, List<UUID> clusterIds,
long now, long nonceGroup, long nonce, long origLogSeqNum) throws IOException {
Preconditions.checkArgument(walEdit != null && !walEdit.isEmpty(),
"WALEdit is null or empty!");
Preconditions.checkArgument(!walEdit.isReplay() || origLogSeqNum != SequenceId.NO_SEQUENCE_ID,
"Invalid replay sequence Id for replay WALEdit!");
// Using default cluster id, as this can only happen in the originating cluster.
// A slave cluster receives the final value (not the delta) as a Put. The key is built
// with WALKeyImpl directly (the legacy HLogKey is no longer used here).
WALKeyImpl walKey = walEdit.isReplay()?
new WALKeyImpl(this.getRegionInfo().getEncodedNameAsBytes(),
this.htableDescriptor.getTableName(), SequenceId.NO_SEQUENCE_ID, now, clusterIds,
nonceGroup, nonce, mvcc) :
new WALKeyImpl(this.getRegionInfo().getEncodedNameAsBytes(),
this.htableDescriptor.getTableName(), SequenceId.NO_SEQUENCE_ID, now, clusterIds,
nonceGroup, nonce, mvcc, this.getReplicationScope());
if (walEdit.isReplay()) {
walKey.setOrigLogSeqNum(origLogSeqNum);
}
// Don't call the coproc hook for writes to the WAL caused by
// system lifecycle events like flushes or compactions
if (this.coprocessorHost != null && !walEdit.isMetaEdit()) {
this.coprocessorHost.preWALAppend(walKey, walEdit);
}
WriteEntry writeEntry = null;
try {
long txid = this.wal.appendData(this.getRegionInfo(), walKey, walEdit);
// Call sync on our edit.
if (txid != 0) {
sync(txid, durability);
}
writeEntry = walKey.getWriteEntry();
} catch (IOException ioe) {
if (walKey != null && walKey.getWriteEntry() != null) {
mvcc.complete(walKey.getWriteEntry());
}
throw ioe;
}
return writeEntry;
}
/**
* @return Sorted list of <code>cells</code> using <code>comparator</code>
*/
private static List<Cell> sort(List<Cell> cells, final CellComparator comparator) {
cells.sort(comparator);
return cells;
}
public static final long FIXED_OVERHEAD = ClassSize.estimateBase(HRegion.class, false);
// woefully out of date - currently missing:
// 1 x HashMap - coprocessorServiceHandlers
// 6 x LongAdder - numMutationsWithoutWAL, dataInMemoryWithoutWAL,
// checkAndMutateChecksPassed, checkAndMutateChecksFailed, readRequestsCount,
// writeRequestsCount, cpRequestsCount
// 1 x HRegion$WriteState - writestate
// 1 x RegionCoprocessorHost - coprocessorHost
// 1 x RegionSplitPolicy - splitPolicy
// 1 x MetricsRegion - metricsRegion
// 1 x MetricsRegionWrapperImpl - metricsRegionWrapper
public static final long DEEP_OVERHEAD = FIXED_OVERHEAD +
ClassSize.OBJECT + // closeLock
(2 * ClassSize.ATOMIC_BOOLEAN) + // closed, closing
(3 * ClassSize.ATOMIC_LONG) + // numPutsWithoutWAL, dataInMemoryWithoutWAL,
// compactionsFailed
(3 * ClassSize.CONCURRENT_HASHMAP) + // lockedRows, scannerReadPoints, regionLockHolders
WriteState.HEAP_SIZE + // writestate
ClassSize.CONCURRENT_SKIPLISTMAP + ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY + // stores
(2 * ClassSize.REENTRANT_LOCK) + // lock, updatesLock
MultiVersionConcurrencyControl.FIXED_SIZE // mvcc
+ 2 * ClassSize.TREEMAP // maxSeqIdInStores, replicationScopes
+ 2 * ClassSize.ATOMIC_INTEGER // majorInProgress, minorInProgress
+ ClassSize.STORE_SERVICES // store services
+ StoreHotnessProtector.FIXED_SIZE
;
@Override
public long heapSize() {
// this does not take into account row locks, recent flushes, mvcc entries, and more
return DEEP_OVERHEAD + stores.values().stream().mapToLong(HStore::heapSize).sum();
}
/**
* Registers a new protocol buffer {@link Service} subclass as a coprocessor endpoint to be
* available for handling {@link #execService(RpcController, CoprocessorServiceCall)} calls.
* <p/>
* Only a single instance may be registered per region for a given {@link Service} subclass (the
* instances are keyed on {@link ServiceDescriptor#getFullName()}). After the first registration,
* subsequent calls with the same service name will fail with a return value of {@code false}.
* @param instance the {@code Service} subclass instance to expose as a coprocessor endpoint
* @return {@code true} if the registration was successful, {@code false} otherwise
*/
public boolean registerService(Service instance) {
// No stacking of instances is allowed for a single service name
ServiceDescriptor serviceDesc = instance.getDescriptorForType();
String serviceName = CoprocessorRpcUtils.getServiceName(serviceDesc);
if (coprocessorServiceHandlers.containsKey(serviceName)) {
LOG.error("Coprocessor service {} already registered, rejecting request from {} in region {}",
serviceName, instance, this);
return false;
}
coprocessorServiceHandlers.put(serviceName, instance);
if (LOG.isDebugEnabled()) {
LOG.debug("Registered coprocessor service: region=" +
Bytes.toStringBinary(getRegionInfo().getRegionName()) + " service=" + serviceName);
}
return true;
}
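// Usage sketch (illustrative): endpointImpl is a hypothetical implementation of a generated
// protobuf Service. Registration is normally performed by the coprocessor host for services
// exposed by a RegionCoprocessor.
//
//   boolean ok = region.registerService(endpointImpl);
//   if (!ok) {
//     // an instance with the same service name was already registered for this region
//   }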
/**
* Executes a single protocol buffer coprocessor endpoint {@link Service} method using
* the registered protocol handlers. {@link Service} implementations must be registered via the
* {@link #registerService(Service)}
* method before they are available.
*
* @param controller an {@code RpcController} implementation to pass to the invoked service
* @param call a {@code CoprocessorServiceCall} instance identifying the service, method,
* and parameters for the method invocation
* @return a protocol buffer {@code Message} instance containing the method's result
* @throws IOException if no registered service handler is found or an error
* occurs during the invocation
* @see #registerService(Service)
*/
public Message execService(RpcController controller, CoprocessorServiceCall call)
throws IOException {
String serviceName = call.getServiceName();
Service service = coprocessorServiceHandlers.get(serviceName);
if (service == null) {
throw new UnknownProtocolException(null, "No registered coprocessor service found for " +
serviceName + " in region " + Bytes.toStringBinary(getRegionInfo().getRegionName()));
}
ServiceDescriptor serviceDesc = service.getDescriptorForType();
cpRequestsCount.increment();
String methodName = call.getMethodName();
MethodDescriptor methodDesc =
CoprocessorRpcUtils.getMethodDescriptor(methodName, serviceDesc);
Message.Builder builder =
service.getRequestPrototype(methodDesc).newBuilderForType();
ProtobufUtil.mergeFrom(builder,
call.getRequest().toByteArray());
Message request =
CoprocessorRpcUtils.getRequest(service, methodDesc, call.getRequest());
if (coprocessorHost != null) {
request = coprocessorHost.preEndpointInvocation(service, methodName, request);
}
final Message.Builder responseBuilder =
service.getResponsePrototype(methodDesc).newBuilderForType();
service.callMethod(methodDesc, controller, request, new RpcCallback<Message>() {
@Override
public void run(Message message) {
if (message != null) {
responseBuilder.mergeFrom(message);
}
}
});
if (coprocessorHost != null) {
coprocessorHost.postEndpointInvocation(service, methodName, request, responseBuilder);
}
IOException exception =
org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils.getControllerException(controller);
if (exception != null) {
throw exception;
}
return responseBuilder.build();
}
public Optional<byte[]> checkSplit() {
return checkSplit(false);
}
/**
* Return the split point. An empty result indicates the region isn't splittable.
*/
public Optional<byte[]> checkSplit(boolean force) {
// Can't split META
if (this.getRegionInfo().isMetaRegion()) {
return Optional.empty();
}
// Can't split a region that is closing.
if (this.isClosing()) {
return Optional.empty();
}
if (!force && !splitPolicy.shouldSplit()) {
return Optional.empty();
}
byte[] ret = splitPolicy.getSplitPoint();
if (ret != null) {
try {
checkRow(ret, "calculated split");
} catch (IOException e) {
LOG.error("Ignoring invalid split for region {}", this, e);
return Optional.empty();
}
return Optional.of(ret);
} else {
return Optional.empty();
}
}
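// Usage sketch (illustrative): an empty Optional means the region should not be split.
//
//   region.checkSplit().ifPresent(splitPoint ->
//       LOG.debug("Split point for {} is {}", region, Bytes.toStringBinary(splitPoint)));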
/**
* @return The priority that this region should have in the compaction queue
*/
public int getCompactPriority() {
return stores.values().stream().mapToInt(HStore::getCompactPriority).min()
.orElse(Store.NO_PRIORITY);
}
/** @return the coprocessor host */
public RegionCoprocessorHost getCoprocessorHost() {
return coprocessorHost;
}
/** @param coprocessorHost the new coprocessor host */
public void setCoprocessorHost(final RegionCoprocessorHost coprocessorHost) {
this.coprocessorHost = coprocessorHost;
}
@Override
public void startRegionOperation() throws IOException {
startRegionOperation(Operation.ANY);
}
@Override
public void startRegionOperation(Operation op) throws IOException {
boolean isInterruptableOp = false;
switch (op) {
case GET: // interruptible read operations
case SCAN:
isInterruptableOp = true;
checkReadsEnabled();
break;
case INCREMENT: // interruptible write operations
case APPEND:
case PUT:
case DELETE:
case BATCH_MUTATE:
case CHECK_AND_MUTATE:
isInterruptableOp = true;
break;
default: // all others
break;
}
if (op == Operation.MERGE_REGION || op == Operation.SPLIT_REGION
|| op == Operation.COMPACT_REGION || op == Operation.COMPACT_SWITCH) {
// split, merge or compact region doesn't need to check the closing/closed state or lock the
// region
return;
}
if (this.closing.get()) {
throw new NotServingRegionException(getRegionInfo().getRegionNameAsString() + " is closing");
}
lock(lock.readLock());
// Update regionLockHolders ONLY for any startRegionOperation call that is invoked from
// an RPC handler
Thread thisThread = Thread.currentThread();
if (isInterruptableOp) {
regionLockHolders.put(thisThread, true);
}
if (this.closed.get()) {
lock.readLock().unlock();
throw new NotServingRegionException(getRegionInfo().getRegionNameAsString() + " is closed");
}
// The unit for snapshot is a region. So, all stores for this region must be
// prepared for snapshot operation before proceeding.
if (op == Operation.SNAPSHOT) {
stores.values().forEach(HStore::preSnapshotOperation);
}
try {
if (coprocessorHost != null) {
coprocessorHost.postStartRegionOperation(op);
}
} catch (Exception e) {
if (isInterruptableOp) {
// It would be harmless to remove what we didn't add, but 'isInterruptableOp' tells us
// whether we added this thread to regionLockHolders.
regionLockHolders.remove(thisThread);
}
lock.readLock().unlock();
throw new IOException(e);
}
}
@Override
public void closeRegionOperation() throws IOException {
closeRegionOperation(Operation.ANY);
}
@Override
public void closeRegionOperation(Operation operation) throws IOException {
if (operation == Operation.SNAPSHOT) {
stores.values().forEach(HStore::postSnapshotOperation);
}
Thread thisThread = Thread.currentThread();
regionLockHolders.remove(thisThread);
lock.readLock().unlock();
if (coprocessorHost != null) {
coprocessorHost.postCloseRegionOperation(operation);
}
}
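// Usage sketch (illustrative): startRegionOperation/closeRegionOperation must be paired, with
// the close in a finally block so the region read lock is always released.
//
//   region.startRegionOperation(Operation.GET);
//   try {
//     // ... perform the read ...
//   } finally {
//     region.closeRegionOperation(Operation.GET);
//   }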
/**
* This method needs to be called before any public call that reads or
* modifies stores in bulk. It has to be called just before a try.
* #closeBulkRegionOperation needs to be called in the try's finally block.
* Acquires a write lock (or a read lock if {@code writeLockNeeded} is false) and checks
* whether the region is closing or closed.
* @throws NotServingRegionException when the region is closing or closed
* @throws RegionTooBusyException if failed to get the lock in time
* @throws InterruptedIOException if interrupted while waiting for a lock
*/
private void startBulkRegionOperation(boolean writeLockNeeded) throws IOException {
if (this.closing.get()) {
throw new NotServingRegionException(getRegionInfo().getRegionNameAsString() + " is closing");
}
if (writeLockNeeded) lock(lock.writeLock());
else lock(lock.readLock());
if (this.closed.get()) {
if (writeLockNeeded) lock.writeLock().unlock();
else lock.readLock().unlock();
throw new NotServingRegionException(getRegionInfo().getRegionNameAsString() + " is closed");
}
regionLockHolders.put(Thread.currentThread(), true);
}
/**
* Releases the lock acquired by #startBulkRegionOperation. This needs to be called in the
* finally block corresponding to the try block of #startBulkRegionOperation.
*/
private void closeBulkRegionOperation(){
regionLockHolders.remove(Thread.currentThread());
if (lock.writeLock().isHeldByCurrentThread()) lock.writeLock().unlock();
else lock.readLock().unlock();
}
/**
* Update LongAdders for the number of mutations without WAL and the size of possible data loss.
* This information is exposed by the region server metrics.
*/
private void recordMutationWithoutWal(final Map<byte [], List<Cell>> familyMap) {
numMutationsWithoutWAL.increment();
if (numMutationsWithoutWAL.sum() <= 1) {
LOG.info("writing data to region " + this +
" with WAL disabled. Data may be lost in the event of a crash.");
}
long mutationSize = 0;
for (List<Cell> cells: familyMap.values()) {
// Optimization: 'foreach' loop is not used. See:
// HBASE-12023 HRegion.applyFamilyMapToMemstore creates too many iterator objects
assert cells instanceof RandomAccess;
int listSize = cells.size();
for (int i=0; i < listSize; i++) {
Cell cell = cells.get(i);
mutationSize += cell.getSerializedSize();
}
}
dataInMemoryWithoutWAL.add(mutationSize);
}
private void lock(final Lock lock) throws IOException {
lock(lock, 1);
}
/**
* Try to acquire a lock. Throw RegionTooBusyException
* if failed to get the lock in time. Throw InterruptedIOException
* if interrupted while waiting for the lock.
*/
private void lock(final Lock lock, final int multiplier) throws IOException {
try {
final long waitTime = Math.min(maxBusyWaitDuration,
busyWaitDuration * Math.min(multiplier, maxBusyWaitMultiplier));
if (!lock.tryLock(waitTime, TimeUnit.MILLISECONDS)) {
// Don't print millis. Message is used as a key over in
// RetriesExhaustedWithDetailsException processing.
final String regionName =
this.getRegionInfo() == null ? "unknown" : this.getRegionInfo().getRegionNameAsString();
final String serverName = this.getRegionServerServices() == null ?
"unknown" : (this.getRegionServerServices().getServerName() == null ?
"unknown" : this.getRegionServerServices().getServerName().toString());
RegionTooBusyException rtbe = new RegionTooBusyException(
"Failed to obtain lock; regionName=" + regionName + ", server=" + serverName);
LOG.warn("Region is too busy to allow lock acquisition.", rtbe);
throw rtbe;
}
} catch (InterruptedException ie) {
if (LOG.isDebugEnabled()) {
LOG.debug("Interrupted while waiting for a lock in region {}", this);
}
throw throwOnInterrupt(ie);
}
}
/**
* Calls sync with the given transaction ID
* @param txid should sync up to which transaction
* @throws IOException If anything goes wrong with DFS
*/
private void sync(long txid, Durability durability) throws IOException {
if (this.getRegionInfo().isMetaRegion()) {
this.wal.sync(txid);
} else {
switch(durability) {
case USE_DEFAULT:
// do what table defaults to
if (shouldSyncWAL()) {
this.wal.sync(txid);
}
break;
case SKIP_WAL:
// nothing to do
break;
case ASYNC_WAL:
// nothing to do
break;
case SYNC_WAL:
this.wal.sync(txid, false);
break;
case FSYNC_WAL:
this.wal.sync(txid, true);
break;
default:
throw new RuntimeException("Unknown durability " + durability);
}
}
}
/**
* Check whether we should sync the wal from the table's durability settings
*/
private boolean shouldSyncWAL() {
return regionDurability.ordinal() > Durability.ASYNC_WAL.ordinal();
}
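// Illustrative mapping of the ordinal comparison above, assuming the standard Durability
// ordering (USE_DEFAULT, SKIP_WAL, ASYNC_WAL, SYNC_WAL, FSYNC_WAL):
//
//   regionDurability == Durability.ASYNC_WAL -> shouldSyncWAL() == false
//   regionDurability == Durability.SYNC_WAL  -> shouldSyncWAL() == true
//   regionDurability == Durability.FSYNC_WAL -> shouldSyncWAL() == true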
/**
* A mocked list implementation - discards all updates.
*/
private static final List<Cell> MOCKED_LIST = new AbstractList<Cell>() {
@Override
public void add(int index, Cell element) {
// do nothing
}
@Override
public boolean addAll(int index, Collection<? extends Cell> c) {
return false; // this list is never changed as a result of an update
}
@Override
public KeyValue get(int index) {
throw new UnsupportedOperationException();
}
@Override
public int size() {
return 0;
}
};
/** @return the latest sequence number that was read from storage when this region was opened */
public long getOpenSeqNum() {
return this.openSeqNum;
}
@Override
public Map<byte[], Long> getMaxStoreSeqId() {
return this.maxSeqIdInStores;
}
public long getOldestSeqIdOfStore(byte[] familyName) {
return wal.getEarliestMemStoreSeqNum(getRegionInfo().getEncodedNameAsBytes(), familyName);
}
@Override
public CompactionState getCompactionState() {
boolean hasMajor = majorInProgress.get() > 0, hasMinor = minorInProgress.get() > 0;
return (hasMajor ? (hasMinor ? CompactionState.MAJOR_AND_MINOR : CompactionState.MAJOR)
: (hasMinor ? CompactionState.MINOR : CompactionState.NONE));
}
public void reportCompactionRequestStart(boolean isMajor){
(isMajor ? majorInProgress : minorInProgress).incrementAndGet();
}
public void reportCompactionRequestEnd(boolean isMajor, int numFiles, long filesSizeCompacted) {
int newValue = (isMajor ? majorInProgress : minorInProgress).decrementAndGet();
// metrics
compactionsFinished.increment();
compactionNumFilesCompacted.add(numFiles);
compactionNumBytesCompacted.add(filesSizeCompacted);
assert newValue >= 0;
}
public void reportCompactionRequestFailure() {
compactionsFailed.increment();
}
public void incrementCompactionsQueuedCount() {
compactionsQueued.increment();
}
public void decrementCompactionsQueuedCount() {
compactionsQueued.decrement();
}
public void incrementFlushesQueuedCount() {
flushesQueued.increment();
}
public long getReadPoint() {
return getReadPoint(IsolationLevel.READ_COMMITTED);
}
/**
* If a handler thread is eligible for interrupt, make it ineligible. Should be paired
* with {@link #enableInterrupts()}.
*/
protected void disableInterrupts() {
regionLockHolders.computeIfPresent(Thread.currentThread(), (t,b) -> false);
}
/**
* If a handler thread was made ineligible for interrupt via {@link #disableInterrupts()},
* make it eligible again. No-op if interrupts are already enabled.
*/
protected void enableInterrupts() {
regionLockHolders.computeIfPresent(Thread.currentThread(), (t,b) -> true);
}
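// Usage sketch (illustrative): the two methods above are meant to bracket a critical section
// that must not be interrupted, e.g. the WAL append plus the memstore update.
//
//   disableInterrupts();
//   try {
//     // ... work that must not observe Thread.interrupt() ...
//   } finally {
//     enableInterrupts();
//   }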
/**
* Interrupt any region operations that have acquired the region lock via
* {@link #startRegionOperation(org.apache.hadoop.hbase.regionserver.Region.Operation)},
* or {@link #startBulkRegionOperation(boolean)}.
*/
private void interruptRegionOperations() {
for (Map.Entry<Thread, Boolean> entry: regionLockHolders.entrySet()) {
// An entry in this map will have a boolean value indicating if it is currently
// eligible for interrupt; if so, we should interrupt it.
if (entry.getValue().booleanValue()) {
entry.getKey().interrupt();
}
}
}
/**
* Check thread interrupt status and throw an exception if interrupted.
* @throws NotServingRegionException if region is closing
* @throws InterruptedIOException if interrupted but region is not closing
*/
// Package scope for tests
void checkInterrupt() throws NotServingRegionException, InterruptedIOException {
if (Thread.interrupted()) {
if (this.closing.get()) {
throw new NotServingRegionException(
getRegionInfo().getRegionNameAsString() + " is closing");
}
throw new InterruptedIOException();
}
}
/**
* Throw the correct exception upon interrupt
* @param t cause
*/
// Package scope for tests
IOException throwOnInterrupt(Throwable t) {
if (this.closing.get()) {
return (NotServingRegionException) new NotServingRegionException(
getRegionInfo().getRegionNameAsString() + " is closing")
.initCause(t);
}
return (InterruptedIOException) new InterruptedIOException().initCause(t);
}
/**
* {@inheritDoc}
*/
@Override
public void onConfigurationChange(Configuration conf) {
this.storeHotnessProtector.update(conf);
}
/**
* {@inheritDoc}
*/
@Override
public void registerChildren(ConfigurationManager manager) {
configurationManager = manager;
stores.values().forEach(manager::registerObserver);
}
/**
* {@inheritDoc}
*/
@Override
public void deregisterChildren(ConfigurationManager manager) {
stores.values().forEach(configurationManager::deregisterObserver);
}
@Override
public CellComparator getCellComparator() {
return cellComparator;
}
public long getMemStoreFlushSize() {
return this.memstoreFlushSize;
}
//// method for debugging tests
void throwException(String title, String regionName) {
StringBuilder buf = new StringBuilder();
buf.append(title + ", ");
buf.append(getRegionInfo().toString());
buf.append(getRegionInfo().isMetaRegion() ? " meta region " : " ");
buf.append("stores: ");
for (HStore s : stores.values()) {
buf.append(s.getColumnFamilyDescriptor().getNameAsString());
buf.append(" size: ");
buf.append(s.getMemStoreSize().getDataSize());
buf.append(" ");
}
buf.append("end-of-stores");
buf.append(", memstore size ");
buf.append(getMemStoreDataSize());
if (getRegionInfo().getRegionNameAsString().startsWith(regionName)) {
throw new RuntimeException(buf.toString());
}
}
@Override
public void requestCompaction(String why, int priority, boolean major,
CompactionLifeCycleTracker tracker) throws IOException {
if (major) {
stores.values().forEach(HStore::triggerMajorCompaction);
}
rsServices.getCompactionRequestor().requestCompaction(this, why, priority, tracker,
RpcServer.getRequestUser().orElse(null));
}
@Override
public void requestCompaction(byte[] family, String why, int priority, boolean major,
CompactionLifeCycleTracker tracker) throws IOException {
HStore store = stores.get(family);
if (store == null) {
throw new NoSuchColumnFamilyException("column family " + Bytes.toString(family) +
" does not exist in region " + getRegionInfo().getRegionNameAsString());
}
if (major) {
store.triggerMajorCompaction();
}
rsServices.getCompactionRequestor().requestCompaction(this, store, why, priority, tracker,
RpcServer.getRequestUser().orElse(null));
}
private void requestFlushIfNeeded() throws RegionTooBusyException {
if(isFlushSize(this.memStoreSizing.getMemStoreSize())) {
requestFlush();
}
}
private void requestFlush() {
if (this.rsServices == null) {
return;
}
requestFlush0(FlushLifeCycleTracker.DUMMY);
}
private void requestFlush0(FlushLifeCycleTracker tracker) {
boolean shouldFlush = false;
synchronized (writestate) {
if (!this.writestate.isFlushRequested()) {
shouldFlush = true;
writestate.flushRequested = true;
}
}
if (shouldFlush) {
// Make request outside of synchronize block; HBASE-818.
this.rsServices.getFlushRequester().requestFlush(this, tracker);
if (LOG.isDebugEnabled()) {
LOG.debug("Flush requested on " + this.getRegionInfo().getEncodedName());
}
} else {
tracker.notExecuted("Flush already requested on " + this);
}
}
@Override
public void requestFlush(FlushLifeCycleTracker tracker) throws IOException {
requestFlush0(tracker);
}
/**
* This method modifies the region's configuration in order to inject replication-related
* features.
* @param conf the region's configuration
*/
static void decorateRegionConfiguration(Configuration conf) {
if (ReplicationUtils.isReplicationForBulkLoadDataEnabled(conf)) {
String plugins = conf.get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,"");
String replicationCoprocessorClass = ReplicationObserver.class.getCanonicalName();
if (!plugins.contains(replicationCoprocessorClass)) {
conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
(plugins.equals("") ? "" : (plugins + ",")) + replicationCoprocessorClass);
}
}
}
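// Illustrative effect of the method above, assuming bulk-load replication is enabled via
// HConstants.REPLICATION_BULKLOAD_ENABLE_KEY:
//
//   Configuration c = HBaseConfiguration.create();
//   c.setBoolean(HConstants.REPLICATION_BULKLOAD_ENABLE_KEY, true);
//   HRegion.decorateRegionConfiguration(c);
//   // c.get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY) now includes ReplicationObserver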
public void setReadRequestsCount(long readRequestsCount) {
this.readRequestsCount.add(readRequestsCount);
}
public void setWriteRequestsCount(long writeRequestsCount) {
this.writeRequestsCount.add(writeRequestsCount);
}
}