hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java - hbase - Git at Google

 /**
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.hadoop.hbase.regionserver;

 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Optional;
 import java.util.PriorityQueue;
 import java.util.concurrent.atomic.LongAdder;

 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.yetus.audience.InterfaceStability;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.TimeRange;
 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
 import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher;

 /**
  * KeyValueScanner adaptor over the Reader.  It also provides hooks into
  * bloom filter things.
  */
 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.PHOENIX)
 @InterfaceStability.Evolving
 public class StoreFileScanner implements KeyValueScanner {
   // the reader it comes from:
   private final StoreFileReader reader;
   private final HFileScanner hfs;
   private Cell cur = null;
   private boolean closed = false;

   private boolean realSeekDone;
   private boolean delayedReseek;
   private Cell delayedSeekKV;

   private final boolean enforceMVCC;
   private final boolean hasMVCCInfo;
   // A flag represents whether could stop skipping KeyValues for MVCC
   // if have encountered the next row. Only used for reversed scan
   private boolean stopSkippingKVsIfNextRow = false;

   private static LongAdder seekCount;

   private final boolean canOptimizeForNonNullColumn;

   private final long readPt;

   // Order of this scanner relative to other scanners when duplicate key-value is found.
   // Higher values means scanner has newer data.
   private final long scannerOrder;

   /**
    * Implements a {@link KeyValueScanner} on top of the specified {@link HFileScanner}
    * @param useMVCC If true, scanner will filter out updates with MVCC larger than {@code readPt}.
    * @param readPt MVCC value to use to filter out the updates newer than this scanner.
    * @param hasMVCC Set to true if underlying store file reader has MVCC info.
    * @param scannerOrder Order of the scanner relative to other scanners. See
    *          {@link KeyValueScanner#getScannerOrder()}.
    * @param canOptimizeForNonNullColumn {@code true} if we can make sure there is no null column,
    *          otherwise {@code false}. This is a hint for optimization.
    */
   public StoreFileScanner(StoreFileReader reader, HFileScanner hfs, boolean useMVCC,
       boolean hasMVCC, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) {
     this.readPt = readPt;
     this.reader = reader;
     this.hfs = hfs;
     this.enforceMVCC = useMVCC;
     this.hasMVCCInfo = hasMVCC;
     this.scannerOrder = scannerOrder;
     this.canOptimizeForNonNullColumn = canOptimizeForNonNullColumn;
     this.reader.incrementRefCount();
   }

   /**
    * Return an array of scanners corresponding to the given set of store files.
    */
   public static List<StoreFileScanner> getScannersForStoreFiles(Collection<HStoreFile> files,
       boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean useDropBehind,
       long readPt) throws IOException {
     return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction, useDropBehind, null,
       readPt);
   }

   /**
    * Return an array of scanners corresponding to the given set of store files, And set the
    * ScanQueryMatcher for each store file scanner for further optimization
    */
   public static List<StoreFileScanner> getScannersForStoreFiles(Collection<HStoreFile> files,
       boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean canUseDrop,
       ScanQueryMatcher matcher, long readPt) throws IOException {
     if (files.isEmpty()) {
       return Collections.emptyList();
     }
     List<StoreFileScanner> scanners = new ArrayList<>(files.size());
     boolean canOptimizeForNonNullColumn = matcher != null ? !matcher.hasNullColumnInQuery() : false;
     PriorityQueue<HStoreFile> sortedFiles =
         new PriorityQueue<>(files.size(), StoreFileComparators.SEQ_ID);
     for (HStoreFile file : files) {
       // The sort function needs metadata so we need to open reader first before sorting the list.
       file.initReader();
       sortedFiles.add(file);
     }
     boolean succ = false;
     try {
       for (int i = 0, n = files.size(); i < n; i++) {
         HStoreFile sf = sortedFiles.remove();
         StoreFileScanner scanner;
         if (usePread) {
           scanner = sf.getPreadScanner(cacheBlocks, readPt, i, canOptimizeForNonNullColumn);
         } else {
           scanner = sf.getStreamScanner(canUseDrop, cacheBlocks, isCompaction, readPt, i,
               canOptimizeForNonNullColumn);
         }
         scanners.add(scanner);
       }
       succ = true;
     } finally {
       if (!succ) {
         for (StoreFileScanner scanner : scanners) {
           scanner.close();
         }
       }
     }
     return scanners;
   }

   /**
    * Get scanners for compaction. We will create a separated reader for each store file to avoid
    * contention with normal read request.
    */
   public static List<StoreFileScanner> getScannersForCompaction(Collection<HStoreFile> files,
       boolean canUseDropBehind, long readPt) throws IOException {
     List<StoreFileScanner> scanners = new ArrayList<>(files.size());
     List<HStoreFile> sortedFiles = new ArrayList<>(files);
     Collections.sort(sortedFiles, StoreFileComparators.SEQ_ID);
     boolean succ = false;
     try {
       for (int i = 0, n = sortedFiles.size(); i < n; i++) {
         scanners.add(
           sortedFiles.get(i).getStreamScanner(canUseDropBehind, false, true, readPt, i, false));
       }
       succ = true;
     } finally {
       if (!succ) {
         for (StoreFileScanner scanner : scanners) {
           scanner.close();
         }
       }
     }
     return scanners;
   }

   @Override
   public String toString() {
     return "StoreFileScanner[" + hfs.toString() + ", cur=" + cur + "]";
   }

   @Override
   public Cell peek() {
     return cur;
   }

   @Override
   public Cell next() throws IOException {
     Cell retKey = cur;

     try {
       // only seek if we aren't at the end. cur == null implies 'end'.
       if (cur != null) {
         hfs.next();
         setCurrentCell(hfs.getCell());
         if (hasMVCCInfo || this.reader.isBulkLoaded()) {
           skipKVsNewerThanReadpoint();
         }
       }
     } catch (FileNotFoundException e) {
       throw e;
     } catch(IOException e) {
       throw new IOException("Could not iterate " + this, e);
     }
     return retKey;
   }

   @Override
   public boolean seek(Cell key) throws IOException {
     if (seekCount != null) seekCount.increment();

     try {
       try {
         if(!seekAtOrAfter(hfs, key)) {
           this.cur = null;
           return false;
         }

         setCurrentCell(hfs.getCell());

         if (!hasMVCCInfo && this.reader.isBulkLoaded()) {
           return skipKVsNewerThanReadpoint();
         } else {
           return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
         }
       } finally {
         realSeekDone = true;
       }
     } catch (FileNotFoundException e) {
       throw e;
     } catch (IOException ioe) {
       throw new IOException("Could not seek " + this + " to key " + key, ioe);
     }
   }

   @Override
   public boolean reseek(Cell key) throws IOException {
     if (seekCount != null) seekCount.increment();

     try {
       try {
         if (!reseekAtOrAfter(hfs, key)) {
           this.cur = null;
           return false;
         }
         setCurrentCell(hfs.getCell());

         if (!hasMVCCInfo && this.reader.isBulkLoaded()) {
           return skipKVsNewerThanReadpoint();
         } else {
           return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
         }
       } finally {
         realSeekDone = true;
       }
     } catch (FileNotFoundException e) {
       throw e;
     } catch (IOException ioe) {
       throw new IOException("Could not reseek " + this + " to key " + key,
           ioe);
     }
   }

   protected void setCurrentCell(Cell newVal) throws IOException {
     this.cur = newVal;
     if (this.cur != null && this.reader.isBulkLoaded() && !this.reader.isSkipResetSeqId()) {
       PrivateCellUtil.setSequenceId(cur, this.reader.getSequenceID());
     }
   }

   protected boolean skipKVsNewerThanReadpoint() throws IOException {
     // We want to ignore all key-values that are newer than our current
     // readPoint
     Cell startKV = cur;
     while(enforceMVCC
         && cur != null
         && (cur.getSequenceId() > readPt)) {
       boolean hasNext = hfs.next();
       setCurrentCell(hfs.getCell());
       if (hasNext && this.stopSkippingKVsIfNextRow
           && getComparator().compareRows(cur, startKV) > 0) {
         return false;
       }
     }

     if (cur == null) {
       return false;
     }

     return true;
   }

   @Override
   public void close() {
     if (closed) return;
     cur = null;
     this.hfs.close();
     if (this.reader != null) {
       this.reader.readCompleted();
     }
     closed = true;
   }

   /**
    *
    * @param s
    * @param k
    * @return false if not found or if k is after the end.
    * @throws IOException
    */
   public static boolean seekAtOrAfter(HFileScanner s, Cell k)
   throws IOException {
     int result = s.seekTo(k);
     if(result < 0) {
       if (result == HConstants.INDEX_KEY_MAGIC) {
         // using faked key
         return true;
       }
       // Passed KV is smaller than first KV in file, work from start of file
       return s.seekTo();
     } else if(result > 0) {
       // Passed KV is larger than current KV in file, if there is a next
       // it is the "after", if not then this scanner is done.
       return s.next();
     }
     // Seeked to the exact key
     return true;
   }

   static boolean reseekAtOrAfter(HFileScanner s, Cell k)
   throws IOException {
     //This function is similar to seekAtOrAfter function
     int result = s.reseekTo(k);
     if (result <= 0) {
       if (result == HConstants.INDEX_KEY_MAGIC) {
         // using faked key
         return true;
       }
       // If up to now scanner is not seeked yet, this means passed KV is smaller
       // than first KV in file, and it is the first time we seek on this file.
       // So we also need to work from the start of file.
       if (!s.isSeeked()) {
         return  s.seekTo();
       }
       return true;
     }
     // passed KV is larger than current KV in file, if there is a next
     // it is after, if not then this scanner is done.
     return s.next();
   }

   /**
    * @see KeyValueScanner#getScannerOrder()
    */
   @Override
   public long getScannerOrder() {
     return scannerOrder;
   }

   /**
    * Pretend we have done a seek but don't do it yet, if possible. The hope is
    * that we find requested columns in more recent files and won't have to seek
    * in older files. Creates a fake key/value with the given row/column and the
    * highest (most recent) possible timestamp we might get from this file. When
    * users of such "lazy scanner" need to know the next KV precisely (e.g. when
    * this scanner is at the top of the heap), they run {@link #enforceSeek()}.
    * <p>
    * Note that this function does guarantee that the current KV of this scanner
    * will be advanced to at least the given KV. Because of this, it does have
    * to do a real seek in cases when the seek timestamp is older than the
    * highest timestamp of the file, e.g. when we are trying to seek to the next
    * row/column and use OLDEST_TIMESTAMP in the seek key.
    */
   @Override
   public boolean requestSeek(Cell kv, boolean forward, boolean useBloom)
       throws IOException {
     if (kv.getFamilyLength() == 0) {
       useBloom = false;
     }

     boolean haveToSeek = true;
     if (useBloom) {
       // check ROWCOL Bloom filter first.
       if (reader.getBloomFilterType() == BloomType.ROWCOL) {
         haveToSeek = reader.passesGeneralRowColBloomFilter(kv);
       } else if (canOptimizeForNonNullColumn
           && ((PrivateCellUtil.isDeleteFamily(kv)
               || PrivateCellUtil.isDeleteFamilyVersion(kv)))) {
         // if there is no such delete family kv in the store file,
         // then no need to seek.
         haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getRowArray(), kv.getRowOffset(),
           kv.getRowLength());
       }
     }

     delayedReseek = forward;
     delayedSeekKV = kv;

     if (haveToSeek) {
       // This row/column might be in this store file (or we did not use the
       // Bloom filter), so we still need to seek.
       realSeekDone = false;
       long maxTimestampInFile = reader.getMaxTimestamp();
       long seekTimestamp = kv.getTimestamp();
       if (seekTimestamp > maxTimestampInFile) {
         // Create a fake key that is not greater than the real next key.
         // (Lower timestamps correspond to higher KVs.)
         // To understand this better, consider that we are asked to seek to
         // a higher timestamp than the max timestamp in this file. We know that
         // the next point when we have to consider this file again is when we
         // pass the max timestamp of this file (with the same row/column).
         setCurrentCell(PrivateCellUtil.createFirstOnRowColTS(kv, maxTimestampInFile));
       } else {
         // This will be the case e.g. when we need to seek to the next
         // row/column, and we don't know exactly what they are, so we set the
         // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this
         // row/column.
         enforceSeek();
       }
       return cur != null;
     }

     // Multi-column Bloom filter optimization.
     // Create a fake key/value, so that this scanner only bubbles up to the top
     // of the KeyValueHeap in StoreScanner after we scanned this row/column in
     // all other store files. The query matcher will then just skip this fake
     // key/value and the store scanner will progress to the next column. This
     // is obviously not a "real real" seek, but unlike the fake KV earlier in
     // this method, we want this to be propagated to ScanQueryMatcher.
     setCurrentCell(PrivateCellUtil.createLastOnRowCol(kv));

     realSeekDone = true;
     return true;
   }

   StoreFileReader getReader() {
     return reader;
   }

   CellComparator getComparator() {
     return reader.getComparator();
   }

   @Override
   public boolean realSeekDone() {
     return realSeekDone;
   }

   @Override
   public void enforceSeek() throws IOException {
     if (realSeekDone)
       return;

     if (delayedReseek) {
       reseek(delayedSeekKV);
     } else {
       seek(delayedSeekKV);
     }
   }

   @Override
   public boolean isFileScanner() {
     return true;
   }

   @Override
   public Path getFilePath() {
     return reader.getHFileReader().getPath();
   }

   // Test methods
   static final long getSeekCount() {
     return seekCount.sum();
   }

   static final void instrument() {
     seekCount = new LongAdder();
   }

   @Override
   public boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS) {
     // if the file has no entries, no need to validate or create a scanner.
     byte[] cf = store.getColumnFamilyDescriptor().getName();
     TimeRange timeRange = scan.getColumnFamilyTimeRange().get(cf);
     if (timeRange == null) {
       timeRange = scan.getTimeRange();
     }
     return reader.passesTimerangeFilter(timeRange, oldestUnexpiredTS) && reader
         .passesKeyRangeFilter(scan) && reader.passesBloomFilter(scan, scan.getFamilyMap().get(cf));
   }

   @Override
   public boolean seekToPreviousRow(Cell originalKey) throws IOException {
     try {
       try {
         boolean keepSeeking = false;
         Cell key = originalKey;
         do {
           Cell seekKey = PrivateCellUtil.createFirstOnRow(key);
           if (seekCount != null) seekCount.increment();
           if (!hfs.seekBefore(seekKey)) {
             this.cur = null;
             return false;
           }
           Cell curCell = hfs.getCell();
           Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(curCell);

           if (seekCount != null) seekCount.increment();
           if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) {
             this.cur = null;
             return false;
           }

           setCurrentCell(hfs.getCell());
           this.stopSkippingKVsIfNextRow = true;
           boolean resultOfSkipKVs;
           try {
             resultOfSkipKVs = skipKVsNewerThanReadpoint();
           } finally {
             this.stopSkippingKVsIfNextRow = false;
           }
           if (!resultOfSkipKVs
               || getComparator().compareRows(cur, firstKeyOfPreviousRow) > 0) {
             keepSeeking = true;
             key = firstKeyOfPreviousRow;
             continue;
           } else {
             keepSeeking = false;
           }
         } while (keepSeeking);
         return true;
       } finally {
         realSeekDone = true;
       }
     } catch (FileNotFoundException e) {
       throw e;
     } catch (IOException ioe) {
       throw new IOException("Could not seekToPreviousRow " + this + " to key "
           + originalKey, ioe);
     }
   }

   @Override
   public boolean seekToLastRow() throws IOException {
     Optional<byte[]> lastRow = reader.getLastRowKey();
     if (!lastRow.isPresent()) {
       return false;
     }
     Cell seekKey = PrivateCellUtil.createFirstOnRow(lastRow.get());
     if (seek(seekKey)) {
       return true;
     } else {
       return seekToPreviousRow(seekKey);
     }
   }

   @Override
   public boolean backwardSeek(Cell key) throws IOException {
     seek(key);
     if (cur == null
         || getComparator().compareRows(cur, key) > 0) {
       return seekToPreviousRow(key);
     }
     return true;
   }

   @Override
   public Cell getNextIndexedKey() {
     return hfs.getNextIndexedKey();
   }

   @Override
   public void shipped() throws IOException {
     this.hfs.shipped();
   }
 }
	/**
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.hadoop.hbase.regionserver;

	import java.io.FileNotFoundException;
	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Collection;
	import java.util.Collections;
	import java.util.List;
	import java.util.Optional;
	import java.util.PriorityQueue;
	import java.util.concurrent.atomic.LongAdder;

	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.hbase.Cell;
	import org.apache.hadoop.hbase.CellComparator;
	import org.apache.hadoop.hbase.HBaseInterfaceAudience;
	import org.apache.hadoop.hbase.HConstants;
	import org.apache.hadoop.hbase.PrivateCellUtil;
	import org.apache.yetus.audience.InterfaceAudience;
	import org.apache.yetus.audience.InterfaceStability;
	import org.apache.hadoop.hbase.client.Scan;
	import org.apache.hadoop.hbase.io.TimeRange;
	import org.apache.hadoop.hbase.io.hfile.HFileScanner;
	import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher;

	/**
	* KeyValueScanner adaptor over the Reader. It also provides hooks into
	* bloom filter things.
	*/
	@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.PHOENIX)
	@InterfaceStability.Evolving
	public class StoreFileScanner implements KeyValueScanner {
	// the reader it comes from:
	private final StoreFileReader reader;
	private final HFileScanner hfs;
	private Cell cur = null;
	private boolean closed = false;

	private boolean realSeekDone;
	private boolean delayedReseek;
	private Cell delayedSeekKV;

	private final boolean enforceMVCC;
	private final boolean hasMVCCInfo;
	// A flag represents whether could stop skipping KeyValues for MVCC
	// if have encountered the next row. Only used for reversed scan
	private boolean stopSkippingKVsIfNextRow = false;

	private static LongAdder seekCount;

	private final boolean canOptimizeForNonNullColumn;

	private final long readPt;

	// Order of this scanner relative to other scanners when duplicate key-value is found.
	// Higher values means scanner has newer data.
	private final long scannerOrder;

	/**
	* Implements a {@link KeyValueScanner} on top of the specified {@link HFileScanner}
	* @param useMVCC If true, scanner will filter out updates with MVCC larger than {@code readPt}.
	* @param readPt MVCC value to use to filter out the updates newer than this scanner.
	* @param hasMVCC Set to true if underlying store file reader has MVCC info.
	* @param scannerOrder Order of the scanner relative to other scanners. See
	* {@link KeyValueScanner#getScannerOrder()}.
	* @param canOptimizeForNonNullColumn {@code true} if we can make sure there is no null column,
	* otherwise {@code false}. This is a hint for optimization.
	*/
	public StoreFileScanner(StoreFileReader reader, HFileScanner hfs, boolean useMVCC,
	boolean hasMVCC, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) {
	this.readPt = readPt;
	this.reader = reader;
	this.hfs = hfs;
	this.enforceMVCC = useMVCC;
	this.hasMVCCInfo = hasMVCC;
	this.scannerOrder = scannerOrder;
	this.canOptimizeForNonNullColumn = canOptimizeForNonNullColumn;
	this.reader.incrementRefCount();
	}

	/**
	* Return an array of scanners corresponding to the given set of store files.
	*/
	public static List<StoreFileScanner> getScannersForStoreFiles(Collection<HStoreFile> files,
	boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean useDropBehind,
	long readPt) throws IOException {
	return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction, useDropBehind, null,
	readPt);
	}

	/**
	* Return an array of scanners corresponding to the given set of store files, And set the
	* ScanQueryMatcher for each store file scanner for further optimization
	*/
	public static List<StoreFileScanner> getScannersForStoreFiles(Collection<HStoreFile> files,
	boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean canUseDrop,
	ScanQueryMatcher matcher, long readPt) throws IOException {
	if (files.isEmpty()) {
	return Collections.emptyList();
	}
	List<StoreFileScanner> scanners = new ArrayList<>(files.size());
	boolean canOptimizeForNonNullColumn = matcher != null ? !matcher.hasNullColumnInQuery() : false;
	PriorityQueue<HStoreFile> sortedFiles =
	new PriorityQueue<>(files.size(), StoreFileComparators.SEQ_ID);
	for (HStoreFile file : files) {
	// The sort function needs metadata so we need to open reader first before sorting the list.
	file.initReader();
	sortedFiles.add(file);
	}
	boolean succ = false;
	try {
	for (int i = 0, n = files.size(); i < n; i++) {
	HStoreFile sf = sortedFiles.remove();
	StoreFileScanner scanner;
	if (usePread) {
	scanner = sf.getPreadScanner(cacheBlocks, readPt, i, canOptimizeForNonNullColumn);
	} else {
	scanner = sf.getStreamScanner(canUseDrop, cacheBlocks, isCompaction, readPt, i,
	canOptimizeForNonNullColumn);
	}
	scanners.add(scanner);
	}
	succ = true;
	} finally {
	if (!succ) {
	for (StoreFileScanner scanner : scanners) {
	scanner.close();
	}
	}
	}
	return scanners;
	}

	/**
	* Get scanners for compaction. We will create a separated reader for each store file to avoid
	* contention with normal read request.
	*/
	public static List<StoreFileScanner> getScannersForCompaction(Collection<HStoreFile> files,
	boolean canUseDropBehind, long readPt) throws IOException {
	List<StoreFileScanner> scanners = new ArrayList<>(files.size());
	List<HStoreFile> sortedFiles = new ArrayList<>(files);
	Collections.sort(sortedFiles, StoreFileComparators.SEQ_ID);
	boolean succ = false;
	try {
	for (int i = 0, n = sortedFiles.size(); i < n; i++) {
	scanners.add(
	sortedFiles.get(i).getStreamScanner(canUseDropBehind, false, true, readPt, i, false));
	}
	succ = true;
	} finally {
	if (!succ) {
	for (StoreFileScanner scanner : scanners) {
	scanner.close();
	}
	}
	}
	return scanners;
	}

	@Override
	public String toString() {
	return "StoreFileScanner[" + hfs.toString() + ", cur=" + cur + "]";
	}

	@Override
	public Cell peek() {
	return cur;
	}

	@Override
	public Cell next() throws IOException {
	Cell retKey = cur;

	try {
	// only seek if we aren't at the end. cur == null implies 'end'.
	if (cur != null) {
	hfs.next();
	setCurrentCell(hfs.getCell());
	if (hasMVCCInfo \|\| this.reader.isBulkLoaded()) {
	skipKVsNewerThanReadpoint();
	}
	}
	} catch (FileNotFoundException e) {
	throw e;
	} catch(IOException e) {
	throw new IOException("Could not iterate " + this, e);
	}
	return retKey;
	}

	@Override
	public boolean seek(Cell key) throws IOException {
	if (seekCount != null) seekCount.increment();

	try {
	try {
	if(!seekAtOrAfter(hfs, key)) {
	this.cur = null;
	return false;
	}

	setCurrentCell(hfs.getCell());

	if (!hasMVCCInfo && this.reader.isBulkLoaded()) {
	return skipKVsNewerThanReadpoint();
	} else {
	return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
	}
	} finally {
	realSeekDone = true;
	}
	} catch (FileNotFoundException e) {
	throw e;
	} catch (IOException ioe) {
	throw new IOException("Could not seek " + this + " to key " + key, ioe);
	}
	}

	@Override
	public boolean reseek(Cell key) throws IOException {
	if (seekCount != null) seekCount.increment();

	try {
	try {
	if (!reseekAtOrAfter(hfs, key)) {
	this.cur = null;
	return false;
	}
	setCurrentCell(hfs.getCell());

	if (!hasMVCCInfo && this.reader.isBulkLoaded()) {
	return skipKVsNewerThanReadpoint();
	} else {
	return !hasMVCCInfo ? true : skipKVsNewerThanReadpoint();
	}
	} finally {
	realSeekDone = true;
	}
	} catch (FileNotFoundException e) {
	throw e;
	} catch (IOException ioe) {
	throw new IOException("Could not reseek " + this + " to key " + key,
	ioe);
	}
	}

	protected void setCurrentCell(Cell newVal) throws IOException {
	this.cur = newVal;
	if (this.cur != null && this.reader.isBulkLoaded() && !this.reader.isSkipResetSeqId()) {
	PrivateCellUtil.setSequenceId(cur, this.reader.getSequenceID());
	}
	}

	protected boolean skipKVsNewerThanReadpoint() throws IOException {
	// We want to ignore all key-values that are newer than our current
	// readPoint
	Cell startKV = cur;
	while(enforceMVCC
	&& cur != null
	&& (cur.getSequenceId() > readPt)) {
	boolean hasNext = hfs.next();
	setCurrentCell(hfs.getCell());
	if (hasNext && this.stopSkippingKVsIfNextRow
	&& getComparator().compareRows(cur, startKV) > 0) {
	return false;
	}
	}

	if (cur == null) {
	return false;
	}

	return true;
	}

	@Override
	public void close() {
	if (closed) return;
	cur = null;
	this.hfs.close();
	if (this.reader != null) {
	this.reader.readCompleted();
	}
	closed = true;
	}

	/**
	*
	* @param s
	* @param k
	* @return false if not found or if k is after the end.
	* @throws IOException
	*/
	public static boolean seekAtOrAfter(HFileScanner s, Cell k)
	throws IOException {
	int result = s.seekTo(k);
	if(result < 0) {
	if (result == HConstants.INDEX_KEY_MAGIC) {
	// using faked key
	return true;
	}
	// Passed KV is smaller than first KV in file, work from start of file
	return s.seekTo();
	} else if(result > 0) {
	// Passed KV is larger than current KV in file, if there is a next
	// it is the "after", if not then this scanner is done.
	return s.next();
	}
	// Seeked to the exact key
	return true;
	}

	static boolean reseekAtOrAfter(HFileScanner s, Cell k)
	throws IOException {
	//This function is similar to seekAtOrAfter function
	int result = s.reseekTo(k);
	if (result <= 0) {
	if (result == HConstants.INDEX_KEY_MAGIC) {
	// using faked key
	return true;
	}
	// If up to now scanner is not seeked yet, this means passed KV is smaller
	// than first KV in file, and it is the first time we seek on this file.
	// So we also need to work from the start of file.
	if (!s.isSeeked()) {
	return s.seekTo();
	}
	return true;
	}
	// passed KV is larger than current KV in file, if there is a next
	// it is after, if not then this scanner is done.
	return s.next();
	}

	/**
	* @see KeyValueScanner#getScannerOrder()
	*/
	@Override
	public long getScannerOrder() {
	return scannerOrder;
	}

	/**
	* Pretend we have done a seek but don't do it yet, if possible. The hope is
	* that we find requested columns in more recent files and won't have to seek
	* in older files. Creates a fake key/value with the given row/column and the
	* highest (most recent) possible timestamp we might get from this file. When
	* users of such "lazy scanner" need to know the next KV precisely (e.g. when
	* this scanner is at the top of the heap), they run {@link #enforceSeek()}.
	* <p>
	* Note that this function does guarantee that the current KV of this scanner
	* will be advanced to at least the given KV. Because of this, it does have
	* to do a real seek in cases when the seek timestamp is older than the
	* highest timestamp of the file, e.g. when we are trying to seek to the next
	* row/column and use OLDEST_TIMESTAMP in the seek key.
	*/
	@Override
	public boolean requestSeek(Cell kv, boolean forward, boolean useBloom)
	throws IOException {
	if (kv.getFamilyLength() == 0) {
	useBloom = false;
	}

	boolean haveToSeek = true;
	if (useBloom) {
	// check ROWCOL Bloom filter first.
	if (reader.getBloomFilterType() == BloomType.ROWCOL) {
	haveToSeek = reader.passesGeneralRowColBloomFilter(kv);
	} else if (canOptimizeForNonNullColumn
	&& ((PrivateCellUtil.isDeleteFamily(kv)
	\|\| PrivateCellUtil.isDeleteFamilyVersion(kv)))) {
	// if there is no such delete family kv in the store file,
	// then no need to seek.
	haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getRowArray(), kv.getRowOffset(),
	kv.getRowLength());
	}
	}

	delayedReseek = forward;
	delayedSeekKV = kv;

	if (haveToSeek) {
	// This row/column might be in this store file (or we did not use the
	// Bloom filter), so we still need to seek.
	realSeekDone = false;
	long maxTimestampInFile = reader.getMaxTimestamp();
	long seekTimestamp = kv.getTimestamp();
	if (seekTimestamp > maxTimestampInFile) {
	// Create a fake key that is not greater than the real next key.
	// (Lower timestamps correspond to higher KVs.)
	// To understand this better, consider that we are asked to seek to
	// a higher timestamp than the max timestamp in this file. We know that
	// the next point when we have to consider this file again is when we
	// pass the max timestamp of this file (with the same row/column).
	setCurrentCell(PrivateCellUtil.createFirstOnRowColTS(kv, maxTimestampInFile));
	} else {
	// This will be the case e.g. when we need to seek to the next
	// row/column, and we don't know exactly what they are, so we set the
	// seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this
	// row/column.
	enforceSeek();
	}
	return cur != null;
	}

	// Multi-column Bloom filter optimization.
	// Create a fake key/value, so that this scanner only bubbles up to the top
	// of the KeyValueHeap in StoreScanner after we scanned this row/column in
	// all other store files. The query matcher will then just skip this fake
	// key/value and the store scanner will progress to the next column. This
	// is obviously not a "real real" seek, but unlike the fake KV earlier in
	// this method, we want this to be propagated to ScanQueryMatcher.
	setCurrentCell(PrivateCellUtil.createLastOnRowCol(kv));

	realSeekDone = true;
	return true;
	}

	StoreFileReader getReader() {
	return reader;
	}

	CellComparator getComparator() {
	return reader.getComparator();
	}

	@Override
	public boolean realSeekDone() {
	return realSeekDone;
	}

	@Override
	public void enforceSeek() throws IOException {
	if (realSeekDone)
	return;

	if (delayedReseek) {
	reseek(delayedSeekKV);
	} else {
	seek(delayedSeekKV);
	}
	}

	@Override
	public boolean isFileScanner() {
	return true;
	}

	@Override
	public Path getFilePath() {
	return reader.getHFileReader().getPath();
	}

	// Test methods
	static final long getSeekCount() {
	return seekCount.sum();
	}

	static final void instrument() {
	seekCount = new LongAdder();
	}

	@Override
	public boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS) {
	// if the file has no entries, no need to validate or create a scanner.
	byte[] cf = store.getColumnFamilyDescriptor().getName();
	TimeRange timeRange = scan.getColumnFamilyTimeRange().get(cf);
	if (timeRange == null) {
	timeRange = scan.getTimeRange();
	}
	return reader.passesTimerangeFilter(timeRange, oldestUnexpiredTS) && reader
	.passesKeyRangeFilter(scan) && reader.passesBloomFilter(scan, scan.getFamilyMap().get(cf));
	}

	@Override
	public boolean seekToPreviousRow(Cell originalKey) throws IOException {
	try {
	try {
	boolean keepSeeking = false;
	Cell key = originalKey;
	do {
	Cell seekKey = PrivateCellUtil.createFirstOnRow(key);
	if (seekCount != null) seekCount.increment();
	if (!hfs.seekBefore(seekKey)) {
	this.cur = null;
	return false;
	}
	Cell curCell = hfs.getCell();
	Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(curCell);

	if (seekCount != null) seekCount.increment();
	if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) {
	this.cur = null;
	return false;
	}

	setCurrentCell(hfs.getCell());
	this.stopSkippingKVsIfNextRow = true;
	boolean resultOfSkipKVs;
	try {
	resultOfSkipKVs = skipKVsNewerThanReadpoint();
	} finally {
	this.stopSkippingKVsIfNextRow = false;
	}
	if (!resultOfSkipKVs
	\|\| getComparator().compareRows(cur, firstKeyOfPreviousRow) > 0) {
	keepSeeking = true;
	key = firstKeyOfPreviousRow;
	continue;
	} else {
	keepSeeking = false;
	}
	} while (keepSeeking);
	return true;
	} finally {
	realSeekDone = true;
	}
	} catch (FileNotFoundException e) {
	throw e;
	} catch (IOException ioe) {
	throw new IOException("Could not seekToPreviousRow " + this + " to key "
	+ originalKey, ioe);
	}
	}

	@Override
	public boolean seekToLastRow() throws IOException {
	Optional<byte[]> lastRow = reader.getLastRowKey();
	if (!lastRow.isPresent()) {
	return false;
	}
	Cell seekKey = PrivateCellUtil.createFirstOnRow(lastRow.get());
	if (seek(seekKey)) {
	return true;
	} else {
	return seekToPreviousRow(seekKey);
	}
	}

	@Override
	public boolean backwardSeek(Cell key) throws IOException {
	seek(key);
	if (cur == null
	\|\| getComparator().compareRows(cur, key) > 0) {
	return seekToPreviousRow(key);
	}
	return true;
	}

	@Override
	public Cell getNextIndexedKey() {
	return hfs.getNextIndexedKey();
	}

	@Override
	public void shipped() throws IOException {
	this.hfs.shipped();
	}
	}