/**
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.HalfStoreFileReader;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
import org.apache.hadoop.hbase.util.BloomFilter;
import org.apache.hadoop.hbase.util.ByteBloomFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Hash;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.util.StringUtils;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Ordering;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryUsage;
import java.nio.ByteBuffer;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.SortedSet;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A Store data file. Stores usually have one or more of these files. They
* are produced by flushing the memstore to disk. To
* create, call {@link #createWriter(FileSystem, Path, int)} and append data.
* Be sure to add any metadata before calling close on the Writer
* (use the appendMetadata convenience methods). On close, a StoreFile is
* sitting in the filesystem. To refer to it, create a StoreFile instance
* passing filesystem and path. To read, call {@link #createReader()}.
* <p>StoreFiles may also reference store files in another Store.
*
* The reason for this split between Writer and Reader instances is that we
* write a file once but read it many more times.
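* <p>A minimal usage sketch (illustrative only; in practice the enclosing
* Store drives this, and <code>fs</code>, <code>familyDir</code>,
* <code>conf</code>, <code>kv</code> and <code>maxSeqId</code> are assumed
* to be in scope):
* <pre>
* StoreFile.Writer w = StoreFile.createWriter(fs, familyDir, 8 * 1024);
* w.append(kv);                        // append KeyValues in sorted order
* w.appendMetadata(maxSeqId, false);   // metadata must precede close()
* w.close();
* StoreFile sf = new StoreFile(fs, w.getPath(), true, conf,
*     StoreFile.BloomType.NONE, false);
* StoreFile.Reader r = sf.createReader();
* </pre>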
*/
public class StoreFile {
static final Log LOG = LogFactory.getLog(StoreFile.class.getName());
// Config keys.
static final String IO_STOREFILE_BLOOM_ERROR_RATE = "io.storefile.bloom.error.rate";
static final String IO_STOREFILE_BLOOM_MAX_FOLD = "io.storefile.bloom.max.fold";
static final String IO_STOREFILE_BLOOM_MAX_KEYS = "io.storefile.bloom.max.keys";
static final String IO_STOREFILE_BLOOM_ENABLED = "io.storefile.bloom.enabled";
static final String HFILE_BLOCK_CACHE_SIZE_KEY = "hfile.block.cache.size";
public static enum BloomType {
/**
* Bloom filters disabled
*/
NONE,
/**
* Bloom filter enabled with the table row as key
*/
ROW,
/**
* Bloom filter enabled with the table row & column (family+qualifier) as key
*/
ROWCOL
}
// Keys for fileinfo values in HFile
/** Max Sequence ID in FileInfo */
public static final byte [] MAX_SEQ_ID_KEY = Bytes.toBytes("MAX_SEQ_ID_KEY");
/** Major compaction flag in FileInfo */
public static final byte [] MAJOR_COMPACTION_KEY = Bytes.toBytes("MAJOR_COMPACTION_KEY");
/** Bloom filter Type in FileInfo */
static final byte[] BLOOM_FILTER_TYPE_KEY = Bytes.toBytes("BLOOM_FILTER_TYPE");
/** Key for Timerange information in metadata*/
static final byte[] TIMERANGE_KEY = Bytes.toBytes("TIMERANGE");
/** Meta data block name for bloom filter meta-info (ie: bloom params/specs) */
static final String BLOOM_FILTER_META_KEY = "BLOOM_FILTER_META";
/** Meta data block name for bloom filter data (ie: bloom bits) */
static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";
// Make default block size for StoreFiles 8k while testing. TODO: FIX!
public static final int DEFAULT_BLOCKSIZE_SMALL = 8 * 1024;
private static BlockCache hfileBlockCache = null;
private final FileSystem fs;
// This file's path.
private final Path path;
// If this storefile references another, this is the reference instance.
private Reference reference;
// If this StoreFile references another, this is the other file's path.
private Path referencePath;
// Should the block cache be used or not.
private boolean blockcache;
// Is this from an in-memory store
private boolean inMemory;
// This file's maximum edit sequence id, from metadata stored in the
// backing HFile. Set when we obtain a Reader.
private long sequenceid = -1;
// If true, this file was the product of a major compaction. It's set
// whenever you get a Reader.
private AtomicBoolean majorCompaction = null;
/** Meta key set when store file is a result of a bulk load */
public static final byte[] BULKLOAD_TASK_KEY =
Bytes.toBytes("BULKLOAD_SOURCE_TASK");
public static final byte[] BULKLOAD_TIME_KEY =
Bytes.toBytes("BULKLOAD_TIMESTAMP");
/**
* Map of the metadata entries in the corresponding HFile
*/
private Map<byte[], byte[]> metadataMap;
/*
* Regex that will work for straight filenames and for reference names.
* If a reference, then the regex has more than just one group. Group 1 is
* this file's id. Group 2 is the referenced region's name.
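* For illustration (hypothetical names): "1278203223" is a straight
* filename, while "1278203223.otherRegionName" references file
* "1278203223" in region "otherRegionName".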
*/
private static final Pattern REF_NAME_PARSER =
Pattern.compile("^(\\d+)(?:\\.(.+))?$");
// StoreFile.Reader
private volatile Reader reader;
// Used making file ids.
private final static Random rand = new Random();
private final Configuration conf;
private final BloomType bloomType;
/**
* Constructor. Loads a reader and its indices, etc. May allocate a
* substantial amount of ram depending on the underlying files (10-20MB?).
*
* @param fs The current file system to use.
* @param p The path of the file.
* @param blockcache <code>true</code> if the block cache is enabled.
* @param conf The current configuration.
* @param bt The bloom type to use for this store file
* @throws IOException When opening the reader fails.
*/
StoreFile(final FileSystem fs,
final Path p,
final boolean blockcache,
final Configuration conf,
final BloomType bt,
final boolean inMemory)
throws IOException {
this.conf = conf;
this.fs = fs;
this.path = p;
this.blockcache = blockcache;
this.inMemory = inMemory;
if (isReference(p)) {
this.reference = Reference.read(fs, p);
this.referencePath = getReferredToFile(this.path);
}
// ignore if the column family config says "no bloom filter"
// even if there is one in the hfile.
if (conf.getBoolean(IO_STOREFILE_BLOOM_ENABLED, true)) {
this.bloomType = bt;
} else {
this.bloomType = BloomType.NONE;
LOG.info("Ignoring bloom filter check for file (disabled in config)");
}
}
/**
* @return Path or null if this StoreFile was made with a Stream.
*/
Path getPath() {
return this.path;
}
/**
* @return The Store/ColumnFamily this file belongs to.
*/
byte [] getFamily() {
return Bytes.toBytes(this.path.getParent().getName());
}
/**
* @return True if this is a StoreFile Reference; call after {@link #open()},
* else you may get a wrong answer.
*/
boolean isReference() {
return this.reference != null;
}
/**
* @param p Path to check.
* @return True if the path has format of a HStoreFile reference.
*/
public static boolean isReference(final Path p) {
return !p.getName().startsWith("_") &&
isReference(p, REF_NAME_PARSER.matcher(p.getName()));
}
/**
* @param p Path to check.
* @param m Matcher to use.
* @return True if the path has format of a HStoreFile reference.
*/
public static boolean isReference(final Path p, final Matcher m) {
if (m == null || !m.matches()) {
LOG.warn("Failed match of store file name " + p.toString());
throw new RuntimeException("Failed match of store file name " +
p.toString());
}
return m.groupCount() > 1 && m.group(2) != null;
}
/*
* Return path to the file referred to by a Reference. Presumes a directory
* hierarchy of <code>${hbase.rootdir}/tablename/regionname/familyname</code>.
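* For example (hypothetical names), a reference at
* <code>rootdir/tablename/regionB/familyname/1234.regionA</code> resolves to
* <code>rootdir/tablename/regionA/familyname/1234</code>.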
* @param p Path to a Reference file.
* @return Calculated path to parent region file.
* @throws RuntimeException if the file name fails to parse as a reference
*/
static Path getReferredToFile(final Path p) {
Matcher m = REF_NAME_PARSER.matcher(p.getName());
if (m == null || !m.matches()) {
LOG.warn("Failed match of store file name " + p.toString());
throw new RuntimeException("Failed match of store file name " +
p.toString());
}
// Other region name is suffix on the passed Reference file name
String otherRegion = m.group(2);
// Tabledir is up two directories from where Reference was written.
Path tableDir = p.getParent().getParent().getParent();
String nameStrippedOfSuffix = m.group(1);
// Build up new path with the referenced region in place of our current
// region in the reference path. Also strip regionname suffix from name.
return new Path(new Path(new Path(tableDir, otherRegion),
p.getParent().getName()), nameStrippedOfSuffix);
}
/**
* @return True if this file was made by a major compaction.
*/
boolean isMajorCompaction() {
if (this.majorCompaction == null) {
throw new NullPointerException("This has not been set yet");
}
return this.majorCompaction.get();
}
/**
* @return This files maximum edit sequence id.
*/
public long getMaxSequenceId() {
return this.sequenceid;
}
/**
* Return the highest sequence ID found across all storefiles in
* the given list. Store files that were created by a mapreduce
* bulk load are ignored, as they do not correspond to any edit
* log items.
* @return 0 if no non-bulk-load files are provided, or if this is a Store
* that does not yet have any store files.
*/
public static long getMaxSequenceIdInList(List<StoreFile> sfs) {
long max = 0;
for (StoreFile sf : sfs) {
if (!sf.isBulkLoadResult()) {
max = Math.max(max, sf.getMaxSequenceId());
}
}
return max;
}
/**
* @return true if this storefile was created by HFileOutputFormat
* for a bulk load.
*/
boolean isBulkLoadResult() {
return metadataMap.containsKey(BULKLOAD_TIME_KEY);
}
/**
* Return the timestamp at which this bulk load file was generated.
*/
public long getBulkLoadTimestamp() {
return Bytes.toLong(metadataMap.get(BULKLOAD_TIME_KEY));
}
/**
* Returns the block cache or <code>null</code> in case none should be used.
*
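* For example (illustrative numbers), with the default
* <code>hfile.block.cache.size=0.2</code> and a 1 GB max heap, an
* LruBlockCache of roughly 200 MB is allocated on first use.
*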
* @param conf The current configuration.
* @return The block cache or <code>null</code>.
*/
public static synchronized BlockCache getBlockCache(Configuration conf) {
if (hfileBlockCache != null) return hfileBlockCache;
float cachePercentage = conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, 0.2f);
// There should be a better way to optimize this. But oh well.
if (cachePercentage == 0f) return null;
if (cachePercentage > 1.0) {
throw new IllegalArgumentException(HFILE_BLOCK_CACHE_SIZE_KEY +
" must be between 0.0 and 1.0, not > 1.0");
}
// Calculate the amount of heap to give the block cache.
MemoryUsage mu = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
long cacheSize = (long)(mu.getMax() * cachePercentage);
LOG.info("Allocating LruBlockCache with maximum size " +
StringUtils.humanReadableInt(cacheSize));
hfileBlockCache = new LruBlockCache(cacheSize, DEFAULT_BLOCKSIZE_SMALL);
return hfileBlockCache;
}
/**
* @return the blockcache
*/
public BlockCache getBlockCache() {
return blockcache ? getBlockCache(conf) : null;
}
/**
* Opens reader on this store file. Called by Constructor.
* @return Reader for the store file.
* @throws IOException
* @see #closeReader()
*/
private Reader open() throws IOException {
if (this.reader != null) {
throw new IllegalAccessError("Already open");
}
if (isReference()) {
this.reader = new HalfStoreFileReader(this.fs, this.referencePath,
getBlockCache(), this.reference);
} else {
this.reader = new Reader(this.fs, this.path, getBlockCache(),
this.inMemory);
}
// Load up indices and fileinfo.
metadataMap = Collections.unmodifiableMap(this.reader.loadFileInfo());
// Read in our metadata.
byte [] b = metadataMap.get(MAX_SEQ_ID_KEY);
if (b != null) {
// By convention, if halfhfile, the top half has a sequence number greater
// than the bottom half. That's why we add one below. It's done in case the
// two halves are ever merged back together -- rare. Without it, on open of
// a store, since store files are distinguished by sequence id, the one
// half would subsume the other.
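// e.g. (illustrative) a parent file with MAX_SEQ_ID 10 opens as a bottom
// half with seqid 10 and a top half with seqid 11.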
this.sequenceid = Bytes.toLong(b);
if (isReference()) {
if (Reference.isTopFileRegion(this.reference.getFileRegion())) {
this.sequenceid += 1;
}
}
}
this.reader.setSequenceID(this.sequenceid);
b = metadataMap.get(MAJOR_COMPACTION_KEY);
if (b != null) {
boolean mc = Bytes.toBoolean(b);
if (this.majorCompaction == null) {
this.majorCompaction = new AtomicBoolean(mc);
} else {
this.majorCompaction.set(mc);
}
} else {
// Presume it is not major compacted if it doesn't explicitly say so.
// HFileOutputFormat explicitly sets the major compacted key.
this.majorCompaction = new AtomicBoolean(false);
}
if (this.bloomType != BloomType.NONE) {
this.reader.loadBloomfilter();
}
try {
byte [] timerangeBytes = metadataMap.get(TIMERANGE_KEY);
if (timerangeBytes != null) {
this.reader.timeRangeTracker = new TimeRangeTracker();
Writables.copyWritable(timerangeBytes, this.reader.timeRangeTracker);
}
} catch (IllegalArgumentException e) {
LOG.error("Error reading timestamp range data from meta -- " +
"proceeding without", e);
this.reader.timeRangeTracker = null;
}
return this.reader;
}
/**
* @return Reader for this StoreFile. Creates one if necessary.
* @throws IOException
*/
public Reader createReader() throws IOException {
if (this.reader == null) {
this.reader = open();
}
return this.reader;
}
/**
* @return Current reader. Must call {@link #createReader()} first, else returns null.
* @throws IOException
* @see #createReader()
*/
public Reader getReader() {
return this.reader;
}
/**
* @throws IOException
*/
public synchronized void closeReader() throws IOException {
if (this.reader != null) {
this.reader.close();
this.reader = null;
}
}
/**
* Delete this file
* @throws IOException
*/
public void deleteReader() throws IOException {
closeReader();
this.fs.delete(getPath(), true);
}
@Override
public String toString() {
return this.path.toString() +
(isReference()? "-" + this.referencePath + "-" + reference.toString(): "");
}
/**
* @return a lengthy description of this StoreFile, suitable for debug output
*/
public String toStringDetailed() {
StringBuilder sb = new StringBuilder();
sb.append(this.path.toString());
sb.append(", isReference=").append(isReference());
sb.append(", isBulkLoadResult=").append(isBulkLoadResult());
if (isBulkLoadResult()) {
sb.append(", bulkLoadTS=").append(getBulkLoadTimestamp());
} else {
sb.append(", seqid=").append(getMaxSequenceId());
}
sb.append(", majorCompaction=").append(isMajorCompaction());
return sb.toString();
}
/**
* Utility to help with rename.
* @param fs
* @param src
* @param tgt
* @return The <code>tgt</code> path after a successful rename.
* @throws IOException
*/
public static Path rename(final FileSystem fs,
final Path src,
final Path tgt)
throws IOException {
if (!fs.exists(src)) {
throw new FileNotFoundException(src.toString());
}
if (!fs.rename(src, tgt)) {
throw new IOException("Failed rename of " + src + " to " + tgt);
}
return tgt;
}
/**
* Get a store file writer. Client is responsible for closing file when done.
*
* @param fs
* @param dir Path to family directory. Makes the directory if it doesn't exist.
* Creates a file with a unique name in this directory.
* @param blocksize size per filesystem block
* @return StoreFile.Writer
* @throws IOException
*/
public static Writer createWriter(final FileSystem fs,
final Path dir,
final int blocksize)
throws IOException {
return createWriter(fs, dir, blocksize, null, null, null, BloomType.NONE, 0);
}
/**
* Create a store file writer. Client is responsible for closing the file when done.
* If you need metadata, add it BEFORE closing, using appendMetadata().
* @param fs
* @param dir Path to family directory. Makes the directory if it doesn't exist.
* Creates a file with a unique name in this directory.
* @param blocksize size per filesystem block
* @param algorithm Compression algorithm. Pass null to get the default.
* @param c Comparator. Pass null to get the default.
* @param conf HBase system configuration. Used with bloom filters.
* @param bloomType column family setting for bloom filters
* @param maxKeySize maximum number of keys the bloom filter should hold
* (maintains the error rate)
* @return StoreFile.Writer
* @throws IOException
*/
public static StoreFile.Writer createWriter(final FileSystem fs,
final Path dir,
final int blocksize,
final Compression.Algorithm algorithm,
final KeyValue.KVComparator c,
final Configuration conf,
BloomType bloomType,
int maxKeySize)
throws IOException {
if (!fs.exists(dir)) {
fs.mkdirs(dir);
}
Path path = getUniqueFile(fs, dir);
if (conf == null || !conf.getBoolean(IO_STOREFILE_BLOOM_ENABLED, true)) {
bloomType = BloomType.NONE;
}
return new Writer(fs, path, blocksize,
algorithm == null? HFile.DEFAULT_COMPRESSION_ALGORITHM: algorithm,
conf, c == null? KeyValue.COMPARATOR: c, bloomType, maxKeySize);
}
/**
* @param fs
* @param dir Directory to create file in.
* @return random filename inside passed <code>dir</code>
*/
public static Path getUniqueFile(final FileSystem fs, final Path dir)
throws IOException {
if (!fs.getFileStatus(dir).isDir()) {
throw new IOException("Expecting " + dir.toString() +
" to be a directory");
}
return getRandomFilename(fs, dir);
}
/**
* @param fs
* @param dir Directory to create file in.
* @return Path to a file that doesn't exist at time of this invocation.
* @throws IOException
*/
static Path getRandomFilename(final FileSystem fs, final Path dir)
throws IOException {
return getRandomFilename(fs, dir, null);
}
/**
* @param fs
* @param dir Directory to create file in.
* @param suffix Optional suffix for the file name; may be null or empty.
* @return Path to a file that doesn't exist at time of this invocation.
* @throws IOException
*/
static Path getRandomFilename(final FileSystem fs,
final Path dir,
final String suffix)
throws IOException {
long id = -1;
Path p = null;
do {
id = rand.nextLong() & Long.MAX_VALUE; // Math.abs(Long.MIN_VALUE) overflows
p = new Path(dir, Long.toString(id) +
((suffix == null || suffix.length() <= 0)? "": suffix));
} while(fs.exists(p));
return p;
}
/**
* Write out a split reference.
*
* Package local so it doesn't leak out of regionserver.
*
* @param fs
* @param splitDir Presumes path format is actually
* <code>SOME_DIRECTORY/REGIONNAME/FAMILY</code>.
* @param f File to split.
* @param splitRow
* @param range
* @return Path to created reference.
* @throws IOException
*/
static Path split(final FileSystem fs,
final Path splitDir,
final StoreFile f,
final byte [] splitRow,
final Reference.Range range)
throws IOException {
// A reference to the top or bottom half of the store file, per the passed range.
Reference r = new Reference(splitRow, range);
// Add the referred-to regions name as a dot separated suffix.
// See REF_NAME_PARSER regex above. The referred-to regions name is
// up in the path of the passed in <code>f</code> -- parentdir is family,
// then the directory above is the region name.
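// e.g. (hypothetical names) splitting file "1234" of region "regionA"
// writes a reference named "1234.regionA" under splitDir.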
String parentRegionName = f.getPath().getParent().getParent().getName();
// Write reference with same file id only with the other region name as
// suffix and into the new region location (under same family).
Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
return r.write(fs, p);
}
/**
* A StoreFile writer. Use this to write HBase Store Files. It is package
* local because it is an implementation detail of the HBase regionserver.
*/
public static class Writer {
private final BloomFilter bloomFilter;
private final BloomType bloomType;
private KVComparator kvComparator;
private KeyValue lastKv = null;
private byte[] lastByteArray = null;
TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
/* isTimeRangeTrackerSet keeps track of whether the timeRange has already
* been set. When flushing a memstore, we set the TimeRange and use this
* variable to indicate that it doesn't need to be recalculated while
* appending KeyValues.
* It is not set during compactions, when it is recalculated using only
* the appended KeyValues. */
boolean isTimeRangeTrackerSet = false;
protected HFile.Writer writer;
/**
* Creates an HFile.Writer that also writes helpful meta data.
* @param fs file system to write to
* @param path file name to create
* @param blocksize HDFS block size
* @param compress HDFS block compression
* @param conf user configuration
* @param comparator key comparator
* @param bloomType bloom filter setting
* @param maxKeys maximum number of keys to add (for blooms)
* @throws IOException problem writing to FS
*/
public Writer(FileSystem fs, Path path, int blocksize,
Compression.Algorithm compress, final Configuration conf,
final KVComparator comparator, BloomType bloomType, int maxKeys)
throws IOException {
writer = new HFile.Writer(fs, path, blocksize, compress, comparator.getRawComparator());
this.kvComparator = comparator;
BloomFilter bloom = null;
BloomType bt = BloomType.NONE;
if (bloomType != BloomType.NONE && conf != null) {
float err = conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, (float)0.01);
// Since in row+col blooms we have 2 calls to shouldSeek() instead of 1
// and the false positives are adding up, we should keep the error rate
// twice as low in order to maintain the number of false positives as
// desired by the user
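// e.g. with the default error rate of 0.01, ROWCOL blooms are built at 0.005.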
if (bloomType == BloomType.ROWCOL) {
err /= 2;
}
int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, 7);
int tooBig = conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, 128*1000*1000);
if (maxKeys < tooBig) {
try {
bloom = new ByteBloomFilter(maxKeys, err,
Hash.getHashType(conf), maxFold);
bloom.allocBloom();
bt = bloomType;
} catch (IllegalArgumentException iae) {
LOG.warn(String.format(
"Parse error while creating bloom for %s (%d, %f)",
path, maxKeys, err), iae);
bloom = null;
bt = BloomType.NONE;
}
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("Skipping bloom filter because max keysize too large: "
+ maxKeys);
}
}
}
this.bloomFilter = bloom;
this.bloomType = bt;
}
/**
* Writes meta data.
* Call before {@link #close()} since it's written as meta data to this file.
* @param maxSequenceId Maximum sequence id.
* @param majorCompaction True if this file is product of a major compaction
* @throws IOException problem writing to FS
*/
public void appendMetadata(final long maxSequenceId, final boolean majorCompaction)
throws IOException {
writer.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId));
writer.appendFileInfo(MAJOR_COMPACTION_KEY,
Bytes.toBytes(majorCompaction));
appendTimeRangeMetadata();
}
/**
* Add TimestampRange to Metadata
*/
public void appendTimeRangeMetadata() throws IOException {
appendFileInfo(TIMERANGE_KEY, WritableUtils.toByteArray(timeRangeTracker));
}
/**
* Set TimeRangeTracker
* @param trt
*/
public void setTimeRangeTracker(final TimeRangeTracker trt) {
this.timeRangeTracker = trt;
isTimeRangeTrackerSet = true;
}
/**
* If the timeRangeTracker is not set,
* update the TimeRangeTracker to include the timestamp of this KeyValue.
* @param kv
*/
public void includeInTimeRangeTracker(final KeyValue kv) {
if (!isTimeRangeTrackerSet) {
timeRangeTracker.includeTimestamp(kv);
}
}
/**
* If the timeRangeTracker is not set,
* update the TimeRangeTracker to include the timestamp of this key.
* @param key
*/
public void includeInTimeRangeTracker(final byte [] key) {
if (!isTimeRangeTrackerSet) {
timeRangeTracker.includeTimestamp(key);
}
}
public void append(final KeyValue kv) throws IOException {
if (this.bloomFilter != null) {
// only add to the bloom filter on a new, unique key
boolean newKey = true;
if (this.lastKv != null) {
switch(bloomType) {
case ROW:
newKey = ! kvComparator.matchingRows(kv, lastKv);
break;
case ROWCOL:
newKey = ! kvComparator.matchingRowColumn(kv, lastKv);
break;
case NONE:
newKey = false;
}
}
if (newKey) {
/*
* http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue.png
* Key = RowLen + Row + FamilyLen + Column [Family + Qualifier] + TimeStamp
*
* 2 Types of Filtering:
* 1. Row = Row
* 2. RowCol = Row + Qualifier
*/
switch (bloomType) {
case ROW:
this.bloomFilter.add(kv.getBuffer(), kv.getRowOffset(),
kv.getRowLength());
break;
case ROWCOL:
// merge(row, qualifier)
int ro = kv.getRowOffset();
int rl = kv.getRowLength();
int qo = kv.getQualifierOffset();
int ql = kv.getQualifierLength();
byte [] result = new byte[rl + ql];
System.arraycopy(kv.getBuffer(), ro, result, 0, rl);
System.arraycopy(kv.getBuffer(), qo, result, rl, ql);
this.bloomFilter.add(result);
break;
default:
}
this.lastKv = kv;
}
}
writer.append(kv);
includeInTimeRangeTracker(kv);
}
public Path getPath() {
return this.writer.getPath();
}
boolean hasBloom() {
return this.bloomFilter != null;
}
public void append(final byte [] key, final byte [] value) throws IOException {
if (this.bloomFilter != null) {
// only add to the bloom filter on a new row
if (this.lastByteArray == null || !Arrays.equals(key, lastByteArray)) {
this.bloomFilter.add(key);
this.lastByteArray = key;
}
}
writer.append(key, value);
includeInTimeRangeTracker(key);
}
public void close() throws IOException {
// make sure we wrote something to the bloom before adding it
if (this.bloomFilter != null && this.bloomFilter.getKeyCount() > 0) {
bloomFilter.compactBloom();
if (this.bloomFilter.getMaxKeys() > 0) {
int b = this.bloomFilter.getByteSize();
int k = this.bloomFilter.getKeyCount();
int m = this.bloomFilter.getMaxKeys();
StoreFile.LOG.info("Bloom added to HFile (" +
getPath() + "): " + StringUtils.humanReadableInt(b) + ", " +
k + "/" + m + " (" + NumberFormat.getPercentInstance().format(
((double)k) / ((double)m)) + ")");
}
writer.appendMetaBlock(BLOOM_FILTER_META_KEY, bloomFilter.getMetaWriter());
writer.appendMetaBlock(BLOOM_FILTER_DATA_KEY, bloomFilter.getDataWriter());
writer.appendFileInfo(BLOOM_FILTER_TYPE_KEY, Bytes.toBytes(bloomType.toString()));
}
writer.close();
}
public void appendFileInfo(byte[] key, byte[] value) throws IOException {
writer.appendFileInfo(key, value);
}
}
/**
* Reader for a StoreFile.
*/
public static class Reader {
static final Log LOG = LogFactory.getLog(Reader.class.getName());
protected BloomFilter bloomFilter = null;
protected BloomType bloomFilterType;
private final HFile.Reader reader;
protected TimeRangeTracker timeRangeTracker = null;
protected long sequenceID = -1;
public Reader(FileSystem fs, Path path, BlockCache blockCache, boolean inMemory)
throws IOException {
reader = new HFile.Reader(fs, path, blockCache, inMemory);
bloomFilterType = BloomType.NONE;
}
public RawComparator<byte []> getComparator() {
return reader.getComparator();
}
/**
* Get a scanner to scan over this StoreFile.
*
* @param cacheBlocks should this scanner cache blocks?
* @param pread use pread (for highly concurrent small readers)
* @return a scanner
*/
public StoreFileScanner getStoreFileScanner(boolean cacheBlocks, boolean pread) {
return new StoreFileScanner(this, getScanner(cacheBlocks, pread));
}
/**
* Warning: Do not write further code that depends on this call. Instead
* use getStoreFileScanner(), which returns a StoreFileScanner, the
* preferred way to scan a store with higher-level concepts.
*
* @param cacheBlocks should we cache the blocks?
* @param pread use pread (for concurrent small readers)
* @return the underlying HFileScanner
*/
@Deprecated
public HFileScanner getScanner(boolean cacheBlocks, boolean pread) {
return reader.getScanner(cacheBlocks, pread);
}
public void close() throws IOException {
reader.close();
}
public boolean shouldSeek(Scan scan, final SortedSet<byte[]> columns) {
return (passesTimerangeFilter(scan) && passesBloomFilter(scan, columns));
}
/**
* Check if this StoreFile may contain keys within the given TimeRange.
* @param scan
* @return False if no keys in this StoreFile can fall within the scan's time range.
*/
private boolean passesTimerangeFilter(Scan scan) {
if (timeRangeTracker == null) {
return true;
} else {
return timeRangeTracker.includesTimeRange(scan.getTimeRange());
}
}
private boolean passesBloomFilter(Scan scan, final SortedSet<byte[]> columns) {
if (this.bloomFilter == null || !scan.isGetScan()) {
return true;
}
byte[] row = scan.getStartRow();
byte[] key;
switch (this.bloomFilterType) {
case ROW:
key = row;
break;
case ROWCOL:
if (columns != null && columns.size() == 1) {
byte[] col = columns.first();
key = Bytes.add(row, col);
break;
}
//$FALL-THROUGH$
default:
return true;
}
try {
ByteBuffer bloom = reader.getMetaBlock(BLOOM_FILTER_DATA_KEY, true);
if (bloom != null) {
if (this.bloomFilterType == BloomType.ROWCOL) {
// Since a Row Delete is essentially a DeleteFamily applied to all
// columns, a file might be skipped if using row+col Bloom filter.
// In order to ensure this file is included an additional check is
// required looking only for a row bloom.
return this.bloomFilter.contains(key, bloom) ||
this.bloomFilter.contains(row, bloom);
}
else {
return this.bloomFilter.contains(key, bloom);
}
}
} catch (IOException e) {
LOG.error("Error reading bloom filter data -- proceeding without",
e);
setBloomFilterFaulty();
} catch (IllegalArgumentException e) {
LOG.error("Bad bloom filter data -- proceeding without", e);
setBloomFilterFaulty();
}
return true;
}
public Map<byte[], byte[]> loadFileInfo() throws IOException {
Map<byte [], byte []> fi = reader.loadFileInfo();
byte[] b = fi.get(BLOOM_FILTER_TYPE_KEY);
if (b != null) {
bloomFilterType = BloomType.valueOf(Bytes.toString(b));
}
return fi;
}
public void loadBloomfilter() {
if (this.bloomFilter != null) {
return; // already loaded
}
try {
ByteBuffer b = reader.getMetaBlock(BLOOM_FILTER_META_KEY, false);
if (b != null) {
if (bloomFilterType == BloomType.NONE) {
throw new IOException("valid bloom filter type not found in FileInfo");
}
this.bloomFilter = new ByteBloomFilter(b);
LOG.info("Loaded " + (bloomFilterType== BloomType.ROW? "row":"col")
+ " bloom filter metadata for " + reader.getName());
}
} catch (IOException e) {
LOG.error("Error reading bloom filter meta -- proceeding without", e);
this.bloomFilter = null;
} catch (IllegalArgumentException e) {
LOG.error("Bad bloom filter meta -- proceeding without", e);
this.bloomFilter = null;
}
}
public int getFilterEntries() {
return (this.bloomFilter != null) ? this.bloomFilter.getKeyCount()
: reader.getFilterEntries();
}
public ByteBuffer getMetaBlock(String metaBlockKey, boolean cacheBlock) throws IOException {
return reader.getMetaBlock(metaBlockKey, cacheBlock);
}
public void setBloomFilterFaulty() {
bloomFilter = null;
}
public byte[] getLastKey() {
return reader.getLastKey();
}
public byte[] midkey() throws IOException {
return reader.midkey();
}
public long length() {
return reader.length();
}
public int getEntries() {
return reader.getEntries();
}
public byte[] getFirstKey() {
return reader.getFirstKey();
}
public long indexSize() {
return reader.indexSize();
}
public BloomType getBloomFilterType() {
return this.bloomFilterType;
}
public long getSequenceID() {
return sequenceID;
}
public void setSequenceID(long sequenceID) {
this.sequenceID = sequenceID;
}
}
/**
* Useful comparators for comparing StoreFiles.
*/
abstract static class Comparators {
/**
* Comparator that compares based on the flush time of the StoreFiles.
* All bulk loads are placed before all non-bulk loads, sorted among
* themselves by bulk-load timestamp; non-bulk files are then sorted by
* sequence ID. If there are ties, the path name is used as a tie-breaker.
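* For example, bulk-load files with timestamps 5 and 9 sort in that order
* and both precede flushed files with sequence ids 3 and 7.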
*/
static final Comparator<StoreFile> FLUSH_TIME =
Ordering.compound(ImmutableList.of(
Ordering.natural().onResultOf(new GetBulkTime()),
Ordering.natural().onResultOf(new GetSeqId()),
Ordering.natural().onResultOf(new GetPathName())
));
private static class GetBulkTime implements Function<StoreFile, Long> {
@Override
public Long apply(StoreFile sf) {
if (!sf.isBulkLoadResult()) return Long.MAX_VALUE;
return sf.getBulkLoadTimestamp();
}
}
private static class GetSeqId implements Function<StoreFile, Long> {
@Override
public Long apply(StoreFile sf) {
if (sf.isBulkLoadResult()) return -1L;
return sf.getMaxSequenceId();
}
}
private static class GetPathName implements Function<StoreFile, String> {
@Override
public String apply(StoreFile sf) {
return sf.getPath().getName();
}
}
}
}