| /* |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hadoop.hbase.util; |
| |
| import java.io.DataInput; |
| import java.io.IOException; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.classification.InterfaceAudience; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.hbase.KeyValue; |
| import org.apache.hadoop.hbase.io.hfile.CacheConfig; |
| import org.apache.hadoop.hbase.io.hfile.HFile; |
| import org.apache.hadoop.hbase.regionserver.StoreFile; |
| import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; |
| |
| /** |
| * Handles Bloom filter initialization based on configuration and serialized |
| * metadata in the reader and writer of {@link StoreFile}. |
| */ |
| @InterfaceAudience.Private |
| public final class BloomFilterFactory { |
| |
| private static final Log LOG = |
| LogFactory.getLog(BloomFilterFactory.class.getName()); |
| |
| /** This class should not be instantiated. */ |
| private BloomFilterFactory() {} |
| |
| /** |
| * Specifies the target error rate to use when selecting the number of keys |
| * per Bloom filter. |
| */ |
| public static final String IO_STOREFILE_BLOOM_ERROR_RATE = |
| "io.storefile.bloom.error.rate"; |
| |
| /** |
| * Maximum folding factor allowed. The Bloom filter will be shrunk by |
| * the factor of up to 2 ** this times if we oversize it initially. |
| */ |
| public static final String IO_STOREFILE_BLOOM_MAX_FOLD = |
| "io.storefile.bloom.max.fold"; |
| |
| /** |
| * For default (single-block) Bloom filters this specifies the maximum number |
| * of keys. |
| */ |
| public static final String IO_STOREFILE_BLOOM_MAX_KEYS = |
| "io.storefile.bloom.max.keys"; |
| |
| /** Master switch to enable Bloom filters */ |
| public static final String IO_STOREFILE_BLOOM_ENABLED = |
| "io.storefile.bloom.enabled"; |
| |
| /** Master switch to enable Delete Family Bloom filters */ |
| public static final String IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED = |
| "io.storefile.delete.family.bloom.enabled"; |
| |
| /** |
| * Target Bloom block size. Bloom filter blocks of approximately this size |
| * are interleaved with data blocks. |
| */ |
| public static final String IO_STOREFILE_BLOOM_BLOCK_SIZE = |
| "io.storefile.bloom.block.size"; |
| |
| /** Maximum number of times a Bloom filter can be "folded" if oversized */ |
| private static final int MAX_ALLOWED_FOLD_FACTOR = 7; |
| |
| /** |
| * Instantiates the correct Bloom filter class based on the version provided |
| * in the meta block data. |
| * |
| * @param meta the byte array holding the Bloom filter's metadata, including |
| * version information |
| * @param reader the {@link HFile} reader to use to lazily load Bloom filter |
| * blocks |
| * @return an instance of the correct type of Bloom filter |
| * @throws IllegalArgumentException |
| */ |
| public static BloomFilter |
| createFromMeta(DataInput meta, HFile.Reader reader) |
| throws IllegalArgumentException, IOException { |
| int version = meta.readInt(); |
| switch (version) { |
| case ByteBloomFilter.VERSION: |
| // This is only possible in a version 1 HFile. We are ignoring the |
| // passed comparator because raw byte comparators are always used |
| // in version 1 Bloom filters. |
| return new ByteBloomFilter(meta); |
| |
| case CompoundBloomFilterBase.VERSION: |
| return new CompoundBloomFilter(meta, reader); |
| |
| default: |
| throw new IllegalArgumentException( |
| "Bad bloom filter format version " + version |
| ); |
| } |
| } |
| |
| /** |
| * @return true if general Bloom (Row or RowCol) filters are enabled in the |
| * given configuration |
| */ |
| public static boolean isGeneralBloomEnabled(Configuration conf) { |
| return conf.getBoolean(IO_STOREFILE_BLOOM_ENABLED, true); |
| } |
| |
| /** |
| * @return true if Delete Family Bloom filters are enabled in the given configuration |
| */ |
| public static boolean isDeleteFamilyBloomEnabled(Configuration conf) { |
| return conf.getBoolean(IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED, true); |
| } |
| |
| /** |
| * @return the Bloom filter error rate in the given configuration |
| */ |
| public static float getErrorRate(Configuration conf) { |
| return conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01); |
| } |
| |
| /** |
| * @return the value for Bloom filter max fold in the given configuration |
| */ |
| public static int getMaxFold(Configuration conf) { |
| return conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, MAX_ALLOWED_FOLD_FACTOR); |
| } |
| |
| /** @return the compound Bloom filter block size from the configuration */ |
| public static int getBloomBlockSize(Configuration conf) { |
| return conf.getInt(IO_STOREFILE_BLOOM_BLOCK_SIZE, 128 * 1024); |
| } |
| |
| /** |
| * @return max key for the Bloom filter from the configuration |
| */ |
| public static int getMaxKeys(Configuration conf) { |
| return conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, 128 * 1000 * 1000); |
| } |
| |
| /** |
| * Creates a new general (Row or RowCol) Bloom filter at the time of |
| * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing. |
| * |
| * @param conf |
| * @param cacheConf |
| * @param bloomType |
| * @param maxKeys an estimate of the number of keys we expect to insert. |
| * Irrelevant if compound Bloom filters are enabled. |
| * @param writer the HFile writer |
| * @return the new Bloom filter, or null in case Bloom filters are disabled |
| * or when failed to create one. |
| */ |
| public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf, |
| CacheConfig cacheConf, BloomType bloomType, int maxKeys, |
| HFile.Writer writer) { |
| if (!isGeneralBloomEnabled(conf)) { |
| LOG.trace("Bloom filters are disabled by configuration for " |
| + writer.getPath() |
| + (conf == null ? " (configuration is null)" : "")); |
| return null; |
| } else if (bloomType == BloomType.NONE) { |
| LOG.trace("Bloom filter is turned off for the column family"); |
| return null; |
| } |
| |
| float err = getErrorRate(conf); |
| |
| // In case of row/column Bloom filter lookups, each lookup is an OR if two |
| // separate lookups. Therefore, if each lookup's false positive rate is p, |
| // the resulting false positive rate is err = 1 - (1 - p)^2, and |
| // p = 1 - sqrt(1 - err). |
| if (bloomType == BloomType.ROWCOL) { |
| err = (float) (1 - Math.sqrt(1 - err)); |
| } |
| |
| int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, |
| MAX_ALLOWED_FOLD_FACTOR); |
| |
| // Do we support compound bloom filters? |
| if (HFile.getFormatVersion(conf) > HFile.MIN_FORMAT_VERSION) { |
| // In case of compound Bloom filters we ignore the maxKeys hint. |
| CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter( |
| getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold, |
| cacheConf.shouldCacheBloomsOnWrite(), bloomType == BloomType.ROWCOL |
| ? KeyValue.KEY_COMPARATOR : Bytes.BYTES_RAWCOMPARATOR); |
| writer.addInlineBlockWriter(bloomWriter); |
| return bloomWriter; |
| } else { |
| // A single-block Bloom filter. Only used when testing HFile format |
| // version 1. |
| int tooBig = conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, |
| 128 * 1000 * 1000); |
| |
| if (maxKeys <= 0) { |
| LOG.warn("Invalid maximum number of keys specified: " + maxKeys |
| + ", not using Bloom filter"); |
| return null; |
| } else if (maxKeys < tooBig) { |
| BloomFilterWriter bloom = new ByteBloomFilter((int) maxKeys, err, |
| Hash.getHashType(conf), maxFold); |
| bloom.allocBloom(); |
| return bloom; |
| } else { |
| LOG.debug("Skipping bloom filter because max keysize too large: " |
| + maxKeys); |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Creates a new Delete Family Bloom filter at the time of |
| * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing. |
| * @param conf |
| * @param cacheConf |
| * @param maxKeys an estimate of the number of keys we expect to insert. |
| * Irrelevant if compound Bloom filters are enabled. |
| * @param writer the HFile writer |
| * @return the new Bloom filter, or null in case Bloom filters are disabled |
| * or when failed to create one. |
| */ |
| public static BloomFilterWriter createDeleteBloomAtWrite(Configuration conf, |
| CacheConfig cacheConf, int maxKeys, HFile.Writer writer) { |
| if (!isDeleteFamilyBloomEnabled(conf)) { |
| LOG.info("Delete Bloom filters are disabled by configuration for " |
| + writer.getPath() |
| + (conf == null ? " (configuration is null)" : "")); |
| return null; |
| } |
| |
| float err = getErrorRate(conf); |
| |
| if (HFile.getFormatVersion(conf) > HFile.MIN_FORMAT_VERSION) { |
| int maxFold = getMaxFold(conf); |
| // In case of compound Bloom filters we ignore the maxKeys hint. |
| CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter( |
| getBloomBlockSize(conf), err, Hash.getHashType(conf), |
| maxFold, |
| cacheConf.shouldCacheBloomsOnWrite(), Bytes.BYTES_RAWCOMPARATOR); |
| writer.addInlineBlockWriter(bloomWriter); |
| return bloomWriter; |
| } else { |
| LOG.info("Delete Family Bloom filter is not supported in HFile V1"); |
| return null; |
| } |
| } |
| }; |