/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;

import java.io.DataInput;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;

/**
* Handles Bloom filter initialization based on configuration and serialized
* metadata in the reader and writer of {@link StoreFile}.
*/
@InterfaceAudience.Private
public final class BloomFilterFactory {
private static final Log LOG =
LogFactory.getLog(BloomFilterFactory.class.getName());
/** This class should not be instantiated. */
private BloomFilterFactory() {}
/**
* Specifies the target error rate to use when selecting the number of keys
* per Bloom filter.
*/
public static final String IO_STOREFILE_BLOOM_ERROR_RATE =
"io.storefile.bloom.error.rate";
/**
* Maximum folding factor allowed. If the Bloom filter is initially
* oversized, it can be shrunk ("folded") by a factor of up to 2 ** this value.
*/
public static final String IO_STOREFILE_BLOOM_MAX_FOLD =
"io.storefile.bloom.max.fold";
/**
* For default (single-block) Bloom filters this specifies the maximum number
* of keys.
*/
public static final String IO_STOREFILE_BLOOM_MAX_KEYS =
"io.storefile.bloom.max.keys";
/** Master switch to enable Bloom filters */
public static final String IO_STOREFILE_BLOOM_ENABLED =
"io.storefile.bloom.enabled";
/** Master switch to enable Delete Family Bloom filters */
public static final String IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED =
"io.storefile.delete.family.bloom.enabled";
/**
* Target Bloom block size. Bloom filter blocks of approximately this size
* are interleaved with data blocks.
*/
public static final String IO_STOREFILE_BLOOM_BLOCK_SIZE =
"io.storefile.bloom.block.size";
/** Maximum number of times a Bloom filter can be "folded" if oversized */
private static final int MAX_ALLOWED_FOLD_FACTOR = 7;
/**
* Instantiates the correct Bloom filter class based on the version provided
* in the meta block data.
*
* @param meta the byte array holding the Bloom filter's metadata, including
* version information
* @param reader the {@link HFile} reader to use to lazily load Bloom filter
* blocks
* @return an instance of the correct type of Bloom filter
* @throws IllegalArgumentException if the Bloom filter format version is
* unknown
* @throws IOException if the Bloom filter metadata cannot be read
*/
public static BloomFilter
createFromMeta(DataInput meta, HFile.Reader reader)
throws IllegalArgumentException, IOException {
int version = meta.readInt();
switch (version) {
case ByteBloomFilter.VERSION:
// This is only possible in a version 1 HFile. Version 1 Bloom filters
// always use the raw byte comparator, so no comparator needs to be
// passed here.
return new ByteBloomFilter(meta);
case CompoundBloomFilterBase.VERSION:
return new CompoundBloomFilter(meta, reader);
default:
throw new IllegalArgumentException(
"Bad bloom filter format version " + version
);
}
}
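
// Hypothetical read-path sketch: the factory is typically driven from an
// open HFile reader whose general Bloom meta block, if present, supplies
// the serialized metadata. The local variable names here are assumptions.
//
//   DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
//   if (bloomMeta != null) {
//     BloomFilter bloom = BloomFilterFactory.createFromMeta(bloomMeta, reader);
//   }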
/**
* @return true if general Bloom (Row or RowCol) filters are enabled in the
* given configuration
*/
public static boolean isGeneralBloomEnabled(Configuration conf) {
return conf.getBoolean(IO_STOREFILE_BLOOM_ENABLED, true);
}
/**
* @return true if Delete Family Bloom filters are enabled in the given configuration
*/
public static boolean isDeleteFamilyBloomEnabled(Configuration conf) {
return conf.getBoolean(IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED, true);
}
/**
* @return the Bloom filter error rate in the given configuration
*/
public static float getErrorRate(Configuration conf) {
return conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
}
/**
* @return the maximum Bloom filter folding factor from the given configuration
*/
public static int getMaxFold(Configuration conf) {
return conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, MAX_ALLOWED_FOLD_FACTOR);
}
/** @return the compound Bloom filter block size from the configuration */
public static int getBloomBlockSize(Configuration conf) {
return conf.getInt(IO_STOREFILE_BLOOM_BLOCK_SIZE, 128 * 1024);
}
/**
* @return the maximum number of keys allowed in a single-block Bloom filter,
* from the given configuration
*/
public static int getMaxKeys(Configuration conf) {
return conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, 128 * 1000 * 1000);
}
/**
* Creates a new general (Row or RowCol) Bloom filter at the time of
* {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
*
* @param conf the configuration to read Bloom filter settings from
* @param cacheConf the cache configuration controlling whether Bloom blocks
* are cached on write
* @param bloomType the type of Bloom filter (Row or RowCol) to create
* @param maxKeys an estimate of the number of keys we expect to insert.
* Irrelevant if compound Bloom filters are enabled.
* @param writer the HFile writer
* @return the new Bloom filter, or null if Bloom filters are disabled or the
* filter could not be created
*/
public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf,
CacheConfig cacheConf, BloomType bloomType, int maxKeys,
HFile.Writer writer) {
if (!isGeneralBloomEnabled(conf)) {
LOG.trace("Bloom filters are disabled by configuration for "
+ writer.getPath()
+ (conf == null ? " (configuration is null)" : ""));
return null;
} else if (bloomType == BloomType.NONE) {
LOG.trace("Bloom filter is turned off for the column family");
return null;
}
float err = getErrorRate(conf);
// In case of row/column Bloom filter lookups, each lookup is an OR of two
// separate lookups. Therefore, if each lookup's false positive rate is p,
// the resulting false positive rate is err = 1 - (1 - p)^2, and
// p = 1 - sqrt(1 - err).
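// For example, with the default err = 0.01 the per-lookup rate becomes
// p = 1 - sqrt(0.99) ~= 0.005, i.e. roughly half the configured rate.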
if (bloomType == BloomType.ROWCOL) {
err = (float) (1 - Math.sqrt(1 - err));
}
int maxFold = getMaxFold(conf);
// Do we support compound bloom filters?
if (HFile.getFormatVersion(conf) > HFile.MIN_FORMAT_VERSION) {
// In case of compound Bloom filters we ignore the maxKeys hint.
CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(
getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold,
cacheConf.shouldCacheBloomsOnWrite(), bloomType == BloomType.ROWCOL
? KeyValue.KEY_COMPARATOR : Bytes.BYTES_RAWCOMPARATOR);
writer.addInlineBlockWriter(bloomWriter);
return bloomWriter;
} else {
// A single-block Bloom filter. Only used when testing HFile format
// version 1.
int tooBig = getMaxKeys(conf);
if (maxKeys <= 0) {
LOG.warn("Invalid maximum number of keys specified: " + maxKeys
+ ", not using Bloom filter");
return null;
} else if (maxKeys < tooBig) {
BloomFilterWriter bloom = new ByteBloomFilter(maxKeys, err,
Hash.getHashType(conf), maxFold);
bloom.allocBloom();
return bloom;
} else {
LOG.debug("Skipping bloom filter because max keysize too large: "
+ maxKeys);
}
}
return null;
}
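
// Hypothetical write-path sketch (variable names are assumptions): create
// the filter alongside an HFile writer and add each appended key to it; a
// null return simply means no Bloom filter will be written.
//
//   BloomFilterWriter bloom = BloomFilterFactory.createGeneralBloomAtWrite(
//       conf, cacheConf, BloomType.ROW, maxKeys, hfileWriter);
//   if (bloom != null) {
//     bloom.add(rowKey, 0, rowKey.length);  // once per row key, in sort order
//   }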
/**
* Creates a new Delete Family Bloom filter at the time of
* {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
* @param conf the configuration to read Bloom filter settings from
* @param cacheConf the cache configuration controlling whether Bloom blocks
* are cached on write
* @param maxKeys an estimate of the number of keys we expect to insert.
* Irrelevant if compound Bloom filters are enabled.
* @param writer the HFile writer
* @return the new Bloom filter, or null if Bloom filters are disabled or the
* filter could not be created
*/
public static BloomFilterWriter createDeleteBloomAtWrite(Configuration conf,
CacheConfig cacheConf, int maxKeys, HFile.Writer writer) {
if (!isDeleteFamilyBloomEnabled(conf)) {
LOG.info("Delete Bloom filters are disabled by configuration for "
+ writer.getPath()
+ (conf == null ? " (configuration is null)" : ""));
return null;
}
float err = getErrorRate(conf);
if (HFile.getFormatVersion(conf) > HFile.MIN_FORMAT_VERSION) {
int maxFold = getMaxFold(conf);
// In case of compound Bloom filters we ignore the maxKeys hint.
CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(
getBloomBlockSize(conf), err, Hash.getHashType(conf),
maxFold,
cacheConf.shouldCacheBloomsOnWrite(), Bytes.BYTES_RAWCOMPARATOR);
writer.addInlineBlockWriter(bloomWriter);
return bloomWriter;
} else {
LOG.info("Delete Family Bloom filter is not supported in HFile V1");
return null;
}
}
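
// Hypothetical read-path counterpart (variable names are assumptions): the
// Delete Family Bloom metadata written here can later be loaded back through
// createFromMeta, e.g. from an open HFile reader:
//
//   DataInput deleteBloomMeta = reader.getDeleteBloomFilterMetadata();
//   if (deleteBloomMeta != null) {
//     BloomFilter deleteBloom =
//         BloomFilterFactory.createFromMeta(deleteBloomMeta, reader);
//   }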
}