| /* |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hadoop.hbase.io.hfile; |
| |
| import java.io.DataInput; |
| import java.io.IOException; |
| |
| import org.apache.hadoop.hbase.Cell; |
| import org.apache.yetus.audience.InterfaceAudience; |
| import org.apache.hadoop.hbase.nio.ByteBuff; |
| import org.apache.hadoop.hbase.regionserver.BloomType; |
| import org.apache.hadoop.hbase.util.BloomFilter; |
| import org.apache.hadoop.hbase.util.BloomFilterUtil; |
| import org.apache.hadoop.hbase.util.Bytes; |
| import org.apache.hadoop.hbase.util.Hash; |
| |
| /** |
| * A Bloom filter implementation built on top of |
| * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating |
| * a set of fixed-size Bloom filters written out at the time of |
| * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data |
| * block stream, and loaded on demand at query time. This class only provides |
| * reading capabilities. |
| */ |
| @InterfaceAudience.Private |
| public class CompoundBloomFilter extends CompoundBloomFilterBase |
| implements BloomFilter { |
| |
| /** Used to load chunks on demand */ |
| private HFile.Reader reader; |
| |
| private HFileBlockIndex.BlockIndexReader index; |
| |
| private int hashCount; |
| private Hash hash; |
| |
| private long[] numQueriesPerChunk; |
| private long[] numPositivesPerChunk; |
| |
| /** |
| * De-serialization for compound Bloom filter metadata. Must be consistent |
| * with what {@link CompoundBloomFilterWriter} does. |
| * |
| * @param meta serialized Bloom filter metadata without any magic blocks |
| * @throws IOException |
| */ |
| public CompoundBloomFilter(DataInput meta, HFile.Reader reader) |
| throws IOException { |
| this.reader = reader; |
| |
| totalByteSize = meta.readLong(); |
| hashCount = meta.readInt(); |
| hashType = meta.readInt(); |
| totalKeyCount = meta.readLong(); |
| totalMaxKeys = meta.readLong(); |
| numChunks = meta.readInt(); |
| byte[] comparatorClassName = Bytes.readByteArray(meta); |
| // The writer would have return 0 as the vint length for the case of |
| // Bytes.BYTES_RAWCOMPARATOR. In such cases do not initialize comparator, it can be |
| // null |
| if (comparatorClassName.length != 0) { |
| comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName)); |
| } |
| |
| hash = Hash.getInstance(hashType); |
| if (hash == null) { |
| throw new IllegalArgumentException("Invalid hash type: " + hashType); |
| } |
| // We will pass null for ROW block |
| if(comparator == null) { |
| index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1); |
| } else { |
| index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1); |
| } |
| index.readRootIndex(meta, numChunks); |
| } |
| |
| @Override |
| public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) { |
| int block = index.rootBlockContainingKey(key, keyOffset, keyLength); |
| if (block < 0) { |
| return false; // This key is not in the file. |
| } |
| boolean result; |
| HFileBlock bloomBlock = getBloomBlock(block); |
| try { |
| ByteBuff bloomBuf = bloomBlock.getBufferReadOnly(); |
| result = BloomFilterUtil.contains(key, keyOffset, keyLength, bloomBuf, |
| bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount); |
| } finally { |
| // After the use, should release the block to deallocate byte buffers. |
| bloomBlock.release(); |
| } |
| if (numPositivesPerChunk != null && result) { |
| // Update statistics. Only used in unit tests. |
| ++numPositivesPerChunk[block]; |
| } |
| return result; |
| } |
| |
| private HFileBlock getBloomBlock(int block) { |
| HFileBlock bloomBlock; |
| try { |
| // We cache the block and use a positional read. |
| bloomBlock = reader.readBlock(index.getRootBlockOffset(block), |
| index.getRootBlockDataSize(block), true, true, false, true, BlockType.BLOOM_CHUNK, null); |
| } catch (IOException ex) { |
| // The Bloom filter is broken, turn it off. |
| throw new IllegalArgumentException("Failed to load Bloom block", ex); |
| } |
| |
| if (numQueriesPerChunk != null) { |
| // Update statistics. Only used in unit tests. |
| ++numQueriesPerChunk[block]; |
| } |
| return bloomBlock; |
| } |
| |
| @Override |
| public boolean contains(Cell keyCell, ByteBuff bloom, BloomType type) { |
| int block = index.rootBlockContainingKey(keyCell); |
| if (block < 0) { |
| return false; // This key is not in the file. |
| } |
| boolean result; |
| HFileBlock bloomBlock = getBloomBlock(block); |
| try { |
| ByteBuff bloomBuf = bloomBlock.getBufferReadOnly(); |
| result = BloomFilterUtil.contains(keyCell, bloomBuf, bloomBlock.headerSize(), |
| bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount, type); |
| } finally { |
| // After the use, should release the block to deallocate the byte buffers. |
| bloomBlock.release(); |
| } |
| if (numPositivesPerChunk != null && result) { |
| // Update statistics. Only used in unit tests. |
| ++numPositivesPerChunk[block]; |
| } |
| return result; |
| } |
| |
| @Override |
| public boolean supportsAutoLoading() { |
| return true; |
| } |
| |
| public int getNumChunks() { |
| return numChunks; |
| } |
| |
| public void enableTestingStats() { |
| numQueriesPerChunk = new long[numChunks]; |
| numPositivesPerChunk = new long[numChunks]; |
| } |
| |
| public String formatTestingStats() { |
| StringBuilder sb = new StringBuilder(); |
| for (int i = 0; i < numChunks; ++i) { |
| sb.append("chunk #"); |
| sb.append(i); |
| sb.append(": queries="); |
| sb.append(numQueriesPerChunk[i]); |
| sb.append(", positives="); |
| sb.append(numPositivesPerChunk[i]); |
| sb.append(", positiveRatio="); |
| sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]); |
| sb.append(";\n"); |
| } |
| return sb.toString(); |
| } |
| |
| public long getNumQueriesForTesting(int chunk) { |
| return numQueriesPerChunk[chunk]; |
| } |
| |
| public long getNumPositivesForTesting(int chunk) { |
| return numPositivesPerChunk[chunk]; |
| } |
| |
| @Override |
| public String toString() { |
| StringBuilder sb = new StringBuilder(); |
| sb.append(BloomFilterUtil.formatStats(this)); |
| sb.append(BloomFilterUtil.STATS_RECORD_SEP + |
| "Number of chunks: " + numChunks); |
| sb.append(BloomFilterUtil.STATS_RECORD_SEP + |
| ((comparator != null) ? "Comparator: " |
| + comparator.getClass().getSimpleName() : "Comparator: " |
| + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName())); |
| return sb.toString(); |
| } |
| |
| } |