| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.hbase.io.hfile; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertFalse; |
| import static org.junit.Assert.assertTrue; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.DataOutputStream; |
| import java.io.IOException; |
| import java.nio.ByteBuffer; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Random; |
| import java.util.Set; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FSDataInputStream; |
| import org.apache.hadoop.fs.FSDataOutputStream; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.hbase.CellBuilderType; |
| import org.apache.hadoop.hbase.CellComparatorImpl; |
| import org.apache.hadoop.hbase.CellUtil; |
| import org.apache.hadoop.hbase.ExtendedCellBuilderFactory; |
| import org.apache.hadoop.hbase.HBaseClassTestRule; |
| import org.apache.hadoop.hbase.HBaseCommonTestingUtility; |
| import org.apache.hadoop.hbase.HBaseTestingUtility; |
| import org.apache.hadoop.hbase.HConstants; |
| import org.apache.hadoop.hbase.KeyValue; |
| import org.apache.hadoop.hbase.KeyValueUtil; |
| import org.apache.hadoop.hbase.PrivateCellUtil; |
| import org.apache.hadoop.hbase.fs.HFileSystem; |
| import org.apache.hadoop.hbase.io.ByteBuffAllocator; |
| import org.apache.hadoop.hbase.io.compress.Compression; |
| import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; |
| import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; |
| import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk; |
| import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader; |
| import org.apache.hadoop.hbase.nio.ByteBuff; |
| import org.apache.hadoop.hbase.nio.MultiByteBuff; |
| import org.apache.hadoop.hbase.testclassification.IOTests; |
| import org.apache.hadoop.hbase.testclassification.MediumTests; |
| import org.apache.hadoop.hbase.util.Bytes; |
| import org.apache.hadoop.hbase.util.ClassSize; |
| import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; |
| import org.junit.Before; |
| import org.junit.ClassRule; |
| import org.junit.Test; |
| import org.junit.experimental.categories.Category; |
| import org.junit.runner.RunWith; |
| import org.junit.runners.Parameterized; |
| import org.junit.runners.Parameterized.Parameters; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| @RunWith(Parameterized.class) |
| @Category({IOTests.class, MediumTests.class}) |
| public class TestHFileBlockIndex { |
| |
| @ClassRule |
| public static final HBaseClassTestRule CLASS_RULE = |
| HBaseClassTestRule.forClass(TestHFileBlockIndex.class); |
| |
| @Parameters |
| public static Collection<Object[]> compressionAlgorithms() { |
| return HBaseCommonTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED; |
| } |
| |
| public TestHFileBlockIndex(Compression.Algorithm compr) { |
| this.compr = compr; |
| } |
| |
| private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlockIndex.class); |
| |
| private static final int NUM_DATA_BLOCKS = 1000; |
| private static final HBaseTestingUtility TEST_UTIL = |
| new HBaseTestingUtility(); |
| |
| private static final int SMALL_BLOCK_SIZE = 4096; |
| private static final int NUM_KV = 10000; |
| |
| private static FileSystem fs; |
| private Path path; |
| private Random rand; |
| private long rootIndexOffset; |
| private int numRootEntries; |
| private int numLevels; |
| private static final List<byte[]> keys = new ArrayList<>(); |
| private final Compression.Algorithm compr; |
| private byte[] firstKeyInFile; |
| private Configuration conf; |
| |
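| // These three arrays are parallel: each index chunk size is paired with the |
| // expected number of index levels and the expected uncompressed index size, |
| // and all three are verified together in testHFileWriterAndReader(). |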
| private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 }; |
| private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 }; |
| private static final int[] UNCOMPRESSED_INDEX_SIZES = |
| { 19187, 21813, 23086 }; |
| |
| private static final boolean includesMemstoreTS = true; |
| |
| static { |
| assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length; |
| assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length; |
| } |
| |
| @Before |
| public void setUp() throws IOException { |
| keys.clear(); |
| rand = new Random(2389757); |
| firstKeyInFile = null; |
| conf = TEST_UTIL.getConfiguration(); |
| |
| // This test requires at least HFile format version 2. |
| conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION); |
| |
| fs = HFileSystem.get(conf); |
| } |
| |
| @Test |
| public void testBlockIndex() throws IOException { |
| testBlockIndexInternals(false); |
| clear(); |
| testBlockIndexInternals(true); |
| } |
| |
| private void clear() throws IOException { |
| keys.clear(); |
| rand = new Random(2389757); |
| firstKeyInFile = null; |
| conf = TEST_UTIL.getConfiguration(); |
| |
| // Tags require at least HFile format version 3. |
| conf.setInt(HFile.FORMAT_VERSION_KEY, 3); |
| |
| fs = HFileSystem.get(conf); |
| } |
| |
| private void testBlockIndexInternals(boolean useTags) throws IOException { |
| path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags); |
| writeWholeIndex(useTags); |
| readIndex(useTags); |
| } |
| |
| /** |
| * A wrapper around a block reader which only caches the results of the last |
| * operation. Not thread-safe. |
| */ |
| private static class BlockReaderWrapper implements HFile.CachingBlockReader { |
| |
| private HFileBlock.FSReader realReader; |
| private long prevOffset; |
| private long prevOnDiskSize; |
| private boolean prevPread; |
| private HFileBlock prevBlock; |
| |
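| // Number of readBlock() calls answered from the one-entry cache vs. passed |
| // through to the real reader; readIndex() asserts on these counters to check |
| // how many distinct blocks each index lookup touches. |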
| public int hitCount = 0; |
| public int missCount = 0; |
| |
| public BlockReaderWrapper(HFileBlock.FSReader realReader) { |
| this.realReader = realReader; |
| } |
| |
| @Override |
| public HFileBlock readBlock(long offset, long onDiskSize, |
| boolean cacheBlock, boolean pread, boolean isCompaction, |
| boolean updateCacheMetrics, BlockType expectedBlockType, |
| DataBlockEncoding expectedDataBlockEncoding) |
| throws IOException { |
| if (offset == prevOffset && onDiskSize == prevOnDiskSize && |
| pread == prevPread) { |
| hitCount += 1; |
| return prevBlock; |
| } |
| |
| missCount += 1; |
| prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false, true); |
| prevOffset = offset; |
| prevOnDiskSize = onDiskSize; |
| prevPread = pread; |
| |
| return prevBlock; |
| } |
| } |
| |
| private void readIndex(boolean useTags) throws IOException { |
| long fileSize = fs.getFileStatus(path).getLen(); |
| LOG.info("Size of {}: {} compression={}", path, fileSize, compr.toString()); |
| |
| FSDataInputStream istream = fs.open(path); |
| HFileContext meta = new HFileContextBuilder() |
| .withHBaseCheckSum(true) |
| .withIncludesMvcc(includesMemstoreTS) |
| .withIncludesTags(useTags) |
| .withCompression(compr) |
| .build(); |
| HFileBlock.FSReader blockReader = new HFileBlock.FSReaderImpl(istream, |
| fs.getFileStatus(path).getLen(), meta, ByteBuffAllocator.HEAP); |
| |
| BlockReaderWrapper brw = new BlockReaderWrapper(blockReader); |
| HFileBlockIndex.BlockIndexReader indexReader = |
| new HFileBlockIndex.CellBasedKeyBlockIndexReader( |
| CellComparatorImpl.COMPARATOR, numLevels, brw); |
| |
| indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset, |
| fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries); |
| |
| long prevOffset = -1; |
| int i = 0; |
| int expectedHitCount = 0; |
| int expectedMissCount = 0; |
| LOG.info("Total number of keys: " + keys.size()); |
| for (byte[] key : keys) { |
| assertTrue(key != null); |
| assertTrue(indexReader != null); |
| KeyValue.KeyOnlyKeyValue keyOnlyKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length); |
| HFileBlock b = |
| indexReader.seekToDataBlock(keyOnlyKey, null, true, |
| true, false, null); |
| if (PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, keyOnlyKey, firstKeyInFile, 0, |
| firstKeyInFile.length) < 0) { |
| assertTrue(b == null); |
| ++i; |
| continue; |
| } |
| |
| String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key); |
| |
| assertTrue("seekToDataBlock failed for " + keyStr, b != null); |
| |
| if (prevOffset == b.getOffset()) { |
| assertEquals(++expectedHitCount, brw.hitCount); |
| } else { |
| LOG.info("First key in a new block: " + keyStr + ", block offset: " |
| + b.getOffset() + ")"); |
| assertTrue(b.getOffset() > prevOffset); |
| assertEquals(++expectedMissCount, brw.missCount); |
| prevOffset = b.getOffset(); |
| } |
| ++i; |
| } |
| |
| istream.close(); |
| } |
| |
| private void writeWholeIndex(boolean useTags) throws IOException { |
| assertEquals(0, keys.size()); |
| HFileContext meta = new HFileContextBuilder() |
| .withHBaseCheckSum(true) |
| .withIncludesMvcc(includesMemstoreTS) |
| .withIncludesTags(useTags) |
| .withCompression(compr) |
| .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM) |
| .build(); |
| HFileBlock.Writer hbw = new HFileBlock.Writer(null, |
| meta); |
| FSDataOutputStream outputStream = fs.create(path); |
| HFileBlockIndex.BlockIndexWriter biw = |
| new HFileBlockIndex.BlockIndexWriter(hbw, null, null); |
| |
| for (int i = 0; i < NUM_DATA_BLOCKS; ++i) { |
| hbw.startWriting(BlockType.DATA).write(Bytes.toBytes(String.valueOf(rand.nextInt(1000)))); |
| long blockOffset = outputStream.getPos(); |
| hbw.writeHeaderAndData(outputStream); |
| |
| byte[] firstKey = null; |
| byte[] family = Bytes.toBytes("f"); |
| byte[] qualifier = Bytes.toBytes("q"); |
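| // Generate 16 keys per data block, but only the key generated at j == 8 is |
| // recorded in the block index. Keys of the first block that sort before that |
| // entry exercise the "key before the first indexed key" path in readIndex(). |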
| for (int j = 0; j < 16; ++j) { |
| byte[] k = |
| new KeyValue(RandomKeyValueUtil.randomOrderedKey(rand, i * 16 + j), family, qualifier, |
| EnvironmentEdgeManager.currentTime(), KeyValue.Type.Put).getKey(); |
| keys.add(k); |
| if (j == 8) { |
| firstKey = k; |
| } |
| } |
| assertTrue(firstKey != null); |
| if (firstKeyInFile == null) { |
| firstKeyInFile = firstKey; |
| } |
| biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader()); |
| |
| writeInlineBlocks(hbw, outputStream, biw, false); |
| } |
| writeInlineBlocks(hbw, outputStream, biw, true); |
| rootIndexOffset = biw.writeIndexBlocks(outputStream); |
| outputStream.close(); |
| |
| numLevels = biw.getNumLevels(); |
| numRootEntries = biw.getNumRootEntries(); |
| |
| LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" + |
| numRootEntries + ", rootIndexOffset=" + rootIndexOffset); |
| } |
| |
| private void writeInlineBlocks(HFileBlock.Writer hbw, |
| FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw, |
| boolean isClosing) throws IOException { |
| while (biw.shouldWriteBlock(isClosing)) { |
| long offset = outputStream.getPos(); |
| biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType())); |
| hbw.writeHeaderAndData(outputStream); |
| biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(), |
| hbw.getUncompressedSizeWithoutHeader()); |
| LOG.info("Wrote an inline index block at " + offset + ", size " + |
| hbw.getOnDiskSizeWithHeader()); |
| } |
| } |
| |
| private static final long getDummyFileOffset(int i) { |
| return i * 185 + 379; |
| } |
| |
| private static final int getDummyOnDiskSize(int i) { |
| return i * i * 37 + i * 19 + 13; |
| } |
| |
| @Test |
| public void testSecondaryIndexBinarySearch() throws IOException { |
| int numTotalKeys = 99; |
| assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even. |
| |
| // We only add odd-index keys into the array that we will binary-search. |
| int numSearchedKeys = (numTotalKeys - 1) / 2; |
| |
| ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
| DataOutputStream dos = new DataOutputStream(baos); |
| |
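| // Hand-build a non-root index block body: an int with the number of entries, |
| // a secondary index of (numEntries + 1) ints holding each entry's offset |
| // relative to the start of the entries section, and then the entries |
| // themselves, each consisting of a long file offset, an int on-disk size and |
| // the key bytes (SECONDARY_INDEX_ENTRY_OVERHEAD covers the first two). |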
| dos.writeInt(numSearchedKeys); |
| int curAllEntriesSize = 0; |
| int numEntriesAdded = 0; |
| |
| // Only odd-index elements of this array are used to keep the secondary |
| // index entries of the corresponding keys. |
| int secondaryIndexEntries[] = new int[numTotalKeys]; |
| |
| for (int i = 0; i < numTotalKeys; ++i) { |
| byte[] k = RandomKeyValueUtil.randomOrderedKey(rand, i * 2); |
| KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"), |
| Bytes.toBytes("val")); |
| //KeyValue cell = new KeyValue.KeyOnlyKeyValue(k, 0, k.length); |
| keys.add(cell.getKey()); |
| String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): "; |
| StringBuilder padding = new StringBuilder(); |
| while (msgPrefix.length() + padding.length() < 70) |
| padding.append(' '); |
| msgPrefix += padding; |
| if (i % 2 == 1) { |
| dos.writeInt(curAllEntriesSize); |
| secondaryIndexEntries[i] = curAllEntriesSize; |
| LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) + |
| ", offset " + curAllEntriesSize); |
| curAllEntriesSize += cell.getKey().length |
| + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD; |
| ++numEntriesAdded; |
| } else { |
| secondaryIndexEntries[i] = -1; |
| LOG.info(msgPrefix + "not in the searched array"); |
| } |
| } |
| |
| // Make sure the keys are increasing. |
| for (int i = 0; i < keys.size() - 1; ++i) |
| assertTrue(CellComparatorImpl.COMPARATOR.compare( |
| new KeyValue.KeyOnlyKeyValue(keys.get(i), 0, keys.get(i).length), |
| new KeyValue.KeyOnlyKeyValue(keys.get(i + 1), 0, keys.get(i + 1).length)) < 0); |
| |
| dos.writeInt(curAllEntriesSize); |
| assertEquals(numSearchedKeys, numEntriesAdded); |
| int secondaryIndexOffset = dos.size(); |
| assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2), |
| secondaryIndexOffset); |
| |
| for (int i = 1; i <= numTotalKeys - 1; i += 2) { |
| assertEquals(dos.size(), |
| secondaryIndexOffset + secondaryIndexEntries[i]); |
| long dummyFileOffset = getDummyFileOffset(i); |
| int dummyOnDiskSize = getDummyOnDiskSize(i); |
| LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" + |
| dummyOnDiskSize + " at offset " + dos.size()); |
| dos.writeLong(dummyFileOffset); |
| dos.writeInt(dummyOnDiskSize); |
| LOG.debug("Stored key " + ((i - 1) / 2) +" at offset " + dos.size()); |
| dos.write(keys.get(i)); |
| } |
| |
| dos.writeInt(curAllEntriesSize); |
| |
| ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray()); |
| for (int i = 0; i < numTotalKeys; ++i) { |
| byte[] searchKey = keys.get(i); |
| byte[] arrayHoldingKey = new byte[searchKey.length + |
| searchKey.length / 2]; |
| |
| // To make things a bit more interesting, store the key we are looking |
| // for at a non-zero offset in a new array. |
| System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2, |
| searchKey.length); |
| |
| KeyValue.KeyOnlyKeyValue cell = new KeyValue.KeyOnlyKeyValue( |
| arrayHoldingKey, searchKey.length / 2, searchKey.length); |
| int searchResult = BlockIndexReader.binarySearchNonRootIndex(cell, |
| new MultiByteBuff(nonRootIndex), CellComparatorImpl.COMPARATOR); |
| String lookupFailureMsg = "Failed to look up key #" + i + " (" |
| + Bytes.toStringBinary(searchKey) + ")"; |
| |
| int expectedResult; |
| int referenceItem; |
| |
| if (i % 2 == 1) { |
| // This key is in the array we search as the element (i - 1) / 2. Make |
| // sure we find it. |
| expectedResult = (i - 1) / 2; |
| referenceItem = i; |
| } else { |
| // This key is not in the searched array; it falls between two of its |
| // elements, before the first one, or after the last one. The result should |
| // be the previous key in the searched array, or -1 for i = 0. |
| expectedResult = i / 2 - 1; |
| referenceItem = i - 1; |
| } |
| |
| assertEquals(lookupFailureMsg, expectedResult, searchResult); |
| |
| // Now test that we can get the offset and the on-disk size using a |
| // higher-level API function. |
| boolean locateBlockResult = |
| (BlockIndexReader.locateNonRootIndexEntry(new MultiByteBuff(nonRootIndex), cell, |
| CellComparatorImpl.COMPARATOR) != -1); |
| |
| if (i == 0) { |
| assertFalse(locateBlockResult); |
| } else { |
| assertTrue(locateBlockResult); |
| String errorMsg = "i=" + i + ", position=" + nonRootIndex.position(); |
| assertEquals(errorMsg, getDummyFileOffset(referenceItem), |
| nonRootIndex.getLong()); |
| assertEquals(errorMsg, getDummyOnDiskSize(referenceItem), |
| nonRootIndex.getInt()); |
| } |
| } |
| |
| } |
| |
| @Test |
| public void testBlockIndexChunk() throws IOException { |
| BlockIndexChunk c = new BlockIndexChunk(); |
| ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
| int N = 1000; |
| int[] numSubEntriesAt = new int[N]; |
| int numSubEntries = 0; |
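| // On every iteration, serialize the chunk in both non-root and root formats, |
| // check that the size accounting (getNonRootSize / getRootSize) matches the |
| // number of bytes actually written, and then add one more entry. |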
| for (int i = 0; i < N; ++i) { |
| baos.reset(); |
| DataOutputStream dos = new DataOutputStream(baos); |
| c.writeNonRoot(dos); |
| assertEquals(c.getNonRootSize(), dos.size()); |
| |
| baos.reset(); |
| dos = new DataOutputStream(baos); |
| c.writeRoot(dos); |
| assertEquals(c.getRootSize(), dos.size()); |
| |
| byte[] k = RandomKeyValueUtil.randomOrderedKey(rand, i); |
| numSubEntries += rand.nextInt(5) + 1; |
| // Record the cumulative sub-entry count so that the lookup loop below can |
| // verify getEntryBySubEntry() against it. |
| numSubEntriesAt[i] = numSubEntries; |
| keys.add(k); |
| c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries); |
| } |
| |
| // Test the ability to look up the entry that contains a particular |
| // deeper-level index block's entry ("sub-entry"), assuming a global |
| // 0-based ordering of sub-entries. This is needed for mid-key calculation. |
| for (int i = 0; i < N; ++i) { |
| for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1]; |
| j < numSubEntriesAt[i]; |
| ++j) { |
| assertEquals(i, c.getEntryBySubEntry(j)); |
| } |
| } |
| } |
| |
| /** Checks if the HeapSize calculator is within reason */ |
| @Test |
| public void testHeapSizeForBlockIndex() throws IOException { |
| Class<HFileBlockIndex.BlockIndexReader> cl = |
| HFileBlockIndex.BlockIndexReader.class; |
| long expected = ClassSize.estimateBase(cl, false); |
| |
| HFileBlockIndex.BlockIndexReader bi = |
| new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1); |
| long actual = bi.heapSize(); |
| |
| // Since the arrays in BlockIndex (byte[][] blockKeys, long[] blockOffsets, |
| // int[] blockDataSizes) are all null, they do not show up in the heap size |
| // calculation, so those array costs have to be removed from the expected |
| // value. The block keys are already not accounted for in this case, which is |
| // why only two array costs are subtracted. |
| expected -= ClassSize.align(2 * ClassSize.ARRAY); |
| |
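| // If the aligned estimate does not match, recompute it with debug output |
| // enabled (the second argument) so the size breakdown gets logged, and assert |
| // on that value so a genuine mismatch still fails the test. |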
| if (expected != actual) { |
| expected = ClassSize.estimateBase(cl, true); |
| assertEquals(expected, actual); |
| } |
| } |
| |
| /** |
| * Checks that midKey() behaves correctly when the mid-key falls exactly on a |
| * leaf-level index block boundary. |
| * @throws IOException |
| */ |
| @Test |
| public void testMidKeyOnLeafIndexBlockBoundary() throws IOException { |
| Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_midkey"); |
| int maxChunkSize = 512; |
| conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize); |
| // Enable hfile.block.index.cacheonwrite. |
| conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, true); |
| CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf)); |
| BlockCache blockCache = cacheConf.getBlockCache().get(); |
| // Evict all blocks that were cached-on-write by the previous invocation. |
| blockCache.evictBlocksByHfileName(hfilePath.getName()); |
| // Write the HFile |
| HFileContext meta = |
| new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE).withCompression(Algorithm.NONE) |
| .withDataBlockEncoding(DataBlockEncoding.NONE).build(); |
| HFile.Writer writer = |
| HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath).withFileContext(meta) |
| .create(); |
| Random rand = new Random(19231737); |
| byte[] family = Bytes.toBytes("f"); |
| byte[] qualifier = Bytes.toBytes("q"); |
| int kvNumberToBeWritten = 16; |
| // The generated hfile will contain 2 leaf-index blocks and 16 data blocks; |
| // the midkey falls exactly on the boundary of the first leaf-index block. |
| for (int i = 0; i < kvNumberToBeWritten; ++i) { |
| byte[] row = RandomKeyValueUtil.randomOrderedFixedLengthKey(rand, i, 30); |
| |
| // Key will be interpreted by KeyValue.KEY_COMPARATOR |
| KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(), |
| RandomKeyValueUtil.randomFixedLengthValue(rand, SMALL_BLOCK_SIZE)); |
| writer.append(kv); |
| } |
| writer.close(); |
| |
| // Disable hfile.block.index.cacheonwrite. |
| conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false); |
| |
| // Read the HFile |
| HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf); |
| |
| boolean hasArrayIndexOutOfBoundsException = false; |
| try { |
| // get the mid-key. |
| reader.midKey(); |
| } catch (ArrayIndexOutOfBoundsException e) { |
| hasArrayIndexOutOfBoundsException = true; |
| } finally { |
| reader.close(); |
| } |
| |
| // Verify that no ArrayIndexOutOfBoundsException occurred while computing the mid-key. |
| assertFalse(hasArrayIndexOutOfBoundsException); |
| } |
| |
| /** |
| * Tests the block index through the HFile writer/reader APIs. Covers setting |
| * the index block size through configuration, intermediate-level index |
| * blocks, and caching index blocks on write. |
| * |
| * @throws IOException |
| */ |
| @Test |
| public void testHFileWriterAndReader() throws IOException { |
| Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), |
| "hfile_for_block_index"); |
| CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf)); |
| BlockCache blockCache = cacheConf.getBlockCache().get(); |
| |
| for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) { |
| int indexBlockSize = INDEX_CHUNK_SIZES[testI]; |
| int expectedNumLevels = EXPECTED_NUM_LEVELS[testI]; |
| LOG.info("Index block size: " + indexBlockSize + ", compression: " |
| + compr); |
| // Evict all blocks that were cached-on-write by the previous invocation. |
| blockCache.evictBlocksByHfileName(hfilePath.getName()); |
| |
| conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize); |
| Set<String> keyStrSet = new HashSet<>(); |
| byte[][] keys = new byte[NUM_KV][]; |
| byte[][] values = new byte[NUM_KV][]; |
| |
| // Write the HFile |
| { |
| HFileContext meta = new HFileContextBuilder() |
| .withBlockSize(SMALL_BLOCK_SIZE) |
| .withCompression(compr) |
| .build(); |
| HFile.Writer writer = |
| HFile.getWriterFactory(conf, cacheConf) |
| .withPath(fs, hfilePath) |
| .withFileContext(meta) |
| .create(); |
| Random rand = new Random(19231737); |
| byte[] family = Bytes.toBytes("f"); |
| byte[] qualifier = Bytes.toBytes("q"); |
| for (int i = 0; i < NUM_KV; ++i) { |
| byte[] row = RandomKeyValueUtil.randomOrderedKey(rand, i); |
| |
| // Key will be interpreted by KeyValue.KEY_COMPARATOR |
| KeyValue kv = |
| new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(), |
| RandomKeyValueUtil.randomValue(rand)); |
| byte[] k = kv.getKey(); |
| writer.append(kv); |
| keys[i] = k; |
| values[i] = CellUtil.cloneValue(kv); |
| keyStrSet.add(Bytes.toStringBinary(k)); |
| if (i > 0) { |
| assertTrue((PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, kv, keys[i - 1], |
| 0, keys[i - 1].length)) > 0); |
| } |
| } |
| |
| writer.close(); |
| } |
| |
| // Read the HFile |
| HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf); |
| assertEquals(expectedNumLevels, |
| reader.getTrailer().getNumDataIndexLevels()); |
| |
| assertTrue(Bytes.equals(keys[0], ((KeyValue)reader.getFirstKey().get()).getKey())); |
| assertTrue(Bytes.equals(keys[NUM_KV - 1], ((KeyValue)reader.getLastKey().get()).getKey())); |
| LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1])); |
| |
| for (boolean pread : new boolean[] { false, true }) { |
| HFileScanner scanner = reader.getScanner(true, pread); |
| for (int i = 0; i < NUM_KV; ++i) { |
| checkSeekTo(keys, scanner, i); |
| checkKeyValue("i=" + i, keys[i], values[i], |
| ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue()); |
| } |
| assertTrue(scanner.seekTo()); |
| for (int i = NUM_KV - 1; i >= 0; --i) { |
| checkSeekTo(keys, scanner, i); |
| checkKeyValue("i=" + i, keys[i], values[i], |
| ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue()); |
| } |
| } |
| |
| // Manually compute the mid-key and validate it. |
| HFile.Reader reader2 = reader; |
| HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader(); |
| |
| HFileBlock.BlockIterator iter = fsReader.blockRange(0, |
| reader.getTrailer().getLoadOnOpenDataOffset()); |
| HFileBlock block; |
| List<byte[]> blockKeys = new ArrayList<>(); |
| while ((block = iter.nextBlock()) != null) { |
| if (block.getBlockType() != BlockType.LEAF_INDEX) |
| return; |
| ByteBuff b = block.getBufferReadOnly(); |
| int n = b.getIntAfterPosition(0); |
| // One int for the number of items, and n + 1 for the secondary index. |
| int entriesOffset = Bytes.SIZEOF_INT * (n + 2); |
| |
| // Get all the keys from the leaf index block. |
| for (int i = 0; i < n; ++i) { |
| int keyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 1)); |
| int nextKeyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 2)); |
| int keyLen = nextKeyRelOffset - keyRelOffset |
| - HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD; |
| int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset + |
| HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD; |
| byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset |
| + keyLen); |
| String blockKeyStr = Bytes.toString(blockKey); |
| blockKeys.add(blockKey); |
| |
| // If the first key of the block is not among the keys written, we |
| // are not parsing the non-root index block format correctly. |
| assertTrue("Invalid block key from leaf-level block: " + blockKeyStr, |
| keyStrSet.contains(blockKeyStr)); |
| } |
| } |
| |
| // Validate the mid-key. |
| assertEquals( |
| Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)), |
| reader.midKey()); |
| |
| assertEquals(UNCOMPRESSED_INDEX_SIZES[testI], |
| reader.getTrailer().getUncompressedDataIndexSize()); |
| |
| reader.close(); |
| reader2.close(); |
| } |
| } |
| |
| private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i) |
| throws IOException { |
| assertEquals("Failed to seek to key #" + i + " (" + Bytes.toStringBinary(keys[i]) + ")", 0, |
| scanner.seekTo(KeyValueUtil.createKeyValueFromKey(keys[i]))); |
| } |
| |
| private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr, |
| ByteBuffer buf) { |
| assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr) |
| + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0, |
| arr.length, buf.array(), buf.arrayOffset(), buf.limit())); |
| } |
| |
| /** Check a key/value pair after it was read by the reader */ |
| private void checkKeyValue(String msgPrefix, byte[] expectedKey, |
| byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) { |
| if (!msgPrefix.isEmpty()) |
| msgPrefix += ". "; |
| |
| assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead); |
| assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue, |
| valueRead); |
| } |
| |
| @Test |
| public void testIntermediateLevelIndicesWithLargeKeys() throws IOException { |
| testIntermediateLevelIndicesWithLargeKeys(16); |
| } |
| |
| @Test |
| public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException { |
| // Because of the large row keys, we would end up with a ~50-level block index |
| // if the minimum-entries-per-index-block sanity check were not enforced. |
| testIntermediateLevelIndicesWithLargeKeys(2); |
| } |
| |
| public void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException { |
| Path hfPath = new Path(TEST_UTIL.getDataTestDir(), |
| "testIntermediateLevelIndicesWithLargeKeys.hfile"); |
| int maxChunkSize = 1024; |
| FileSystem fs = FileSystem.get(conf); |
| CacheConfig cacheConf = new CacheConfig(conf); |
| conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize); |
| conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries); |
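| // Every row key below is larger than maxChunkSize, so each index chunk would |
| // normally be flushed after a single entry; MIN_INDEX_NUM_ENTRIES_KEY forces |
| // at least minNumEntries entries per index block, which bounds the number of |
| // index levels. |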
| HFileContext context = new HFileContextBuilder().withBlockSize(16).build(); |
| HFile.Writer hfw = new HFile.WriterFactory(conf, cacheConf) |
| .withFileContext(context) |
| .withPath(fs, hfPath).create(); |
| List<byte[]> keys = new ArrayList<>(); |
| |
| // This should result in leaf-level indices and a root level index |
| for (int i=0; i < 100; i++) { |
| byte[] rowkey = new byte[maxChunkSize + 1]; |
| byte[] b = Bytes.toBytes(i); |
| System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length); |
| keys.add(rowkey); |
| hfw.append(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY) |
| .setRow(rowkey).setFamily(HConstants.EMPTY_BYTE_ARRAY) |
| .setQualifier(HConstants.EMPTY_BYTE_ARRAY) |
| .setTimestamp(HConstants.LATEST_TIMESTAMP) |
| .setType(KeyValue.Type.Maximum.getCode()) |
| .setValue(HConstants.EMPTY_BYTE_ARRAY) |
| .build()); |
| } |
| hfw.close(); |
| |
| HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, true, conf); |
| // Scanner doesn't do Cells yet. Fix. |
| HFileScanner scanner = reader.getScanner(true, true); |
| for (int i = 0; i < keys.size(); ++i) { |
| scanner.seekTo(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY) |
| .setRow(keys.get(i)).setFamily(HConstants.EMPTY_BYTE_ARRAY) |
| .setQualifier(HConstants.EMPTY_BYTE_ARRAY) |
| .setTimestamp(HConstants.LATEST_TIMESTAMP) |
| .setType(KeyValue.Type.Maximum.getCode()) |
| .setValue(HConstants.EMPTY_BYTE_ARRAY) |
| .build()); |
| } |
| reader.close(); |
| } |
| } |
| |