blob: d83b96c536930f692a73a198b90edf7524024cd9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.io.sstable;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;
import com.google.common.collect.Lists;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.Assume;
import org.apache.cassandra.Util;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.*;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.dht.RandomPartitioner;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.cassandra.io.util.DataOutputBuffer;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.Pair;
import static org.apache.cassandra.io.sstable.IndexSummaryBuilder.downsample;
import static org.apache.cassandra.io.sstable.IndexSummaryBuilder.entriesAtSamplingLevel;
import static org.apache.cassandra.io.sstable.Downsampling.BASE_SAMPLING_LEVEL;
import static org.junit.Assert.*;
public class IndexSummaryTest
{
private final static Random random = new Random();
@BeforeClass
public static void initDD()
{
DatabaseDescriptor.daemonInitialization();
final long seed = System.nanoTime();
System.out.println("Using seed: " + seed);
random.setSeed(seed);
}
IPartitioner partitioner = Util.testPartitioner();
@BeforeClass
public static void setup()
{
final long seed = System.nanoTime();
System.out.println("Using seed: " + seed);
random.setSeed(seed);
}
@Test
public void testIndexSummaryKeySizes() throws IOException
{
// On Circle CI we normally don't have enough off-heap memory for this test so ignore it
Assume.assumeTrue(System.getenv("CIRCLECI") == null);
testIndexSummaryProperties(32, 100);
testIndexSummaryProperties(64, 100);
testIndexSummaryProperties(100, 100);
testIndexSummaryProperties(1000, 100);
testIndexSummaryProperties(10000, 100);
}
private void testIndexSummaryProperties(int keySize, int numKeys) throws IOException
{
final int minIndexInterval = 1;
final List<DecoratedKey> keys = new ArrayList<>(numKeys);
try (IndexSummaryBuilder builder = new IndexSummaryBuilder(numKeys, minIndexInterval, BASE_SAMPLING_LEVEL))
{
for (int i = 0; i < numKeys; i++)
{
byte[] randomBytes = new byte[keySize];
random.nextBytes(randomBytes);
DecoratedKey key = partitioner.decorateKey(ByteBuffer.wrap(randomBytes));
keys.add(key);
builder.maybeAddEntry(key, i);
}
try(IndexSummary indexSummary = builder.build(partitioner))
{
assertEquals(numKeys, keys.size());
assertEquals(minIndexInterval, indexSummary.getMinIndexInterval());
assertEquals(numKeys, indexSummary.getMaxNumberOfEntries());
assertEquals(numKeys + 1, indexSummary.getEstimatedKeyCount());
for (int i = 0; i < numKeys; i++)
assertEquals(keys.get(i).getKey(), ByteBuffer.wrap(indexSummary.getKey(i)));
}
}
}
/**
* Test an index summary whose total size is bigger than 2GB,
* the index summary builder should log an error but it should still
* create an index summary, albeit one that does not cover the entire sstable.
*/
@Test
public void testLargeIndexSummary() throws IOException
{
// On Circle CI we normally don't have enough off-heap memory for this test so ignore it
Assume.assumeTrue(System.getenv("CIRCLECI") == null);
final int numKeys = 1000000;
final int keySize = 3000;
final int minIndexInterval = 1;
try (IndexSummaryBuilder builder = new IndexSummaryBuilder(numKeys, minIndexInterval, BASE_SAMPLING_LEVEL))
{
for (int i = 0; i < numKeys; i++)
{
byte[] randomBytes = new byte[keySize];
random.nextBytes(randomBytes);
DecoratedKey key = partitioner.decorateKey(ByteBuffer.wrap(randomBytes));
builder.maybeAddEntry(key, i);
}
try (IndexSummary indexSummary = builder.build(partitioner))
{
assertNotNull(indexSummary);
assertEquals(numKeys, indexSummary.getMaxNumberOfEntries());
assertEquals(numKeys + 1, indexSummary.getEstimatedKeyCount());
}
}
}
/**
* Test an index summary whose total size is bigger than 2GB,
* having updated IndexSummaryBuilder.defaultExpectedKeySize to match the size,
* the index summary should be downsampled automatically.
*/
@Test
public void testLargeIndexSummaryWithExpectedSizeMatching() throws IOException
{
// On Circle CI we normally don't have enough off-heap memory for this test so ignore it
Assume.assumeTrue(System.getenv("CIRCLECI") == null);
final int numKeys = 1000000;
final int keySize = 3000;
final int minIndexInterval = 1;
long oldExpectedKeySize = IndexSummaryBuilder.defaultExpectedKeySize;
IndexSummaryBuilder.defaultExpectedKeySize = 3000;
try (IndexSummaryBuilder builder = new IndexSummaryBuilder(numKeys, minIndexInterval, BASE_SAMPLING_LEVEL))
{
for (int i = 0; i < numKeys; i++)
{
byte[] randomBytes = new byte[keySize];
random.nextBytes(randomBytes);
DecoratedKey key = partitioner.decorateKey(ByteBuffer.wrap(randomBytes));
builder.maybeAddEntry(key, i);
}
try (IndexSummary indexSummary = builder.build(partitioner))
{
assertNotNull(indexSummary);
assertEquals(minIndexInterval * 2, indexSummary.getMinIndexInterval());
assertEquals(numKeys / 2, indexSummary.getMaxNumberOfEntries());
assertEquals(numKeys + 2, indexSummary.getEstimatedKeyCount());
}
}
finally
{
IndexSummaryBuilder.defaultExpectedKeySize = oldExpectedKeySize;
}
}
@Test
public void testGetKey()
{
Pair<List<DecoratedKey>, IndexSummary> random = generateRandomIndex(100, 1);
for (int i = 0; i < 100; i++)
assertEquals(random.left.get(i).getKey(), ByteBuffer.wrap(random.right.getKey(i)));
random.right.close();
}
@Test
public void testBinarySearch()
{
Pair<List<DecoratedKey>, IndexSummary> random = generateRandomIndex(100, 1);
for (int i = 0; i < 100; i++)
assertEquals(i, random.right.binarySearch(random.left.get(i)));
random.right.close();
}
@Test
public void testGetPosition()
{
Pair<List<DecoratedKey>, IndexSummary> random = generateRandomIndex(100, 2);
for (int i = 0; i < 50; i++)
assertEquals(i*2, random.right.getPosition(i));
random.right.close();
}
@Test
public void testSerialization() throws IOException
{
Pair<List<DecoratedKey>, IndexSummary> random = generateRandomIndex(100, 1);
DataOutputBuffer dos = new DataOutputBuffer();
IndexSummary.serializer.serialize(random.right, dos, false);
// write junk
dos.writeUTF("JUNK");
dos.writeUTF("JUNK");
FileUtils.closeQuietly(dos);
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(dos.toByteArray()));
IndexSummary is = IndexSummary.serializer.deserialize(dis, partitioner, false, 1, 1);
for (int i = 0; i < 100; i++)
assertEquals(i, is.binarySearch(random.left.get(i)));
// read the junk
assertEquals(dis.readUTF(), "JUNK");
assertEquals(dis.readUTF(), "JUNK");
is.close();
FileUtils.closeQuietly(dis);
random.right.close();
}
@Test
public void testAddEmptyKey() throws Exception
{
IPartitioner p = new RandomPartitioner();
try (IndexSummaryBuilder builder = new IndexSummaryBuilder(1, 1, BASE_SAMPLING_LEVEL))
{
builder.maybeAddEntry(p.decorateKey(ByteBufferUtil.EMPTY_BYTE_BUFFER), 0);
IndexSummary summary = builder.build(p);
assertEquals(1, summary.size());
assertEquals(0, summary.getPosition(0));
assertArrayEquals(new byte[0], summary.getKey(0));
DataOutputBuffer dos = new DataOutputBuffer();
IndexSummary.serializer.serialize(summary, dos, false);
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(dos.toByteArray()));
IndexSummary loaded = IndexSummary.serializer.deserialize(dis, p, false, 1, 1);
assertEquals(1, loaded.size());
assertEquals(summary.getPosition(0), loaded.getPosition(0));
assertArrayEquals(summary.getKey(0), summary.getKey(0));
summary.close();
loaded.close();
}
}
private Pair<List<DecoratedKey>, IndexSummary> generateRandomIndex(int size, int interval)
{
List<DecoratedKey> list = Lists.newArrayList();
try (IndexSummaryBuilder builder = new IndexSummaryBuilder(list.size(), interval, BASE_SAMPLING_LEVEL))
{
for (int i = 0; i < size; i++)
{
UUID uuid = UUID.randomUUID();
DecoratedKey key = partitioner.decorateKey(ByteBufferUtil.bytes(uuid));
list.add(key);
}
Collections.sort(list);
for (int i = 0; i < size; i++)
builder.maybeAddEntry(list.get(i), i);
IndexSummary summary = builder.build(partitioner);
return Pair.create(list, summary);
}
catch (IOException e)
{
throw new RuntimeException(e);
}
}
@Test
public void testDownsamplePatterns()
{
assertEquals(Arrays.asList(0), Downsampling.getSamplingPattern(0));
assertEquals(Arrays.asList(0), Downsampling.getSamplingPattern(1));
assertEquals(Arrays.asList(1, 0), Downsampling.getSamplingPattern(2));
assertEquals(Arrays.asList(3, 1, 2, 0), Downsampling.getSamplingPattern(4));
assertEquals(Arrays.asList(7, 3, 5, 1, 6, 2, 4, 0), Downsampling.getSamplingPattern(8));
assertEquals(Arrays.asList(15, 7, 11, 3, 13, 5, 9, 1, 14, 6, 10, 2, 12, 4, 8, 0), Downsampling.getSamplingPattern(16));
}
private static boolean shouldSkip(int index, List<Integer> startPoints)
{
for (int start : startPoints)
{
if ((index - start) % BASE_SAMPLING_LEVEL == 0)
return true;
}
return false;
}
@Test
public void testDownsample()
{
final int NUM_KEYS = 4096;
final int INDEX_INTERVAL = 128;
final int ORIGINAL_NUM_ENTRIES = NUM_KEYS / INDEX_INTERVAL;
Pair<List<DecoratedKey>, IndexSummary> random = generateRandomIndex(NUM_KEYS, INDEX_INTERVAL);
List<DecoratedKey> keys = random.left;
IndexSummary original = random.right;
// sanity check on the original index summary
for (int i = 0; i < ORIGINAL_NUM_ENTRIES; i++)
assertEquals(keys.get(i * INDEX_INTERVAL).getKey(), ByteBuffer.wrap(original.getKey(i)));
List<Integer> samplePattern = Downsampling.getSamplingPattern(BASE_SAMPLING_LEVEL);
// downsample by one level, then two levels, then three levels...
int downsamplingRound = 1;
for (int samplingLevel = BASE_SAMPLING_LEVEL - 1; samplingLevel >= 1; samplingLevel--)
{
try (IndexSummary downsampled = downsample(original, samplingLevel, 128, partitioner);)
{
assertEquals(entriesAtSamplingLevel(samplingLevel, original.getMaxNumberOfEntries()), downsampled.size());
int sampledCount = 0;
List<Integer> skipStartPoints = samplePattern.subList(0, downsamplingRound);
for (int i = 0; i < ORIGINAL_NUM_ENTRIES; i++)
{
if (!shouldSkip(i, skipStartPoints))
{
assertEquals(keys.get(i * INDEX_INTERVAL).getKey(), ByteBuffer.wrap(downsampled.getKey(sampledCount)));
sampledCount++;
}
}
testPosition(original, downsampled, keys);
downsamplingRound++;
}
}
// downsample one level each time
IndexSummary previous = original;
downsamplingRound = 1;
for (int downsampleLevel = BASE_SAMPLING_LEVEL - 1; downsampleLevel >= 1; downsampleLevel--)
{
IndexSummary downsampled = downsample(previous, downsampleLevel, 128, partitioner);
if (previous != original)
previous.close();
assertEquals(entriesAtSamplingLevel(downsampleLevel, original.getMaxNumberOfEntries()), downsampled.size());
int sampledCount = 0;
List<Integer> skipStartPoints = samplePattern.subList(0, downsamplingRound);
for (int i = 0; i < ORIGINAL_NUM_ENTRIES; i++)
{
if (!shouldSkip(i, skipStartPoints))
{
assertEquals(keys.get(i * INDEX_INTERVAL).getKey(), ByteBuffer.wrap(downsampled.getKey(sampledCount)));
sampledCount++;
}
}
testPosition(original, downsampled, keys);
previous = downsampled;
downsamplingRound++;
}
previous.close();
original.close();
}
private void testPosition(IndexSummary original, IndexSummary downsampled, List<DecoratedKey> keys)
{
for (DecoratedKey key : keys)
{
long orig = SSTableReader.getIndexScanPositionFromBinarySearchResult(original.binarySearch(key), original);
int binarySearch = downsampled.binarySearch(key);
int index = SSTableReader.getIndexSummaryIndexFromBinarySearchResult(binarySearch);
int scanFrom = (int) SSTableReader.getIndexScanPositionFromBinarySearchResult(index, downsampled);
assert scanFrom <= orig;
int effectiveInterval = downsampled.getEffectiveIndexIntervalAfterIndex(index);
DecoratedKey k = null;
for (int i = 0 ; k != key && i < effectiveInterval && scanFrom < keys.size() ; i++, scanFrom ++)
k = keys.get(scanFrom);
assert k == key;
}
}
@Test
public void testOriginalIndexLookup()
{
for (int i = BASE_SAMPLING_LEVEL; i >= 1; i--)
assertEquals(i, Downsampling.getOriginalIndexes(i).size());
ArrayList<Integer> full = new ArrayList<>();
for (int i = 0; i < BASE_SAMPLING_LEVEL; i++)
full.add(i);
assertEquals(full, Downsampling.getOriginalIndexes(BASE_SAMPLING_LEVEL));
// the entry at index 127 is the first to go
assertEquals(full.subList(0, full.size() - 1), Downsampling.getOriginalIndexes(BASE_SAMPLING_LEVEL - 1));
// spot check a few values (these depend on BASE_SAMPLING_LEVEL being 128)
assertEquals(128, BASE_SAMPLING_LEVEL);
assertEquals(Arrays.asList(0, 32, 64, 96), Downsampling.getOriginalIndexes(4));
assertEquals(Arrays.asList(0, 64), Downsampling.getOriginalIndexes(2));
assertEquals(Arrays.asList(0), Downsampling.getOriginalIndexes(1));
}
@Test
public void testGetNumberOfSkippedEntriesAfterIndex()
{
int indexInterval = 128;
for (int i = 0; i < BASE_SAMPLING_LEVEL; i++)
assertEquals(indexInterval, Downsampling.getEffectiveIndexIntervalAfterIndex(i, BASE_SAMPLING_LEVEL, indexInterval));
// with one round of downsampling, only the last summary entry has been removed, so only the last index will have
// double the gap until the next sample
for (int i = 0; i < BASE_SAMPLING_LEVEL - 2; i++)
assertEquals(indexInterval, Downsampling.getEffectiveIndexIntervalAfterIndex(i, BASE_SAMPLING_LEVEL - 1, indexInterval));
assertEquals(indexInterval * 2, Downsampling.getEffectiveIndexIntervalAfterIndex(BASE_SAMPLING_LEVEL - 2, BASE_SAMPLING_LEVEL - 1, indexInterval));
// at samplingLevel=2, the retained summary points are [0, 64] (assumes BASE_SAMPLING_LEVEL is 128)
assertEquals(128, BASE_SAMPLING_LEVEL);
assertEquals(64 * indexInterval, Downsampling.getEffectiveIndexIntervalAfterIndex(0, 2, indexInterval));
assertEquals(64 * indexInterval, Downsampling.getEffectiveIndexIntervalAfterIndex(1, 2, indexInterval));
}
}