| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.cassandra.index.sasi.disk; |
| |
| import java.io.File; |
| import java.nio.ByteBuffer; |
| import java.util.*; |
| import java.util.concurrent.ThreadLocalRandom; |
| import java.util.stream.Collectors; |
| |
| import org.apache.cassandra.config.DatabaseDescriptor; |
| import org.apache.cassandra.cql3.Operator; |
| import org.apache.cassandra.db.BufferDecoratedKey; |
| import org.apache.cassandra.db.DecoratedKey; |
| import org.apache.cassandra.dht.Murmur3Partitioner; |
| import org.apache.cassandra.index.sasi.plan.Expression; |
| import org.apache.cassandra.index.sasi.utils.CombinedTerm; |
| import org.apache.cassandra.index.sasi.utils.CombinedTermIterator; |
| import org.apache.cassandra.index.sasi.utils.OnDiskIndexIterator; |
| import org.apache.cassandra.index.sasi.utils.RangeIterator; |
| import org.apache.cassandra.db.marshal.AbstractType; |
| import org.apache.cassandra.db.marshal.Int32Type; |
| import org.apache.cassandra.db.marshal.LongType; |
| import org.apache.cassandra.db.marshal.UTF8Type; |
| import org.apache.cassandra.io.util.DataOutputBuffer; |
| import org.apache.cassandra.io.util.FileUtils; |
| import org.apache.cassandra.utils.MurmurHash; |
| import org.apache.cassandra.utils.Pair; |
| |
| import com.carrotsearch.hppc.LongSet; |
| import com.carrotsearch.hppc.cursors.LongCursor; |
| |
| import com.google.common.base.Function; |
| import com.google.common.collect.Iterators; |
| import com.google.common.collect.Sets; |
| |
| import org.junit.Assert; |
| |
| import org.junit.BeforeClass; |
| import org.junit.Test; |
| |
| public class OnDiskIndexTest |
| { |
| @BeforeClass |
| public static void setupDD() |
| { |
| DatabaseDescriptor.daemonInitialization(); |
| } |
| |
| @Test |
| public void testStringSAConstruction() throws Exception |
| { |
| Map<ByteBuffer, TokenTreeBuilder> data = new HashMap<ByteBuffer, TokenTreeBuilder>() |
| {{ |
| put(UTF8Type.instance.decompose("scat"), keyBuilder(1L)); |
| put(UTF8Type.instance.decompose("mat"), keyBuilder(2L)); |
| put(UTF8Type.instance.decompose("fat"), keyBuilder(3L)); |
| put(UTF8Type.instance.decompose("cat"), keyBuilder(1L, 4L)); |
| put(UTF8Type.instance.decompose("till"), keyBuilder(2L, 6L)); |
| put(UTF8Type.instance.decompose("bill"), keyBuilder(5L)); |
| put(UTF8Type.instance.decompose("foo"), keyBuilder(7L)); |
| put(UTF8Type.instance.decompose("bar"), keyBuilder(9L, 10L)); |
| put(UTF8Type.instance.decompose("michael"), keyBuilder(11L, 12L, 1L)); |
| put(UTF8Type.instance.decompose("am"), keyBuilder(15L)); |
| }}; |
| |
| OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, UTF8Type.instance, OnDiskIndexBuilder.Mode.CONTAINS); |
| for (Map.Entry<ByteBuffer, TokenTreeBuilder> e : data.entrySet()) |
| addAll(builder, e.getKey(), e.getValue()); |
| |
| File index = FileUtils.createTempFile("on-disk-sa-string", "db"); |
| index.deleteOnExit(); |
| |
| builder.finish(index); |
| |
| OnDiskIndex onDisk = new OnDiskIndex(index, UTF8Type.instance, new KeyConverter()); |
| |
| // first check if we can find exact matches |
| for (Map.Entry<ByteBuffer, TokenTreeBuilder> e : data.entrySet()) |
| { |
| if (UTF8Type.instance.getString(e.getKey()).equals("cat")) |
| continue; // cat is embedded into scat, we'll test it in next section |
| |
| Assert.assertEquals("Key was: " + UTF8Type.instance.compose(e.getKey()), convert(e.getValue()), convert(onDisk.search(expressionFor(UTF8Type.instance, e.getKey())))); |
| } |
| |
| // check that cat returns positions for scat & cat |
| Assert.assertEquals(convert(1, 4), convert(onDisk.search(expressionFor("cat")))); |
| |
| // random suffix queries |
| Assert.assertEquals(convert(9, 10), convert(onDisk.search(expressionFor("ar")))); |
| Assert.assertEquals(convert(1, 2, 3, 4), convert(onDisk.search(expressionFor("at")))); |
| Assert.assertEquals(convert(1, 11, 12), convert(onDisk.search(expressionFor("mic")))); |
| Assert.assertEquals(convert(1, 11, 12), convert(onDisk.search(expressionFor("ae")))); |
| Assert.assertEquals(convert(2, 5, 6), convert(onDisk.search(expressionFor("ll")))); |
| Assert.assertEquals(convert(1, 2, 5, 6, 11, 12), convert(onDisk.search(expressionFor("l")))); |
| Assert.assertEquals(convert(7), convert(onDisk.search(expressionFor("oo")))); |
| Assert.assertEquals(convert(7), convert(onDisk.search(expressionFor("o")))); |
| Assert.assertEquals(convert(1, 2, 3, 4, 6), convert(onDisk.search(expressionFor("t")))); |
| Assert.assertEquals(convert(1, 2, 11, 12), convert(onDisk.search(expressionFor("m", Operator.LIKE_PREFIX)))); |
| |
| Assert.assertEquals(Collections.<DecoratedKey>emptySet(), convert(onDisk.search(expressionFor("hello")))); |
| |
| onDisk.close(); |
| } |
| |
| @Test |
| public void testIntegerSAConstruction() throws Exception |
| { |
| final Map<ByteBuffer, TokenTreeBuilder> data = new HashMap<ByteBuffer, TokenTreeBuilder>() |
| {{ |
| put(Int32Type.instance.decompose(5), keyBuilder(1L)); |
| put(Int32Type.instance.decompose(7), keyBuilder(2L)); |
| put(Int32Type.instance.decompose(1), keyBuilder(3L)); |
| put(Int32Type.instance.decompose(3), keyBuilder(1L, 4L)); |
| put(Int32Type.instance.decompose(8), keyBuilder(2L, 6L)); |
| put(Int32Type.instance.decompose(10), keyBuilder(5L)); |
| put(Int32Type.instance.decompose(6), keyBuilder(7L)); |
| put(Int32Type.instance.decompose(4), keyBuilder(9L, 10L)); |
| put(Int32Type.instance.decompose(0), keyBuilder(11L, 12L, 1L)); |
| }}; |
| |
| OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, Int32Type.instance, OnDiskIndexBuilder.Mode.PREFIX); |
| for (Map.Entry<ByteBuffer, TokenTreeBuilder> e : data.entrySet()) |
| addAll(builder, e.getKey(), e.getValue()); |
| |
| File index = FileUtils.createTempFile("on-disk-sa-int", "db"); |
| index.deleteOnExit(); |
| |
| builder.finish(index); |
| |
| OnDiskIndex onDisk = new OnDiskIndex(index, Int32Type.instance, new KeyConverter()); |
| |
| for (Map.Entry<ByteBuffer, TokenTreeBuilder> e : data.entrySet()) |
| { |
| Assert.assertEquals(convert(e.getValue()), convert(onDisk.search(expressionFor(Operator.EQ, Int32Type.instance, e.getKey())))); |
| } |
| |
| List<ByteBuffer> sortedNumbers = new ArrayList<ByteBuffer>() |
| {{ |
| addAll(data.keySet().stream().collect(Collectors.toList())); |
| }}; |
| |
| Collections.sort(sortedNumbers, Int32Type.instance::compare); |
| |
| // test full iteration |
| int idx = 0; |
| for (OnDiskIndex.DataTerm term : onDisk) |
| { |
| ByteBuffer number = sortedNumbers.get(idx++); |
| Assert.assertEquals(number, term.getTerm()); |
| Assert.assertEquals(convert(data.get(number)), convert(term.getTokens())); |
| } |
| |
| // test partial iteration (descending) |
| idx = 3; // start from the 3rd element |
| Iterator<OnDiskIndex.DataTerm> partialIter = onDisk.iteratorAt(sortedNumbers.get(idx), OnDiskIndex.IteratorOrder.DESC, true); |
| while (partialIter.hasNext()) |
| { |
| OnDiskIndex.DataTerm term = partialIter.next(); |
| ByteBuffer number = sortedNumbers.get(idx++); |
| |
| Assert.assertEquals(number, term.getTerm()); |
| Assert.assertEquals(convert(data.get(number)), convert(term.getTokens())); |
| } |
| |
| idx = 3; // start from the 3rd element exclusive |
| partialIter = onDisk.iteratorAt(sortedNumbers.get(idx++), OnDiskIndex.IteratorOrder.DESC, false); |
| while (partialIter.hasNext()) |
| { |
| OnDiskIndex.DataTerm term = partialIter.next(); |
| ByteBuffer number = sortedNumbers.get(idx++); |
| |
| Assert.assertEquals(number, term.getTerm()); |
| Assert.assertEquals(convert(data.get(number)), convert(term.getTokens())); |
| } |
| |
| // test partial iteration (ascending) |
| idx = 6; // start from the 6rd element |
| partialIter = onDisk.iteratorAt(sortedNumbers.get(idx), OnDiskIndex.IteratorOrder.ASC, true); |
| while (partialIter.hasNext()) |
| { |
| OnDiskIndex.DataTerm term = partialIter.next(); |
| ByteBuffer number = sortedNumbers.get(idx--); |
| |
| Assert.assertEquals(number, term.getTerm()); |
| Assert.assertEquals(convert(data.get(number)), convert(term.getTokens())); |
| } |
| |
| idx = 6; // start from the 6rd element exclusive |
| partialIter = onDisk.iteratorAt(sortedNumbers.get(idx--), OnDiskIndex.IteratorOrder.ASC, false); |
| while (partialIter.hasNext()) |
| { |
| OnDiskIndex.DataTerm term = partialIter.next(); |
| ByteBuffer number = sortedNumbers.get(idx--); |
| |
| Assert.assertEquals(number, term.getTerm()); |
| Assert.assertEquals(convert(data.get(number)), convert(term.getTokens())); |
| } |
| |
| onDisk.close(); |
| |
| List<ByteBuffer> iterCheckNums = new ArrayList<ByteBuffer>() |
| {{ |
| add(Int32Type.instance.decompose(3)); |
| add(Int32Type.instance.decompose(9)); |
| add(Int32Type.instance.decompose(14)); |
| add(Int32Type.instance.decompose(42)); |
| }}; |
| |
| OnDiskIndexBuilder iterTest = new OnDiskIndexBuilder(UTF8Type.instance, Int32Type.instance, OnDiskIndexBuilder.Mode.PREFIX); |
| for (int i = 0; i < iterCheckNums.size(); i++) |
| iterTest.add(iterCheckNums.get(i), keyAt((long) i), i); |
| |
| File iterIndex = FileUtils.createTempFile("sa-iter", ".db"); |
| iterIndex.deleteOnExit(); |
| |
| iterTest.finish(iterIndex); |
| |
| onDisk = new OnDiskIndex(iterIndex, Int32Type.instance, new KeyConverter()); |
| |
| ByteBuffer number = Int32Type.instance.decompose(1); |
| Assert.assertEquals(0, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.ASC, false))); |
| Assert.assertEquals(0, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.ASC, true))); |
| Assert.assertEquals(4, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.DESC, false))); |
| Assert.assertEquals(4, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.DESC, true))); |
| |
| number = Int32Type.instance.decompose(44); |
| Assert.assertEquals(4, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.ASC, false))); |
| Assert.assertEquals(4, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.ASC, true))); |
| Assert.assertEquals(0, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.DESC, false))); |
| Assert.assertEquals(0, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.DESC, true))); |
| |
| number = Int32Type.instance.decompose(20); |
| Assert.assertEquals(3, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.ASC, false))); |
| Assert.assertEquals(3, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.ASC, true))); |
| Assert.assertEquals(1, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.DESC, false))); |
| Assert.assertEquals(1, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.DESC, true))); |
| |
| number = Int32Type.instance.decompose(5); |
| Assert.assertEquals(1, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.ASC, false))); |
| Assert.assertEquals(1, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.ASC, true))); |
| Assert.assertEquals(3, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.DESC, false))); |
| Assert.assertEquals(3, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.DESC, true))); |
| |
| number = Int32Type.instance.decompose(10); |
| Assert.assertEquals(2, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.ASC, false))); |
| Assert.assertEquals(2, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.ASC, true))); |
| Assert.assertEquals(2, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.DESC, false))); |
| Assert.assertEquals(2, Iterators.size(onDisk.iteratorAt(number, OnDiskIndex.IteratorOrder.DESC, true))); |
| |
| onDisk.close(); |
| } |
| |
| @Test |
| public void testMultiSuffixMatches() throws Exception |
| { |
| OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, UTF8Type.instance, OnDiskIndexBuilder.Mode.CONTAINS) |
| {{ |
| addAll(this, UTF8Type.instance.decompose("Eliza"), keyBuilder(1L, 2L)); |
| addAll(this, UTF8Type.instance.decompose("Elizabeth"), keyBuilder(3L, 4L)); |
| addAll(this, UTF8Type.instance.decompose("Aliza"), keyBuilder(5L, 6L)); |
| addAll(this, UTF8Type.instance.decompose("Taylor"), keyBuilder(7L, 8L)); |
| addAll(this, UTF8Type.instance.decompose("Pavel"), keyBuilder(9L, 10L)); |
| }}; |
| |
| File index = FileUtils.createTempFile("on-disk-sa-multi-suffix-match", ".db"); |
| index.deleteOnExit(); |
| |
| builder.finish(index); |
| |
| OnDiskIndex onDisk = new OnDiskIndex(index, UTF8Type.instance, new KeyConverter()); |
| |
| Assert.assertEquals(convert(1, 2, 3, 4, 5, 6), convert(onDisk.search(expressionFor("liz")))); |
| Assert.assertEquals(convert(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), convert(onDisk.search(expressionFor("a")))); |
| Assert.assertEquals(convert(5, 6), convert(onDisk.search(expressionFor("A")))); |
| Assert.assertEquals(convert(1, 2, 3, 4), convert(onDisk.search(expressionFor("E")))); |
| Assert.assertEquals(convert(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), convert(onDisk.search(expressionFor("l")))); |
| Assert.assertEquals(convert(3, 4), convert(onDisk.search(expressionFor("bet")))); |
| Assert.assertEquals(convert(3, 4, 9, 10), convert(onDisk.search(expressionFor("e")))); |
| Assert.assertEquals(convert(7, 8), convert(onDisk.search(expressionFor("yl")))); |
| Assert.assertEquals(convert(7, 8), convert(onDisk.search(expressionFor("T")))); |
| Assert.assertEquals(convert(1, 2, 3, 4, 5, 6), convert(onDisk.search(expressionFor("za")))); |
| Assert.assertEquals(convert(3, 4), convert(onDisk.search(expressionFor("ab")))); |
| |
| Assert.assertEquals(Collections.<DecoratedKey>emptySet(), convert(onDisk.search(expressionFor("Pi")))); |
| Assert.assertEquals(Collections.<DecoratedKey>emptySet(), convert(onDisk.search(expressionFor("ethz")))); |
| Assert.assertEquals(Collections.<DecoratedKey>emptySet(), convert(onDisk.search(expressionFor("liw")))); |
| Assert.assertEquals(Collections.<DecoratedKey>emptySet(), convert(onDisk.search(expressionFor("Taw")))); |
| Assert.assertEquals(Collections.<DecoratedKey>emptySet(), convert(onDisk.search(expressionFor("Av")))); |
| |
| onDisk.close(); |
| } |
| |
| @Test |
| public void testSparseMode() throws Exception |
| { |
| OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, LongType.instance, OnDiskIndexBuilder.Mode.SPARSE); |
| |
| final long start = System.currentTimeMillis(); |
| final int numIterations = 100000; |
| |
| for (long i = 0; i < numIterations; i++) |
| builder.add(LongType.instance.decompose(start + i), keyAt(i), i); |
| |
| File index = FileUtils.createTempFile("on-disk-sa-sparse", "db"); |
| index.deleteOnExit(); |
| |
| builder.finish(index); |
| |
| OnDiskIndex onDisk = new OnDiskIndex(index, LongType.instance, new KeyConverter()); |
| |
| ThreadLocalRandom random = ThreadLocalRandom.current(); |
| |
| for (long step = start; step < (start + numIterations); step += 1000) |
| { |
| boolean lowerInclusive = random.nextBoolean(); |
| boolean upperInclusive = random.nextBoolean(); |
| |
| long limit = random.nextLong(step, start + numIterations); |
| RangeIterator<Long, Token> rows = onDisk.search(expressionFor(step, lowerInclusive, limit, upperInclusive)); |
| |
| long lowerKey = step - start; |
| long upperKey = lowerKey + (limit - step); |
| |
| if (!lowerInclusive) |
| lowerKey += 1; |
| |
| if (upperInclusive) |
| upperKey += 1; |
| |
| Set<DecoratedKey> actual = convert(rows); |
| for (long key = lowerKey; key < upperKey; key++) |
| Assert.assertTrue("key" + key + " wasn't found", actual.contains(keyAt(key))); |
| |
| Assert.assertEquals((upperKey - lowerKey), actual.size()); |
| } |
| |
| // let's also explicitly test whole range search |
| RangeIterator<Long, Token> rows = onDisk.search(expressionFor(start, true, start + numIterations, true)); |
| |
| Set<DecoratedKey> actual = convert(rows); |
| Assert.assertEquals(numIterations, actual.size()); |
| } |
| |
| @Test |
| public void testNotEqualsQueryForStrings() throws Exception |
| { |
| Map<ByteBuffer, TokenTreeBuilder> data = new HashMap<ByteBuffer, TokenTreeBuilder>() |
| {{ |
| put(UTF8Type.instance.decompose("Pavel"), keyBuilder(1L, 2L)); |
| put(UTF8Type.instance.decompose("Jason"), keyBuilder(3L)); |
| put(UTF8Type.instance.decompose("Jordan"), keyBuilder(4L)); |
| put(UTF8Type.instance.decompose("Michael"), keyBuilder(5L, 6L)); |
| put(UTF8Type.instance.decompose("Vijay"), keyBuilder(7L)); |
| put(UTF8Type.instance.decompose("Travis"), keyBuilder(8L)); |
| put(UTF8Type.instance.decompose("Aleksey"), keyBuilder(9L, 10L)); |
| }}; |
| |
| OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, UTF8Type.instance, OnDiskIndexBuilder.Mode.PREFIX); |
| for (Map.Entry<ByteBuffer, TokenTreeBuilder> e : data.entrySet()) |
| addAll(builder, e.getKey(), e.getValue()); |
| |
| File index = FileUtils.createTempFile("on-disk-sa-except-test", "db"); |
| index.deleteOnExit(); |
| |
| builder.finish(index); |
| |
| OnDiskIndex onDisk = new OnDiskIndex(index, UTF8Type.instance, new KeyConverter()); |
| |
| // test whole words first |
| Assert.assertEquals(convert(3, 4, 5, 6, 7, 8, 9, 10), convert(onDisk.search(expressionForNot("Aleksey", "Vijay", "Pavel")))); |
| |
| Assert.assertEquals(convert(3, 4, 7, 8, 9, 10), convert(onDisk.search(expressionForNot("Aleksey", "Vijay", "Pavel", "Michael")))); |
| |
| Assert.assertEquals(convert(3, 4, 7, 9, 10), convert(onDisk.search(expressionForNot("Aleksey", "Vijay", "Pavel", "Michael", "Travis")))); |
| |
| // now test prefixes |
| Assert.assertEquals(convert(3, 4, 5, 6, 7, 8, 9, 10), convert(onDisk.search(expressionForNot("Aleksey", "Vijay", "Pav")))); |
| |
| Assert.assertEquals(convert(3, 4, 7, 8, 9, 10), convert(onDisk.search(expressionForNot("Aleksey", "Vijay", "Pavel", "Mic")))); |
| |
| Assert.assertEquals(convert(3, 4, 7, 9, 10), convert(onDisk.search(expressionForNot("Aleksey", "Vijay", "Pavel", "Micha", "Tr")))); |
| |
| onDisk.close(); |
| } |
| |
| @Test |
| public void testNotEqualsQueryForNumbers() throws Exception |
| { |
| final Map<ByteBuffer, TokenTreeBuilder> data = new HashMap<ByteBuffer, TokenTreeBuilder>() |
| {{ |
| put(Int32Type.instance.decompose(5), keyBuilder(1L)); |
| put(Int32Type.instance.decompose(7), keyBuilder(2L)); |
| put(Int32Type.instance.decompose(1), keyBuilder(3L)); |
| put(Int32Type.instance.decompose(3), keyBuilder(1L, 4L)); |
| put(Int32Type.instance.decompose(8), keyBuilder(8L, 6L)); |
| put(Int32Type.instance.decompose(10), keyBuilder(5L)); |
| put(Int32Type.instance.decompose(6), keyBuilder(7L)); |
| put(Int32Type.instance.decompose(4), keyBuilder(9L, 10L)); |
| put(Int32Type.instance.decompose(0), keyBuilder(11L, 12L, 1L)); |
| }}; |
| |
| OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, Int32Type.instance, OnDiskIndexBuilder.Mode.PREFIX); |
| for (Map.Entry<ByteBuffer, TokenTreeBuilder> e : data.entrySet()) |
| addAll(builder, e.getKey(), e.getValue()); |
| |
| File index = FileUtils.createTempFile("on-disk-sa-except-int-test", "db"); |
| index.deleteOnExit(); |
| |
| builder.finish(index); |
| |
| OnDiskIndex onDisk = new OnDiskIndex(index, Int32Type.instance, new KeyConverter()); |
| |
| Assert.assertEquals(convert(1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12), convert(onDisk.search(expressionForNot(0, 10, 1)))); |
| Assert.assertEquals(convert(1, 2, 4, 5, 7, 9, 10, 11, 12), convert(onDisk.search(expressionForNot(0, 10, 1, 8)))); |
| Assert.assertEquals(convert(1, 2, 4, 5, 7, 11, 12), convert(onDisk.search(expressionForNot(0, 10, 1, 8, 4)))); |
| |
| onDisk.close(); |
| } |
| |
| @Test |
| public void testRangeQueryWithExclusions() throws Exception |
| { |
| final long lower = 0; |
| final long upper = 100000; |
| |
| OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, LongType.instance, OnDiskIndexBuilder.Mode.SPARSE); |
| for (long i = lower; i <= upper; i++) |
| builder.add(LongType.instance.decompose(i), keyAt(i), i); |
| |
| File index = FileUtils.createTempFile("on-disk-sa-except-long-ranges", "db"); |
| index.deleteOnExit(); |
| |
| builder.finish(index); |
| |
| OnDiskIndex onDisk = new OnDiskIndex(index, LongType.instance, new KeyConverter()); |
| |
| ThreadLocalRandom random = ThreadLocalRandom.current(); |
| |
| // single exclusion |
| |
| // let's do small range first to figure out if searchPoint works properly |
| validateExclusions(onDisk, lower, 50, Sets.newHashSet(42L)); |
| // now let's do whole data set to test SPARSE searching |
| validateExclusions(onDisk, lower, upper, Sets.newHashSet(31337L)); |
| |
| // pair of exclusions which would generate a split |
| |
| validateExclusions(onDisk, lower, random.nextInt(400, 800), Sets.newHashSet(42L, 154L)); |
| validateExclusions(onDisk, lower, upper, Sets.newHashSet(31337L, 54631L)); |
| |
| // 3 exclusions which would generate a split and change bounds |
| |
| validateExclusions(onDisk, lower, random.nextInt(400, 800), Sets.newHashSet(42L, 154L)); |
| validateExclusions(onDisk, lower, upper, Sets.newHashSet(31337L, 54631L)); |
| |
| validateExclusions(onDisk, lower, random.nextLong(400, upper), Sets.newHashSet(42L, 55L)); |
| validateExclusions(onDisk, lower, random.nextLong(400, upper), Sets.newHashSet(42L, 55L, 93L)); |
| validateExclusions(onDisk, lower, random.nextLong(400, upper), Sets.newHashSet(42L, 55L, 93L, 205L)); |
| |
| Set<Long> exclusions = Sets.newHashSet(3L, 12L, 13L, 14L, 27L, 54L, 81L, 125L, 384L, 771L, 1054L, 2048L, 78834L); |
| |
| // test that exclusions are properly bound by lower/upper of the expression |
| Assert.assertEquals(392, validateExclusions(onDisk, lower, 400, exclusions, false)); |
| Assert.assertEquals(101, validateExclusions(onDisk, lower, 100, Sets.newHashSet(-10L, -5L, -1L), false)); |
| |
| validateExclusions(onDisk, lower, upper, exclusions); |
| |
| Assert.assertEquals(100000, convert(onDisk.search(new Expression("", LongType.instance) |
| .add(Operator.NEQ, LongType.instance.decompose(100L)))).size()); |
| |
| Assert.assertEquals(49, convert(onDisk.search(new Expression("", LongType.instance) |
| .add(Operator.LT, LongType.instance.decompose(50L)) |
| .add(Operator.NEQ, LongType.instance.decompose(10L)))).size()); |
| |
| Assert.assertEquals(99998, convert(onDisk.search(new Expression("", LongType.instance) |
| .add(Operator.GT, LongType.instance.decompose(1L)) |
| .add(Operator.NEQ, LongType.instance.decompose(20L)))).size()); |
| |
| onDisk.close(); |
| } |
| |
| private void validateExclusions(OnDiskIndex sa, long lower, long upper, Set<Long> exclusions) |
| { |
| validateExclusions(sa, lower, upper, exclusions, true); |
| } |
| |
| private int validateExclusions(OnDiskIndex sa, long lower, long upper, Set<Long> exclusions, boolean checkCount) |
| { |
| int count = 0; |
| for (DecoratedKey key : convert(sa.search(rangeWithExclusions(lower, true, upper, true, exclusions)))) |
| { |
| String keyId = UTF8Type.instance.getString(key.getKey()).split("key")[1]; |
| Assert.assertFalse("key" + keyId + " is present.", exclusions.contains(Long.valueOf(keyId))); |
| count++; |
| } |
| |
| if (checkCount) |
| Assert.assertEquals(upper - (lower == 0 ? -1 : lower) - exclusions.size(), count); |
| |
| return count; |
| } |
| |
| @Test |
| public void testDescriptor() throws Exception |
| { |
| final Map<ByteBuffer, Pair<DecoratedKey, Long>> data = new HashMap<ByteBuffer, Pair<DecoratedKey, Long>>() |
| {{ |
| put(Int32Type.instance.decompose(5), Pair.create(keyAt(1L), 1L)); |
| }}; |
| |
| OnDiskIndexBuilder builder1 = new OnDiskIndexBuilder(UTF8Type.instance, Int32Type.instance, OnDiskIndexBuilder.Mode.PREFIX); |
| OnDiskIndexBuilder builder2 = new OnDiskIndexBuilder(UTF8Type.instance, Int32Type.instance, OnDiskIndexBuilder.Mode.PREFIX); |
| for (Map.Entry<ByteBuffer, Pair<DecoratedKey, Long>> e : data.entrySet()) |
| { |
| DecoratedKey key = e.getValue().left; |
| Long position = e.getValue().right; |
| |
| builder1.add(e.getKey(), key, position); |
| builder2.add(e.getKey(), key, position); |
| } |
| |
| File index1 = FileUtils.createTempFile("on-disk-sa-int", "db"); |
| File index2 = FileUtils.createTempFile("on-disk-sa-int2", "db"); |
| index1.deleteOnExit(); |
| index2.deleteOnExit(); |
| |
| builder1.finish(index1); |
| builder2.finish(new Descriptor(Descriptor.VERSION_AA), index2); |
| |
| OnDiskIndex onDisk1 = new OnDiskIndex(index1, Int32Type.instance, new KeyConverter()); |
| OnDiskIndex onDisk2 = new OnDiskIndex(index2, Int32Type.instance, new KeyConverter()); |
| |
| ByteBuffer number = Int32Type.instance.decompose(5); |
| |
| Assert.assertEquals(Collections.singleton(data.get(number).left), convert(onDisk1.search(expressionFor(Operator.EQ, Int32Type.instance, number)))); |
| Assert.assertEquals(Collections.singleton(data.get(number).left), convert(onDisk2.search(expressionFor(Operator.EQ, Int32Type.instance, number)))); |
| |
| Assert.assertEquals(onDisk1.descriptor.version.version, Descriptor.CURRENT_VERSION); |
| Assert.assertEquals(onDisk2.descriptor.version.version, Descriptor.VERSION_AA); |
| } |
| |
| @Test |
| public void testSuperBlocks() throws Exception |
| { |
| Map<ByteBuffer, TokenTreeBuilder> terms = new HashMap<>(); |
| terms.put(UTF8Type.instance.decompose("1234"), keyBuilder(1L, 2L)); |
| terms.put(UTF8Type.instance.decompose("2345"), keyBuilder(3L, 4L)); |
| terms.put(UTF8Type.instance.decompose("3456"), keyBuilder(5L, 6L)); |
| terms.put(UTF8Type.instance.decompose("4567"), keyBuilder(7L, 8L)); |
| terms.put(UTF8Type.instance.decompose("5678"), keyBuilder(9L, 10L)); |
| |
| OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, Int32Type.instance, OnDiskIndexBuilder.Mode.SPARSE); |
| for (Map.Entry<ByteBuffer, TokenTreeBuilder> entry : terms.entrySet()) |
| addAll(builder, entry.getKey(), entry.getValue()); |
| |
| File index = FileUtils.createTempFile("on-disk-sa-try-superblocks", ".db"); |
| index.deleteOnExit(); |
| |
| builder.finish(index); |
| |
| OnDiskIndex onDisk = new OnDiskIndex(index, Int32Type.instance, new KeyConverter()); |
| OnDiskIndex.OnDiskSuperBlock superBlock = onDisk.dataLevel.getSuperBlock(0); |
| Iterator<Token> iter = superBlock.iterator(); |
| |
| Long lastToken = null; |
| while (iter.hasNext()) |
| { |
| Token token = iter.next(); |
| |
| if (lastToken != null) |
| Assert.assertTrue(lastToken.compareTo(token.get()) < 0); |
| |
| lastToken = token.get(); |
| } |
| } |
| |
| @Test |
| public void testSuperBlockRetrieval() throws Exception |
| { |
| OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, LongType.instance, OnDiskIndexBuilder.Mode.SPARSE); |
| for (long i = 0; i < 100000; i++) |
| builder.add(LongType.instance.decompose(i), keyAt(i), i); |
| |
| File index = FileUtils.createTempFile("on-disk-sa-multi-superblock-match", ".db"); |
| index.deleteOnExit(); |
| |
| builder.finish(index); |
| |
| OnDiskIndex onDiskIndex = new OnDiskIndex(index, LongType.instance, new KeyConverter()); |
| |
| testSearchRangeWithSuperBlocks(onDiskIndex, 0, 500); |
| testSearchRangeWithSuperBlocks(onDiskIndex, 300, 93456); |
| testSearchRangeWithSuperBlocks(onDiskIndex, 210, 1700); |
| testSearchRangeWithSuperBlocks(onDiskIndex, 530, 3200); |
| |
| Random random = new Random(0xdeadbeef); |
| for (int i = 0; i < 100000; i += random.nextInt(1500)) // random steps with max of 1500 elements |
| { |
| for (int j = 0; j < 3; j++) |
| testSearchRangeWithSuperBlocks(onDiskIndex, i, ThreadLocalRandom.current().nextInt(i, 100000)); |
| } |
| } |
| |
| public void putAll(SortedMap<Long, LongSet> offsets, TokenTreeBuilder ttb) |
| { |
| for (Pair<Long, LongSet> entry : ttb) |
| offsets.put(entry.left, entry.right); |
| } |
| |
| @Test |
| public void testCombiningOfThePartitionedSA() throws Exception |
| { |
| OnDiskIndexBuilder builderA = new OnDiskIndexBuilder(UTF8Type.instance, LongType.instance, OnDiskIndexBuilder.Mode.PREFIX); |
| OnDiskIndexBuilder builderB = new OnDiskIndexBuilder(UTF8Type.instance, LongType.instance, OnDiskIndexBuilder.Mode.PREFIX); |
| |
| TreeMap<Long, TreeMap<Long, LongSet>> expected = new TreeMap<>(); |
| |
| for (long i = 0; i <= 100; i++) |
| { |
| TreeMap<Long, LongSet> offsets = expected.get(i); |
| if (offsets == null) |
| expected.put(i, (offsets = new TreeMap<>())); |
| |
| builderA.add(LongType.instance.decompose(i), keyAt(i), i); |
| putAll(offsets, keyBuilder(i)); |
| } |
| |
| for (long i = 50; i < 100; i++) |
| { |
| TreeMap<Long, LongSet> offsets = expected.get(i); |
| if (offsets == null) |
| expected.put(i, (offsets = new TreeMap<>())); |
| |
| long position = 100L + i; |
| builderB.add(LongType.instance.decompose(i), keyAt(position), position); |
| putAll(offsets, keyBuilder(100L + i)); |
| } |
| |
| File indexA = FileUtils.createTempFile("on-disk-sa-partition-a", ".db"); |
| indexA.deleteOnExit(); |
| |
| File indexB = FileUtils.createTempFile("on-disk-sa-partition-b", ".db"); |
| indexB.deleteOnExit(); |
| |
| builderA.finish(indexA); |
| builderB.finish(indexB); |
| |
| OnDiskIndex a = new OnDiskIndex(indexA, LongType.instance, new KeyConverter()); |
| OnDiskIndex b = new OnDiskIndex(indexB, LongType.instance, new KeyConverter()); |
| |
| RangeIterator<OnDiskIndex.DataTerm, CombinedTerm> union = OnDiskIndexIterator.union(a, b); |
| |
| TreeMap<Long, TreeMap<Long, LongSet>> actual = new TreeMap<>(); |
| while (union.hasNext()) |
| { |
| CombinedTerm term = union.next(); |
| |
| Long composedTerm = LongType.instance.compose(term.getTerm()); |
| |
| TreeMap<Long, LongSet> offsets = actual.get(composedTerm); |
| if (offsets == null) |
| actual.put(composedTerm, (offsets = new TreeMap<>())); |
| |
| putAll(offsets, term.getTokenTreeBuilder()); |
| } |
| |
| Assert.assertEquals(actual, expected); |
| |
| File indexC = FileUtils.createTempFile("on-disk-sa-partition-final", ".db"); |
| indexC.deleteOnExit(); |
| |
| OnDiskIndexBuilder combined = new OnDiskIndexBuilder(UTF8Type.instance, LongType.instance, OnDiskIndexBuilder.Mode.PREFIX); |
| combined.finish(Pair.create(keyAt(0).getKey(), keyAt(100).getKey()), indexC, new CombinedTermIterator(a, b)); |
| |
| OnDiskIndex c = new OnDiskIndex(indexC, LongType.instance, new KeyConverter()); |
| union = OnDiskIndexIterator.union(c); |
| actual.clear(); |
| |
| while (union.hasNext()) |
| { |
| CombinedTerm term = union.next(); |
| |
| Long composedTerm = LongType.instance.compose(term.getTerm()); |
| |
| TreeMap<Long, LongSet> offsets = actual.get(composedTerm); |
| if (offsets == null) |
| actual.put(composedTerm, (offsets = new TreeMap<>())); |
| |
| putAll(offsets, term.getTokenTreeBuilder()); |
| } |
| |
| Assert.assertEquals(actual, expected); |
| |
| a.close(); |
| b.close(); |
| } |
| |
| @Test |
| public void testPrefixSearchWithCONTAINSMode() throws Exception |
| { |
| Map<ByteBuffer, TokenTreeBuilder> data = new HashMap<ByteBuffer, TokenTreeBuilder>() |
| {{ |
| |
| put(UTF8Type.instance.decompose("lady gaga"), keyBuilder(1L)); |
| |
| // Partial term for 'lady of bells' |
| DataOutputBuffer ladyOfBellsBuffer = new DataOutputBuffer(); |
| ladyOfBellsBuffer.writeShort(UTF8Type.instance.decompose("lady of bells").remaining() | (1 << OnDiskIndexBuilder.IS_PARTIAL_BIT)); |
| ladyOfBellsBuffer.write(UTF8Type.instance.decompose("lady of bells")); |
| put(ladyOfBellsBuffer.asNewBuffer(), keyBuilder(2L)); |
| |
| |
| put(UTF8Type.instance.decompose("lady pank"), keyBuilder(3L)); |
| }}; |
| |
| OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, UTF8Type.instance, OnDiskIndexBuilder.Mode.CONTAINS); |
| for (Map.Entry<ByteBuffer, TokenTreeBuilder> e : data.entrySet()) |
| addAll(builder, e.getKey(), e.getValue()); |
| |
| File index = FileUtils.createTempFile("on-disk-sa-prefix-contains-search", "db"); |
| index.deleteOnExit(); |
| |
| builder.finish(index); |
| |
| OnDiskIndex onDisk = new OnDiskIndex(index, UTF8Type.instance, new KeyConverter()); |
| |
| // check that lady% return lady gaga (1) and lady pank (3) but not lady of bells(2) |
| Assert.assertEquals(convert(1, 3), convert(onDisk.search(expressionFor("lady", Operator.LIKE_PREFIX)))); |
| |
| onDisk.close(); |
| } |
| |
| private void testSearchRangeWithSuperBlocks(OnDiskIndex onDiskIndex, long start, long end) |
| { |
| RangeIterator<Long, Token> tokens = onDiskIndex.search(expressionFor(start, true, end, false)); |
| |
| // no results should be produced only if range is empty |
| if (tokens == null) |
| { |
| Assert.assertEquals(0, end - start); |
| return; |
| } |
| |
| int keyCount = 0; |
| Long lastToken = null; |
| while (tokens.hasNext()) |
| { |
| Token token = tokens.next(); |
| Iterator<DecoratedKey> keys = token.iterator(); |
| |
| // each of the values should have exactly a single key |
| Assert.assertTrue(keys.hasNext()); |
| keys.next(); |
| Assert.assertFalse(keys.hasNext()); |
| |
| // and it's last should always smaller than current |
| if (lastToken != null) |
| Assert.assertTrue("last should be less than current", lastToken.compareTo(token.get()) < 0); |
| |
| lastToken = token.get(); |
| keyCount++; |
| } |
| |
| Assert.assertEquals(end - start, keyCount); |
| } |
| |
| private static DecoratedKey keyAt(long rawKey) |
| { |
| ByteBuffer key = ByteBuffer.wrap(("key" + rawKey).getBytes()); |
| return new BufferDecoratedKey(new Murmur3Partitioner.LongToken(MurmurHash.hash2_64(key, key.position(), key.remaining(), 0)), key); |
| } |
| |
| private static TokenTreeBuilder keyBuilder(Long... keys) |
| { |
| TokenTreeBuilder builder = new DynamicTokenTreeBuilder(); |
| |
| for (final Long key : keys) |
| { |
| DecoratedKey dk = keyAt(key); |
| builder.add((Long) dk.getToken().getTokenValue(), key); |
| } |
| |
| return builder.finish(); |
| } |
| |
| private static Set<DecoratedKey> convert(TokenTreeBuilder offsets) |
| { |
| Set<DecoratedKey> result = new HashSet<>(); |
| |
| Iterator<Pair<Long, LongSet>> offsetIter = offsets.iterator(); |
| while (offsetIter.hasNext()) |
| { |
| LongSet v = offsetIter.next().right; |
| |
| for (LongCursor offset : v) |
| result.add(keyAt(offset.value)); |
| } |
| return result; |
| } |
| |
| private static Set<DecoratedKey> convert(long... keyOffsets) |
| { |
| Set<DecoratedKey> result = new HashSet<>(); |
| for (long offset : keyOffsets) |
| result.add(keyAt(offset)); |
| |
| return result; |
| } |
| |
| private static Set<DecoratedKey> convert(RangeIterator<Long, Token> results) |
| { |
| if (results == null) |
| return Collections.emptySet(); |
| |
| Set<DecoratedKey> keys = new TreeSet<>(DecoratedKey.comparator); |
| |
| while (results.hasNext()) |
| { |
| for (DecoratedKey key : results.next()) |
| keys.add(key); |
| } |
| |
| return keys; |
| } |
| |
| private static Expression expressionFor(long lower, boolean lowerInclusive, long upper, boolean upperInclusive) |
| { |
| Expression expression = new Expression("", LongType.instance); |
| expression.add(lowerInclusive ? Operator.GTE : Operator.GT, LongType.instance.decompose(lower)); |
| expression.add(upperInclusive ? Operator.LTE : Operator.LT, LongType.instance.decompose(upper)); |
| return expression; |
| } |
| |
| private static Expression expressionFor(AbstractType<?> validator, ByteBuffer term) |
| { |
| return expressionFor(Operator.LIKE_CONTAINS, validator, term); |
| } |
| |
| private static Expression expressionFor(Operator op, AbstractType<?> validator, ByteBuffer term) |
| { |
| Expression expression = new Expression("", validator); |
| expression.add(op, term); |
| return expression; |
| } |
| |
| private static Expression expressionForNot(AbstractType<?> validator, ByteBuffer lower, ByteBuffer upper, Iterable<ByteBuffer> terms) |
| { |
| Expression expression = new Expression("", validator); |
| expression.setOp(Expression.Op.RANGE); |
| expression.setLower(new Expression.Bound(lower, true)); |
| expression.setUpper(new Expression.Bound(upper, true)); |
| for (ByteBuffer term : terms) |
| expression.add(Operator.NEQ, term); |
| return expression; |
| |
| } |
| |
| private static Expression expressionForNot(Integer lower, Integer upper, Integer... terms) |
| { |
| return expressionForNot(Int32Type.instance, |
| Int32Type.instance.decompose(lower), |
| Int32Type.instance.decompose(upper), |
| Arrays.asList(terms).stream().map(Int32Type.instance::decompose).collect(Collectors.toList())); |
| } |
| |
| private static Expression rangeWithExclusions(long lower, boolean lowerInclusive, long upper, boolean upperInclusive, Set<Long> exclusions) |
| { |
| Expression expression = expressionFor(lower, lowerInclusive, upper, upperInclusive); |
| for (long e : exclusions) |
| expression.add(Operator.NEQ, LongType.instance.decompose(e)); |
| |
| return expression; |
| } |
| |
| private static Expression expressionForNot(String lower, String upper, String... terms) |
| { |
| return expressionForNot(UTF8Type.instance, |
| UTF8Type.instance.decompose(lower), |
| UTF8Type.instance.decompose(upper), |
| Arrays.asList(terms).stream().map(UTF8Type.instance::decompose).collect(Collectors.toList())); |
| } |
| |
| private static Expression expressionFor(String term) |
| { |
| return expressionFor(term, Operator.LIKE_CONTAINS); |
| } |
| |
| private static Expression expressionFor(String term, Operator op) |
| { |
| return expressionFor(op, UTF8Type.instance, UTF8Type.instance.decompose(term)); |
| } |
| |
| private static void addAll(OnDiskIndexBuilder builder, ByteBuffer term, TokenTreeBuilder tokens) |
| { |
| for (Pair<Long, LongSet> token : tokens) |
| { |
| for (long position : token.right.toArray()) |
| builder.add(term, keyAt(position), position); |
| } |
| } |
| |
| private static class KeyConverter implements Function<Long, DecoratedKey> |
| { |
| @Override |
| public DecoratedKey apply(Long offset) |
| { |
| return keyAt(offset); |
| } |
| } |
| } |