/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.cassandra.index.sai.disk.v1;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;

import org.junit.BeforeClass;
import org.junit.Test;

import com.carrotsearch.hppc.LongArrayList;

import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.cql3.Operator;
import org.apache.cassandra.db.ClusteringComparator;
import org.apache.cassandra.db.marshal.UTF8Type;
import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.dht.Token;
import org.apache.cassandra.index.sai.QueryContext;
import org.apache.cassandra.index.sai.SAITester;
import org.apache.cassandra.index.sai.StorageAttachedIndex;
import org.apache.cassandra.index.sai.iterators.KeyRangeIterator;
import org.apache.cassandra.index.sai.memory.MemtableTermsIterator;
import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
import org.apache.cassandra.index.sai.disk.v1.segment.IndexSegmentSearcher;
import org.apache.cassandra.index.sai.disk.v1.segment.LiteralIndexSegmentSearcher;
import org.apache.cassandra.index.sai.disk.v1.segment.SegmentMetadata;
import org.apache.cassandra.index.sai.disk.v1.trie.LiteralIndexWriter;
import org.apache.cassandra.index.sai.plan.Expression;
import org.apache.cassandra.index.sai.utils.PrimaryKey;
import org.apache.cassandra.index.sai.utils.SAIRandomizedTester;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.utils.Pair;
import org.apache.cassandra.utils.bytecomparable.ByteComparable;
import org.apache.cassandra.utils.bytecomparable.ByteSource;
import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse;

import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
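
/**
 * Tests {@link LiteralIndexSegmentSearcher} against on-disk literal (trie) index segments:
 * exact-match queries over randomly generated string terms, iterator skipping, and
 * rejection of operators the literal index cannot serve.
 */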
public class InvertedIndexSearcherTest extends SAIRandomizedTester
{
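/**
 * A stub {@link PrimaryKeyMap} for these tests: row IDs and tokens are interchangeable,
 * so a row ID maps directly to a {@link Murmur3Partitioner.LongToken} of the same value.
 */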
public static final PrimaryKeyMap TEST_PRIMARY_KEY_MAP = new PrimaryKeyMap()
{
private final PrimaryKey.Factory primaryKeyFactory = new PrimaryKey.Factory(Murmur3Partitioner.instance, new ClusteringComparator());
@Override
public PrimaryKey primaryKeyFromRowId(long sstableRowId)
{
return primaryKeyFactory.create(new Murmur3Partitioner.LongToken(sstableRowId));
}
@Override
public long rowIdFromPrimaryKey(PrimaryKey key)
{
return key.token().getLongValue();
}
@Override
public long ceiling(Token token)
{
return 0;
}
@Override
public long floor(Token token)
{
return 0;
}
};
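
// Factory handed to IndexSegmentSearcher.open(); it always returns the shared stub map above.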
public static final PrimaryKeyMap.Factory TEST_PRIMARY_KEY_MAP_FACTORY = () -> TEST_PRIMARY_KEY_MAP;
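
// The tests build Murmur3 tokens directly, so install the Murmur3 partitioner up front.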
@BeforeClass
public static void setupCQLTester()
{
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
StorageService.instance.setPartitionerUnsafe(Murmur3Partitioner.instance);
}
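
// Exact-match queries: every indexed term must be retrievable, with its postings returned in order.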
@Test
public void testEqQueriesAgainstStringIndex() throws Exception
{
QueryContext context = mock(QueryContext.class);
final StorageAttachedIndex index = createMockIndex(UTF8Type.instance);
final int numTerms = getRandom().nextIntBetween(64, 512), numPostings = getRandom().nextIntBetween(256, 1024);
final List<Pair<ByteComparable, LongArrayList>> termsEnum = buildTermsEnum(numTerms, numPostings);
try (IndexSegmentSearcher searcher = buildIndexAndOpenSearcher(index, numTerms, numPostings, termsEnum))
{
for (int t = 0; t < numTerms; ++t)
{
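// First pass: iterate the full posting list for this term and check every token.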
try (KeyRangeIterator results = searcher.search(Expression.create(index).add(Operator.EQ, wrap(termsEnum.get(t).left)), null, context))
{
assertTrue(results.hasNext());
for (int p = 0; p < numPostings; ++p)
{
final long expectedToken = termsEnum.get(t).right.get(p);
assertTrue(results.hasNext());
final long actualToken = results.next().token().getLongValue();
assertEquals(expectedToken, actualToken);
}
assertFalse(results.hasNext());
}
try (KeyRangeIterator results = searcher.search(Expression.create(index).add(Operator.EQ, wrap(termsEnum.get(t).left)), null, context))
{
assertTrue(results.hasNext());
// test skipping to the last block
final int idxToSkip = numPostings - 7;
// tokens are equal to their corresponding row IDs
final long tokenToSkip = termsEnum.get(t).right.get(idxToSkip);
results.skipTo(SAITester.TEST_FACTORY.create(new Murmur3Partitioner.LongToken(tokenToSkip)));
for (int p = idxToSkip; p < numPostings; ++p)
{
final long expectedToken = termsEnum.get(t).right.get(p);
final long actualToken = results.next().token().getLongValue();
assertEquals(expectedToken, actualToken);
}
}
}
// search for terms that were never indexed: lengths 10-12 and 1-2 fall outside the 3-5 character range of the indexed terms
final String tooLongTerm = randomSimpleString(10, 12);
KeyRangeIterator results = searcher.search(Expression.create(index).add(Operator.EQ, UTF8Type.instance.decompose(tooLongTerm)), null, context);
assertFalse(results.hasNext());
final String tooShortTerm = randomSimpleString(1, 2);
results = searcher.search(Expression.create(index).add(Operator.EQ, UTF8Type.instance.decompose(tooShortTerm)), null, context);
assertFalse(results.hasNext());
}
}
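
// The literal index only answers equality-style expressions; a range operator such as GT
// is expected to be rejected with an IllegalArgumentException.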
@Test
public void testUnsupportedOperator() throws Exception
{
QueryContext context = mock(QueryContext.class);
final StorageAttachedIndex index = createMockIndex(UTF8Type.instance);
final int numTerms = getRandom().nextIntBetween(5, 15), numPostings = getRandom().nextIntBetween(5, 20);
final List<Pair<ByteComparable, LongArrayList>> termsEnum = buildTermsEnum(numTerms, numPostings);
try (IndexSegmentSearcher searcher = buildIndexAndOpenSearcher(index, numTerms, numPostings, termsEnum))
{
searcher.search(Expression.create(index).add(Operator.GT, UTF8Type.instance.decompose("a")), null, context);
fail("Expect IllegalArgumentException thrown, but didn't");
}
catch (IllegalArgumentException e)
{
// expected
}
}
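
/**
 * Writes the given terms and postings to an on-disk segment with {@link LiteralIndexWriter},
 * then opens a searcher over the resulting files and asserts that the literal
 * implementation was selected.
 */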
private IndexSegmentSearcher buildIndexAndOpenSearcher(StorageAttachedIndex index,
int terms,
int postings,
List<Pair<ByteComparable, LongArrayList>> termsEnum) throws IOException
{
final int size = terms * postings;
final IndexDescriptor indexDescriptor = newIndexDescriptor();
SegmentMetadata.ComponentMetadataMap indexMetas;
LiteralIndexWriter writer = new LiteralIndexWriter(indexDescriptor, index.identifier());
indexMetas = writer.writeCompleteSegment(new MemtableTermsIterator(null, null, termsEnum.iterator()));
final SegmentMetadata segmentMetadata = new SegmentMetadata(0,
size,
0,
Long.MAX_VALUE,
SAITester.TEST_FACTORY.create(DatabaseDescriptor.getPartitioner().getMinimumToken()),
SAITester.TEST_FACTORY.create(DatabaseDescriptor.getPartitioner().getMaximumToken()),
wrap(termsEnum.get(0).left),
wrap(termsEnum.get(terms - 1).left),
indexMetas);
try (PerColumnIndexFiles indexFiles = new PerColumnIndexFiles(indexDescriptor, index.termType(), index.identifier()))
{
final IndexSegmentSearcher searcher = IndexSegmentSearcher.open(TEST_PRIMARY_KEY_MAP_FACTORY,
indexFiles,
segmentMetadata,
index);
assertThat(searcher, is(instanceOf(LiteralIndexSegmentSearcher.class)));
return searcher;
}
}
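
// Builds the test corpus: random 3-5 character strings (the terms), each paired with a list
// of random row IDs that act as its postings.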
private List<Pair<ByteComparable, LongArrayList>> buildTermsEnum(int terms, int postings)
{
return InvertedIndexBuilder.buildStringTermsEnum(terms, postings, () -> randomSimpleString(3, 5), () -> nextInt(0, Integer.MAX_VALUE));
}
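
// Decodes a ByteComparable back into the raw term bytes (undoing the OSS50 escaping) and
// wraps them in a ByteBuffer so the term can be passed to Expression#add.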
private ByteBuffer wrap(ByteComparable bc)
{
return ByteBuffer.wrap(ByteSourceInverse.readBytes(ByteSourceInverse.unescape(ByteSource.peekable(bc.asComparableBytes(ByteComparable.Version.OSS50)))));
}
}