StaticTokenTreeBuilder should respect possibility of duplicate tokens
patch by jrwest and xedin; reviewed by xedin for CASSANDRA-11525
diff --git a/CHANGES.txt b/CHANGES.txt
index 58d8ae8..392d9e7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
3.5
+ * StaticTokenTreeBuilder should respect possibility of duplicate tokens (CASSANDRA-11525)
* Correctly fix potential assertion error during compaction (CASSANDRA-11353)
* Avoid index segment stitching in RAM which lead to OOM on big SSTable files (CASSANDRA-11383)
* Fix clustering and row filters for LIKE queries on clustering columns (CASSANDRA-11397)
diff --git a/src/java/org/apache/cassandra/index/sasi/disk/AbstractTokenTreeBuilder.java b/src/java/org/apache/cassandra/index/sasi/disk/AbstractTokenTreeBuilder.java
index 4e93b2b..9a1f7f1 100644
--- a/src/java/org/apache/cassandra/index/sasi/disk/AbstractTokenTreeBuilder.java
+++ b/src/java/org/apache/cassandra/index/sasi/disk/AbstractTokenTreeBuilder.java
@@ -397,6 +397,7 @@
public short offsetExtra()
{
+ // extra offset is supposed to be an unsigned 16-bit integer
return (short) offset;
}
}
diff --git a/src/java/org/apache/cassandra/index/sasi/disk/StaticTokenTreeBuilder.java b/src/java/org/apache/cassandra/index/sasi/disk/StaticTokenTreeBuilder.java
index 147427e..7a41b38 100644
--- a/src/java/org/apache/cassandra/index/sasi/disk/StaticTokenTreeBuilder.java
+++ b/src/java/org/apache/cassandra/index/sasi/disk/StaticTokenTreeBuilder.java
@@ -79,7 +79,7 @@
public boolean isEmpty()
{
- return combinedTerm.getTokenIterator().getCount() == 0;
+ return tokenCount == 0;
}
public Iterator<Pair<Long, LongSet>> iterator()
@@ -100,7 +100,7 @@
public long getTokenCount()
{
- return combinedTerm.getTokenIterator().getCount();
+ return tokenCount;
}
@Override
@@ -130,64 +130,50 @@
{
RangeIterator<Long, Token> tokens = combinedTerm.getTokenIterator();
- tokenCount = tokens.getCount();
+ tokenCount = 0;
treeMinToken = tokens.getMinimum();
treeMaxToken = tokens.getMaximum();
numBlocks = 1;
- if (tokenCount <= TOKENS_PER_BLOCK)
+ root = new InteriorNode();
+ rightmostParent = (InteriorNode) root;
+ Leaf lastLeaf = null;
+ Long lastToken, firstToken = null;
+ int leafSize = 0;
+ while (tokens.hasNext())
{
- leftmostLeaf = new StaticLeaf(tokens, tokens.getMinimum(), tokens.getMaximum(), tokens.getCount(), true);
- rightmostLeaf = leftmostLeaf;
- root = leftmostLeaf;
+ Long token = tokens.next().get();
+ if (firstToken == null)
+ firstToken = token;
+
+ tokenCount++;
+ leafSize++;
+
+ // skip until the last token in the leaf
+ if (tokenCount % TOKENS_PER_BLOCK != 0 && token != treeMaxToken)
+ continue;
+
+ lastToken = token;
+ Leaf leaf = new PartialLeaf(firstToken, lastToken, leafSize);
+ if (lastLeaf == null) // first leaf created
+ leftmostLeaf = leaf;
+ else
+ lastLeaf.next = leaf;
+
+
+ rightmostParent.add(leaf);
+ lastLeaf = rightmostLeaf = leaf;
+ firstToken = null;
+ numBlocks++;
+ leafSize = 0;
}
- else
+
+ // if the tree is really a single leaf the empty root interior
+ // node must be discarded
+ if (root.tokenCount() == 0)
{
- root = new InteriorNode();
- rightmostParent = (InteriorNode) root;
-
- // build all the leaves except for maybe
- // the last leaf which is not completely full .
- // This loop relies on the fact that multiple index segments
- // will never have token intersection for a single term,
- // because it's impossible to encounter the same value for
- // the same column multiple times in a single key/sstable.
- Leaf lastLeaf = null;
- long numFullLeaves = tokenCount / TOKENS_PER_BLOCK;
- for (long i = 0; i < numFullLeaves; i++)
- {
- Long firstToken = tokens.next().get();
- for (int j = 1; j < (TOKENS_PER_BLOCK - 1); j++)
- tokens.next();
-
- Long lastToken = tokens.next().get();
- Leaf leaf = new PartialLeaf(firstToken, lastToken, TOKENS_PER_BLOCK);
-
- if (lastLeaf == null)
- leftmostLeaf = leaf;
- else
- lastLeaf.next = leaf;
-
- rightmostParent.add(leaf);
- lastLeaf = rightmostLeaf = leaf;
- numBlocks++;
- }
-
- // build the last leaf out of any remaining tokens if necessary
- // safe downcast since TOKENS_PER_BLOCK is an int
- int remainingTokens = (int) (tokenCount % TOKENS_PER_BLOCK);
- if (remainingTokens != 0)
- {
- Long firstToken = tokens.next().get();
- Long lastToken = firstToken;
- while (tokens.hasNext())
- lastToken = tokens.next().get();
-
- Leaf leaf = new PartialLeaf(firstToken, lastToken, remainingTokens);
- rightmostParent.add(leaf);
- lastLeaf.next = rightmostLeaf = leaf;
- numBlocks++;
- }
+ numBlocks = 1;
+ root = new StaticLeaf(combinedTerm.getTokenIterator(), treeMinToken, treeMaxToken, tokenCount, true);
}
}
diff --git a/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java b/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java
index 3f8182d..c69ce00 100644
--- a/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java
+++ b/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java
@@ -470,30 +470,32 @@
private long[] fetchOffsets()
{
short info = buffer.getShort(position);
- short offsetShort = buffer.getShort(position + SHORT_BYTES);
- int offsetInt = buffer.getInt(position + (2 * SHORT_BYTES) + LONG_BYTES);
+ // offset extra is unsigned short (right-most 16 bits of 48 bits allowed for an offset)
+ int offsetExtra = buffer.getShort(position + SHORT_BYTES) & 0xFFFF;
+ // this is the left-most (32-bit) base of the actual offset in the index file
+ int offsetData = buffer.getInt(position + (2 * SHORT_BYTES) + LONG_BYTES);
EntryType type = EntryType.of(info & TokenTreeBuilder.ENTRY_TYPE_MASK);
switch (type)
{
case SIMPLE:
- return new long[] { offsetInt };
+ return new long[] { offsetData };
case OVERFLOW:
- long[] offsets = new long[offsetShort]; // offsetShort contains count of tokens
- long offsetPos = (buffer.position() + (2 * (leafSize * LONG_BYTES)) + (offsetInt * LONG_BYTES));
+ long[] offsets = new long[offsetExtra]; // offsetExtra contains count of tokens
+ long offsetPos = (buffer.position() + (2 * (leafSize * LONG_BYTES)) + (offsetData * LONG_BYTES));
- for (int i = 0; i < offsetShort; i++)
+ for (int i = 0; i < offsetExtra; i++)
offsets[i] = buffer.getLong(offsetPos + (i * LONG_BYTES));
return offsets;
case FACTORED:
- return new long[] { (((long) offsetInt) << Short.SIZE) + offsetShort };
+ return new long[] { (((long) offsetData) << Short.SIZE) + offsetExtra };
case PACKED:
- return new long[] { offsetShort, offsetInt };
+ return new long[] { offsetExtra, offsetData };
default:
throw new IllegalStateException("Unknown entry type: " + type);
diff --git a/test/unit/org/apache/cassandra/index/sasi/disk/TokenTreeTest.java b/test/unit/org/apache/cassandra/index/sasi/disk/TokenTreeTest.java
index 189e9c6..67e54f4 100644
--- a/test/unit/org/apache/cassandra/index/sasi/disk/TokenTreeTest.java
+++ b/test/unit/org/apache/cassandra/index/sasi/disk/TokenTreeTest.java
@@ -33,6 +33,7 @@
import org.apache.cassandra.index.sasi.utils.MappedBuffer;
import org.apache.cassandra.index.sasi.utils.RangeIterator;
import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.index.sasi.utils.RangeUnionIterator;
import org.apache.cassandra.io.compress.BufferType;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.utils.MurmurHash;
@@ -52,7 +53,7 @@
private static final Function<Long, DecoratedKey> KEY_CONVERTER = new KeyConverter();
static LongSet singleOffset = new LongOpenHashSet() {{ add(1); }};
- static LongSet bigSingleOffset = new LongOpenHashSet() {{ add(((long) Integer.MAX_VALUE) + 10); }};
+ static LongSet bigSingleOffset = new LongOpenHashSet() {{ add(2147521562L); }};
static LongSet shortPackableCollision = new LongOpenHashSet() {{ add(2L); add(3L); }}; // can pack two shorts
static LongSet intPackableCollision = new LongOpenHashSet() {{ add(6L); add(((long) Short.MAX_VALUE) + 1); }}; // can pack int & short
static LongSet multiCollision = new LongOpenHashSet() {{ add(3L); add(4L); add(5L); }}; // can't pack
@@ -353,6 +354,75 @@
Assert.assertEquals(EntryType.OVERFLOW, EntryType.of(EntryType.OVERFLOW.ordinal()));
}
+ @Test
+ public void testMergingOfEqualTokenTrees() throws Exception
+ {
+ testMergingOfEqualTokenTrees(simpleTokenMap);
+ testMergingOfEqualTokenTrees(bigTokensMap);
+ }
+
+ public void testMergingOfEqualTokenTrees(SortedMap<Long, LongSet> tokensMap) throws Exception
+ {
+ TokenTreeBuilder tokensA = new DynamicTokenTreeBuilder(tokensMap);
+ TokenTreeBuilder tokensB = new DynamicTokenTreeBuilder(tokensMap);
+
+ TokenTree a = buildTree(tokensA);
+ TokenTree b = buildTree(tokensB);
+
+ TokenTreeBuilder tokensC = new StaticTokenTreeBuilder(new CombinedTerm(null, null)
+ {
+ public RangeIterator<Long, Token> getTokenIterator()
+ {
+ RangeIterator.Builder<Long, Token> union = RangeUnionIterator.builder();
+ union.add(a.iterator(new KeyConverter()));
+ union.add(b.iterator(new KeyConverter()));
+
+ return union.build();
+ }
+ });
+
+ TokenTree c = buildTree(tokensC);
+ Assert.assertEquals(tokensMap.size(), c.getCount());
+
+ Iterator<Token> tokenIterator = c.iterator(KEY_CONVERTER);
+ Iterator<Map.Entry<Long, LongSet>> listIterator = tokensMap.entrySet().iterator();
+ while (tokenIterator.hasNext() && listIterator.hasNext())
+ {
+ Token treeNext = tokenIterator.next();
+ Map.Entry<Long, LongSet> listNext = listIterator.next();
+
+ Assert.assertEquals(listNext.getKey(), treeNext.get());
+ Assert.assertEquals(convert(listNext.getValue()), convert(treeNext));
+ }
+
+ for (Map.Entry<Long, LongSet> entry : tokensMap.entrySet())
+ {
+ TokenTree.OnDiskToken result = c.get(entry.getKey(), KEY_CONVERTER);
+ Assert.assertNotNull("failed to find object for token " + entry.getKey(), result);
+
+ LongSet found = result.getOffsets();
+ Assert.assertEquals(entry.getValue(), found);
+
+ }
+ }
+
+
+ private static TokenTree buildTree(TokenTreeBuilder builder) throws Exception
+ {
+ builder.finish();
+ final File treeFile = File.createTempFile("token-tree-", "db");
+ treeFile.deleteOnExit();
+
+ try (SequentialWriter writer = new SequentialWriter(treeFile, 4096, BufferType.ON_HEAP))
+ {
+ builder.write(writer);
+ writer.sync();
+ }
+
+ final RandomAccessReader reader = RandomAccessReader.open(treeFile);
+ return new TokenTree(new MappedBuffer(reader));
+ }
+
private static class EntrySetSkippableIterator extends RangeIterator<Long, TokenWithOffsets>
{
private final PeekingIterator<Map.Entry<Long, LongSet>> elements;