| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.cassandra.index.sasi.disk; |
| |
| import java.io.IOException; |
| import java.nio.ByteBuffer; |
| import java.util.Iterator; |
| import java.util.SortedMap; |
| |
| import org.apache.cassandra.index.sasi.utils.CombinedTerm; |
| import org.apache.cassandra.index.sasi.utils.RangeIterator; |
| import org.apache.cassandra.io.util.DataOutputPlus; |
| import org.apache.cassandra.utils.AbstractIterator; |
| import org.apache.cassandra.utils.Pair; |
| |
| import com.carrotsearch.hppc.LongSet; |
| import com.google.common.collect.Iterators; |
| |
| /** |
| * Intended usage of this class is to be used in place of {@link DynamicTokenTreeBuilder} |
| * when multiple index segments produced by {@link PerSSTableIndexWriter} are stitched together |
| * by {@link PerSSTableIndexWriter#complete()}. |
| * |
| * This class uses the RangeIterator, now provided by |
| * {@link CombinedTerm#getTokenIterator()}, to iterate the data twice. |
| * The first iteration builds the tree with leaves that contain only enough |
| * information to build the upper layers -- these leaves do not store more |
| * than their minimum and maximum tokens plus their total size, which makes them |
| * un-serializable. |
| * |
| * When the tree is written to disk the final layer is not |
| * written. It's at this point that the data is iterated once again to write |
| * the leaves to disk. This (logarithmically) reduces copying of the |
| * token values while building and writing upper layers of the tree, |
| * removes the use of SortedMap when combining SAs, and relies on the |
| * memory mapped SAs otherwise, greatly improving performance and no |
| * longer causing OOMs when TokenTree sizes are big. |
| * |
| * See https://issues.apache.org/jira/browse/CASSANDRA-11383 for more details. |
| */ |
| @SuppressWarnings("resource") |
| public class StaticTokenTreeBuilder extends AbstractTokenTreeBuilder |
| { |
| private final CombinedTerm combinedTerm; |
| |
| public StaticTokenTreeBuilder(CombinedTerm term) |
| { |
| combinedTerm = term; |
| } |
| |
| public void add(Long token, long keyPosition) |
| { |
| throw new UnsupportedOperationException(); |
| } |
| |
| public void add(SortedMap<Long, LongSet> data) |
| { |
| throw new UnsupportedOperationException(); |
| } |
| |
| public void add(Iterator<Pair<Long, LongSet>> data) |
| { |
| throw new UnsupportedOperationException(); |
| } |
| |
| public boolean isEmpty() |
| { |
| return tokenCount == 0; |
| } |
| |
| public Iterator<Pair<Long, LongSet>> iterator() |
| { |
| Iterator<Token> iterator = combinedTerm.getTokenIterator(); |
| return new AbstractIterator<Pair<Long, LongSet>>() |
| { |
| protected Pair<Long, LongSet> computeNext() |
| { |
| if (!iterator.hasNext()) |
| return endOfData(); |
| |
| Token token = iterator.next(); |
| return Pair.create(token.get(), token.getOffsets()); |
| } |
| }; |
| } |
| |
| public long getTokenCount() |
| { |
| return tokenCount; |
| } |
| |
| @Override |
| public void write(DataOutputPlus out) throws IOException |
| { |
| // if the root is not a leaf then none of the leaves have been written (all are PartialLeaf) |
| // so write out the last layer of the tree by converting PartialLeaf to StaticLeaf and |
| // iterating the data once more |
| super.write(out); |
| if (root.isLeaf()) |
| return; |
| |
| RangeIterator<Long, Token> tokens = combinedTerm.getTokenIterator(); |
| ByteBuffer blockBuffer = ByteBuffer.allocate(BLOCK_BYTES); |
| Iterator<Node> leafIterator = leftmostLeaf.levelIterator(); |
| while (leafIterator.hasNext()) |
| { |
| Leaf leaf = (Leaf) leafIterator.next(); |
| Leaf writeableLeaf = new StaticLeaf(Iterators.limit(tokens, leaf.tokenCount()), leaf); |
| writeableLeaf.serialize(-1, blockBuffer); |
| flushBuffer(blockBuffer, out, true); |
| } |
| |
| } |
| |
| protected void constructTree() |
| { |
| RangeIterator<Long, Token> tokens = combinedTerm.getTokenIterator(); |
| |
| tokenCount = 0; |
| treeMinToken = tokens.getMinimum(); |
| treeMaxToken = tokens.getMaximum(); |
| numBlocks = 1; |
| |
| root = new InteriorNode(); |
| rightmostParent = (InteriorNode) root; |
| Leaf lastLeaf = null; |
| Long lastToken, firstToken = null; |
| int leafSize = 0; |
| while (tokens.hasNext()) |
| { |
| Long token = tokens.next().get(); |
| if (firstToken == null) |
| firstToken = token; |
| |
| tokenCount++; |
| leafSize++; |
| |
| // skip until the last token in the leaf |
| if (tokenCount % TOKENS_PER_BLOCK != 0 && token != treeMaxToken) |
| continue; |
| |
| lastToken = token; |
| Leaf leaf = new PartialLeaf(firstToken, lastToken, leafSize); |
| if (lastLeaf == null) // first leaf created |
| leftmostLeaf = leaf; |
| else |
| lastLeaf.next = leaf; |
| |
| |
| rightmostParent.add(leaf); |
| lastLeaf = rightmostLeaf = leaf; |
| firstToken = null; |
| numBlocks++; |
| leafSize = 0; |
| } |
| |
| // if the tree is really a single leaf the empty root interior |
| // node must be discarded |
| if (root.tokenCount() == 0) |
| { |
| numBlocks = 1; |
| root = new StaticLeaf(combinedTerm.getTokenIterator(), treeMinToken, treeMaxToken, tokenCount, true); |
| } |
| } |
| |
| // This denotes the leaf which only has min/max and token counts |
| // but doesn't have any associated data yet, so it can't be serialized. |
| private class PartialLeaf extends Leaf |
| { |
| private final int size; |
| public PartialLeaf(Long min, Long max, int count) |
| { |
| super(min, max); |
| size = count; |
| } |
| |
| public int tokenCount() |
| { |
| return size; |
| } |
| |
| public void serializeData(ByteBuffer buf) |
| { |
| throw new UnsupportedOperationException(); |
| } |
| |
| public boolean isSerializable() |
| { |
| return false; |
| } |
| } |
| |
| // This denotes the leaf which has been filled with data and is ready to be serialized |
| private class StaticLeaf extends Leaf |
| { |
| private final Iterator<Token> tokens; |
| private final int count; |
| private final boolean isLast; |
| |
| public StaticLeaf(Iterator<Token> tokens, Leaf leaf) |
| { |
| this(tokens, leaf.smallestToken(), leaf.largestToken(), leaf.tokenCount(), leaf.isLastLeaf()); |
| } |
| |
| public StaticLeaf(Iterator<Token> tokens, Long min, Long max, long count, boolean isLastLeaf) |
| { |
| super(min, max); |
| |
| this.count = (int) count; // downcast is safe since leaf size is always < Integer.MAX_VALUE |
| this.tokens = tokens; |
| this.isLast = isLastLeaf; |
| } |
| |
| public boolean isLastLeaf() |
| { |
| return isLast; |
| } |
| |
| public int tokenCount() |
| { |
| return count; |
| } |
| |
| public void serializeData(ByteBuffer buf) |
| { |
| while (tokens.hasNext()) |
| { |
| Token entry = tokens.next(); |
| createEntry(entry.get(), entry.getOffsets()).serialize(buf); |
| } |
| } |
| |
| public boolean isSerializable() |
| { |
| return true; |
| } |
| } |
| } |