| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.cassandra.index.sasi.disk; |
| |
| import java.io.IOException; |
| import java.util.*; |
| |
| import org.apache.cassandra.db.DecoratedKey; |
| import org.apache.cassandra.index.sasi.utils.AbstractIterator; |
| import org.apache.cassandra.index.sasi.utils.CombinedValue; |
| import org.apache.cassandra.index.sasi.utils.MappedBuffer; |
| import org.apache.cassandra.index.sasi.utils.RangeIterator; |
| import org.apache.cassandra.utils.MergeIterator; |
| |
| import com.carrotsearch.hppc.LongOpenHashSet; |
| import com.carrotsearch.hppc.LongSet; |
| import com.google.common.annotations.VisibleForTesting; |
| import com.google.common.base.Function; |
| import com.google.common.collect.Iterators; |
| import org.apache.commons.lang3.builder.HashCodeBuilder; |
| |
| import static org.apache.cassandra.index.sasi.disk.TokenTreeBuilder.EntryType; |
| |
| // Note: all of the seek-able offsets contained in TokenTree should be sizeof(long) |
| // even if currently only lower int portion of them if used, because that makes |
| // it possible to switch to mmap implementation which supports long positions |
| // without any on-disk format changes and/or re-indexing if one day we'll have a need to. |
| public class TokenTree |
| { |
| private static final int LONG_BYTES = Long.SIZE / 8; |
| private static final int SHORT_BYTES = Short.SIZE / 8; |
| |
| private final Descriptor descriptor; |
| private final MappedBuffer file; |
| private final long startPos; |
| private final long treeMinToken; |
| private final long treeMaxToken; |
| private final long tokenCount; |
| |
| @VisibleForTesting |
| protected TokenTree(MappedBuffer tokenTree) |
| { |
| this(Descriptor.CURRENT, tokenTree); |
| } |
| |
| public TokenTree(Descriptor d, MappedBuffer tokenTree) |
| { |
| descriptor = d; |
| file = tokenTree; |
| startPos = file.position(); |
| |
| file.position(startPos + TokenTreeBuilder.SHARED_HEADER_BYTES); |
| |
| if (!validateMagic()) |
| throw new IllegalArgumentException("invalid token tree"); |
| |
| tokenCount = file.getLong(); |
| treeMinToken = file.getLong(); |
| treeMaxToken = file.getLong(); |
| } |
| |
| public long getCount() |
| { |
| return tokenCount; |
| } |
| |
| public RangeIterator<Long, Token> iterator(Function<Long, DecoratedKey> keyFetcher) |
| { |
| return new TokenTreeIterator(file.duplicate(), keyFetcher); |
| } |
| |
| public OnDiskToken get(final long searchToken, Function<Long, DecoratedKey> keyFetcher) |
| { |
| seekToLeaf(searchToken, file); |
| long leafStart = file.position(); |
| short leafSize = file.getShort(leafStart + 1); // skip the info byte |
| |
| file.position(leafStart + TokenTreeBuilder.BLOCK_HEADER_BYTES); // skip to tokens |
| short tokenIndex = searchLeaf(searchToken, leafSize); |
| |
| file.position(leafStart + TokenTreeBuilder.BLOCK_HEADER_BYTES); |
| |
| OnDiskToken token = OnDiskToken.getTokenAt(file, tokenIndex, leafSize, keyFetcher); |
| return token.get().equals(searchToken) ? token : null; |
| } |
| |
| private boolean validateMagic() |
| { |
| switch (descriptor.version.toString()) |
| { |
| case Descriptor.VERSION_AA: |
| return true; |
| case Descriptor.VERSION_AB: |
| return TokenTreeBuilder.AB_MAGIC == file.getShort(); |
| default: |
| return false; |
| } |
| } |
| |
| // finds leaf that *could* contain token |
| private void seekToLeaf(long token, MappedBuffer file) |
| { |
| // this loop always seeks forward except for the first iteration |
| // where it may seek back to the root |
| long blockStart = startPos; |
| while (true) |
| { |
| file.position(blockStart); |
| |
| byte info = file.get(); |
| boolean isLeaf = (info & 1) == 1; |
| |
| if (isLeaf) |
| { |
| file.position(blockStart); |
| break; |
| } |
| |
| short tokenCount = file.getShort(); |
| |
| long minToken = file.getLong(); |
| long maxToken = file.getLong(); |
| |
| long seekBase = blockStart + TokenTreeBuilder.BLOCK_HEADER_BYTES; |
| if (minToken > token) |
| { |
| // seek to beginning of child offsets to locate first child |
| file.position(seekBase + tokenCount * LONG_BYTES); |
| blockStart = (startPos + (int) file.getLong()); |
| } |
| else if (maxToken < token) |
| { |
| // seek to end of child offsets to locate last child |
| file.position(seekBase + (2 * tokenCount) * LONG_BYTES); |
| blockStart = (startPos + (int) file.getLong()); |
| } |
| else |
| { |
| // skip to end of block header/start of interior block tokens |
| file.position(seekBase); |
| |
| short offsetIndex = searchBlock(token, tokenCount, file); |
| |
| // file pointer is now at beginning of offsets |
| if (offsetIndex == tokenCount) |
| file.position(file.position() + (offsetIndex * LONG_BYTES)); |
| else |
| file.position(file.position() + ((tokenCount - offsetIndex - 1) + offsetIndex) * LONG_BYTES); |
| |
| blockStart = (startPos + (int) file.getLong()); |
| } |
| } |
| } |
| |
| private short searchBlock(long searchToken, short tokenCount, MappedBuffer file) |
| { |
| short offsetIndex = 0; |
| for (int i = 0; i < tokenCount; i++) |
| { |
| long readToken = file.getLong(); |
| if (searchToken < readToken) |
| break; |
| |
| offsetIndex++; |
| } |
| |
| return offsetIndex; |
| } |
| |
| private short searchLeaf(long searchToken, short tokenCount) |
| { |
| long base = file.position(); |
| |
| int start = 0; |
| int end = tokenCount; |
| int middle = 0; |
| |
| while (start <= end) |
| { |
| middle = start + ((end - start) >> 1); |
| |
| // each entry is 16 bytes wide, token is in bytes 4-11 |
| long token = file.getLong(base + (middle * (2 * LONG_BYTES) + 4)); |
| |
| if (token == searchToken) |
| break; |
| |
| if (token < searchToken) |
| start = middle + 1; |
| else |
| end = middle - 1; |
| } |
| |
| return (short) middle; |
| } |
| |
| public class TokenTreeIterator extends RangeIterator<Long, Token> |
| { |
| private final Function<Long, DecoratedKey> keyFetcher; |
| private final MappedBuffer file; |
| |
| private long currentLeafStart; |
| private int currentTokenIndex; |
| |
| private long leafMinToken; |
| private long leafMaxToken; |
| private short leafSize; |
| |
| protected boolean firstIteration = true; |
| private boolean lastLeaf; |
| |
| TokenTreeIterator(MappedBuffer file, Function<Long, DecoratedKey> keyFetcher) |
| { |
| super(treeMinToken, treeMaxToken, tokenCount); |
| |
| this.file = file; |
| this.keyFetcher = keyFetcher; |
| } |
| |
| protected Token computeNext() |
| { |
| maybeFirstIteration(); |
| |
| if (currentTokenIndex >= leafSize && lastLeaf) |
| return endOfData(); |
| |
| if (currentTokenIndex < leafSize) // tokens remaining in this leaf |
| { |
| return getTokenAt(currentTokenIndex++); |
| } |
| else // no more tokens remaining in this leaf |
| { |
| assert !lastLeaf; |
| |
| seekToNextLeaf(); |
| setupBlock(); |
| return computeNext(); |
| } |
| } |
| |
| protected void performSkipTo(Long nextToken) |
| { |
| maybeFirstIteration(); |
| |
| if (nextToken <= leafMaxToken) // next is in this leaf block |
| { |
| searchLeaf(nextToken); |
| } |
| else // next is in a leaf block that needs to be found |
| { |
| seekToLeaf(nextToken, file); |
| setupBlock(); |
| findNearest(nextToken); |
| } |
| } |
| |
| private void setupBlock() |
| { |
| currentLeafStart = file.position(); |
| currentTokenIndex = 0; |
| |
| lastLeaf = (file.get() & (1 << TokenTreeBuilder.LAST_LEAF_SHIFT)) > 0; |
| leafSize = file.getShort(); |
| |
| leafMinToken = file.getLong(); |
| leafMaxToken = file.getLong(); |
| |
| // seek to end of leaf header/start of data |
| file.position(currentLeafStart + TokenTreeBuilder.BLOCK_HEADER_BYTES); |
| } |
| |
| private void findNearest(Long next) |
| { |
| if (next > leafMaxToken && !lastLeaf) |
| { |
| seekToNextLeaf(); |
| setupBlock(); |
| findNearest(next); |
| } |
| else if (next > leafMinToken) |
| searchLeaf(next); |
| } |
| |
| private void searchLeaf(long next) |
| { |
| for (int i = currentTokenIndex; i < leafSize; i++) |
| { |
| if (compareTokenAt(currentTokenIndex, next) >= 0) |
| break; |
| |
| currentTokenIndex++; |
| } |
| } |
| |
| private int compareTokenAt(int idx, long toToken) |
| { |
| return Long.compare(file.getLong(getTokenPosition(idx)), toToken); |
| } |
| |
| private Token getTokenAt(int idx) |
| { |
| return OnDiskToken.getTokenAt(file, idx, leafSize, keyFetcher); |
| } |
| |
| private long getTokenPosition(int idx) |
| { |
| // skip 4 byte entry header to get position pointing directly at the entry's token |
| return OnDiskToken.getEntryPosition(idx, file) + (2 * SHORT_BYTES); |
| } |
| |
| private void seekToNextLeaf() |
| { |
| file.position(currentLeafStart + TokenTreeBuilder.BLOCK_BYTES); |
| } |
| |
| public void close() throws IOException |
| { |
| // nothing to do here |
| } |
| |
| private void maybeFirstIteration() |
| { |
| // seek to the first token only when requested for the first time, |
| // highly predictable branch and saves us a lot by not traversing the tree |
| // on creation time because it's not at all required. |
| if (!firstIteration) |
| return; |
| |
| seekToLeaf(treeMinToken, file); |
| setupBlock(); |
| firstIteration = false; |
| } |
| } |
| |
| public static class OnDiskToken extends Token |
| { |
| private final Set<TokenInfo> info = new HashSet<>(2); |
| private final Set<DecoratedKey> loadedKeys = new TreeSet<>(DecoratedKey.comparator); |
| |
| public OnDiskToken(MappedBuffer buffer, long position, short leafSize, Function<Long, DecoratedKey> keyFetcher) |
| { |
| super(buffer.getLong(position + (2 * SHORT_BYTES))); |
| info.add(new TokenInfo(buffer, position, leafSize, keyFetcher)); |
| } |
| |
| public void merge(CombinedValue<Long> other) |
| { |
| if (!(other instanceof Token)) |
| return; |
| |
| Token o = (Token) other; |
| if (token != o.token) |
| throw new IllegalArgumentException(String.format("%s != %s", token, o.token)); |
| |
| if (o instanceof OnDiskToken) |
| { |
| info.addAll(((OnDiskToken) other).info); |
| } |
| else |
| { |
| Iterators.addAll(loadedKeys, o.iterator()); |
| } |
| } |
| |
| public Iterator<DecoratedKey> iterator() |
| { |
| List<Iterator<DecoratedKey>> keys = new ArrayList<>(info.size()); |
| |
| for (TokenInfo i : info) |
| keys.add(i.iterator()); |
| |
| if (!loadedKeys.isEmpty()) |
| keys.add(loadedKeys.iterator()); |
| |
| return MergeIterator.get(keys, DecoratedKey.comparator, new MergeIterator.Reducer<DecoratedKey, DecoratedKey>() |
| { |
| DecoratedKey reduced = null; |
| |
| public boolean trivialReduceIsTrivial() |
| { |
| return true; |
| } |
| |
| public void reduce(int idx, DecoratedKey current) |
| { |
| reduced = current; |
| } |
| |
| protected DecoratedKey getReduced() |
| { |
| return reduced; |
| } |
| }); |
| } |
| |
| public LongSet getOffsets() |
| { |
| LongSet offsets = new LongOpenHashSet(4); |
| for (TokenInfo i : info) |
| { |
| for (long offset : i.fetchOffsets()) |
| offsets.add(offset); |
| } |
| |
| return offsets; |
| } |
| |
| public static OnDiskToken getTokenAt(MappedBuffer buffer, int idx, short leafSize, Function<Long, DecoratedKey> keyFetcher) |
| { |
| return new OnDiskToken(buffer, getEntryPosition(idx, buffer), leafSize, keyFetcher); |
| } |
| |
| private static long getEntryPosition(int idx, MappedBuffer file) |
| { |
| // info (4 bytes) + token (8 bytes) + offset (4 bytes) = 16 bytes |
| return file.position() + (idx * (2 * LONG_BYTES)); |
| } |
| } |
| |
| private static class TokenInfo |
| { |
| private final MappedBuffer buffer; |
| private final Function<Long, DecoratedKey> keyFetcher; |
| |
| private final long position; |
| private final short leafSize; |
| |
| public TokenInfo(MappedBuffer buffer, long position, short leafSize, Function<Long, DecoratedKey> keyFetcher) |
| { |
| this.keyFetcher = keyFetcher; |
| this.buffer = buffer; |
| this.position = position; |
| this.leafSize = leafSize; |
| } |
| |
| public Iterator<DecoratedKey> iterator() |
| { |
| return new KeyIterator(keyFetcher, fetchOffsets()); |
| } |
| |
| public int hashCode() |
| { |
| return new HashCodeBuilder().append(keyFetcher).append(position).append(leafSize).build(); |
| } |
| |
| public boolean equals(Object other) |
| { |
| if (!(other instanceof TokenInfo)) |
| return false; |
| |
| TokenInfo o = (TokenInfo) other; |
| return keyFetcher == o.keyFetcher && position == o.position; |
| } |
| |
| private long[] fetchOffsets() |
| { |
| short info = buffer.getShort(position); |
| // offset extra is unsigned short (right-most 16 bits of 48 bits allowed for an offset) |
| int offsetExtra = buffer.getShort(position + SHORT_BYTES) & 0xFFFF; |
| // is the it left-most (32-bit) base of the actual offset in the index file |
| int offsetData = buffer.getInt(position + (2 * SHORT_BYTES) + LONG_BYTES); |
| |
| EntryType type = EntryType.of(info & TokenTreeBuilder.ENTRY_TYPE_MASK); |
| |
| switch (type) |
| { |
| case SIMPLE: |
| return new long[] { offsetData }; |
| |
| case OVERFLOW: |
| long[] offsets = new long[offsetExtra]; // offsetShort contains count of tokens |
| long offsetPos = (buffer.position() + (2 * (leafSize * LONG_BYTES)) + (offsetData * LONG_BYTES)); |
| |
| for (int i = 0; i < offsetExtra; i++) |
| offsets[i] = buffer.getLong(offsetPos + (i * LONG_BYTES)); |
| |
| return offsets; |
| |
| case FACTORED: |
| return new long[] { (((long) offsetData) << Short.SIZE) + offsetExtra }; |
| |
| case PACKED: |
| return new long[] { offsetExtra, offsetData }; |
| |
| default: |
| throw new IllegalStateException("Unknown entry type: " + type); |
| } |
| } |
| } |
| |
| private static class KeyIterator extends AbstractIterator<DecoratedKey> |
| { |
| private final Function<Long, DecoratedKey> keyFetcher; |
| private final long[] offsets; |
| private int index = 0; |
| |
| public KeyIterator(Function<Long, DecoratedKey> keyFetcher, long[] offsets) |
| { |
| this.keyFetcher = keyFetcher; |
| this.offsets = offsets; |
| } |
| |
| public DecoratedKey computeNext() |
| { |
| return index < offsets.length ? keyFetcher.apply(offsets[index++]) : endOfData(); |
| } |
| } |
| } |