| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.cassandra.index.sasi.sa; |
| |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| |
| import org.apache.cassandra.index.sasi.disk.DynamicTokenTreeBuilder; |
| import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder; |
| import org.apache.cassandra.index.sasi.disk.TokenTreeBuilder; |
| import org.apache.cassandra.db.marshal.AbstractType; |
| import org.apache.cassandra.utils.LongTimSort; |
| import org.apache.cassandra.utils.Pair; |
| |
| import com.google.common.base.Charsets; |
| |
| public class SuffixSA extends SA<CharBuffer> |
| { |
| public SuffixSA(AbstractType<?> comparator, OnDiskIndexBuilder.Mode mode) |
| { |
| super(comparator, mode); |
| } |
| |
| protected Term<CharBuffer> getTerm(ByteBuffer termValue, TokenTreeBuilder tokens) |
| { |
| return new CharTerm(charCount, Charsets.UTF_8.decode(termValue.duplicate()), tokens); |
| } |
| |
| public TermIterator finish() |
| { |
| return new SASuffixIterator(); |
| } |
| |
| private class SASuffixIterator extends TermIterator |
| { |
| |
| private static final int COMPLETE_BIT = 31; |
| |
| private final long[] suffixes; |
| |
| private int current = 0; |
| private IndexedTerm lastProcessedSuffix; |
| private TokenTreeBuilder container; |
| |
| public SASuffixIterator() |
| { |
| // each element has term index and char position encoded as two 32-bit integers |
| // to avoid binary search per suffix while sorting suffix array. |
| suffixes = new long[charCount]; |
| |
| long termIndex = -1, currentTermLength = -1; |
| boolean isComplete = false; |
| for (int i = 0; i < charCount; i++) |
| { |
| if (i >= currentTermLength || currentTermLength == -1) |
| { |
| Term currentTerm = terms.get((int) ++termIndex); |
| currentTermLength = currentTerm.getPosition() + currentTerm.length(); |
| isComplete = true; |
| } |
| |
| suffixes[i] = (termIndex << 32) | i; |
| if (isComplete) |
| suffixes[i] |= (1L << COMPLETE_BIT); |
| |
| isComplete = false; |
| } |
| |
| LongTimSort.sort(suffixes, (a, b) -> { |
| Term aTerm = terms.get((int) (a >>> 32)); |
| Term bTerm = terms.get((int) (b >>> 32)); |
| return comparator.compare(aTerm.getSuffix(clearCompleteBit(a) - aTerm.getPosition()), |
| bTerm.getSuffix(clearCompleteBit(b) - bTerm.getPosition())); |
| }); |
| } |
| |
| private int clearCompleteBit(long value) |
| { |
| return (int) (value & ~(1L << COMPLETE_BIT)); |
| } |
| |
| private Pair<IndexedTerm, TokenTreeBuilder> suffixAt(int position) |
| { |
| long index = suffixes[position]; |
| Term term = terms.get((int) (index >>> 32)); |
| boolean isPartitial = (index & ((long) 1 << 31)) == 0; |
| return Pair.create(new IndexedTerm(term.getSuffix(clearCompleteBit(index) - term.getPosition()), isPartitial), term.getTokens()); |
| } |
| |
| public ByteBuffer minTerm() |
| { |
| return suffixAt(0).left.getBytes(); |
| } |
| |
| public ByteBuffer maxTerm() |
| { |
| return suffixAt(suffixes.length - 1).left.getBytes(); |
| } |
| |
| protected Pair<IndexedTerm, TokenTreeBuilder> computeNext() |
| { |
| while (true) |
| { |
| if (current >= suffixes.length) |
| { |
| if (lastProcessedSuffix == null) |
| return endOfData(); |
| |
| Pair<IndexedTerm, TokenTreeBuilder> result = finishSuffix(); |
| |
| lastProcessedSuffix = null; |
| return result; |
| } |
| |
| Pair<IndexedTerm, TokenTreeBuilder> suffix = suffixAt(current++); |
| |
| if (lastProcessedSuffix == null) |
| { |
| lastProcessedSuffix = suffix.left; |
| container = new DynamicTokenTreeBuilder(suffix.right); |
| } |
| else if (comparator.compare(lastProcessedSuffix.getBytes(), suffix.left.getBytes()) == 0) |
| { |
| lastProcessedSuffix = suffix.left; |
| container.add(suffix.right); |
| } |
| else |
| { |
| Pair<IndexedTerm, TokenTreeBuilder> result = finishSuffix(); |
| |
| lastProcessedSuffix = suffix.left; |
| container = new DynamicTokenTreeBuilder(suffix.right); |
| |
| return result; |
| } |
| } |
| } |
| |
| private Pair<IndexedTerm, TokenTreeBuilder> finishSuffix() |
| { |
| return Pair.create(lastProcessedSuffix, container.finish()); |
| } |
| } |
| } |