src/java/org/apache/cassandra/index/sasi/sa/SuffixSA.java - cassandra - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.cassandra.index.sasi.sa;

 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;

 import org.apache.cassandra.index.sasi.disk.DynamicTokenTreeBuilder;
 import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder;
 import org.apache.cassandra.index.sasi.disk.TokenTreeBuilder;
 import org.apache.cassandra.db.marshal.AbstractType;
 import org.apache.cassandra.utils.LongTimSort;
 import org.apache.cassandra.utils.Pair;

 import com.google.common.base.Charsets;

 public class SuffixSA extends SA<CharBuffer>
 {
     public SuffixSA(AbstractType<?> comparator, OnDiskIndexBuilder.Mode mode)
     {
         super(comparator, mode);
     }

     protected Term<CharBuffer> getTerm(ByteBuffer termValue, TokenTreeBuilder tokens)
     {
         return new CharTerm(charCount, Charsets.UTF_8.decode(termValue.duplicate()), tokens);
     }

     public TermIterator finish()
     {
         return new SASuffixIterator();
     }

     private class SASuffixIterator extends TermIterator
     {

         private static final int COMPLETE_BIT = 31;

         private final long[] suffixes;

         private int current = 0;
         private IndexedTerm lastProcessedSuffix;
         private TokenTreeBuilder container;

         public SASuffixIterator()
         {
             // each element has term index and char position encoded as two 32-bit integers
             // to avoid binary search per suffix while sorting suffix array.
             suffixes = new long[charCount];

             long termIndex = -1, currentTermLength = -1;
             boolean isComplete = false;
             for (int i = 0; i < charCount; i++)
             {
                 if (i >= currentTermLength || currentTermLength == -1)
                 {
                     Term currentTerm = terms.get((int) ++termIndex);
                     currentTermLength = currentTerm.getPosition() + currentTerm.length();
                     isComplete = true;
                 }

                 suffixes[i] = (termIndex << 32) | i;
                 if (isComplete)
                     suffixes[i] |= (1L << COMPLETE_BIT);

                 isComplete = false;
             }

             LongTimSort.sort(suffixes, (a, b) -> {
                 Term aTerm = terms.get((int) (a >>> 32));
                 Term bTerm = terms.get((int) (b >>> 32));
                 return comparator.compare(aTerm.getSuffix(clearCompleteBit(a) - aTerm.getPosition()),
                                           bTerm.getSuffix(clearCompleteBit(b) - bTerm.getPosition()));
             });
         }

         private int clearCompleteBit(long value)
         {
             return (int) (value & ~(1L << COMPLETE_BIT));
         }

         private Pair<IndexedTerm, TokenTreeBuilder> suffixAt(int position)
         {
             long index = suffixes[position];
             Term term = terms.get((int) (index >>> 32));
             boolean isPartitial = (index & ((long) 1 << 31)) == 0;
             return Pair.create(new IndexedTerm(term.getSuffix(clearCompleteBit(index) - term.getPosition()), isPartitial), term.getTokens());
         }

         public ByteBuffer minTerm()
         {
             return suffixAt(0).left.getBytes();
         }

         public ByteBuffer maxTerm()
         {
             return suffixAt(suffixes.length - 1).left.getBytes();
         }

         protected Pair<IndexedTerm, TokenTreeBuilder> computeNext()
         {
             while (true)
             {
                 if (current >= suffixes.length)
                 {
                     if (lastProcessedSuffix == null)
                         return endOfData();

                     Pair<IndexedTerm, TokenTreeBuilder> result = finishSuffix();

                     lastProcessedSuffix = null;
                     return result;
                 }

                 Pair<IndexedTerm, TokenTreeBuilder> suffix = suffixAt(current++);

                 if (lastProcessedSuffix == null)
                 {
                     lastProcessedSuffix = suffix.left;
                     container = new DynamicTokenTreeBuilder(suffix.right);
                 }
                 else if (comparator.compare(lastProcessedSuffix.getBytes(), suffix.left.getBytes()) == 0)
                 {
                     lastProcessedSuffix = suffix.left;
                     container.add(suffix.right);
                 }
                 else
                 {
                     Pair<IndexedTerm, TokenTreeBuilder> result = finishSuffix();

                     lastProcessedSuffix = suffix.left;
                     container = new DynamicTokenTreeBuilder(suffix.right);

                     return result;
                 }
             }
         }

         private Pair<IndexedTerm, TokenTreeBuilder> finishSuffix()
         {
             return Pair.create(lastProcessedSuffix, container.finish());
         }
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.cassandra.index.sasi.sa;

	import java.nio.ByteBuffer;
	import java.nio.CharBuffer;

	import org.apache.cassandra.index.sasi.disk.DynamicTokenTreeBuilder;
	import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder;
	import org.apache.cassandra.index.sasi.disk.TokenTreeBuilder;
	import org.apache.cassandra.db.marshal.AbstractType;
	import org.apache.cassandra.utils.LongTimSort;
	import org.apache.cassandra.utils.Pair;

	import com.google.common.base.Charsets;

	public class SuffixSA extends SA<CharBuffer>
	{
	public SuffixSA(AbstractType<?> comparator, OnDiskIndexBuilder.Mode mode)
	{
	super(comparator, mode);
	}

	protected Term<CharBuffer> getTerm(ByteBuffer termValue, TokenTreeBuilder tokens)
	{
	return new CharTerm(charCount, Charsets.UTF_8.decode(termValue.duplicate()), tokens);
	}

	public TermIterator finish()
	{
	return new SASuffixIterator();
	}

	private class SASuffixIterator extends TermIterator
	{

	private static final int COMPLETE_BIT = 31;

	private final long[] suffixes;

	private int current = 0;
	private IndexedTerm lastProcessedSuffix;
	private TokenTreeBuilder container;

	public SASuffixIterator()
	{
	// each element has term index and char position encoded as two 32-bit integers
	// to avoid binary search per suffix while sorting suffix array.
	suffixes = new long[charCount];

	long termIndex = -1, currentTermLength = -1;
	boolean isComplete = false;
	for (int i = 0; i < charCount; i++)
	{
	if (i >= currentTermLength \|\| currentTermLength == -1)
	{
	Term currentTerm = terms.get((int) ++termIndex);
	currentTermLength = currentTerm.getPosition() + currentTerm.length();
	isComplete = true;
	}

	suffixes[i] = (termIndex << 32) \| i;
	if (isComplete)
	suffixes[i] \|= (1L << COMPLETE_BIT);

	isComplete = false;
	}

	LongTimSort.sort(suffixes, (a, b) -> {
	Term aTerm = terms.get((int) (a >>> 32));
	Term bTerm = terms.get((int) (b >>> 32));
	return comparator.compare(aTerm.getSuffix(clearCompleteBit(a) - aTerm.getPosition()),
	bTerm.getSuffix(clearCompleteBit(b) - bTerm.getPosition()));
	});
	}

	private int clearCompleteBit(long value)
	{
	return (int) (value & ~(1L << COMPLETE_BIT));
	}

	private Pair<IndexedTerm, TokenTreeBuilder> suffixAt(int position)
	{
	long index = suffixes[position];
	Term term = terms.get((int) (index >>> 32));
	boolean isPartitial = (index & ((long) 1 << 31)) == 0;
	return Pair.create(new IndexedTerm(term.getSuffix(clearCompleteBit(index) - term.getPosition()), isPartitial), term.getTokens());
	}

	public ByteBuffer minTerm()
	{
	return suffixAt(0).left.getBytes();
	}

	public ByteBuffer maxTerm()
	{
	return suffixAt(suffixes.length - 1).left.getBytes();
	}

	protected Pair<IndexedTerm, TokenTreeBuilder> computeNext()
	{
	while (true)
	{
	if (current >= suffixes.length)
	{
	if (lastProcessedSuffix == null)
	return endOfData();

	Pair<IndexedTerm, TokenTreeBuilder> result = finishSuffix();

	lastProcessedSuffix = null;
	return result;
	}

	Pair<IndexedTerm, TokenTreeBuilder> suffix = suffixAt(current++);

	if (lastProcessedSuffix == null)
	{
	lastProcessedSuffix = suffix.left;
	container = new DynamicTokenTreeBuilder(suffix.right);
	}
	else if (comparator.compare(lastProcessedSuffix.getBytes(), suffix.left.getBytes()) == 0)
	{
	lastProcessedSuffix = suffix.left;
	container.add(suffix.right);
	}
	else
	{
	Pair<IndexedTerm, TokenTreeBuilder> result = finishSuffix();

	lastProcessedSuffix = suffix.left;
	container = new DynamicTokenTreeBuilder(suffix.right);

	return result;
	}
	}
	}

	private Pair<IndexedTerm, TokenTreeBuilder> finishSuffix()
	{
	return Pair.create(lastProcessedSuffix, container.finish());
	}
	}
	}