/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.cassandra.index.sasi.disk;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;
import org.apache.cassandra.db.DecoratedKey;
import org.apache.cassandra.index.sasi.plan.Expression.Op;
import org.apache.cassandra.index.sasi.sa.IndexedTerm;
import org.apache.cassandra.index.sasi.sa.IntegralSA;
import org.apache.cassandra.index.sasi.sa.SA;
import org.apache.cassandra.index.sasi.sa.TermIterator;
import org.apache.cassandra.index.sasi.sa.SuffixSA;
import org.apache.cassandra.db.marshal.*;
import org.apache.cassandra.io.FSWriteError;
import org.apache.cassandra.io.util.*;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.Pair;
import com.carrotsearch.hppc.LongArrayList;
import com.carrotsearch.hppc.LongSet;
import com.carrotsearch.hppc.ShortArrayList;
import com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

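/**
 * Builds a SASI on-disk index: terms added via {@link #add(ByteBuffer, DecoratedKey, long)} are
 * accumulated in memory together with their token trees, then {@code finish(...)} sorts them
 * (as suffixes for CONTAINS mode on text types, otherwise as whole terms) and writes a
 * block-aligned file made up of a header block, data blocks, and one or more pointer levels.
 *
 * <p>Illustrative usage (a sketch; the comparators, variables and path below are placeholders):
 * <pre>{@code
 * OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, UTF8Type.instance, Mode.PREFIX);
 * builder.add(termBuffer, decoratedKey, keyPosition); // hypothetical inputs
 * builder.finish(new File("/tmp/sasi-index.db"));     // hypothetical output path
 * }</pre>
 */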
public class OnDiskIndexBuilder
{
private static final Logger logger = LoggerFactory.getLogger(OnDiskIndexBuilder.class);
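/**
 * Index mode, determining which query operations the index can serve. CONTAINS additionally
 * splits text terms into suffixes (see {@code finish}); SPARSE is meant for terms that each map
 * to only a few keys (see {@code MutableDataBlock#MAX_KEYS_SPARSE}) and supports only EQ,
 * NOT_EQ and RANGE.
 */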
public enum Mode
{
PREFIX(EnumSet.of(Op.EQ, Op.MATCH, Op.PREFIX, Op.NOT_EQ, Op.RANGE)),
CONTAINS(EnumSet.of(Op.EQ, Op.MATCH, Op.CONTAINS, Op.PREFIX, Op.SUFFIX, Op.NOT_EQ)),
SPARSE(EnumSet.of(Op.EQ, Op.NOT_EQ, Op.RANGE));
Set<Op> supportedOps;
Mode(Set<Op> ops)
{
supportedOps = ops;
}
public static Mode mode(String mode)
{
return Mode.valueOf(mode.toUpperCase());
}
public boolean supports(Op op)
{
return supportedOps.contains(op);
}
}
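/** On-disk size of a single term: a fixed 4, 8 or 16 bytes, or variable-length (length-prefixed). */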
public enum TermSize
{
INT(4), LONG(8), UUID(16), VARIABLE(-1);
public final int size;
TermSize(int size)
{
this.size = size;
}
public boolean isConstant()
{
return this != VARIABLE;
}
public static TermSize of(int size)
{
switch (size)
{
case -1:
return VARIABLE;
case 4:
return INT;
case 8:
return LONG;
case 16:
return UUID;
default:
throw new IllegalStateException("unknown state: " + size);
}
}
public static TermSize sizeOf(AbstractType<?> comparator)
{
if (comparator instanceof Int32Type || comparator instanceof FloatType)
return INT;
if (comparator instanceof LongType || comparator instanceof DoubleType
|| comparator instanceof TimestampType || comparator instanceof DateType)
return LONG;
if (comparator instanceof TimeUUIDType || comparator instanceof UUIDType)
return UUID;
return VARIABLE;
}
}
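// BLOCK_SIZE: size, in bytes, to which all on-disk index blocks are aligned.
// MAX_TERM_SIZE: terms of this size or larger are rejected by add().
// SUPER_BLOCK_SIZE: number of data blocks combined into one super block (SPARSE mode only).
// IS_PARTIAL_BIT: bit of the variable-length term's length prefix used to mark partial (suffix) terms.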
public static final int BLOCK_SIZE = 4096;
public static final int MAX_TERM_SIZE = 1024;
public static final int SUPER_BLOCK_SIZE = 64;
public static final int IS_PARTIAL_BIT = 15;
private static final SequentialWriterOption WRITER_OPTION = SequentialWriterOption.newBuilder()
.bufferSize(BLOCK_SIZE)
.build();
private final List<MutableLevel<InMemoryPointerTerm>> levels = new ArrayList<>();
private MutableLevel<InMemoryDataTerm> dataLevel;
private final TermSize termSize;
private final AbstractType<?> keyComparator, termComparator;
private final Map<ByteBuffer, TokenTreeBuilder> terms;
private final Mode mode;
private final boolean marksPartials;
private ByteBuffer minKey, maxKey;
private long estimatedBytes;
public OnDiskIndexBuilder(AbstractType<?> keyComparator, AbstractType<?> comparator, Mode mode)
{
this(keyComparator, comparator, mode, true);
}
public OnDiskIndexBuilder(AbstractType<?> keyComparator, AbstractType<?> comparator, Mode mode, boolean marksPartials)
{
this.keyComparator = keyComparator;
this.termComparator = comparator;
this.terms = new HashMap<>();
this.termSize = TermSize.sizeOf(comparator);
this.mode = mode;
this.marksPartials = marksPartials;
}
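/**
 * Adds a term/key pair to the in-memory index: the key's token and position are appended to the
 * term's token tree, and the indexed key range (minKey/maxKey) is updated. Terms of
 * MAX_TERM_SIZE bytes or more are rejected with an error log entry.
 */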
public OnDiskIndexBuilder add(ByteBuffer term, DecoratedKey key, long keyPosition)
{
if (term.remaining() >= MAX_TERM_SIZE)
{
logger.error("Rejecting value (value size {}, maximum size {}).",
FBUtilities.prettyPrintMemory(term.remaining()),
FBUtilities.prettyPrintMemory(MAX_TERM_SIZE));
return this;
}
TokenTreeBuilder tokens = terms.get(term);
if (tokens == null)
{
terms.put(term, (tokens = new DynamicTokenTreeBuilder()));
// on-heap size estimates from jol
// 64 bytes for TTB + 48 bytes for TreeMap in TTB + size bytes for the term (map key)
estimatedBytes += 64 + 48 + term.remaining();
}
tokens.add((Long) key.getToken().getTokenValue(), keyPosition);
// calculate key range (based on actual key values) for current index
minKey = (minKey == null || keyComparator.compare(minKey, key.getKey()) > 0) ? key.getKey() : minKey;
maxKey = (maxKey == null || keyComparator.compare(maxKey, key.getKey()) < 0) ? key.getKey() : maxKey;
// 60 ((boolean(1)*4) + (long(8)*4) + 24) bytes for the LongOpenHashSet created when the keyPosition was added
// + 40 bytes for the TreeMap.Entry + 8 bytes for the token (key).
// in the case of hash collision for the token we may overestimate but this is extremely rare
estimatedBytes += 60 + 40 + 8;
return this;
}
public long estimatedMemoryUse()
{
return estimatedBytes;
}
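/**
 * Appends a term to the data level; if that causes a data block to flush, the returned pointer
 * term is promoted into index level 0, cascading upwards for every level whose block flushes in turn.
 */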
private void addTerm(InMemoryDataTerm term, SequentialWriter out) throws IOException
{
InMemoryPointerTerm ptr = dataLevel.add(term);
if (ptr == null)
return;
int levelIdx = 0;
for (;;)
{
MutableLevel<InMemoryPointerTerm> level = getIndexLevel(levelIdx++, out);
if ((ptr = level.add(ptr)) == null)
break;
}
}
public boolean isEmpty()
{
return terms.isEmpty();
}
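/**
 * Finishes the index using the current on-disk format version, writing the given key range and
 * term iterator to the target file.
 */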
public void finish(Pair<ByteBuffer, ByteBuffer> range, File file, TermIterator terms)
{
finish(Descriptor.CURRENT, range, file, terms);
}
/**
* Finishes the index-building process by creating and populating the index file.
*
* @param indexFile The file to write index contents to.
*
* @return true if index was written successfully, false otherwise (e.g. if index was empty).
*
* @throws FSWriteError on I/O error.
*/
public boolean finish(File indexFile) throws FSWriteError
{
return finish(Descriptor.CURRENT, indexFile);
}
@VisibleForTesting
protected boolean finish(Descriptor descriptor, File file) throws FSWriteError
{
// no terms means there is nothing to build
if (terms.isEmpty())
{
try
{
file.createNewFile();
}
catch (IOException e)
{
throw new FSWriteError(e, file);
}
return false;
}
// split terms into suffixes only when the term type is text; otherwise (even if CONTAINS is set) use terms in their original form
SA sa = ((termComparator instanceof UTF8Type || termComparator instanceof AsciiType) && mode == Mode.CONTAINS)
? new SuffixSA(termComparator, mode) : new IntegralSA(termComparator, mode);
for (Map.Entry<ByteBuffer, TokenTreeBuilder> term : terms.entrySet())
sa.add(term.getKey(), term.getValue());
finish(descriptor, Pair.create(minKey, maxKey), file, sa.finish());
return true;
}
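/**
 * Writes the index file: a header block (format version, term size, min/max term, min/max key,
 * mode, partial-marking flag), followed by block-aligned data blocks and pointer levels, then
 * each level's block-offset metadata and, last, the position of that metadata section.
 */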
@SuppressWarnings("resource")
protected void finish(Descriptor descriptor, Pair<ByteBuffer, ByteBuffer> range, File file, TermIterator terms)
{
SequentialWriter out = null;
try
{
out = new SequentialWriter(file, WRITER_OPTION);
out.writeUTF(descriptor.version.toString());
out.writeShort(termSize.size);
// min, max term (useful to find initial scan range from search expressions)
ByteBufferUtil.writeWithShortLength(terms.minTerm(), out);
ByteBufferUtil.writeWithShortLength(terms.maxTerm(), out);
// min, max keys covered by index (useful when searching across multiple indexes)
ByteBufferUtil.writeWithShortLength(range.left, out);
ByteBufferUtil.writeWithShortLength(range.right, out);
out.writeUTF(mode.toString());
out.writeBoolean(marksPartials);
out.skipBytes((int) (BLOCK_SIZE - out.position()));
dataLevel = mode == Mode.SPARSE ? new DataBuilderLevel(out, new MutableDataBlock(termComparator, mode))
: new MutableLevel<>(out, new MutableDataBlock(termComparator, mode));
while (terms.hasNext())
{
Pair<IndexedTerm, TokenTreeBuilder> term = terms.next();
addTerm(new InMemoryDataTerm(term.left, term.right), out);
}
dataLevel.finalFlush();
for (MutableLevel l : levels)
l.flush(); // flush all of the buffers
// and finally write levels index
final long levelIndexPosition = out.position();
out.writeInt(levels.size());
for (int i = levels.size() - 1; i >= 0; i--)
levels.get(i).flushMetadata();
dataLevel.flushMetadata();
out.writeLong(levelIndexPosition);
// sync contents of the output and disk,
// since it's not done implicitly on close
out.sync();
}
catch (IOException e)
{
throw new FSWriteError(e, file);
}
finally
{
FileUtils.closeQuietly(out);
}
}
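// returns the pointer level at the requested depth, creating it and any missing levels below it on demand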
private MutableLevel<InMemoryPointerTerm> getIndexLevel(int idx, SequentialWriter out)
{
if (levels.size() == 0)
levels.add(new MutableLevel<>(out, new MutableBlock<>()));
if (levels.size() - 1 < idx)
{
int toAdd = idx - (levels.size() - 1);
for (int i = 0; i < toAdd; i++)
levels.add(new MutableLevel<>(out, new MutableBlock<>()));
}
return levels.get(idx);
}
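/** Advances the writer to the next BLOCK_SIZE boundary, unless it is already aligned. */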
protected static void alignToBlock(SequentialWriter out) throws IOException
{
long endOfBlock = out.position();
if ((endOfBlock & (BLOCK_SIZE - 1)) != 0) // align on the block boundary if needed
out.skipBytes((int) (FBUtilities.align(endOfBlock, BLOCK_SIZE) - endOfBlock));
}
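/**
 * A term buffered in memory prior to serialization: fixed-size terms are written as raw bytes,
 * variable-size terms as a 2-byte length prefix (with the partial-term flag in IS_PARTIAL_BIT)
 * followed by the term bytes.
 */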
private class InMemoryTerm
{
protected final IndexedTerm term;
public InMemoryTerm(IndexedTerm term)
{
this.term = term;
}
public int serializedSize()
{
return (termSize.isConstant() ? 0 : 2) + term.getBytes().remaining();
}
public void serialize(DataOutputPlus out) throws IOException
{
if (termSize.isConstant())
{
out.write(term.getBytes());
}
else
{
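// pack the "partial term" flag into the top bit of the 16-bit length prefix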
out.writeShort(term.getBytes().remaining() | ((marksPartials && term.isPartial() ? 1 : 0) << IS_PARTIAL_BIT));
out.write(term.getBytes());
}
}
}
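/** A term in a pointer level, carrying the index of the block that contains it in the level below. */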
private class InMemoryPointerTerm extends InMemoryTerm
{
protected final int blockCnt;
public InMemoryPointerTerm(IndexedTerm term, int blockCnt)
{
super(term);
this.blockCnt = blockCnt;
}
public int serializedSize()
{
return super.serializedSize() + 4;
}
public void serialize(DataOutputPlus out) throws IOException
{
super.serialize(out);
out.writeInt(blockCnt);
}
}
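/** A term in the data level, paired with the token tree of keys it is indexed under. */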
private class InMemoryDataTerm extends InMemoryTerm
{
private final TokenTreeBuilder keys;
public InMemoryDataTerm(IndexedTerm term, TokenTreeBuilder keys)
{
super(term);
this.keys = keys;
}
}
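/**
 * One level of the index under construction: terms accumulate in an in-memory block and, when a
 * block runs out of space, it is flushed to disk and its last term is handed back so the caller
 * can promote it to the level above. Flushed block offsets are kept for the level's metadata.
 */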
private class MutableLevel<T extends InMemoryTerm>
{
private final LongArrayList blockOffsets = new LongArrayList();
protected final SequentialWriter out;
private final MutableBlock<T> inProcessBlock;
private InMemoryPointerTerm lastTerm;
public MutableLevel(SequentialWriter out, MutableBlock<T> block)
{
this.out = out;
this.inProcessBlock = block;
}
/**
* @return the last term of the just-flushed block if this addition caused a flush (so it can be promoted to the level above); otherwise null.
*/
public InMemoryPointerTerm add(T term) throws IOException
{
InMemoryPointerTerm toPromote = null;
if (!inProcessBlock.hasSpaceFor(term))
{
flush();
toPromote = lastTerm;
}
inProcessBlock.add(term);
lastTerm = new InMemoryPointerTerm(term.term, blockOffsets.size());
return toPromote;
}
public void flush() throws IOException
{
blockOffsets.add(out.position());
inProcessBlock.flushAndClear(out);
}
public void finalFlush() throws IOException
{
flush();
}
public void flushMetadata() throws IOException
{
flushMetadata(blockOffsets);
}
protected void flushMetadata(LongArrayList longArrayList) throws IOException
{
out.writeInt(longArrayList.size());
for (int i = 0; i < longArrayList.size(); i++)
out.writeLong(longArrayList.get(i));
}
}
/** Builds standard data blocks as well as super blocks; used only in SPARSE mode. */
private class DataBuilderLevel extends MutableLevel<InMemoryDataTerm>
{
private final LongArrayList superBlockOffsets = new LongArrayList();
/** count of regular data blocks written since current super block was init'd */
private int dataBlocksCnt;
private TokenTreeBuilder superBlockTree;
public DataBuilderLevel(SequentialWriter out, MutableBlock<InMemoryDataTerm> block)
{
super(out, block);
superBlockTree = new DynamicTokenTreeBuilder();
}
public InMemoryPointerTerm add(InMemoryDataTerm term) throws IOException
{
InMemoryPointerTerm ptr = super.add(term);
if (ptr != null)
{
dataBlocksCnt++;
flushSuperBlock(false);
}
superBlockTree.add(term.keys);
return ptr;
}
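// writes the combined token tree as a super block once SUPER_BLOCK_SIZE data blocks have been
// collected, or (when forced) whenever the accumulated tree is non-empty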
public void flushSuperBlock(boolean force) throws IOException
{
if (dataBlocksCnt == SUPER_BLOCK_SIZE || (force && !superBlockTree.isEmpty()))
{
superBlockOffsets.add(out.position());
superBlockTree.finish().write(out);
alignToBlock(out);
dataBlocksCnt = 0;
superBlockTree = new DynamicTokenTreeBuilder();
}
}
public void finalFlush() throws IOException
{
super.flush();
flushSuperBlock(true);
}
public void flushMetadata() throws IOException
{
super.flushMetadata();
flushMetadata(superBlockOffsets);
}
}
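/**
 * An in-memory block, capped at BLOCK_SIZE: serialized terms plus a count and per-term offsets,
 * written to disk block-aligned by flushAndClear().
 */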
private static class MutableBlock<T extends InMemoryTerm>
{
protected final DataOutputBufferFixed buffer;
protected final ShortArrayList offsets;
public MutableBlock()
{
buffer = new DataOutputBufferFixed(BLOCK_SIZE);
offsets = new ShortArrayList();
}
public final void add(T term) throws IOException
{
offsets.add((short) buffer.position());
addInternal(term);
}
protected void addInternal(T term) throws IOException
{
term.serialize(buffer);
}
public boolean hasSpaceFor(T element)
{
return sizeAfter(element) < BLOCK_SIZE;
}
protected int sizeAfter(T element)
{
return getWatermark() + 4 + element.serializedSize();
}
protected int getWatermark()
{
return 4 + offsets.size() * 2 + (int) buffer.position();
}
public void flushAndClear(SequentialWriter out) throws IOException
{
out.writeInt(offsets.size());
for (int i = 0; i < offsets.size(); i++)
out.writeShort(offsets.get(i));
out.write(buffer.buffer());
alignToBlock(out);
offsets.clear();
buffer.clear();
}
}
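/**
 * Data block for the lowest (data) level. Next to each term it stores either the term's tokens
 * inline (SPARSE mode, limited to MAX_KEYS_SPARSE per term) or an offset to the term's token
 * tree, which is written out right after the block; in SPARSE mode a combined token tree for
 * the whole block is appended as well.
 */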
private static class MutableDataBlock extends MutableBlock<InMemoryDataTerm>
{
private static final int MAX_KEYS_SPARSE = 5;
private final AbstractType<?> comparator;
private final Mode mode;
private int offset = 0;
private final List<TokenTreeBuilder> containers = new ArrayList<>();
private TokenTreeBuilder combinedIndex;
public MutableDataBlock(AbstractType<?> comparator, Mode mode)
{
this.comparator = comparator;
this.mode = mode;
this.combinedIndex = initCombinedIndex();
}
protected void addInternal(InMemoryDataTerm term) throws IOException
{
TokenTreeBuilder keys = term.keys;
if (mode == Mode.SPARSE)
{
if (keys.getTokenCount() > MAX_KEYS_SPARSE)
throw new IOException(String.format("Term - '%s' belongs to more than %d keys in %s mode, which is not allowed.",
comparator.getString(term.term.getBytes()), MAX_KEYS_SPARSE, mode.name()));
writeTerm(term, keys);
}
else
{
writeTerm(term, offset);
offset += keys.serializedSize();
containers.add(keys);
}
if (mode == Mode.SPARSE)
combinedIndex.add(keys);
}
protected int sizeAfter(InMemoryDataTerm element)
{
return super.sizeAfter(element) + ptrLength(element);
}
public void flushAndClear(SequentialWriter out) throws IOException
{
super.flushAndClear(out);
out.writeInt(mode == Mode.SPARSE ? offset : -1);
if (containers.size() > 0)
{
for (TokenTreeBuilder tokens : containers)
tokens.write(out);
}
if (mode == Mode.SPARSE && combinedIndex != null)
combinedIndex.finish().write(out);
alignToBlock(out);
containers.clear();
combinedIndex = initCombinedIndex();
offset = 0;
}
private int ptrLength(InMemoryDataTerm term)
{
return (term.keys.getTokenCount() > 5)
? 5 // 1 byte type + 4 byte offset to the tree
: 1 + (8 * (int) term.keys.getTokenCount()); // 1 byte size + n 8 byte tokens
}
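// SPARSE layout: term, then a 1-byte token count, then the tokens themselves inline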
private void writeTerm(InMemoryTerm term, TokenTreeBuilder keys) throws IOException
{
term.serialize(buffer);
buffer.writeByte((byte) keys.getTokenCount());
for (Pair<Long, LongSet> key : keys)
buffer.writeLong(key.left);
}
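// non-SPARSE layout: term, then a zero header byte and a 4-byte offset to the term's token tree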
private void writeTerm(InMemoryTerm term, int offset) throws IOException
{
term.serialize(buffer);
buffer.writeByte(0x0);
buffer.writeInt(offset);
}
private TokenTreeBuilder initCombinedIndex()
{
return mode == Mode.SPARSE ? new DynamicTokenTreeBuilder() : null;
}
}
}