/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.db;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;

import com.google.common.annotations.VisibleForTesting;

import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.composites.Composite;
import org.apache.cassandra.io.sstable.IndexHelper;
import org.apache.cassandra.io.util.DataOutputPlus;
import org.apache.cassandra.utils.ByteBufferUtil;

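/**
 * Holds the index of a serialized row: a list of {@link IndexHelper.IndexInfo} entries,
 * one per index block, each recording the first and last column name of the block and
 * the block's offset and width within the row.
 */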
public class ColumnIndex
{
public final List<IndexHelper.IndexInfo> columnsIndex;
private static final ColumnIndex EMPTY = new ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList());
private ColumnIndex(List<IndexHelper.IndexInfo> columnsIndex)
{
assert columnsIndex != null;
this.columnsIndex = columnsIndex;
}
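    /** Returns the shared empty index, exposed for tests. */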
@VisibleForTesting
public static ColumnIndex nothing()
{
return EMPTY;
}
/**
     * Helps to create an index for a column family based on the size of its columns,
     * and writes said columns to disk.
*/
public static class Builder
{
private final ColumnIndex result;
private final long indexOffset;
private long startPosition = -1;
private long endPosition = 0;
private long blockSize;
private OnDiskAtom firstColumn;
private OnDiskAtom lastColumn;
private OnDiskAtom lastBlockClosing;
private final DataOutputPlus output;
private final RangeTombstone.Tracker tombstoneTracker;
private int atomCount;
private final ByteBuffer key;
private final DeletionInfo deletionInfo; // only used for serializing and calculating row header size
private final OnDiskAtom.Serializer atomSerializer;
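        /**
         * @param cf     the column family being written; supplies the comparator and deletion info
         * @param key    the partition key of the row being written
         * @param output the destination the row is serialized to
         */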
public Builder(ColumnFamily cf,
ByteBuffer key,
DataOutputPlus output)
{
assert cf != null;
assert key != null;
assert output != null;
this.key = key;
deletionInfo = cf.deletionInfo();
this.indexOffset = rowHeaderSize(key, deletionInfo);
this.result = new ColumnIndex(new ArrayList<IndexHelper.IndexInfo>());
this.output = output;
this.tombstoneTracker = new RangeTombstone.Tracker(cf.getComparator());
this.atomSerializer = cf.getComparator().onDiskAtomSerializer();
}
/**
* Returns the number of bytes between the beginning of the row and the
* first serialized column.
*/
private static long rowHeaderSize(ByteBuffer key, DeletionInfo delInfo)
{
TypeSizes typeSizes = TypeSizes.NATIVE;
            // TODO fix constantSize when changing the native constants.
int keysize = key.remaining();
return typeSizes.sizeof((short) keysize) + keysize // Row key
+ DeletionTime.serializer.serializedSize(delInfo.getTopLevelDeletion(), typeSizes);
}
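        /** The tracker holding the range tombstones currently open while serializing this row. */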
public RangeTombstone.Tracker tombstoneTracker()
{
return tombstoneTracker;
}
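        /**
         * Number of atoms accounted for so far: every atom passed to add(), plus any extra
         * range tombstone markers written directly by the tombstone tracker.
         */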
public int writtenAtomCount()
{
return atomCount + tombstoneTracker.writtenAtom();
}
/**
         * Serializes the given column family to the output and builds the index for it
         * as an in-memory structure, i.e. the list of IndexInfo entries describing each
         * index block of the row.
         *
         * @param cf Column family to create the index for
         *
         * @return information about the index: its IndexInfo list, one entry per index block
*/
public ColumnIndex build(ColumnFamily cf) throws IOException
{
            // cf has disentangled the columns and range tombstones; we need to re-interleave them in comparator order
Comparator<Composite> comparator = cf.getComparator();
DeletionInfo.InOrderTester tester = cf.deletionInfo().inOrderTester();
Iterator<RangeTombstone> rangeIter = cf.deletionInfo().rangeIterator();
RangeTombstone tombstone = rangeIter.hasNext() ? rangeIter.next() : null;
for (Cell c : cf)
{
while (tombstone != null && comparator.compare(c.name(), tombstone.min) >= 0)
{
// skip range tombstones that are shadowed by partition tombstones
if (!cf.deletionInfo().getTopLevelDeletion().isDeleted(tombstone))
add(tombstone);
tombstone = rangeIter.hasNext() ? rangeIter.next() : null;
}
// We can skip any cell if it's shadowed by a tombstone already. This is a more
// general case than was handled by CASSANDRA-2589.
if (!tester.isDeleted(c))
add(c);
}
while (tombstone != null)
{
add(tombstone);
tombstone = rangeIter.hasNext() ? rangeIter.next() : null;
}
finishAddingAtoms();
ColumnIndex index = build();
maybeWriteEmptyRowHeader();
return index;
}
/**
* The important distinction wrt build() is that we may be building for a row that ends up
* being compacted away entirely, i.e., the input consists only of expired tombstones (or
         * columns shadowed by an expired tombstone). Thus, it is the caller's responsibility
* to decide whether to write the header for an empty row.
*/
public ColumnIndex buildForCompaction(Iterator<OnDiskAtom> columns) throws IOException
{
while (columns.hasNext())
{
OnDiskAtom c = columns.next();
add(c);
}
finishAddingAtoms();
return build();
}
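        /**
         * Adds an atom to the current index block: writes the row header before the first atom,
         * re-opens markers for range tombstones still in effect at the start of a new block,
         * serializes the atom (and any pending tombstones) when the tracker says it must be
         * written, and closes out the block once it reaches the configured column index size.
         */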
public void add(OnDiskAtom column) throws IOException
{
atomCount++;
if (firstColumn == null)
{
firstColumn = column;
startPosition = endPosition;
// TODO: have that use the firstColumn as min + make sure we optimize that on read
endPosition += tombstoneTracker.writeOpenedMarkers(firstColumn.name(), output, atomSerializer);
                blockSize = 0; // We don't count repeated tombstone markers in the block size, to avoid a situation
                               // where we wouldn't make any progress because a block is filled by said markers
maybeWriteRowHeader();
}
if (tombstoneTracker.update(column, false))
{
long size = tombstoneTracker.writeUnwrittenTombstones(output, atomSerializer);
size += atomSerializer.serializedSizeForSSTable(column);
endPosition += size;
blockSize += size;
atomSerializer.serializeForSSTable(column, output);
}
lastColumn = column;
            // if this block has reached the configured column index size, close it out and index it.
if (blockSize >= DatabaseDescriptor.getColumnIndexSize())
{
IndexHelper.IndexInfo cIndexInfo = new IndexHelper.IndexInfo(firstColumn.name(), column.name(), indexOffset + startPosition, endPosition - startPosition);
result.columnsIndex.add(cIndexInfo);
firstColumn = null;
lastBlockClosing = column;
}
}
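        // Writes the row key and top-level deletion, but only if nothing has been written for this row yet.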
private void maybeWriteRowHeader() throws IOException
{
if (lastColumn == null)
{
ByteBufferUtil.writeWithShortLength(key, output);
DeletionTime.serializer.serialize(deletionInfo.getTopLevelDeletion(), output);
}
}
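        /**
         * Flushes any range tombstones still tracked but not yet written to the output.
         * Must be called before build().
         */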
public void finishAddingAtoms() throws IOException
{
long size = tombstoneTracker.writeUnwrittenTombstones(output, atomSerializer);
endPosition += size;
blockSize += size;
}
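        /**
         * Builds the index from the blocks accumulated so far: closes out the final (possibly
         * partial) block unless the last atom already closed one, or returns the shared empty
         * index if no atom was written at all.
         */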
public ColumnIndex build()
{
assert !tombstoneTracker.hasUnwrittenTombstones(); // finishAddingAtoms must be called before building.
// all columns were GC'd after all
if (lastColumn == null)
return ColumnIndex.EMPTY;
// the last column may have fallen on an index boundary already. if not, index it explicitly.
if (result.columnsIndex.isEmpty() || lastBlockClosing != lastColumn)
{
IndexHelper.IndexInfo cIndexInfo = new IndexHelper.IndexInfo(firstColumn.name(), lastColumn.name(), indexOffset + startPosition, endPosition - startPosition);
result.columnsIndex.add(cIndexInfo);
}
// we should always have at least one computed index block, but we only write it out if there is more than that.
assert result.columnsIndex.size() > 0;
return result;
}
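        /**
         * Writes the row header even though no column was written, provided the row carries
         * deletion info; this ensures the tombstone itself is persisted.
         */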
public void maybeWriteEmptyRowHeader() throws IOException
{
if (!deletionInfo.isLive())
maybeWriteRowHeader();
}
}
}