blob: 2cd9e97d968fcccf4da7e156b731914f285bfc10 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.cassandra.db.partitions;
import java.util.Iterator;
import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.filter.ColumnFilter;
import org.apache.cassandra.db.rows.*;
import org.apache.cassandra.utils.SearchIterator;
import org.apache.cassandra.utils.btree.BTree;
import org.apache.cassandra.utils.btree.BTreeSearchIterator;
import static org.apache.cassandra.utils.btree.BTree.Dir.desc;
public abstract class AbstractBTreePartition implements Partition, Iterable<Row>
{
protected static final Holder EMPTY = new Holder(PartitionColumns.NONE, BTree.empty(), DeletionInfo.LIVE, Rows.EMPTY_STATIC_ROW, EncodingStats.NO_STATS);
protected final CFMetaData metadata;
protected final DecoratedKey partitionKey;
protected abstract Holder holder();
protected abstract boolean canHaveShadowedData();
protected AbstractBTreePartition(CFMetaData metadata, DecoratedKey partitionKey)
{
this.metadata = metadata;
this.partitionKey = partitionKey;
}
protected static final class Holder
{
final PartitionColumns columns;
final DeletionInfo deletionInfo;
// the btree of rows
final Object[] tree;
final Row staticRow;
final EncodingStats stats;
Holder(PartitionColumns columns, Object[] tree, DeletionInfo deletionInfo, Row staticRow, EncodingStats stats)
{
this.columns = columns;
this.tree = tree;
this.deletionInfo = deletionInfo;
this.staticRow = staticRow == null ? Rows.EMPTY_STATIC_ROW : staticRow;
this.stats = stats;
}
}
public DeletionInfo deletionInfo()
{
return holder().deletionInfo;
}
public Row staticRow()
{
return holder().staticRow;
}
public boolean isEmpty()
{
Holder holder = holder();
return holder.deletionInfo.isLive() && BTree.isEmpty(holder.tree) && holder.staticRow.isEmpty();
}
public boolean hasRows()
{
Holder holder = holder();
return !BTree.isEmpty(holder.tree);
}
public CFMetaData metadata()
{
return metadata;
}
public DecoratedKey partitionKey()
{
return partitionKey;
}
public DeletionTime partitionLevelDeletion()
{
return deletionInfo().getPartitionDeletion();
}
public PartitionColumns columns()
{
return holder().columns;
}
public EncodingStats stats()
{
return holder().stats;
}
public Row getRow(Clustering clustering)
{
Row row = searchIterator(ColumnFilter.selection(columns()), false).next(clustering);
// Note that for statics, this will never return null, this will return an empty row. However,
// it's more consistent for this method to return null if we don't really have a static row.
return row == null || (clustering == Clustering.STATIC_CLUSTERING && row.isEmpty()) ? null : row;
}
private Row staticRow(Holder current, ColumnFilter columns, boolean setActiveDeletionToRow)
{
DeletionTime partitionDeletion = current.deletionInfo.getPartitionDeletion();
if (columns.fetchedColumns().statics.isEmpty() || (current.staticRow.isEmpty() && partitionDeletion.isLive()))
return Rows.EMPTY_STATIC_ROW;
Row row = current.staticRow.filter(columns, partitionDeletion, setActiveDeletionToRow, metadata);
return row == null ? Rows.EMPTY_STATIC_ROW : row;
}
public SearchIterator<Clustering, Row> searchIterator(final ColumnFilter columns, final boolean reversed)
{
// TODO: we could optimize comparison for "NativeRow" à la #6755
final Holder current = holder();
return new SearchIterator<Clustering, Row>()
{
private final SearchIterator<Clustering, Row> rawIter = new BTreeSearchIterator<>(current.tree, metadata.comparator, desc(reversed));
private final DeletionTime partitionDeletion = current.deletionInfo.getPartitionDeletion();
public Row next(Clustering clustering)
{
if (clustering == Clustering.STATIC_CLUSTERING)
return staticRow(current, columns, true);
Row row = rawIter.next(clustering);
RangeTombstone rt = current.deletionInfo.rangeCovering(clustering);
// A search iterator only return a row, so it doesn't allow to directly account for deletion that should apply to to row
// (the partition deletion or the deletion of a range tombstone that covers it). So if needs be, reuse the row deletion
// to carry the proper deletion on the row.
DeletionTime activeDeletion = partitionDeletion;
if (rt != null && rt.deletionTime().supersedes(activeDeletion))
activeDeletion = rt.deletionTime();
if (row == null)
{
// this means our partition level deletion superseedes all other deletions and we don't have to keep the row deletions
if (activeDeletion == partitionDeletion)
return null;
// no need to check activeDeletion.isLive here - if anything superseedes the partitionDeletion
// it must be non-live
return BTreeRow.emptyDeletedRow(clustering, Row.Deletion.regular(activeDeletion));
}
return row.filter(columns, activeDeletion, true, metadata);
}
};
}
public UnfilteredRowIterator unfilteredIterator()
{
return unfilteredIterator(ColumnFilter.all(metadata()), Slices.ALL, false);
}
public UnfilteredRowIterator unfilteredIterator(ColumnFilter selection, Slices slices, boolean reversed)
{
return unfilteredIterator(holder(), selection, slices, reversed);
}
public UnfilteredRowIterator unfilteredIterator(Holder current, ColumnFilter selection, Slices slices, boolean reversed)
{
Row staticRow = staticRow(current, selection, false);
if (slices.size() == 0)
{
DeletionTime partitionDeletion = current.deletionInfo.getPartitionDeletion();
return UnfilteredRowIterators.noRowsIterator(metadata, partitionKey, staticRow, partitionDeletion, reversed);
}
return slices.size() == 1
? sliceIterator(selection, slices.get(0), reversed, current, staticRow)
: new SlicesIterator(selection, slices, reversed, current, staticRow);
}
private UnfilteredRowIterator sliceIterator(ColumnFilter selection, Slice slice, boolean reversed, Holder current, Row staticRow)
{
Slice.Bound start = slice.start() == Slice.Bound.BOTTOM ? null : slice.start();
Slice.Bound end = slice.end() == Slice.Bound.TOP ? null : slice.end();
Iterator<Row> rowIter = BTree.slice(current.tree, metadata.comparator, start, true, end, true, desc(reversed));
Iterator<RangeTombstone> deleteIter = current.deletionInfo.rangeIterator(slice, reversed);
return merge(rowIter, deleteIter, selection, reversed, current, staticRow);
}
private RowAndDeletionMergeIterator merge(Iterator<Row> rowIter, Iterator<RangeTombstone> deleteIter,
ColumnFilter selection, boolean reversed, Holder current, Row staticRow)
{
return new RowAndDeletionMergeIterator(metadata, partitionKey, current.deletionInfo.getPartitionDeletion(),
selection, staticRow, reversed, current.stats,
rowIter, deleteIter,
canHaveShadowedData());
}
private abstract class AbstractIterator extends AbstractUnfilteredRowIterator
{
final Holder current;
final ColumnFilter selection;
private AbstractIterator(ColumnFilter selection, boolean isReversed)
{
this(AbstractBTreePartition.this.holder(), selection, isReversed);
}
private AbstractIterator(Holder current, ColumnFilter selection, boolean isReversed)
{
this(current,
AbstractBTreePartition.this.staticRow(current, selection, false),
selection, isReversed);
}
private AbstractIterator(Holder current, Row staticRow, ColumnFilter selection, boolean isReversed)
{
super(AbstractBTreePartition.this.metadata,
AbstractBTreePartition.this.partitionKey,
current.deletionInfo.getPartitionDeletion(),
selection.fetchedColumns(), // non-selected columns will be filtered in subclasses by RowAndDeletionMergeIterator
// it would also be more precise to return the intersection of the selection and current.columns,
// but its probably not worth spending time on computing that.
staticRow,
isReversed,
current.stats);
this.current = current;
this.selection = selection;
}
}
public class SlicesIterator extends AbstractIterator
{
private final Slices slices;
private int idx;
private Iterator<Unfiltered> currentSlice;
private SlicesIterator(ColumnFilter selection,
Slices slices,
boolean isReversed,
Holder current,
Row staticRow)
{
super(current, staticRow, selection, isReversed);
this.slices = slices;
}
protected Unfiltered computeNext()
{
while (true)
{
if (currentSlice == null)
{
if (idx >= slices.size())
return endOfData();
int sliceIdx = isReverseOrder ? slices.size() - idx - 1 : idx;
currentSlice = sliceIterator(selection, slices.get(sliceIdx), isReverseOrder, current, Rows.EMPTY_STATIC_ROW);
idx++;
}
if (currentSlice.hasNext())
return currentSlice.next();
currentSlice = null;
}
}
}
public class SliceableIterator extends AbstractIterator implements SliceableUnfilteredRowIterator
{
private Iterator<Unfiltered> iterator;
protected SliceableIterator(ColumnFilter selection, boolean isReversed)
{
super(selection, isReversed);
}
protected Unfiltered computeNext()
{
if (iterator == null)
iterator = unfilteredIterator(selection, Slices.ALL, isReverseOrder);
if (!iterator.hasNext())
return endOfData();
return iterator.next();
}
public Iterator<Unfiltered> slice(Slice slice)
{
return sliceIterator(selection, slice, isReverseOrder, current, staticRow);
}
}
public SliceableUnfilteredRowIterator sliceableUnfilteredIterator(ColumnFilter columns, boolean reversed)
{
return new SliceableIterator(columns, reversed);
}
protected SliceableUnfilteredRowIterator sliceableUnfilteredIterator()
{
return sliceableUnfilteredIterator(ColumnFilter.all(metadata), false);
}
protected static Holder build(UnfilteredRowIterator iterator, int initialRowCapacity)
{
return build(iterator, initialRowCapacity, true, null);
}
protected static Holder build(UnfilteredRowIterator iterator, int initialRowCapacity, boolean ordered, BTree.Builder.QuickResolver<Row> quickResolver)
{
CFMetaData metadata = iterator.metadata();
PartitionColumns columns = iterator.columns();
boolean reversed = iterator.isReverseOrder();
BTree.Builder<Row> builder = BTree.builder(metadata.comparator, initialRowCapacity);
builder.auto(!ordered);
builder.setQuickResolver(quickResolver);
MutableDeletionInfo.Builder deletionBuilder = MutableDeletionInfo.builder(iterator.partitionLevelDeletion(), metadata.comparator, reversed);
while (iterator.hasNext())
{
Unfiltered unfiltered = iterator.next();
if (unfiltered.kind() == Unfiltered.Kind.ROW)
builder.add((Row)unfiltered);
else
deletionBuilder.add((RangeTombstoneMarker)unfiltered);
}
if (reversed)
builder.reverse();
return new Holder(columns, builder.build(), deletionBuilder.build(), iterator.staticRow(), iterator.stats());
}
// Note that when building with a RowIterator, deletion will generally be LIVE, but we allow to pass it nonetheless because PartitionUpdate
// passes a MutableDeletionInfo that it mutates later.
protected static Holder build(RowIterator rows, DeletionInfo deletion, boolean buildEncodingStats, int initialRowCapacity)
{
CFMetaData metadata = rows.metadata();
PartitionColumns columns = rows.columns();
boolean reversed = rows.isReverseOrder();
BTree.Builder<Row> builder = BTree.builder(metadata.comparator, initialRowCapacity);
builder.auto(false);
while (rows.hasNext())
{
Row row = rows.next();
builder.add(row);
}
if (reversed)
builder.reverse();
Row staticRow = rows.staticRow();
Object[] tree = builder.build();
EncodingStats stats = buildEncodingStats ? EncodingStats.Collector.collect(staticRow, BTree.iterator(tree), deletion)
: EncodingStats.NO_STATS;
return new Holder(columns, tree, deletion, staticRow, stats);
}
@Override
public String toString()
{
StringBuilder sb = new StringBuilder();
sb.append(String.format("[%s.%s] key=%s partition_deletion=%s columns=%s",
metadata.ksName,
metadata.cfName,
metadata.getKeyValidator().getString(partitionKey().getKey()),
partitionLevelDeletion(),
columns()));
if (staticRow() != Rows.EMPTY_STATIC_ROW)
sb.append("\n ").append(staticRow().toString(metadata, true));
try (UnfilteredRowIterator iter = unfilteredIterator())
{
while (iter.hasNext())
sb.append("\n ").append(iter.next().toString(metadata, true));
}
return sb.toString();
}
public int rowCount()
{
return BTree.size(holder().tree);
}
public Iterator<Row> iterator()
{
return BTree.<Row>iterator(holder().tree);
}
public Row lastRow()
{
Object[] tree = holder().tree;
if (BTree.isEmpty(tree))
return null;
return BTree.findByIndex(tree, BTree.size(tree) - 1);
}
}