blob: 9dae11c1c0eedbd99df4aaa985bc929d10b90888 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.service.pager;
import org.apache.cassandra.transport.ProtocolVersion;
import org.apache.cassandra.utils.AbstractIterator;
import java.util.Arrays;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.rows.*;
import org.apache.cassandra.db.filter.DataLimits;
import org.apache.cassandra.db.partitions.*;
import org.apache.cassandra.exceptions.RequestValidationException;
import org.apache.cassandra.exceptions.RequestExecutionException;
import org.apache.cassandra.service.ClientState;
/**
* Pager over a list of ReadCommand.
*
* Note that this is not easy to make efficient. Indeed, we need to page the first command fully before
* returning results from the next one, but if the result returned by each command is small (compared to pageSize),
* paging the commands one at a time under-performs compared to parallelizing. On the other, if we parallelize
* and each command raised pageSize results, we'll end up with commands.size() * pageSize results in memory, which
* defeats the purpose of paging.
*
* For now, we keep it simple (somewhat) and just do one command at a time. Provided that we make sure to not
* create a pager unless we need to, this is probably fine. Though if we later want to get fancy, we could use the
* cfs meanPartitionSize to decide if parallelizing some of the command might be worth it while being confident we don't
* blow out memory.
*/
public class MultiPartitionPager implements QueryPager
{
private final SinglePartitionPager[] pagers;
private final DataLimits limit;
private final int nowInSec;
private int remaining;
private int current;
public MultiPartitionPager(SinglePartitionReadCommand.Group group, PagingState state, ProtocolVersion protocolVersion)
{
this.limit = group.limits();
this.nowInSec = group.nowInSec();
int i = 0;
// If it's not the beginning (state != null), we need to find where we were and skip previous commands
// since they are done.
if (state != null)
for (; i < group.commands.size(); i++)
if (group.commands.get(i).partitionKey().getKey().equals(state.partitionKey))
break;
if (i >= group.commands.size())
{
pagers = null;
return;
}
pagers = new SinglePartitionPager[group.commands.size() - i];
// 'i' is on the first non exhausted pager for the previous page (or the first one)
SinglePartitionReadCommand command = group.commands.get(i);
pagers[0] = command.getPager(state, protocolVersion);
// Following ones haven't been started yet
for (int j = i + 1; j < group.commands.size(); j++)
pagers[j - i] = group.commands.get(j).getPager(null, protocolVersion);
remaining = state == null ? limit.count() : state.remaining;
}
private MultiPartitionPager(SinglePartitionPager[] pagers,
DataLimits limit,
int nowInSec,
int remaining,
int current)
{
this.pagers = pagers;
this.limit = limit;
this.nowInSec = nowInSec;
this.remaining = remaining;
this.current = current;
}
public QueryPager withUpdatedLimit(DataLimits newLimits)
{
SinglePartitionPager[] newPagers = Arrays.copyOf(pagers, pagers.length);
newPagers[current] = newPagers[current].withUpdatedLimit(newLimits);
return new MultiPartitionPager(newPagers,
newLimits,
nowInSec,
remaining,
current);
}
public PagingState state()
{
// Sets current to the first non-exhausted pager
if (isExhausted())
return null;
PagingState state = pagers[current].state();
return new PagingState(pagers[current].key(), state == null ? null : state.rowMark, remaining, pagers[current].remainingInPartition());
}
public boolean isExhausted()
{
if (remaining <= 0 || pagers == null)
return true;
while (current < pagers.length)
{
if (!pagers[current].isExhausted())
return false;
current++;
}
return true;
}
public ReadExecutionController executionController()
{
// Note that for all pagers, the only difference is the partition key to which it applies, so in practice we
// can use any of the sub-pager ReadOrderGroup group to protect the whole pager
for (int i = current; i < pagers.length; i++)
{
if (pagers[i] != null)
return pagers[i].executionController();
}
throw new AssertionError("Shouldn't be called on an exhausted pager");
}
@SuppressWarnings("resource") // iter closed via countingIter
public PartitionIterator fetchPage(int pageSize, ConsistencyLevel consistency, ClientState clientState, long queryStartNanoTime) throws RequestValidationException, RequestExecutionException
{
int toQuery = Math.min(remaining, pageSize);
return new PagersIterator(toQuery, consistency, clientState, null, queryStartNanoTime);
}
@SuppressWarnings("resource") // iter closed via countingIter
public PartitionIterator fetchPageInternal(int pageSize, ReadExecutionController executionController) throws RequestValidationException, RequestExecutionException
{
int toQuery = Math.min(remaining, pageSize);
return new PagersIterator(toQuery, null, null, executionController, System.nanoTime());
}
private class PagersIterator extends AbstractIterator<RowIterator> implements PartitionIterator
{
private final int pageSize;
private PartitionIterator result;
private boolean closed;
private final long queryStartNanoTime;
// For "normal" queries
private final ConsistencyLevel consistency;
private final ClientState clientState;
// For internal queries
private final ReadExecutionController executionController;
private int pagerMaxRemaining;
private int counted;
public PagersIterator(int pageSize, ConsistencyLevel consistency, ClientState clientState, ReadExecutionController executionController, long queryStartNanoTime)
{
this.pageSize = pageSize;
this.consistency = consistency;
this.clientState = clientState;
this.executionController = executionController;
this.queryStartNanoTime = queryStartNanoTime;
}
protected RowIterator computeNext()
{
while (result == null || !result.hasNext())
{
if (result != null)
{
result.close();
counted += pagerMaxRemaining - pagers[current].maxRemaining();
}
// We are done if we have reached the page size or in the case of GROUP BY if the current pager
// is not exhausted.
boolean isDone = counted >= pageSize
|| (result != null && limit.isGroupByLimit() && !pagers[current].isExhausted());
// isExhausted() will sets us on the first non-exhausted pager
if (isDone || isExhausted())
{
closed = true;
return endOfData();
}
pagerMaxRemaining = pagers[current].maxRemaining();
int toQuery = pageSize - counted;
result = consistency == null
? pagers[current].fetchPageInternal(toQuery, executionController)
: pagers[current].fetchPage(toQuery, consistency, clientState, queryStartNanoTime);
}
return result.next();
}
public void close()
{
remaining -= counted;
if (result != null && !closed)
result.close();
}
}
public int maxRemaining()
{
return remaining;
}
}