/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.db.compaction;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import org.apache.cassandra.db.Directories;
import org.apache.cassandra.db.compaction.writers.CompactionAwareWriter;
import org.apache.cassandra.db.compaction.writers.DefaultCompactionWriter;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.ColumnFamilyStore;
import org.apache.cassandra.db.SystemKeyspace;
import org.apache.cassandra.db.compaction.CompactionManager.CompactionExecutorStatsCollector;
import org.apache.cassandra.db.lifecycle.LifecycleTransaction;
import org.apache.cassandra.service.ActiveRepairService;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.concurrent.Refs;
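/**
 * A standard, single-pass compaction task: merges the sstables held by the supplied
 * {@link LifecycleTransaction} through a {@link CompactionAwareWriter}, replaces the
 * originals with the newly written sstables and records the result in compaction history.
 *
 * Rough usage sketch (illustrative only, assuming candidate sstables, a gcBefore and a stats
 * collector are already at hand, and that Tracker#tryModify and AbstractCompactionTask#execute
 * behave as in contemporary Cassandra versions; in practice tasks are created by the compaction
 * strategy and submitted through CompactionManager):
 * <pre>{@code
 * LifecycleTransaction txn = cfs.getTracker().tryModify(sstables, OperationType.COMPACTION);
 * if (txn != null)
 *     new CompactionTask(cfs, txn, gcBefore).execute(collector);
 * }</pre>
 */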
public class CompactionTask extends AbstractCompactionTask
{
protected static final Logger logger = LoggerFactory.getLogger(CompactionTask.class);
protected final int gcBefore;
protected final boolean offline;
protected final boolean keepOriginals;
protected static long totalBytesCompacted = 0;
private CompactionExecutorStatsCollector collector;
public CompactionTask(ColumnFamilyStore cfs, LifecycleTransaction txn, int gcBefore)
{
this(cfs, txn, gcBefore, false, false);
}
public CompactionTask(ColumnFamilyStore cfs, LifecycleTransaction txn, int gcBefore, boolean offline, boolean keepOriginals)
{
super(cfs, txn);
this.gcBefore = gcBefore;
this.offline = offline;
this.keepOriginals = keepOriginals;
}
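/**
 * Adds bytesCompacted to the JVM-wide running total of compacted bytes and returns the new
 * total. Synchronized because several compaction threads may report results concurrently.
 */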
public static synchronized long addToTotalBytesCompacted(long bytesCompacted)
{
return totalBytesCompacted += bytesCompacted;
}
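/**
 * Records the stats collector and runs the compaction; returns the number of sstables that
 * were in the transaction when the task ran.
 */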
protected int executeInternal(CompactionExecutorStatsCollector collector)
{
this.collector = collector;
run();
return transaction.originals().size();
}
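/**
 * Drops the largest sstable from the transaction so the compaction has a chance of fitting in
 * the available disk space. Returns true if the scope was reduced, false if partial compactions
 * are not acceptable or only a single sstable remains.
 */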
public boolean reduceScopeForLimitedSpace()
{
if (partialCompactionsAcceptable() && transaction.originals().size() > 1)
{
// Try again w/o the largest one.
logger.warn("insufficient space to compact all requested files {}", StringUtils.join(transaction.originals(), ", "));
// Note that we have removed files that are still marked as compacting.
// This is suboptimal but ok since the caller will unmark all the sstables at the end.
SSTableReader removedSSTable = cfs.getMaxSizeFile(transaction.originals());
transaction.cancel(removedSSTable);
return true;
}
return false;
}
/**
* For internal use and testing only. The rest of the system should go through the submit* methods,
* which are properly serialized.
* Caller is in charge of marking/unmarking the sstables as compacting.
*/
protected void runMayThrow() throws Exception
{
// The collection of sstables passed may be empty (but not null); even if
// it is not empty, it may compact down to nothing if all rows are deleted.
assert transaction != null;
if (transaction.originals().isEmpty())
return;
// Note that the current compaction strategy is not necessarily the one this task was created under.
// This should be harmless; see comments to CFS.maybeReloadCompactionStrategy.
CompactionStrategyManager strategy = cfs.getCompactionStrategyManager();
if (DatabaseDescriptor.isSnapshotBeforeCompaction())
cfs.snapshotWithoutFlush(System.currentTimeMillis() + "-compact-" + cfs.name);
// note that we need an early, rough estimate of whether the compaction will fit on disk - this is pessimistic, but
// since we might remove sstables from the compaction in checkAvailableDiskSpace it needs to be done here
long expectedWriteSize = cfs.getExpectedCompactedFileSize(transaction.originals(), compactionType);
long earlySSTableEstimate = Math.max(1, expectedWriteSize / strategy.getMaxSSTableBytes());
checkAvailableDiskSpace(earlySSTableEstimate, expectedWriteSize);
// sanity check: all sstables must belong to the same cfs
assert !Iterables.any(transaction.originals(), new Predicate<SSTableReader>()
{
@Override
public boolean apply(SSTableReader sstable)
{
return !sstable.descriptor.cfname.equals(cfs.name);
}
});
UUID taskId = transaction.opId();
// new sstables from flush can be added during a compaction, but only the compaction can remove them,
// so in our single-threaded compaction world this is a valid way of determining if we're compacting
// all the sstables (that existed when we started)
StringBuilder ssTableLoggerMsg = new StringBuilder("[");
for (SSTableReader sstr : transaction.originals())
{
ssTableLoggerMsg.append(String.format("%s:level=%d, ", sstr.getFilename(), sstr.getSSTableLevel()));
}
ssTableLoggerMsg.append("]");
logger.debug("Compacting ({}) {}", taskId, ssTableLoggerMsg);
long start = System.nanoTime();
long totalKeysWritten = 0;
long estimatedKeys = 0;
try (CompactionController controller = getCompactionController(transaction.originals()))
{
Set<SSTableReader> actuallyCompact = Sets.difference(transaction.originals(), controller.getFullyExpiredSSTables());
Collection<SSTableReader> newSStables;
long[] mergedRowCounts;
// SSTableScanners need to be closed before markCompactedSSTablesReplaced call as scanners contain references
// to both ifile and dfile and SSTR will throw deletion errors on Windows if it tries to delete before scanner is closed.
// See CASSANDRA-8019 and CASSANDRA-8399
int nowInSec = FBUtilities.nowInSeconds();
try (Refs<SSTableReader> refs = Refs.ref(actuallyCompact);
AbstractCompactionStrategy.ScannerList scanners = strategy.getScanners(actuallyCompact);
CompactionIterator ci = new CompactionIterator(compactionType, scanners.scanners, controller, nowInSec, taskId))
{
if (collector != null)
collector.beginCompaction(ci);
long lastCheckObsoletion = start;
if (!controller.cfs.getCompactionStrategyManager().isActive)
throw new CompactionInterruptedException(ci.getCompactionInfo());
try (CompactionAwareWriter writer = getCompactionAwareWriter(cfs, getDirectories(), transaction, actuallyCompact))
{
estimatedKeys = writer.estimatedKeys();
while (ci.hasNext())
{
if (ci.isStopRequested())
throw new CompactionInterruptedException(ci.getCompactionInfo());
if (writer.append(ci.next()))
totalKeysWritten++;
if (System.nanoTime() - lastCheckObsoletion > TimeUnit.MINUTES.toNanos(1L))
{
controller.maybeRefreshOverlaps();
lastCheckObsoletion = System.nanoTime();
}
}
// point of no return
newSStables = writer.finish();
}
finally
{
if (collector != null)
collector.finishCompaction(ci);
mergedRowCounts = ci.getMergedRowCounts();
}
}
// log statistics about the result and save them to the compaction_history system table
long dTime = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
long startsize = SSTableReader.getTotalBytes(transaction.originals());
long endsize = SSTableReader.getTotalBytes(newSStables);
double ratio = (double) endsize / (double) startsize;
StringBuilder newSSTableNames = new StringBuilder();
for (SSTableReader reader : newSStables)
newSSTableNames.append(reader.descriptor.baseFilename()).append(",");
double mbps = dTime > 0 ? (double) endsize / (1024 * 1024) / ((double) dTime / 1000) : 0;
long totalSourceRows = 0;
String mergeSummary = updateCompactionHistory(cfs.keyspace.getName(), cfs.getColumnFamilyName(), mergedRowCounts, startsize, endsize);
logger.debug(String.format("Compacted (%s) %d sstables to [%s] to level=%d. %,d bytes to %,d (~%d%% of original) in %,dms = %fMB/s. %,d total partitions merged to %,d. Partition merge counts were {%s}",
taskId, transaction.originals().size(), newSSTableNames.toString(), getLevel(), startsize, endsize, (int) (ratio * 100), dTime, mbps, totalSourceRows, totalKeysWritten, mergeSummary));
logger.trace(String.format("CF Total Bytes Compacted: %,d", CompactionTask.addToTotalBytesCompacted(endsize)));
logger.trace("Actual #keys: {}, Estimated #keys:{}, Err%: {}", totalKeysWritten, estimatedKeys, ((double)(totalKeysWritten - estimatedKeys)/totalKeysWritten));
if (offline)
Refs.release(Refs.selfRefs(newSStables));
}
}
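/**
 * Creates the writer used to produce the output sstables. This base implementation writes
 * everything through a single DefaultCompactionWriter; subclasses override this to split the
 * output differently (e.g. per level or capped at a maximum sstable size).
 */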
@Override
public CompactionAwareWriter getCompactionAwareWriter(ColumnFamilyStore cfs,
Directories directories,
LifecycleTransaction transaction,
Set<SSTableReader> nonExpiredSSTables)
{
return new DefaultCompactionWriter(cfs, directories, transaction, nonExpiredSSTables, offline, keepOriginals);
}
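/**
 * Records a finished compaction in the compaction_history system table and returns a
 * human-readable merge summary: each "n:count" entry means count partitions were merged
 * from n input sstables.
 */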
public static String updateCompactionHistory(String keyspaceName, String columnFamilyName, long[] mergedRowCounts, long startSize, long endSize)
{
StringBuilder mergeSummary = new StringBuilder(mergedRowCounts.length * 10);
Map<Integer, Long> mergedRows = new HashMap<>();
for (int i = 0; i < mergedRowCounts.length; i++)
{
long count = mergedRowCounts[i];
if (count == 0)
continue;
int rows = i + 1;
mergeSummary.append(String.format("%d:%d, ", rows, count));
mergedRows.put(rows, count);
}
SystemKeyspace.updateCompactionHistory(keyspaceName, columnFamilyName, System.currentTimeMillis(), startSize, endSize, mergedRows);
return mergeSummary.toString();
}
protected Directories getDirectories()
{
return cfs.getDirectories();
}
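/**
 * Returns the smallest repairedAt timestamp among the given sstables (so the output never
 * claims to be more repaired than its least-repaired input), or
 * ActiveRepairService.UNREPAIRED_SSTABLE if the set is empty.
 */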
public static long getMinRepairedAt(Set<SSTableReader> actuallyCompact)
{
long minRepairedAt = Long.MAX_VALUE;
for (SSTableReader sstable : actuallyCompact)
minRepairedAt = Math.min(minRepairedAt, sstable.getSSTableMetadata().repairedAt);
if (minRepairedAt == Long.MAX_VALUE)
return ActiveRepairService.UNREPAIRED_SSTABLE;
return minRepairedAt;
}
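/**
 * Verifies that the data directories can hold the estimated compaction output. While there is
 * not enough space, the task keeps dropping its largest sstable via reduceScopeForLimitedSpace();
 * if the scope cannot be reduced any further a RuntimeException is thrown. The check can be
 * disabled for regular compactions via cfs.isCompactionDiskSpaceCheckEnabled().
 */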
protected void checkAvailableDiskSpace(long estimatedSSTables, long expectedWriteSize)
{
if (!cfs.isCompactionDiskSpaceCheckEnabled() && compactionType == OperationType.COMPACTION)
{
logger.info("Compaction space check is disabled");
return;
}
while (!getDirectories().hasAvailableDiskSpace(estimatedSSTables, expectedWriteSize))
{
if (!reduceScopeForLimitedSpace())
throw new RuntimeException(String.format("Not enough space for compaction, estimated sstables = %d, expected write size = %d", estimatedSSTables, expectedWriteSize));
}
}
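/**
 * The level assigned to the newly written sstables; 0 here, overridden by subclasses
 * (e.g. leveled compaction) that target a specific level.
 */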
protected int getLevel()
{
return 0;
}
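/**
 * Creates the CompactionController that decides tombstone purging and expiration for this task.
 */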
protected CompactionController getCompactionController(Set<SSTableReader> toCompact)
{
return new CompactionController(cfs, toCompact, gcBefore);
}
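/**
 * Whether this task may shrink its scope when disk space is tight; user-defined compactions
 * must compact exactly the sstables they were given, so they return false.
 */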
protected boolean partialCompactionsAcceptable()
{
return !isUserDefined;
}
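/**
 * Returns the largest maxDataAge among the given sstables (typically carried over to the
 * sstables the compaction produces).
 */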
public static long getMaxDataAge(Collection<SSTableReader> sstables)
{
long max = 0;
for (SSTableReader sstable : sstables)
{
if (sstable.maxDataAge > max)
max = sstable.maxDataAge;
}
return max;
}
}