// blob: 41ec676d32a5b86c7755e645188c73608ddf888a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.run;
import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.io.Closer;
import joptsimple.OptionParser;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.FileIOUtils;
import org.apache.jackrabbit.oak.commons.FileIOUtils.BurnOnCloseFileIterator;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.commons.concurrent.ExecutorCloser;
import org.apache.jackrabbit.oak.commons.sort.EscapeUtils;
import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
import org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector;
import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
import org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
import org.apache.jackrabbit.oak.run.cli.BlobStoreOptions;
import org.apache.jackrabbit.oak.run.cli.CommonOptions;
import org.apache.jackrabbit.oak.run.cli.NodeStoreFixture;
import org.apache.jackrabbit.oak.run.cli.NodeStoreFixtureProvider;
import org.apache.jackrabbit.oak.run.cli.Options;
import org.apache.jackrabbit.oak.run.commons.Command;
import org.apache.jackrabbit.oak.run.commons.LoggingInitializer;
import org.apache.jackrabbit.oak.segment.SegmentBlobReferenceRetriever;
import org.apache.jackrabbit.oak.segment.file.ReadOnlyFileStore;
import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
import org.apache.jackrabbit.oak.spi.cluster.ClusterRepositoryInfo;
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.apache.jackrabbit.oak.stats.StatisticsProvider;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.google.common.base.Charsets.UTF_8;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.AZURE;
import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.FAKE;
import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.FDS;
import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.S3;
import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.getService;
/**
 * Command to check data store consistency and also optionally retrieve ids
 * and references.
 * <p>
 * The command either runs a consistency check or a garbage collection through a
 * {@link MarkSweepGarbageCollector}. When the verbose option is set, the candidate
 * file produced by the consistency check is rewritten with backend specific ids.
 */
public class DataStoreCommand implements Command {
    private static final Logger log = LoggerFactory.getLogger(DataStoreCommand.class);

    /** Name under which this command is registered. */
    public static final String NAME = "datastore";

    private static final String summary = "Provides DataStore management operations";

    private Options opts;
    private DataStoreOptions dataStoreOpts;

    /**
     * Parses the command line, prepares the directories and logging, and runs the
     * selected data store operation.
     * <p>
     * Any {@link Throwable} raised while the operation runs is logged, printed to
     * {@code System.err} and turned into a JVM exit with status 1. When the
     * parameter check fails the help is printed and the method returns normally.
     *
     * @param args the command line arguments
     * @throws Exception if option parsing, directory clean up or logging set up fails
     */
    @Override
    public void execute(String... args) throws Exception {
        OptionParser parser = new OptionParser();

        opts = new Options();
        opts.setCommandName(NAME);
        opts.setSummary(summary);
        opts.setConnectionString(CommonOptions.DEFAULT_CONNECTION_STRING);
        opts.registerOptionsFactory(DataStoreOptions.FACTORY);
        opts.parseAndConfigure(parser, args);

        dataStoreOpts = opts.getOptionBean(DataStoreOptions.class);

        //Clean up before setting up NodeStore as the temp
        //directory might be used by NodeStore for cache stuff like persistentCache
        setupDirectories(dataStoreOpts);
        setupLogging(dataStoreOpts);

        logCliArgs(args);

        boolean success = false;
        try (Closer closer = Closer.create()) {
            opts.setTempDirectory(dataStoreOpts.getWorkDir().getAbsolutePath());
            NodeStoreFixture fixture = NodeStoreFixtureProvider.create(opts);
            closer.register(fixture);

            if (!checkParameters(dataStoreOpts, opts, fixture, parser)) {
                // Returning from inside the try leaves the method once the resources
                // are closed and the finally block has run, so the exit below is skipped.
                return;
            }
            execute(fixture, dataStoreOpts, opts, closer);
            success = true;
        } catch (Throwable e) {
            // Reported through the logger and on stderr before the non-zero exit below.
            log.error("Error occurred while performing datastore operation", e);
            e.printStackTrace(System.err);
        } finally {
            shutdownLogging();
        }

        if (!success) {
            System.exit(1);
        }
    }

    /**
     * Verifies that an action was selected and that the required stores are available.
     * Prints the help to {@code System.out} when a check fails.
     *
     * @param dataStoreOpts the data store specific options
     * @param opts the parsed options
     * @param fixture the fixture giving access to the node store and the blob store
     * @param parser the parser used to print the help
     * @return {@code true} if the command can proceed, {@code false} otherwise
     * @throws IOException if the help could not be printed
     */
    private static boolean checkParameters(DataStoreOptions dataStoreOpts, Options opts, NodeStoreFixture fixture,
        OptionParser parser) throws IOException {

        if (!dataStoreOpts.anyActionSelected()) {
            log.info("No actions specified");
            parser.printHelpOn(System.out);
            return false;
        } else if (fixture.getStore() == null) {
            log.info("No NodeStore specified");
            parser.printHelpOn(System.out);
            return false;
        } else if (!opts.getCommonOpts().isDocument() && fixture.getBlobStore() == null) {
            // A missing blob store is only rejected for non-document node stores.
            log.info("No BlobStore specified");
            parser.printHelpOn(System.out);
            return false;
        }
        return true;
    }

    /**
     * Runs the selected operation. The consistency check takes precedence over
     * garbage collection when both are selected.
     *
     * @param fixture the fixture giving access to the node store and the blob store
     * @param dataStoreOpts the data store specific options
     * @param opts the parsed options
     * @param closer the closer on which resources created here are registered
     * @throws Exception if the operation fails
     */
    private void execute(NodeStoreFixture fixture, DataStoreOptions dataStoreOpts, Options opts, Closer closer)
        throws Exception {

        MarkSweepGarbageCollector collector = getCollector(fixture, dataStoreOpts, opts, closer);
        if (dataStoreOpts.checkConsistency()) {
            long missing = collector.checkConsistency();
            log.warn("Found {} missing blobs", missing);

            if (dataStoreOpts.isVerbose()) {
                new VerboseIdLogger(opts).log();
            }
        } else if (dataStoreOpts.collectGarbage()) {
            collector.collectGarbage(dataStoreOpts.markOnly());
        }
    }

    /**
     * Empties the output directory, if it exists, and the work directory.
     *
     * @param opts the data store specific options providing both directories
     * @throws IOException if a directory could not be cleaned
     */
    private static void setupDirectories(DataStoreOptions opts) throws IOException {
        if (opts.getOutDir().exists()) {
            FileUtils.cleanDirectory(opts.getOutDir());
        }
        // NOTE(review): unlike the output directory this is not guarded by exists();
        // presumably getWorkDir() guarantees the directory is present - confirm.
        FileUtils.cleanDirectory(opts.getWorkDir());
    }

    /**
     * Creates the garbage collector for the configured node store and blob store.
     * <p>
     * Document node stores use a {@link DocumentBlobReferenceRetriever}. Other stores use
     * a {@link NodeTraverserReferenceRetriever} in verbose mode and a
     * {@link SegmentBlobReferenceRetriever} otherwise.
     *
     * @param fixture the fixture giving access to the stores and the whiteboard
     * @param dataStoreOpts the data store specific options
     * @param opts the parsed options
     * @param closer the closer on which the created executor is registered
     * @return the collector, with trace output enabled
     * @throws IOException if the collector could not be created
     */
    private static MarkSweepGarbageCollector getCollector(NodeStoreFixture fixture, DataStoreOptions dataStoreOpts,
        Options opts, Closer closer) throws IOException {

        BlobReferenceRetriever retriever;
        if (opts.getCommonOpts().isDocument()) {
            retriever = new DocumentBlobReferenceRetriever((DocumentNodeStore) fixture.getStore());
        } else if (dataStoreOpts.isVerbose()) {
            retriever = new NodeTraverserReferenceRetriever(fixture.getStore());
        } else {
            ReadOnlyFileStore fileStore = getService(fixture.getWhiteboard(), ReadOnlyFileStore.class);
            retriever = new SegmentBlobReferenceRetriever(fileStore);
        }

        ExecutorService service = Executors.newSingleThreadExecutor();
        closer.register(new ExecutorCloser(service));

        String repositoryId = ClusterRepositoryInfo.getId(fixture.getStore());
        checkNotNull(repositoryId, "Repository id could not be determined for the NodeStore");

        MarkSweepGarbageCollector collector =
            new MarkSweepGarbageCollector(retriever, (GarbageCollectableBlobStore) fixture.getBlobStore(), service,
                dataStoreOpts.getOutDir().getAbsolutePath(), dataStoreOpts.getBatchCount(),
                SECONDS.toMillis(dataStoreOpts.getBlobGcMaxAgeInSecs()), repositoryId, fixture.getWhiteboard(),
                getService(fixture.getWhiteboard(), StatisticsProvider.class));
        collector.setTraceOutput(true);

        return collector;
    }

    /**
     * Initializes logging for this command using the work directory.
     *
     * @param dataStoreOpts the data store specific options
     * @throws IOException if logging could not be initialized
     */
    protected static void setupLogging(DataStoreOptions dataStoreOpts) throws IOException {
        new LoggingInitializer(dataStoreOpts.getWorkDir(), NAME, dataStoreOpts.isResetLoggingConfig()).init();
    }

    /** Shuts down the logging set up by {@link #setupLogging(DataStoreOptions)}. */
    private static void shutdownLogging() {
        LoggingInitializer.shutdownLogging();
    }

    /**
     * Logs the command line arguments and, if any were passed, the JVM input arguments.
     *
     * @param args the command line arguments
     */
    private static void logCliArgs(String[] args) {
        log.info("Command line arguments used for datastore command [{}]", Joiner.on(' ').join(args));
        List<String> inputArgs = ManagementFactory.getRuntimeMXBean().getInputArguments();
        if (!inputArgs.isEmpty()) {
            log.info("System properties and vm options passed {}", inputArgs);
        }
    }

    /**
     * {@link BlobReferenceRetriever} instance which iterates over the whole node store to find
     * blobs being referred. Useful when path of those blobs needed and the underlying {@link NodeStore}
     * native implementation does not provide that.
     */
    static class NodeTraverserReferenceRetriever implements BlobReferenceRetriever {
        private final NodeStore nodeStore;

        public NodeTraverserReferenceRetriever(NodeStore nodeStore) {
            this.nodeStore = nodeStore;
        }

        /**
         * Reports every binary held by the properties of the given node. References are
         * reported against the path of the node, not the path of the property.
         *
         * @param state the node whose properties are inspected
         * @param path the path of the node
         * @param collector the collector receiving the content identity and the path
         */
        private void binaryProperties(NodeState state, String path, ReferenceCollector collector) {
            for (PropertyState p : state.getProperties()) {
                if (p.getType() == Type.BINARY) {
                    addReference(p.getValue(Type.BINARY), path, collector);
                } else if (p.getType() == Type.BINARIES && p.count() > 0) {
                    for (Blob blob : p.getValue(Type.BINARIES)) {
                        addReference(blob, path, collector);
                    }
                }
            }
        }

        /** Reports the blob to the collector unless it has no content identity. */
        private static void addReference(Blob blob, String path, ReferenceCollector collector) {
            String blobId = blob.getContentIdentity();
            if (blobId != null) {
                collector.addReference(blobId, path);
            }
        }

        /**
         * Reports the binaries of the given node and then recurses into all of its children.
         *
         * @param state the node to start from
         * @param path the path of the node
         * @param collector the collector receiving the references
         */
        private void traverseChildren(NodeState state, String path, ReferenceCollector collector) {
            binaryProperties(state, path, collector);
            for (ChildNodeEntry c : state.getChildNodeEntries()) {
                traverseChildren(c.getNodeState(), PathUtils.concat(path, c.getName()), collector);
            }
        }

        /**
         * Collects the blob references by traversing the node store from its root.
         *
         * @param collector the collector receiving the references
         */
        @Override
        public void collectReferences(ReferenceCollector collector) throws IOException {
            log.info("Starting dump of blob references by traversing");
            traverseChildren(nodeStore.getRoot(), "/", collector);
        }
    }

    /**
     * Rewrites the candidate file found in the output directory so that every line
     * carries the id in the layout selected by the configured blob store type.
     */
    static class VerboseIdLogger {
        static final String DELIM = ",";
        static final String DASH = "-";
        static final String HASH = "#";

        /** Orders lines by the text preceding the first {@link #DELIM}. */
        static final Comparator<String> idComparator =
            (s1, s2) -> s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);

        private static final Joiner delimJoiner = Joiner.on(DELIM).skipNulls();
        private static final Splitter delimSplitter = Splitter.on(DELIM).trimResults().omitEmptyStrings();

        private final BlobStoreOptions optionBean;
        private final BlobStoreOptions.Type blobStoreType;
        private final File outDir;
        private final File outFile;

        /**
         * @param options the parsed options providing the blob store type and the output directory
         * @throws IllegalArgumentException if no file prefixed with {@code gccand-} is found
         */
        public VerboseIdLogger(Options options) {
            this.optionBean = options.getOptionBean(BlobStoreOptions.class);
            this.blobStoreType = optionBean.getBlobStoreType();
            outDir = options.getOptionBean(DataStoreOptions.class).getOutDir();

            outFile = filterFiles(outDir, "gccand-");
            if (outFile == null) {
                throw new IllegalArgumentException("No candidate file found");
            }
        }

        /**
         * Looks for a file with the given name prefix inside a sub directory of
         * {@code outDir} whose name starts with {@code gcworkdir-}. Only the first
         * matching sub directory and the first matching file in it are considered.
         *
         * @param outDir the directory to search
         * @param prefix the required prefix of the file name
         * @return the file, or {@code null} if there is no matching directory or file
         */
        @Nullable
        static File filterFiles(File outDir, String prefix) {
            List<File> subDirs = FileFilterUtils.filterList(FileFilterUtils
                    .and(FileFilterUtils.prefixFileFilter("gcworkdir-"), FileFilterUtils.directoryFileFilter()),
                outDir.listFiles());

            if (subDirs != null && !subDirs.isEmpty()) {
                File workDir = subDirs.get(0);
                List<File> outFiles =
                    FileFilterUtils.filterList(FileFilterUtils.prefixFileFilter(prefix), workDir.listFiles());

                if (outFiles != null && !outFiles.isEmpty()) {
                    return outFiles.get(0);
                }
            }

            return null;
        }

        /**
         * Converts a line of the form {@code id[#suffix][,path]} into {@code encodedId[,path]}.
         * <p>
         * The part of the id following {@link #HASH} is dropped. For {@code FAKE} and
         * {@code FDS} the id is prefixed with three levels made of its first six characters,
         * two per level, joined by the file separator. For {@code S3} and {@code AZURE} a
         * {@link #DASH} is inserted after the fourth character. Other types keep the id as is.
         * Line breaks escaped in the path are unescaped. A line without a path yields the
         * encoded id alone.
         *
         * @param line the line to convert; needs at least one non-empty field, and an id of
         *             at least 6 characters for FAKE/FDS or 4 characters for S3/AZURE
         * @param dsType the blob store type selecting the id layout
         * @return the converted line
         */
        static String encodeId(String line, BlobStoreOptions.Type dsType) {
            List<String> list = delimSplitter.splitToList(line);

            String id = list.get(0);
            List<String> idLengthSepList = Splitter.on(HASH).trimResults().omitEmptyStrings().splitToList(id);
            String blobId = idLengthSepList.get(0);

            if (dsType == FAKE || dsType == FDS) {
                String sep = FILE_SEPARATOR.value();
                blobId = blobId.substring(0, 2) + sep + blobId.substring(2, 4) + sep
                    + blobId.substring(4, 6) + sep + blobId;
            } else if (dsType == S3 || dsType == AZURE) {
                blobId = blobId.substring(0, 4) + DASH + blobId.substring(4);
            }

            if (list.size() > 1) {
                // NOTE(review): only the second field is kept, so a path containing the
                // delimiter is cut at its first occurrence - confirm this is intended.
                return delimJoiner.join(blobId, EscapeUtils.unescapeLineBreaks(list.get(1)));
            }
            // No path recorded for this id, emit the encoded id on its own.
            return blobId;
        }

        /**
         * Moves the candidate file to a {@code -temp} sibling in the output directory and
         * writes every line of it, converted by {@link #encodeId(String, BlobStoreOptions.Type)},
         * back to the original location.
         *
         * @throws IOException if the file could not be moved, read or written
         */
        public void log() throws IOException {
            File tempFile = new File(outDir, outFile.getName() + "-temp");
            FileUtils.moveFile(outFile, tempFile);
            try (BurnOnCloseFileIterator iterator =
                    new BurnOnCloseFileIterator(FileUtils.lineIterator(tempFile, UTF_8.toString()), tempFile,
                        (Function<String, String>) input -> encodeId(input, blobStoreType))) {
                FileIOUtils.writeStrings(iterator, outFile, true, log, "Transformed to verbose ids - ");
            }
        }
    }
}