/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.run;
import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR;
import static com.google.common.base.StandardSystemProperty.JAVA_IO_TMPDIR;
import static com.google.common.base.Stopwatch.createStarted;
import static com.google.common.io.Closeables.close;
import static java.io.File.createTempFile;
import static java.util.Arrays.asList;
import static org.apache.commons.io.FileUtils.forceDelete;
import static org.apache.commons.io.FileUtils.listFiles;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeAsLine;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
import static org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentNodeStoreBuilder.newMongoDocumentNodeStoreBuilder;
import static org.apache.jackrabbit.oak.segment.file.FileStoreBuilder.fileStoreBuilder;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Maps;
import com.google.common.io.Closeables;
import com.google.common.io.Closer;
import com.google.common.io.Files;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.MongoURI;
import joptsimple.ArgumentAcceptingOptionSpec;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;
import joptsimple.OptionSpecBuilder;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.FileIOUtils;
import org.apache.jackrabbit.oak.commons.FileIOUtils.FileLineDifferenceIterator;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
import org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
import org.apache.jackrabbit.oak.plugins.segment.SegmentNodeStore;
import org.apache.jackrabbit.oak.run.commons.Command;
import org.apache.jackrabbit.oak.segment.SegmentBlobReferenceRetriever;
import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders;
import org.apache.jackrabbit.oak.segment.file.FileStore;
import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.jetbrains.annotations.Nullable;
/**
* Command to check data store consistency and optionally retrieve blob ids
* and references.
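* <p>
* Example invocation (paths are illustrative):
* <pre>
* datastorecheck --consistency --store /path/to/segmentstore \
* --fds /path/to/datastore.properties --repoHome /path/to/repository --dump /tmp
* </pre>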
*/
public class DataStoreCheckCommand implements Command {
private static final String DELIM = ",";
private static final String FDS = "--fds";
private static final String S3DS = "--s3ds";
private static final String AZUREDS = "--azureblobds";
private static final String DASH = "-";
private static final String HASH = "#";
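// Compares lines by the blob id portion, i.e. the part before the first delimiter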
private static final Comparator<String> idComparator = new Comparator<String>() {
@Override
public int compare(String s1, String s2) {
return s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);
}
};
@Override
public void execute(String... args) throws Exception {
System.exit(checkDataStore(args));
}
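/**
* Parses the command line options, wires up the blob store and node store,
* and runs the requested operations ({@code --id}, {@code --ref},
* {@code --consistency}). Returns 0 on success, 1 on a parse error or an
* unsupported configuration.
*/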
static int checkDataStore(String... args) {
OptionParser parser = new OptionParser();
parser.allowsUnrecognizedOptions();
String helpStr =
"datastorecheck [--id] [--ref] [--consistency] [--store <path>|<mongo_uri>] "
+ "[--s3ds <s3ds_config>|--fds <fds_config>|--azureblobds <azureblobds_config>|--nods]"
+ " [--dump <path>] [--repoHome <repo_home>] [--track] [--verbose]";
try (Closer closer = Closer.create()) {
// Options for operations requested
OptionSpecBuilder idOp = parser.accepts("id", "Get ids");
OptionSpecBuilder refOp = parser.accepts("ref", "Get references");
OptionSpecBuilder consistencyOp = parser.accepts("consistency", "Check consistency");
// Node Store - needed for --ref, --consistency
ArgumentAcceptingOptionSpec<String> store = parser.accepts("store", "Node Store")
.requiredIf(refOp, consistencyOp).withRequiredArg().ofType(String.class);
// Optional argument to specify the dump path
ArgumentAcceptingOptionSpec<String> dump = parser.accepts("dump", "Dump Path")
.withRequiredArg().ofType(String.class);
// Optional argument to specify tracking
OptionSpecBuilder trackOverride = parser.accepts("track", "Force override tracked ids");
// Repository home, required for --track and --consistency to locate the tracked ids (otherwise results can be inconsistent)
ArgumentAcceptingOptionSpec<String> repoHome = parser.accepts("repoHome", "Local repository home folder")
.requiredIf(trackOverride, consistencyOp).withRequiredArg().ofType(String.class);
// Optional argument for verbose output of backend formatted ids/paths
OptionSpecBuilder verbose = parser.accepts("verbose", "Output backend formatted ids/paths");
OptionSpec<?> help = parser.acceptsAll(asList("h", "?", "help"),
"show help").forHelp();
// Required rules (any one of --id, --ref, --consistency)
idOp.requiredUnless(refOp, consistencyOp);
refOp.requiredUnless(idOp, consistencyOp);
consistencyOp.requiredUnless(idOp, refOp);
trackOverride.availableIf(idOp, consistencyOp);
OptionSet options = null;
try {
options = parser.parse(args);
} catch (Exception e) {
System.err.println(e);
System.err.println(Arrays.toString(args));
System.err.println();
System.err.println("Options :");
parser.printHelpOn(System.err);
return 1;
}
if (options.has(help)) {
parser.printHelpOn(System.out);
return 0;
}
String dumpPath = JAVA_IO_TMPDIR.value();
if (options.has(dump)) {
dumpPath = options.valueOf(dump);
}
GarbageCollectableBlobStore blobStore = null;
BlobReferenceRetriever marker = null;
NodeStore nodeStore = null;
if (options.has(store)) {
String source = options.valueOf(store);
if (source.startsWith(MongoURI.MONGODB_PREFIX)) {
MongoClientURI uri = new MongoClientURI(source);
MongoClient client = new MongoClient(uri);
DocumentNodeStore docNodeStore =
newMongoDocumentNodeStoreBuilder().setMongoDB(client, uri.getDatabase()).build();
closer.register(Utils.asCloseable(docNodeStore));
blobStore = (GarbageCollectableBlobStore) docNodeStore.getBlobStore();
marker = new DocumentBlobReferenceRetriever(docNodeStore);
nodeStore = docNodeStore;
} else {
FileStore fileStore = fileStoreBuilder(new File(source)).withStrictVersionCheck(true).build();
marker = new SegmentBlobReferenceRetriever(fileStore);
closer.register(fileStore);
nodeStore =
SegmentNodeStoreBuilders.builder(fileStore).build();
}
}
// Initialize S3/FileDataStore if configured
String dsType = "";
GarbageCollectableBlobStore dataStore = Utils.bootstrapDataStore(args, closer);
if (dataStore != null) {
dsType = getDSType(args);
blobStore = dataStore;
}
// A blob store still null at this point means the configuration is not supported
if (blobStore == null) {
System.err.println("Operation not defined for SegmentNodeStore without external datastore");
parser.printHelpOn(System.err);
return 1;
}
FileRegister register = new FileRegister(options);
closer.register(register);
if (options.has(idOp) || options.has(consistencyOp)) {
File idTemp = createTempFile("ids", null);
closer.register(new Closeable() {
@Override public void close() throws IOException {
forceDelete(idTemp);
}
});
File dumpFile = register.createFile(idOp, dumpPath);
retrieveBlobIds(blobStore, idTemp);
// If the repository home and track override are specified, copy the ids file to the tracking location
if (options.has(repoHome) && options.has(trackOverride)) {
String trackPath = options.valueOf(repoHome);
File trackingFileParent = new File(FilenameUtils.concat(trackPath, "blobids"));
File trackingFile = new File(trackingFileParent,
"blob-" + String.valueOf(System.currentTimeMillis()) + ".gen");
FileUtils.copyFile(idTemp, trackingFile);
}
if (options.has(verbose)) {
verboseIds(closer, dsType, idTemp, dumpFile);
} else {
FileUtils.copyFile(idTemp, dumpFile);
}
}
if (options.has(refOp) || options.has(consistencyOp)) {
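// For verbose output on a segment store (deprecated or current implementation),
// traverse the repository so references can be reported with their property paths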
if (options.has(verbose) &&
(nodeStore instanceof SegmentNodeStore ||
nodeStore instanceof org.apache.jackrabbit.oak.segment.SegmentNodeStore)) {
NodeTraverser traverser = new NodeTraverser(nodeStore, dsType);
closer.register(traverser);
traverser.traverse();
FileUtils.copyFile(traverser.references, register.createFile(refOp, dumpPath));
} else {
retrieveBlobReferences(blobStore, marker,
register.createFile(refOp, dumpPath), dsType, options.has(verbose));
}
}
if (options.has(consistencyOp)) {
checkConsistency(register.get(idOp), register.get(refOp),
register.createFile(consistencyOp, dumpPath), options.valueOf(repoHome), dsType);
}
return 0;
} catch (Throwable t) {
t.printStackTrace(System.err);
return 1;
}
}
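/**
* Re-writes the blob ids from the read file into the write file in the
* backend specific format (see {@link #encodeId(String, String)}), via a
* temp file registered with the given closer for deletion.
*/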
private static void verboseIds(Closer closer, final String dsType, File readFile, File writeFile) throws IOException {
LineIterator idIterator = FileUtils.lineIterator(readFile, Charsets.UTF_8.name());
try {
// Create a temp file for the backend formatted ids and register it with the closer
File longIdTemp = createTempFile("longids", null);
closer.register(new Closeable() {
@Override public void close() throws IOException {
forceDelete(longIdTemp);
}
});
// Read and write the converted ids
FileIOUtils.writeStrings(idIterator, longIdTemp, false, new Function<String, String>() {
@Nullable @Override public String apply(@Nullable String input) {
return encodeId(input, dsType);
}
}, null, null);
FileUtils.copyFile(longIdTemp, writeFile);
} finally {
if (idIterator != null) {
idIterator.close();
}
}
}
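/**
* Returns the data store option ({@code --s3ds}, {@code --fds} or
* {@code --azureblobds}) present in the arguments, or an empty string.
*/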
private static String getDSType(String[] args) {
List<String> argList = Arrays.asList(args);
if (argList.contains(S3DS)) {
return S3DS;
} else if (argList.contains(FDS)) {
return FDS;
} else if (argList.contains(AZUREDS)) {
return AZUREDS;
}
return "";
}
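/**
* Converts a blob id (with an optional {@code #length} suffix) into the
* layout used by the backend: for the FileDataStore a three-level directory
* path using the platform file separator, e.g. {@code 0a1b2c...} becomes
* {@code 0a/1b/2c/0a1b2c...}; for the S3 and Azure data stores a dash after
* the first four characters, e.g. {@code 0a1b2c...} becomes {@code 0a1b-2c...}.
*/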
static String encodeId(String id, String dsType) {
List<String> idLengthSepList = Splitter.on(HASH).trimResults().omitEmptyStrings().splitToList(id);
String blobId = idLengthSepList.get(0);
if (dsType.equals(FDS)) {
return (blobId.substring(0, 2) + FILE_SEPARATOR.value() + blobId.substring(2, 4) + FILE_SEPARATOR.value() + blobId
.substring(4, 6) + FILE_SEPARATOR.value() + blobId);
} else if (dsType.equals(S3DS) || dsType.equals(AZUREDS)) {
return (blobId.substring(0, 4) + DASH + blobId.substring(4));
}
return id;
}
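/**
* Reverses {@link #encodeId(String, String)}: strips any directory prefix
* and removes the dash separator to recover the plain blob id.
*/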
private static String decodeId(String id) {
List<String> list = Splitter.on(FILE_SEPARATOR.value()).trimResults().omitEmptyStrings().splitToList(id);
String pathStrippedId = list.get(list.size() - 1);
return Joiner.on("").join(Splitter.on(DASH).omitEmptyStrings().trimResults().splitToList(pathStrippedId));
}
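/**
* Keeps track of the output file created for each option and, on close,
* prints the path of every non-empty file belonging to a requested option
* and deletes the rest.
*/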
static class FileRegister implements Closeable {
Map<OptionSpec, File> opFiles = Maps.newHashMap();
String suffix = String.valueOf(System.currentTimeMillis());
OptionSet options;
public FileRegister(OptionSet options) {
this.options = options;
}
public File createFile(OptionSpec spec, String path) {
File f = new File(path, spec.toString() + suffix);
opFiles.put(spec, f);
return f;
}
public File get(OptionSpec spec) {
return opFiles.get(spec);
}
@Override
public void close() throws IOException {
Iterator<Map.Entry<OptionSpec, File>> iterator = opFiles.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry<OptionSpec, File> entry = iterator.next();
File f = entry.getValue();
if (options.has(entry.getKey()) && f.length() != 0) {
System.out.println(entry.getKey().toString() + " - " + f.getAbsolutePath());
} else {
if (f.exists()) {
forceDelete(f);
}
}
}
}
}
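/**
* Computes the ids present in the references file but missing from the ids
* file, filters out ids recorded as actively deleted in the tracked
* {@code .del} file under {@code <repoHome>/blobids}, and writes the
* remaining candidates to the missing file.
*/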
private static void checkConsistency(File ids, File refs, File missing, String trackRoot, String dsType)
throws IOException {
System.out.println("Starting consistency check");
Stopwatch watch = createStarted();
FileLineDifferenceIterator iter = new FileLineDifferenceIterator(ids, refs, new Function<String, String>() {
@Nullable
@Override
public String apply(@Nullable String input) {
if (input != null) {
return input.split(DELIM)[0];
}
return "";
}});
// write the candidates identified to a temp file
File candTemp = createTempFile("candTemp", null);
int candidates = writeStrings(iter, candTemp, false);
try {
// Retrieve the .del file from the tracking directory
File trackingFileParent = new File(FilenameUtils.concat(trackRoot, "blobids"));
if (trackingFileParent.exists()) {
Collection<File> files =
listFiles(trackingFileParent, FileFilterUtils.suffixFileFilter(".del"), null);
// If a delete file is present, filter out the tracked deleted ids
if (!files.isEmpty()) {
File delFile = files.iterator().next();
FileLineDifferenceIterator filteringIter = new FileLineDifferenceIterator(delFile, candTemp, new Function<String, String>() {
@Nullable @Override public String apply(@Nullable String input) {
if (input != null) {
return encodeId(decodeId(input.split(DELIM)[0]), dsType);
}
return "";
}
});
candidates = FileIOUtils.writeStrings(filteringIter, missing, false);
}
} else {
System.out.println("Skipping active deleted tracked as parameter [repoHome] : [" + trackRoot + "] incorrect");
FileUtils.copyFile(candTemp, missing);
}
} finally {
FileUtils.forceDelete(candTemp);
}
System.out.println("Consistency check found " + candidates + " missing blobs");
if (candidates > 0) {
System.out.println("Consistency check failure for the data store");
}
System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + " seconds");
}
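/**
* Collects all blob references from the node store, resolves each reference
* to its chunk ids (encoding them in backend format when verbose), writes
* them together with the escaped node id to the marked file and sorts it by
* blob id.
*/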
private static void retrieveBlobReferences(GarbageCollectableBlobStore blobStore, BlobReferenceRetriever marker,
File marked, String dsType, boolean isVerbose) throws IOException {
final BufferedWriter writer = Files.newWriter(marked, Charsets.UTF_8);
final AtomicInteger count = new AtomicInteger();
boolean threw = true;
try {
final Joiner delimJoiner = Joiner.on(DELIM).skipNulls();
final GarbageCollectableBlobStore finalBlobStore = blobStore;
System.out.println("Starting dump of blob references");
Stopwatch watch = createStarted();
marker.collectReferences(
new ReferenceCollector() {
@Override
public void addReference(String blobId, String nodeId) {
try {
Iterator<String> idIter = finalBlobStore.resolveChunks(blobId);
while (idIter.hasNext()) {
String id = idIter.next();
if (isVerbose) {
id = encodeId(id, dsType);
}
String combinedId = delimJoiner.join(id, escapeLineBreak(nodeId));
count.getAndIncrement();
writeAsLine(writer, combinedId, false);
}
} catch (Exception e) {
throw new RuntimeException("Error in retrieving references", e);
}
}
}
);
writer.flush();
sort(marked, idComparator);
System.out.println(count.get() + " blob references found");
System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + " seconds");
threw = false;
} finally {
close(writer, threw);
}
}
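/**
* Dumps all chunk ids available in the blob store to the given file and
* sorts it.
*/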
private static void retrieveBlobIds(GarbageCollectableBlobStore blobStore, File blob)
throws Exception {
System.out.println("Starting dump of blob ids");
Stopwatch watch = createStarted();
Iterator<String> blobIter = blobStore.getAllChunkIds(0);
int count = writeStrings(blobIter, blob, false);
sort(blob);
System.out.println(count + " blob ids found");
System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + " seconds");
}
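/**
* Traverses the node store to collect binary property references together
* with the paths that hold them; used for verbose reference output on a
* segment store.
*/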
static class NodeTraverser implements Closeable {
private final String dsType;
private final File references;
private final NodeStore nodeStore;
private final Joiner delimJoiner = Joiner.on(DELIM).skipNulls();
public NodeTraverser(NodeStore nodeStore, String dsType) throws IOException {
this.references = File.createTempFile("traverseref", null);
this.nodeStore = nodeStore;
this.dsType = dsType;
}
private void binaryProperties(NodeState state, String path, BufferedWriter writer, AtomicInteger count) {
for (PropertyState p : state.getProperties()) {
String propPath = PathUtils.concat(path, p.getName());
try {
if (p.getType() == Type.BINARY) {
count.incrementAndGet();
writeAsLine(writer,
getLine(p.getValue(Type.BINARY).getContentIdentity(), propPath), false);
} else if (p.getType() == Type.BINARIES && p.count() > 0) {
Iterator<Blob> iterator = p.getValue(Type.BINARIES).iterator();
while (iterator.hasNext()) {
count.incrementAndGet();
String id = iterator.next().getContentIdentity();
writeAsLine(writer,
getLine(id, propPath), false);
}
}
} catch (Exception e) {
System.err.println("Error in retrieving blob id for path " + propPath);
}
}
}
private String getLine(String id, String path) {
return delimJoiner.join(encodeId(id, dsType), escapeLineBreak(path));
}
private void traverseChildren(NodeState state, String path, BufferedWriter writer, AtomicInteger count) {
binaryProperties(state, path, writer, count);
for (ChildNodeEntry c : state.getChildNodeEntries()) {
traverseChildren(c.getNodeState(), PathUtils.concat(path, c.getName()), writer, count);
}
}
public void traverse() throws IOException {
BufferedWriter writer = null;
final AtomicInteger count = new AtomicInteger();
boolean threw = true;
System.out.println("Starting dump of blob references by traversing");
Stopwatch watch = createStarted();
try {
writer = Files.newWriter(references, Charsets.UTF_8);
traverseChildren(nodeStore.getRoot(), "/", writer, count);
writer.flush();
sort(references, idComparator);
System.out.println(count.get() + " blob references found");
System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + " seconds");
threw = false;
} finally {
Closeables.close(writer, threw);
}
}
@Override
public void close() throws IOException {
FileUtils.forceDelete(references);
}
}
}