oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java - jackrabbit-oak - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;

 import static com.google.common.collect.Iterables.filter;

 import java.io.Closeable;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;

 import org.apache.jackrabbit.oak.plugins.document.Collection;
 import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
 import org.apache.jackrabbit.oak.plugins.document.RevisionVector;
 import org.apache.jackrabbit.oak.plugins.document.VersionGCSupport;
 import org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.QueryCondition;
 import org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.UnsupportedIndexedPropertyException;
 import org.apache.jackrabbit.oak.plugins.document.util.CloseableIterable;
 import org.apache.jackrabbit.oak.plugins.document.util.SystemPropertySupplier;
 import org.apache.jackrabbit.oak.plugins.document.util.Utils;
 import org.apache.jackrabbit.oak.stats.Clock;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import com.google.common.base.Predicate;
 import com.google.common.collect.AbstractIterator;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;

 /**
  * RDB specific version of {@link VersionGCSupport} which uses an extended query
  * interface to fetch required {@link NodeDocument}s.
  * <p>
  * Two query strategies exist; the one in use is read once, at class
  * initialization, from the system property
  * {@code org.apache.jackrabbit.oak.plugins.document.rdb.RDBVersionGCSupport.MODE}:
  * <ul>
  * <li>{@code 1}: historical paging mode, fetching documents in fixed-size
  * batches through {@code RDBDocumentStore.query}</li>
  * <li>{@code 2} (default): a single query issued through
  * {@code RDBDocumentStore.queryAsIterable}</li>
  * </ul>
  */
 public class RDBVersionGCSupport extends VersionGCSupport {

     private static final Logger LOG = LoggerFactory.getLogger(RDBVersionGCSupport.class);

     // 1: seek using historical, paging mode
     // 2: use custom single query directly using RDBDocumentStore API
     private static final int DEFAULTMODE = 2;

     // only the values 1 and 2 pass validation
     private static final int MODE = SystemPropertySupplier.create(RDBVersionGCSupport.class.getName() + ".MODE", DEFAULTMODE)
             .loggingTo(LOG).validateWith(value -> (value == 1 || value == 2)).formatSetMessage((name, value) -> String
                     .format("Strategy for %s set to %s (via system property %s)", RDBVersionGCSupport.class.getName(), value, name))
             .get();

     // absent support for SDTYPE as indexed property: key patterns of those
     // documents which definitively aren't split documents
     private static final List<String> NON_SPLIT_KEY_PATTERNS = Collections
             .unmodifiableList(Arrays.asList("_:/%", "__:/%", "___:/%"));

     private final RDBDocumentStore store;

     /**
      * Creates the GC support for the given RDB backed store.
      *
      * @param store
      *            the store all queries are sent to
      */
     public RDBVersionGCSupport(RDBDocumentStore store) {
         super(store);
         this.store = store;
     }

     /**
      * Returns the documents flagged as "deleted once" whose modification time
      * (as computed by {@link NodeDocument#getModifiedInSecs(long)}) lies in
      * the range from {@code fromModified} (inclusive) to {@code toModified}
      * (exclusive).
      *
      * @param fromModified
      *            lower bound of the modification time
      * @param toModified
      *            upper bound of the modification time
      * @return the matching documents; in mode 2 this is the iterable handed
      *         out by the store (NOTE(review): presumably closeable, to be
      *         closed by the caller - confirm)
      */
     @Override
     public Iterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified, final long toModified) {
         List<QueryCondition> conditions = new ArrayList<>(3);
         conditions.add(new QueryCondition(NodeDocument.DELETED_ONCE, "=", 1));
         conditions.add(new QueryCondition(NodeDocument.MODIFIED_IN_SECS, "<", NodeDocument.getModifiedInSecs(toModified)));
         conditions.add(new QueryCondition(NodeDocument.MODIFIED_IN_SECS, ">=", NodeDocument.getModifiedInSecs(fromModified)));
         if (MODE == 1) {
             return getIterator(RDBDocumentStore.EMPTY_KEY_PATTERN, conditions);
         }
         return store.queryAsIterable(Collection.NODES, null, null, RDBDocumentStore.EMPTY_KEY_PATTERN, conditions,
                 Integer.MAX_VALUE, null);
     }

     /**
      * Dispatches to the garbage identification strategy selected by
      * {@code MODE}.
      */
     @Override
     protected Iterable<NodeDocument> identifyGarbage(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs,
             final long oldestRevTimeStamp) {
         if (MODE == 1) {
             return identifyGarbageMode1(gcTypes, sweepRevs, oldestRevTimeStamp);
         }
         return identifyGarbageMode2(gcTypes, sweepRevs, oldestRevTimeStamp);
     }

     /**
      * Pages through all documents except those whose key matches one of the
      * patterns known not to belong to split documents.
      */
     private Iterable<NodeDocument> getSplitDocuments() {
         return getIterator(NON_SPLIT_KEY_PATTERNS, Collections.<QueryCondition> emptyList());
     }

     /**
      * Mode 1: pages through the split document candidates and filters them in
      * Java code.
      */
     private Iterable<NodeDocument> identifyGarbageMode1(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs,
             final long oldestRevTimeStamp) {
         return filter(getSplitDocuments(), getGarbageCheckPredicate(gcTypes, sweepRevs, oldestRevTimeStamp));
     }

     /**
      * Builds the predicate deciding whether a document is garbage: its split
      * type is one of {@code gcTypes}, all of its revisions are older than
      * {@code oldestRevTimeStamp}, and
      * {@code isDefaultNoBranchSplitNewerThan(doc, sweepRevs)} does not hold.
      */
     private Predicate<NodeDocument> getGarbageCheckPredicate(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs,
             final long oldestRevTimeStamp) {
         return doc -> gcTypes.contains(doc.getSplitDocType()) && doc.hasAllRevisionLessThan(oldestRevTimeStamp)
                 && !isDefaultNoBranchSplitNewerThan(doc, sweepRevs);
     }

     /**
      * Mode 2: lets the database pre-select the candidates.
      * <p>
      * A first query selects rows with version &gt;= 2 by split document type
      * and maximum revision time; a second query selects rows with a missing
      * or lower version, constrained by key pattern only. When the store
      * rejects a queried property, a single query constrained by key pattern
      * only is used instead. In all cases the final decision is made in Java
      * code by the garbage check predicate.
      *
      * @return an iterable that, when closed, closes the underlying query
      *         results and logs the predicate statistics at debug level
      */
     private Iterable<NodeDocument> identifyGarbageMode2(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs,
             final long oldestRevTimeStamp) {
         Iterable<NodeDocument> it1 = null;
         Iterable<NodeDocument> it2;
         String name1, name2;

         try {
             List<Integer> gcTypeCodes = Lists.newArrayList();
             for (SplitDocType type : gcTypes) {
                 gcTypeCodes.add(type.typeCode());
             }

             List<QueryCondition> conditions1 = new ArrayList<>(3);
             conditions1.add(new QueryCondition(NodeDocument.SD_TYPE, "in", gcTypeCodes));
             conditions1.add(new QueryCondition(NodeDocument.SD_MAX_REV_TIME_IN_SECS, "<=", NodeDocument.getModifiedInSecs(oldestRevTimeStamp)));
             conditions1.add(new QueryCondition(RDBDocumentStore.VERSIONPROP, ">=", 2));
             name1 = "version 2 query";
             it1 = store.queryAsIterable(Collection.NODES, null, null, Collections.emptyList(), conditions1,
                     Integer.MAX_VALUE, null);

             // for schema 0 or 1 rows, we'll have to constrain the path
             List<QueryCondition> conditions2 = new ArrayList<>(1);
             conditions2.add(new QueryCondition(RDBDocumentStore.VERSIONPROP, "null or <", 2));
             it2 = store.queryAsIterable(Collection.NODES, null, null, NON_SPLIT_KEY_PATTERNS, conditions2,
                     Integer.MAX_VALUE, null);
             name2 = "version <2 fallback on " + NON_SPLIT_KEY_PATTERNS;
         } catch (UnsupportedIndexedPropertyException ex) {
             // this will happen if we query a table that doesn't have the SD*
             // columns - create a new query without the constraint, and let the
             // Java code filter the results
             LOG.debug("indexed property not supported, falling back to unconstrained query", ex);
             if (it1 != null) {
                 // the first query was already set up before the failure:
                 // release it instead of silently dropping it
                 Utils.closeIfCloseable(it1);
             }
             it1 = store.queryAsIterable(Collection.NODES, null, null, NON_SPLIT_KEY_PATTERNS, Collections.emptyList(),
                     Integer.MAX_VALUE, null);
             it2 = Collections.emptySet();
             name1 = "version <2 fallback on " + NON_SPLIT_KEY_PATTERNS;
             name2 = "";
         }

         final Iterable<NodeDocument> fit1 = it1;
         final Iterable<NodeDocument> fit2 = it2;

         Predicate<NodeDocument> pred = getGarbageCheckPredicate(gcTypes, sweepRevs, oldestRevTimeStamp);

         final CountingPredicate<NodeDocument> cp1 = new CountingPredicate<>(name1, pred);
         final CountingPredicate<NodeDocument> cp2 = new CountingPredicate<>(name2, pred);

         return CloseableIterable.wrap(Iterables.concat(Iterables.filter(fit1, cp1), Iterables.filter(fit2, cp2)), new Closeable() {
             @Override
             public void close() throws IOException {
                 Utils.closeIfCloseable(fit1);
                 Utils.closeIfCloseable(fit2);
                 if (LOG.isDebugEnabled()) {
                     String message = joinStats(cp1.getStats(), cp2.getStats());
                     if (!message.isEmpty()) {
                         LOG.debug(message);
                     }
                 }
             }
         });
     }

     /**
      * Joins two statistics strings with {@code ", "}, skipping empty ones.
      */
     private static String joinStats(String first, String second) {
         if (first.isEmpty()) {
             return second;
         }
         if (second.isEmpty()) {
             return first;
         }
         return first + ", " + second;
     }

     /**
      * Predicate wrapper that counts how often it was applied and how often
      * the wrapped predicate matched. Counters are plain fields without any
      * synchronization.
      */
     private static final class CountingPredicate<T> implements Predicate<T> {

         private final String name;
         private final Predicate<T> predicate;
         // long, so that very large result sets cannot overflow the counters
         private long count, matches;

         public CountingPredicate(String name, Predicate<T> predicate) {
             this.name = name;
             this.predicate = predicate;
         }

         /**
          * @return {@code matches/count} statistics, or the empty string if
          *         the predicate was never applied
          */
         public String getStats() {
             return count == 0 ? "" : ("Predicate statistics for '" + name + "': " + matches + "/" + count);
         }

         @Override
         public boolean apply(T doc) {
             count += 1;
             boolean match = predicate.apply(doc);
             if (match) {
                 matches += 1;
             }
             return match;
         }
     }

     /**
      * Determines the smallest modification time among the documents flagged
      * as "deleted once".
      *
      * @param clock
      *            supplies the fallback value
      * @param precisionMs
      *            not used by this implementation
      * @return the smallest modification time in milliseconds, or the current
      *         time of {@code clock} if the store query failed or did not
      *         yield a positive value
      */
     @Override
     public long getOldestDeletedOnceTimestamp(Clock clock, long precisionMs) {
         long modifiedMs = Long.MIN_VALUE;

         LOG.debug("getOldestDeletedOnceTimestamp() <- start");
         try {
             long modifiedSec = store.getMinValue(Collection.NODES, NodeDocument.MODIFIED_IN_SECS, null, null,
                     RDBDocumentStore.EMPTY_KEY_PATTERN,
                     Collections.singletonList(new QueryCondition(NodeDocument.DELETED_ONCE, "=", 1)));
             modifiedMs = TimeUnit.SECONDS.toMillis(modifiedSec);
         } catch (DocumentStoreException ex) {
             // best effort: fall through to the "none found" branch below
             LOG.debug("getMinValue(MODIFIED)", ex);
         }

         if (modifiedMs > 0) {
             LOG.debug("getOldestDeletedOnceTimestamp() -> {}", Utils.timestampToString(modifiedMs));
             return modifiedMs;
         }
         LOG.debug("getOldestDeletedOnceTimestamp() -> none found, return current time");
         return clock.getTime();
     }

     /**
      * @return the number of documents flagged as "deleted once", as counted
      *         by the store
      */
     @Override
     public long getDeletedOnceCount() {
         return store.queryCount(Collection.NODES, null, null, RDBDocumentStore.EMPTY_KEY_PATTERN,
                 Collections.singletonList(new QueryCondition(NodeDocument.DELETED_ONCE, "=", 1)));
     }

     /**
      * Returns an iterable that pages through the matching documents in
      * batches of {@code BATCH_SIZE}. Every call to {@code iterator()} starts
      * over at {@link NodeDocument#MIN_ID_VALUE}.
      *
      * @param excludeKeyPatterns
      *            key patterns of documents to leave out
      * @param conditions
      *            additional conditions on indexed properties
      */
     private Iterable<NodeDocument> getIterator(final List<String> excludeKeyPatterns, final List<QueryCondition> conditions) {
         return new Iterable<NodeDocument>() {
             @Override
             public Iterator<NodeDocument> iterator() {
                 return new AbstractIterator<NodeDocument>() {

                     private static final int BATCH_SIZE = 100;
                     // must be initialized before 'batch', as nextBatch() reads it
                     private String startId = NodeDocument.MIN_ID_VALUE;
                     private Iterator<NodeDocument> batch = nextBatch();

                     @Override
                     protected NodeDocument computeNext() {
                         // read next batch if necessary
                         if (!batch.hasNext()) {
                             batch = nextBatch();
                         }
                         if (!batch.hasNext()) {
                             // a fresh batch came back empty: nothing left
                             return endOfData();
                         }
                         NodeDocument doc = batch.next();
                         // remember current id, the next batch continues from here
                         startId = doc.getId();
                         return doc;
                     }

                     // NOTE(review): relies on the store treating startId as an
                     // exclusive lower bound, otherwise the last document of a
                     // batch would be returned again - confirm against
                     // RDBDocumentStore.query
                     private Iterator<NodeDocument> nextBatch() {
                         List<NodeDocument> result = store.query(Collection.NODES, startId, NodeDocument.MAX_ID_VALUE,
                                 excludeKeyPatterns, conditions, BATCH_SIZE);
                         return result.iterator();
                     }
                 };
             }
         };
     }
  }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.jackrabbit.oak.plugins.document.rdb;

	import static com.google.common.collect.Iterables.filter;

	import java.io.Closeable;
	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Arrays;
	import java.util.Collections;
	import java.util.Iterator;
	import java.util.List;
	import java.util.Set;
	import java.util.concurrent.TimeUnit;

	import org.apache.jackrabbit.oak.plugins.document.Collection;
	import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
	import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
	import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
	import org.apache.jackrabbit.oak.plugins.document.RevisionVector;
	import org.apache.jackrabbit.oak.plugins.document.VersionGCSupport;
	import org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.QueryCondition;
	import org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.UnsupportedIndexedPropertyException;
	import org.apache.jackrabbit.oak.plugins.document.util.CloseableIterable;
	import org.apache.jackrabbit.oak.plugins.document.util.SystemPropertySupplier;
	import org.apache.jackrabbit.oak.plugins.document.util.Utils;
	import org.apache.jackrabbit.oak.stats.Clock;
	import org.slf4j.Logger;
	import org.slf4j.LoggerFactory;

	import com.google.common.base.Predicate;
	import com.google.common.collect.AbstractIterator;
	import com.google.common.collect.Iterables;
	import com.google.common.collect.Lists;

	/**
	* RDB specific version of {@link VersionGCSupport} which uses an extended query
	* interface to fetch required {@link NodeDocument}s.
	*/
	public class RDBVersionGCSupport extends VersionGCSupport {

	private static final Logger LOG = LoggerFactory.getLogger(RDBVersionGCSupport.class);

	private RDBDocumentStore store;

	// 1: seek using historical, paging mode
	// 2: use custom single query directly using RDBDocumentStore API
	private static final int DEFAULTMODE = 2;

	private static final int MODE = SystemPropertySupplier.create(RDBVersionGCSupport.class.getName() + ".MODE", DEFAULTMODE)
	.loggingTo(LOG).validateWith(value -> (value == 1 \|\| value == 2)).formatSetMessage((name, value) -> String
	.format("Strategy for %s set to %s (via system property %s)", RDBVersionGCSupport.class.getName(), value, name))
	.get();

	public RDBVersionGCSupport(RDBDocumentStore store) {
	super(store);
	this.store = store;
	}

	@Override
	public Iterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified, final long toModified) {
	List<QueryCondition> conditions = new ArrayList<QueryCondition>();
	conditions.add(new QueryCondition(NodeDocument.DELETED_ONCE, "=", 1));
	conditions.add(new QueryCondition(NodeDocument.MODIFIED_IN_SECS, "<", NodeDocument.getModifiedInSecs(toModified)));
	conditions.add(new QueryCondition(NodeDocument.MODIFIED_IN_SECS, ">=", NodeDocument.getModifiedInSecs(fromModified)));
	if (MODE == 1) {
	return getIterator(RDBDocumentStore.EMPTY_KEY_PATTERN, conditions);
	} else {
	return store.queryAsIterable(Collection.NODES, null, null, RDBDocumentStore.EMPTY_KEY_PATTERN, conditions, Integer.MAX_VALUE, null);
	}
	}

	@Override
	protected Iterable<NodeDocument> identifyGarbage(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs,
	final long oldestRevTimeStamp) {
	if (MODE == 1) {
	return identifyGarbageMode1(gcTypes, sweepRevs, oldestRevTimeStamp);
	} else {
	return identifyGarbageMode2(gcTypes, sweepRevs, oldestRevTimeStamp);
	}
	}

	private Iterable<NodeDocument> getSplitDocuments() {
	List<QueryCondition> conditions = Collections.emptyList();
	// absent support for SDTYPE as indexed property: exclude those
	// documents from the query which definitively aren't split documents
	List<String> excludeKeyPatterns = Arrays.asList("_:/%", "__:/%", "___:/%");
	return getIterator(excludeKeyPatterns, conditions);
	}

	private Iterable<NodeDocument> identifyGarbageMode1(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs,
	final long oldestRevTimeStamp) {
	return filter(getSplitDocuments(), getGarbageCheckPredicate(gcTypes, sweepRevs, oldestRevTimeStamp));
	}

	private Predicate<NodeDocument> getGarbageCheckPredicate(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs,
	final long oldestRevTimeStamp) {
	return new Predicate<NodeDocument>() {
	@Override
	public boolean apply(NodeDocument doc) {
	return gcTypes.contains(doc.getSplitDocType()) && doc.hasAllRevisionLessThan(oldestRevTimeStamp)
	&& !isDefaultNoBranchSplitNewerThan(doc, sweepRevs);
	}
	};
	}

	private Iterable<NodeDocument> identifyGarbageMode2(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs,
	final long oldestRevTimeStamp) {
	Iterable<NodeDocument> it1;
	Iterable<NodeDocument> it2;
	String name1, name2;

	// for schema 0 or 1 rows, we'll have to constrain the path
	List<String> excludeKeyPatterns = Arrays.asList("_:/%", "__:/%", "___:/%");

	try {
	List<Integer> gcTypeCodes = Lists.newArrayList();
	for (SplitDocType type : gcTypes) {
	gcTypeCodes.add(type.typeCode());
	}

	List<QueryCondition> conditions1 = new ArrayList<QueryCondition>();
	conditions1.add(new QueryCondition(NodeDocument.SD_TYPE, "in", gcTypeCodes));
	conditions1.add(new QueryCondition(NodeDocument.SD_MAX_REV_TIME_IN_SECS, "<=", NodeDocument.getModifiedInSecs(oldestRevTimeStamp)));
	conditions1.add(new QueryCondition(RDBDocumentStore.VERSIONPROP, ">=", 2));
	name1 = "version 2 query";
	it1 = store.queryAsIterable(Collection.NODES, null, null, Collections.emptyList(), conditions1,
	Integer.MAX_VALUE, null);

	List<QueryCondition> conditions2 = new ArrayList<QueryCondition>();
	conditions2.add(new QueryCondition(RDBDocumentStore.VERSIONPROP, "null or <", 2));
	it2 = store.queryAsIterable(Collection.NODES, null, null, excludeKeyPatterns, conditions2,
	Integer.MAX_VALUE, null);
	name2 = "version <2 fallback on " + excludeKeyPatterns;
	} catch (UnsupportedIndexedPropertyException ex) {
	// this will happen if we query a table that doesn't have the SD*
	// columns - create a new query without the constraint, and let the
	// Java code filter the results
	it1 = store.queryAsIterable(Collection.NODES, null, null, excludeKeyPatterns, Collections.emptyList(),
	Integer.MAX_VALUE, null);
	it2 = Collections.emptySet();
	name1 = "version <2 fallback on " + excludeKeyPatterns;
	name2 = "";
	}

	final Iterable<NodeDocument> fit1 = it1;
	final Iterable<NodeDocument> fit2 = it2;

	Predicate<NodeDocument> pred = getGarbageCheckPredicate(gcTypes, sweepRevs, oldestRevTimeStamp);

	final CountingPredicate<NodeDocument> cp1 = new CountingPredicate<NodeDocument>(name1, pred);
	final CountingPredicate<NodeDocument> cp2 = new CountingPredicate<NodeDocument>(name2, pred);

	return CloseableIterable.wrap(Iterables.concat(Iterables.filter(fit1, cp1), Iterables.filter(fit2, cp2)), new Closeable() {
	@Override
	public void close() throws IOException {
	Utils.closeIfCloseable(fit1);
	Utils.closeIfCloseable(fit2);
	if (LOG.isDebugEnabled()) {
	String stats1 = cp1.getStats();
	String stats2 = cp2.getStats();
	String message = "";
	if (!stats1.isEmpty()) {
	message = stats1;
	}
	if (!stats2.isEmpty()) {
	if (!message.isEmpty()) {
	message += ", ";
	}
	message += stats2;
	}
	if (!message.isEmpty()) {
	LOG.debug(message);
	}
	}
	}
	});
	}

	private static class CountingPredicate<T> implements Predicate<T> {

	private final String name;
	private final Predicate<T> predicate;
	private int count, matches;

	public CountingPredicate(String name, Predicate<T> predicate) {
	this.name = name;
	this.predicate = predicate;
	}

	public String getStats() {
	return count == 0 ? "" : ("Predicate statistics for '" + name + "': " + matches + "/" + count);
	}

	@Override
	public boolean apply(T doc) {
	count += 1;
	boolean match = predicate.apply(doc);
	matches += (match ? 1 : 0);
	return match;
	}
	}

	@Override
	public long getOldestDeletedOnceTimestamp(Clock clock, long precisionMs) {
	long modifiedMs = Long.MIN_VALUE;

	LOG.debug("getOldestDeletedOnceTimestamp() <- start");
	try {
	long modifiedSec = store.getMinValue(Collection.NODES, NodeDocument.MODIFIED_IN_SECS, null, null,
	RDBDocumentStore.EMPTY_KEY_PATTERN,
	Collections.singletonList(new QueryCondition(NodeDocument.DELETED_ONCE, "=", 1)));
	modifiedMs = TimeUnit.SECONDS.toMillis(modifiedSec);
	} catch (DocumentStoreException ex) {
	LOG.debug("getMinValue(MODIFIED)", ex);
	}

	if (modifiedMs > 0) {
	LOG.debug("getOldestDeletedOnceTimestamp() -> {}", Utils.timestampToString(modifiedMs));
	return modifiedMs;
	} else {
	LOG.debug("getOldestDeletedOnceTimestamp() -> none found, return current time");
	return clock.getTime();
	}
	}

	@Override
	public long getDeletedOnceCount() {
	return store.queryCount(Collection.NODES, null, null, RDBDocumentStore.EMPTY_KEY_PATTERN,
	Collections.singletonList(new QueryCondition(NodeDocument.DELETED_ONCE, "=", 1)));
	}

	private Iterable<NodeDocument> getIterator(final List<String> excludeKeyPatterns, final List<QueryCondition> conditions) {
	return new Iterable<NodeDocument>() {
	@Override
	public Iterator<NodeDocument> iterator() {
	return new AbstractIterator<NodeDocument>() {

	private static final int BATCH_SIZE = 100;
	private String startId = NodeDocument.MIN_ID_VALUE;
	private Iterator<NodeDocument> batch = nextBatch();

	@Override
	protected NodeDocument computeNext() {
	// read next batch if necessary
	if (!batch.hasNext()) {
	batch = nextBatch();
	}

	NodeDocument doc;
	if (batch.hasNext()) {
	doc = batch.next();
	// remember current id
	startId = doc.getId();
	} else {
	doc = endOfData();
	}
	return doc;
	}

	private Iterator<NodeDocument> nextBatch() {
	List<NodeDocument> result = store.query(Collection.NODES, startId, NodeDocument.MAX_ID_VALUE,
	excludeKeyPatterns, conditions, BATCH_SIZE);
	return result.iterator();
	}
	};
	}
	};
	}
	}