OAK-9265 - Oak-run tool recovery function scans for previous documents too

git-svn-id: https://svn.apache.org/repos/asf/jackrabbit/oak/trunk@1883582 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/MissingLastRevSeeker.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/MissingLastRevSeeker.java
index f12bd56..b97f54f 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/MissingLastRevSeeker.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/MissingLastRevSeeker.java
@@ -31,6 +31,7 @@
 
 import static org.apache.jackrabbit.oak.plugins.document.Collection.CLUSTER_NODES;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getSelectedDocuments;
 
@@ -86,7 +87,8 @@
             @Override
             public boolean apply(NodeDocument input) {
                 Long modified = (Long) input.get(MODIFIED_IN_SECS);
-                return (modified != null && (modified >= getModifiedInSecs(startTime)));
+                Long sdType = (Long) input.get(SD_TYPE);
+                return (modified != null && (modified >= getModifiedInSecs(startTime)) && sdType == null);
             }
         });
     }
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoMissingLastRevSeeker.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoMissingLastRevSeeker.java
index 63f71d6..81ed50a 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoMissingLastRevSeeker.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoMissingLastRevSeeker.java
@@ -54,7 +54,9 @@
     @Override
     @NotNull
     public CloseableIterable<NodeDocument> getCandidates(final long startTime) {
-        Bson query = Filters.gte(NodeDocument.MODIFIED_IN_SECS, NodeDocument.getModifiedInSecs(startTime));
+        Bson query = Filters.and(
+                Filters.gte(NodeDocument.MODIFIED_IN_SECS, NodeDocument.getModifiedInSecs(startTime)),
+                Filters.exists(NodeDocument.SD_TYPE, false));
         Bson sortFields = new BasicDBObject(NodeDocument.MODIFIED_IN_SECS, 1);
 
         FindIterable<BasicDBObject> cursor = getNodeCollection()
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBMissingLastRevSeeker.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBMissingLastRevSeeker.java
index 8e99645..844ad1c 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBMissingLastRevSeeker.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBMissingLastRevSeeker.java
@@ -19,6 +19,7 @@
 
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
@@ -63,8 +64,9 @@
         if (MODE == 1) {
             return super.getCandidates(startTime);
         } else {
-            List<QueryCondition> conditions = Collections.singletonList(
-                    new QueryCondition(NodeDocument.MODIFIED_IN_SECS, ">=", NodeDocument.getModifiedInSecs(startTime)));
+            List<QueryCondition> conditions = new ArrayList<>();
+            conditions.add(new QueryCondition(NodeDocument.MODIFIED_IN_SECS, ">=", NodeDocument.getModifiedInSecs(startTime)));
+            conditions.add(new QueryCondition(NodeDocument.SD_TYPE, "is null"));
             return store.queryAsIterable(Collection.NODES, null, null, RDBDocumentStore.EMPTY_KEY_PATTERN, conditions,
                     Integer.MAX_VALUE, null);
         }
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BasicDocumentStoreTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BasicDocumentStoreTest.java
index de346ff..7608e22 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BasicDocumentStoreTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BasicDocumentStoreTest.java
@@ -1142,11 +1142,13 @@
 
     @Test
     public void removeInvalidatesCache() throws Exception {
-        String id = Utils.getIdFromPath("/foo");
+        String path = "/foo";
+        String id = Utils.getIdFromPath(path);
         long modified = 1;
         removeMe.add(id);
-        ds.create(Collection.NODES, Collections.singletonList(newDocument(id, modified)));
-        ds.remove(Collection.NODES, Collections.singletonMap(id, modified));
+        ds.create(Collection.NODES, Collections.singletonList(newDocument(path, modified)));
+        int removed = ds.remove(Collection.NODES, Collections.singletonMap(id, modified));
+        assertEquals(1, removed);
         assertNull(ds.getIfCached(Collection.NODES, id));
     }
 
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/MissingLastRevSeekerTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/MissingLastRevSeekerTest.java
index 607a0b4..6280066 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/MissingLastRevSeekerTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/MissingLastRevSeekerTest.java
@@ -24,12 +24,17 @@
 import org.apache.jackrabbit.oak.plugins.document.mongo.MongoMissingLastRevSeeker;
 import org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore;
 import org.apache.jackrabbit.oak.plugins.document.rdb.RDBMissingLastRevSeeker;
+import org.apache.jackrabbit.oak.plugins.document.util.Utils;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.stats.Clock;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
 import static org.apache.jackrabbit.oak.plugins.document.ClusterNodeInfo.DEFAULT_LEASE_DURATION_MILLIS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NUM_REVS_THRESHOLD;
 import static org.apache.jackrabbit.oak.plugins.document.RecoveryHandler.NOOP;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -70,6 +75,12 @@
         ClusterNodeInfo.resetClockToDefault();
         Revision.resetClockToDefault();
     }
+    
+    private void markDocumentsForCleanup() {
+        for (NodeDocument doc : Utils.getAllDocuments(ds)) {
+            removeMe.add(doc.getId());
+        }
+    }
 
     @Test
     public void acquireRecoveryLockOnActiveClusterNode() {
@@ -216,4 +227,31 @@
     private ClusterNodeInfoDocument getClusterNodeInfo(int clusterId) {
         return seeker.getClusterNodeInfo(clusterId);
     }
+    
+    @Test
+    public void getNonSplitDocs() throws Exception {
+        String nodeName = this.getClass().getName() + "-foo";
+        DocumentNodeStore dns = getBuilder().clock(clock).setAsyncDelay(0).setDocumentStore(new DocumentStoreWrapper(store) {
+            @Override
+            public void dispose() {
+                // do not close underlying store, otherwise cleanup
+                // cannot remove documents after the test
+            }
+        }).getNodeStore();
+        NodeBuilder b1 = dns.getRoot().builder();
+        b1.child(nodeName);
+        dns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        //Modify and commit changes on this node 100 times to create a split document
+        for (int i = 0; i < NUM_REVS_THRESHOLD; i++) {
+            b1 = dns.getRoot().builder();
+            b1.child(nodeName).setProperty("prop",i);
+            dns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        }
+        dns.runBackgroundOperations();
+        //seeker should return only non split documents
+        int docs = Iterables.size(seeker.getCandidates(0));
+        assertEquals(2, docs);
+        markDocumentsForCleanup();
+        dns.dispose();
+    }
 }