ATLAS-1961: Basic search improvement in use of Solr index for attribute filtering (# 4)
diff --git a/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java
index b6e0de5..745f9d7 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java
@@ -58,7 +58,7 @@
         if (useSolrSearch) {
             StringBuilder solrQuery = new StringBuilder();
 
-            constructTypeTestQuery(solrQuery, classificationType, typeAndSubTypes);
+            constructTypeTestQuery(solrQuery, typeAndSubTypes);
             constructFilterQuery(solrQuery, classificationType, filterCriteria, solrAttributes);
 
             String solrQueryString = STRAY_AND_PATTERN.matcher(solrQuery).replaceAll(")");
@@ -95,16 +95,22 @@
         }
 
         try {
-            final int startIdx  = context.getSearchParameters().getOffset();
-            final int limit     = context.getSearchParameters().getLimit();
-            int       qryOffset = nextProcessor == null ? startIdx : 0;
-            int       resultIdx = qryOffset;
+            final int     startIdx   = context.getSearchParameters().getOffset();
+            final int     limit      = context.getSearchParameters().getLimit();
+            final boolean activeOnly = context.getSearchParameters().getExcludeDeletedEntities();
+
+            // the query starts at offset 0, even though startIdx can be higher, because some results of an earlier
+            // retrieval could have been dropped: e.g. non-active entities or duplicate entities (the same entity
+            // pointed to by multiple classifications in the result)
+            //
+            // the first 'startIdx' entries will be ignored
+            int qryOffset = 0;
+            int resultIdx = qryOffset;
 
             final Set<String>       processedGuids         = new HashSet<>();
             final List<AtlasVertex> entityVertices         = new ArrayList<>();
             final List<AtlasVertex> classificationVertices = new ArrayList<>();
 
-
             for (; ret.size() < limit; qryOffset += limit) {
                 entityVertices.clear();
                 classificationVertices.clear();
@@ -138,15 +144,20 @@
 
                     for (AtlasEdge edge : edges) {
                         AtlasVertex entityVertex = edge.getOutVertex();
-                        String      guid         = AtlasGraphUtilsV1.getIdFromVertex(entityVertex);
 
-                        if (!processedGuids.contains(guid)) {
-                            if (!context.getSearchParameters().getExcludeDeletedEntities() || AtlasGraphUtilsV1.getState(entityVertex) == AtlasEntity.Status.ACTIVE) {
-                                entityVertices.add(entityVertex);
-                            }
-
-                            processedGuids.add(guid);
+                        if (activeOnly && AtlasGraphUtilsV1.getState(entityVertex) != AtlasEntity.Status.ACTIVE) {
+                            continue;
                         }
+
+                        String guid = AtlasGraphUtilsV1.getIdFromVertex(entityVertex);
+
+                        if (processedGuids.contains(guid)) {
+                            continue;
+                        }
+
+                        entityVertices.add(entityVertex);
+
+                        processedGuids.add(guid);
                     }
                 }
 
diff --git a/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java
index 6f629eb..a3525c9 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java
@@ -18,6 +18,7 @@
 package org.apache.atlas.discovery;
 
 import org.apache.atlas.model.discovery.SearchParameters.FilterCriteria;
+import org.apache.atlas.model.instance.AtlasEntity;
 import org.apache.atlas.repository.Constants;
 import org.apache.atlas.repository.graphdb.*;
 import org.apache.atlas.repository.store.graph.v1.AtlasGraphUtilsV1;
@@ -60,7 +61,7 @@
         StringBuilder solrQuery = new StringBuilder();
 
         if (typeSearchBySolr) {
-            constructTypeTestQuery(solrQuery, entityType, typeAndSubTypes);
+            constructTypeTestQuery(solrQuery, typeAndSubTypes);
         }
 
         if (attrSearchBySolr) {
@@ -70,6 +71,10 @@
         }
 
         if (solrQuery.length() > 0) {
+            if (context.getSearchParameters().getExcludeDeletedEntities()) {
+                constructStateTestQuery(solrQuery);
+            }
+
             String solrQueryString = STRAY_AND_PATTERN.matcher(solrQuery).replaceAll(")");
 
             solrQueryString = STRAY_OR_PATTERN.matcher(solrQueryString).replaceAll(")");
@@ -128,10 +133,14 @@
         }
 
         try {
-            final int startIdx  = context.getSearchParameters().getOffset();
-            final int limit     = context.getSearchParameters().getLimit();
-            int       qryOffset = (nextProcessor == null && (graphQuery == null || indexQuery == null)) ? startIdx : 0;
-            int       resultIdx = qryOffset;
+            final int startIdx = context.getSearchParameters().getOffset();
+            final int limit    = context.getSearchParameters().getLimit();
+
+            // with subsequent filtering stages involved, the query should start at 0 even though startIdx can be higher
+            //
+            // the first 'startIdx' entries will be ignored
+            int qryOffset = (nextProcessor != null || (graphQuery != null && indexQuery != null)) ? 0 : startIdx;
+            int resultIdx = qryOffset;
 
             final List<AtlasVertex> entityVertices = new ArrayList<>();
 
@@ -154,13 +163,6 @@
                     while (idxQueryResult.hasNext()) {
                         AtlasVertex vertex = idxQueryResult.next().getVertex();
 
-                        // skip non-entity vertices
-                        if (!AtlasGraphUtilsV1.isEntityVertex(vertex)) {
-                            LOG.warn("EntitySearchProcessor.execute(): ignoring non-entity vertex (id={})", vertex.getId()); // might cause duplicate entries in result
-
-                            continue;
-                        }
-
                         entityVertices.add(vertex);
                     }
 
diff --git a/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java
index 22d91e0..1b19a0e 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java
@@ -18,6 +18,7 @@
 package org.apache.atlas.discovery;
 
 import org.apache.atlas.model.discovery.SearchParameters;
+import org.apache.atlas.model.instance.AtlasEntity;
 import org.apache.atlas.repository.Constants;
 import org.apache.atlas.repository.graph.GraphHelper;
 import org.apache.atlas.repository.graphdb.AtlasIndexQuery;
@@ -74,6 +75,10 @@
             }
         }
 
+        if (context.getSearchParameters().getExcludeDeletedEntities()) {
+            queryString.append(AND_STR).append("(ACTIVE)");
+        }
+
         queryString.append(")");
 
         indexQuery = context.getGraph().indexQuery(Constants.FULLTEXT_INDEX, queryString.toString());
@@ -94,10 +99,16 @@
         }
 
         try {
-            final int startIdx  = context.getSearchParameters().getOffset();
-            final int limit     = context.getSearchParameters().getLimit();
-            int       qryOffset = nextProcessor == null ? startIdx : 0;
-            int       resultIdx = qryOffset;
+            final int     startIdx   = context.getSearchParameters().getOffset();
+            final int     limit      = context.getSearchParameters().getLimit();
+            final boolean activeOnly = context.getSearchParameters().getExcludeDeletedEntities();
+
+            // the query starts at offset 0, even though startIdx can be higher, because some results of an earlier
+            // retrieval could have been dropped: e.g. non-entity vertices or vertices of non-active entities
+            //
+            // the first 'startIdx' entries will be ignored
+            int qryOffset = 0;
+            int resultIdx = qryOffset;
 
             final List<AtlasVertex> entityVertices = new ArrayList<>();
 
@@ -121,11 +132,17 @@
 
                     // skip non-entity vertices
                     if (!AtlasGraphUtilsV1.isEntityVertex(vertex)) {
-                        LOG.warn("FullTextSearchProcessor.execute(): ignoring non-entity vertex (id={})", vertex.getId()); // might cause duplicate entries in result
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("FullTextSearchProcessor.execute(): ignoring non-entity vertex (id={})", vertex.getId());
+                        }
 
                         continue;
                     }
 
+                    if (activeOnly && AtlasGraphUtilsV1.getState(vertex) != AtlasEntity.Status.ACTIVE) {
+                        continue;
+                    }
+
                     entityVertices.add(vertex);
                 }
 
diff --git a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
index 2e75dfe..7950127 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
@@ -181,7 +181,7 @@
         return ret;
     }
 
-    protected void constructTypeTestQuery(StringBuilder solrQuery, AtlasStructType type, Set<String> typeAndAllSubTypes) {
+    protected void constructTypeTestQuery(StringBuilder solrQuery, Set<String> typeAndAllSubTypes) {
         String typeAndSubtypesString = StringUtils.join(typeAndAllSubTypes, SPACE_STRING);
 
         if (CollectionUtils.isNotEmpty(typeAndAllSubTypes)) {
@@ -193,14 +193,6 @@
                     .append(typeAndSubtypesString)
                     .append(")");
         }
-
-        if (type instanceof AtlasEntityType && context.getSearchParameters().getExcludeDeletedEntities()) {
-            if (solrQuery.length() > 0) {
-                solrQuery.append(AND_STR);
-            }
-
-            solrQuery.append("v.\"").append(Constants.STATE_PROPERTY_KEY).append("\":ACTIVE");
-        }
     }
 
     protected void constructFilterQuery(StringBuilder solrQuery, AtlasStructType type, FilterCriteria filterCriteria, Set<String> solrAttributes) {
@@ -219,6 +211,14 @@
         }
     }
 
+    protected void constructStateTestQuery(StringBuilder solrQuery) { // appends an ACTIVE-state test to solrQuery, in place
+        if (solrQuery.length() > 0) { // query already has content: join the state test to it with AND_STR
+            solrQuery.append(AND_STR);
+        }
+
+        solrQuery.append("v.\"").append(Constants.STATE_PROPERTY_KEY).append("\":ACTIVE"); // v."<STATE_PROPERTY_KEY>":ACTIVE
+    }
+
     private String toSolrQuery(AtlasStructType type, FilterCriteria criteria, Set<String> solrAttributes, int level) {
         return toSolrQuery(type, criteria, solrAttributes, new StringBuilder(), level);
     }