ATLAS-1961: Basic search improvement in use of Solr index for attribute filtering (# 3)
diff --git a/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java
index 77b2c7c..b6e0de5 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java
@@ -58,7 +58,7 @@
if (useSolrSearch) {
StringBuilder solrQuery = new StringBuilder();
- constructTypeTestQuery(solrQuery, typeAndSubTypes);
+ constructTypeTestQuery(solrQuery, classificationType, typeAndSubTypes);
constructFilterQuery(solrQuery, classificationType, filterCriteria, solrAttributes);
String solrQueryString = STRAY_AND_PATTERN.matcher(solrQuery).replaceAll(")");
@@ -95,20 +95,26 @@
}
try {
- int qryOffset = (nextProcessor == null) ? context.getSearchParameters().getOffset() : 0;
- int limit = context.getSearchParameters().getLimit();
- int resultIdx = qryOffset;
- Set<String> processedGuids = new HashSet<>();
+ final int startIdx = context.getSearchParameters().getOffset();
+ final int limit = context.getSearchParameters().getLimit();
+ int qryOffset = nextProcessor == null ? startIdx : 0;
+ int resultIdx = qryOffset;
- while (ret.size() < limit) {
+ final Set<String> processedGuids = new HashSet<>();
+ final List<AtlasVertex> entityVertices = new ArrayList<>();
+ final List<AtlasVertex> classificationVertices = new ArrayList<>();
+
+
+ for (; ret.size() < limit; qryOffset += limit) {
+ entityVertices.clear();
+ classificationVertices.clear();
+
if (context.terminateSearch()) {
LOG.warn("query terminated: {}", context.getSearchParameters());
break;
}
- List<AtlasVertex> classificationVertices;
-
if (indexQuery != null) {
Iterator<AtlasIndexQuery.Result> queryResult = indexQuery.vertices(qryOffset, limit);
@@ -116,7 +122,7 @@
break;
}
- classificationVertices = getVerticesFromIndexQueryResult(queryResult);
+ getVerticesFromIndexQueryResult(queryResult, classificationVertices);
} else {
Iterator<AtlasVertex> queryResult = allGraphQuery.vertices(qryOffset, limit).iterator();
@@ -124,13 +130,9 @@
break;
}
- classificationVertices = getVertices(queryResult);
+ getVertices(queryResult, classificationVertices);
}
- qryOffset += limit;
-
- List<AtlasVertex> entityVertices = new ArrayList<>();
-
for (AtlasVertex classificationVertex : classificationVertices) {
Iterable<AtlasEdge> edges = classificationVertex.getEdges(AtlasEdgeDirection.IN);
@@ -148,12 +150,12 @@
}
}
- entityVertices = super.filter(entityVertices);
+ super.filter(entityVertices);
for (AtlasVertex entityVertex : entityVertices) {
resultIdx++;
- if (resultIdx < context.getSearchParameters().getOffset()) {
+ if (resultIdx <= startIdx) {
continue;
}
@@ -176,7 +178,7 @@
}
@Override
- public List<AtlasVertex> filter(List<AtlasVertex> entityVertices) {
+ public void filter(List<AtlasVertex> entityVertices) {
if (LOG.isDebugEnabled()) {
LOG.debug("==> ClassificationSearchProcessor.filter({})", entityVertices.size());
}
@@ -185,14 +187,13 @@
query.addConditionsFrom(filterGraphQuery);
- List<AtlasVertex> ret = getVertices(query.vertices().iterator());
+ entityVertices.clear();
+ getVertices(query.vertices().iterator(), entityVertices);
- ret = super.filter(ret);
+ super.filter(entityVertices);
if (LOG.isDebugEnabled()) {
- LOG.debug("<== ClassificationSearchProcessor.filter({}): ret.size()={}", entityVertices.size(), ret.size());
+ LOG.debug("<== ClassificationSearchProcessor.filter(): ret.size()={}", entityVertices.size());
}
-
- return ret;
}
}
diff --git a/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java
index 50376ef..6f629eb 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java
@@ -20,6 +20,7 @@
import org.apache.atlas.model.discovery.SearchParameters.FilterCriteria;
import org.apache.atlas.repository.Constants;
import org.apache.atlas.repository.graphdb.*;
+import org.apache.atlas.repository.store.graph.v1.AtlasGraphUtilsV1;
import org.apache.atlas.type.AtlasClassificationType;
import org.apache.atlas.type.AtlasEntityType;
import org.apache.atlas.utils.AtlasPerfTracer;
@@ -59,7 +60,7 @@
StringBuilder solrQuery = new StringBuilder();
if (typeSearchBySolr) {
- constructTypeTestQuery(solrQuery, typeAndSubTypes);
+ constructTypeTestQuery(solrQuery, entityType, typeAndSubTypes);
}
if (attrSearchBySolr) {
@@ -127,34 +128,48 @@
}
try {
- int qryOffset = (nextProcessor == null && (graphQuery == null || indexQuery == null)) ? context.getSearchParameters().getOffset() : 0;
- int limit = context.getSearchParameters().getLimit();
- int resultIdx = qryOffset;
+ final int startIdx = context.getSearchParameters().getOffset();
+ final int limit = context.getSearchParameters().getLimit();
+ int qryOffset = (nextProcessor == null && (graphQuery == null || indexQuery == null)) ? startIdx : 0;
+ int resultIdx = qryOffset;
- while (ret.size() < limit) {
+ final List<AtlasVertex> entityVertices = new ArrayList<>();
+
+ for (; ret.size() < limit; qryOffset += limit) {
+ entityVertices.clear();
+
if (context.terminateSearch()) {
LOG.warn("query terminated: {}", context.getSearchParameters());
break;
}
- List<AtlasVertex> vertices;
-
if (indexQuery != null) {
- Iterator<AtlasIndexQuery.Result> queryResult = indexQuery.vertices(qryOffset, limit);
+ Iterator<AtlasIndexQuery.Result> idxQueryResult = indexQuery.vertices(qryOffset, limit);
- if (!queryResult.hasNext()) { // no more results from solr - end of search
+ if (!idxQueryResult.hasNext()) { // no more results from solr - end of search
break;
}
- vertices = getVerticesFromIndexQueryResult(queryResult);
+ while (idxQueryResult.hasNext()) {
+ AtlasVertex vertex = idxQueryResult.next().getVertex();
+
+ // skip non-entity vertices
+ if (!AtlasGraphUtilsV1.isEntityVertex(vertex)) {
+ LOG.warn("EntitySearchProcessor.execute(): ignoring non-entity vertex (id={})", vertex.getId()); // might cause duplicate entries in result
+
+ continue;
+ }
+
+ entityVertices.add(vertex);
+ }
if (graphQuery != null) {
- AtlasGraphQuery guidQuery = context.getGraph().query().in(Constants.GUID_PROPERTY_KEY, getGuids(vertices));
+ AtlasGraphQuery guidQuery = context.getGraph().query().in(Constants.GUID_PROPERTY_KEY, getGuids(entityVertices));
guidQuery.addConditionsFrom(graphQuery);
- vertices = getVertices(guidQuery.vertices().iterator());
+ getVertices(guidQuery.vertices().iterator(), entityVertices);
}
} else {
Iterator<AtlasVertex> queryResult = graphQuery.vertices(qryOffset, limit).iterator();
@@ -163,21 +178,19 @@
break;
}
- vertices = getVertices(queryResult);
+ getVertices(queryResult, entityVertices);
}
- qryOffset += limit;
+ super.filter(entityVertices);
- vertices = super.filter(vertices);
-
- for (AtlasVertex vertex : vertices) {
+ for (AtlasVertex entityVertex : entityVertices) {
resultIdx++;
- if (resultIdx < context.getSearchParameters().getOffset()) {
+ if (resultIdx <= startIdx) {
continue;
}
- ret.add(vertex);
+ ret.add(entityVertex);
if (ret.size() == limit) {
break;
@@ -196,7 +209,7 @@
}
@Override
- public List<AtlasVertex> filter(List<AtlasVertex> entityVertices) {
+ public void filter(List<AtlasVertex> entityVertices) {
if (LOG.isDebugEnabled()) {
LOG.debug("==> EntitySearchProcessor.filter({})", entityVertices.size());
}
@@ -205,14 +218,13 @@
query.addConditionsFrom(filterGraphQuery);
- List<AtlasVertex> ret = getVertices(query.vertices().iterator());
+ entityVertices.clear();
+ getVertices(query.vertices().iterator(), entityVertices);
- ret = super.filter(ret);
+ super.filter(entityVertices);
if (LOG.isDebugEnabled()) {
- LOG.debug("<== EntitySearchProcessor.filter({}): ret.size()={}", entityVertices.size(), ret.size());
+ LOG.debug("<== EntitySearchProcessor.filter(): ret.size()={}", entityVertices.size());
}
-
- return ret;
}
}
diff --git a/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java
index 83368c2..22d91e0 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java
@@ -19,8 +19,10 @@
import org.apache.atlas.model.discovery.SearchParameters;
import org.apache.atlas.repository.Constants;
+import org.apache.atlas.repository.graph.GraphHelper;
import org.apache.atlas.repository.graphdb.AtlasIndexQuery;
import org.apache.atlas.repository.graphdb.AtlasVertex;
+import org.apache.atlas.repository.store.graph.v1.AtlasGraphUtilsV1;
import org.apache.atlas.utils.AtlasPerfTracer;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
@@ -68,7 +70,7 @@
queryString.append(AND_STR).append("(").append(StringUtils.join(typeAndSubTypeNames, SPACE_STRING)).append(")");
} else {
LOG.warn("'{}' has too many subtypes ({}) to include in index-query; might cause poor performance",
- context.getEntityType().getTypeName(), typeAndSubTypeNames.size());
+ context.getClassificationType().getTypeName(), typeAndSubTypeNames.size());
}
}
@@ -92,11 +94,16 @@
}
try {
- int qryOffset = nextProcessor == null ? context.getSearchParameters().getOffset() : 0;
- int limit = context.getSearchParameters().getLimit();
- int resultIdx = qryOffset;
+ final int startIdx = context.getSearchParameters().getOffset();
+ final int limit = context.getSearchParameters().getLimit();
+ int qryOffset = nextProcessor == null ? startIdx : 0;
+ int resultIdx = qryOffset;
- while (ret.size() < limit) {
+ final List<AtlasVertex> entityVertices = new ArrayList<>();
+
+ for (; ret.size() < limit; qryOffset += limit) {
+ entityVertices.clear();
+
if (context.terminateSearch()) {
LOG.warn("query terminated: {}", context.getSearchParameters());
@@ -109,20 +116,29 @@
break;
}
- qryOffset += limit;
+ while (idxQueryResult.hasNext()) {
+ AtlasVertex vertex = idxQueryResult.next().getVertex();
- List<AtlasVertex> vertices = getVerticesFromIndexQueryResult(idxQueryResult);
+ // skip non-entity vertices
+ if (!AtlasGraphUtilsV1.isEntityVertex(vertex)) {
+ LOG.warn("FullTextSearchProcessor.execute(): ignoring non-entity vertex (id={})", vertex.getId()); // might cause duplicate entries in result
- vertices = super.filter(vertices);
-
- for (AtlasVertex vertex : vertices) {
- resultIdx++;
-
- if (resultIdx < context.getSearchParameters().getOffset()) {
continue;
}
- ret.add(vertex);
+ entityVertices.add(vertex);
+ }
+
+ super.filter(entityVertices);
+
+ for (AtlasVertex entityVertex : entityVertices) {
+ resultIdx++;
+
+ if (resultIdx <= startIdx) {
+ continue;
+ }
+
+ ret.add(entityVertex);
if (ret.size() == limit) {
break;
diff --git a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
index 596b43b..2e75dfe 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
@@ -50,6 +50,7 @@
public static final String SPACE_STRING = " ";
public static final String BRACE_OPEN_STR = "( ";
public static final String BRACE_CLOSE_STR = " )";
+ public static final char DOUBLE_QUOTE = '"';
private static final Map<SearchParameters.Operator, String> OPERATOR_MAP = new HashMap<>();
private static final char[] OFFENDING_CHARS = {'@', '/', ' '}; // This can grow as we discover corner cases
@@ -87,8 +88,10 @@
public abstract List<AtlasVertex> execute();
- public List<AtlasVertex> filter(List<AtlasVertex> entityVertices) {
- return nextProcessor == null || CollectionUtils.isEmpty(entityVertices) ? entityVertices : nextProcessor.filter(entityVertices);
+ public void filter(List<AtlasVertex> entityVertices) {
+ if (nextProcessor != null && CollectionUtils.isNotEmpty(entityVertices)) {
+ nextProcessor.filter(entityVertices);
+ }
}
@@ -178,12 +181,26 @@
return ret;
}
- protected void constructTypeTestQuery(StringBuilder solrQuery, Set<String> typeAndAllSubTypes) {
+ protected void constructTypeTestQuery(StringBuilder solrQuery, AtlasStructType type, Set<String> typeAndAllSubTypes) {
String typeAndSubtypesString = StringUtils.join(typeAndAllSubTypes, SPACE_STRING);
- solrQuery.append("v.\"").append(Constants.TYPE_NAME_PROPERTY_KEY).append("\": (")
- .append(typeAndSubtypesString)
- .append(")");
+ if (CollectionUtils.isNotEmpty(typeAndAllSubTypes)) {
+ if (solrQuery.length() > 0) {
+ solrQuery.append(AND_STR);
+ }
+
+ solrQuery.append("v.\"").append(Constants.TYPE_NAME_PROPERTY_KEY).append("\": (")
+ .append(typeAndSubtypesString)
+ .append(")");
+ }
+
+ if (type instanceof AtlasEntityType && context.getSearchParameters().getExcludeDeletedEntities()) {
+ if (solrQuery.length() > 0) {
+ solrQuery.append(AND_STR);
+ }
+
+ solrQuery.append("v.\"").append(Constants.STATE_PROPERTY_KEY).append("\":ACTIVE");
+ }
}
protected void constructFilterQuery(StringBuilder solrQuery, AtlasStructType type, FilterCriteria filterCriteria, Set<String> solrAttributes) {
@@ -200,14 +217,6 @@
solrQuery.append(filterQuery);
}
}
-
- if (type instanceof AtlasEntityType && context.getSearchParameters().getExcludeDeletedEntities()) {
- if (solrQuery.length() > 0) {
- solrQuery.append(AND_STR);
- }
-
- solrQuery.append("v.\"").append(Constants.STATE_PROPERTY_KEY).append("\":ACTIVE");
- }
}
private String toSolrQuery(AtlasStructType type, FilterCriteria criteria, Set<String> solrAttributes, int level) {
@@ -246,15 +255,10 @@
String ret = EMPTY_STRING;
try {
- String qualifiedName = type.getQualifiedAttributeName(attrName);
-
if (OPERATOR_MAP.get(op) != null) {
- if (hasOffendingChars(attrVal)) {
- // FIXME: if attrVal has offending chars & op is contains, endsWith, startsWith, solr doesn't like it and results are skewed
- ret = String.format(OPERATOR_MAP.get(op), qualifiedName, "\"" + attrVal + "\"");
- } else {
- ret = String.format(OPERATOR_MAP.get(op), qualifiedName, attrVal);
- }
+ String qualifiedName = type.getQualifiedAttributeName(attrName);
+
+ ret = String.format(OPERATOR_MAP.get(op), qualifiedName, escapeIndexQueryValue(attrVal));
}
} catch (AtlasBaseException ex) {
LOG.warn(ex.getMessage());
@@ -348,32 +352,28 @@
private String getLikeRegex(String attributeValue) { return ".*" + attributeValue + ".*"; }
- protected List<AtlasVertex> getVerticesFromIndexQueryResult(Iterator<AtlasIndexQuery.Result> idxQueryResult) {
- List<AtlasVertex> ret = new ArrayList<>();
-
+ protected List<AtlasVertex> getVerticesFromIndexQueryResult(Iterator<AtlasIndexQuery.Result> idxQueryResult, List<AtlasVertex> vertices) {
if (idxQueryResult != null) {
while (idxQueryResult.hasNext()) {
AtlasVertex vertex = idxQueryResult.next().getVertex();
- ret.add(vertex);
+ vertices.add(vertex);
}
}
- return ret;
+ return vertices;
}
- protected List<AtlasVertex> getVertices(Iterator<AtlasVertex> vertices) {
- List<AtlasVertex> ret = new ArrayList<>();
+ protected List<AtlasVertex> getVertices(Iterator<AtlasVertex> iterator, List<AtlasVertex> vertices) {
+ if (iterator != null) {
+ while (iterator.hasNext()) {
+ AtlasVertex vertex = iterator.next();
- if (vertices != null) {
- while (vertices.hasNext()) {
- AtlasVertex vertex = vertices.next();
-
- ret.add(vertex);
+ vertices.add(vertex);
}
}
- return ret;
+ return vertices;
}
protected Set<String> getGuids(List<AtlasVertex> vertices) {
@@ -402,7 +402,24 @@
return defaultValue;
}
- private boolean hasOffendingChars(String str) {
- return StringUtils.containsAny(str, OFFENDING_CHARS);
+ private String escapeIndexQueryValue(String value) {
+ String ret = value;
+
+ if (StringUtils.containsAny(value, OFFENDING_CHARS)) {
+ boolean isQuoteAtStart = value.charAt(0) == DOUBLE_QUOTE;
+ boolean isQuoteAtEnd = value.charAt(value.length() - 1) == DOUBLE_QUOTE;
+
+ if (!isQuoteAtStart) {
+ if (!isQuoteAtEnd) {
+ ret = DOUBLE_QUOTE + value + DOUBLE_QUOTE;
+ } else {
+ ret = DOUBLE_QUOTE + value;
+ }
+ } else if (!isQuoteAtEnd) {
+ ret = value + DOUBLE_QUOTE;
+ }
+ }
+
+ return ret;
}
}
diff --git a/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasGraphUtilsV1.java b/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasGraphUtilsV1.java
index cd9a47a..43f2c55 100644
--- a/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasGraphUtilsV1.java
+++ b/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasGraphUtilsV1.java
@@ -103,6 +103,10 @@
}
}
+ public static boolean isEntityVertex(AtlasVertex vertex) {
+ return StringUtils.isNotEmpty(getIdFromVertex(vertex)) && StringUtils.isNotEmpty(getTypeName(vertex));
+ }
+
public static boolean isReference(AtlasType type) {
return isReference(type.getTypeCategory());
}