blob: b560ca91368aa15829625a3a870cef1eec6a0667 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.plugins.index.elastic.query;
import org.apache.http.entity.ContentType;
import org.apache.http.nio.entity.NByteArrayEntity;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.query.async.facets.ElasticFacetProvider;
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.MoreLikeThisHelperUtil;
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.spi.binary.BlobByteSource;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner.PlanResult;
import org.apache.jackrabbit.oak.spi.query.Filter;
import org.apache.jackrabbit.oak.spi.query.QueryConstants;
import org.apache.jackrabbit.oak.spi.query.QueryIndex;
import org.apache.jackrabbit.oak.spi.query.QueryIndex.IndexPlan;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextAnd;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextOr;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextVisitor;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.client.Request;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.InnerHitBuilder;
import org.elasticsearch.index.query.MatchBoolPrefixQueryBuilder;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.NestedQueryBuilder;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.functionscore.ScriptScoreQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.search.suggest.SuggestBuilders;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.jcr.PropertyType;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiConsumer;
import java.util.function.BiPredicate;
import java.util.function.Consumer;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES;
import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils.toDoubles;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newAncestorQuery;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newDepthQuery;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newMixinTypeQuery;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newNodeTypeQuery;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newNotNullPropQuery;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newNullPropQuery;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newPathQuery;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newPrefixPathQuery;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newPrefixQuery;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newPropertyRestrictionQuery;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newWildcardPathQuery;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newWildcardQuery;
import static org.apache.jackrabbit.oak.spi.query.QueryConstants.JCR_PATH;
import static org.apache.jackrabbit.oak.spi.query.QueryConstants.JCR_SCORE;
import static org.apache.jackrabbit.util.ISO8601.parse;
import static org.elasticsearch.common.xcontent.ToXContent.EMPTY_PARAMS;
import static org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.existsQuery;
import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.index.query.QueryBuilders.moreLikeThisQuery;
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
import static org.elasticsearch.index.query.QueryBuilders.nestedQuery;
import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.index.query.QueryBuilders.scriptScoreQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
/**
* Class to map query plans into Elastic request objects.
*/
public class ElasticRequestHandler {
private static final Logger LOG = LoggerFactory.getLogger(ElasticRequestHandler.class);
private final static String SPELLCHECK_PREFIX = "spellcheck?term=";
protected final static String SUGGEST_PREFIX = "suggest?term=";
private static final String ES_TRIGRAM_SUFFIX = ".trigram";
private static final List<FieldSortBuilder> DEFAULT_SORTS = Arrays.asList(
SortBuilders.fieldSort("_score").order(SortOrder.DESC),
SortBuilders.fieldSort(FieldNames.PATH).order(SortOrder.ASC) // tie-breaker
);
private final IndexPlan indexPlan;
private final Filter filter;
private final PlanResult planResult;
private final ElasticIndexDefinition elasticIndexDefinition;
private final String propertyRestrictionQuery;
private final NodeState rootState;
ElasticRequestHandler(@NotNull IndexPlan indexPlan, @NotNull FulltextIndexPlanner.PlanResult planResult, NodeState rootState) {
this.indexPlan = indexPlan;
this.filter = indexPlan.getFilter();
this.planResult = planResult;
this.elasticIndexDefinition = (ElasticIndexDefinition) planResult.indexDefinition;
//Check if native function is supported
Filter.PropertyRestriction pr = null;
if (elasticIndexDefinition.hasFunctionDefined()) {
pr = filter.getPropertyRestriction(elasticIndexDefinition.getFunctionName());
}
this.propertyRestrictionQuery = pr != null ? String.valueOf(pr.first.getValue(pr.first.getType())) : null;
this.rootState = rootState;
}
public BoolQueryBuilder baseQuery() {
final BoolQueryBuilder boolQuery = boolQuery();
FullTextExpression ft = filter.getFullTextConstraint();
if (ft != null) {
boolQuery.must(fullTextQuery(ft, planResult));
}
if (propertyRestrictionQuery != null) {
if (propertyRestrictionQuery.startsWith("mlt?")) {
List<PropertyDefinition> sp = new LinkedList<>();
for (IndexDefinition.IndexingRule r : elasticIndexDefinition.getDefinedRules()) {
sp.addAll(r.getSimilarityProperties());
}
String mltQueryString = propertyRestrictionQuery.substring("mlt?".length());
Map<String, String> mltParams = MoreLikeThisHelperUtil.getParamMapFromMltQuery(mltQueryString);
String text = mltParams.get(MoreLikeThisHelperUtil.MLT_STREAM_BODY);
if (text == null) {
// TODO : See if we might want to support like Text here (passed as null in above constructors)
// IT is not supported in our lucene implementation.
throw new IllegalArgumentException("Missing required field stream.body in MLT query: " + mltQueryString);
}
if (sp.isEmpty()) {
// SimilarityImpl in oak-core sets property restriction for sim search and the query is something like
// mlt?mlt.fl=:path&mlt.mindf=0&stream.body=<path> . We need parse this query string and turn into a query
// elastic can understand.
MoreLikeThisQueryBuilder mltqb = mltQuery(mltParams);
boolQuery.must(mltqb);
// add should clause to improve relevance using similarity tags
boolQuery.should(moreLikeThisQuery(
new String[]{ElasticIndexDefinition.SIMILARITY_TAGS}, null, mltqb.likeItems())
.minTermFreq(1).minDocFreq(1)
);
} else {
boolQuery.must(similarityQuery(text, sp));
// add should clause to improve relevance using similarity tags
boolQuery.should(moreLikeThisQuery(
new String[]{ElasticIndexDefinition.SIMILARITY_TAGS}, null,
new Item[]{new Item(null, ElasticIndexUtils.idFromPath(text))})
.minTermFreq(1).minDocFreq(1)
);
}
} else {
boolQuery.must(queryStringQuery(propertyRestrictionQuery));
}
} else if (planResult.evaluateNonFullTextConstraints()) {
for (QueryBuilder constraint : nonFullTextConstraints(indexPlan, planResult)) {
boolQuery.filter(constraint);
}
}
if (!boolQuery.hasClauses()) {
// TODO: what happens here in planning mode (specially, apparently for things like rep:similar)
//For purely nodeType based queries all the documents would have to
//be returned (if the index definition has a single rule)
if (planResult.evaluateNodeTypeRestriction()) {
boolQuery.must(matchAllQuery());
}
}
return boolQuery;
}
public @NotNull List<FieldSortBuilder> baseSorts() {
List<QueryIndex.OrderEntry> sortOrder = indexPlan.getSortOrder();
if (sortOrder == null || sortOrder.isEmpty()) {
return DEFAULT_SORTS;
}
Map<String, List<PropertyDefinition>> indexProperties = elasticIndexDefinition.getPropertiesByName();
boolean hasTieBreaker = false;
List<FieldSortBuilder> list = new ArrayList<>();
for (QueryIndex.OrderEntry o : sortOrder) {
hasTieBreaker = false;
String sortPropertyName = o.getPropertyName();
String fieldName;
if (JCR_PATH.equals(sortPropertyName)) {
fieldName = FieldNames.PATH;
hasTieBreaker = true;
} else if (JCR_SCORE.equals(sortPropertyName)) {
fieldName = "_score";
} else if (indexProperties.containsKey(sortPropertyName)) {
fieldName = elasticIndexDefinition.getElasticKeyword(sortPropertyName);
} else {
LOG.warn("Unable to sort by {} for index {}", sortPropertyName, elasticIndexDefinition.getIndexName());
continue;
}
FieldSortBuilder order = SortBuilders.fieldSort(fieldName)
.order(QueryIndex.OrderEntry.Order.ASCENDING.equals(o.getOrder()) ? SortOrder.ASC : SortOrder.DESC);
list.add(order);
}
if (!hasTieBreaker) {
list.add(SortBuilders.fieldSort(FieldNames.PATH).order(SortOrder.ASC));
}
return list;
}
/**
* Receives a {@link SearchSourceBuilder} as input and converts it to a low level {@link Request} reducing the response
* in order to reduce size and improve speed.
* https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#common-options-response-filtering
*
* @param searchSourceBuilder the search request
* @param indexName the index to query
* @return a low level {@link Request} instance
*/
public Request createLowLevelRequest(SearchSourceBuilder searchSourceBuilder, String indexName) {
String endpoint = "/" + indexName
+ "/_search?filter_path=took,timed_out,hits.total.value,hits.hits._score,hits.hits.sort,hits.hits._source,aggregations";
Request request = new Request("POST", endpoint);
try {
BytesRef source = XContentHelper.toXContent(searchSourceBuilder, XContentType.JSON, EMPTY_PARAMS, false).toBytesRef();
request.setEntity(
new NByteArrayEntity(source.bytes, source.offset, source.length,
ContentType.create(XContentType.JSON.mediaTypeWithoutParameters(), (Charset) null))
);
} catch (IOException e) {
throw new IllegalStateException("Error creating request entity", e);
}
return request;
}
public String getPropertyRestrictionQuery() {
return propertyRestrictionQuery;
}
public boolean requiresSpellCheck() {
return propertyRestrictionQuery != null && propertyRestrictionQuery.startsWith(SPELLCHECK_PREFIX);
}
public boolean requiresSuggestion() {
return propertyRestrictionQuery != null && propertyRestrictionQuery.startsWith(SUGGEST_PREFIX);
}
public ElasticFacetProvider getAsyncFacetProvider(ElasticResponseHandler responseHandler) {
return requiresFacets() ?
ElasticFacetProvider.getProvider(
planResult.indexDefinition.getSecureFacetConfiguration(),
this, responseHandler,
filter::isAccessible
) : null;
}
private boolean requiresFacets() {
return filter.getPropertyRestrictions()
.stream()
.anyMatch(pr -> QueryConstants.REP_FACET.equals(pr.propertyName));
}
public Stream<TermsAggregationBuilder> aggregations() {
return facetFields()
.map(facetProp ->
AggregationBuilders.terms(facetProp)
.field(elasticIndexDefinition.getElasticKeyword(facetProp))
.size(elasticIndexDefinition.getNumberOfTopFacets())
);
}
public Stream<String> facetFields() {
return filter.getPropertyRestrictions()
.stream()
.filter(pr -> QueryConstants.REP_FACET.equals(pr.propertyName))
.map(pr -> FulltextIndex.parseFacetField(pr.first.getValue(Type.STRING)));
}
public Stream<String> spellCheckFields() {
return StreamSupport
.stream(planResult.indexingRule.getProperties().spliterator(), false)
.filter(pd -> pd.useInSpellcheck)
.map(pd -> pd.name);
}
private QueryBuilder similarityQuery(@NotNull String text, List<PropertyDefinition> sp) {
BoolQueryBuilder query = boolQuery();
if (!sp.isEmpty()) {
LOG.debug("generating similarity query for {}", text);
NodeState targetNodeState = rootState;
for (String token : PathUtils.elements(text)) {
targetNodeState = targetNodeState.getChildNode(token);
}
if (!targetNodeState.exists()) {
throw new IllegalArgumentException("Could not find node " + text);
}
for (PropertyDefinition pd : sp) {
String propertyPath = PathUtils.getParentPath(pd.name);
String propertyName = PathUtils.getName(pd.name);
NodeState tempState = targetNodeState;
for (String token : PathUtils.elements(propertyPath)) {
if (token.isEmpty()) {
break;
}
tempState = tempState.getChildNode(token);
}
PropertyState ps = tempState.getProperty(propertyName);
Blob property = ps != null ? ps.getValue(Type.BINARY) : null;
if (property == null) {
LOG.warn("Couldn't find property {} on {}", pd.name, text);
continue;
}
byte[] bytes;
try {
bytes = new BlobByteSource(property).read();
} catch (IOException e) {
LOG.error("Error reading bytes from property " + pd.name +" on " + text, e);
continue;
}
String similarityPropFieldName = FieldNames.createSimilarityFieldName(pd.name);
Map<String, Object> paramMap = new HashMap<>();
paramMap.put("query_vector", toDoubles(bytes));
paramMap.put("field_name", similarityPropFieldName);
ScriptScoreQueryBuilder scriptScoreQueryBuilder = scriptScoreQuery(existsQuery(similarityPropFieldName),
new Script(ScriptType.INLINE, Script.DEFAULT_SCRIPT_LANG, "cosineSimilarity(params.query_vector, params.field_name) + 1.0",
Collections.emptyMap(), paramMap));
query.should(scriptScoreQueryBuilder);
}
}
return query;
}
/*
Generates mlt query builder from the given mltQueryString
There could be 2 cases here -
1) select [jcr:path] from [nt:base] where similar(., '/test/a') [Return nodes with similar content to /test/a]
Xpath variant - //element(*, nt:base)[rep:similar(., '/test/a')]
In this case org.apache.jackrabbit.oak.query.ast.SimilarImpl creates the mltQueryString as
mlt?mlt.fl=:path&mlt.mindf=0&stream.body=/test/a
2) select [jcr:path] from [nt:base] where " +
"native('elastic-sim', 'mlt?stream.body=/test/a&mlt.fl=:path&mlt.mindf=0&mlt.mintf=0')
In this case the the exact mlt query passed above is passed to this method. This can be useful if we want to
fine tune the various default parameters.
The function name passed to native func ('elastic-sim') needs to be defined on index def
Refer https://jackrabbit.apache.org/oak/docs/query/lucene.html#native-query
TODO : Docs for writing a native mlt query with the various parameters that can be tuned
(The above is important since this is not a one-size-fits-all situation and the default values might not
be useful in every situation based on the type of content)
*/
private MoreLikeThisQueryBuilder mltQuery(Map<String, String> mltParams) {
// creates a shallow copy of mltParams so we can remove the entries to
// improve validation without changing the original structure
Map<String, String> shallowMltParams = new HashMap<>(mltParams);
String text = shallowMltParams.remove(MoreLikeThisHelperUtil.MLT_STREAM_BODY);
MoreLikeThisQueryBuilder mlt;
String fields = shallowMltParams.remove(MoreLikeThisHelperUtil.MLT_FILED);
// It's expected the text here to be the path of the doc
// In case the path of a node is greater than 512 bytes,
// we hash it before storing it as the _id for the elastic doc
text = ElasticIndexUtils.idFromPath(text);
if (fields == null || FieldNames.PATH.equals(fields)) {
// Handle the case 1) where default query sent by SimilarImpl (No Custom fields)
// We just need to specify the doc (Item) whose similar content we need to find
// We store path as the _id so no need to do anything extra here
// We expect Similar impl to send a query where text would have evaluated to node path.
mlt = moreLikeThisQuery(new Item[]{new Item(null, text)});
} else {
// This is for native queries if someone send additional fields via mlt.fl=field1,field2
String[] fieldsArray = fields.split(",");
mlt = moreLikeThisQuery(fieldsArray, null, new Item[]{new Item(null, text)});
}
if (!shallowMltParams.isEmpty()) {
BiConsumer<String, Consumer<String>> mltParamSetter = (key, setter) -> {
String val = shallowMltParams.remove(key);
if (val != null) {
setter.accept(val);
}
};
mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MIN_DOC_FREQ, (val) -> mlt.minDocFreq(Integer.parseInt(val)));
mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MIN_TERM_FREQ, (val) -> mlt.minTermFreq(Integer.parseInt(val)));
mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_BOOST_FACTOR, (val) -> mlt.boost(Float.parseFloat(val)));
mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MAX_DOC_FREQ, (val) -> mlt.maxDocFreq(Integer.parseInt(val)));
mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MAX_QUERY_TERMS, (val) -> mlt.maxQueryTerms(Integer.parseInt(val)));
mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MAX_WORD_LENGTH, (val) -> mlt.maxWordLength(Integer.parseInt(val)));
mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MIN_WORD_LENGTH, (val) -> mlt.minWordLength(Integer.parseInt(val)));
mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MIN_SHOULD_MATCH, mlt::minimumShouldMatch);
mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_STOP_WORDS, (val) -> {
// TODO : Read this from a stopwords text file, configured via index defn maybe ?
String[] stopWords = val.split(",");
mlt.stopWords(stopWords);
});
if (!shallowMltParams.isEmpty()) {
LOG.warn("mlt query contains unrecognized params {} that will be skipped", shallowMltParams);
}
}
return mlt;
}
public PhraseSuggestionBuilder suggestQuery(String field, String spellCheckQuery) {
BoolQueryBuilder query = boolQuery()
.must(new MatchPhraseQueryBuilder(field, "{{suggestion}}"));
nonFullTextConstraints(indexPlan, planResult).forEach(query::must);
PhraseSuggestionBuilder.CandidateGenerator candidateGeneratorBuilder =
new DirectCandidateGeneratorBuilder(getTrigramField(field)).suggestMode("missing");
return SuggestBuilders
.phraseSuggestion(getTrigramField(field))
.size(10)
.addCandidateGenerator(candidateGeneratorBuilder)
.text(spellCheckQuery)
.collateQuery(query.toString());
}
public BoolQueryBuilder suggestMatchQuery(String suggestion, String[] fields) {
BoolQueryBuilder query = boolQuery()
.must(new MultiMatchQueryBuilder(suggestion, fields)
.operator(Operator.AND).fuzzyTranspositions(false)
.autoGenerateSynonymsPhraseQuery(false)
.type(MatchQuery.Type.PHRASE));
nonFullTextConstraints(indexPlan, planResult).forEach(query::must);
return query;
}
private String getTrigramField(String field) {
return field + ES_TRIGRAM_SUFFIX;
}
private QueryBuilder fullTextQuery(FullTextExpression ft, final PlanResult pr) {
// a reference to the query, so it can be set in the visitor
// (a "non-local return")
final AtomicReference<QueryBuilder> result = new AtomicReference<>();
ft.accept(new FullTextVisitor() {
@Override
public boolean visit(FullTextContains contains) {
visitTerm(contains.getPropertyName(), contains.getRawText(), null, contains.isNot());
return true;
}
@Override
public boolean visit(FullTextOr or) {
BoolQueryBuilder q = boolQuery();
for (FullTextExpression e : or.list) {
q.should(fullTextQuery(e, pr));
}
result.set(q);
return true;
}
@Override
public boolean visit(FullTextAnd and) {
BoolQueryBuilder q = boolQuery();
for (FullTextExpression e : and.list) {
QueryBuilder x = fullTextQuery(e, pr);
// TODO: see OAK-2434 and see if ES also can't work without unwrapping
/* Only unwrap the clause if MUST_NOT(x) */
boolean hasMustNot = false;
if (x instanceof BoolQueryBuilder) {
BoolQueryBuilder bq = (BoolQueryBuilder) x;
if (bq.mustNot().size() == 1
// no other clauses
&& bq.should().isEmpty() && bq.must().isEmpty() && bq.filter().isEmpty()) {
hasMustNot = true;
q.mustNot(bq.mustNot().get(0));
}
}
if (!hasMustNot) {
q.must(x);
}
}
result.set(q);
return true;
}
@Override
public boolean visit(FullTextTerm term) {
return visitTerm(term.getPropertyName(), term.getText(), term.getBoost(), term.isNot());
}
private boolean visitTerm(String propertyName, String text, String boost, boolean not) {
// base query
QueryBuilder fullTextQuery = fullTextQuery(text, getElasticFieldName(propertyName), pr);
if (boost != null) {
fullTextQuery.boost(Float.parseFloat(boost));
}
BoolQueryBuilder boolQueryBuilder = boolQuery().must(fullTextQuery);
// add dynamic boosts in SHOULD if available
Stream<QueryBuilder> dynamicScoreQueries = dynamicScoreQueries(text);
dynamicScoreQueries.forEach(boolQueryBuilder::should);
if (not) {
BoolQueryBuilder bq = boolQuery().mustNot(boolQueryBuilder);
result.set(bq);
} else {
result.set(boolQueryBuilder);
}
return true;
}
});
return result.get();
}
private Stream<QueryBuilder> dynamicScoreQueries(String text) {
return elasticIndexDefinition.getDynamicBoostProperties().stream()
.map(pd -> nestedQuery(pd.nodeName, functionScoreQuery(matchQuery(pd.nodeName + ".value", text),
ScoreFunctionBuilders.fieldValueFactorFunction(pd.nodeName + ".boost")), ScoreMode.Avg));
}
private List<QueryBuilder> nonFullTextConstraints(IndexPlan plan, PlanResult planResult) {
final BiPredicate<Iterable<String>, String> any = (iterable, value) ->
StreamSupport.stream(iterable.spliterator(), false).anyMatch(value::equals);
final List<QueryBuilder> queries = new ArrayList<>();
Filter filter = plan.getFilter();
if (!filter.matchesAllTypes()) {
queries.add(nodeTypeConstraints(planResult.indexingRule, filter));
}
String path = FulltextIndex.getPathRestriction(plan);
switch (filter.getPathRestriction()) {
case ALL_CHILDREN:
if (!"/".equals(path)) {
queries.add(newAncestorQuery(path));
}
break;
case DIRECT_CHILDREN:
BoolQueryBuilder bq = boolQuery()
.must(newAncestorQuery(path))
.must(newDepthQuery(path, planResult));
queries.add(bq);
break;
case EXACT:
// For transformed paths, we can only add path restriction if absolute path to property can be
// deduced
if (planResult.isPathTransformed()) {
String parentPathSegment = planResult.getParentPathSegment();
if (!any.test(PathUtils.elements(parentPathSegment), "*")) {
queries.add(newPathQuery(path + parentPathSegment));
}
} else {
queries.add(newPathQuery(path));
}
break;
case PARENT:
if (PathUtils.denotesRoot(path)) {
// there's no parent of the root node
// we add a path that can not possibly occur because there
// is no way to say "match no documents" in Lucene
queries.add(newPathQuery("///"));
} else {
// For transformed paths, we can only add path restriction if absolute path to property can be
// deduced
if (planResult.isPathTransformed()) {
String parentPathSegment = planResult.getParentPathSegment();
if (!any.test(PathUtils.elements(parentPathSegment), "*")) {
queries.add(newPathQuery(PathUtils.getParentPath(path) + parentPathSegment));
}
} else {
queries.add(newPathQuery(PathUtils.getParentPath(path)));
}
}
break;
case NO_RESTRICTION:
break;
}
for (Filter.PropertyRestriction pr : filter.getPropertyRestrictions()) {
String name = pr.propertyName;
if (QueryConstants.REP_EXCERPT.equals(name) || QueryConstants.OAK_SCORE_EXPLANATION.equals(name)
|| QueryConstants.REP_FACET.equals(name)) {
continue;
}
if (QueryConstants.RESTRICTION_LOCAL_NAME.equals(name)) {
if (planResult.evaluateNodeNameRestriction()) {
QueryBuilder q = nodeName(pr);
if (q != null) {
queries.add(q);
}
}
continue;
}
if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding && pr.lastIncluding) {
String first = pr.first.getValue(Type.STRING);
first = first.replace("\\", "");
if (JCR_PATH.equals(name)) {
queries.add(newPathQuery(first));
continue;
} else if ("*".equals(name)) {
//TODO Revisit reference constraint. For performant impl
//references need to be indexed in a different manner
queries.add(referenceConstraint(first));
continue;
}
}
PropertyDefinition pd = planResult.getPropDefn(pr);
if (pd == null) {
continue;
}
QueryBuilder q = createQuery(planResult.getPropertyName(pr), pr, pd);
if (q != null) {
queries.add(q);
}
}
return queries;
}
public BoolQueryBuilder suggestionMatchQuery(String suggestion) {
QueryBuilder qb = new MatchBoolPrefixQueryBuilder(FieldNames.SUGGEST + ".value", suggestion).operator(Operator.AND);
NestedQueryBuilder nestedQueryBuilder = nestedQuery(FieldNames.SUGGEST, qb, ScoreMode.Max);
nestedQueryBuilder.innerHit(new InnerHitBuilder().setSize(100));
BoolQueryBuilder query = boolQuery()
.must(nestedQueryBuilder);
nonFullTextConstraints(indexPlan, planResult).forEach(query::must);
return query;
}
private static QueryBuilder nodeTypeConstraints(IndexDefinition.IndexingRule defn, Filter filter) {
final BoolQueryBuilder bq = boolQuery();
PropertyDefinition primaryType = defn.getConfig(JCR_PRIMARYTYPE);
//TODO OAK-2198 Add proper nodeType query support
if (primaryType != null && primaryType.propertyIndex) {
for (String type : filter.getPrimaryTypes()) {
bq.should(newNodeTypeQuery(type));
}
}
PropertyDefinition mixinType = defn.getConfig(JCR_MIXINTYPES);
if (mixinType != null && mixinType.propertyIndex) {
for (String type : filter.getMixinTypes()) {
bq.should(newMixinTypeQuery(type));
}
}
return bq;
}
private static QueryBuilder nodeName(Filter.PropertyRestriction pr) {
String first = pr.first != null ? pr.first.getValue(Type.STRING) : null;
if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding
&& pr.lastIncluding) {
// [property]=[value]
return termQuery(FieldNames.NODE_NAME, first);
}
if (pr.isLike) {
return like(FieldNames.NODE_NAME, first);
}
throw new IllegalStateException("For nodeName queries only EQUALS and LIKE are supported " + pr);
}
private static QueryBuilder like(String name, String first) {
first = first.replace('%', WildcardQuery.WILDCARD_STRING);
first = first.replace('_', WildcardQuery.WILDCARD_CHAR);
int indexOfWS = first.indexOf(WildcardQuery.WILDCARD_STRING);
int indexOfWC = first.indexOf(WildcardQuery.WILDCARD_CHAR);
int len = first.length();
if (indexOfWS == len || indexOfWC == len) {
// remove trailing "*" for prefix query
first = first.substring(0, first.length() - 1);
if (JCR_PATH.equals(name)) {
return newPrefixPathQuery(first);
} else {
return newPrefixQuery(name, first);
}
} else {
if (JCR_PATH.equals(name)) {
return newWildcardPathQuery(first);
} else {
return newWildcardQuery(name, first);
}
}
}
private static QueryBuilder referenceConstraint(String uuid) {
// TODO: this seems very bad as a query - do we really want to support it. In fact, is it even used?
// reference query
return QueryBuilders.multiMatchQuery(uuid);
}
private static QueryBuilder fullTextQuery(String text, String fieldName, PlanResult pr) {
// default match query are executed in OR, we need to use AND instead to avoid that
// every document having at least one term in the `text` will match. If there are multiple
// contains clause they will go to different match queries and will be executed in OR
if (FieldNames.FULLTEXT.equals(fieldName) && !pr.indexingRule.getNodeScopeAnalyzedProps().isEmpty()) {
MultiMatchQueryBuilder multiMatchQuery = multiMatchQuery(text)
.operator(Operator.AND)
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS);
pr.indexingRule.getNodeScopeAnalyzedProps().forEach(pd -> multiMatchQuery.field(pd.name, pd.boost));
// Add the query for actual fulltext field also. That query would not be boosted
// and could contain other parts like renditions, node name, etc
return multiMatchQuery.field(fieldName);
} else {
return matchQuery(fieldName, text).operator(Operator.AND);
}
}
private QueryBuilder createQuery(String propertyName, Filter.PropertyRestriction pr,
PropertyDefinition defn) {
int propType = FulltextIndex.determinePropertyType(defn, pr);
if (pr.isNullRestriction()) {
return newNullPropQuery(defn.name);
}
//If notNullCheckEnabled explicitly enabled use the simple TermQuery
//otherwise later fallback to range query
if (pr.isNotNullRestriction() && defn.notNullCheckEnabled) {
return newNotNullPropQuery(defn.name);
}
final String field = elasticIndexDefinition.getElasticKeyword(propertyName);
QueryBuilder in;
switch (propType) {
case PropertyType.DATE: {
in = newPropertyRestrictionQuery(field, pr, value -> parse(value.getValue(Type.DATE)).getTimeInMillis());
break;
}
case PropertyType.DOUBLE: {
in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.DOUBLE));
break;
}
case PropertyType.LONG: {
in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.LONG));
break;
}
default: {
if (pr.isLike) {
return like(propertyName, pr.first.getValue(Type.STRING));
}
//TODO Confirm that all other types can be treated as string
in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.STRING));
}
}
if (in != null) {
return in;
}
throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn);
}
private String getElasticFieldName(@Nullable String p) {
if (p == null) {
return FieldNames.FULLTEXT;
}
if (planResult.isPathTransformed()) {
p = PathUtils.getName(p);
}
if ("*".equals(p)) {
p = FieldNames.FULLTEXT;
}
return p;
}
}