| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.jackrabbit.oak.plugins.index.lucene; |
| |
| import javax.annotation.CheckForNull; |
| import javax.annotation.Nonnull; |
| import javax.annotation.Nullable; |
| import javax.jcr.PropertyType; |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Deque; |
| import java.util.Iterator; |
| import java.util.LinkedList; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.concurrent.atomic.AtomicReference; |
| |
| import com.google.common.collect.AbstractIterator; |
| import com.google.common.collect.Lists; |
| import com.google.common.collect.Queues; |
| import com.google.common.collect.Sets; |
| import com.google.common.primitives.Chars; |
| import org.apache.jackrabbit.oak.api.PropertyValue; |
| import org.apache.jackrabbit.oak.api.Result.SizePrecision; |
| import org.apache.jackrabbit.oak.api.Type; |
| import org.apache.jackrabbit.oak.commons.PathUtils; |
| import org.apache.jackrabbit.oak.commons.json.JsopBuilder; |
| import org.apache.jackrabbit.oak.commons.json.JsopWriter; |
| import org.apache.jackrabbit.oak.plugins.index.aggregate.NodeAggregator; |
| import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule; |
| import org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult; |
| import org.apache.jackrabbit.oak.plugins.index.lucene.score.ScorerProviderFactory; |
| import org.apache.jackrabbit.oak.plugins.index.lucene.spi.FulltextQueryTermsProvider; |
| import org.apache.jackrabbit.oak.plugins.index.lucene.util.FacetHelper; |
| import org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper; |
| import org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper; |
| import org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper; |
| import org.apache.jackrabbit.oak.query.QueryEngineSettings; |
| import org.apache.jackrabbit.oak.query.QueryImpl; |
| import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd; |
| import org.apache.jackrabbit.oak.query.fulltext.FullTextContains; |
| import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression; |
| import org.apache.jackrabbit.oak.query.fulltext.FullTextOr; |
| import org.apache.jackrabbit.oak.query.fulltext.FullTextTerm; |
| import org.apache.jackrabbit.oak.query.fulltext.FullTextVisitor; |
| import org.apache.jackrabbit.oak.spi.query.Cursor; |
| import org.apache.jackrabbit.oak.spi.query.Cursors.PathCursor; |
| import org.apache.jackrabbit.oak.spi.query.Filter; |
| import org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction; |
| import org.apache.jackrabbit.oak.spi.query.IndexRow; |
| import org.apache.jackrabbit.oak.spi.query.PropertyValues; |
| import org.apache.jackrabbit.oak.spi.query.QueryConstants; |
| import org.apache.jackrabbit.oak.spi.query.QueryIndex; |
| import org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvanceFulltextQueryIndex; |
| import org.apache.jackrabbit.oak.spi.state.NodeState; |
| import org.apache.jackrabbit.oak.util.PerfLogger; |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.facet.FacetResult; |
| import org.apache.lucene.facet.Facets; |
| import org.apache.lucene.facet.LabelAndValue; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexableField; |
| import org.apache.lucene.index.MultiFields; |
| import org.apache.lucene.index.StoredFieldVisitor; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.queries.CustomScoreQuery; |
| import org.apache.lucene.queryparser.classic.ParseException; |
| import org.apache.lucene.queryparser.classic.QueryParser; |
| import org.apache.lucene.queryparser.classic.QueryParserBase; |
| import org.apache.lucene.queryparser.flexible.core.QueryNodeException; |
| import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; |
| import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler; |
| import org.apache.lucene.search.BooleanClause; |
| import org.apache.lucene.search.BooleanQuery; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.MatchAllDocsQuery; |
| import org.apache.lucene.search.NumericRangeQuery; |
| import org.apache.lucene.search.PrefixQuery; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.ScoreDoc; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.SortField; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.TermRangeQuery; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.search.TotalHitCountCollector; |
| import org.apache.lucene.search.WildcardQuery; |
| import org.apache.lucene.search.highlight.Highlighter; |
| import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; |
| import org.apache.lucene.search.highlight.QueryScorer; |
| import org.apache.lucene.search.highlight.SimpleHTMLEncoder; |
| import org.apache.lucene.search.highlight.SimpleHTMLFormatter; |
| import org.apache.lucene.search.highlight.TextFragment; |
| import org.apache.lucene.search.postingshighlight.PostingsHighlighter; |
| import org.apache.lucene.search.spell.SuggestWord; |
| import org.apache.lucene.search.suggest.Lookup; |
| import org.apache.lucene.util.Version; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import static com.google.common.base.Preconditions.checkNotNull; |
| import static com.google.common.base.Preconditions.checkState; |
| import static com.google.common.collect.Lists.newArrayListWithCapacity; |
| import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES; |
| import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE; |
| import static org.apache.jackrabbit.oak.api.Type.LONG; |
| import static org.apache.jackrabbit.oak.api.Type.STRING; |
| import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot; |
| import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath; |
| import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.ANALYZED_FIELD_PREFIX; |
| import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH; |
| import static org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.NATIVE_SORT_ORDER; |
| import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION; |
| import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newAncestorTerm; |
| import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm; |
| import static org.apache.jackrabbit.oak.query.QueryImpl.JCR_PATH; |
| import static org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvancedQueryIndex; |
| import static org.apache.jackrabbit.oak.spi.query.QueryIndex.NativeQueryIndex; |
| import static org.apache.lucene.search.BooleanClause.Occur.*; |
| |
| /** |
| * Provides a QueryIndex that does lookups against a Lucene-based index |
| * |
| * <p> |
| * To define a lucene index on a subtree you have to add an |
| * <code>oak:index<code> node. |
| * |
| * Under it follows the index definition node that: |
| * <ul> |
| * <li>must be of type <code>oak:QueryIndexDefinition</code></li> |
| * <li>must have the <code>type</code> property set to <b><code>lucene</code></b></li> |
| * <li>must have the <code>async</code> property set to <b><code>async</code></b></li> |
| * </b></li> |
| * </ul> |
| * </p> |
| * <p> |
| * Optionally you can add |
| * <ul> |
| * <li>what subset of property types to be included in the index via the <code>includePropertyTypes<code> property</li> |
| * <li>a blacklist of property names: what property to be excluded from the index via the <code>excludePropertyNames<code> property</li> |
| * <li>the <code>reindex<code> flag which when set to <code>true<code>, triggers a full content re-index.</li> |
| * </ul> |
| * </p> |
| * <pre> |
| * <code> |
| * { |
| * NodeBuilder index = root.child("oak:index"); |
| * index.child("lucene") |
| * .setProperty("jcr:primaryType", "oak:QueryIndexDefinition", Type.NAME) |
| * .setProperty("type", "lucene") |
| * .setProperty("async", "async") |
| * .setProperty("reindex", "true"); |
| * } |
| * </code> |
| * </pre> |
| * |
| * @see org.apache.jackrabbit.oak.spi.query.QueryIndex |
| * |
| */ |
| public class LucenePropertyIndex implements AdvancedQueryIndex, QueryIndex, NativeQueryIndex, |
| AdvanceFulltextQueryIndex { |
| |
| private static double MIN_COST = 2.1; |
| |
| private static final Logger LOG = LoggerFactory |
| .getLogger(LucenePropertyIndex.class); |
| private static final PerfLogger PERF_LOGGER = |
| new PerfLogger(LoggerFactory.getLogger(LucenePropertyIndex.class.getName() + ".perf")); |
| |
| static final String ATTR_PLAN_RESULT = "oak.lucene.planResult"; |
| |
| /** |
| * Batch size for fetching results from Lucene queries. |
| */ |
| static final int LUCENE_QUERY_BATCH_SIZE = 50; |
| |
| protected final IndexTracker tracker; |
| |
| private final ScorerProviderFactory scorerProviderFactory; |
| |
| private final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), |
| new SimpleHTMLEncoder(), null); |
| |
| private final PostingsHighlighter postingsHighlighter = new PostingsHighlighter(); |
| |
| private final IndexAugmentorFactory augmentorFactory; |
| |
| public LucenePropertyIndex(IndexTracker tracker) { |
| this(tracker, ScorerProviderFactory.DEFAULT); |
| } |
| |
| public LucenePropertyIndex(IndexTracker tracker, ScorerProviderFactory factory) { |
| this(tracker, factory, null); |
| } |
| |
| public LucenePropertyIndex(IndexTracker tracker, ScorerProviderFactory factory, IndexAugmentorFactory augmentorFactory) { |
| this.tracker = tracker; |
| this.scorerProviderFactory = factory; |
| this.augmentorFactory = augmentorFactory; |
| } |
| |
| @Override |
| public double getMinimumCost() { |
| return MIN_COST; |
| } |
| |
| @Override |
| public String getIndexName() { |
| return "lucene-property"; |
| } |
| |
| @Override |
| public List<IndexPlan> getPlans(Filter filter, List<OrderEntry> sortOrder, NodeState rootState) { |
| Collection<String> indexPaths = new LuceneIndexLookup(rootState).collectIndexNodePaths(filter); |
| List<IndexPlan> plans = Lists.newArrayListWithCapacity(indexPaths.size()); |
| for (String path : indexPaths) { |
| IndexNode indexNode = null; |
| try { |
| indexNode = tracker.acquireIndexNode(path); |
| |
| if (indexNode != null) { |
| IndexPlan plan = new IndexPlanner(indexNode, path, filter, sortOrder).getPlan(); |
| if (plan != null) { |
| plans.add(plan); |
| } |
| } |
| } catch (Exception e) { |
| LOG.error("Error getting plan for {}", path); |
| LOG.error("Exception:", e); |
| } finally { |
| if (indexNode != null) { |
| indexNode.release(); |
| } |
| } |
| } |
| return plans; |
| } |
| |
| @Override |
| public double getCost(Filter filter, NodeState root) { |
| throw new UnsupportedOperationException("Not supported as implementing AdvancedQueryIndex"); |
| } |
| |
| @Override |
| public String getPlan(Filter filter, NodeState root) { |
| throw new UnsupportedOperationException("Not supported as implementing AdvancedQueryIndex"); |
| } |
| |
| @Override |
| public String getPlanDescription(IndexPlan plan, NodeState root) { |
| Filter filter = plan.getFilter(); |
| IndexNode index = tracker.acquireIndexNode(getPlanResult(plan).indexPath); |
| checkState(index != null, "The Lucene index is not available"); |
| try { |
| FullTextExpression ft = filter.getFullTextConstraint(); |
| StringBuilder sb = new StringBuilder("lucene:"); |
| String path = getPlanResult(plan).indexPath; |
| sb.append(getIndexName(plan)) |
| .append("(") |
| .append(path) |
| .append(") "); |
| sb.append(getLuceneRequest(plan, augmentorFactory, null)); |
| if (plan.getSortOrder() != null && !plan.getSortOrder().isEmpty()) { |
| sb.append(" ordering:").append(plan.getSortOrder()); |
| } |
| if (ft != null) { |
| sb.append(" ft:(").append(ft).append(")"); |
| } |
| return sb.toString(); |
| } finally { |
| index.release(); |
| } |
| } |
| |
| @Override |
| public Cursor query(final Filter filter, final NodeState root) { |
| throw new UnsupportedOperationException("Not supported as implementing AdvancedQueryIndex"); |
| } |
| |
| @Override |
| public Cursor query(final IndexPlan plan, NodeState rootState) { |
| final Filter filter = plan.getFilter(); |
| final Sort sort = getSort(plan); |
| final PlanResult pr = getPlanResult(plan); |
| QueryEngineSettings settings = filter.getQueryEngineSettings(); |
| Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() { |
| private final Deque<LuceneResultRow> queue = Queues.newArrayDeque(); |
| private final Set<String> seenPaths = Sets.newHashSet(); |
| private ScoreDoc lastDoc; |
| private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE; |
| private boolean noDocs = false; |
| private long lastSearchIndexerVersion; |
| |
| @Override |
| protected LuceneResultRow computeNext() { |
| while (!queue.isEmpty() || loadDocs()) { |
| return queue.remove(); |
| } |
| return endOfData(); |
| } |
| |
| private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt, Facets facets, |
| String explanation) throws IOException { |
| IndexReader reader = searcher.getIndexReader(); |
| //TODO Look into usage of field cache for retrieving the path |
| //instead of reading via reader if no of docs in index are limited |
| PathStoredFieldVisitor visitor = new PathStoredFieldVisitor(); |
| reader.document(doc.doc, visitor); |
| String path = visitor.getPath(); |
| if (path != null) { |
| if ("".equals(path)) { |
| path = "/"; |
| } |
| if (pr.isPathTransformed()) { |
| String originalPath = path; |
| path = pr.transformPath(path); |
| |
| if (path == null) { |
| LOG.trace("Ignoring path {} : Transformation returned null", originalPath); |
| return null; |
| } |
| |
| // avoid duplicate entries |
| if (seenPaths.contains(path)) { |
| LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath); |
| return null; |
| } |
| seenPaths.add(path); |
| } |
| |
| LOG.trace("Matched path {}", path); |
| return new LuceneResultRow(path, doc.score, excerpt, facets, explanation); |
| } |
| return null; |
| } |
| |
| /** |
| * Loads the lucene documents in batches |
| * @return true if any document is loaded |
| */ |
| private boolean loadDocs() { |
| |
| if (noDocs) { |
| return false; |
| } |
| |
| ScoreDoc lastDocToRecord = null; |
| |
| final IndexNode indexNode = acquireIndexNode(plan); |
| checkState(indexNode != null); |
| try { |
| IndexSearcher searcher = indexNode.getSearcher(); |
| LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader()); |
| if (luceneRequestFacade.getLuceneRequest() instanceof Query) { |
| Query query = (Query) luceneRequestFacade.getLuceneRequest(); |
| |
| CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query); |
| |
| if (customScoreQuery != null) { |
| query = customScoreQuery; |
| } |
| |
| checkForIndexVersionChange(searcher); |
| |
| TopDocs docs; |
| long start = PERF_LOGGER.start(); |
| while (true) { |
| if (lastDoc != null) { |
| LOG.debug("loading the next {} entries for query {}", nextBatchSize, query); |
| if (sort == null) { |
| docs = searcher.searchAfter(lastDoc, query, nextBatchSize); |
| } else { |
| docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort); |
| } |
| } else { |
| LOG.debug("loading the first {} entries for query {}", nextBatchSize, query); |
| if (sort == null) { |
| docs = searcher.search(query, nextBatchSize); |
| } else { |
| docs = searcher.search(query, nextBatchSize, sort); |
| } |
| } |
| PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length); |
| nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000); |
| |
| long f = PERF_LOGGER.start(); |
| Facets facets = FacetHelper.getFacets(searcher, query, docs, plan, indexNode.getDefinition().isSecureFacets()); |
| PERF_LOGGER.end(f, -1, "facets retrieved"); |
| |
| PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT); |
| boolean addExcerpt = restriction != null && restriction.isNotNullRestriction(); |
| |
| restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION); |
| boolean addExplain = restriction != null && restriction.isNotNullRestriction(); |
| |
| Analyzer analyzer = indexNode.getDefinition().getAnalyzer(); |
| |
| FieldInfos mergedFieldInfos = null; |
| if (addExcerpt) { |
| // setup highlighter |
| QueryScorer scorer = new QueryScorer(query); |
| scorer.setExpandMultiTermQuery(true); |
| highlighter.setFragmentScorer(scorer); |
| mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader()); |
| } |
| |
| for (ScoreDoc doc : docs.scoreDocs) { |
| String excerpt = null; |
| if (addExcerpt) { |
| excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos); |
| } |
| |
| String explanation = null; |
| if (addExplain) { |
| explanation = searcher.explain(query, doc.doc).toString(); |
| } |
| |
| LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation); |
| if (row != null) { |
| queue.add(row); |
| } |
| lastDocToRecord = doc; |
| } |
| |
| if (queue.isEmpty() && docs.scoreDocs.length > 0) { |
| //queue is still empty but more results can be fetched |
| //from Lucene so still continue |
| lastDoc = lastDocToRecord; |
| } else { |
| break; |
| } |
| } |
| } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) { |
| String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK; |
| noDocs = true; |
| SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest(); |
| SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery); |
| |
| // ACL filter spellchecks |
| QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField, indexNode.getDefinition().getAnalyzer()); |
| for (SuggestWord suggestion : suggestWords) { |
| Query query = qp.createPhraseQuery(aclCheckField, QueryParserBase.escape(suggestion.string)); |
| |
| query = addDescendantClauseIfRequired(query, plan); |
| |
| TopDocs topDocs = searcher.search(query, 100); |
| if (topDocs.totalHits > 0) { |
| for (ScoreDoc doc : topDocs.scoreDocs) { |
| Document retrievedDoc = searcher.doc(doc.doc); |
| String prefix = filter.getPath(); |
| if (prefix.length() == 1) { |
| prefix = ""; |
| } |
| if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) { |
| queue.add(new LuceneResultRow(suggestion.string)); |
| break; |
| } |
| } |
| } |
| } |
| |
| } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) { |
| SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest(); |
| noDocs = true; |
| |
| List<Lookup.LookupResult> lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery); |
| |
| QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, |
| indexNode.getDefinition().isSuggestAnalyzed() ? indexNode.getDefinition().getAnalyzer() : |
| SuggestHelper.getAnalyzer()); |
| |
| // ACL filter suggestions |
| for (Lookup.LookupResult suggestion : lookupResults) { |
| Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\""); |
| |
| query = addDescendantClauseIfRequired(query, plan); |
| |
| TopDocs topDocs = searcher.search(query, 100); |
| if (topDocs.totalHits > 0) { |
| for (ScoreDoc doc : topDocs.scoreDocs) { |
| Document retrievedDoc = searcher.doc(doc.doc); |
| String prefix = filter.getPath(); |
| if (prefix.length() == 1) { |
| prefix = ""; |
| } |
| if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) { |
| queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value)); |
| break; |
| } |
| } |
| } |
| } |
| } |
| } catch (Exception e) { |
| LOG.warn("query via {} failed.", LucenePropertyIndex.this, e); |
| } finally { |
| indexNode.release(); |
| } |
| |
| if (lastDocToRecord != null) { |
| this.lastDoc = lastDocToRecord; |
| } |
| |
| return !queue.isEmpty(); |
| } |
| |
| |
| private void checkForIndexVersionChange(IndexSearcher searcher) { |
| long currentVersion = getVersion(searcher); |
| if (currentVersion != lastSearchIndexerVersion && lastDoc != null) { |
| lastDoc = null; |
| LOG.debug("Change in index version detected {} => {}. Query would be performed without " + |
| "offset", currentVersion, lastSearchIndexerVersion); |
| } |
| this.lastSearchIndexerVersion = currentVersion; |
| } |
| }; |
| SizeEstimator sizeEstimator = new SizeEstimator() { |
| @Override |
| public long getSize() { |
| IndexNode indexNode = acquireIndexNode(plan); |
| checkState(indexNode != null); |
| try { |
| IndexSearcher searcher = indexNode.getSearcher(); |
| LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader()); |
| if (luceneRequestFacade.getLuceneRequest() instanceof Query) { |
| Query query = (Query) luceneRequestFacade.getLuceneRequest(); |
| TotalHitCountCollector collector = new TotalHitCountCollector(); |
| searcher.search(query, collector); |
| int totalHits = collector.getTotalHits(); |
| LOG.debug("Estimated size for query {} is {}", query, totalHits); |
| return totalHits; |
| } |
| LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest()); |
| } catch (IOException e) { |
| LOG.warn("query via {} failed.", LucenePropertyIndex.this, e); |
| } finally { |
| indexNode.release(); |
| } |
| return -1; |
| } |
| }; |
| return new LucenePathCursor(itr, plan, settings, sizeEstimator); |
| } |
| |
| private static Query addDescendantClauseIfRequired(Query query, IndexPlan plan) { |
| Filter filter = plan.getFilter(); |
| |
| if (filter.getPathRestriction() == Filter.PathRestriction.ALL_CHILDREN) { |
| String path = getPathRestriction(plan); |
| if (!PathUtils.denotesRoot(path)) { |
| if (getPlanResult(plan).indexDefinition.evaluatePathRestrictions()) { |
| |
| BooleanQuery compositeQuery = new BooleanQuery(); |
| compositeQuery.add(query, BooleanClause.Occur.MUST); |
| |
| Query pathQuery = new TermQuery(newAncestorTerm(path)); |
| compositeQuery.add(pathQuery, BooleanClause.Occur.MUST); |
| |
| query = compositeQuery; |
| } else { |
| LOG.warn("Descendant clause could not be added without path restrictions enabled. Plan: {}", plan); |
| } |
| } |
| } |
| |
| return query; |
| } |
| |
| private String getExcerpt(Query query, Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc, |
| FieldInfos fieldInfos) throws IOException { |
| StringBuilder excerpt = new StringBuilder(); |
| int docID = doc.doc; |
| List<String> names = new LinkedList<String>(); |
| |
| for (IndexableField field : searcher.getIndexReader().document(docID).getFields()) { |
| String name = field.name(); |
| // postings highlighter can be used on analyzed fields with docs, freqs, positions and offsets stored. |
| if (name.startsWith(ANALYZED_FIELD_PREFIX) && fieldInfos.hasProx() && fieldInfos.hasOffsets()) { |
| names.add(name); |
| } |
| } |
| |
| if (names.size() > 0) { |
| int[] maxPassages = new int[names.size()]; |
| for (int i = 0; i < maxPassages.length; i++) { |
| maxPassages[i] = 1; |
| } |
| try { |
| Map<String, String[]> stringMap = postingsHighlighter.highlightFields(names.toArray(new String[names.size()]), |
| query, searcher, new int[]{docID}, maxPassages); |
| for (Map.Entry<String, String[]> entry : stringMap.entrySet()) { |
| String value = Arrays.toString(entry.getValue()); |
| if (value.contains("<b>")) { |
| if (excerpt.length() > 0) { |
| excerpt.append("..."); |
| } |
| excerpt.append(value); |
| } |
| } |
| } catch (Exception e) { |
| LOG.error("postings highlighting failed", e); |
| } |
| } |
| |
| // fallback if no excerpt could be retrieved using postings highlighter |
| if (excerpt.length() == 0) { |
| |
| for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) { |
| String name = field.name(); |
| // only full text or analyzed fields |
| if (name.startsWith(FieldNames.FULLTEXT) || name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) { |
| String text = field.stringValue(); |
| TokenStream tokenStream = analyzer.tokenStream(name, text); |
| |
| try { |
| TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 1); |
| if (textFragments != null && textFragments.length > 0) { |
| for (TextFragment fragment : textFragments) { |
| if (excerpt.length() > 0) { |
| excerpt.append("..."); |
| } |
| excerpt.append(fragment.toString()); |
| } |
| break; |
| } |
| } catch (InvalidTokenOffsetsException e) { |
| LOG.error("higlighting failed", e); |
| } |
| } |
| } |
| } |
| return excerpt.toString(); |
| } |
| |
| @Override |
| public NodeAggregator getNodeAggregator() { |
| return null; |
| } |
| |
| /** |
| * In a fulltext term for jcr:contains(foo, 'bar') 'foo' |
| * is the property name. While in jcr:contains(foo/*, 'bar') |
| * 'foo' is node name |
| * |
| * @return true if the term is related to node |
| */ |
| public static boolean isNodePath(String fulltextTermPath) { |
| return fulltextTermPath.endsWith("/*"); |
| } |
| |
| private IndexNode acquireIndexNode(IndexPlan plan) { |
| return tracker.acquireIndexNode(getPlanResult(plan).indexPath); |
| } |
| |
| private static Sort getSort(IndexPlan plan) { |
| List<OrderEntry> sortOrder = plan.getSortOrder(); |
| if (sortOrder == null || sortOrder.isEmpty()) { |
| return null; |
| } |
| |
| List<SortField> fieldsList = newArrayListWithCapacity(sortOrder.size()); |
| PlanResult planResult = getPlanResult(plan); |
| for (int i = 0; i < sortOrder.size(); i++) { |
| OrderEntry oe = sortOrder.get(i); |
| if (!isNativeSort(oe)) { |
| PropertyDefinition pd = planResult.getOrderedProperty(i); |
| boolean reverse = oe.getOrder() != OrderEntry.Order.ASCENDING; |
| String propName = oe.getPropertyName(); |
| propName = FieldNames.createDocValFieldName(propName); |
| fieldsList.add(new SortField(propName, toLuceneSortType(oe, pd), reverse)); |
| } |
| } |
| |
| if (fieldsList.isEmpty()) { |
| return null; |
| } else { |
| return new Sort(fieldsList.toArray(new SortField[0])); |
| } |
| } |
| |
| /** |
| * Identifies the default sort order used by the index (@jcr:score descending) |
| * |
| * @param oe order entry |
| * @return |
| */ |
| private static boolean isNativeSort(OrderEntry oe) { |
| return oe.getPropertyName().equals(NATIVE_SORT_ORDER.getPropertyName()); |
| } |
| |
| private static SortField.Type toLuceneSortType(OrderEntry oe, PropertyDefinition defn) { |
| Type<?> t = oe.getPropertyType(); |
| checkState(t != null, "Type cannot be null"); |
| checkState(!t.isArray(), "Array types are not supported"); |
| |
| int type = getPropertyType(defn, oe.getPropertyName(), t.tag()); |
| switch (type) { |
| case PropertyType.LONG: |
| case PropertyType.DATE: |
| return SortField.Type.LONG; |
| case PropertyType.DOUBLE: |
| return SortField.Type.DOUBLE; |
| default: |
| //TODO Check about SortField.Type.STRING_VAL |
| return SortField.Type.STRING; |
| } |
| } |
| |
| private static String getIndexName(IndexPlan plan) { |
| return PathUtils.getName(getPlanResult(plan).indexPath); |
| } |
| |
| /** |
| * Get the Lucene query for the given filter. |
| * |
| * @param plan index plan containing filter details |
| * @param reader the Lucene reader |
| * @return the Lucene query |
| */ |
| private static LuceneRequestFacade getLuceneRequest(IndexPlan plan, IndexAugmentorFactory augmentorFactory, IndexReader reader) { |
| FulltextQueryTermsProvider augmentor = getIndexAgumentor(plan, augmentorFactory); |
| List<Query> qs = new ArrayList<Query>(); |
| Filter filter = plan.getFilter(); |
| FullTextExpression ft = filter.getFullTextConstraint(); |
| PlanResult planResult = getPlanResult(plan); |
| IndexDefinition defn = planResult.indexDefinition; |
| Analyzer analyzer = defn.getAnalyzer(); |
| if (ft == null) { |
| // there might be no full-text constraint |
| // when using the LowCostLuceneIndexProvider |
| // which is used for testing |
| } else { |
| qs.add(getFullTextQuery(plan, ft, analyzer, augmentor)); |
| } |
| |
| |
| //Check if native function is supported |
| PropertyRestriction pr = null; |
| if (defn.hasFunctionDefined()) { |
| pr = filter.getPropertyRestriction(defn.getFunctionName()); |
| } |
| |
| if (pr != null) { |
| String query = String.valueOf(pr.first.getValue(pr.first.getType())); |
| QueryParser queryParser = new QueryParser(VERSION, "", analyzer); |
| if (query.startsWith("mlt?")) { |
| String mltQueryString = query.replace("mlt?", ""); |
| if (reader != null) { |
| Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString); |
| if (moreLikeThis != null) { |
| qs.add(moreLikeThis); |
| } |
| } |
| } else if (query.startsWith("spellcheck?")) { |
| String spellcheckQueryString = query.replace("spellcheck?", ""); |
| if (reader != null) { |
| return new LuceneRequestFacade<SpellcheckHelper.SpellcheckQuery>(SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString, reader)); |
| } |
| } else if (query.startsWith("suggest?")) { |
| String suggestQueryString = query.replace("suggest?", ""); |
| if (reader != null) { |
| return new LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString)); |
| } |
| } else { |
| try { |
| qs.add(queryParser.parse(query)); |
| } catch (ParseException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| } else if (planResult.evaluateNonFullTextConstraints()) { |
| addNonFullTextConstraints(qs, plan, reader); |
| } |
| |
| if (qs.size() == 0 |
| && plan.getSortOrder() != null) { |
| //This case indicates that query just had order by and no |
| //property restriction defined. In this case property |
| //existence queries for each sort entry |
| List<OrderEntry> orders = plan.getSortOrder(); |
| for (int i = 0; i < orders.size(); i++) { |
| OrderEntry oe = orders.get(i); |
| if (!isNativeSort(oe)) { |
| PropertyDefinition pd = planResult.getOrderedProperty(i); |
| PropertyRestriction orderRest = new PropertyRestriction(); |
| orderRest.propertyName = oe.getPropertyName(); |
| Query q = createQuery(orderRest, pd); |
| if (q != null) { |
| qs.add(q); |
| } |
| } |
| } |
| } |
| |
| if (qs.size() == 0) { |
| if (reader == null) { |
| //When called in planning mode then some queries like rep:similar |
| //cannot create query as reader is not provided. In such case we |
| //just return match all queries |
| return new LuceneRequestFacade<Query>(new MatchAllDocsQuery()); |
| } |
| //For purely nodeType based queries all the documents would have to |
| //be returned (if the index definition has a single rule) |
| if (planResult.evaluateNodeTypeRestriction()) { |
| return new LuceneRequestFacade<Query>(new MatchAllDocsQuery()); |
| } |
| |
| throw new IllegalStateException("No query created for filter " + filter); |
| } |
| return performAdditionalWraps(qs); |
| } |
| |
| /** |
| * Perform additional wraps on the list of queries to allow, for example, the NOT CONTAINS to |
| * play properly when sent to lucene. |
| * |
| * @param qs the list of queries. Cannot be null. |
| * @return |
| */ |
| @Nonnull |
| public static LuceneRequestFacade<Query> performAdditionalWraps(@Nonnull List<Query> qs) { |
| checkNotNull(qs); |
| if (qs.size() == 1) { |
| Query q = qs.get(0); |
| if (q instanceof BooleanQuery) { |
| BooleanQuery ibq = (BooleanQuery) q; |
| boolean onlyNotClauses = true; |
| for (BooleanClause c : ibq.getClauses()) { |
| if (c.getOccur() != BooleanClause.Occur.MUST_NOT) { |
| onlyNotClauses = false; |
| break; |
| } |
| } |
| if (onlyNotClauses) { |
| // if we have only NOT CLAUSES we have to add a match all docs (*.*) for the |
| // query to work |
| ibq.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); |
| } |
| } |
| return new LuceneRequestFacade<Query>(qs.get(0)); |
| } |
| BooleanQuery bq = new BooleanQuery(); |
| for (Query q : qs) { |
| boolean unwrapped = false; |
| if (q instanceof BooleanQuery) { |
| unwrapped = unwrapMustNot((BooleanQuery) q, bq); |
| } |
| |
| if (!unwrapped) { |
| bq.add(q, MUST); |
| } |
| } |
| return new LuceneRequestFacade<Query>(bq); |
| } |
| |
| /** |
| * unwraps any NOT clauses from the provided boolean query into another boolean query. |
| * |
| * @param input the query to be analysed for the existence of NOT clauses. Cannot be null. |
| * @param output the query where the unwrapped NOTs will be saved into. Cannot be null. |
| * @return true if there where at least one unwrapped NOT. false otherwise. |
| */ |
| private static boolean unwrapMustNot(@Nonnull BooleanQuery input, @Nonnull BooleanQuery output) { |
| checkNotNull(input); |
| checkNotNull(output); |
| boolean unwrapped = false; |
| for (BooleanClause bc : input.getClauses()) { |
| if (bc.getOccur() == BooleanClause.Occur.MUST_NOT) { |
| output.add(bc); |
| unwrapped = true; |
| } |
| } |
| |
| return unwrapped; |
| } |
| |
| private CustomScoreQuery getCustomScoreQuery(IndexPlan plan, Query subQuery) { |
| PlanResult planResult = getPlanResult(plan); |
| IndexDefinition idxDef = planResult.indexDefinition; |
| String providerName = idxDef.getScorerProviderName(); |
| if (scorerProviderFactory != null && providerName != null) { |
| return scorerProviderFactory.getScorerProvider(providerName) |
| .createCustomScoreQuery(subQuery); |
| } |
| return null; |
| } |
| private static FulltextQueryTermsProvider getIndexAgumentor(IndexPlan plan, IndexAugmentorFactory augmentorFactory) { |
| PlanResult planResult = getPlanResult(plan); |
| |
| if (augmentorFactory != null){ |
| return augmentorFactory.getFulltextQueryTermsProvider(planResult.indexingRule.getNodeTypeName()); |
| } |
| |
| return null; |
| } |
| |
| private static void addNonFullTextConstraints(List<Query> qs, |
| IndexPlan plan, IndexReader reader) { |
| Filter filter = plan.getFilter(); |
| PlanResult planResult = getPlanResult(plan); |
| IndexDefinition defn = planResult.indexDefinition; |
| if (!filter.matchesAllTypes()) { |
| addNodeTypeConstraints(planResult.indexingRule, qs, filter); |
| } |
| |
| String path = getPathRestriction(plan); |
| switch (filter.getPathRestriction()) { |
| case ALL_CHILDREN: |
| if (defn.evaluatePathRestrictions()) { |
| if ("/".equals(path)) { |
| break; |
| } |
| qs.add(new TermQuery(newAncestorTerm(path))); |
| } |
| break; |
| case DIRECT_CHILDREN: |
| if (defn.evaluatePathRestrictions()) { |
| BooleanQuery bq = new BooleanQuery(); |
| bq.add(new BooleanClause(new TermQuery(newAncestorTerm(path)), BooleanClause.Occur.MUST)); |
| bq.add(new BooleanClause(newDepthQuery(path), BooleanClause.Occur.MUST)); |
| qs.add(bq); |
| } |
| break; |
| case EXACT: |
| qs.add(new TermQuery(newPathTerm(path))); |
| break; |
| case PARENT: |
| if (denotesRoot(path)) { |
| // there's no parent of the root node |
| // we add a path that can not possibly occur because there |
| // is no way to say "match no documents" in Lucene |
| qs.add(new TermQuery(new Term(FieldNames.PATH, "///"))); |
| } else { |
| qs.add(new TermQuery(newPathTerm(getParentPath(path)))); |
| } |
| break; |
| case NO_RESTRICTION: |
| break; |
| } |
| |
| for (PropertyRestriction pr : filter.getPropertyRestrictions()) { |
| String name = pr.propertyName; |
| |
| if (QueryImpl.REP_EXCERPT.equals(name) || QueryImpl.OAK_SCORE_EXPLANATION.equals(name) |
| || QueryImpl.REP_FACET.equals(name)) { |
| continue; |
| } |
| |
| if (QueryConstants.RESTRICTION_LOCAL_NAME.equals(name)) { |
| if (planResult.evaluateNodeNameRestriction()) { |
| Query q = createNodeNameQuery(pr); |
| if (q != null) { |
| qs.add(q); |
| } |
| } |
| continue; |
| } |
| |
| if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding |
| && pr.lastIncluding) { |
| String first = pr.first.getValue(STRING); |
| first = first.replace("\\", ""); |
| if (JCR_PATH.equals(name)) { |
| qs.add(new TermQuery(newPathTerm(first))); |
| continue; |
| } else if ("*".equals(name)) { |
| //TODO Revisit reference constraint. For performant impl |
| //references need to be indexed in a different manner |
| addReferenceConstraint(first, qs, reader); |
| continue; |
| } |
| } |
| |
| PropertyDefinition pd = planResult.getPropDefn(pr); |
| if (pd == null) { |
| continue; |
| } |
| |
| Query q = createQuery(pr, pd); |
| if (q != null) { |
| qs.add(q); |
| } |
| } |
| } |
| |
| private static int determinePropertyType(PropertyDefinition defn, PropertyRestriction pr) { |
| int typeFromRestriction = pr.propertyType; |
| if (typeFromRestriction == PropertyType.UNDEFINED) { |
| //If no explicit type defined then determine the type from restriction |
| //value |
| if (pr.first != null && pr.first.getType() != Type.UNDEFINED) { |
| typeFromRestriction = pr.first.getType().tag(); |
| } else if (pr.last != null && pr.last.getType() != Type.UNDEFINED) { |
| typeFromRestriction = pr.last.getType().tag(); |
| } else if (pr.list != null && !pr.list.isEmpty()) { |
| typeFromRestriction = pr.list.get(0).getType().tag(); |
| } |
| } |
| return getPropertyType(defn, pr.propertyName, typeFromRestriction); |
| } |
| |
| private static int getPropertyType(PropertyDefinition defn, String name, int defaultVal) { |
| if (defn.isTypeDefined()) { |
| return defn.getType(); |
| } |
| return defaultVal; |
| } |
| |
| private static PlanResult getPlanResult(IndexPlan plan) { |
| return (PlanResult) plan.getAttribute(ATTR_PLAN_RESULT); |
| } |
| |
| private static Query createLikeQuery(String name, String first) { |
| first = first.replace('%', WildcardQuery.WILDCARD_STRING); |
| first = first.replace('_', WildcardQuery.WILDCARD_CHAR); |
| |
| int indexOfWS = first.indexOf(WildcardQuery.WILDCARD_STRING); |
| int indexOfWC = first.indexOf(WildcardQuery.WILDCARD_CHAR); |
| int len = first.length(); |
| |
| if (indexOfWS == len || indexOfWC == len) { |
| // remove trailing "*" for prefixquery |
| first = first.substring(0, first.length() - 1); |
| if (JCR_PATH.equals(name)) { |
| return new PrefixQuery(newPathTerm(first)); |
| } else { |
| return new PrefixQuery(new Term(name, first)); |
| } |
| } else { |
| if (JCR_PATH.equals(name)) { |
| return new WildcardQuery(newPathTerm(first)); |
| } else { |
| return new WildcardQuery(new Term(name, first)); |
| } |
| } |
| } |
| |
| @CheckForNull |
| private static Query createQuery(PropertyRestriction pr, |
| PropertyDefinition defn) { |
| int propType = determinePropertyType(defn, pr); |
| |
| if (pr.isNullRestriction()) { |
| return new TermQuery(new Term(FieldNames.NULL_PROPS, defn.name)); |
| } |
| |
| //If notNullCheckEnabled explicitly enabled use the simple TermQuery |
| //otherwise later fallback to range query |
| if (pr.isNotNullRestriction() && defn.notNullCheckEnabled) { |
| return new TermQuery(new Term(FieldNames.NOT_NULL_PROPS, defn.name)); |
| } |
| |
| switch (propType) { |
| case PropertyType.DATE: { |
| Long first = pr.first != null ? FieldFactory.dateToLong(pr.first.getValue(Type.DATE)) : null; |
| Long last = pr.last != null ? FieldFactory.dateToLong(pr.last.getValue(Type.DATE)) : null; |
| if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding |
| && pr.lastIncluding) { |
| // [property]=[value] |
| return NumericRangeQuery.newLongRange(pr.propertyName, first, first, true, true); |
| } else if (pr.first != null && pr.last != null) { |
| return NumericRangeQuery.newLongRange(pr.propertyName, first, last, |
| pr.firstIncluding, pr.lastIncluding); |
| } else if (pr.first != null && pr.last == null) { |
| // '>' & '>=' use cases |
| return NumericRangeQuery.newLongRange(pr.propertyName, first, null, pr.firstIncluding, true); |
| } else if (pr.last != null && !pr.last.equals(pr.first)) { |
| // '<' & '<=' |
| return NumericRangeQuery.newLongRange(pr.propertyName, null, last, true, pr.lastIncluding); |
| } else if (pr.list != null) { |
| BooleanQuery in = new BooleanQuery(); |
| for (PropertyValue value : pr.list) { |
| Long dateVal = FieldFactory.dateToLong(value.getValue(Type.DATE)); |
| in.add(NumericRangeQuery.newLongRange(pr.propertyName, dateVal, dateVal, true, true), BooleanClause.Occur.SHOULD); |
| } |
| return in; |
| } else if (pr.isNotNullRestriction()) { |
| // not null. For date lower bound of zero can be used |
| return NumericRangeQuery.newLongRange(pr.propertyName, 0L, Long.MAX_VALUE, true, true); |
| } |
| |
| break; |
| } |
| case PropertyType.DOUBLE: { |
| Double first = pr.first != null ? pr.first.getValue(Type.DOUBLE) : null; |
| Double last = pr.last != null ? pr.last.getValue(Type.DOUBLE) : null; |
| if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding |
| && pr.lastIncluding) { |
| // [property]=[value] |
| return NumericRangeQuery.newDoubleRange(pr.propertyName, first, first, true, true); |
| } else if (pr.first != null && pr.last != null) { |
| return NumericRangeQuery.newDoubleRange(pr.propertyName, first, last, |
| pr.firstIncluding, pr.lastIncluding); |
| } else if (pr.first != null && pr.last == null) { |
| // '>' & '>=' use cases |
| return NumericRangeQuery.newDoubleRange(pr.propertyName, first, null, pr.firstIncluding, true); |
| } else if (pr.last != null && !pr.last.equals(pr.first)) { |
| // '<' & '<=' |
| return NumericRangeQuery.newDoubleRange(pr.propertyName, null, last, true, pr.lastIncluding); |
| } else if (pr.list != null) { |
| BooleanQuery in = new BooleanQuery(); |
| for (PropertyValue value : pr.list) { |
| Double doubleVal = value.getValue(Type.DOUBLE); |
| in.add(NumericRangeQuery.newDoubleRange(pr.propertyName, doubleVal, doubleVal, true, true), BooleanClause.Occur.SHOULD); |
| } |
| return in; |
| } else if (pr.isNotNullRestriction()) { |
| // not null. |
| return NumericRangeQuery.newDoubleRange(pr.propertyName, Double.MIN_VALUE, Double.MAX_VALUE, true, true); |
| } |
| break; |
| } |
| case PropertyType.LONG: { |
| Long first = pr.first != null ? pr.first.getValue(LONG) : null; |
| Long last = pr.last != null ? pr.last.getValue(LONG) : null; |
| if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding |
| && pr.lastIncluding) { |
| // [property]=[value] |
| return NumericRangeQuery.newLongRange(pr.propertyName, first, first, true, true); |
| } else if (pr.first != null && pr.last != null) { |
| return NumericRangeQuery.newLongRange(pr.propertyName, first, last, |
| pr.firstIncluding, pr.lastIncluding); |
| } else if (pr.first != null && pr.last == null) { |
| // '>' & '>=' use cases |
| return NumericRangeQuery.newLongRange(pr.propertyName, first, null, pr.firstIncluding, true); |
| } else if (pr.last != null && !pr.last.equals(pr.first)) { |
| // '<' & '<=' |
| return NumericRangeQuery.newLongRange(pr.propertyName, null, last, true, pr.lastIncluding); |
| } else if (pr.list != null) { |
| BooleanQuery in = new BooleanQuery(); |
| for (PropertyValue value : pr.list) { |
| Long longVal = value.getValue(LONG); |
| in.add(NumericRangeQuery.newLongRange(pr.propertyName, longVal, longVal, true, true), BooleanClause.Occur.SHOULD); |
| } |
| return in; |
| } else if (pr.isNotNullRestriction()) { |
| // not null. |
| return NumericRangeQuery.newLongRange(pr.propertyName, Long.MIN_VALUE, Long.MAX_VALUE, true, true); |
| } |
| break; |
| } |
| default: { |
| if (pr.isLike) { |
| return createLikeQuery(pr.propertyName, pr.first.getValue(STRING)); |
| } |
| |
| //TODO Confirm that all other types can be treated as string |
| String first = pr.first != null ? pr.first.getValue(STRING) : null; |
| String last = pr.last != null ? pr.last.getValue(STRING) : null; |
| if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding |
| && pr.lastIncluding) { |
| // [property]=[value] |
| return new TermQuery(new Term(pr.propertyName, first)); |
| } else if (pr.first != null && pr.last != null) { |
| return TermRangeQuery.newStringRange(pr.propertyName, first, last, |
| pr.firstIncluding, pr.lastIncluding); |
| } else if (pr.first != null && pr.last == null) { |
| // '>' & '>=' use cases |
| return TermRangeQuery.newStringRange(pr.propertyName, first, null, pr.firstIncluding, true); |
| } else if (pr.last != null && !pr.last.equals(pr.first)) { |
| // '<' & '<=' |
| return TermRangeQuery.newStringRange(pr.propertyName, null, last, true, pr.lastIncluding); |
| } else if (pr.list != null) { |
| BooleanQuery in = new BooleanQuery(); |
| for (PropertyValue value : pr.list) { |
| String strVal = value.getValue(STRING); |
| in.add(new TermQuery(new Term(pr.propertyName, strVal)), BooleanClause.Occur.SHOULD); |
| } |
| return in; |
| } else if (pr.isNotNullRestriction()) { |
| return new TermRangeQuery(pr.propertyName, null, null, true, true); |
| } |
| } |
| } |
| throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn); |
| } |
| |
| static long getVersion(IndexSearcher indexSearcher) { |
| IndexReader reader = indexSearcher.getIndexReader(); |
| if (reader instanceof DirectoryReader) { |
| return ((DirectoryReader) reader).getVersion(); |
| } |
| return -1; |
| } |
| |
| private static Query createNodeNameQuery(PropertyRestriction pr) { |
| String first = pr.first != null ? pr.first.getValue(STRING) : null; |
| if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding |
| && pr.lastIncluding) { |
| // [property]=[value] |
| return new TermQuery(new Term(FieldNames.NODE_NAME, first)); |
| } |
| |
| if (pr.isLike) { |
| return createLikeQuery(FieldNames.NODE_NAME, first); |
| } |
| |
| throw new IllegalStateException("For nodeName queries only EQUALS and LIKE are supported " + pr); |
| } |
| |
| private static void addReferenceConstraint(String uuid, List<Query> qs, |
| IndexReader reader) { |
| if (reader == null) { |
| // getPlan call |
| qs.add(new TermQuery(new Term("*", uuid))); |
| return; |
| } |
| |
| // reference query |
| BooleanQuery bq = new BooleanQuery(); |
| Collection<String> fields = MultiFields.getIndexedFields(reader); |
| for (String f : fields) { |
| bq.add(new TermQuery(new Term(f, uuid)), SHOULD); |
| } |
| qs.add(bq); |
| } |
| |
| private static void addNodeTypeConstraints(IndexingRule defn, List<Query> qs, Filter filter) { |
| BooleanQuery bq = new BooleanQuery(); |
| PropertyDefinition primaryType = defn.getConfig(JCR_PRIMARYTYPE); |
| //TODO OAK-2198 Add proper nodeType query support |
| |
| if (primaryType != null && primaryType.propertyIndex) { |
| for (String type : filter.getPrimaryTypes()) { |
| bq.add(new TermQuery(new Term(JCR_PRIMARYTYPE, type)), SHOULD); |
| } |
| } |
| |
| PropertyDefinition mixinType = defn.getConfig(JCR_MIXINTYPES); |
| if (mixinType != null && mixinType.propertyIndex) { |
| for (String type : filter.getMixinTypes()) { |
| bq.add(new TermQuery(new Term(JCR_MIXINTYPES, type)), SHOULD); |
| } |
| } |
| |
| if (bq.clauses().size() != 0) { |
| qs.add(bq); |
| } |
| } |
| |
| static Query getFullTextQuery(final IndexPlan plan, FullTextExpression ft, |
| final Analyzer analyzer, final FulltextQueryTermsProvider augmentor) { |
| final PlanResult pr = getPlanResult(plan); |
| // a reference to the query, so it can be set in the visitor |
| // (a "non-local return") |
| final AtomicReference<Query> result = new AtomicReference<Query>(); |
| ft.accept(new FullTextVisitor() { |
| |
| @Override |
| public boolean visit(FullTextContains contains) { |
| visitTerm(contains.getPropertyName(), contains.getRawText(), null, contains.isNot()); |
| return true; |
| } |
| |
| @Override |
| public boolean visit(FullTextOr or) { |
| BooleanQuery q = new BooleanQuery(); |
| for (FullTextExpression e : or.list) { |
| Query x = getFullTextQuery(plan, e, analyzer, augmentor); |
| q.add(x, SHOULD); |
| } |
| result.set(q); |
| return true; |
| } |
| |
| @Override |
| public boolean visit(FullTextAnd and) { |
| BooleanQuery q = new BooleanQuery(); |
| for (FullTextExpression e : and.list) { |
| Query x = getFullTextQuery(plan, e, analyzer, augmentor); |
| /* Only unwrap the clause if MUST_NOT(x) */ |
| boolean hasMustNot = false; |
| if (x instanceof BooleanQuery) { |
| BooleanQuery bq = (BooleanQuery) x; |
| if ((bq.getClauses().length == 1) && |
| (bq.getClauses()[0].getOccur() == BooleanClause.Occur.MUST_NOT)) { |
| hasMustNot = true; |
| q.add(bq.getClauses()[0]); |
| } |
| } |
| |
| if (!hasMustNot) { |
| q.add(x, MUST); |
| } |
| } |
| result.set(q); |
| return true; |
| } |
| |
| @Override |
| public boolean visit(FullTextTerm term) { |
| return visitTerm(term.getPropertyName(), term.getText(), term.getBoost(), term.isNot()); |
| } |
| |
| private boolean visitTerm(String propertyName, String text, String boost, boolean not) { |
| String p = getLuceneFieldName(propertyName, pr); |
| Query q = tokenToQuery(text, p, pr, analyzer, augmentor); |
| if (q == null) { |
| return false; |
| } |
| if (boost != null) { |
| q.setBoost(Float.parseFloat(boost)); |
| } |
| if (not) { |
| BooleanQuery bq = new BooleanQuery(); |
| bq.add(q, MUST_NOT); |
| result.set(bq); |
| } else { |
| result.set(q); |
| } |
| return true; |
| } |
| }); |
| return result.get(); |
| } |
| |
| static String getLuceneFieldName(@Nullable String p, PlanResult pr) { |
| if (p == null) { |
| return FieldNames.FULLTEXT; |
| } |
| |
| if (isNodePath(p)) { |
| if (pr.isPathTransformed()) { |
| p = PathUtils.getName(p); |
| } else { |
| //Get rid of /* as aggregated fulltext field name is the |
| //node relative path |
| p = FieldNames.createFulltextFieldName(PathUtils.getParentPath(p)); |
| } |
| } else { |
| if (pr.isPathTransformed()) { |
| p = PathUtils.getName(p); |
| } |
| p = FieldNames.createAnalyzedFieldName(p); |
| } |
| |
| if ("*".equals(p)) { |
| p = FieldNames.FULLTEXT; |
| } |
| return p; |
| } |
| |
| private static Query tokenToQuery(String text, String fieldName, PlanResult pr, Analyzer analyzer, |
| FulltextQueryTermsProvider augmentor) { |
| Query ret; |
| IndexingRule indexingRule = pr.indexingRule; |
| //Expand the query on fulltext field |
| if (FieldNames.FULLTEXT.equals(fieldName) && |
| !indexingRule.getNodeScopeAnalyzedProps().isEmpty()) { |
| BooleanQuery in = new BooleanQuery(); |
| for (PropertyDefinition pd : indexingRule.getNodeScopeAnalyzedProps()) { |
| Query q = tokenToQuery(text, FieldNames.createAnalyzedFieldName(pd.name), analyzer); |
| q.setBoost(pd.boost); |
| in.add(q, BooleanClause.Occur.SHOULD); |
| } |
| |
| //Add the query for actual fulltext field also. That query would |
| //not be boosted |
| in.add(tokenToQuery(text, fieldName, analyzer), BooleanClause.Occur.SHOULD); |
| ret = in; |
| } else { |
| ret = tokenToQuery(text, fieldName, analyzer); |
| } |
| |
| //Augment query terms if available (as a 'SHOULD' clause) |
| if (augmentor != null && FieldNames.FULLTEXT.equals(fieldName)) { |
| Query subQuery = augmentor.getQueryTerm(text, analyzer, pr.indexDefinition.getDefinitionNodeState()); |
| if (subQuery != null) { |
| BooleanQuery query = new BooleanQuery(); |
| |
| query.add(ret, BooleanClause.Occur.SHOULD); |
| query.add(subQuery, BooleanClause.Occur.SHOULD); |
| |
| ret = query; |
| } |
| } |
| |
| return ret; |
| } |
| |
| static Query tokenToQuery(String text, String fieldName, Analyzer analyzer) { |
| if (analyzer == null) { |
| return null; |
| } |
| StandardQueryParser parserHelper = new StandardQueryParser(analyzer); |
| parserHelper.setAllowLeadingWildcard(true); |
| parserHelper.setDefaultOperator(StandardQueryConfigHandler.Operator.AND); |
| |
| text = rewriteQueryText(text); |
| |
| try { |
| return parserHelper.parse(text, fieldName); |
| } catch (QueryNodeException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| |
| /** |
| * Following chars are used as operators in Lucene Query and should be escaped |
| */ |
| private static final char[] LUCENE_QUERY_OPERATORS = {':' , '/', '!', '&', '|', '='}; |
| |
| /** |
| * Following logic is taken from org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser#parse(java.lang.String) |
| */ |
| static String rewriteQueryText(String textsearch) { |
| // replace escaped ' with just ' |
| StringBuilder rewritten = new StringBuilder(); |
| // the default lucene query parser recognizes 'AND' and 'NOT' as |
| // keywords. |
| textsearch = textsearch.replaceAll("AND", "and"); |
| textsearch = textsearch.replaceAll("NOT", "not"); |
| boolean escaped = false; |
| for (int i = 0; i < textsearch.length(); i++) { |
| char c = textsearch.charAt(i); |
| if (c == '\\') { |
| if (escaped) { |
| rewritten.append("\\\\"); |
| escaped = false; |
| } else { |
| escaped = true; |
| } |
| } else if (c == '\'') { |
| if (escaped) { |
| escaped = false; |
| } |
| rewritten.append(c); |
| } else if (Chars.contains(LUCENE_QUERY_OPERATORS, c)) { |
| rewritten.append('\\').append(c); |
| } else { |
| if (escaped) { |
| rewritten.append('\\'); |
| escaped = false; |
| } |
| rewritten.append(c); |
| } |
| } |
| return rewritten.toString(); |
| } |
| |
| private static String getPathRestriction(IndexPlan plan) { |
| Filter f = plan.getFilter(); |
| String pathPrefix = plan.getPathPrefix(); |
| if (pathPrefix.isEmpty()) { |
| return f.getPath(); |
| } |
| String relativePath = PathUtils.relativize(pathPrefix, f.getPath()); |
| return "/" + relativePath; |
| } |
| |
| private static Query newDepthQuery(String path) { |
| int depth = PathUtils.getDepth(path) + 1; |
| return NumericRangeQuery.newIntRange(FieldNames.PATH_DEPTH, depth, depth, true, true); |
| } |
| |
| static class LuceneResultRow { |
| final String path; |
| final double score; |
| final String suggestion; |
| final boolean isVirutal; |
| final String excerpt; |
| final String explanation; |
| final Facets facets; |
| |
| LuceneResultRow(String path, double score, String excerpt, Facets facets, String explanation) { |
| this.explanation = explanation; |
| this.excerpt = excerpt; |
| this.facets = facets; |
| this.isVirutal = false; |
| this.path = path; |
| this.score = score; |
| this.suggestion = null; |
| } |
| |
| LuceneResultRow(String suggestion, long weight) { |
| this.isVirutal = true; |
| this.path = "/"; |
| this.score = weight; |
| this.suggestion = suggestion; |
| this.excerpt = null; |
| this.facets = null; |
| this.explanation = null; |
| } |
| |
| LuceneResultRow(String suggestion) { |
| this(suggestion, 1); |
| } |
| |
| @Override |
| public String toString() { |
| return String.format("%s (%1.2f)", path, score); |
| } |
| } |
| |
| /** |
| * A cursor over Lucene results. The result includes the path, |
| * and the jcr:score pseudo-property as returned by Lucene. |
| */ |
| static class LucenePathCursor implements Cursor { |
| |
| private final Cursor pathCursor; |
| private final String pathPrefix; |
| LuceneResultRow currentRow; |
| private final SizeEstimator sizeEstimator; |
| private long estimatedSize; |
| private int numberOfFacets; |
| |
| LucenePathCursor(final Iterator<LuceneResultRow> it, final IndexPlan plan, QueryEngineSettings settings, SizeEstimator sizeEstimator) { |
| pathPrefix = plan.getPathPrefix(); |
| this.sizeEstimator = sizeEstimator; |
| Iterator<String> pathIterator = new Iterator<String>() { |
| |
| @Override |
| public boolean hasNext() { |
| return it.hasNext(); |
| } |
| |
| @Override |
| public String next() { |
| currentRow = it.next(); |
| return currentRow.path; |
| } |
| |
| @Override |
| public void remove() { |
| it.remove(); |
| } |
| |
| }; |
| |
| PlanResult planResult = getPlanResult(plan); |
| pathCursor = new PathCursor(pathIterator, planResult.isUniquePathsRequired(), settings); |
| numberOfFacets = planResult.indexDefinition.getNumberOfTopFacets(); |
| } |
| |
| |
| @Override |
| public boolean hasNext() { |
| return pathCursor.hasNext(); |
| } |
| |
| @Override |
| public void remove() { |
| pathCursor.remove(); |
| } |
| |
| @Override |
| public IndexRow next() { |
| final IndexRow pathRow = pathCursor.next(); |
| return new IndexRow() { |
| |
| @Override |
| public boolean isVirtualRow() { |
| return currentRow.isVirutal; |
| } |
| |
| @Override |
| public String getPath() { |
| String sub = pathRow.getPath(); |
| if (isVirtualRow()) { |
| return sub; |
| } else if (PathUtils.isAbsolute(sub)) { |
| return pathPrefix + sub; |
| } else { |
| return PathUtils.concat(pathPrefix, sub); |
| } |
| } |
| |
| @Override |
| public PropertyValue getValue(String columnName) { |
| // overlay the score |
| if (QueryImpl.JCR_SCORE.equals(columnName)) { |
| return PropertyValues.newDouble(currentRow.score); |
| } |
| if (QueryImpl.REP_SPELLCHECK.equals(columnName) || QueryImpl.REP_SUGGEST.equals(columnName)) { |
| return PropertyValues.newString(currentRow.suggestion); |
| } |
| if (QueryImpl.OAK_SCORE_EXPLANATION.equals(columnName)) { |
| return PropertyValues.newString(currentRow.explanation); |
| } |
| if (QueryImpl.REP_EXCERPT.equals(columnName)) { |
| return PropertyValues.newString(currentRow.excerpt); |
| } |
| if (columnName.startsWith(QueryImpl.REP_FACET)) { |
| String facetFieldName = FacetHelper.parseFacetField(columnName); |
| Facets facets = currentRow.facets; |
| try { |
| if (facets != null) { |
| FacetResult topChildren = facets.getTopChildren(numberOfFacets, facetFieldName); |
| if (topChildren != null) { |
| JsopWriter writer = new JsopBuilder(); |
| writer.object(); |
| for (LabelAndValue lav : topChildren.labelValues) { |
| writer.key(lav.label).value(lav.value.intValue()); |
| } |
| writer.endObject(); |
| return PropertyValues.newString(writer.toString()); |
| } else { |
| return null; |
| } |
| } |
| } catch (Exception e) { |
| throw new RuntimeException(e); |
| } |
| } |
| return pathRow.getValue(columnName); |
| } |
| |
| }; |
| } |
| |
| |
| @Override |
| public long getSize(SizePrecision precision, long max) { |
| if (estimatedSize != 0) { |
| return estimatedSize; |
| } |
| return estimatedSize = sizeEstimator.getSize(); |
| } |
| } |
| |
| static class PathStoredFieldVisitor extends StoredFieldVisitor { |
| |
| private String path; |
| private boolean pathVisited; |
| |
| @Override |
| public Status needsField(FieldInfo fieldInfo) throws IOException { |
| if (PATH.equals(fieldInfo.name)) { |
| return Status.YES; |
| } |
| return pathVisited ? Status.STOP : Status.NO; |
| } |
| |
| @Override |
| public void stringField(FieldInfo fieldInfo, String value) |
| throws IOException { |
| if (PATH.equals(fieldInfo.name)) { |
| path = value; |
| pathVisited = true; |
| } |
| } |
| |
| public String getPath() { |
| return path; |
| } |
| } |
| |
| } |