| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search.uhighlight; |
| |
| import java.io.IOException; |
| import java.nio.charset.StandardCharsets; |
| import java.text.BreakIterator; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.EnumSet; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.Objects; |
| import java.util.Set; |
| import java.util.SortedSet; |
| import java.util.TreeSet; |
| import java.util.function.Predicate; |
| import java.util.function.Supplier; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.index.BaseCompositeReader; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.Fields; |
| import org.apache.lucene.index.FilterLeafReader; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.LeafReader; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.MultiReader; |
| import org.apache.lucene.index.ReaderUtil; |
| import org.apache.lucene.index.StoredFieldVisitor; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.MatchAllDocsQuery; |
| import org.apache.lucene.search.MatchNoDocsQuery; |
| import org.apache.lucene.search.MultiTermQuery; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.QueryVisitor; |
| import org.apache.lucene.search.ScoreDoc; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.search.Weight; |
| import org.apache.lucene.search.spans.SpanQuery; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.InPlaceMergeSorter; |
| |
| /** |
| * A Highlighter that can get offsets from |
| * postings ({@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}), |
| * term vectors ({@link FieldType#setStoreTermVectorOffsets(boolean)}), |
| * or by re-analyzing text. |
| * <p> |
| * This highlighter treats the single original document as the whole corpus, and then scores individual |
| * passages as if they were documents in this corpus. It uses a {@link BreakIterator} to find |
| * passages in the text; by default it breaks using {@link BreakIterator#getSentenceInstance(Locale) |
| * getSentenceInstance(Locale.ROOT)}. It then iterates in parallel (merge sorting by offset) through |
| * the positions of all terms from the query, coalescing those hits that occur in a single passage |
| * into a {@link Passage}, and then scores each Passage using a separate {@link PassageScorer}. |
| * Passages are finally formatted into highlighted snippets with a {@link PassageFormatter}. |
| * <p> |
| * You can customize the behavior by calling some of the setters, or by subclassing and overriding some methods. |
| * Some important hooks: |
| * <ul> |
| * <li>{@link #getBreakIterator(String)}: Customize how the text is divided into passages. |
| * <li>{@link #getScorer(String)}: Customize how passages are ranked. |
| * <li>{@link #getFormatter(String)}: Customize how snippets are formatted. |
| * </ul> |
| * <p> |
| * This is thread-safe. |
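| * <p> |
| * A minimal usage sketch, assuming a stored {@code "body"} field and the same {@code searcher} and |
| * {@code analyzer} used at index time: |
| * <pre class="prettyprint"> |
| *   UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer); |
| *   Query query = new TermQuery(new Term("body", "highlight")); |
| *   TopDocs topDocs = searcher.search(query, 10); |
| *   String[] snippets = highlighter.highlight("body", query, topDocs); |
| * </pre> |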
| * |
| * @lucene.experimental |
| */ |
| public class UnifiedHighlighter { |
| |
| protected static final char MULTIVAL_SEP_CHAR = (char) 0; |
| |
| public static final int DEFAULT_MAX_LENGTH = 10000; |
| |
| public static final int DEFAULT_CACHE_CHARS_THRESHOLD = 524288; // ~ 1 MB (2 byte chars) |
| |
| static final IndexSearcher EMPTY_INDEXSEARCHER; |
| |
| static { |
| try { |
| IndexReader emptyReader = new MultiReader(); |
| EMPTY_INDEXSEARCHER = new IndexSearcher(emptyReader); |
| EMPTY_INDEXSEARCHER.setQueryCache(null); |
| } catch (IOException bogus) { |
| throw new RuntimeException(bogus); |
| } |
| } |
| |
| protected static final LabelledCharArrayMatcher[] ZERO_LEN_AUTOMATA_ARRAY = new LabelledCharArrayMatcher[0]; |
| |
| protected final IndexSearcher searcher; // if null, can only use highlightWithoutSearcher |
| |
| protected final Analyzer indexAnalyzer; |
| |
| private boolean defaultHandleMtq = true; // e.g. wildcards |
| |
| private boolean defaultHighlightPhrasesStrictly = true; // AKA "accuracy" or "query debugging" |
| |
| private boolean defaultPassageRelevancyOverSpeed = true; //For analysis, prefer MemoryIndexOffsetStrategy |
| |
| private int maxLength = DEFAULT_MAX_LENGTH; |
| |
| // BreakIterator is stateful so we use a Supplier factory method |
| private Supplier<BreakIterator> defaultBreakIterator = () -> BreakIterator.getSentenceInstance(Locale.ROOT); |
| |
| private Predicate<String> defaultFieldMatcher; |
| |
| private PassageScorer defaultScorer = new PassageScorer(); |
| |
| private PassageFormatter defaultFormatter = new DefaultPassageFormatter(); |
| |
| private int defaultMaxNoHighlightPassages = -1; |
| |
| // lazily initialized with double-checked locking; protected so a subclass can init |
| protected volatile FieldInfos fieldInfos; |
| |
| private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD; |
| |
| /** |
| * Extracts matching terms after rewriting against an empty index |
| */ |
| protected static Set<Term> extractTerms(Query query) throws IOException { |
| Set<Term> queryTerms = new HashSet<>(); |
| EMPTY_INDEXSEARCHER.rewrite(query).visit(QueryVisitor.termCollector(queryTerms)); |
| return queryTerms; |
| } |
| |
| /** |
| * Constructs the highlighter with the given index searcher and analyzer. |
| * |
| * @param indexSearcher Usually required, unless {@link #highlightWithoutSearcher(String, Query, String, int)} is |
| * used, in which case this needs to be null. |
| * @param indexAnalyzer Required, even if in some circumstances it isn't used. |
| */ |
| public UnifiedHighlighter(IndexSearcher indexSearcher, Analyzer indexAnalyzer) { |
| this.searcher = indexSearcher; //TODO: make non nullable |
| this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer, |
| "indexAnalyzer is required" |
| + " (even if in some circumstances it isn't used)"); |
| } |
| |
| public void setHandleMultiTermQuery(boolean handleMtq) { |
| this.defaultHandleMtq = handleMtq; |
| } |
| |
| public void setHighlightPhrasesStrictly(boolean highlightPhrasesStrictly) { |
| this.defaultHighlightPhrasesStrictly = highlightPhrasesStrictly; |
| } |
| |
| public void setMaxLength(int maxLength) { |
| if (maxLength < 0 || maxLength == Integer.MAX_VALUE) { |
| // two reasons: (a) prevents overflow problems in BreakIterator.preceding(offset+1); |
| // (b) our sentinel in the offsets queue uses Integer.MAX_VALUE to terminate. |
| throw new IllegalArgumentException("maxLength must be >= 0 and < Integer.MAX_VALUE"); |
| } |
| this.maxLength = maxLength; |
| } |
| |
| public void setBreakIterator(Supplier<BreakIterator> breakIterator) { |
| this.defaultBreakIterator = breakIterator; |
| } |
| |
| public void setScorer(PassageScorer scorer) { |
| this.defaultScorer = scorer; |
| } |
| |
| public void setFormatter(PassageFormatter formatter) { |
| this.defaultFormatter = formatter; |
| } |
| |
| public void setMaxNoHighlightPassages(int defaultMaxNoHighlightPassages) { |
| this.defaultMaxNoHighlightPassages = defaultMaxNoHighlightPassages; |
| } |
| |
| public void setCacheFieldValCharsThreshold(int cacheFieldValCharsThreshold) { |
| this.cacheFieldValCharsThreshold = cacheFieldValCharsThreshold; |
| } |
| |
| public void setFieldMatcher(Predicate<String> predicate) { |
| this.defaultFieldMatcher = predicate; |
| } |
| |
| /** |
| * Returns whether {@link MultiTermQuery} derivatives will be highlighted. By default it's enabled. MTQ |
| * highlighting can be expensive, particularly when using offsets in postings. |
| */ |
| protected boolean shouldHandleMultiTermQuery(String field) { |
| return defaultHandleMtq; |
| } |
| |
| /** |
| * Returns whether position-sensitive queries (e.g. phrases and {@link SpanQuery}s) |
| * should be highlighted strictly based on query matches (slower) |
| * versus any/all occurrences of the underlying terms. By default it's enabled, but there's no overhead if such |
| * queries aren't used. |
| */ |
| protected boolean shouldHighlightPhrasesStrictly(String field) { |
| return defaultHighlightPhrasesStrictly; |
| } |
| |
| |
| protected boolean shouldPreferPassageRelevancyOverSpeed(String field) { |
| return defaultPassageRelevancyOverSpeed; |
| } |
| |
| /** |
| * Returns the predicate to use for extracting the parts of the query that must be highlighted. |
| * By default only the query parts that target the current field are kept (AKA requireFieldMatch). |
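| * <p> |
| * For example, a sketch of relaxing the field match requirement so query terms from any field |
| * are highlighted (analogous to {@code requireFieldMatch=false}): |
| * <pre class="prettyprint"> |
| *   highlighter.setFieldMatcher((fieldName) -&gt; true); // accept query terms from any field |
| * </pre> |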
| */ |
| protected Predicate<String> getFieldMatcher(String field) { |
| if (defaultFieldMatcher != null) { |
| return defaultFieldMatcher; |
| } else { |
| // requireFieldMatch = true |
| return (qf) -> field.equals(qf); |
| } |
| } |
| |
| /** |
| * The maximum content size to process. Content will be truncated to this size before highlighting. Typically |
| * snippets closer to the beginning of the document better summarize its content. |
| */ |
| public int getMaxLength() { |
| return maxLength; |
| } |
| |
| /** |
| * Returns the {@link BreakIterator} to use for |
| * dividing text into passages. This returns |
| * {@link BreakIterator#getSentenceInstance(Locale) getSentenceInstance(Locale.ROOT)} by default; |
| * subclasses can override to customize. |
| * <p> |
| * Note: this highlighter will call |
| * {@link BreakIterator#preceding(int)} and {@link BreakIterator#next()} many times on it. |
| * The default generic JDK implementation of {@code preceding} performs poorly. |
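| * <p> |
| * Since a {@code BreakIterator} is stateful, a fresh instance is obtained per use; a sketch of |
| * customizing it via {@link #setBreakIterator(Supplier)}: |
| * <pre class="prettyprint"> |
| *   highlighter.setBreakIterator(() -&gt; BreakIterator.getWordInstance(Locale.ROOT)); |
| * </pre> |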
| */ |
| protected BreakIterator getBreakIterator(String field) { |
| return defaultBreakIterator.get(); |
| } |
| |
| /** |
| * Returns the {@link PassageScorer} to use for |
| * ranking passages. This |
| * returns the scorer set via {@link #setScorer(PassageScorer)}, or a default {@code PassageScorer} otherwise; |
| * subclasses can override to customize. |
| */ |
| protected PassageScorer getScorer(String field) { |
| return defaultScorer; |
| } |
| |
| /** |
| * Returns the {@link PassageFormatter} to use for |
| * formatting passages into highlighted snippets. This |
| * returns the formatter set via {@link #setFormatter(PassageFormatter)}, or a {@link DefaultPassageFormatter} otherwise; |
| * subclasses can override to customize. |
| */ |
| protected PassageFormatter getFormatter(String field) { |
| return defaultFormatter; |
| } |
| |
| /** |
| * Returns the number of leading passages (as delineated by the {@link BreakIterator}) to return |
| * when no highlights could be found. If it's less than 0 (the default) then this defaults to the {@code maxPassages} |
| * parameter given for each request. If this is 0 then the resulting highlight is null (not formatted). |
| */ |
| protected int getMaxNoHighlightPassages(String field) { |
| return defaultMaxNoHighlightPassages; |
| } |
| |
| /** |
| * Returns the character-count threshold that limits field value pre-fetching. The highlighter |
| * internally highlights in batches of documents sized by the sum of the field value lengths (in chars) of the fields |
| * to be highlighted (each bounded by {@link #getMaxLength()}). By setting this to 0, you can force |
| * documents to be fetched and highlighted one at a time, which you usually shouldn't do. |
| * The default is 524288 chars, which translates to about a megabyte. However, note |
| * that the highlighter sometimes ignores this and highlights one document at a time (without caching a |
| * bunch of documents in advance) when it can detect there's no point in it -- such as when all fields will be |
| * highlighted via re-analysis. |
| */ |
| public int getCacheFieldValCharsThreshold() { // question: should we size by bytes instead? |
| return cacheFieldValCharsThreshold; |
| } |
| |
| /** |
| * Returns the {@link IndexSearcher} passed to the constructor; may be null. |
| */ |
| public IndexSearcher getIndexSearcher() { |
| return searcher; |
| } |
| |
| /** |
| * Returns the {@link Analyzer} passed to the constructor. |
| */ |
| public Analyzer getIndexAnalyzer() { |
| return indexAnalyzer; |
| } |
| |
| /** |
| * Source of term offsets; essential for highlighting. |
| */ |
| public enum OffsetSource { |
| POSTINGS, TERM_VECTORS, ANALYSIS, POSTINGS_WITH_TERM_VECTORS, NONE_NEEDED |
| } |
| |
| /** |
| * Determine the offset source for the specified field. The default algorithm is as follows: |
| * <ol> |
| * <li>This calls {@link #getFieldInfo(String)}. Note this returns null if there is no searcher or if the |
| * field isn't found there.</li> |
| * <li>If there's a field info and it has |
| * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} then {@link OffsetSource#POSTINGS} is |
| * returned ({@link OffsetSource#POSTINGS_WITH_TERM_VECTORS} if the field also has term vectors).</li> |
| * <li>If there's a field info and {@link FieldInfo#hasVectors()} then {@link OffsetSource#TERM_VECTORS} is |
| * returned (note we can't check here if the TV has offsets; if it doesn't then an exception will get thrown |
| * down the line).</li> |
| * <li>Fall-back: {@link OffsetSource#ANALYSIS} is returned.</li> |
| * </ol> |
| * <p> |
| * Note that the highlighter sometimes switches to something else based on the query, such as if you have |
| * {@link OffsetSource#POSTINGS_WITH_TERM_VECTORS} but in fact don't need term vectors. |
| */ |
| protected OffsetSource getOffsetSource(String field) { |
| FieldInfo fieldInfo = getFieldInfo(field); |
| if (fieldInfo != null) { |
| if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { |
| return fieldInfo.hasVectors() ? OffsetSource.POSTINGS_WITH_TERM_VECTORS : OffsetSource.POSTINGS; |
| } |
| if (fieldInfo.hasVectors()) { // unfortunately we can't also check if the TV has offsets |
| return OffsetSource.TERM_VECTORS; |
| } |
| } |
| return OffsetSource.ANALYSIS; |
| } |
| |
| /** |
| * Called by the default implementation of {@link #getOffsetSource(String)}. |
| * If there is no searcher then this simply returns null. |
| */ |
| protected FieldInfo getFieldInfo(String field) { |
| if (searcher == null) { |
| return null; |
| } |
| // Need thread-safety for lazy-init, but let's avoid 'synchronized' by using the double-checked locking idiom |
| FieldInfos fieldInfos = this.fieldInfos; // note: it's volatile; read once |
| if (fieldInfos == null) { |
| synchronized (this) { |
| fieldInfos = this.fieldInfos; |
| if (fieldInfos == null) { |
| fieldInfos = FieldInfos.getMergedFieldInfos(searcher.getIndexReader()); |
| this.fieldInfos = fieldInfos; |
| } |
| |
| } |
| |
| } |
| return fieldInfos.fieldInfo(field); |
| } |
| |
| /** |
| * Highlights the top passages from a single field. |
| * |
| * @param field field name to highlight. |
| * Must have a stored string value and also be indexed with offsets. |
| * @param query query to highlight. |
| * @param topDocs TopDocs containing the summary result documents to highlight. |
| * @return Array of formatted snippets corresponding to the documents in <code>topDocs</code>. |
| * If no highlights were found for a document, the |
| * first sentence for the field will be returned. |
| * @throws IOException if an I/O error occurred during processing |
| * @throws IllegalArgumentException if <code>field</code> was indexed without |
| * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} |
| */ |
| public String[] highlight(String field, Query query, TopDocs topDocs) throws IOException { |
| return highlight(field, query, topDocs, 1); |
| } |
| |
| /** |
| * Highlights the top-N passages from a single field. |
| * |
| * @param field field name to highlight. Must have a stored string value. |
| * @param query query to highlight. |
| * @param topDocs TopDocs containing the summary result documents to highlight. |
| * @param maxPassages The maximum number of top-N ranked passages used to |
| * form the highlighted snippets. |
| * @return Array of formatted snippets corresponding to the documents in <code>topDocs</code>. |
| * If no highlights were found for a document, the |
| * first {@code maxPassages} sentences from the |
| * field will be returned. |
| * @throws IOException if an I/O error occurred during processing |
| * @throws IllegalArgumentException if <code>field</code> was indexed without |
| * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} |
| */ |
| public String[] highlight(String field, Query query, TopDocs topDocs, int maxPassages) throws IOException { |
| Map<String, String[]> res = highlightFields(new String[]{field}, query, topDocs, new int[]{maxPassages}); |
| return res.get(field); |
| } |
| |
| /** |
| * Highlights the top passages from multiple fields. |
| * <p> |
| * Conceptually, this behaves as a more efficient form of: |
| * <pre class="prettyprint"> |
| * Map m = new HashMap(); |
| * for (String field : fields) { |
| * m.put(field, highlight(field, query, topDocs)); |
| * } |
| * return m; |
| * </pre> |
| * |
| * @param fields field names to highlight. Must have a stored string value. |
| * @param query query to highlight. |
| * @param topDocs TopDocs containing the summary result documents to highlight. |
| * @return Map keyed on field name, containing the array of formatted snippets |
| * corresponding to the documents in <code>topDocs</code>. |
| * If no highlights were found for a document, the |
| * first sentence from the field will be returned. |
| * @throws IOException if an I/O error occurred during processing |
| * @throws IllegalArgumentException if any field was indexed without |
| * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} |
| */ |
| public Map<String, String[]> highlightFields(String[] fields, Query query, TopDocs topDocs) throws IOException { |
| int maxPassages[] = new int[fields.length]; |
| Arrays.fill(maxPassages, 1); |
| return highlightFields(fields, query, topDocs, maxPassages); |
| } |
| |
| /** |
| * Highlights the top-N passages from multiple fields. |
| * <p> |
| * Conceptually, this behaves as a more efficient form of: |
| * <pre class="prettyprint"> |
| * Map m = new HashMap(); |
| * for (String field : fields) { |
| * m.put(field, highlight(field, query, topDocs, maxPassages)); |
| * } |
| * return m; |
| * </pre> |
| * |
| * @param fields field names to highlight. Must have a stored string value. |
| * @param query query to highlight. |
| * @param topDocs TopDocs containing the summary result documents to highlight. |
| * @param maxPassages The maximum number of top-N ranked passages per-field used to |
| * form the highlighted snippets. |
| * @return Map keyed on field name, containing the array of formatted snippets |
| * corresponding to the documents in <code>topDocs</code>. |
| * If no highlights were found for a document, the |
| * first {@code maxPassages} sentences from the |
| * field will be returned. |
| * @throws IOException if an I/O error occurred during processing |
| * @throws IllegalArgumentException if any field was indexed without |
| * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} |
| */ |
| public Map<String, String[]> highlightFields(String[] fields, Query query, TopDocs topDocs, int[] maxPassages) |
| throws IOException { |
| final ScoreDoc scoreDocs[] = topDocs.scoreDocs; |
| int docids[] = new int[scoreDocs.length]; |
| for (int i = 0; i < docids.length; i++) { |
| docids[i] = scoreDocs[i].doc; |
| } |
| |
| return highlightFields(fields, query, docids, maxPassages); |
| } |
| |
| /** |
| * Highlights the top-N passages from multiple fields, |
| * for the provided int[] docids. |
| * |
| * @param fieldsIn field names to highlight. Must have a stored string value. |
| * @param query query to highlight. |
| * @param docidsIn The document IDs to highlight. |
| * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to |
| * form the highlighted snippets. |
| * @return Map keyed on field name, containing the array of formatted snippets |
| * corresponding to the documents in <code>docidsIn</code>. |
| * If no highlights were found for a document, the |
| * first {@code maxPassages} sentences from the field will |
| * be returned. |
| * @throws IOException if an I/O error occurred during processing |
| * @throws IllegalArgumentException if any field was indexed without |
| * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} |
| */ |
| public Map<String, String[]> highlightFields(String[] fieldsIn, Query query, int[] docidsIn, int[] maxPassagesIn) |
| throws IOException { |
| Map<String, String[]> snippets = new HashMap<>(); |
| for (Map.Entry<String, Object[]> ent : highlightFieldsAsObjects(fieldsIn, query, docidsIn, maxPassagesIn).entrySet()) { |
| Object[] snippetObjects = ent.getValue(); |
| String[] snippetStrings = new String[snippetObjects.length]; |
| snippets.put(ent.getKey(), snippetStrings); |
| for (int i = 0; i < snippetObjects.length; i++) { |
| Object snippet = snippetObjects[i]; |
| if (snippet != null) { |
| snippetStrings[i] = snippet.toString(); |
| } |
| } |
| } |
| |
| return snippets; |
| } |
| |
| /** |
| * Expert: highlights the top-N passages from multiple fields, |
| * for the provided int[] docids, rendering to custom Objects as |
| * returned by the {@link PassageFormatter}. Use |
| * this API to render to something other than String. |
| * |
| * @param fieldsIn field names to highlight. Must have a stored string value. |
| * @param query query to highlight. |
| * @param docIdsIn The document IDs to highlight. |
| * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to |
| * form the highlighted snippets. |
| * @return Map keyed on field name, containing the array of formatted snippets |
| * corresponding to the documents in <code>docIdsIn</code>. |
| * If no highlights were found for a document, the |
| * first {@code maxPassages} sentences from the field will |
| * be returned. |
| * @throws IOException if an I/O error occurred during processing |
| * @throws IllegalArgumentException if any field was indexed without |
| * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} |
| */ |
| protected Map<String, Object[]> highlightFieldsAsObjects(String[] fieldsIn, Query query, int[] docIdsIn, |
| int[] maxPassagesIn) throws IOException { |
| if (fieldsIn.length < 1) { |
| throw new IllegalArgumentException("fieldsIn must not be empty"); |
| } |
| if (fieldsIn.length != maxPassagesIn.length) { |
| throw new IllegalArgumentException("invalid number of maxPassagesIn"); |
| } |
| if (searcher == null) { |
| throw new IllegalStateException("This method requires that an indexSearcher was passed in the " |
| + "constructor. Perhaps you mean to call highlightWithoutSearcher?"); |
| } |
| |
| // Sort docs & fields for sequential i/o |
| |
| // Sort doc IDs w/ index to original order: (copy input arrays since we sort in-place) |
| int[] docIds = new int[docIdsIn.length]; |
| int[] docInIndexes = new int[docIds.length]; // fill in ascending order; points into docIdsIn[] |
| copyAndSortDocIdsWithIndex(docIdsIn, docIds, docInIndexes); // latter 2 are "out" params |
| |
| // Sort fields w/ maxPassages pair: (copy input arrays since we sort in-place) |
| final String fields[] = new String[fieldsIn.length]; |
| final int maxPassages[] = new int[maxPassagesIn.length]; |
| copyAndSortFieldsWithMaxPassages(fieldsIn, maxPassagesIn, fields, maxPassages); // latter 2 are "out" params |
| |
| // Init field highlighters (where most of the highlight logic lives, and on a per field basis) |
| Set<Term> queryTerms = extractTerms(query); |
| FieldHighlighter[] fieldHighlighters = new FieldHighlighter[fields.length]; |
| int numTermVectors = 0; |
| int numPostings = 0; |
| for (int f = 0; f < fields.length; f++) { |
| FieldHighlighter fieldHighlighter = getFieldHighlighter(fields[f], query, queryTerms, maxPassages[f]); |
| fieldHighlighters[f] = fieldHighlighter; |
| |
| switch (fieldHighlighter.getOffsetSource()) { |
| case TERM_VECTORS: |
| numTermVectors++; |
| break; |
| case POSTINGS: |
| numPostings++; |
| break; |
| case POSTINGS_WITH_TERM_VECTORS: |
| numTermVectors++; |
| numPostings++; |
| break; |
| case ANALYSIS: |
| case NONE_NEEDED: |
| default: |
| //do nothing |
| break; |
| } |
| } |
| |
| int cacheCharsThreshold = calculateOptimalCacheCharsThreshold(numTermVectors, numPostings); |
| |
| IndexReader indexReaderWithTermVecCache = |
| (numTermVectors >= 2) ? TermVectorReusingLeafReader.wrap(searcher.getIndexReader()) : null; |
| |
| // [fieldIdx][docIdInIndex] of highlightDoc result |
| Object[][] highlightDocsInByField = new Object[fields.length][docIds.length]; |
| // Highlight in doc batches determined by loadFieldValues (consumes from docIdIter) |
| DocIdSetIterator docIdIter = asDocIdSetIterator(docIds); |
| for (int batchDocIdx = 0; batchDocIdx < docIds.length; ) { |
| // Load the field values of the first batch of document(s) (note: commonly all docs are in this batch) |
| List<CharSequence[]> fieldValsByDoc = |
| loadFieldValues(fields, docIdIter, cacheCharsThreshold); |
| // the size of the above list is the size of the batch (num of docs in the batch) |
| |
| // Highlight in per-field order first, then by doc (better I/O pattern) |
| for (int fieldIdx = 0; fieldIdx < fields.length; fieldIdx++) { |
| Object[] resultByDocIn = highlightDocsInByField[fieldIdx];//parallel to docIdsIn |
| FieldHighlighter fieldHighlighter = fieldHighlighters[fieldIdx]; |
| for (int docIdx = batchDocIdx; docIdx - batchDocIdx < fieldValsByDoc.size(); docIdx++) { |
| int docId = docIds[docIdx];//sorted order |
| CharSequence content = fieldValsByDoc.get(docIdx - batchDocIdx)[fieldIdx]; |
| if (content == null) { |
| continue; |
| } |
| IndexReader indexReader = |
| (fieldHighlighter.getOffsetSource() == OffsetSource.TERM_VECTORS |
| && indexReaderWithTermVecCache != null) |
| ? indexReaderWithTermVecCache |
| : searcher.getIndexReader(); |
| final LeafReader leafReader; |
| if (indexReader instanceof LeafReader) { |
| leafReader = (LeafReader) indexReader; |
| } else { |
| List<LeafReaderContext> leaves = indexReader.leaves(); |
| LeafReaderContext leafReaderContext = leaves.get(ReaderUtil.subIndex(docId, leaves)); |
| leafReader = leafReaderContext.reader(); |
| docId -= leafReaderContext.docBase; // adjust 'doc' to be within this leaf reader |
| } |
| int docInIndex = docInIndexes[docIdx];//original input order |
| assert resultByDocIn[docInIndex] == null; |
| resultByDocIn[docInIndex] = |
| fieldHighlighter |
| .highlightFieldForDoc(leafReader, docId, content.toString()); |
| } |
| |
| } |
| |
| batchDocIdx += fieldValsByDoc.size(); |
| } |
| IOUtils.close(indexReaderWithTermVecCache); // FYI won't close underlying reader |
| assert docIdIter.docID() == DocIdSetIterator.NO_MORE_DOCS |
| || docIdIter.nextDoc() == DocIdSetIterator.NO_MORE_DOCS; |
| |
| // TODO reconsider the return type; since this is an "advanced" method, let's not return a Map? Notice the only |
| // caller simply iterates it to build another structure. |
| |
| // field -> object highlights parallel to docIdsIn |
| Map<String, Object[]> resultMap = new HashMap<>(fields.length); |
| for (int f = 0; f < fields.length; f++) { |
| resultMap.put(fields[f], highlightDocsInByField[f]); |
| } |
| return resultMap; |
| } |
| |
| /** |
| * When cacheCharsThreshold is 0, loadFieldValues() only fetches one document at a time. We force it to 0 |
| * in the two circumstances noted in the comments below. |
| */ |
| private int calculateOptimalCacheCharsThreshold(int numTermVectors, int numPostings) { |
| if (numPostings == 0 && numTermVectors == 0) { |
| // (1) When all fields are ANALYSIS there's no point in caching a batch of documents |
| // because no other info on disk is needed to highlight it. |
| return 0; |
| } else if (numTermVectors >= 2) { |
| // (2) When two or more fields have term vectors, given the field-then-doc algorithm, the underlying term |
| // vectors will be fetched in a terrible access pattern unless we highlight a doc at a time and use a special |
| // current-doc TV cache. So we do that. Hopefully one day TVs will be improved to make this pointless. |
| return 0; |
| } else { |
| return getCacheFieldValCharsThreshold(); |
| } |
| } |
| |
| private void copyAndSortFieldsWithMaxPassages(String[] fieldsIn, int[] maxPassagesIn, final String[] fields, |
| final int[] maxPassages) { |
| System.arraycopy(fieldsIn, 0, fields, 0, fieldsIn.length); |
| System.arraycopy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.length); |
| new InPlaceMergeSorter() { |
| @Override |
| protected void swap(int i, int j) { |
| String tmp = fields[i]; |
| fields[i] = fields[j]; |
| fields[j] = tmp; |
| int tmp2 = maxPassages[i]; |
| maxPassages[i] = maxPassages[j]; |
| maxPassages[j] = tmp2; |
| } |
| |
| @Override |
| protected int compare(int i, int j) { |
| return fields[i].compareTo(fields[j]); |
| } |
| |
| }.sort(0, fields.length); |
| } |
| |
| private void copyAndSortDocIdsWithIndex(int[] docIdsIn, final int[] docIds, final int[] docInIndexes) { |
| System.arraycopy(docIdsIn, 0, docIds, 0, docIdsIn.length); |
| for (int i = 0; i < docInIndexes.length; i++) { |
| docInIndexes[i] = i; |
| } |
| new InPlaceMergeSorter() { |
| @Override |
| protected void swap(int i, int j) { |
| int tmp = docIds[i]; |
| docIds[i] = docIds[j]; |
| docIds[j] = tmp; |
| tmp = docInIndexes[i]; |
| docInIndexes[i] = docInIndexes[j]; |
| docInIndexes[j] = tmp; |
| } |
| |
| @Override |
| protected int compare(int i, int j) { |
| return Integer.compare(docIds[i], docIds[j]); |
| } |
| }.sort(0, docIds.length); |
| } |
| |
| /** |
| * Highlights text passed as a parameter. This requires that the {@link IndexSearcher} provided to this highlighter |
| * be null. This use-case is less common. Naturally, the mode of operation will be {@link OffsetSource#ANALYSIS}. |
| * The result of this method is whatever the {@link PassageFormatter} returns. For the {@link |
| * DefaultPassageFormatter} and assuming {@code content} has non-zero length, the result will be a non-null |
| * string -- so it's safe to call {@link Object#toString()} on it in that case. |
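| * <p> |
| * A minimal sketch (the {@code analyzer} and {@code query} are whatever fits your use-case): |
| * <pre class="prettyprint"> |
| *   UnifiedHighlighter highlighter = new UnifiedHighlighter(null, analyzer); |
| *   Object snippet = highlighter.highlightWithoutSearcher("body", query, content, 1); |
| * </pre> |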
| * |
| * @param field field name to highlight (as found in the query). |
| * @param query query to highlight. |
| * @param content text to highlight. |
| * @param maxPassages The maximum number of top-N ranked passages used to |
| * form the highlighted snippets. |
| * @return result of the {@link PassageFormatter} -- probably a String. Might be null. |
| * @throws IOException if an I/O error occurred during processing |
| */ |
| //TODO make content a List? and return a List? and ensure getEmptyHighlight is never invoked multiple times? |
| public Object highlightWithoutSearcher(String field, Query query, String content, int maxPassages) |
| throws IOException { |
| if (this.searcher != null) { |
| throw new IllegalStateException("highlightWithoutSearcher should only be called on a " + |
| getClass().getSimpleName() + " without an IndexSearcher."); |
| } |
| Objects.requireNonNull(content, "content is required"); |
| Set<Term> queryTerms = extractTerms(query); |
| return getFieldHighlighter(field, query, queryTerms, maxPassages) |
| .highlightFieldForDoc(null, -1, content); |
| } |
| |
| protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) { |
| UHComponents components = getHighlightComponents(field, query, allTerms); |
| OffsetSource offsetSource = getOptimizedOffsetSource(components); |
| return new FieldHighlighter(field, |
| getOffsetStrategy(offsetSource, components), |
| new SplittingBreakIterator(getBreakIterator(field), UnifiedHighlighter.MULTIVAL_SEP_CHAR), |
| getScorer(field), |
| maxPassages, |
| getMaxNoHighlightPassages(field), |
| getFormatter(field)); |
| } |
| |
| protected UHComponents getHighlightComponents(String field, Query query, Set<Term> allTerms) { |
| Predicate<String> fieldMatcher = getFieldMatcher(field); |
| Set<HighlightFlag> highlightFlags = getFlags(field); |
| PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags); |
| boolean queryHasUnrecognizedPart = hasUnrecognizedQuery(fieldMatcher, query); |
| BytesRef[] terms = null; |
| LabelledCharArrayMatcher[] automata = null; |
| if (!highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES) || !queryHasUnrecognizedPart) { |
| terms = filterExtractedTerms(fieldMatcher, allTerms); |
| automata = getAutomata(field, query, highlightFlags); |
| } // otherwise don't need to extract |
| return new UHComponents(field, fieldMatcher, query, terms, phraseHelper, automata, queryHasUnrecognizedPart, highlightFlags); |
| } |
| |
| protected boolean hasUnrecognizedQuery(Predicate<String> fieldMatcher, Query query) { |
| boolean[] hasUnknownLeaf = new boolean[1]; |
| query.visit(new QueryVisitor() { |
| @Override |
| public boolean acceptField(String field) { |
| // checking hasUnknownLeaf is a trick to exit early |
| return hasUnknownLeaf[0] == false && fieldMatcher.test(field); |
| } |
| |
| @Override |
| public void visitLeaf(Query query) { |
| if (MultiTermHighlighting.canExtractAutomataFromLeafQuery(query) == false) { |
| if (!(query instanceof MatchAllDocsQuery || query instanceof MatchNoDocsQuery)) { |
| hasUnknownLeaf[0] = true; |
| } |
| } |
| } |
| }); |
| return hasUnknownLeaf[0]; |
| } |
| |
| protected static BytesRef[] filterExtractedTerms(Predicate<String> fieldMatcher, Set<Term> queryTerms) { |
| // Strip off the redundant field and sort the remaining terms |
| SortedSet<BytesRef> filteredTerms = new TreeSet<>(); |
| for (Term term : queryTerms) { |
| if (fieldMatcher.test(term.field())) { |
| filteredTerms.add(term.bytes()); |
| } |
| } |
| return filteredTerms.toArray(new BytesRef[filteredTerms.size()]); |
| } |
| |
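| /** |
| * Returns the set of {@link HighlightFlag}s for this field, driven by the {@code should*} hooks above; |
| * subclasses can override. For example, a sketch of enabling {@link HighlightFlag#WEIGHT_MATCHES} |
| * (which has no setter) in a subclass: |
| * <pre class="prettyprint"> |
| *   {@literal @}Override |
| *   protected Set&lt;HighlightFlag&gt; getFlags(String field) { |
| *     Set&lt;HighlightFlag&gt; flags = super.getFlags(field); |
| *     flags.add(HighlightFlag.WEIGHT_MATCHES); |
| *     return flags; |
| *   } |
| * </pre> |
| */ |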
| protected Set<HighlightFlag> getFlags(String field) { |
| Set<HighlightFlag> highlightFlags = EnumSet.noneOf(HighlightFlag.class); |
| if (shouldHandleMultiTermQuery(field)) { |
| highlightFlags.add(HighlightFlag.MULTI_TERM_QUERY); |
| } |
| if (shouldHighlightPhrasesStrictly(field)) { |
| highlightFlags.add(HighlightFlag.PHRASES); |
| } |
| if (shouldPreferPassageRelevancyOverSpeed(field)) { |
| highlightFlags.add(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED); |
| } |
| return highlightFlags; |
| } |
| |
| protected PhraseHelper getPhraseHelper(String field, Query query, Set<HighlightFlag> highlightFlags) { |
| boolean useWeightMatchesIter = highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES); |
| if (useWeightMatchesIter) { |
| return PhraseHelper.NONE; // will be handled by Weight.matches which always considers phrases |
| } |
| boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES); |
| boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY); |
| return highlightPhrasesStrictly ? |
| new PhraseHelper(query, field, getFieldMatcher(field), |
| this::requiresRewrite, |
| this::preSpanQueryRewrite, |
| !handleMultiTermQuery |
| ) |
| : PhraseHelper.NONE; |
| } |
| |
| protected LabelledCharArrayMatcher[] getAutomata(String field, Query query, Set<HighlightFlag> highlightFlags) { |
| // Do we "eagerly" look inside span queries for automata here, or do we not and let PhraseHelper handle those? |
| // We look inside spans if we aren't highlighting phrases strictly: |
| final boolean lookInSpan = |
| !highlightFlags.contains(HighlightFlag.PHRASES) // no PhraseHelper |
| || highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES); // Weight.Matches will find all |
| |
| return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY) |
| ? MultiTermHighlighting.extractAutomata(query, getFieldMatcher(field), lookInSpan) |
| : ZERO_LEN_AUTOMATA_ARRAY; |
| } |
| |
| protected OffsetSource getOptimizedOffsetSource(UHComponents components) { |
| OffsetSource offsetSource = getOffsetSource(components.getField()); |
| |
| // null automata means unknown, so assume a possibility |
| boolean mtqOrRewrite = components.getAutomata() == null || components.getAutomata().length > 0 |
| || components.getPhraseHelper().willRewrite() || components.hasUnrecognizedQueryPart(); |
| |
| // null terms means unknown, so assume something to highlight |
| if (mtqOrRewrite == false && components.getTerms() != null && components.getTerms().length == 0) { |
| return OffsetSource.NONE_NEEDED; //nothing to highlight |
| } |
| |
| switch (offsetSource) { |
| case POSTINGS: |
| if (mtqOrRewrite) { // may need to scan through all terms for the highlighted document; postings can't do that efficiently |
| return OffsetSource.ANALYSIS; |
| } |
| break; |
| case POSTINGS_WITH_TERM_VECTORS: |
| if (mtqOrRewrite == false) { |
| return OffsetSource.POSTINGS; //We don't need term vectors |
| } |
| break; |
| case ANALYSIS: |
| case TERM_VECTORS: |
| case NONE_NEEDED: |
| default: |
| //stick with the original offset source |
| break; |
| } |
| |
| return offsetSource; |
| } |
| |
| protected FieldOffsetStrategy getOffsetStrategy(OffsetSource offsetSource, UHComponents components) { |
| switch (offsetSource) { |
| case ANALYSIS: |
| if (!components.getPhraseHelper().hasPositionSensitivity() && |
| !components.getHighlightFlags().contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED) && |
| !components.getHighlightFlags().contains(HighlightFlag.WEIGHT_MATCHES)) { |
| //skip using a memory index since it's pure term filtering |
| return new TokenStreamOffsetStrategy(components, getIndexAnalyzer()); |
| } else { |
| return new MemoryIndexOffsetStrategy(components, getIndexAnalyzer()); |
| } |
| case NONE_NEEDED: |
| return NoOpOffsetStrategy.INSTANCE; |
| case TERM_VECTORS: |
| return new TermVectorOffsetStrategy(components); |
| case POSTINGS: |
| return new PostingsOffsetStrategy(components); |
| case POSTINGS_WITH_TERM_VECTORS: |
| return new PostingsWithTermVectorsOffsetStrategy(components); |
| default: |
| throw new IllegalArgumentException("Unrecognized offset source " + offsetSource); |
| } |
| } |
| |
| /** |
| * When highlighting phrases accurately, we need to know which {@link SpanQuery}s need to have |
| * {@link Query#rewrite(IndexReader)} called on them. It helps performance to avoid it when it's not needed. |
| * This method will be invoked on all SpanQuery instances recursively. If you have custom SpanQuery queries then |
| * override this to check instanceof and provide a definitive answer. If the query isn't your custom one, simply |
| * return null to have the default rules apply, which govern the ones included in Lucene. |
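| * <p> |
| * A sketch of such an override, with a hypothetical {@code MyCustomSpanQuery}: |
| * <pre class="prettyprint"> |
| *   {@literal @}Override |
| *   protected Boolean requiresRewrite(SpanQuery spanQuery) { |
| *     if (spanQuery instanceof MyCustomSpanQuery) { |
| *       return Boolean.FALSE; // definitively needs no rewrite |
| *     } |
| *     return null; // defer to the default rules |
| *   } |
| * </pre> |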
| */ |
| protected Boolean requiresRewrite(SpanQuery spanQuery) { |
| return null; |
| } |
| |
| /** |
| * When highlighting phrases accurately, we may need to handle custom queries that aren't supported in the |
| * {@link org.apache.lucene.search.highlight.WeightedSpanTermExtractor} as called by the {@code PhraseHelper}. |
| * Should custom query types be needed, this method should be overridden to return a collection of queries if appropriate, |
| * or null if nothing to do. If the query is not custom, simply returning null will allow the default rules to apply. |
| * |
| * @param query Query to be highlighted |
| * @return A Collection of Query object(s) if the query needs to be rewritten, otherwise null. |
| */ |
| protected Collection<Query> preSpanQueryRewrite(Query query) { |
| return null; |
| } |
| |
| private DocIdSetIterator asDocIdSetIterator(int[] sortedDocIds) { |
| return new DocIdSetIterator() { |
| int idx = -1; |
| |
| @Override |
| public int docID() { |
| if (idx < 0 || idx >= sortedDocIds.length) { |
| return NO_MORE_DOCS; |
| } |
| return sortedDocIds[idx]; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| idx++; |
| return docID(); |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| return super.slowAdvance(target); // won't be called, so whatever |
| } |
| |
| @Override |
| public long cost() { |
| return Math.max(0, sortedDocIds.length - (idx + 1)); // remaining docs |
| } |
| }; |
| } |
| |
| /** |
| * Loads the String values for each docId by field to be highlighted. By default this loads from stored fields |
| * by the same name as given, but a subclass can change the source. The returned Strings must be identical to |
| * what was indexed (at least for postings or term-vectors offset sources). |
| * This method must load fields for at least one document from the given {@link DocIdSetIterator} |
| * but need not return all of them; by default the character lengths are summed and this method will return early |
| * when {@code cacheCharsThreshold} is exceeded. Specifically if that number is 0, then only one document is |
| * fetched no matter what. Values in the array of {@link CharSequence} will be null if no value was found. |
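| * <p> |
| * A sketch of an override that loads values from somewhere other than stored fields; the |
| * {@code fetchValueOrNull} helper is hypothetical, not part of Lucene: |
| * <pre class="prettyprint"> |
| *   {@literal @}Override |
| *   protected List&lt;CharSequence[]&gt; loadFieldValues(String[] fields, |
| *       DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException { |
| *     List&lt;CharSequence[]&gt; docListOfFields = new ArrayList&lt;&gt;(); |
| *     int docId = docIter.nextDoc(); // must consume at least one doc |
| *     if (docId != DocIdSetIterator.NO_MORE_DOCS) { |
| *       CharSequence[] valuesByField = new CharSequence[fields.length]; |
| *       for (int i = 0; i &lt; fields.length; i++) { |
| *         valuesByField[i] = fetchValueOrNull(docId, fields[i]); // must match what was indexed |
| *       } |
| *       docListOfFields.add(valuesByField); |
| *     } |
| *     return docListOfFields; |
| *   } |
| * </pre> |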
| */ |
| protected List<CharSequence[]> loadFieldValues(String[] fields, |
| DocIdSetIterator docIter, int cacheCharsThreshold) |
| throws IOException { |
| List<CharSequence[]> docListOfFields = |
| new ArrayList<>(cacheCharsThreshold == 0 ? 1 : (int) Math.min(64, docIter.cost())); |
| |
| LimitedStoredFieldVisitor visitor = newLimitedStoredFieldsVisitor(fields); |
| int sumChars = 0; |
| do { |
| int docId = docIter.nextDoc(); |
| if (docId == DocIdSetIterator.NO_MORE_DOCS) { |
| break; |
| } |
| visitor.init(); |
| searcher.doc(docId, visitor); |
| CharSequence[] valuesByField = visitor.getValuesByField(); |
| docListOfFields.add(valuesByField); |
| for (CharSequence val : valuesByField) { |
| sumChars += (val == null ? 0 : val.length()); |
| } |
| } while (sumChars <= cacheCharsThreshold && cacheCharsThreshold != 0); |
| return docListOfFields; |
| } |
| |
| /** |
| * @lucene.internal |
| */ |
| protected LimitedStoredFieldVisitor newLimitedStoredFieldsVisitor(String[] fields) { |
| return new LimitedStoredFieldVisitor(fields, MULTIVAL_SEP_CHAR, getMaxLength()); |
| } |
| |
| /** |
| * Fetches stored fields for highlighting. Uses a multi-val separator char and honors a max length to retrieve. |
| * @lucene.internal |
| */ |
| protected static class LimitedStoredFieldVisitor extends StoredFieldVisitor { |
| protected final String[] fields; |
| protected final char valueSeparator; |
| protected final int maxLength; |
| protected CharSequence[] values;// starts off as String; may become StringBuilder. |
| protected int currentField; |
| |
| public LimitedStoredFieldVisitor(String[] fields, char valueSeparator, int maxLength) { |
| this.fields = fields; |
| this.valueSeparator = valueSeparator; |
| this.maxLength = maxLength; |
| } |
| |
| void init() { |
| values = new CharSequence[fields.length]; |
| currentField = -1; |
| } |
| |
| @Override |
| public void stringField(FieldInfo fieldInfo, byte[] byteValue) throws IOException { |
| String value = new String(byteValue, StandardCharsets.UTF_8); |
| assert currentField >= 0; |
| CharSequence curValue = values[currentField]; |
| if (curValue == null) { |
| //question: if truncate due to maxLength, should we try and avoid keeping the other chars in-memory on |
| // the backing char[]? |
| values[currentField] = value.substring(0, Math.min(maxLength, value.length()));//note: may return 'this' |
| return; |
| } |
| final int lengthBudget = maxLength - curValue.length(); |
| if (lengthBudget <= 0) { |
| return; |
| } |
| StringBuilder curValueBuilder; |
| if (curValue instanceof StringBuilder) { |
| curValueBuilder = (StringBuilder) curValue; |
| } else { |
| // upgrade String to StringBuilder. Choose a good initial size. |
| curValueBuilder = new StringBuilder(curValue.length() + Math.min(lengthBudget, value.length() + 256)); |
| curValueBuilder.append(curValue); |
| } |
| curValueBuilder.append(valueSeparator); |
| curValueBuilder.append(value.substring(0, Math.min(lengthBudget - 1, value.length()))); |
| values[currentField] = curValueBuilder; |
| } |
| |
| @Override |
| public Status needsField(FieldInfo fieldInfo) throws IOException { |
| currentField = Arrays.binarySearch(fields, fieldInfo.name); |
| if (currentField < 0) { |
| return Status.NO; |
| } |
| CharSequence curVal = values[currentField]; |
| if (curVal != null && curVal.length() >= maxLength) { |
| return fields.length == 1 ? Status.STOP : Status.NO; |
| } |
| return Status.YES; |
| } |
| |
| CharSequence[] getValuesByField() { |
| return this.values; |
| } |
| |
| } |
| |
| /** |
| * Wraps an IndexReader that remembers/caches the last call to {@link LeafReader#getTermVectors(int)} so that |
| * if the next call has the same ID, then it is reused. If TVs were column-stride (like doc-values), there would |
| * be no need for this. |
| */ |
| private static class TermVectorReusingLeafReader extends FilterLeafReader { |
| |
| static IndexReader wrap(IndexReader reader) throws IOException { |
| LeafReader[] leafReaders = reader.leaves().stream() |
| .map(LeafReaderContext::reader) |
| .map(TermVectorReusingLeafReader::new) |
| .toArray(LeafReader[]::new); |
| return new BaseCompositeReader<IndexReader>(leafReaders, null) { |
| @Override |
| protected void doClose() { // don't close the underlying reader |
| } |
| |
| @Override |
| public CacheHelper getReaderCacheHelper() { |
| return null; |
| } |
| }; |
| } |
| |
| private int lastDocId = -1; |
| private Fields tvFields; |
| |
| TermVectorReusingLeafReader(LeafReader in) { |
| super(in); |
| } |
| |
| @Override |
| public Fields getTermVectors(int docID) throws IOException { |
| if (docID != lastDocId) { |
| lastDocId = docID; |
| tvFields = in.getTermVectors(docID); |
| } |
| return tvFields; |
| } |
| |
| @Override |
| public CacheHelper getCoreCacheHelper() { |
| return null; |
| } |
| |
| @Override |
| public CacheHelper getReaderCacheHelper() { |
| return null; |
| } |
| |
| } |
| |
| /** |
| * Flags for controlling highlighting behavior. |
| */ |
| public enum HighlightFlag { |
| /** @see UnifiedHighlighter#setHighlightPhrasesStrictly(boolean) */ |
| PHRASES, |
| |
| /** @see UnifiedHighlighter#setHandleMultiTermQuery(boolean) */ |
| MULTI_TERM_QUERY, |
| |
| /** Passage relevancy is more important than speed. True by default. */ |
| PASSAGE_RELEVANCY_OVER_SPEED, |
| |
| /** |
| * Internally use the {@link Weight#matches(LeafReaderContext, int)} API for highlighting. |
| * It's more accurate to the query, though might not calculate passage relevancy as well. |
| * Use of this flag requires {@link #MULTI_TERM_QUERY} and {@link #PHRASES}. |
| * {@link #PASSAGE_RELEVANCY_OVER_SPEED} will be ignored. False by default. |
| */ |
| WEIGHT_MATCHES |
| |
| // TODO: useQueryBoosts |
| } |
| } |