blob: 4c05e1b9f665794a9722e4d526432848df3d40d7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Locale;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter.HighlightFlag;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.QueryBuilder;
import org.junit.After;
import org.junit.Before;
//TODO rename to reflect position sensitivity
public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
// Field configuration used for every "body" value indexed by newDoc(); supplied via the constructor.
final FieldType fieldType;
// Index directory; created in doBefore() and closed in doAfter().
Directory dir;
// Analyzer shared by the index writer, the highlighter and newPhraseQuery().
MockAnalyzer indexAnalyzer;
// Writer the tests add documents to; created in doBefore().
RandomIndexWriter indexWriter;
// Searcher over indexReader; assigned by initReaderSearcherHighlighter().
IndexSearcher searcher;
// Highlighter under test; assigned by initReaderSearcherHighlighter() or directly by individual tests.
UnifiedHighlighter highlighter;
// Reader obtained from indexWriter; stays null in tests that never call initReaderSearcherHighlighter().
IndexReader indexReader;
// Is it okay if a match (identified by offset pair) appears multiple times in the passage?
// Read and reset to true by the intercepting formatter on each format() call.
AtomicBoolean dupMatchAllowed = new AtomicBoolean(true);
/**
 * Parameter sets for the randomized-testing runner; simply forwards whatever
 * {@code UHTestHelper.parametersFactoryList()} provides.
 * NOTE(review): presumably each entry holds the single FieldType passed to the constructor - confirm.
 */
@ParametersFactory
public static Iterable<Object[]> parameters() {
  final Iterable<Object[]> parameterSets = UHTestHelper.parametersFactoryList();
  return parameterSets;
}
/**
 * Creates a test instance bound to one field configuration.
 *
 * @param fieldType the field type used for every "body" field created by {@code newDoc}
 */
public TestUnifiedHighlighterStrictPhrases(FieldType fieldType) {
this.fieldType = fieldType;
}
/**
 * Per-test setup: builds the analyzer, the directory and the index writer.
 * The searcher and highlighter are created later, once documents have been added.
 */
@Before
public void doBefore() throws IOException {
  // whitespace, punctuation, lowercase
  final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  // more than default; testMultiValued relies on the gap being > 0
  analyzer.setPositionIncrementGap(3);
  indexAnalyzer = analyzer;

  dir = newDirectory();
  indexWriter = new RandomIndexWriter(random(), dir, analyzer);
}
/**
 * Per-test teardown: closes the reader, the writer and the directory, in that order.
 * indexReader is still null for tests that never call initReaderSearcherHighlighter()
 * (e.g. testMatchNoDocsQuery).
 * NOTE(review): this relies on IOUtils.close ignoring null arguments - confirm against its documentation.
 */
@After
public void doAfter() throws IOException {
IOUtils.close(indexReader, indexWriter, dir);
}
/**
 * Builds a document with one "body" field per supplied value, all using this test's field type.
 *
 * @param bodyVals the values of the (possibly multi-valued) "body" field, in order
 * @return the new document; it is not added to the index here
 */
private Document newDoc(String... bodyVals) {
  final Document document = new Document();
  for (int i = 0; i < bodyVals.length; i++) {
    document.add(new Field("body", bodyVals[i], fieldType));
  }
  return document;
}
/**
 * Opens a reader on everything written so far, then creates the searcher and a randomized
 * highlighter with PHRASES and MULTI_TERM_QUERY among the flags passed in.
 * The highlighter's formatter is wrapped so that every passage is sanity-checked
 * (match ordering, optional duplicate detection) before being formatted by the original formatter.
 */
private void initReaderSearcherHighlighter() throws IOException {
  indexReader = indexWriter.getReader();
  searcher = newSearcher(indexReader);
  highlighter = TestUnifiedHighlighter.randomUnifiedHighlighter(searcher, indexAnalyzer,
      EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY), true);

  // intercept the formatter in order to check constraints on the passage.
  final PassageFormatter delegate = highlighter.getFormatter(null);
  highlighter.setFormatter(new PassageFormatter() {
    @Override
    public Object format(Passage[] passages, String content) {
      // Consume the flag: it is switched back to "allowed" after a single use.
      final boolean duplicatesPermitted = dupMatchAllowed.getAndSet(true);
      for (Passage passage : passages) {
        final int[] starts = passage.getMatchStarts();
        final int[] ends = passage.getMatchEnds();
        String previous = "";
        for (int m = 0, matchCount = passage.getNumMatches(); m < matchCount; m++) {
          // pad each to make comparable
          final String current = String.format(Locale.ROOT, "%03d-%03d", starts[m], ends[m]);
          final int order = current.compareTo(previous);
          if (order < 0) {
            fail("bad match order in passage at offset " + current);
          } else if (order == 0) {
            assertTrue("dup match in passage at offset " + current, duplicatesPermitted);
          }
          previous = current;
        }
      }
      return delegate.format(passages, content);
    }
  });
}
/**
 * Analyzes {@code phrase} with the index analyzer and returns the resulting phrase query.
 * The cast fails if the builder produces anything other than a {@link PhraseQuery}.
 *
 * @param field  the field the phrase targets
 * @param phrase the raw phrase text
 */
private PhraseQuery newPhraseQuery(String field, String phrase) {
  final QueryBuilder queryBuilder = new QueryBuilder(indexAnalyzer);
  final Query built = queryBuilder.createPhraseQuery(field, phrase);
  return (PhraseQuery) built;
}
/**
 * Returns a copy of {@code query} (same terms at the same positions) that uses the given slop.
 *
 * @param query the phrase query to copy; it is not modified
 * @param slop  the slop for the copy
 */
private PhraseQuery setSlop(PhraseQuery query, int slop) {
  final Term[] phraseTerms = query.getTerms();
  final int[] phrasePositions = query.getPositions();

  final PhraseQuery.Builder copy = new PhraseQuery.Builder();
  copy.setSlop(slop);
  int idx = 0;
  for (Term term : phraseTerms) {
    copy.add(term, phrasePositions[idx++]);
  }
  return copy.build();
}
/** A required phrase plus a prohibited term: only the adjacent "Yin yang" of the matching doc is highlighted. */
public void testBasics() throws IOException {
  indexWriter.addDocument(newDoc("Yin yang, filter")); // filter out. test getTermToSpanLists reader 1-doc filter
  indexWriter.addDocument(newDoc("yin alone, Yin yang, yin gap yang"));
  initReaderSearcherHighlighter();

  // query: -filter +"yin yang"
  final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
  clauses.add(new TermQuery(new Term("body", "filter")), BooleanClause.Occur.MUST_NOT);
  clauses.add(newPhraseQuery("body", "yin yang"), BooleanClause.Occur.MUST);
  final BooleanQuery query = clauses.build();

  final TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  final String[] snippets = highlighter.highlight("body", query, hits);

  // With WEIGHT_MATCHES the phrase is wrapped as a whole; otherwise each term separately.
  final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
  final String expected = weightMatches
      ? "yin alone, <b>Yin yang</b>, yin gap yang"
      : "yin alone, <b>Yin</b> <b>yang</b>, yin gap yang";
  assertArrayEquals(new String[]{expected}, snippets);
}
/** The same term occurs in a TermQuery and a PhraseQuery; it must not yield duplicate matches. */
public void testWithSameTermQuery() throws IOException {
  indexWriter.addDocument(newDoc("Yin yang, yin gap yang"));
  initReaderSearcherHighlighter();

  final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
  clauses.add(new TermQuery(new Term("body", "yin")), BooleanClause.Occur.MUST);
  clauses.add(newPhraseQuery("body", "yin yang"), BooleanClause.Occur.MUST);
  // add queries for other fields; we shouldn't highlight these because of that.
  clauses.add(new TermQuery(new Term("title", "yang")), BooleanClause.Occur.SHOULD);
  final BooleanQuery query = clauses.build();

  final TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  // We don't want duplicates from "Yin" being in TermQuery & PhraseQuery.
  dupMatchAllowed.set(false);
  final String[] snippets = highlighter.highlight("body", query, hits);

  final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
  final String expected = weightMatches
      ? "<b>Yin yang</b>, <b>yin</b> gap yang"
      : "<b>Yin</b> <b>yang</b>, <b>yin</b> gap yang";
  assertArrayEquals(new String[]{expected}, snippets);
}
/** Optional phrases that do not occur in the matching document must not cause any highlighting there. */
public void testPhraseNotInDoc() throws IOException {
  indexWriter.addDocument(newDoc("Whatever yin")); // query matches this; highlight it
  indexWriter.addDocument(newDoc("nextdoc yin")); // query does NOT match this, only the SHOULD clause does
  initReaderSearcherHighlighter();

  final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
  // MUST:
  clauses.add(new TermQuery(new Term("body", "whatever")), BooleanClause.Occur.MUST);
  // SHOULD: (yet won't)
  clauses.add(newPhraseQuery("body", "nextdoc yin"), BooleanClause.Occur.SHOULD);
  clauses.add(newPhraseQuery("body", "nonexistent yin"), BooleanClause.Occur.SHOULD);
  final BooleanQuery query = clauses.build();

  final TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  final String[] snippets = highlighter.highlight("body", query, hits);

  final String[] expected = {"<b>Whatever</b> yin"};
  assertArrayEquals(expected, snippets);
}
/** One phrase is a prefix of the other; the overlapping terms must not be reported twice. */
public void testSubPhrases() throws IOException {
  indexWriter.addDocument(newDoc("alpha bravo charlie - charlie bravo alpha"));
  initReaderSearcherHighlighter();

  final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
  clauses.add(newPhraseQuery("body", "alpha bravo charlie"), BooleanClause.Occur.MUST);
  clauses.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST);
  final BooleanQuery query = clauses.build();

  final TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  // We don't want duplicates from both PhraseQuery
  dupMatchAllowed.set(false);
  final String[] snippets = highlighter.highlight("body", query, hits);

  final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
  final String expected = weightMatches
      ? "<b>alpha bravo charlie</b> - charlie bravo alpha"
      : "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha";
  assertArrayEquals(new String[]{expected}, snippets);
}
/** MultiPhraseQuery with alternatives at each position: only adjacent (parent, parent) pairs are highlighted. */
public void testSynonyms() throws IOException {
  indexWriter.addDocument(newDoc("mother father w mom father w dad"));
  initReaderSearcherHighlighter();

  final Term[] firstPosition = {new Term("body", "mom"), new Term("body", "mother")};
  final Term[] secondPosition = {new Term("body", "dad"), new Term("body", "father")};
  final MultiPhraseQuery.Builder phrase = new MultiPhraseQuery.Builder();
  phrase.add(firstPosition);
  phrase.add(secondPosition);
  final MultiPhraseQuery query = phrase.build();

  final TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  final String[] snippets = highlighter.highlight("body", query, hits);

  // The trailing "dad" follows "w", not a first-position term, so it stays unhighlighted.
  final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
  final String expected = weightMatches
      ? "<b>mother father</b> w <b>mom father</b> w dad"
      : "<b>mother</b> <b>father</b> w <b>mom</b> <b>father</b> w dad";
  assertArrayEquals(new String[]{expected}, snippets);
}
/**
 * Tests that occurrences of a term that are <em>not</em> adjacent to the span-near match are not
 * highlighted ("charlie" in this case).
 * This particular example exercises "Rewrite" plus "MTQ" in the same query.
 */
public void testRewriteAndMtq() throws IOException {
  indexWriter.addDocument(newDoc("alpha bravo charlie - charlie bravo alpha"));
  initReaderSearcherHighlighter();

  final SpanQuery bravo = new SpanTermQuery(new Term("body", "bravo"));
  final SpanQuery chPrefix = new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term("body", "ch"))); // REWRITES
  final SpanNearQuery bravoThenCh = new SpanNearQuery(new SpanQuery[]{bravo, chPrefix}, 0, true);

  final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
  clauses.add(bravoThenCh, BooleanClause.Occur.MUST);
  clauses.add(new PrefixQuery(new Term("body", "al")), BooleanClause.Occur.MUST); // MTQ
  clauses.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST);
  // add queries for other fields; we shouldn't highlight these because of that.
  clauses.add(newPhraseQuery("title", "bravo alpha"), BooleanClause.Occur.SHOULD);
  final BooleanQuery query = clauses.build();

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  String[] snippets = highlighter.highlight("body", query, hits);
  final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
  final String expectedWithMtq = weightMatches
      ? "<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"
      : "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>";
  assertArrayEquals(new String[]{expectedWithMtq}, snippets);

  // do again, this time with MTQ disabled. We should only find "alpha bravo".
  highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  highlighter.setHandleMultiTermQuery(false); // disable but leave phrase processing enabled
  hits = searcher.search(query, 10, Sort.INDEXORDER);
  snippets = highlighter.highlight("body", query, hits);
  final String[] expectedWithoutMtq = {"<b>alpha</b> <b>bravo</b> charlie - charlie bravo alpha"};
  assertArrayEquals(expectedWithoutMtq, snippets);
}
/**
 * Like {@link #testRewriteAndMtq} but no freestanding MTQ
 */
public void testRewrite() throws IOException {
  indexWriter.addDocument(newDoc("alpha bravo charlie - charlie bravo alpha"));
  initReaderSearcherHighlighter();

  final SpanQuery bravo = new SpanTermQuery(new Term("body", "bravo"));
  final SpanQuery chPrefix = new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term("body", "ch"))); // REWRITES
  final SpanNearQuery bravoThenCh = new SpanNearQuery(new SpanQuery[]{bravo, chPrefix}, 0, true);

  final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
  clauses.add(bravoThenCh, BooleanClause.Occur.MUST);
  // Unlike testRewriteAndMtq, no standalone PrefixQuery("al") clause is added here.
  clauses.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST);
  // add queries for other fields; we shouldn't highlight these because of that.
  clauses.add(newPhraseQuery("title", "bravo alpha"), BooleanClause.Occur.SHOULD);
  final BooleanQuery query = clauses.build();

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  String[] snippets = highlighter.highlight("body", query, hits);
  final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
  final String expectedWithMtq = weightMatches
      ? "<b>alpha bravo</b> <b>charlie</b> - charlie bravo alpha"
      : "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha";
  assertArrayEquals(new String[]{expectedWithMtq}, snippets);

  // do again, this time with MTQ disabled. We should only find "alpha bravo".
  highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  highlighter.setHandleMultiTermQuery(false); // disable but leave phrase processing enabled
  hits = searcher.search(query, 10, Sort.INDEXORDER);
  snippets = highlighter.highlight("body", query, hits);
  final String[] expectedWithoutMtq = {"<b>alpha</b> <b>bravo</b> charlie - charlie bravo alpha"};
  assertArrayEquals(expectedWithoutMtq, snippets);
}
/**
 * Like {@link #testRewriteAndMtq} but no rewrite.
 */
public void testMtq() throws IOException {
  indexWriter.addDocument(newDoc("alpha bravo charlie - charlie bravo alpha"));
  initReaderSearcherHighlighter();

  final SpanQuery bravo = new SpanTermQuery(new Term("body", "bravo"));
  final SpanQuery charlie = new SpanTermQuery(new Term("body", "charlie")); // does NOT rewrite
  final SpanNearQuery bravoThenCharlie = new SpanNearQuery(new SpanQuery[]{bravo, charlie}, 0, true);

  final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
  clauses.add(bravoThenCharlie, BooleanClause.Occur.MUST);
  clauses.add(new PrefixQuery(new Term("body", "al")), BooleanClause.Occur.MUST); // MTQ
  clauses.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST);
  // add queries for other fields; we shouldn't highlight these because of that.
  clauses.add(newPhraseQuery("title", "bravo alpha"), BooleanClause.Occur.SHOULD);
  final BooleanQuery query = clauses.build();

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  String[] snippets = highlighter.highlight("body", query, hits);
  final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
  final String expectedWithMtq = weightMatches
      ? "<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"
      : "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>";
  assertArrayEquals(new String[]{expectedWithMtq}, snippets);

  // do again, this time with MTQ disabled.
  highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  highlighter.setHandleMultiTermQuery(false); // disable but leave phrase processing enabled
  hits = searcher.search(query, 10, Sort.INDEXORDER);
  snippets = highlighter.highlight("body", query, hits);
  // note: without MTQ, the WEIGHT_MATCHES is disabled which affects the snippet boundaries
  final String[] expectedWithoutMtq = {"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha"};
  assertArrayEquals(expectedWithoutMtq, snippets);
}
/**
 * Highlights a two-valued "body" field, then checks that a sloppy phrase only spans the value
 * boundary once its slop reaches the analyzer's position increment gap.
 */
public void testMultiValued() throws IOException {
  indexWriter.addDocument(newDoc("one bravo three", "four bravo six"));
  initReaderSearcherHighlighter();

  final BooleanQuery.Builder perValue = new BooleanQuery.Builder();
  perValue.add(newPhraseQuery("body", "one bravo"), BooleanClause.Occur.MUST);
  perValue.add(newPhraseQuery("body", "four bravo"), BooleanClause.Occur.MUST);
  perValue.add(new PrefixQuery(new Term("body", "br")), BooleanClause.Occur.MUST);
  BooleanQuery query = perValue.build();

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  String[] snippets = highlighter.highlight("body", query, hits, 2);
  final String expectedPerValue = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)
      ? "<b>one bravo</b> three... <b>four bravo</b> six"
      : "<b>one</b> <b>bravo</b> three... <b>four</b> <b>bravo</b> six";
  assertArrayEquals(new String[]{expectedPerValue}, snippets);

  // now test phraseQuery won't span across values
  final int gap = indexAnalyzer.getPositionIncrementGap("body");
  assert gap > 0;

  // 1 too little; won't span
  final PhraseQuery tooTight = setSlop(newPhraseQuery("body", "three four"), gap - 1);
  final BooleanQuery.Builder notSpanning = new BooleanQuery.Builder();
  notSpanning.add(new TermQuery(new Term("body", "bravo")), BooleanClause.Occur.MUST);
  notSpanning.add(tooTight, BooleanClause.Occur.SHOULD);
  query = notSpanning.build();
  hits = searcher.search(query, 10);
  snippets = highlighter.highlight("body", query, hits, 2);
  assertEquals("one <b>bravo</b> three... four <b>bravo</b> six", snippets[0]);

  // and add just enough slop to cross the values:
  final PhraseQuery justEnough = setSlop(newPhraseQuery("body", "three four"), gap); // just enough to span
  final BooleanQuery.Builder spanning = new BooleanQuery.Builder();
  spanning.add(new TermQuery(new Term("body", "bravo")), BooleanClause.Occur.MUST);
  spanning.add(justEnough, BooleanClause.Occur.MUST); // must match and it will
  query = spanning.build();
  hits = searcher.search(query, 10);
  assertEquals(1, hits.totalHits.value);
  snippets = highlighter.highlight("body", query, hits, 2);
  final String expectedSpanning = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)
      ? "one <b>bravo</b> <b>three</b>... four <b>bravo</b> six"
      : "one <b>bravo</b> <b>three</b>... <b>four</b> <b>bravo</b> six";
  assertEquals(expectedSpanning, snippets[0]);
}
/** Limits highlighting to the first 21 characters and checks which phrase terms still get highlighted. */
public void testMaxLen() throws IOException {
  indexWriter.addDocument(newDoc("alpha bravo charlie - gap alpha bravo")); // hyphen is at char 21
  initReaderSearcherHighlighter();
  highlighter.setMaxLength(21);

  final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
  clauses.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.SHOULD);
  clauses.add(newPhraseQuery("body", "gap alpha"), BooleanClause.Occur.SHOULD);
  clauses.add(newPhraseQuery("body", "charlie gap"), BooleanClause.Occur.SHOULD);
  final BooleanQuery query = clauses.build();

  final TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  final String[] snippets = highlighter.highlight("body", query, hits);

  // Three possible outcomes, depending on WEIGHT_MATCHES and on whether the field is re-analyzed.
  final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
  final String expected;
  if (weightMatches) {
    expected = "<b>alpha bravo</b> charlie -";
  } else if (fieldType == UHTestHelper.reanalysisType) {
    expected = "<b>alpha</b> <b>bravo</b> charlie -";
  } else {
    expected = "<b>alpha</b> <b>bravo</b> <b>charlie</b> -";
  }
  assertArrayEquals(new String[]{expected}, snippets);
}
/** A wildcard inside a span-near: "freezing" matches the wildcard but lies outside the span, so it is not highlighted. */
public void testFilteredOutSpan() throws IOException {
  indexWriter.addDocument(newDoc("freezing cold stuff like stuff freedom of speech"));
  initReaderSearcherHighlighter();

  final SpanMultiTermQueryWrapper<WildcardQuery> freeWildcard =
      new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "free*")));
  final SpanTermQuery speech = new SpanTermQuery(new Term("body", "speech"));
  final SpanQuery freeNearSpeech = new SpanNearQuery(new SpanQuery[]{freeWildcard, speech}, 3, false);

  final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
  clauses.add(freeNearSpeech, BooleanClause.Occur.MUST);
  final BooleanQuery query = clauses.build();

  final TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  final String[] snippets = highlighter.highlight("body", query, hits);

  // spans' MatchesIterator exposes each underlying term; thus doesn't enclose intermediate "of"
  final String[] expected = {"freezing cold stuff like stuff <b>freedom</b> of <b>speech</b>"};
  assertArrayEquals(expected, snippets);
}
/** Highlighting without a searcher using a MatchNoDocsQuery must hand back the content unchanged. */
public void testMatchNoDocsQuery() throws IOException {
  final String content = "whatever";

  // No index is involved here, hence the null searcher.
  highlighter = new UnifiedHighlighter(null, indexAnalyzer);
  highlighter.setHighlightPhrasesStrictly(true);

  final Object highlighted = highlighter.highlightWithoutSearcher("body", new MatchNoDocsQuery(), content, 1);
  assertEquals(content, highlighted);
}
/**
 * Uses the {@code preSpanQueryRewrite} hook to unwrap the custom {@code MyQuery} into the query
 * it wraps, and checks the resulting highlights.
 */
public void testPreSpanQueryRewrite() throws IOException {
  indexWriter.addDocument(newDoc("There is no accord and satisfaction with this - Consideration of the accord is arbitrary."));
  initReaderSearcherHighlighter();

  // Replace the randomized highlighter with one that knows how to unwrap MyQuery.
  highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected Set<HighlightFlag> getFlags(String field) {
      final Set<HighlightFlag> inherited = super.getFlags(field);
      inherited.remove(HighlightFlag.WEIGHT_MATCHES); // unsupported
      return inherited;
    }

    @Override
    protected Collection<Query> preSpanQueryRewrite(Query query) {
      if (!(query instanceof MyQuery)) {
        return null;
      }
      return Collections.singletonList(((MyQuery) query).wrapped);
    }
  };
  highlighter.setHighlightPhrasesStrictly(true);

  final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
  final Query phraseQuery = new BoostQuery(new PhraseQuery("body", "accord", "and", "satisfaction"), 2.0f);

  // At least two of the three terms must be present.
  final BooleanQuery.Builder termsBuilder = new BooleanQuery.Builder();
  termsBuilder.setMinimumNumberShouldMatch(2);
  termsBuilder.add(new TermQuery(new Term("body", "accord")), BooleanClause.Occur.SHOULD);
  termsBuilder.add(new TermQuery(new Term("body", "satisfaction")), BooleanClause.Occur.SHOULD);
  termsBuilder.add(new TermQuery(new Term("body", "consideration")), BooleanClause.Occur.SHOULD);
  final Query oredTerms = termsBuilder.build();
  final Query proximityBoostingQuery = new MyQuery(oredTerms);

  bqBuilder.add(phraseQuery, BooleanClause.Occur.SHOULD);
  bqBuilder.add(proximityBoostingQuery, BooleanClause.Occur.SHOULD);
  final Query totalQuery = bqBuilder.build();

  final TopDocs hits = searcher.search(totalQuery, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits.value);
  final String[] snippets = highlighter.highlight("body", totalQuery, hits);
  final String[] expected = {
      "There is no <b>accord</b> <b>and</b> <b>satisfaction</b> with this - <b>Consideration</b> of the <b>accord</b> is arbitrary."};
  assertArrayEquals(expected, snippets);
}
// Tests that terms collected out of order due to being present in multiple Spans are handled correctly
// See LUCENE-8365
public void testReverseOrderSpanCollection() throws IOException {
  // Processing order may depend on various optimizations or other weird factor.
  // Hence two documents, with the two inner phrases in opposite order.
  indexWriter.addDocument(newDoc("alpha bravo - alpha charlie"));
  indexWriter.addDocument(newDoc("alpha charlie - alpha bravo"));
  initReaderSearcherHighlighter();

  final SpanQuery[] alphaBravoClauses = {
      new SpanTermQuery(new Term("body", "alpha")),
      new SpanTermQuery(new Term("body", "bravo"))
  };
  final SpanNearQuery alphaBravo = new SpanNearQuery(alphaBravoClauses, 0, true);

  final SpanQuery[] alphaCharlieClauses = {
      new SpanTermQuery(new Term("body", "alpha")),
      new SpanTermQuery(new Term("body", "charlie"))
  };
  final SpanNearQuery alphaCharlie = new SpanNearQuery(alphaCharlieClauses, 0, true);

  // Both inner phrases, in any order, within 10 positions of each other.
  final SpanNearQuery query = new SpanNearQuery(new SpanQuery[]{alphaBravo, alphaCharlie}, 10, false);

  final TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  final String[] snippets = highlighter.highlight("body", query, hits);

  final String[] expected = {
      "<b>alpha</b> <b>bravo</b> - <b>alpha</b> <b>charlie</b>",
      "<b>alpha</b> <b>charlie</b> - <b>alpha</b> <b>bravo</b>",
  };
  assertArrayEquals(expected, snippets);
}
/**
 * Query wrapper used by {@code testPreSpanQueryRewrite}. It delegates weight creation, rewriting,
 * hashing and visiting to the wrapped query; the highlighter only sees through it because the
 * test's {@code preSpanQueryRewrite} override unwraps it.
 */
private static class MyQuery extends Query {
  /** The query all work is delegated to. */
  private final Query wrapped;

  MyQuery(Query wrapped) {
    this.wrapped = wrapped;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
    return wrapped.createWeight(searcher, scoreMode, boost);
  }

  /** Rewrites the delegate; returns {@code this} when the delegate did not change. */
  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    Query newWrapped = wrapped.rewrite(reader);
    if (newWrapped != wrapped) {
      return new MyQuery(newWrapped);
    }
    return this;
  }

  @Override
  public String toString(String field) {
    return "[[[" + wrapped.toString(field) + "]]]";
  }

  /**
   * Two instances are equal when they are of the same class and wrap equal queries.
   */
  @Override
  public boolean equals(Object obj) {
    // Compare with the OTHER instance's delegate. The previous code cast this.wrapped to MyQuery,
    // which threw ClassCastException as soon as the class check passed (even for q.equals(q)).
    return obj != null && obj.getClass() == getClass() && wrapped.equals(((MyQuery) obj).wrapped);
  }

  /** Consistent with {@link #equals(Object)}: equal delegates yield equal hashes. */
  @Override
  public int hashCode() {
    return wrapped.hashCode();
  }

  @Override
  public void visit(QueryVisitor visitor) {
    wrapped.visit(visitor);
  }
}
// Ported from LUCENE-5455 (fixed in LUCENE-8121). Also see LUCENE-2287.
public void testNestedSpanQueryHighlight() throws Exception {
  // For a long time, the highlighters used to assume all query terms within the SpanQuery were valid at the Spans'
  // position range. This would highlight occurrences of terms that were actually not matched by the query.
  // But now using the SpanCollector API we don't make this kind of mistake.
  final String field = "body";
  final String indexedText = "x y z x z x a";
  indexWriter.addDocument(newDoc(indexedText));
  initReaderSearcherHighlighter();

  // The single indexed document is addressed directly as doc 0 instead of running a search.
  final ScoreDoc[] onlyHit = {new ScoreDoc(0, 1f)};
  final TopDocs topDocs = new TopDocs(new TotalHits(1, TotalHits.Relation.EQUAL_TO), onlyHit);

  String expected;
  String observed;
  Query q;

  // Case 1: exact "x y z" near "a".
  expected = "<b>x</b> <b>y</b> <b>z</b> x z x <b>a</b>";
  final SpanQuery xyz = new SpanNearQuery(new SpanQuery[] {
      new SpanTermQuery(new Term(field, "x")),
      new SpanTermQuery(new Term(field, "y")),
      new SpanTermQuery(new Term(field, "z"))}, 0, true);
  q = new SpanNearQuery(new SpanQuery[] {
      xyz,
      new SpanTermQuery(new Term(field, "a"))}, 10, false);
  observed = highlighter.highlight(field, q, topDocs)[0];
  if (VERBOSE) {
    System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
  }
  assertEquals("Nested SpanNear query not properly highlighted.", expected, observed);

  // Case 2: ("x z" OR "y z") near ("a" OR "b").
  expected = "x <b>y</b> <b>z</b> <b>x</b> <b>z</b> x <b>a</b>";
  final SpanQuery xzOrYz = new SpanOrQuery(
      new SpanNearQuery(new SpanQuery[] {
          new SpanTermQuery(new Term(field, "x")),
          new SpanTermQuery(new Term(field, "z"))}, 0, true),
      new SpanNearQuery(new SpanQuery[] {
          new SpanTermQuery(new Term(field, "y")),
          new SpanTermQuery(new Term(field, "z"))}, 0, true));
  final SpanQuery aOrB = new SpanOrQuery(
      new SpanTermQuery(new Term(field, "a")),
      new SpanTermQuery(new Term(field, "b")));
  q = new SpanNearQuery(new SpanQuery[] {xzOrYz, aOrB}, 10, false);
  observed = highlighter.highlight(field, q, topDocs)[0];
  if (VERBOSE) {
    System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
  }
  assertEquals("Nested SpanNear query within SpanOr not properly highlighted.", expected, observed);

  // Case 3: (any term followed by "z") near "a".
  expected = "x <b>y</b> <b>z</b> <b>x</b> <b>z</b> x <b>a</b>";
  final SpanQuery anyThenZ = new SpanNearQuery(new SpanQuery[] {
      new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term(field, "*"))),
      new SpanTermQuery(new Term(field, "z"))}, 0, true);
  q = new SpanNearQuery(new SpanQuery[] {
      anyThenZ,
      new SpanTermQuery(new Term(field, "a"))}, 10, false);
  observed = highlighter.highlight(field, q, topDocs)[0];
  if (VERBOSE) {
    System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
  }
  assertEquals("Nested SpanNear query with wildcard not properly highlighted.", expected, observed);

  // Case 4: ("x y" OR "z a") near "a"; the "z a" alternative never matches.
  expected = "<b>x</b> <b>y</b> z x z x <b>a</b>";
  final SpanQuery xyOrZa = new SpanOrQuery(
      new SpanNearQuery(new SpanQuery[] {
          new SpanTermQuery(new Term(field, "x")),
          new SpanTermQuery(new Term(field, "y"))}, 0, true),
      new SpanNearQuery(new SpanQuery[] { // No hit span query
          new SpanTermQuery(new Term(field, "z")),
          new SpanTermQuery(new Term(field, "a"))}, 0, true));
  q = new SpanNearQuery(new SpanQuery[] {
      xyOrZa,
      new SpanTermQuery(new Term(field, "a"))}, 10, false);
  observed = highlighter.highlight(field, q, topDocs)[0];
  if (VERBOSE) {
    System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
  }
  assertEquals("Nested SpanNear query within SpanOr not properly highlighted.", expected, observed);
}
}