blob: eb13db4d942d4f8b434b3e52dd0434dc80b40531 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.monitor;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
public class TestHighlightingMatcher extends MonitorTestBase {
private static final Analyzer WHITESPACE = new WhitespaceAnalyzer();
public static Document buildDoc(String text) {
Document doc = new Document();
doc.add(newTextField(FIELD, text, Field.Store.NO));
return doc;
}
public void testSingleTermQueryMatchesSingleDocument() throws IOException {
try (Monitor monitor = newMonitor()) {
MonitorQuery mq = new MonitorQuery("query1", parse("test"));
monitor.register(mq);
MatchingQueries<HighlightsMatch> matches = monitor.match(buildDoc("this is a test document"),
HighlightsMatch.MATCHER);
assertEquals(1, matches.getMatchCount());
HighlightsMatch match = matches.matches("query1");
assertTrue(match.getHits(FIELD).contains(new HighlightsMatch.Hit(3, 10, 3, 14)));
}
}
public void testSinglePhraseQueryMatchesSingleDocument() throws IOException {
try (Monitor monitor = newMonitor()) {
MonitorQuery mq = new MonitorQuery("query1", parse("\"test document\""));
monitor.register(mq);
MatchingQueries<HighlightsMatch> matches = monitor.match(buildDoc("this is a test document"),
HighlightsMatch.MATCHER);
assertEquals(1, matches.getMatchCount());
HighlightsMatch m = matches.matches("query1");
assertTrue(m.getHits(FIELD).contains(new HighlightsMatch.Hit(3, 10, 4, 23)));
}
}
public void testToString() {
HighlightsMatch match = new HighlightsMatch("1");
match.addHit("field", 2, 3, -1, -1);
match.addHit("field", 0, 1, -1, -1);
match.addHit("afield", 0, 1, 0, 4);
assertEquals("Match(query=1){hits={afield=[0(0)->1(4)], field=[0(-1)->1(-1), 2(-1)->3(-1)]}}", match.toString());
}
public void testMultiFieldQueryMatches() throws IOException {
Document doc = new Document();
doc.add(newTextField("field1", "this is a test of field one", Field.Store.NO));
doc.add(newTextField("field2", "and this is an additional test", Field.Store.NO));
try (Monitor monitor = newMonitor()) {
monitor.register(new MonitorQuery("query1", parse("field1:test field2:test")));
MatchingQueries<HighlightsMatch> matches = monitor.match(doc, HighlightsMatch.MATCHER);
assertEquals(1, matches.getMatchCount());
HighlightsMatch m = matches.matches("query1");
assertNotNull(m);
assertTrue(m.getFields().contains("field1"));
assertTrue(m.getHits("field1").contains(new HighlightsMatch.Hit(3, 10, 3, 14)));
assertTrue(m.getHits("field2").contains(new HighlightsMatch.Hit(5, 26, 5, 30)));
}
}
public void testQueryErrors() throws IOException {
try (Monitor monitor = new Monitor(ANALYZER, Presearcher.NO_FILTERING)) {
monitor.register(new MonitorQuery("1", parse("test")),
new MonitorQuery("2", new ThrowOnRewriteQuery()),
new MonitorQuery("3", parse("document")),
new MonitorQuery("4", parse("foo")));
MatchingQueries<HighlightsMatch> matches = monitor.match(buildDoc("this is a test document"), HighlightsMatch.MATCHER);
assertEquals(4, matches.getQueriesRun());
assertEquals(2, matches.getMatchCount());
assertEquals(1, matches.getErrors().size());
}
}
public void testWildcards() throws IOException {
try (Monitor monitor = newMonitor()) {
monitor.register(new MonitorQuery("1", new RegexpQuery(new Term(FIELD, "he.*"))));
MatchingQueries<HighlightsMatch> matches = monitor.match(buildDoc("hello world"), HighlightsMatch.MATCHER);
assertEquals(1, matches.getQueriesRun());
assertEquals(1, matches.getMatchCount());
assertEquals(1, matches.matches("1").getHitCount());
}
}
public void testWildcardCombinations() throws Exception {
final BooleanQuery bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "term1")), BooleanClause.Occur.MUST)
.add(new PrefixQuery(new Term(FIELD, "term2")), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term(FIELD, "term3")), BooleanClause.Occur.MUST_NOT)
.build();
try (Monitor monitor = newMonitor()) {
monitor.register(new MonitorQuery("1", bq));
MatchingQueries<HighlightsMatch> matches = monitor.match(buildDoc("term1 term22 term4"), HighlightsMatch.MATCHER);
HighlightsMatch m = matches.matches("1");
assertNotNull(m);
assertEquals(2, m.getHitCount());
}
}
public void testDisjunctionMaxQuery() throws IOException {
final DisjunctionMaxQuery query = new DisjunctionMaxQuery(Arrays.asList(
new TermQuery(new Term(FIELD, "term1")), new PrefixQuery(new Term(FIELD, "term2"))
), 1.0f);
try (Monitor monitor = newMonitor()) {
monitor.register(new MonitorQuery("1", query));
MatchingQueries<HighlightsMatch> matches = monitor.match(buildDoc("term1 term2 term3"), HighlightsMatch.MATCHER);
HighlightsMatch m = matches.matches("1");
assertNotNull(m);
assertEquals(2, m.getHitCount());
}
}
public void testIdenticalMatches() throws Exception {
final BooleanQuery bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "term1")), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term(FIELD, "term1")), BooleanClause.Occur.SHOULD)
.build();
try (Monitor monitor = new Monitor(ANALYZER)) {
monitor.register(new MonitorQuery("1", bq));
MatchingQueries<HighlightsMatch> matches = monitor.match(buildDoc("term1 term2"), HighlightsMatch.MATCHER);
HighlightsMatch m = matches.matches("1");
assertNotNull(m);
assertEquals(1, m.getHitCount());
}
}
public void testWildcardBooleanRewrites() throws Exception {
final Query wc = new PrefixQuery(new Term(FIELD, "term1"));
final Query wrapper = new BooleanQuery.Builder()
.add(wc, BooleanClause.Occur.MUST)
.build();
final Query wrapper2 = new BooleanQuery.Builder()
.add(wrapper, BooleanClause.Occur.MUST)
.build();
final BooleanQuery bq = new BooleanQuery.Builder()
.add(new PrefixQuery(new Term(FIELD, "term2")), BooleanClause.Occur.MUST)
.add(wrapper2, BooleanClause.Occur.MUST_NOT)
.build();
try (Monitor monitor = new Monitor(ANALYZER)) {
monitor.register(new MonitorQuery("1", bq));
MatchingQueries<HighlightsMatch> matches = monitor.match(buildDoc("term2 term"), HighlightsMatch.MATCHER);
HighlightsMatch m = matches.matches("1");
assertNotNull(m);
assertEquals(1, m.getHitCount());
matches = monitor.match(buildDoc("term2 term"), HighlightsMatch.MATCHER);
m = matches.matches("1");
assertNotNull(m);
assertEquals(1, m.getHitCount());
}
}
public void testWildcardProximityRewrites() throws Exception {
final SpanNearQuery snq = SpanNearQuery.newOrderedNearQuery(FIELD)
.addClause(new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term(FIELD, "term*"))))
.addClause(new SpanTermQuery(new Term(FIELD, "foo")))
.build();
try (Monitor monitor = newMonitor()) {
monitor.register(new MonitorQuery("1", snq));
MatchingQueries<HighlightsMatch> matches = monitor.match(buildDoc("term1 foo"), HighlightsMatch.MATCHER);
HighlightsMatch m = matches.matches("1");
assertNotNull(m);
assertEquals(2, m.getHitCount());
}
}
public void testDisjunctionWithOrderedNearSpans() throws Exception {
final Query bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "a")), BooleanClause.Occur.SHOULD)
.add(SpanNearQuery.newOrderedNearQuery(FIELD)
.addClause(new SpanTermQuery(new Term(FIELD, "b")))
.addClause(new SpanTermQuery(new Term(FIELD, "c")))
.setSlop(1)
.build(), BooleanClause.Occur.SHOULD)
.build();
final Query parent = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "a")), BooleanClause.Occur.MUST)
.add(bq, BooleanClause.Occur.MUST)
.build();
try (Monitor monitor = new Monitor(ANALYZER)) {
monitor.register(new MonitorQuery("1", parent));
Document doc = buildDoc("a b x x x x c");
MatchingQueries<HighlightsMatch> matches = monitor.match(doc, HighlightsMatch.MATCHER);
HighlightsMatch m = matches.matches("1");
assertNotNull(m);
assertEquals(1, m.getHitCount());
}
}
public void testDisjunctionWithUnorderedNearSpans() throws Exception {
final Query bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "a")), BooleanClause.Occur.SHOULD)
.add(SpanNearQuery.newUnorderedNearQuery(FIELD)
.addClause(new SpanTermQuery(new Term(FIELD, "b")))
.addClause(new SpanTermQuery(new Term(FIELD, "c")))
.setSlop(1)
.build(), BooleanClause.Occur.SHOULD)
.build();
final Query parent = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "a")), BooleanClause.Occur.MUST)
.add(bq, BooleanClause.Occur.MUST)
.build();
try (Monitor monitor = new Monitor(ANALYZER)) {
monitor.register(new MonitorQuery("1", parent));
Document doc = buildDoc("a b x x x x c");
MatchingQueries<HighlightsMatch> matches = monitor.match(doc, HighlightsMatch.MATCHER);
HighlightsMatch m = matches.matches("1");
assertNotNull(m);
assertEquals(1, m.getHitCount());
}
}
public void testEquality() {
HighlightsMatch m1 = new HighlightsMatch("1");
m1.addHit("field", 0, 1, 0, 1);
HighlightsMatch m2 = new HighlightsMatch("1");
m2.addHit("field", 0, 1, 0, 1);
HighlightsMatch m3 = new HighlightsMatch("1");
m3.addHit("field", 0, 2, 0, 1);
HighlightsMatch m4 = new HighlightsMatch("2");
m4.addHit("field", 0, 1, 0, 1);
assertEquals(m1, m2);
assertEquals(m1.hashCode(), m2.hashCode());
assertNotEquals(m1, m3);
assertNotEquals(m1, m4);
}
public void testMutliValuedFieldWithNonDefaultGaps() throws IOException {
Analyzer analyzer = new Analyzer() {
@Override
public int getPositionIncrementGap(String fieldName) {
return 1000;
}
@Override
public int getOffsetGap(String fieldName) {
return 2000;
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new WhitespaceTokenizer());
}
};
MonitorQuery mq = new MonitorQuery("query", parse(FIELD + ":\"hello world\"~5"));
try (Monitor monitor = newMonitor(analyzer)) {
monitor.register(mq);
Document doc1 = new Document();
doc1.add(newTextField(FIELD, "hello world", Field.Store.NO));
doc1.add(newTextField(FIELD, "goodbye", Field.Store.NO));
MatchingQueries<HighlightsMatch> matcher1 = monitor.match(doc1, HighlightsMatch.MATCHER);
assertEquals(1, matcher1.getMatchCount());
HighlightsMatch m1 = matcher1.matches("query");
assertNotNull(m1);
assertTrue(m1.getFields().contains(FIELD));
assertTrue(m1.getHits(FIELD).contains(new HighlightsMatch.Hit(0, 0, 1, 11)));
Document doc2 = new Document();
doc1.add(newTextField(FIELD, "hello", Field.Store.NO));
doc1.add(newTextField(FIELD, "world", Field.Store.NO));
MatchingQueries<HighlightsMatch> matcher2 = monitor.match(doc2, HighlightsMatch.MATCHER);
assertNull(matcher2.matches("query"));
assertEquals(0, matcher2.getMatchCount());
Document doc3 = new Document();
doc3.add(newTextField(FIELD, "hello world", Field.Store.NO));
doc3.add(newTextField(FIELD, "hello goodbye world", Field.Store.NO));
MatchingQueries<HighlightsMatch> matcher3 = monitor.match(doc3, HighlightsMatch.MATCHER);
assertEquals(1, matcher3.getMatchCount());
HighlightsMatch m3 = matcher3.matches("query");
assertNotNull(m3);
assertTrue(m3.getFields().contains(FIELD));
assertTrue(m3.getHits(FIELD).contains(new HighlightsMatch.Hit(0, 0, 1, 11)));
assertTrue(m3.getHits(FIELD).contains(new HighlightsMatch.Hit(1002, 2011, 1004, 2030)));
}
}
public void testDisjunctionWithOrderedNearMatch() throws Exception {
final Query bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "a")), BooleanClause.Occur.SHOULD)
.add(SpanNearQuery.newOrderedNearQuery(FIELD)
.addClause(new SpanTermQuery(new Term(FIELD, "b")))
.addClause(new SpanTermQuery(new Term(FIELD, "c")))
.setSlop(1)
.build(), BooleanClause.Occur.SHOULD)
.build();
final Query parent = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "a")), BooleanClause.Occur.MUST)
.add(bq, BooleanClause.Occur.MUST)
.build();
try (Monitor monitor = newMonitor()) {
monitor.register(new MonitorQuery("1", parent));
Document doc = buildDoc("a b c");
MatchingQueries<HighlightsMatch> matches = monitor.match(doc, HighlightsMatch.MATCHER);
HighlightsMatch m = matches.matches("1");
assertNotNull(m);
assertEquals(3, m.getHitCount());
assertTrue(m.getFields().contains(FIELD));
assertTrue(m.getHits(FIELD).contains(new HighlightsMatch.Hit(0, 0, 0, 1)));
assertTrue(m.getHits(FIELD).contains(new HighlightsMatch.Hit(1, 2, 1, 3)));
assertTrue(m.getHits(FIELD).contains(new HighlightsMatch.Hit(2, 4, 2, 5)));
}
}
public void testUnorderedNearWithinOrderedNear() throws Exception {
final SpanQuery spanPhrase = SpanNearQuery.newOrderedNearQuery(FIELD)
.addClause(new SpanTermQuery(new Term(FIELD, "time")))
.addClause(new SpanTermQuery(new Term(FIELD, "men")))
.setSlop(1)
.build();
final SpanQuery unorderedNear = SpanNearQuery.newUnorderedNearQuery(FIELD)
.addClause(spanPhrase)
.addClause(new SpanTermQuery(new Term(FIELD, "all")))
.setSlop(5)
.build();
final SpanQuery orderedNear = SpanNearQuery.newOrderedNearQuery(FIELD)
.addClause(new SpanTermQuery(new Term(FIELD, "the")))
.addClause(unorderedNear)
.setSlop(10)
.build();
final Query innerConjunct = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "is")), BooleanClause.Occur.MUST)
.add(orderedNear, BooleanClause.Occur.MUST)
.build();
final Query disjunct = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "now")), BooleanClause.Occur.SHOULD)
.add(innerConjunct, BooleanClause.Occur.SHOULD)
.build();
final Query outerConjunct = new BooleanQuery.Builder()
.add(disjunct, BooleanClause.Occur.MUST)
.add(new TermQuery(new Term(FIELD, "good")), BooleanClause.Occur.MUST)
.build();
try (Monitor monitor = newMonitor()) {
monitor.register(new MonitorQuery("1", outerConjunct));
Document doc = buildDoc("now is the time for all good men");
MatchingQueries<HighlightsMatch> matches = monitor.match(doc, HighlightsMatch.MATCHER);
HighlightsMatch m = matches.matches("1");
assertEquals(2, m.getHitCount());
assertTrue(m.getFields().contains(FIELD));
assertTrue(m.getHits(FIELD).contains(new HighlightsMatch.Hit(0, 0, 0, 3)));
assertTrue(m.getHits(FIELD).contains(new HighlightsMatch.Hit(6, 24, 6, 28)));
}
}
public void testMinShouldMatchQuery() throws Exception {
final Query minq = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "x")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term(FIELD, "y")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term(FIELD, "z")), BooleanClause.Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
final Query bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term(FIELD, "a")), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term(FIELD, "b")), BooleanClause.Occur.MUST)
.add(minq, BooleanClause.Occur.SHOULD)
.build();
try (Monitor monitor = newMonitor()) {
monitor.register(new MonitorQuery("1", bq));
Document doc = buildDoc("a b x");
MatchingQueries<HighlightsMatch> matches = monitor.match(doc, HighlightsMatch.MATCHER);
HighlightsMatch m = matches.matches("1");
assertNotNull(m);
assertEquals(2, m.getHitCount());
assertTrue(m.getFields().contains(FIELD));
assertTrue(m.getHits(FIELD).contains(new HighlightsMatch.Hit(0, 0, 0, 1)));
assertTrue(m.getHits(FIELD).contains(new HighlightsMatch.Hit(1, 2, 1, 3)));
}
}
public void testComplexPhraseQueryParser() throws Exception {
ComplexPhraseQueryParser cpqp = new ComplexPhraseQueryParser(FIELD, new StandardAnalyzer());
Query query = cpqp.parse("\"x b\"");
try (Monitor monitor = newMonitor()) {
monitor.register(new MonitorQuery("1", query));
Document doc = buildDoc("x b c");
MatchingQueries<HighlightsMatch> matches = monitor.match(doc, HighlightsMatch.MATCHER);
HighlightsMatch m = matches.matches("1");
assertNotNull(m);
assertEquals(2, m.getHitCount());
assertTrue(m.getFields().contains(FIELD));
}
}
public void testHighlightBatches() throws Exception {
String query = "\"cell biology\"";
try (Monitor monitor = newMonitor(WHITESPACE)) {
monitor.register(new MonitorQuery("query0", parse("non matching query")));
monitor.register(new MonitorQuery("query1", parse(query)));
monitor.register(new MonitorQuery("query2", parse("biology")));
Document doc1 = new Document();
doc1.add(newTextField(FIELD, "the cell biology count", Field.Store.NO)); // matches
Document doc2 = new Document();
doc2.add(newTextField(FIELD, "nope", Field.Store.NO));
Document doc3 = new Document();
doc3.add(newTextField(FIELD, "biology text", Field.Store.NO));
MultiMatchingQueries<HighlightsMatch> matches = monitor.match(new Document[]{doc1, doc2, doc3}, HighlightsMatch.MATCHER);
assertEquals(2, matches.getMatchCount(0));
assertEquals(0, matches.getMatchCount(1));
assertEquals(1, matches.getMatchCount(2));
HighlightsMatch m1 = matches.matches("query1", 0);
assertTrue(m1.getHits(FIELD).contains(new HighlightsMatch.Hit(1, 4, 2, 16)));
HighlightsMatch m2 = matches.matches("query2", 2);
assertTrue(m2.getHits(FIELD).contains(new HighlightsMatch.Hit(0, 0, 0, 7)));
}
}
}