| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search.highlight.custom; |
| |
| import java.io.IOException; |
| import java.util.Collections; |
| import java.util.Map; |
| import java.util.Objects; |
| |
| import org.apache.lucene.analysis.CannedTokenStream; |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.analysis.MockTokenFilter; |
| import org.apache.lucene.analysis.MockTokenizer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.BoostQuery; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.QueryVisitor; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.highlight.Highlighter; |
| import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; |
| import org.apache.lucene.search.highlight.QueryScorer; |
| import org.apache.lucene.search.highlight.SimpleFragmenter; |
| import org.apache.lucene.search.highlight.SimpleHTMLFormatter; |
| import org.apache.lucene.search.highlight.WeightedSpanTerm; |
| import org.apache.lucene.search.highlight.WeightedSpanTermExtractor; |
| import org.apache.lucene.util.LuceneTestCase; |
| |
| /** |
| * Tests the extensibility of {@link WeightedSpanTermExtractor} and |
| * {@link QueryScorer} in a user defined package |
| */ |
| public class HighlightCustomQueryTest extends LuceneTestCase { |
| |
| private static final String FIELD_NAME = "contents"; |
| |
| public void testHighlightCustomQuery() throws IOException, |
| InvalidTokenOffsetsException { |
| String s1 = "I call our world Flatland, not because we call it so,"; |
| |
| // Verify that a query against the default field results in text being |
| // highlighted |
| // regardless of the field name. |
| |
| CustomQuery q = new CustomQuery(new Term(FIELD_NAME, "world")); |
| |
| String expected = "I call our <B>world</B> Flatland, not because we call it so,"; |
| String observed = highlightField(q, "SOME_FIELD_NAME", s1); |
| if (VERBOSE) |
| System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" |
| + observed); |
| assertEquals( |
| "Query in the default field results in text for *ANY* field being highlighted", |
| expected, observed); |
| |
| // Verify that a query against a named field does not result in any |
| // highlighting |
| // when the query field name differs from the name of the field being |
| // highlighted, |
| // which in this example happens to be the default field name. |
| q = new CustomQuery(new Term("text", "world")); |
| |
| expected = s1; |
| observed = highlightField(q, FIELD_NAME, s1); |
| if (VERBOSE) |
| System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" |
| + observed); |
| assertEquals( |
| "Query in a named field does not result in highlighting when that field isn't in the query", |
| s1, highlightField(q, FIELD_NAME, s1)); |
| |
| } |
| |
| public void testHighlightKnownQuery() throws IOException { |
| WeightedSpanTermExtractor extractor = new WeightedSpanTermExtractor() { |
| @Override |
| protected void extractUnknownQuery(Query query, Map<String,WeightedSpanTerm> terms) throws IOException { |
| terms.put("foo", new WeightedSpanTerm(3, "foo")); |
| } |
| }; |
| Map<String,WeightedSpanTerm> terms = extractor.getWeightedSpanTerms( |
| new TermQuery(new Term("bar", "quux")), 3, new CannedTokenStream()); |
| // no foo |
| assertEquals(Collections.singleton("quux"), terms.keySet()); |
| } |
| |
| /** |
| * This method intended for use with |
| * <tt>testHighlightingWithDefaultField()</tt> |
| */ |
| private String highlightField(Query query, String fieldName, |
| String text) throws IOException, InvalidTokenOffsetsException { |
| try (MockAnalyzer mockAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE,true, |
| MockTokenFilter.ENGLISH_STOPSET); TokenStream tokenStream = mockAnalyzer.tokenStream(fieldName, text)) { |
| // Assuming "<B>", "</B>" used to highlight |
| SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); |
| MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME); |
| Highlighter highlighter = new Highlighter(formatter, scorer); |
| highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE)); |
| |
| String rv = highlighter.getBestFragments(tokenStream, text, 1, |
| "(FIELD TEXT TRUNCATED)"); |
| return rv.length() == 0 ? text : rv; |
| } |
| } |
| |
| public static class MyWeightedSpanTermExtractor extends |
| WeightedSpanTermExtractor { |
| |
| public MyWeightedSpanTermExtractor() { |
| super(); |
| } |
| |
| public MyWeightedSpanTermExtractor(String defaultField) { |
| super(defaultField); |
| } |
| |
| @Override |
| protected void extractUnknownQuery(Query query, |
| Map<String, WeightedSpanTerm> terms) throws IOException { |
| float boost = 1f; |
| while (query instanceof BoostQuery) { |
| BoostQuery bq = (BoostQuery) query; |
| boost *= bq.getBoost(); |
| query = bq.getQuery(); |
| } |
| if (query instanceof CustomQuery) { |
| extractWeightedTerms(terms, new TermQuery(((CustomQuery) query).term), boost); |
| } |
| } |
| |
| } |
| |
| public static class MyQueryScorer extends QueryScorer { |
| |
| public MyQueryScorer(Query query, String field, String defaultField) { |
| super(query, field, defaultField); |
| } |
| |
| @Override |
| protected WeightedSpanTermExtractor newTermExtractor(String defaultField) { |
| return defaultField == null ? new MyWeightedSpanTermExtractor() |
| : new MyWeightedSpanTermExtractor(defaultField); |
| } |
| |
| } |
| |
| public static class CustomQuery extends Query { |
| private final Term term; |
| |
| public CustomQuery(Term term) { |
| this.term = term; |
| } |
| |
| @Override |
| public String toString(String field) { |
| return new TermQuery(term).toString(field); |
| } |
| |
| @Override |
| public Query rewrite(IndexReader reader) throws IOException { |
| return new TermQuery(term); |
| } |
| |
| @Override |
| public void visit(QueryVisitor visitor) { |
| visitor.consumeTerms(this, term); |
| } |
| |
| @Override |
| public int hashCode() { |
| return classHash() + Objects.hashCode(term); |
| } |
| |
| @Override |
| public boolean equals(Object other) { |
| return sameClassAs(other) && |
| Objects.equals(term, ((CustomQuery) other).term); |
| } |
| } |
| } |