| Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java |
| =================================================================== |
| --- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (revision 1515825) |
| +++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (working copy) |
| @@ -47,6 +47,7 @@ |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.LuceneTestCase; |
| +import org.apache.lucene.util._TestUtil; |
| |
| |
| public class FastVectorHighlighterTest extends LuceneTestCase { |
| @@ -298,6 +299,49 @@ |
| writer.close(); |
| dir.close(); |
| } |
| + |
| + /** Highlights a phrase query over one long document of randomly repeated phrase terms and checks that no phrase occurrence in the result is left untagged. */ |
| + public void testLotsOfPhrases() throws IOException { |
| +   Directory dir = newDirectory(); |
| +   IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))); |
| +   FieldType type = new FieldType(TextField.TYPE_STORED); |
| +   type.setStoreTermVectorOffsets(true); |
| +   type.setStoreTermVectorPositions(true); |
| +   type.setStoreTermVectors(true); |
| +   type.freeze(); |
| +   String[] terms = { "org", "apache", "lucene" }; |
| +   int iters = atLeast(1000); |
| +   StringBuilder builder = new StringBuilder(); |
| +   for (int i = 0; i < iters; i++) { |
| +     builder.append(terms[random().nextInt(terms.length)]).append(" "); |
| +     if (random().nextInt(6) == 3) { |
| +       builder.append("solr").append(" "); // a term that is not part of the phrase query |
| +     } |
| +   } |
| +   Document doc = new Document(); |
| +   doc.add(new Field("field", builder.toString(), type)); |
| +   writer.addDocument(doc); |
| +   PhraseQuery query = new PhraseQuery(); |
| +   query.add(new Term("field", "org")); |
| +   query.add(new Term("field", "apache")); |
| +   query.add(new Term("field", "lucene")); |
| +   FastVectorHighlighter highlighter = new FastVectorHighlighter(); |
| +   IndexReader reader = DirectoryReader.open(writer, true); |
| +   IndexSearcher searcher = newSearcher(reader); |
| +   TopDocs hits = searcher.search(query, 10); |
| +   assertEquals(1, hits.totalHits); |
| +   FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader); |
| +   String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1); |
| +   assertTrue("expected at least one highlighted fragment", bestFragments.length > 0); // otherwise the loop below checks nothing |
| +   // Remove every tagged phrase (literal replace, no regex); any phrase text still present was not highlighted. |
| +   for (String fragment : bestFragments) { |
| +     String result = fragment.replace("<b>org apache lucene</b>", "FOOBAR"); |
| +     assertFalse(result.contains("org apache lucene")); |
| +   } |
| +   reader.close(); |
| +   writer.close(); |
| +   dir.close(); |
| + } |
| |
| public void testOverlappingPhrases() throws IOException { |
| final Analyzer analyzer = new Analyzer() { |
| Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java |
| =================================================================== |
| --- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (revision 1515825) |
| +++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (working copy) |
| @@ -30,7 +30,6 @@ |
| |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.Term; |
| -import org.apache.lucene.queries.CommonTermsQuery; |
| import org.apache.lucene.search.BooleanClause; |
| import org.apache.lucene.search.BooleanQuery; |
| import org.apache.lucene.search.ConstantScoreQuery; |
| @@ -63,6 +62,8 @@ |
| |
| // The maximum number of different matching terms accumulated from any one MultiTermQuery |
| private static final int MAX_MTQ_TERMS = 1024; |
| + |
| + private int maxPhraseWindow = 1; |
| |
| FieldQuery( Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch ) throws IOException { |
| this.fieldMatch = fieldMatch; |
| @@ -400,7 +401,7 @@ |
| return positions[i] - positions[j]; |
| } |
| }.sort(0, terms.length); |
| - |
| + |
| addToMap(pq, terms, positions, 0, subMap, pq.getSlop()); |
| } |
| else |
| @@ -474,9 +475,16 @@ |
| this.boost = boost; |
| this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber(); |
| this.positions = positions; |
| + if (positions != null) { |
| + fieldQuery.maxPhraseWindow = Math.max(fieldQuery.maxPhraseWindow, slop + positions[positions.length-1] - positions[0]); |
| + } |
| } |
| } |
| |
| + /** Returns the owning FieldQuery's maxPhraseWindow: initially 1, raised to slop + (last position - first position) whenever a map node records non-null phrase positions with a larger span. */ public int getMaxPhraseWindow() { |
| + return fieldQuery.maxPhraseWindow; /* read from the FieldQuery, not from a field of this map node */ |
| + } |
| + |
| public boolean isTerminal(){ |
| return terminal; |
| } |
| Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java |
| =================================================================== |
| --- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (revision 1515825) |
| +++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (working copy) |
| @@ -69,6 +69,9 @@ |
| } |
| |
| void extractPhrases(LinkedList<TermInfo> terms, QueryPhraseMap currMap, LinkedList<TermInfo> phraseCandidate, int longest) { |
| + if (phraseCandidate.size() > 1 && phraseCandidate.getLast().getPosition() - phraseCandidate.getFirst().getPosition() > currMap.getMaxPhraseWindow()) { |
| + return; |
| + } |
| if (terms.isEmpty()) { |
| if (longest > 0) { |
| addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate.subList(0, longest), currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); |