blob: 320eb64ea1bf1d0e5ab99c0b102ee4531cfdf950 [file] [log] [blame]
Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (revision 1515825)
+++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (working copy)
@@ -47,6 +47,7 @@
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
public class FastVectorHighlighterTest extends LuceneTestCase {
@@ -298,6 +299,49 @@
writer.close();
dir.close();
}
+
+ public void testLotsOfPhrases() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
+ FieldType type = new FieldType(TextField.TYPE_STORED);
+ type.setStoreTermVectorOffsets(true);
+ type.setStoreTermVectorPositions(true);
+ type.setStoreTermVectors(true);
+ type.freeze();
+ String[] terms = { "org", "apache", "lucene"};
+ int iters = atLeast(1000);
+ StringBuilder builder = new StringBuilder();
+ for (int i = 0; i < iters; i++) {
+ builder.append(terms[random().nextInt(terms.length)]).append(" ");
+ if (random().nextInt(6) == 3) {
+ builder.append("solr").append(" ");
+ }
+ }
+ Document doc = new Document();
+ Field field = new Field("field", builder.toString(), type);
+ doc.add(field);
+ writer.addDocument(doc);
+ PhraseQuery query = new PhraseQuery();
+ query.add(new Term("field", "org"));
+ query.add(new Term("field", "apache"));
+ query.add(new Term("field", "lucene"));
+
+
+ FastVectorHighlighter highlighter = new FastVectorHighlighter();
+ IndexReader reader = DirectoryReader.open(writer, true);
+ IndexSearcher searcher = newSearcher(reader);
+ TopDocs hits = searcher.search(query, 10);
+ assertEquals(1, hits.totalHits);
+ FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+ String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
+ for (int i = 0; i < bestFragments.length; i++) {
+ String result = bestFragments[i].replaceAll("<b>org apache lucene</b>", "FOOBAR");
+ assertFalse(result.contains("org apache lucene"));
+ }
+ reader.close();
+ writer.close();
+ dir.close();
+ }
public void testOverlappingPhrases() throws IOException {
final Analyzer analyzer = new Analyzer() {
Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
===================================================================
--- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (revision 1515825)
+++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (working copy)
@@ -30,7 +30,6 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
@@ -63,6 +62,8 @@
// The maximum number of different matching terms accumulated from any one MultiTermQuery
private static final int MAX_MTQ_TERMS = 1024;
+
+ private int maxPhraseWindow = 1;
FieldQuery( Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch ) throws IOException {
this.fieldMatch = fieldMatch;
@@ -400,7 +401,7 @@
return positions[i] - positions[j];
}
}.sort(0, terms.length);
-
+
addToMap(pq, terms, positions, 0, subMap, pq.getSlop());
}
else
@@ -474,9 +475,16 @@
this.boost = boost;
this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
this.positions = positions;
+ if (positions != null) {
+ fieldQuery.maxPhraseWindow = Math.max(fieldQuery.maxPhraseWindow, slop + positions[positions.length-1] - positions[0]);
+ }
}
}
+ public int getMaxPhraseWindow() {
+ return fieldQuery.maxPhraseWindow;
+ }
+
public boolean isTerminal(){
return terminal;
}
Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
===================================================================
--- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (revision 1515825)
+++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (working copy)
@@ -69,6 +69,9 @@
}
void extractPhrases(LinkedList<TermInfo> terms, QueryPhraseMap currMap, LinkedList<TermInfo> phraseCandidate, int longest) {
+ if (phraseCandidate.size() > 1 && phraseCandidate.getLast().getPosition() - phraseCandidate.getFirst().getPosition() > currMap.getMaxPhraseWindow()) {
+ return;
+ }
if (terms.isEmpty()) {
if (longest > 0) {
addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate.subList(0, longest), currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );