| Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java |
| =================================================================== |
| --- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (revision 1504378) |
| +++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (working copy) |
| @@ -16,10 +16,18 @@ |
| * limitations under the License. |
| */ |
| import java.io.IOException; |
| +import java.io.Reader; |
| +import java.util.Arrays; |
| |
| +import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.analysis.MockTokenFilter; |
| import org.apache.lucene.analysis.MockTokenizer; |
| +import org.apache.lucene.analysis.TokenFilter; |
| +import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.analysis.Tokenizer; |
| +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.FieldType; |
| @@ -27,12 +35,14 @@ |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| +import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.queries.CommonTermsQuery; |
| import org.apache.lucene.search.BooleanClause.Occur; |
| import org.apache.lucene.search.BooleanQuery; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.PhraseQuery; |
| +import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.store.Directory; |
| @@ -40,8 +50,9 @@ |
| |
| |
| public class FastVectorHighlighterTest extends LuceneTestCase { |
| + |
| + private static final String FIELD = "text"; |
| |
| - |
| public void testSimpleHighlightTest() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); |
| @@ -287,4 +298,128 @@ |
| writer.close(); |
| dir.close(); |
| } |
| + |
| + public void testOverlappingPhrases() throws IOException { /* "a b c" must be highlighted as one phrase however the b/B synonym is queried */ |
| + final Analyzer analyzer = new Analyzer() { |
| + |
| + @Override |
| + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { |
| + final Tokenizer source = new MockTokenizer(reader); |
| + TokenStream sink = source; |
| + sink = new SynonymFilter(sink); |
| + return new TokenStreamComponents(source, sink); |
| + } |
| + |
| + }; |
| + final Directory directory = newDirectory(); |
| + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, analyzer); |
| + Document doc = new Document(); |
| + FieldType withVectors = new FieldType(TextField.TYPE_STORED); |
| + withVectors.setStoreTermVectors(true); |
| + withVectors.setStoreTermVectorPositions(true); |
| + withVectors.setStoreTermVectorOffsets(true); |
| + doc.add(new Field(FIELD, "a b c", withVectors)); |
| + iw.addDocument(doc); |
| + DirectoryReader ir = iw.getReader(); |
| + |
| + // Disjunction of two overlapping phrase queries |
| + final PhraseQuery pq1 = new PhraseQuery(); |
| + pq1.add(new Term(FIELD, "a"), 0); |
| + pq1.add(new Term(FIELD, "b"), 1); |
| + pq1.add(new Term(FIELD, "c"), 2); |
| + |
| + final PhraseQuery pq2 = new PhraseQuery(); |
| + pq2.add(new Term(FIELD, "a"), 0); |
| + pq2.add(new Term(FIELD, "B"), 1); |
| + pq2.add(new Term(FIELD, "c"), 2); |
| + |
| + final BooleanQuery bq = new BooleanQuery(); |
| + bq.add(pq1, Occur.SHOULD); |
| + bq.add(pq2, Occur.SHOULD); |
| + |
| + // Single phrase query with two terms at the same position |
| + final PhraseQuery pq = new PhraseQuery(); |
| + pq.add(new Term(FIELD, "a"), 0); |
| + pq.add(new Term(FIELD, "b"), 1); |
| + pq.add(new Term(FIELD, "B"), 1); |
| + pq.add(new Term(FIELD, "c"), 2); |
| + |
| + for (Query query : Arrays.asList(pq1, pq2, bq, pq)) { |
| + assertEquals(1, new IndexSearcher(ir).search(query, 1).totalHits); /* check the variant under test, not always bq */ |
| + |
| + FastVectorHighlighter highlighter = new FastVectorHighlighter(); |
| + FieldQuery fieldQuery = highlighter.getFieldQuery(query, ir); |
| + String[] bestFragments = highlighter.getBestFragments(fieldQuery, ir, 0, FIELD, 1000, 1); |
| + assertEquals("<b>a b c</b>", bestFragments[0]); |
| + } |
| + |
| + ir.close(); |
| + iw.close(); |
| + directory.close(); |
| + } |
| + |
| + public void testPhraseWithGap() throws IOException { /* phrase constrains positions 0 and 2 only; the token between them must stay unhighlighted */ |
| + final Directory directory = newDirectory(); |
| + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); |
| + Document doc = new Document(); |
| + FieldType withVectors = new FieldType(TextField.TYPE_STORED); |
| + withVectors.setStoreTermVectors(true); |
| + withVectors.setStoreTermVectorPositions(true); |
| + withVectors.setStoreTermVectorOffsets(true); |
| + doc.add(new Field(FIELD, "a b c", withVectors)); |
| + iw.addDocument(doc); |
| + DirectoryReader ir = iw.getReader(); |
| + |
| + final PhraseQuery pq = new PhraseQuery(); |
| + pq.add(new Term(FIELD, "c"), 2); /* terms are added out of position order (presumably to exercise the position sort in FieldQuery) */ |
| + pq.add(new Term(FIELD, "a"), 0); |
| + |
| + assertEquals(1, new IndexSearcher(ir).search(pq, 1).totalHits); |
| + |
| + FastVectorHighlighter highlighter = new FastVectorHighlighter(); |
| + FieldQuery fieldQuery = highlighter.getFieldQuery(pq, ir); |
| + String[] bestFragments = highlighter.getBestFragments(fieldQuery, ir, 0, FIELD, 1000, 1); |
| + assertEquals("<b>a</b> b <b>c</b>", bestFragments[0]); |
| + |
| + ir.close(); |
| + iw.close(); |
| + directory.close(); |
| + } |
| + |
| + // Simple token filter that adds 'B' as a synonym of 'b' |
| + private static class SynonymFilter extends TokenFilter { |
| + |
| + final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); |
| + final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); |
| + |
| + State pending; /* state captured at the last "b" token; non-null means a "B" token is still owed */ |
| + |
| + protected SynonymFilter(TokenStream input) { |
| + super(input); |
| + } |
| + |
| + @Override |
| + public boolean incrementToken() throws IOException { |
| + if (pending != null) { /* emit the synonym before pulling the next input token */ |
| + restoreState(pending); /* start from the attributes captured for the "b" token */ |
| + termAtt.setEmpty().append('B'); |
| + posIncAtt.setPositionIncrement(0); /* increment 0 stacks "B" on the same position as "b" */ |
| + pending = null; |
| + return true; |
| + } |
| + if (!input.incrementToken()) { |
| + return false; |
| + } |
| + if (termAtt.toString().equals("b")) { |
| + pending = captureState(); /* "b" itself is returned now; its copy becomes "B" on the next call */ |
| + } |
| + return true; |
| + } |
| + |
| + @Override |
| + public void reset() throws IOException { |
| + super.reset(); |
| + pending = null; /* drop any synonym left over from a previous use of the stream */ |
| + } |
| + } |
| } |
| Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java |
| =================================================================== |
| --- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (revision 1504378) |
| +++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (working copy) |
| @@ -863,8 +863,8 @@ |
| phraseCandidate.add( new TermInfo( "c", 4, 5, 4, 1 ) ); |
| assertNull( fq.searchPhrase( F, phraseCandidate ) ); |
| |
| - // "a b c"~1 |
| - query = pqF( 1F, 1, "a", "b", "c" ); |
| + // "a b c"~2 |
| + query = pqF( 1F, 2, "a", "b", "c" ); |
| |
| // phraseHighlight = true, fieldMatch = true |
| fq = new FieldQuery( query, true, true ); |
| Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java |
| =================================================================== |
| --- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java (revision 1504378) |
| +++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java (working copy) |
| @@ -120,7 +120,31 @@ |
| assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() ); |
| assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() ); |
| } |
| - |
| + |
| + public void testProximityPhraseReverse() throws Exception { /* query terms "c","a" occur in the opposite order in the indexed text */ |
| + make1d1fIndex( "z a a b c" ); |
| + |
| + FieldQuery fq = new FieldQuery( pqF( 2F, 3, "c", "a" ), true, true ); /* presumably pqF(boost, slop, terms...) - helper is not shown here, confirm */ |
| + FieldTermStack stack = new FieldTermStack( reader, 0, F, fq ); |
| + FieldPhraseList fpl = new FieldPhraseList( stack, fq ); |
| + assertEquals( 1, fpl.phraseList.size() ); |
| + assertEquals( "ac(2.0)((4,5)(8,9))", fpl.phraseList.get( 0 ).toString() ); /* second "a" (offsets 4-5) paired with "c" (offsets 8-9) */ |
| + assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() ); |
| + assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() ); |
| + } |
| + |
| + public void testProximityPhraseWithRepeatedTerms() throws Exception { /* "a" and "b" each occur twice in the text; exactly one phrase is expected */ |
| + make1d1fIndex( "z a a b b z d" ); |
| + |
| + FieldQuery fq = new FieldQuery( pqF( 2F, 2, "a", "b", "d" ), true, true ); /* presumably pqF(boost, slop, terms...) - helper is not shown here, confirm */ |
| + FieldTermStack stack = new FieldTermStack( reader, 0, F, fq ); |
| + FieldPhraseList fpl = new FieldPhraseList( stack, fq ); |
| + assertEquals( 1, fpl.phraseList.size() ); |
| + assertEquals( "abd(2.0)((4,7)(12,13))", fpl.phraseList.get( 0 ).toString() ); /* adjacent "a b" pair spans offsets 4-7, "d" is at 12-13 */ |
| + assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() ); |
| + assertEquals( 13, fpl.phraseList.get( 0 ).getEndOffset() ); |
| + } |
| + |
| public void test2PhrasesOverlap() throws Exception { |
| make1d1fIndex( "d a b c d" ); |
| |
| Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java |
| =================================================================== |
| --- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java (revision 1504378) |
| +++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java (working copy) |
| @@ -145,6 +145,13 @@ |
| } |
| |
| /** |
| + * Return the top TermInfo object of the stack without removing it. |
| + */ |
| + public TermInfo peek() { |
| + return termList.peek(); |
| + } |
| + |
| + /** |
| * @param termInfo the TermInfo object to be put on the top of the stack |
| */ |
| public void push( TermInfo termInfo ){ |
| Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java |
| =================================================================== |
| --- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (revision 1504378) |
| +++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (working copy) |
| @@ -17,6 +17,8 @@ |
| */ |
| |
| import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| @@ -39,6 +41,7 @@ |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo; |
| +import org.apache.lucene.util.InPlaceMergeSorter; |
| |
| /** |
| * FieldQuery breaks down query object into terms/phrases and keeps |
| @@ -347,6 +350,7 @@ |
| boolean terminal; |
| int slop; // valid if terminal == true and phraseHighlight == true |
| float boost; // valid if terminal == true |
| + int[] positions; // valid if terminal == true |
| int termOrPhraseNumber; // valid if terminal == true |
| FieldQuery fieldQuery; |
| Map<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>(); |
| @@ -369,38 +373,107 @@ |
| return map; |
| } |
| |
| - void add( Query query, IndexReader reader ) { |
| + void add( Query query, IndexReader reader ) { |
| if( query instanceof TermQuery ){ |
| addTerm( ((TermQuery)query).getTerm(), query.getBoost() ); |
| } |
| else if( query instanceof PhraseQuery ){ |
| PhraseQuery pq = (PhraseQuery)query; |
| - Term[] terms = pq.getTerms(); |
| - Map<String, QueryPhraseMap> map = subMap; |
| - QueryPhraseMap qpm = null; |
| - for( Term term : terms ){ |
| - qpm = getOrNewMap( map, term.text() ); |
| - map = qpm.subMap; |
| - } |
| - qpm.markTerminal( pq.getSlop(), pq.getBoost() ); |
| + final Term[] terms = pq.getTerms(); |
| + final int[] positions = pq.getPositions(); |
| + new InPlaceMergeSorter() { /* order terms by query position, keeping terms[] and positions[] aligned */ |
| + |
| + @Override |
| + protected void swap(int i, int j) { |
| + Term tmpTerm = terms[i]; |
| + terms[i] = terms[j]; |
| + terms[j] = tmpTerm; |
| + |
| + int tmpPos = positions[i]; |
| + positions[i] = positions[j]; |
| + positions[j] = tmpPos; |
| + } |
| + |
| + @Override |
| + protected int compare(int i, int j) { |
| + return Integer.compare(positions[i], positions[j]); /* subtraction could overflow for extreme position values */ |
| + } |
| + }.sort(0, terms.length); |
| + |
| + addToMap(pq, terms, positions, 0, subMap, pq.getSlop()); |
| } |
| else |
| throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." ); |
| } |
| - |
| + |
| + private int numTermsAtSamePosition(int[] positions, int i) { /* number of entries at index >= i whose value equals positions[i], counting i itself */ |
| + int numTermsAtSamePosition = 1; |
| + for (int j = i + 1; j < positions.length; ++j) { |
| + if (positions[j] == positions[i]) { |
| + ++numTermsAtSamePosition; |
| + } |
| + } |
| + return numTermsAtSamePosition; |
| + } |
| + |
| + private void addToMap(PhraseQuery pq, Term[] terms, int[] positions, int i, Map<String, QueryPhraseMap> map, int slop) { /* inserts terms[i..] into map, one branch per term sharing the position found at index i */ |
| + int numTermsAtSamePosition = numTermsAtSamePosition(positions, i); |
| + for (int j = 0; j < numTermsAtSamePosition; ++j) { |
| + QueryPhraseMap qpm = getOrNewMap(map, terms[i + j].text()); |
| + if (i + numTermsAtSamePosition == terms.length) { /* last position group: this node ends the phrase */ |
| + qpm.markTerminal(pq.getSlop(), pq.getBoost(), uniquePositions(positions)); /* records the query's full slop, not the remaining budget held in the slop parameter */ |
| + } else { |
| + addToMap(pq, terms, positions, i + numTermsAtSamePosition, qpm.subMap, slop); |
| + } |
| + } |
| + if (slop > 2 && i + numTermsAtSamePosition < terms.length) { /* NOTE(review): PhraseQuery docs say swapping two words costs two moves, so slop >= 2 may be intended here - confirm */ |
| + Term[] otherTerms = Arrays.copyOf(terms, terms.length); /* work on copies: the current group and the next group trade places below */ |
| + int[] otherPositions = Arrays.copyOf(positions, positions.length); |
| + final int nextTermAtSamePosition = numTermsAtSamePosition(positions, i + numTermsAtSamePosition); |
| + System.arraycopy(terms, i + numTermsAtSamePosition, otherTerms, i, nextTermAtSamePosition); |
| + System.arraycopy(positions, i + numTermsAtSamePosition, otherPositions, i, nextTermAtSamePosition); |
| + System.arraycopy(terms, i, otherTerms, i + nextTermAtSamePosition, numTermsAtSamePosition); |
| + System.arraycopy(positions, i, otherPositions, i + nextTermAtSamePosition, numTermsAtSamePosition); |
| + addToMap(pq, otherTerms, otherPositions, i, map, slop - 2); /* register the reordered variant with 2 less slop budget */ |
| + } |
| + } |
| + |
| + private int[] uniquePositions(int[] positions) { /* collapses each run of equal adjacent values into a single entry */ |
| + int uniqueCount = 1; |
| + for (int i = 1; i < positions.length; ++i) { |
| + if (positions[i] != positions[i - 1]) { |
| + ++uniqueCount; |
| + } |
| + } |
| + if (uniqueCount == positions.length) { /* nothing to collapse: the input array itself is returned, not a copy */ |
| + return positions; |
| + } |
| + int[] result = new int[uniqueCount]; |
| + result[0] = positions[0]; |
| + for (int i = 1, j = 1; i < positions.length; ++i) { |
| + if (positions[i] != positions[i - 1]) { |
| + result[j++] = positions[i]; |
| + } |
| + } |
| + return result; |
| + } |
| + |
| public QueryPhraseMap getTermMap( String term ){ |
| return subMap.get( term ); |
| } |
| |
| private void markTerminal( float boost ){ |
| - markTerminal( 0, boost ); |
| + markTerminal( 0, boost, null ); /* single-term entry: slop 0 and no position array */ |
| } |
| |
| - private void markTerminal( int slop, float boost ){ |
| - this.terminal = true; |
| - this.slop = slop; |
| - this.boost = boost; |
| - this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber(); |
| + private void markTerminal( int slop, float boost, int[] positions ){ |
| + if (!this.terminal || slop > this.slop || (slop == this.slop && boost > this.boost)) { /* first marking always applies (even with slop 0 and boost <= 0); later ones only if sloppier, or equally sloppy with a higher boost */ |
| + this.terminal = true; |
| + this.slop = slop; |
| + this.boost = boost; |
| + this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber(); |
| + this.positions = positions; |
| + } |
| } |
| |
| public boolean isTerminal(){ |
| @@ -435,15 +508,20 @@ |
| // if the candidate is a term, it is valid |
| if( phraseCandidate.size() == 1 ) return true; |
| |
| + |
| + assert phraseCandidate.size() == positions.length; |
| // else check whether the candidate is valid phrase |
| // compare position-gaps between terms to slop |
| int pos = phraseCandidate.get( 0 ).getPosition(); |
| + int totalDistance = 0; |
| for( int i = 1; i < phraseCandidate.size(); i++ ){ |
| int nextPos = phraseCandidate.get( i ).getPosition(); |
| - if( Math.abs( nextPos - pos - 1 ) > slop ) return false; |
| + final int expectedDelta = positions[i] - positions[i - 1]; |
| + final int actualDelta = nextPos - pos; |
| + totalDistance += Math.abs(expectedDelta - actualDelta); |
| pos = nextPos; |
| } |
| - return true; |
| + return totalDistance <= slop; |
| } |
| } |
| } |
| Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java |
| =================================================================== |
| --- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (revision 1504378) |
| +++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (working copy) |
| @@ -16,7 +16,9 @@ |
| * limitations under the License. |
| */ |
| |
| +import java.util.ArrayDeque; |
| import java.util.ArrayList; |
| +import java.util.Deque; |
| import java.util.LinkedList; |
| import java.util.List; |
| |
| @@ -60,49 +62,73 @@ |
| public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit ){ |
| final String field = fieldTermStack.getFieldName(); |
| |
| + @SuppressWarnings("unchecked") |
| + Deque<TermInfo>[] termStacks = new Deque[] {new ArrayDeque<TermInfo>()}; /* one queue per combination of same-position tokens; starts as a single empty queue */ |
| + for (TermInfo ti = fieldTermStack.pop(); ti != null; ti = fieldTermStack.pop()) { |
| + // If there are tokens at the same position, compute all combinations |
| + if (!fieldTermStack.isEmpty() && fieldTermStack.peek().getPosition() == ti.getPosition()) { |
| + List<TermInfo> samePositionTermInfos = new ArrayList<>(2); |
| + samePositionTermInfos.add(ti); |
| + samePositionTermInfos.add(fieldTermStack.pop()); |
| + while (!fieldTermStack.isEmpty() && fieldTermStack.peek().getPosition() == ti.getPosition()) { |
| + samePositionTermInfos.add(fieldTermStack.pop()); |
| + } |
| + final int numTokensAtSamePosition = samePositionTermInfos.size(); |
| + @SuppressWarnings("unchecked") |
| + Deque<TermInfo>[] newTermStacks = new Deque[termStacks.length * numTokensAtSamePosition]; /* queue count multiplies at every position that holds several tokens */ |
| + for (int i = 0, k = 0; i < termStacks.length; ++i) { |
| + for (int j = 0; j < numTokensAtSamePosition; ++j) { |
| + if (j == numTokensAtSamePosition - 1) { /* the last alternative reuses the existing queue instead of copying it */ |
| + newTermStacks[k] = termStacks[i]; |
| + } else { |
| + newTermStacks[k] = new ArrayDeque<>(termStacks[i]); |
| + } |
| + newTermStacks[k++].offer(samePositionTermInfos.get(j)); |
| + } |
| + } |
| + termStacks = newTermStacks; |
| + } else { |
| + for (Deque<TermInfo> d : termStacks) { /* a token alone at its position is appended to every combination */ |
| + d.offer(ti); |
| + } |
| + } |
| + } |
| + |
| + for (Deque<TermInfo> d : termStacks) { /* each combination is scanned for phrases separately; all results go into the shared phraseList */ |
| + extractPhrases(field, d, fieldQuery, phraseLimit); |
| + } |
| + } |
| + |
| + void extractPhrases(String field, Deque<TermInfo> fieldTermStack, FieldQuery fieldQuery, int phraseLimit) { /* for each start token, records the longest valid term/phrase beginning there, then advances by one token */ |
| LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>(); |
| - QueryPhraseMap currMap = null; |
| - QueryPhraseMap nextMap = null; |
| - while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) ) |
| - { |
| + while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) ) { |
| + |
| + WeightedPhraseInfo longest = null; |
| phraseCandidate.clear(); |
| - |
| - TermInfo ti = fieldTermStack.pop(); |
| - currMap = fieldQuery.getFieldTermMap( field, ti.getText() ); |
| - |
| - // if not found, discard top TermInfo from stack, then try next element |
| - if( currMap == null ) continue; |
| - |
| - // if found, search the longest phrase |
| - phraseCandidate.add( ti ); |
| - while( true ){ |
| - ti = fieldTermStack.pop(); |
| - nextMap = null; |
| - if( ti != null ) |
| - nextMap = currMap.getTermMap( ti.getText() ); |
| - if( ti == null || nextMap == null ){ |
| - if( ti != null ) |
| - fieldTermStack.push( ti ); |
| - if( currMap.isValidTermOrPhrase( phraseCandidate ) ){ |
| - addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); |
| + QueryPhraseMap currMap = null; |
| + for (TermInfo ti : fieldTermStack) { |
| + QueryPhraseMap nextMap = null; |
| + if (currMap == null) { |
| + nextMap = fieldQuery.getFieldTermMap(field, ti.getText()); |
| + if (nextMap == null) { |
| + break; |
| } |
| - else{ |
| - while( phraseCandidate.size() > 1 ){ |
| - fieldTermStack.push( phraseCandidate.removeLast() ); |
| - currMap = fieldQuery.searchPhrase( field, phraseCandidate ); |
| - if( currMap != null ){ |
| - addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); |
| - break; |
| - } |
| - } |
| - } |
| - break; |
| + } else { |
| + nextMap = currMap.getTermMap(ti.getText()); |
| } |
| - else{ |
| - phraseCandidate.add( ti ); |
| + if (nextMap != null) { |
| currMap = nextMap; |
| + phraseCandidate.add(ti); |
| + if( currMap.isValidTermOrPhrase( phraseCandidate ) ){ /* snapshot now: currMap may later advance to a node that never validates */ |
| + longest = new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ); |
| + } |
| } |
| } |
| + |
| + if (longest != null) { |
| + addIfNoOverlap( longest ); /* boost and phrase number belong to the map node that validated this phrase */ |
| + } |
| + fieldTermStack.pop(); |
| } |
| } |
| |
| @@ -159,11 +185,11 @@ |
| return termsInfos; |
| } |
| |
| - public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){ |
| + public WeightedPhraseInfo( List<TermInfo> terms, float boost ){ |
| this( terms, boost, 0 ); |
| } |
| |
| - public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){ |
| + public WeightedPhraseInfo( List<TermInfo> terms, float boost, int seqnum ){ |
| this.boost = boost; |
| this.seqnum = seqnum; |
| |