| package org.apache.lucene.search.vectorhighlight; |
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| import java.util.ArrayList; |
| import java.util.LinkedList; |
| import java.util.List; |
| |
| import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap; |
| import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo; |
| |
| /** |
| * FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder |
| * to create a FieldFragList object. |
| */ |
| public class FieldPhraseList { |
| |
| LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>(); |
| |
| /** |
| * a constructor. |
| * |
| * @param fieldTermStack FieldTermStack object |
| * @param fieldQuery FieldQuery object |
| */ |
| public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery ){ |
| final String field = fieldTermStack.getFieldName(); |
| |
| LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>(); |
| QueryPhraseMap currMap = null; |
| QueryPhraseMap nextMap = null; |
| while( !fieldTermStack.isEmpty() ){ |
| |
| phraseCandidate.clear(); |
| |
| TermInfo ti = fieldTermStack.pop(); |
| currMap = fieldQuery.getFieldTermMap( field, ti.getText() ); |
| |
| // if not found, discard top TermInfo from stack, then try next element |
| if( currMap == null ) continue; |
| |
| // if found, search the longest phrase |
| phraseCandidate.add( ti ); |
| while( true ){ |
| ti = fieldTermStack.pop(); |
| nextMap = null; |
| if( ti != null ) |
| nextMap = currMap.getTermMap( ti.getText() ); |
| if( ti == null || nextMap == null ){ |
| if( ti != null ) |
| fieldTermStack.push( ti ); |
| if( currMap.isValidTermOrPhrase( phraseCandidate ) ){ |
| addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); |
| } |
| else{ |
| while( phraseCandidate.size() > 1 ){ |
| fieldTermStack.push( phraseCandidate.removeLast() ); |
| currMap = fieldQuery.searchPhrase( field, phraseCandidate ); |
| if( currMap != null ){ |
| addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); |
| break; |
| } |
| } |
| } |
| break; |
| } |
| else{ |
| phraseCandidate.add( ti ); |
| currMap = nextMap; |
| } |
| } |
| } |
| } |
| |
| void addIfNoOverlap( WeightedPhraseInfo wpi ){ |
| for( WeightedPhraseInfo existWpi : phraseList ){ |
| if( existWpi.isOffsetOverlap( wpi ) ) return; |
| } |
| phraseList.add( wpi ); |
| } |
| |
| public static class WeightedPhraseInfo { |
| |
| String text; // unnecessary member, just exists for debugging purpose |
| List<Toffs> termsOffsets; // usually termsOffsets.size() == 1, |
| // but if position-gap > 1 and slop > 0 then size() could be greater than 1 |
| float boost; // query boost |
| int seqnum; |
| |
| public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){ |
| this( terms, boost, 0 ); |
| } |
| |
| public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int number ){ |
| this.boost = boost; |
| this.seqnum = number; |
| termsOffsets = new ArrayList<Toffs>( terms.size() ); |
| TermInfo ti = terms.get( 0 ); |
| termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) ); |
| if( terms.size() == 1 ){ |
| text = ti.getText(); |
| return; |
| } |
| StringBuilder sb = new StringBuilder(); |
| sb.append( ti.getText() ); |
| int pos = ti.getPosition(); |
| for( int i = 1; i < terms.size(); i++ ){ |
| ti = terms.get( i ); |
| sb.append( ti.getText() ); |
| if( ti.getPosition() - pos == 1 ){ |
| Toffs to = termsOffsets.get( termsOffsets.size() - 1 ); |
| to.setEndOffset( ti.getEndOffset() ); |
| } |
| else{ |
| termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) ); |
| } |
| pos = ti.getPosition(); |
| } |
| text = sb.toString(); |
| } |
| |
| public int getStartOffset(){ |
| return termsOffsets.get( 0 ).startOffset; |
| } |
| |
| public int getEndOffset(){ |
| return termsOffsets.get( termsOffsets.size() - 1 ).endOffset; |
| } |
| |
| public boolean isOffsetOverlap( WeightedPhraseInfo other ){ |
| int so = getStartOffset(); |
| int eo = getEndOffset(); |
| int oso = other.getStartOffset(); |
| int oeo = other.getEndOffset(); |
| if( so <= oso && oso < eo ) return true; |
| if( so < oeo && oeo <= eo ) return true; |
| if( oso <= so && so < oeo ) return true; |
| if( oso < eo && eo <= oeo ) return true; |
| return false; |
| } |
| |
| @Override |
| public String toString(){ |
| StringBuilder sb = new StringBuilder(); |
| sb.append( text ).append( '(' ).append( boost ).append( ")(" ); |
| for( Toffs to : termsOffsets ){ |
| sb.append( to ); |
| } |
| sb.append( ')' ); |
| return sb.toString(); |
| } |
| |
| public static class Toffs { |
| int startOffset; |
| int endOffset; |
| public Toffs( int startOffset, int endOffset ){ |
| this.startOffset = startOffset; |
| this.endOffset = endOffset; |
| } |
| public void setEndOffset( int endOffset ){ |
| this.endOffset = endOffset; |
| } |
| public int getStartOffset(){ |
| return startOffset; |
| } |
| public int getEndOffset(){ |
| return endOffset; |
| } |
| @Override |
| public String toString(){ |
| StringBuilder sb = new StringBuilder(); |
| sb.append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' ); |
| return sb.toString(); |
| } |
| } |
| } |
| } |