// blob: b1700c8a538908a64a76b08cf40a663f628bc8f8 [file] [log] [blame]
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
* <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
* of the document to be highlighted.
public class FieldTermStack {
private final String fieldName;
LinkedList<TermInfo> termList = new LinkedList<>();
//public static void main( String[] args ) throws Exception {
// Analyzer analyzer = new WhitespaceAnalyzer(Version.LATEST);
// QueryParser parser = new QueryParser(Version.LATEST, "f", analyzer );
// Query query = parser.parse( "a x:b" );
// FieldQuery fieldQuery = new FieldQuery( query, true, false );
// Directory dir = new RAMDirectory();
// IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LATEST, analyzer));
// Document doc = new Document();
// FieldType ft = new FieldType(TextField.TYPE_STORED);
// ft.setStoreTermVectors(true);
// ft.setStoreTermVectorOffsets(true);
// ft.setStoreTermVectorPositions(true);
// doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) );
// doc.add( new Field( "f", ft, "b a b a f" ) );
// writer.addDocument( doc );
// writer.close();
// IndexReader reader =;
// new FieldTermStack( reader, 0, "f", fieldQuery );
// reader.close();
* a constructor.
* @param reader IndexReader of the index
* @param docId document id to be highlighted
* @param fieldName field of the document to be highlighted
* @param fieldQuery FieldQuery object
* @throws IOException If there is a low-level I/O error
public FieldTermStack( IndexReader reader, int docId, String fieldName, final FieldQuery fieldQuery ) throws IOException {
this.fieldName = fieldName;
Set<String> termSet = fieldQuery.getTermSet( fieldName );
// just return to make null snippet if un-matched fieldName specified when fieldMatch == true
if( termSet == null ) return;
final Fields vectors = reader.getTermVectors(docId);
if (vectors == null) {
// null snippet
final Terms vector = vectors.terms(fieldName);
if (vector == null || vector.hasPositions() == false) {
// null snippet
final CharsRefBuilder spare = new CharsRefBuilder();
final TermsEnum termsEnum = vector.iterator();
PostingsEnum dpEnum = null;
BytesRef text;
int numDocs = reader.maxDoc();
while ((text = != null) {
final String term = spare.toString();
if (!termSet.contains(term)) {
dpEnum = termsEnum.postings(dpEnum, PostingsEnum.POSITIONS);
// For weight look here:
final float weight = ( float ) ( Math.log( numDocs / ( double ) ( reader.docFreq( new Term(fieldName, text) ) + 1 ) ) + 1.0 );
final int freq = dpEnum.freq();
for(int i = 0;i < freq;i++) {
int pos = dpEnum.nextPosition();
if (dpEnum.startOffset() < 0) {
return; // no offsets, null snippet
termList.add( new TermInfo( term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight ) );
// sort by position
// now look for dups at the same position, linking them together
int currentPos = -1;
TermInfo previous = null;
TermInfo first = null;
Iterator<TermInfo> iterator = termList.iterator();
while (iterator.hasNext()) {
TermInfo current =;
if (current.position == currentPos) {
assert previous != null;
previous = current;
} else {
if (previous != null) {
previous = first = current;
currentPos = current.position;
if (previous != null) {
* @return field name
public String getFieldName(){
return fieldName;
* @return the top TermInfo object of the stack
public TermInfo pop(){
return termList.poll();
* @param termInfo the TermInfo object to be put on the top of the stack
public void push( TermInfo termInfo ){
termList.push( termInfo );
* to know whether the stack is empty
* @return true if the stack is empty, false if not
public boolean isEmpty(){
return termList == null || termList.size() == 0;
* Single term with its position/offsets in the document and IDF weight.
* It is Comparable but considers only position.
public static class TermInfo implements Comparable<TermInfo>{
private final String text;
private final int startOffset;
private final int endOffset;
private final int position;
// IDF-weight of this term
private final float weight;
// pointer to other TermInfo's at the same position.
// this is a circular list, so with no syns, just points to itself
private TermInfo next;
TermInfo(String text, int startOffset, int endOffset, int position, float weight){
this.text = text;
this.startOffset = startOffset;
this.endOffset = endOffset;
this.position = position;
this.weight = weight; = this;
void setNext(TermInfo next) { = next; }
* Returns the next TermInfo at this same position.
* This is a circular list!
public TermInfo getNext() { return next; }
public String getText(){ return text; }
public int getStartOffset(){ return startOffset; }
public int getEndOffset(){ return endOffset; }
public int getPosition(){ return position; }
public float getWeight(){ return weight; }
public String toString(){
return text + '(' + startOffset + ',' + endOffset + ',' + position + ')';
public int compareTo( TermInfo o ){
return ( this.position - o.position );
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + position;
return result;
public boolean equals(Object obj) {
if (this == obj) {
return true;
if (obj == null) {
return false;
if (getClass() != obj.getClass()) {
return false;
TermInfo other = (TermInfo) obj;
return position == other.position;