| Index: contrib/CHANGES.txt |
| =================================================================== |
| --- contrib/CHANGES.txt (revision 799153) |
| +++ contrib/CHANGES.txt (working copy) |
| @@ -11,7 +11,12 @@ |
| |
| API Changes |
| |
| - (None) |
| + 1. LUCENE-1695: Update the Highlighter to use the new TokenStream API. This issue breaks backwards |
| + compatibility with some public classes. If you have implemented custom Fragmenters or Scorers, |
| + you will need to adjust them to work with the new TokenStream API. Rather than getting passed a |
| + Token at a time, you will be given a TokenStream to init your impl with - store the Attributes |
| + you are interested in locally and access them on each call to the method that used to pass a new |
| + Token. Look at the included updated impls for examples. (Mark Miller) |
| |
| Bug fixes |
| |
| @@ -41,9 +46,6 @@ |
| |
| 8. LUCENE-1491: EdgeNGramTokenFilter no longer stops on tokens shorter than minimum n-gram size. |
| (Todd Teak via Otis Gospodnetic) |
| - |
| - 9. LUCENE-1752: Missing highlights when terms were repeated in separate, nested, boolean or |
| - disjunction queries. (Koji Sekiguchi, Mark Miller) |
| |
| New features |
| |
| Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/Fragmenter.java |
| =================================================================== |
| --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/Fragmenter.java (revision 797692) |
| +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/Fragmenter.java (working copy) |
| @@ -1,4 +1,5 @@ |
| package org.apache.lucene.search.highlight; |
| + |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| @@ -16,24 +17,31 @@ |
| * limitations under the License. |
| */ |
| |
| -import org.apache.lucene.analysis.Token; |
| +import org.apache.lucene.analysis.TokenStream; |
| |
| /** |
| - * Implements the policy for breaking text into multiple fragments for consideration |
| - * by the {@link Highlighter} class. A sophisticated implementation may do this on the basis |
| - * of detecting end of sentences in the text. |
| + * Implements the policy for breaking text into multiple fragments for |
| + * consideration by the {@link Highlighter} class. A sophisticated |
| + * implementation may do this on the basis of detecting end of sentences in the |
| + * text. |
| */ |
| -public interface Fragmenter |
| -{ |
| - /** |
| - * Initializes the Fragmenter |
| - * @param originalText |
| - */ |
| - public void start(String originalText); |
| +public interface Fragmenter { |
| |
| - /** |
| - * Test to see if this token from the stream should be held in a new TextFragment |
| - * @param nextToken |
| - */ |
| - public boolean isNewFragment(Token nextToken); |
| + /** |
| + * Initializes the Fragmenter. You can grab references to the Attributes you are |
| + * interested in from tokenStream and then access the values in isNewFragment. |
| + * |
| + * @param originalText |
| + * @param tokenStream |
| + */ |
| + public void start(String originalText, TokenStream tokenStream); |
| + |
| + |
| + /** |
| + * Test to see if this token from the stream should be held in a new |
| + * TextFragment. Every time this is called, the TokenStream |
| + * passed to start(String, TokenStream) will have been incremented. |
| + * |
| + */ |
| + public boolean isNewFragment(); |
| } |
| Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java |
| =================================================================== |
| --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (revision 797692) |
| +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (working copy) |
| @@ -22,8 +22,10 @@ |
| import java.util.Iterator; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| -import org.apache.lucene.analysis.Token; |
| import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.TermAttribute; |
| import org.apache.lucene.util.PriorityQueue; |
| |
| /** |
| @@ -214,8 +216,14 @@ |
| { |
| ArrayList docFrags = new ArrayList(); |
| StringBuffer newText=new StringBuffer(); |
| - |
| + |
| + TermAttribute termAtt = (TermAttribute) tokenStream.addAttribute(TermAttribute.class); |
| + OffsetAttribute offsetAtt = (OffsetAttribute) tokenStream.addAttribute(OffsetAttribute.class); |
| + tokenStream.addAttribute(PositionIncrementAttribute.class); |
| + tokenStream.reset(); |
| + |
| TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size()); |
| + fragmentScorer.init(tokenStream); |
| fragmentScorer.startFragment(currentFrag); |
| docFrags.add(currentFrag); |
| |
| @@ -223,28 +231,27 @@ |
| |
| try |
| { |
| - final Token reusableToken = new Token(); |
| + |
| String tokenText; |
| int startOffset; |
| int endOffset; |
| int lastEndOffset = 0; |
| - textFragmenter.start(text); |
| + textFragmenter.start(text, tokenStream); |
| |
| - TokenGroup tokenGroup=new TokenGroup(); |
| - |
| - for (Token nextToken = tokenStream.next(reusableToken); |
| - (nextToken!= null)&&(nextToken.startOffset()< maxDocCharsToAnalyze); |
| - nextToken = tokenStream.next(reusableToken)) |
| + TokenGroup tokenGroup=new TokenGroup(tokenStream); |
| + |
| + for (boolean next = tokenStream.incrementToken(); next && (offsetAtt.startOffset()< maxDocCharsToAnalyze); |
| + next = tokenStream.incrementToken()) |
| { |
| - if( (nextToken.endOffset()>text.length()) |
| + if( (offsetAtt.endOffset()>text.length()) |
| || |
| - (nextToken.startOffset()>text.length()) |
| + (offsetAtt.startOffset()>text.length()) |
| ) |
| { |
| - throw new InvalidTokenOffsetsException("Token "+nextToken.toString() |
| + throw new InvalidTokenOffsetsException("Token "+ termAtt.term() |
| +" exceeds length of provided text sized "+text.length()); |
| } |
| - if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct(nextToken))) |
| + if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct())) |
| { |
| //the current token is distinct from previous tokens - |
| // markup the cached token group info |
| @@ -260,7 +267,7 @@ |
| tokenGroup.clear(); |
| |
| //check if current token marks the start of a new fragment |
| - if(textFragmenter.isNewFragment(nextToken)) |
| + if(textFragmenter.isNewFragment()) |
| { |
| currentFrag.setScore(fragmentScorer.getFragmentScore()); |
| //record stats for a new fragment |
| @@ -271,7 +278,7 @@ |
| } |
| } |
| |
| - tokenGroup.addToken(nextToken,fragmentScorer.getTokenScore(nextToken)); |
| + tokenGroup.addToken(fragmentScorer.getTokenScore()); |
| |
| // if(lastEndOffset>maxDocBytesToAnalyze) |
| // { |
| @@ -332,7 +339,7 @@ |
| //The above code caused a problem as a result of Christoph Goller's 11th Sept 03 |
| //fix to PriorityQueue. The correct method to use here is the new "insert" method |
| // USE ABOVE CODE IF THIS DOES NOT COMPILE! |
| - fragQueue.insert(currentFrag); |
| + fragQueue.insertWithOverflow(currentFrag); |
| } |
| |
| //return the most relevant fragments |
| Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/NullFragmenter.java |
| =================================================================== |
| --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/NullFragmenter.java (revision 797692) |
| +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/NullFragmenter.java (working copy) |
| @@ -16,17 +16,18 @@ |
| * limitations under the License. |
| */ |
| |
| -import org.apache.lucene.analysis.Token; |
| +import org.apache.lucene.analysis.TokenStream; |
| |
| /** |
| * {@link Fragmenter} implementation which does not fragment the text. |
| * This is useful for highlighting the entire content of a document or field. |
| */ |
| public class NullFragmenter implements Fragmenter { |
| - public void start(String s) { |
| + public void start(String s, TokenStream tokenStream) { |
| } |
| |
| - public boolean isNewFragment(Token token) { |
| + public boolean isNewFragment() { |
| return false; |
| } |
| + |
| } |
| Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java |
| =================================================================== |
| --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (revision 797692) |
| +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (working copy) |
| @@ -1,4 +1,5 @@ |
| package org.apache.lucene.search.highlight; |
| + |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| @@ -19,134 +20,142 @@ |
| import java.util.HashMap; |
| import java.util.HashSet; |
| |
| -import org.apache.lucene.analysis.Token; |
| +import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.analysis.tokenattributes.TermAttribute; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.search.Query; |
| |
| /** |
| - * {@link Scorer} implementation which scores text fragments by the number of unique query terms found. |
| - * This class uses the {@link QueryTermExtractor} class to process determine the query terms and |
| - * their boosts to be used. |
| + * {@link Scorer} implementation which scores text fragments by the number of |
| + * unique query terms found. This class uses the {@link QueryTermExtractor} |
| + * class to determine the query terms and their boosts to be used. |
| */ |
| -//TODO: provide option to boost score of fragments near beginning of document |
| +// TODO: provide option to boost score of fragments near beginning of document |
| // based on fragment.getFragNum() |
| -public class QueryScorer implements Scorer |
| -{ |
| - TextFragment currentTextFragment=null; |
| - HashSet uniqueTermsInFragment; |
| - float totalScore=0; |
| - float maxTermWeight=0; |
| - private HashMap termsToFind; |
| - |
| +public class QueryScorer implements Scorer { |
| + |
| + TextFragment currentTextFragment = null; |
| + HashSet uniqueTermsInFragment; |
| + |
| + float totalScore = 0; |
| + float maxTermWeight = 0; |
| + private HashMap termsToFind; |
| + |
| + private TermAttribute termAtt; |
| + |
| + /** |
| + * |
| + * @param query a Lucene query (ideally rewritten using query.rewrite before |
| + * being passed to this class and the searcher) |
| + */ |
| + public QueryScorer(Query query) { |
| + this(QueryTermExtractor.getTerms(query)); |
| + } |
| + |
| + /** |
| + * |
| + * @param query a Lucene query (ideally rewritten using query.rewrite before |
| + * being passed to this class and the searcher) |
| + * @param fieldName the Field name which is used to match Query terms |
| + */ |
| + public QueryScorer(Query query, String fieldName) { |
| + this(QueryTermExtractor.getTerms(query, false, fieldName)); |
| + } |
| + |
| + /** |
| + * |
| + * @param query a Lucene query (ideally rewritten using query.rewrite before |
| + * being passed to this class and the searcher) |
| + * @param reader used to compute IDF which can be used to a) score selected |
| + * fragments better b) use graded highlights eg set font color |
| + * intensity |
| + * @param fieldName the field on which Inverse Document Frequency (IDF) |
| + * calculations are based |
| + */ |
| + public QueryScorer(Query query, IndexReader reader, String fieldName) { |
| + this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName)); |
| + } |
| |
| - /** |
| - * |
| - * @param query a Lucene query (ideally rewritten using query.rewrite |
| - * before being passed to this class and the searcher) |
| - */ |
| - public QueryScorer(Query query) |
| - { |
| - this(QueryTermExtractor.getTerms(query)); |
| - } |
| - |
| - /** |
| - * |
| - * @param query a Lucene query (ideally rewritten using query.rewrite |
| - * before being passed to this class and the searcher) |
| - * @param fieldName the Field name which is used to match Query terms |
| - */ |
| - public QueryScorer(Query query, String fieldName) |
| - { |
| - this(QueryTermExtractor.getTerms(query, false,fieldName)); |
| - } |
| + public QueryScorer(WeightedTerm[] weightedTerms) { |
| + termsToFind = new HashMap(); |
| + for (int i = 0; i < weightedTerms.length; i++) { |
| + WeightedTerm existingTerm = (WeightedTerm) termsToFind |
| + .get(weightedTerms[i].term); |
| + if ((existingTerm == null) |
| + || (existingTerm.weight < weightedTerms[i].weight)) { |
| + // if a term is defined more than once, always use the highest scoring |
| + // weight |
| + termsToFind.put(weightedTerms[i].term, weightedTerms[i]); |
| + maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight()); |
| + } |
| + } |
| + } |
| |
| - /** |
| - * |
| - * @param query a Lucene query (ideally rewritten using query.rewrite |
| - * before being passed to this class and the searcher) |
| - * @param reader used to compute IDF which can be used to a) score selected fragments better |
| - * b) use graded highlights eg set font color intensity |
| - * @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based |
| - */ |
| - public QueryScorer(Query query, IndexReader reader, String fieldName) |
| - { |
| - this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName)); |
| - } |
| + /* (non-Javadoc) |
| + * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream) |
| + */ |
| + public void init(TokenStream tokenStream) { |
| + termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class); |
| + } |
| |
| - public QueryScorer(WeightedTerm []weightedTerms ) |
| - { |
| - termsToFind = new HashMap(); |
| - for (int i = 0; i < weightedTerms.length; i++) |
| - { |
| - WeightedTerm existingTerm=(WeightedTerm) termsToFind.get(weightedTerms[i].term); |
| - if( (existingTerm==null) ||(existingTerm.weight<weightedTerms[i].weight) ) |
| - { |
| - //if a term is defined more than once, always use the highest scoring weight |
| - termsToFind.put(weightedTerms[i].term,weightedTerms[i]); |
| - maxTermWeight=Math.max(maxTermWeight,weightedTerms[i].getWeight()); |
| - } |
| - } |
| - } |
| - |
| + /* |
| + * (non-Javadoc) |
| + * |
| + * @see |
| + * org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache |
| + * .lucene.search.highlight.TextFragment) |
| + */ |
| + public void startFragment(TextFragment newFragment) { |
| + uniqueTermsInFragment = new HashSet(); |
| + currentTextFragment = newFragment; |
| + totalScore = 0; |
| |
| - /* (non-Javadoc) |
| - * @see org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache.lucene.search.highlight.TextFragment) |
| - */ |
| - public void startFragment(TextFragment newFragment) |
| - { |
| - uniqueTermsInFragment = new HashSet(); |
| - currentTextFragment=newFragment; |
| - totalScore=0; |
| - |
| - } |
| - |
| - /* (non-Javadoc) |
| - * @see org.apache.lucene.search.highlight.FragmentScorer#scoreToken(org.apache.lucene.analysis.Token) |
| - */ |
| - public float getTokenScore(Token token) |
| - { |
| - String termText=token.term(); |
| - |
| - WeightedTerm queryTerm=(WeightedTerm) termsToFind.get(termText); |
| - if(queryTerm==null) |
| - { |
| - //not a query term - return |
| - return 0; |
| - } |
| - //found a query term - is it unique in this doc? |
| - if(!uniqueTermsInFragment.contains(termText)) |
| - { |
| - totalScore+=queryTerm.getWeight(); |
| - uniqueTermsInFragment.add(termText); |
| - } |
| - return queryTerm.getWeight(); |
| - } |
| - |
| - |
| - /* (non-Javadoc) |
| - * @see org.apache.lucene.search.highlight.FragmentScorer#endFragment(org.apache.lucene.search.highlight.TextFragment) |
| - */ |
| - public float getFragmentScore() |
| - { |
| - return totalScore; |
| - } |
| + } |
| |
| |
| - /* (non-Javadoc) |
| - * @see org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed() |
| - */ |
| - public void allFragmentsProcessed() |
| - { |
| - //this class has no special operations to perform at end of processing |
| - } |
| + /* (non-Javadoc) |
| + * @see org.apache.lucene.search.highlight.Scorer#getTokenScore() |
| + */ |
| + public float getTokenScore() { |
| + String termText = termAtt.term(); |
| |
| - /** |
| - * |
| - * @return The highest weighted term (useful for passing to GradientFormatter to set |
| - * top end of coloring scale. |
| - */ |
| - public float getMaxTermWeight() |
| - { |
| - return maxTermWeight; |
| + WeightedTerm queryTerm = (WeightedTerm) termsToFind.get(termText); |
| + if (queryTerm == null) { |
| + // not a query term - return |
| + return 0; |
| } |
| + // found a query term - is it unique in this doc? |
| + if (!uniqueTermsInFragment.contains(termText)) { |
| + totalScore += queryTerm.getWeight(); |
| + uniqueTermsInFragment.add(termText); |
| + } |
| + return queryTerm.getWeight(); |
| + } |
| + |
| + |
| + /* (non-Javadoc) |
| + * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore() |
| + */ |
| + public float getFragmentScore() { |
| + return totalScore; |
| + } |
| + |
| + /* |
| + * (non-Javadoc) |
| + * |
| + * @see |
| + * org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed() |
| + */ |
| + public void allFragmentsProcessed() { |
| + // this class has no special operations to perform at end of processing |
| + } |
| + |
| + /** |
| + * |
| + * @return The highest weighted term (useful for passing to GradientFormatter |
| + * to set top end of coloring scale). |
| + */ |
| + public float getMaxTermWeight() { |
| + return maxTermWeight; |
| + } |
| } |
| Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/Scorer.java |
| =================================================================== |
| --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/Scorer.java (revision 797692) |
| +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/Scorer.java (working copy) |
| @@ -1,4 +1,5 @@ |
| package org.apache.lucene.search.highlight; |
| + |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| @@ -16,34 +17,45 @@ |
| * limitations under the License. |
| */ |
| |
| -import org.apache.lucene.analysis.Token; |
| +import org.apache.lucene.analysis.TokenStream; |
| |
| /** |
| * Adds to the score for a fragment based on its tokens |
| */ |
| -public interface Scorer |
| -{ |
| - /** |
| - * called when a new fragment is started for consideration |
| - * @param newFragment |
| - */ |
| - public void startFragment(TextFragment newFragment); |
| +public interface Scorer { |
| |
| - /** |
| - * Called for each token in the current fragment |
| - * @param token The token to be scored |
| - * @return a score which is passed to the Highlighter class to influence the mark-up of the text |
| - * (this return value is NOT used to score the fragment) |
| - */ |
| - public float getTokenScore(Token token); |
| - |
| + /** |
| + * Called to init the Scorer with a TokenStream. You can grab references to |
| + * the attributes you are interested in here and access them from |
| + * getTokenScore(). |
| + * |
| + * @param tokenStream |
| + */ |
| + public void init(TokenStream tokenStream); |
| |
| - /** |
| - * Called when the highlighter has no more tokens for the current fragment - the scorer returns |
| - * the weighting it has derived for the most recent fragment, typically based on the tokens |
| - * passed to getTokenScore(). |
| - * |
| - */ |
| - public float getFragmentScore(); |
| + /** |
| + * called when a new fragment is started for consideration |
| + * |
| + * @param newFragment |
| + */ |
| + public void startFragment(TextFragment newFragment); |
| + |
| + /** |
| + * Called for each token in the current fragment. The Highlighter will |
| + * increment the TokenStream passed to init on every call. |
| + * |
| + * @return a score which is passed to the Highlighter class to influence the |
| + * mark-up of the text (this return value is NOT used to score the |
| + * fragment) |
| + */ |
| + public float getTokenScore(); |
| + |
| + /** |
| + * Called when the highlighter has no more tokens for the current fragment - |
| + * the scorer returns the weighting it has derived for the most recent |
| + * fragment, typically based on the tokens passed to getTokenScore(). |
| + * |
| + */ |
| + public float getFragmentScore(); |
| |
| } |
| Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleFragmenter.java |
| =================================================================== |
| --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleFragmenter.java (revision 797692) |
| +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleFragmenter.java (working copy) |
| @@ -1,4 +1,5 @@ |
| package org.apache.lucene.search.highlight; |
| + |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| @@ -16,69 +17,64 @@ |
| * limitations under the License. |
| */ |
| |
| -import org.apache.lucene.analysis.Token; |
| +import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| |
| /** |
| - * {@link Fragmenter} implementation which breaks text up into same-size |
| + * {@link Fragmenter} implementation which breaks text up into same-size |
| * fragments with no concerns over spotting sentence boundaries. |
| */ |
| -public class SimpleFragmenter implements Fragmenter |
| -{ |
| - private static final int DEFAULT_FRAGMENT_SIZE =100; |
| - private int currentNumFrags; |
| - private int fragmentSize; |
| +public class SimpleFragmenter implements Fragmenter { |
| + private static final int DEFAULT_FRAGMENT_SIZE = 100; |
| + private int currentNumFrags; |
| + private int fragmentSize; |
| + private OffsetAttribute offsetAtt; |
| |
| + public SimpleFragmenter() { |
| + this(DEFAULT_FRAGMENT_SIZE); |
| + } |
| |
| - public SimpleFragmenter() |
| - { |
| - this(DEFAULT_FRAGMENT_SIZE); |
| - } |
| + /** |
| + * |
| + * @param fragmentSize size in number of characters of each fragment |
| + */ |
| + public SimpleFragmenter(int fragmentSize) { |
| + this.fragmentSize = fragmentSize; |
| + } |
| |
| |
| - /** |
| - * |
| - * @param fragmentSize size in number of characters of each fragment |
| - */ |
| - public SimpleFragmenter(int fragmentSize) |
| - { |
| - this.fragmentSize=fragmentSize; |
| - } |
| + /* (non-Javadoc) |
| + * @see org.apache.lucene.search.highlight.Fragmenter#start(java.lang.String, org.apache.lucene.analysis.TokenStream) |
| + */ |
| + public void start(String originalText, TokenStream stream) { |
| + offsetAtt = (OffsetAttribute) stream.getAttribute(OffsetAttribute.class); |
| + currentNumFrags = 1; |
| + } |
| |
| - /* (non-Javadoc) |
| - * @see org.apache.lucene.search.highlight.TextFragmenter#start(java.lang.String) |
| - */ |
| - public void start(String originalText) |
| - { |
| - currentNumFrags=1; |
| - } |
| |
| - /* (non-Javadoc) |
| - * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token) |
| - */ |
| - public boolean isNewFragment(Token token) |
| - { |
| - boolean isNewFrag= token.endOffset()>=(fragmentSize*currentNumFrags); |
| - if(isNewFrag) |
| - { |
| - currentNumFrags++; |
| - } |
| - return isNewFrag; |
| - } |
| + /* (non-Javadoc) |
| + * @see org.apache.lucene.search.highlight.Fragmenter#isNewFragment() |
| + */ |
| + public boolean isNewFragment() { |
| + boolean isNewFrag = offsetAtt.endOffset() >= (fragmentSize * currentNumFrags); |
| + if (isNewFrag) { |
| + currentNumFrags++; |
| + } |
| + return isNewFrag; |
| + } |
| |
| - /** |
| - * @return size in number of characters of each fragment |
| - */ |
| - public int getFragmentSize() |
| - { |
| - return fragmentSize; |
| - } |
| + /** |
| + * @return size in number of characters of each fragment |
| + */ |
| + public int getFragmentSize() { |
| + return fragmentSize; |
| + } |
| |
| - /** |
| - * @param size size in characters of each fragment |
| - */ |
| - public void setFragmentSize(int size) |
| - { |
| - fragmentSize = size; |
| - } |
| + /** |
| + * @param size size in characters of each fragment |
| + */ |
| + public void setFragmentSize(int size) { |
| + fragmentSize = size; |
| + } |
| |
| } |
| Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java |
| =================================================================== |
| --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java (revision 797692) |
| +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java (working copy) |
| @@ -17,10 +17,13 @@ |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| -import org.apache.lucene.analysis.Token; |
| - |
| import java.util.List; |
| |
| +import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.TermAttribute; |
| + |
| |
| /** |
| * {@link Fragmenter} implementation which breaks text up into same-size |
| @@ -34,6 +37,9 @@ |
| private SpanScorer spanScorer; |
| private int waitForPos = -1; |
| private int textSize; |
| + private TermAttribute termAtt; |
| + private PositionIncrementAttribute posIncAtt; |
| + private OffsetAttribute offsetAtt; |
| |
| /** |
| * @param spanscorer SpanScorer that was used to score hits |
| @@ -50,12 +56,12 @@ |
| this.fragmentSize = fragmentSize; |
| this.spanScorer = spanscorer; |
| } |
| - |
| + |
| /* (non-Javadoc) |
| - * @see org.apache.lucene.search.highlight.Fragmenter#isNewFragment(org.apache.lucene.analysis.Token) |
| + * @see org.apache.lucene.search.highlight.Fragmenter#isNewFragment() |
| */ |
| - public boolean isNewFragment(Token token) { |
| - position += token.getPositionIncrement(); |
| + public boolean isNewFragment() { |
| + position += posIncAtt.getPositionIncrement(); |
| |
| if (waitForPos == position) { |
| waitForPos = -1; |
| @@ -63,7 +69,7 @@ |
| return false; |
| } |
| |
| - WeightedSpanTerm wSpanTerm = spanScorer.getWeightedSpanTerm(token.term()); |
| + WeightedSpanTerm wSpanTerm = spanScorer.getWeightedSpanTerm(termAtt.term()); |
| |
| if (wSpanTerm != null) { |
| List positionSpans = wSpanTerm.getPositionSpans(); |
| @@ -76,8 +82,8 @@ |
| } |
| } |
| |
| - boolean isNewFrag = token.endOffset() >= (fragmentSize * currentNumFrags) |
| - && (textSize - token.endOffset()) >= (fragmentSize >>> 1); |
| + boolean isNewFrag = offsetAtt.endOffset() >= (fragmentSize * currentNumFrags) |
| + && (textSize - offsetAtt.endOffset()) >= (fragmentSize >>> 1); |
| |
| if (isNewFrag) { |
| currentNumFrags++; |
| @@ -86,12 +92,16 @@ |
| return isNewFrag; |
| } |
| |
| + |
| /* (non-Javadoc) |
| - * @see org.apache.lucene.search.highlight.Fragmenter#start(java.lang.String) |
| + * @see org.apache.lucene.search.highlight.Fragmenter#start(java.lang.String, org.apache.lucene.analysis.TokenStream) |
| */ |
| - public void start(String originalText) { |
| + public void start(String originalText, TokenStream tokenStream) { |
| position = -1; |
| currentNumFrags = 1; |
| textSize = originalText.length(); |
| + termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class); |
| + posIncAtt = (PositionIncrementAttribute) tokenStream.getAttribute(PositionIncrementAttribute.class); |
| + offsetAtt = (OffsetAttribute) tokenStream.getAttribute(OffsetAttribute.class); |
| } |
| } |
| Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java |
| =================================================================== |
| --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java (revision 797692) |
| +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java (working copy) |
| @@ -7,9 +7,10 @@ |
| import java.util.Set; |
| |
| import org.apache.lucene.analysis.CachingTokenFilter; |
| -import org.apache.lucene.analysis.Token; |
| +import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.TermAttribute; |
| import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.search.ConstantScoreRangeQuery; |
| import org.apache.lucene.search.Query; |
| |
| |
| @@ -26,6 +27,8 @@ |
| private float maxTermWeight; |
| private int position = -1; |
| private String defaultField; |
| + private TermAttribute termAtt; |
| + private PositionIncrementAttribute posIncAtt; |
| private static boolean highlightCnstScrRngQuery; |
| |
| /** |
| @@ -176,9 +179,9 @@ |
| * @see org.apache.lucene.search.highlight.Scorer#getTokenScore(org.apache.lucene.analysis.Token, |
| * int) |
| */ |
| - public float getTokenScore(Token token) { |
| - position += token.getPositionIncrement(); |
| - String termText = token.term(); |
| + public float getTokenScore() { |
| + position += posIncAtt.getPositionIncrement(); |
| + String termText = termAtt.term(); |
| |
| WeightedSpanTerm weightedSpanTerm; |
| |
| @@ -203,6 +206,11 @@ |
| return score; |
| } |
| |
| + public void init(TokenStream tokenStream) { |
| + termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class); |
| + posIncAtt = (PositionIncrementAttribute) tokenStream.getAttribute(PositionIncrementAttribute.class); |
| + } |
| + |
| /** |
| * Retrieve the WeightedSpanTerm for the specified token. Useful for passing |
| * Span information to a Fragmenter. |
| Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java |
| =================================================================== |
| --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java (revision 797692) |
| +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java (working copy) |
| @@ -1,4 +1,5 @@ |
| package org.apache.lucene.search.highlight; |
| + |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| @@ -15,118 +16,117 @@ |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| + |
| import org.apache.lucene.analysis.Token; |
| +import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.TermAttribute; |
| |
| /** |
| - * One, or several overlapping tokens, along with the score(s) and the |
| - * scope of the original text |
| + * One, or several overlapping tokens, along with the score(s) and the scope of |
| + * the original text |
| */ |
| -public class TokenGroup |
| -{ |
| - |
| - private static final int MAX_NUM_TOKENS_PER_GROUP=50; |
| - Token [] tokens=new Token[MAX_NUM_TOKENS_PER_GROUP]; |
| - float [] scores=new float[MAX_NUM_TOKENS_PER_GROUP]; |
| - int numTokens=0; |
| - int startOffset=0; |
| - int endOffset=0; |
| - float tot; |
| +public class TokenGroup { |
| |
| + private static final int MAX_NUM_TOKENS_PER_GROUP = 50; |
| + Token [] tokens=new Token[MAX_NUM_TOKENS_PER_GROUP]; |
| + float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP]; |
| + int numTokens = 0; |
| + int startOffset = 0; |
| + int endOffset = 0; |
| + float tot; |
| int matchStartOffset, matchEndOffset; |
| |
| + private OffsetAttribute offsetAtt; |
| + private TermAttribute termAtt; |
| |
| - void addToken(Token token, float score) |
| - { |
| - if(numTokens < MAX_NUM_TOKENS_PER_GROUP) |
| - { |
| - if(numTokens==0) |
| - { |
| - startOffset=matchStartOffset=token.startOffset(); |
| - endOffset=matchEndOffset=token.endOffset(); |
| - tot += score; |
| - } |
| - else |
| - { |
| - startOffset=Math.min(startOffset,token.startOffset()); |
| - endOffset=Math.max(endOffset,token.endOffset()); |
| - if (score>0) { |
| - if (tot==0) { |
| - matchStartOffset=token.startOffset(); |
| - matchEndOffset=token.endOffset(); |
| + public TokenGroup(TokenStream tokenStream) { |
| + offsetAtt = (OffsetAttribute) tokenStream.getAttribute(OffsetAttribute.class); |
| + termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class); |
| + } |
| + |
| + void addToken(float score) { |
| + if (numTokens < MAX_NUM_TOKENS_PER_GROUP) { |
| + int termStartOffset = offsetAtt.startOffset(); |
| + int termEndOffset = offsetAtt.endOffset(); |
| + if (numTokens == 0) { |
| + startOffset = matchStartOffset = termStartOffset; |
| + endOffset = matchEndOffset = termEndOffset; |
| + tot += score; |
| + } else { |
| + startOffset = Math.min(startOffset, termStartOffset); |
| + endOffset = Math.max(endOffset, termEndOffset); |
| + if (score > 0) { |
| + if (tot == 0) { |
| + matchStartOffset = offsetAtt.startOffset(); |
| + matchEndOffset = offsetAtt.endOffset(); |
| } else { |
| - matchStartOffset=Math.min(matchStartOffset,token.startOffset()); |
| - matchEndOffset=Math.max(matchEndOffset,token.endOffset()); |
| + matchStartOffset = Math.min(matchStartOffset, termStartOffset); |
| + matchEndOffset = Math.max(matchEndOffset, termEndOffset); |
| } |
| - tot+=score; |
| + tot += score; |
| } |
| } |
| - tokens[numTokens]= (Token) token.clone(); |
| - scores[numTokens]=score; |
| - numTokens++; |
| - } |
| - } |
| + Token token = new Token(termStartOffset, termEndOffset); |
| + token.setTermBuffer(termAtt.term()); |
| + tokens[numTokens] = token; |
| + scores[numTokens] = score; |
| + numTokens++; |
| + } |
| + } |
| |
| - boolean isDistinct(Token token) |
| - { |
| - return token.startOffset()>=endOffset; |
| - } |
| + boolean isDistinct() { |
| + return offsetAtt.startOffset() >= endOffset; |
| + } |
| |
| + void clear() { |
| + numTokens = 0; |
| + tot = 0; |
| + } |
| + |
| + /** |
| + * @param index a value between 0 and numTokens -1 |
| + * @return the "n"th token |
| + */ |
| + public Token getToken(int index) |
| + { |
| + return tokens[index]; |
| + } |
| |
| - void clear() |
| - { |
| - numTokens=0; |
| - tot=0; |
| - } |
| - |
| - /** |
| - * |
| - * @param index a value between 0 and numTokens -1 |
| - * @return the "n"th token |
| - */ |
| - public Token getToken(int index) |
| - { |
| - return tokens[index]; |
| - } |
| + /** |
| + * |
| + * @param index a value between 0 and numTokens -1 |
| + * @return the "n"th score |
| + */ |
| + public float getScore(int index) { |
| + return scores[index]; |
| + } |
| |
| - /** |
| - * |
| - * @param index a value between 0 and numTokens -1 |
| - * @return the "n"th score |
| - */ |
| - public float getScore(int index) |
| - { |
| - return scores[index]; |
| - } |
| + /** |
| + * @return the end position in the original text |
| + */ |
| + public int getEndOffset() { |
| + return endOffset; |
| + } |
| |
| - /** |
| - * @return the end position in the original text |
| - */ |
| - public int getEndOffset() |
| - { |
| - return endOffset; |
| - } |
| + /** |
| + * @return the number of tokens in this group |
| + */ |
| + public int getNumTokens() { |
| + return numTokens; |
| + } |
| |
| - /** |
| - * @return the number of tokens in this group |
| - */ |
| - public int getNumTokens() |
| - { |
| - return numTokens; |
| - } |
| + /** |
| + * @return the start position in the original text |
| + */ |
| + public int getStartOffset() { |
| + return startOffset; |
| + } |
| |
| - /** |
| - * @return the start position in the original text |
| - */ |
| - public int getStartOffset() |
| - { |
| - return startOffset; |
| - } |
| - |
| - /** |
| - * @return all tokens' scores summed up |
| - */ |
| - public float getTotalScore() |
| - { |
| - return tot; |
| - } |
| + /** |
| + * @return all tokens' scores summed up |
| + */ |
| + public float getTotalScore() { |
| + return tot; |
| + } |
| } |
| Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java |
| =================================================================== |
| --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (revision 797692) |
| +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (working copy) |
| @@ -29,6 +29,8 @@ |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.Token; |
| import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.TermAttribute; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.TermFreqVector; |
| @@ -135,32 +137,45 @@ |
| * @param tokenPositionsGuaranteedContiguous true if the token position numbers have no overlaps or gaps. If looking |
| * to eek out the last drops of performance, set to true. If in doubt, set to false. |
| */ |
| - public static TokenStream getTokenStream(TermPositionVector tpv, boolean tokenPositionsGuaranteedContiguous) |
| - { |
| + public static TokenStream getTokenStream(TermPositionVector tpv, boolean tokenPositionsGuaranteedContiguous) { |
| //an object used to iterate across an array of tokens |
| - class StoredTokenStream extends TokenStream |
| - { |
| - Token tokens[]; |
| - int currentToken=0; |
| - StoredTokenStream(Token tokens[]) |
| - { |
| - this.tokens=tokens; |
| + class StoredTokenStream extends TokenStream { |
| + Token tokens[]; |
| + int currentToken = 0; |
| + TermAttribute termAtt; |
| + OffsetAttribute offsetAtt; |
| + |
| + StoredTokenStream(Token tokens[]) { |
| + this.tokens = tokens; |
| + termAtt = (TermAttribute) addAttribute(TermAttribute.class); |
| + offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class); |
| + } |
| + |
| + public Token next(final Token reusableToken) { |
| + // deprecated next(Token) path, kept for back-compat with old consumers |
| + assert reusableToken != null; |
| + if (currentToken >= tokens.length) { |
| + return null; |
| } |
| - public Token next(final Token reusableToken) |
| - { |
| - assert reusableToken != null; |
| - if(currentToken>=tokens.length) |
| - { |
| - return null; |
| - } |
| - return tokens[currentToken++]; |
| - } |
| - } |
| + return tokens[currentToken++]; |
| + } |
| + |
| + public boolean incrementToken() throws IOException { |
| + // new TokenStream API: copy the next stored token into this stream's attributes |
| + if (currentToken >= tokens.length) { |
| + return false; |
| + } |
| + Token token = tokens[currentToken++]; |
| + termAtt.setTermBuffer(token.term()); |
| + offsetAtt.setOffset(token.startOffset(), token.endOffset()); |
| + return true; |
| + } |
| + } |
| //code to reconstruct the original sequence of Tokens |
| String[] terms=tpv.getTerms(); |
| int[] freq=tpv.getTermFrequencies(); |
| int totalTokens=0; |
| - Token newToken = new Token(); |
| + |
| for (int t = 0; t < freq.length; t++) |
| { |
| totalTokens+=freq[t]; |
| @@ -190,8 +205,9 @@ |
| } |
| for (int tp = 0; tp < offsets.length; tp++) |
| { |
| - newToken.reinit(terms[t], offsets[tp].getStartOffset(), offsets[tp].getEndOffset()); |
| - unsortedTokens.add(newToken.clone()); |
| + Token token = new Token(offsets[tp].getStartOffset(), offsets[tp].getEndOffset()); |
| + token.setTermBuffer(terms[t]); |
| + unsortedTokens.add(token); |
| } |
| } |
| else |
| @@ -204,8 +220,8 @@ |
| //tokens stored with positions - can use this to index straight into sorted array |
| for (int tp = 0; tp < pos.length; tp++) |
| { |
| - newToken.reinit(terms[t], offsets[tp].getStartOffset(), offsets[tp].getEndOffset()); |
| - tokensInOriginalOrder[pos[tp]] = (Token) newToken.clone(); |
| + Token token = new Token(terms[t], offsets[tp].getStartOffset(), offsets[tp].getEndOffset()); |
| + tokensInOriginalOrder[pos[tp]] = token; |
| } |
| } |
| } |
| @@ -218,7 +234,7 @@ |
| { |
| Token t1=(Token) o1; |
| Token t2=(Token) o2; |
| - if(t1.startOffset()>t2.startOffset()) |
| + if(t1.startOffset()>t2.startOffset()) |
| return 1; |
| if(t1.startOffset()<t2.startOffset()) |
| return -1; |
| Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java |
| =================================================================== |
| --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (revision 799153) |
| +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (working copy) |
| @@ -42,8 +42,8 @@ |
| import org.apache.lucene.search.PhraseQuery; |
| import org.apache.lucene.search.PrefixQuery; |
| import org.apache.lucene.search.Query; |
| -import org.apache.lucene.search.TermRangeQuery; |
| import org.apache.lucene.search.TermQuery; |
| +import org.apache.lucene.search.TermRangeQuery; |
| import org.apache.lucene.search.WildcardQuery; |
| import org.apache.lucene.search.spans.SpanNearQuery; |
| import org.apache.lucene.search.spans.SpanOrQuery; |
| @@ -98,7 +98,7 @@ |
| private void extract(Query query, Map terms) throws IOException { |
| if (query instanceof BooleanQuery) { |
| BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses(); |
| - |
| + |
| for (int i = 0; i < queryClauses.length; i++) { |
| if (!queryClauses[i].isProhibited()) { |
| extract(queryClauses[i].getQuery(), terms); |
| @@ -441,7 +441,7 @@ |
| * This class makes sure that if both position sensitive and insensitive |
| * versions of the same term are added, the position insensitive one wins. |
| */ |
| - private class PositionCheckingMap extends HashMap { |
| + static private class PositionCheckingMap extends HashMap { |
| |
| public void putAll(Map m) { |
| Iterator it = m.keySet().iterator(); |
| Index: contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java |
| =================================================================== |
| --- contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 799153) |
| +++ contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) |
| @@ -38,10 +38,14 @@ |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.CachingTokenFilter; |
| import org.apache.lucene.analysis.LowerCaseTokenizer; |
| +import org.apache.lucene.analysis.SimpleAnalyzer; |
| import org.apache.lucene.analysis.Token; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.TermAttribute; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.Field.Index; |
| @@ -53,18 +57,17 @@ |
| import org.apache.lucene.queryParser.ParseException; |
| import org.apache.lucene.queryParser.QueryParser; |
| import org.apache.lucene.search.BooleanQuery; |
| +import org.apache.lucene.search.MultiTermQuery; |
| import org.apache.lucene.search.ConstantScoreRangeQuery; |
| import org.apache.lucene.search.FilteredQuery; |
| import org.apache.lucene.search.Hits; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.MultiPhraseQuery; |
| import org.apache.lucene.search.MultiSearcher; |
| -import org.apache.lucene.search.MultiTermQuery; |
| import org.apache.lucene.search.PhraseQuery; |
| import org.apache.lucene.search.Query; |
| -import org.apache.lucene.search.TermRangeFilter; |
| -import org.apache.lucene.search.Searcher; |
| import org.apache.lucene.search.TermQuery; |
| +import org.apache.lucene.search.TermRangeFilter; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.search.WildcardQuery; |
| import org.apache.lucene.search.BooleanClause.Occur; |
| @@ -75,6 +78,7 @@ |
| import org.apache.lucene.search.spans.SpanTermQuery; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.RAMDirectory; |
| +import org.apache.lucene.util.Version; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.NodeList; |
| |
| @@ -87,7 +91,7 @@ |
| static final String FIELD_NAME = "contents"; |
| private Query query; |
| RAMDirectory ramDir; |
| - public Searcher searcher = null; |
| + public IndexSearcher searcher = null; |
| public Hits hits = null; |
| int numHighlights = 0; |
| Analyzer analyzer = new StandardAnalyzer(); |
| @@ -108,11 +112,40 @@ |
| super(arg0); |
| } |
| |
| + public void testHits() throws Exception { |
| + Analyzer analyzer = new SimpleAnalyzer(); |
| + QueryParser qp = new QueryParser(FIELD_NAME, analyzer); |
| + query = qp.parse("\"very long\""); |
| + searcher = new IndexSearcher(ramDir, false); |
| + TopDocs hits = searcher.search(query, 10); |
| + |
| + Highlighter highlighter = new Highlighter(null); |
| + |
| + |
| + for (int i = 0; i < hits.scoreDocs.length; i++) { |
| + Document doc = searcher.doc(hits.scoreDocs[i].doc); |
| + String storedField = doc.get(FIELD_NAME); |
| + |
| + TokenStream stream = TokenSources.getAnyTokenStream(searcher |
| + .getIndexReader(), hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer); |
| + CachingTokenFilter ctf = new CachingTokenFilter(stream); |
| + SpanScorer scorer = new SpanScorer(query, FIELD_NAME, ctf); |
| + // ctf.reset(); |
| + Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); |
| + highlighter.setFragmentScorer(scorer); |
| + highlighter.setTextFragmenter(fragmenter); |
| + |
| + String fragment = highlighter.getBestFragment(ctf, storedField); |
| + |
| + System.out.println(fragment); |
| + } |
| + } |
| + |
| public void testHighlightingWithDefaultField() throws Exception { |
| |
| String s1 = "I call our world Flatland, not because we call it so,"; |
| |
| - QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer()); |
| + QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer(Version.LUCENE_CURRENT)); |
| |
| // Verify that a query against the default field results in text being |
| // highlighted |
| @@ -144,7 +177,7 @@ |
| */ |
| private static String highlightField(Query query, String fieldName, String text) |
| throws IOException, InvalidTokenOffsetsException { |
| - CachingTokenFilter tokenStream = new CachingTokenFilter(new StandardAnalyzer().tokenStream( |
| + CachingTokenFilter tokenStream = new CachingTokenFilter(new StandardAnalyzer(Version.LUCENE_CURRENT).tokenStream( |
| fieldName, new StringReader(text))); |
| // Assuming "<B>", "</B>" used to highlight |
| SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); |
| @@ -908,10 +941,12 @@ |
| Query query = parser.parse(srchkey); |
| |
| TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s)); |
| + |
| Highlighter highlighter = getHighlighter(query, null, tokenStream, HighlighterTest.this); |
| |
| // Get 3 best fragments and seperate with a "..." |
| tokenStream = analyzer.tokenStream(null, new StringReader(s)); |
| + |
| String result = highlighter.getBestFragments(tokenStream, s, 3, "..."); |
| String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition"; |
| assertTrue("overlapping analyzer should handle highlights OK, expected:" + expectedResult |
| @@ -1075,10 +1110,11 @@ |
| } |
| |
| public void testUnRewrittenQuery() throws Exception { |
| - TestHighlightRunner helper = new TestHighlightRunner() { |
| + final TestHighlightRunner helper = new TestHighlightRunner() { |
| |
| public void run() throws Exception { |
| numHighlights = 0; |
| + SpanScorer.setHighlightCnstScrRngQuery(false); |
| // test to show how rewritten query can still be used |
| searcher = new IndexSearcher(ramDir); |
| Analyzer analyzer = new StandardAnalyzer(); |
| @@ -1154,13 +1190,17 @@ |
| public void startFragment(TextFragment newFragment) { |
| } |
| |
| - public float getTokenScore(Token token) { |
| + public float getTokenScore() { |
| return 0; |
| } |
| |
| public float getFragmentScore() { |
| return 1; |
| } |
| + |
| + public void init(TokenStream tokenStream) { |
| + |
| + } |
| }); |
| highlighter.setTextFragmenter(new SimpleFragmenter(2000)); |
| TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(rawDocContent)); |
| @@ -1292,27 +1332,44 @@ |
| return new TokenStream() { |
| Iterator iter; |
| List lst; |
| + private TermAttribute termAtt; |
| + private PositionIncrementAttribute posIncrAtt; |
| + private OffsetAttribute offsetAtt; |
| { |
| + termAtt = (TermAttribute) addAttribute(TermAttribute.class); |
| + posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); |
| + offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class); |
| lst = new ArrayList(); |
| Token t; |
| t = createToken("hi", 0, 2); |
| + t.setPositionIncrement(1); |
| lst.add(t); |
| t = createToken("hispeed", 0, 8); |
| + t.setPositionIncrement(1); |
| lst.add(t); |
| t = createToken("speed", 3, 8); |
| t.setPositionIncrement(0); |
| lst.add(t); |
| t = createToken("10", 8, 10); |
| + t.setPositionIncrement(1); |
| lst.add(t); |
| t = createToken("foo", 11, 14); |
| + t.setPositionIncrement(1); |
| lst.add(t); |
| iter = lst.iterator(); |
| } |
| |
| - public Token next(final Token reusableToken) throws IOException { |
| - assert reusableToken != null; |
| - return iter.hasNext() ? (Token) iter.next() : null; |
| + public boolean incrementToken() throws IOException { |
| + if(iter.hasNext()) { |
| + Token token = (Token) iter.next(); |
| + termAtt.setTermBuffer(token.term()); |
| + posIncrAtt.setPositionIncrement(token.getPositionIncrement()); |
| + offsetAtt.setOffset(token.startOffset(), token.endOffset()); |
| + return true; |
| + } |
| + return false; |
| } |
| + |
| }; |
| } |
| |
| @@ -1322,26 +1379,42 @@ |
| return new TokenStream() { |
| Iterator iter; |
| List lst; |
| + private TermAttribute termAtt; |
| + private PositionIncrementAttribute posIncrAtt; |
| + private OffsetAttribute offsetAtt; |
| { |
| + termAtt = (TermAttribute) addAttribute(TermAttribute.class); |
| + posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); |
| + offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class); |
| lst = new ArrayList(); |
| Token t; |
| t = createToken("hispeed", 0, 8); |
| + t.setPositionIncrement(1); |
| lst.add(t); |
| t = createToken("hi", 0, 2); |
| t.setPositionIncrement(0); |
| lst.add(t); |
| t = createToken("speed", 3, 8); |
| + t.setPositionIncrement(1); |
| lst.add(t); |
| t = createToken("10", 8, 10); |
| + t.setPositionIncrement(1); |
| lst.add(t); |
| t = createToken("foo", 11, 14); |
| + t.setPositionIncrement(1); |
| lst.add(t); |
| iter = lst.iterator(); |
| } |
| |
| - public Token next(final Token reusableToken) throws IOException { |
| - assert reusableToken != null; |
| - return iter.hasNext() ? (Token) iter.next() : null; |
| + public boolean incrementToken() throws IOException { |
| + if(iter.hasNext()) { |
| + Token token = (Token) iter.next(); |
| + termAtt.setTermBuffer(token.term()); |
| + posIncrAtt.setPositionIncrement(token.getPositionIncrement()); |
| + offsetAtt.setOffset(token.startOffset(), token.endOffset()); |
| + return true; |
| + } |
| + return false; |
| } |
| }; |
| } |
| @@ -1611,7 +1684,11 @@ |
| * java.io.Reader) |
| */ |
| public TokenStream tokenStream(String arg0, Reader arg1) { |
| - return new SynonymTokenizer(new LowerCaseTokenizer(arg1), synonyms); |
| + LowerCaseTokenizer stream = new LowerCaseTokenizer(arg1); |
| + stream.addAttribute(TermAttribute.class); |
| + stream.addAttribute(PositionIncrementAttribute.class); |
| + stream.addAttribute(OffsetAttribute.class); |
| + return new SynonymTokenizer(stream, synonyms); |
| } |
| } |
| |
| @@ -1622,47 +1699,70 @@ |
| class SynonymTokenizer extends TokenStream { |
| private TokenStream realStream; |
| private Token currentRealToken = null; |
| + private org.apache.lucene.analysis.Token cRealToken = null; |
| private Map synonyms; |
| StringTokenizer st = null; |
| + private TermAttribute realTermAtt; |
| + private PositionIncrementAttribute realPosIncrAtt; |
| + private OffsetAttribute realOffsetAtt; |
| + private TermAttribute termAtt; |
| + private PositionIncrementAttribute posIncrAtt; |
| + private OffsetAttribute offsetAtt; |
| |
| public SynonymTokenizer(TokenStream realStream, Map synonyms) { |
| this.realStream = realStream; |
| this.synonyms = synonyms; |
| + realTermAtt = (TermAttribute) realStream.getAttribute(TermAttribute.class); |
| + realPosIncrAtt = (PositionIncrementAttribute) realStream.getAttribute(PositionIncrementAttribute.class); |
| + realOffsetAtt = (OffsetAttribute) realStream.getAttribute(OffsetAttribute.class); |
| + |
| + termAtt = (TermAttribute) addAttribute(TermAttribute.class); |
| + posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); |
| + offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class); |
| } |
| |
| - public Token next(final Token reusableToken) throws IOException { |
| - assert reusableToken != null; |
| + public boolean incrementToken() throws IOException { |
| + |
| if (currentRealToken == null) { |
| - Token nextRealToken = realStream.next(reusableToken); |
| - if (nextRealToken == null) { |
| - return null; |
| + boolean next = realStream.incrementToken(); |
| + if (!next) { |
| + return false; |
| } |
| - String expansions = (String) synonyms.get(nextRealToken.term()); |
| + // mirror the underlying stream's attribute values into this stream's attributes |
| + termAtt.setTermBuffer(realTermAtt.term()); |
| + offsetAtt.setOffset(realOffsetAtt.startOffset(), realOffsetAtt.endOffset()); |
| + posIncrAtt.setPositionIncrement(realPosIncrAtt.getPositionIncrement()); |
| + |
| + String expansions = (String) synonyms.get(realTermAtt.term()); |
| if (expansions == null) { |
| - return nextRealToken; |
| + return true; |
| } |
| st = new StringTokenizer(expansions, ","); |
| if (st.hasMoreTokens()) { |
| - currentRealToken = (Token) nextRealToken.clone(); |
| + currentRealToken = new Token(realOffsetAtt.startOffset(), realOffsetAtt.endOffset()); |
| + currentRealToken.setTermBuffer(realTermAtt.term()); |
| } |
| - return currentRealToken; |
| + |
| + return true; |
| } else { |
| - reusableToken.reinit(st.nextToken(), |
| - currentRealToken.startOffset(), |
| - currentRealToken.endOffset()); |
| - reusableToken.setPositionIncrement(0); |
| + String tok = st.nextToken(); |
| + termAtt.setTermBuffer(tok); |
| + offsetAtt.setOffset(currentRealToken.startOffset(), currentRealToken.endOffset()); |
| + posIncrAtt.setPositionIncrement(0); |
| if (!st.hasMoreTokens()) { |
| currentRealToken = null; |
| st = null; |
| } |
| - return reusableToken; |
| + return true; |
| } |
| + |
| } |
| |
| static abstract class TestHighlightRunner { |
| static final int STANDARD = 0; |
| static final int SPAN = 1; |
| int mode = STANDARD; |
| + Fragmenter frag = new SimpleFragmenter(20); |
| |
| public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, |
| Formatter formatter) { |
| @@ -1725,7 +1825,7 @@ |
| if (mode == SPAN) { |
| ((CachingTokenFilter) tokenStream).reset(); |
| } |
| - highlighter.setTextFragmenter(new SimpleFragmenter(20)); |
| + highlighter.setTextFragmenter(frag); |
| |
| String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, |
| fragmentSeparator); |
| |