| Index: src/java/org/apache/lucene/search/highlight/SimpleFragmenter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/search/highlight/SimpleFragmenter.java (revision 615225) |
| +++ src/java/org/apache/lucene/search/highlight/SimpleFragmenter.java (working copy) |
| @@ -38,7 +38,7 @@ |
| |
| /** |
| * |
| - * @param fragmentSize size in bytes of each fragment |
| + * @param fragmentSize size in number of characters of each fragment |
| */ |
| public SimpleFragmenter(int fragmentSize) |
| { |
| @@ -67,7 +67,7 @@ |
| } |
| |
| /** |
| - * @return size in bytes of each fragment |
| + * @return size in number of characters of each fragment |
| */ |
| public int getFragmentSize() |
| { |
| @@ -75,7 +75,7 @@ |
| } |
| |
| /** |
| - * @param size size in bytes of each fragment |
| + * @param size size in number of characters of each fragment |
| */ |
| public void setFragmentSize(int size) |
| { |
| Index: src/java/org/apache/lucene/search/highlight/Highlighter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/search/highlight/Highlighter.java (revision 615225) |
| +++ src/java/org/apache/lucene/search/highlight/Highlighter.java (working copy) |
| @@ -22,7 +22,6 @@ |
| import java.util.Iterator; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| -import org.apache.lucene.analysis.Token; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.util.PriorityQueue; |
| |
| @@ -34,9 +33,12 @@ |
| */ |
| public class Highlighter |
| { |
| - |
| - public static final int DEFAULT_MAX_DOC_BYTES_TO_ANALYZE=50*1024; |
| - private int maxDocBytesToAnalyze=DEFAULT_MAX_DOC_BYTES_TO_ANALYZE; |
| + public static final int DEFAULT_MAX_CHARS_TO_ANALYZE = 50*1024; |
| + /** |
| + * @deprecated See {@link #DEFAULT_MAX_CHARS_TO_ANALYZE} |
| + */ |
| + public static final int DEFAULT_MAX_DOC_BYTES_TO_ANALYZE=DEFAULT_MAX_CHARS_TO_ANALYZE; |
| + private int maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; |
| private Formatter formatter; |
| private Encoder encoder; |
| private Fragmenter textFragmenter=new SimpleFragmenter(); |
| @@ -224,7 +226,7 @@ |
| |
| TokenGroup tokenGroup=new TokenGroup(); |
| token = tokenStream.next(); |
| - while ((token!= null)&&(token.startOffset()<maxDocBytesToAnalyze)) |
| + while ((token!= null)&&(token.startOffset()< maxDocCharsToAnalyze)) |
| { |
| if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct(token))) |
| { |
| @@ -283,7 +285,7 @@ |
| (lastEndOffset < text.length()) |
| && |
| // and that text is not too large... |
| - (text.length()<maxDocBytesToAnalyze) |
| + (text.length()< maxDocCharsToAnalyze) |
| ) |
| { |
| //append it to the last fragment |
| @@ -471,23 +473,35 @@ |
| } |
| |
| /** |
| - * @return the maximum number of bytes to be tokenized per doc |
| + * @return the maximum number of bytes to be tokenized per doc |
| + * |
| + * @deprecated See {@link #getMaxDocCharsToAnalyze()}, since this value has always counted chars; both methods read the same internal value |
| */ |
| public int getMaxDocBytesToAnalyze() |
| { |
| - return maxDocBytesToAnalyze; |
| + return maxDocCharsToAnalyze; |
| } |
| |
| /** |
| * @param byteCount the maximum number of bytes to be tokenized per doc |
| * (This can improve performance with large documents) |
| + * |
| + * @deprecated See {@link #setMaxDocCharsToAnalyze(int)}, since this value has always counted chars |
| */ |
| public void setMaxDocBytesToAnalyze(int byteCount) |
| { |
| - maxDocBytesToAnalyze = byteCount; |
| + maxDocCharsToAnalyze = byteCount; |
| } |
| |
| - /** |
| + public int getMaxDocCharsToAnalyze() { |
| + return maxDocCharsToAnalyze; |
| + } |
| + |
| + public void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) { |
| + this.maxDocCharsToAnalyze = maxDocCharsToAnalyze; |
| + } |
| + |
| + /** |
| */ |
| public Fragmenter getTextFragmenter() |
| { |