Index: src/java/org/apache/lucene/search/highlight/SimpleFragmenter.java
===================================================================
--- src/java/org/apache/lucene/search/highlight/SimpleFragmenter.java (revision 615225)
+++ src/java/org/apache/lucene/search/highlight/SimpleFragmenter.java (working copy)
@@ -38,7 +38,7 @@
/**
*
- * @param fragmentSize size in bytes of each fragment
+ * @param fragmentSize size in characters of each fragment
*/
public SimpleFragmenter(int fragmentSize)
{
@@ -67,7 +67,7 @@
}
/**
- * @return size in bytes of each fragment
+ * @return size in characters of each fragment
*/
public int getFragmentSize()
{
@@ -75,7 +75,7 @@
}
/**
- * @param size size in bytes of each fragment
+ * @param size size in characters of each fragment
*/
public void setFragmentSize(int size)
{
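For reference, a minimal usage sketch (illustrative, not part of the patch; class names as in the patched sources). The fragment size has always been a character count: SimpleFragmenter compares it against token character offsets, never byte positions.

import org.apache.lucene.search.highlight.SimpleFragmenter;

public class FragmenterExample {
    public static void main(String[] args) {
        // Fragments of roughly 100 characters (not bytes).
        SimpleFragmenter fragmenter = new SimpleFragmenter(100);
        fragmenter.setFragmentSize(200); // also measured in characters
        System.out.println(fragmenter.getFragmentSize()); // prints 200
    }
}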
Index: src/java/org/apache/lucene/search/highlight/Highlighter.java
===================================================================
--- src/java/org/apache/lucene/search/highlight/Highlighter.java (revision 615225)
+++ src/java/org/apache/lucene/search/highlight/Highlighter.java (working copy)
@@ -22,7 +22,6 @@
import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.PriorityQueue;
@@ -34,9 +33,12 @@
*/
public class Highlighter
{
-
- public static final int DEFAULT_MAX_DOC_BYTES_TO_ANALYZE=50*1024;
- private int maxDocBytesToAnalyze=DEFAULT_MAX_DOC_BYTES_TO_ANALYZE;
+ public static final int DEFAULT_MAX_CHARS_TO_ANALYZE = 50*1024;
+ /**
+ * @deprecated See {@link #DEFAULT_MAX_CHARS_TO_ANALYZE}
+ */
+ public static final int DEFAULT_MAX_DOC_BYTES_TO_ANALYZE = DEFAULT_MAX_CHARS_TO_ANALYZE;
+ private int maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE;
private Formatter formatter;
private Encoder encoder;
private Fragmenter textFragmenter=new SimpleFragmenter();
@@ -224,7 +226,7 @@
TokenGroup tokenGroup=new TokenGroup();
token = tokenStream.next();
- while ((token!= null)&&(token.startOffset()<maxDocBytesToAnalyze))
+ while ((token != null) && (token.startOffset() < maxDocCharsToAnalyze))
{
if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct(token)))
{
@@ -283,7 +285,7 @@
(lastEndOffset < text.length())
&&
// and that text is not too large...
- (text.length()<maxDocBytesToAnalyze)
+ (text.length() < maxDocCharsToAnalyze)
)
{
//append it to the last fragment
@@ -471,23 +473,41 @@
}
/**
- * @return the maximum number of bytes to be tokenized per doc
+ * @return the maximum number of chars to be tokenized per doc
+ *
+ * @deprecated See {@link #getMaxDocCharsToAnalyze()}; this value has always counted chars, and both methods return the same internal value.
*/
public int getMaxDocBytesToAnalyze()
{
- return maxDocBytesToAnalyze;
+ return maxDocCharsToAnalyze;
}
/**
* @param byteCount the maximum number of bytes to be tokenized per doc
* (This can improve performance with large documents)
+ *
+ * @deprecated See {@link #setMaxDocCharsToAnalyze(int)}; this value has always counted chars.
*/
public void setMaxDocBytesToAnalyze(int byteCount)
{
- maxDocBytesToAnalyze = byteCount;
+ maxDocCharsToAnalyze = byteCount;
}
- /**
+ /**
+ * @return the maximum number of chars to be tokenized per doc
+ */
+ public int getMaxDocCharsToAnalyze() {
+ return maxDocCharsToAnalyze;
+ }
+
+ /**
+ * @param maxDocCharsToAnalyze the maximum number of chars to be tokenized per doc
+ */
+ public void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) {
+ this.maxDocCharsToAnalyze = maxDocCharsToAnalyze;
+ }
+
+ /**
*/
public Fragmenter getTextFragmenter()
{
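For reference, a minimal migration sketch (illustrative, not part of the patch; assumes the renamed methods above plus the stock QueryScorer and StandardAnalyzer classes). The deprecated byte-named accessors and the new char-named ones read and write the same internal field, so either spelling yields identical behavior:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;

public class HighlightExample {
    public static String bestFragment(Query query, String text) throws Exception {
        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(100));
        // Before this patch: highlighter.setMaxDocBytesToAnalyze(50 * 1024);
        // The renamed method sets the same limit; it has always counted chars.
        highlighter.setMaxDocCharsToAnalyze(Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
        return highlighter.getBestFragment(new StandardAnalyzer(), "contents", text);
    }
}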