blob: ffc50a112653775ef51436ecd3d313e5c408fb73 [file] [log] [blame]
Index: src/java/org/apache/lucene/analysis/Analyzer.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/analysis/Analyzer.java,v
retrieving revision 1.3
diff -u -r1.3 Analyzer.java
--- src/java/org/apache/lucene/analysis/Analyzer.java 7 Nov 2002 05:55:39 -0000 1.3
+++ src/java/org/apache/lucene/analysis/Analyzer.java 1 Mar 2004 07:21:59 -0000
@@ -55,6 +55,7 @@
*/
import java.io.Reader;
+import java.io.StringReader;
/** An Analyzer builds TokenStreams, which analyze text. It thus represents a
* policy for extracting index terms from text.
@@ -86,6 +87,16 @@
public TokenStream tokenStream(Reader reader)
{
return tokenStream(null, reader);
+ }
+
+ /**
+ * Creates a TokenStream that tokenizes a String. The default implementation
+ * forwards to tokenStream(fieldName, Reader) using a StringReader. Override
+ * if you want to provide a faster implementation.
+ */
+ public TokenStream tokenStream(String fieldName, String fieldValue)
+ {
+ return tokenStream(fieldName, new StringReader(fieldValue));
}
}
Index: src/java/org/apache/lucene/index/DocumentWriter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/DocumentWriter.java,v
retrieving revision 1.9
diff -u -r1.9 DocumentWriter.java
--- src/java/org/apache/lucene/index/DocumentWriter.java 20 Feb 2004 20:14:55 -0000 1.9
+++ src/java/org/apache/lucene/index/DocumentWriter.java 1 Mar 2004 07:22:00 -0000
@@ -57,9 +57,7 @@
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
-import java.util.Hashtable;
-import java.util.Enumeration;
-import java.util.Arrays;
+import java.util.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -144,7 +142,7 @@
// Keys are Terms, values are Postings.
// Used to buffer a document before it is written to the index.
- private final Hashtable postingTable = new Hashtable();
+ private final HashMap postingTable = new HashMap(512);
private int[] fieldLengths;
private int[] fieldPositions;
private float[] fieldBoosts;
@@ -166,17 +164,16 @@
addPosition(fieldName, field.stringValue(), position++);
length++;
} else {
- Reader reader; // find or make Reader
+ TokenStream stream;
if (field.readerValue() != null)
- reader = field.readerValue();
+ stream = analyzer.tokenStream(fieldName, field.readerValue());
else if (field.stringValue() != null)
- reader = new StringReader(field.stringValue());
+ stream = analyzer.tokenStream(fieldName, field.stringValue());
else
throw new IllegalArgumentException
("field must have either String or Reader value");
// Tokenize field and add to postingTable
- TokenStream stream = analyzer.tokenStream(fieldName, reader);
try {
for (Token t = stream.next(); t != null; t = stream.next()) {
position += (t.getPositionIncrement() - 1);
@@ -220,9 +217,9 @@
private final Posting[] sortPostingTable() {
// copy postingTable into an array
Posting[] array = new Posting[postingTable.size()];
- Enumeration postings = postingTable.elements();
- for (int i = 0; postings.hasMoreElements(); i++)
- array[i] = (Posting) postings.nextElement();
+ Iterator postings = postingTable.values().iterator();
+ for (int i = 0; postings.hasNext(); i++)
+ array[i] = (Posting) postings.next();
// sort the array
quickSort(array, 0, array.length - 1);