Index: src/java/org/apache/lucene/analysis/Analyzer.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/analysis/Analyzer.java,v
retrieving revision 1.3
diff -u -r1.3 Analyzer.java
--- src/java/org/apache/lucene/analysis/Analyzer.java	7 Nov 2002 05:55:39 -0000	1.3
+++ src/java/org/apache/lucene/analysis/Analyzer.java	1 Mar 2004 07:21:59 -0000
@@ -55,6 +55,7 @@
  */

 import java.io.Reader;
+import java.io.StringReader;

 /** An Analyzer builds TokenStreams, which analyze text. It thus represents a
  * policy for extracting index terms from text.
@@ -86,6 +87,16 @@
   public TokenStream tokenStream(Reader reader)
   {
     return tokenStream(null, reader);
+  }
+
+  /**
+   * Creates a TokenStream that tokenizes a String. The default implementation
+   * forwards to tokenStream(fieldName, Reader) using a StringReader. Override
+   * if you want to provide a faster implementation.
+   */
+  public TokenStream tokenStream(String fieldName, String fieldValue)
+  {
+    return tokenStream(fieldName, new StringReader(fieldValue));
   }
 }

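Note (illustration, not part of the patch): the new tokenStream(String fieldName, String fieldValue) overload above only wraps the value in a StringReader, and its Javadoc invites Analyzer subclasses to override it when they can tokenize a String more cheaply. A minimal sketch of such an override, assuming whitespace-only tokenization; the class name FastWhitespaceAnalyzer and the anonymous TokenStream are illustrative, not code from this patch:

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

// Illustrative analyzer: Reader input goes through the stock
// WhitespaceTokenizer, while String input is scanned in place,
// skipping the StringReader detour entirely.
public class FastWhitespaceAnalyzer extends Analyzer {

  public TokenStream tokenStream(String fieldName, Reader reader) {
    return new WhitespaceTokenizer(reader);
  }

  public TokenStream tokenStream(String fieldName, final String fieldValue) {
    return new TokenStream() {
      private int pos = 0;
      public Token next() {
        // skip leading whitespace
        while (pos < fieldValue.length()
               && Character.isWhitespace(fieldValue.charAt(pos)))
          pos++;
        if (pos >= fieldValue.length())
          return null;                          // end of stream
        int start = pos;
        // scan to the end of the current token
        while (pos < fieldValue.length()
               && !Character.isWhitespace(fieldValue.charAt(pos)))
          pos++;
        return new Token(fieldValue.substring(start, pos), start, pos);
      }
      public void close() {}
    };
  }
}

Because the String is scanned in place, no Reader or per-token buffering is needed for String-valued fields.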
Index: src/java/org/apache/lucene/index/DocumentWriter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/DocumentWriter.java,v
retrieving revision 1.9
diff -u -r1.9 DocumentWriter.java
--- src/java/org/apache/lucene/index/DocumentWriter.java	20 Feb 2004 20:14:55 -0000	1.9
+++ src/java/org/apache/lucene/index/DocumentWriter.java	1 Mar 2004 07:22:00 -0000
@@ -57,9 +57,7 @@
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
-import java.util.Hashtable;
-import java.util.Enumeration;
-import java.util.Arrays;
+import java.util.*;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -144,7 +142,7 @@

   // Keys are Terms, values are Postings.
   // Used to buffer a document before it is written to the index.
-  private final Hashtable postingTable = new Hashtable();
+  private final HashMap postingTable = new HashMap(512);
   private int[] fieldLengths;
   private int[] fieldPositions;
   private float[] fieldBoosts;
@@ -166,17 +164,16 @@
           addPosition(fieldName, field.stringValue(), position++);
           length++;
         } else {
-          Reader reader;                          // find or make Reader
+          TokenStream stream;
           if (field.readerValue() != null)
-            reader = field.readerValue();
+            stream = analyzer.tokenStream(fieldName, field.readerValue());
           else if (field.stringValue() != null)
-            reader = new StringReader(field.stringValue());
+            stream = analyzer.tokenStream(fieldName, field.stringValue());
           else
             throw new IllegalArgumentException
               ("field must have either String or Reader value");

           // Tokenize field and add to postingTable
-          TokenStream stream = analyzer.tokenStream(fieldName, reader);
           try {
             for (Token t = stream.next(); t != null; t = stream.next()) {
               position += (t.getPositionIncrement() - 1);
@@ -220,9 +217,9 @@
   private final Posting[] sortPostingTable() {
     // copy postingTable into an array
     Posting[] array = new Posting[postingTable.size()];
-    Enumeration postings = postingTable.elements();
-    for (int i = 0; postings.hasMoreElements(); i++)
-      array[i] = (Posting) postings.nextElement();
+    Iterator postings = postingTable.values().iterator();
+    for (int i = 0; postings.hasNext(); i++)
+      array[i] = (Posting) postings.next();

     // sort the array
     quickSort(array, 0, array.length - 1);
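Note (usage illustration, not part of the patch): with the patch applied, callers can hand an Analyzer a String directly and consume tokens the same way DocumentWriter's inversion loop now does. A hedged sketch; the field name "contents", the sample text, and the choice of SimpleAnalyzer are arbitrary demo values, and the code compiles only against a tree that includes the new overload:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

public class TokenStreamDemo {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new SimpleAnalyzer();
    // String-valued input: the caller no longer wraps it in a StringReader.
    TokenStream stream = analyzer.tokenStream("contents", "The Quick Brown Fox");
    try {
      for (Token t = stream.next(); t != null; t = stream.next())
        System.out.println(t.termText()
            + " [" + t.startOffset() + "," + t.endOffset() + ")");
    } finally {
      stream.close();
    }
  }
}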
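Note (illustration, not part of the patch): the Hashtable-to-HashMap change is safe because postingTable is per-DocumentWriter state that is never shared between threads, so Hashtable's synchronization was pure overhead; likewise values().iterator() replaces the legacy Enumeration. A self-contained sketch of the same buffer-then-drain pattern, with a simplified frequency count standing in for Posting objects:

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class PostingBufferSketch {
  public static void main(String[] args) {
    // Unsynchronized map, pre-sized up front like the patched postingTable.
    HashMap postings = new HashMap(512);

    String[] tokens = { "quick", "brown", "fox", "quick" };
    for (int i = 0; i < tokens.length; i++) {
      int[] freq = (int[]) postings.get(tokens[i]);
      if (freq == null)
        postings.put(tokens[i], new int[] { 1 });   // first occurrence
      else
        freq[0]++;                                  // repeat occurrence
    }

    // Drain the buffer with an Iterator, as sortPostingTable() now does;
    // entrySet() is used here only so the keys can be printed as well.
    for (Iterator it = postings.entrySet().iterator(); it.hasNext(); ) {
      Map.Entry e = (Map.Entry) it.next();
      System.out.println(e.getKey() + " -> " + ((int[]) e.getValue())[0]);
    }
  }
}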