| Index: src/java/org/apache/lucene/index/DocumentsWriter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/DocumentsWriter.java (revision 609120) |
| +++ src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) |
| @@ -2050,7 +2050,7 @@ |
| IndexOutput proxOut) |
| throws CorruptIndexException, IOException { |
| |
| - final String fieldName = fields[0].fieldInfo.name; |
| + final int fieldNumber = fields[0].fieldInfo.number; |
| int numFields = fields.length; |
| |
| final FieldMergeState[] mergeStates = new FieldMergeState[numFields]; |
| @@ -2101,9 +2101,6 @@ |
| while(text[pos] != 0xffff) |
| pos++; |
| |
| - // TODO: can we avoid 2 new objects here? |
| - Term term = new Term(fieldName, new String(text, start, pos-start)); |
| - |
| long freqPointer = freqOut.getFilePointer(); |
| long proxPointer = proxOut.getFilePointer(); |
| |
| @@ -2201,7 +2198,7 @@ |
| |
| // Write term |
| termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer)); |
| - termsOut.add(term, termInfo); |
| + termsOut.add(fieldNumber, text, start, pos-start, termInfo); |
| } |
| } |
| |
| Index: src/java/org/apache/lucene/index/TermInfosWriter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/TermInfosWriter.java (revision 609120) |
| +++ src/java/org/apache/lucene/index/TermInfosWriter.java (working copy) |
| @@ -21,7 +21,6 @@ |
| import java.io.IOException; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.util.StringHelper; |
| |
| /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a |
| Directory. A TermInfos can be written once, in order. */ |
| @@ -32,9 +31,8 @@ |
| |
| private FieldInfos fieldInfos; |
| private IndexOutput output; |
| - private Term lastTerm = new Term("", ""); |
| private TermInfo lastTi = new TermInfo(); |
| - private long size = 0; |
| + private long size; |
| |
| // TODO: the default values for these two parameters should be settable from |
| // IndexWriter. However, once that's done, folks will start setting them to |
| @@ -62,11 +60,16 @@ |
| */ |
| int maxSkipLevels = 10; |
| |
| - private long lastIndexPointer = 0; |
| - private boolean isIndex = false; |
| + private long lastIndexPointer; |
| + private boolean isIndex; |
| + private char[] lastTermText = new char[10]; |
| + private int lastTermTextLength; |
| + private int lastFieldNumber = -1; |
| |
| - private TermInfosWriter other = null; |
| + private char[] termTextBuffer = new char[10]; |
| |
| + private TermInfosWriter other; |
| + |
| TermInfosWriter(Directory directory, String segment, FieldInfos fis, |
| int interval) |
| throws IOException { |
| @@ -93,25 +96,59 @@ |
| output.writeInt(maxSkipLevels); // write maxSkipLevels |
| } |
| |
| - /** Adds a new <Term, TermInfo> pair to the set. |
| + void add(Term term, TermInfo ti) throws IOException { |
| + |
| + final int length = term.text.length(); |
| + if (termTextBuffer.length < length) |
| + termTextBuffer = new char[(int) (length*1.25)]; |
| + |
| + term.text.getChars(0, length, termTextBuffer, 0); |
| + |
| + add(fieldInfos.fieldNumber(term.field), termTextBuffer, 0, length, ti); |
| + } |
| + |
| + // Currently used only by assert statement |
| + private int compareToLastTerm(int fieldNumber, char[] termText, int start, int length) { |
| + int pos = 0; |
| + |
| + if (lastFieldNumber != fieldNumber) |
| + return fieldInfos.fieldName(lastFieldNumber).compareTo(fieldInfos.fieldName(fieldNumber)); |
| + |
| + while(pos < length && pos < lastTermTextLength) { |
| + final char c1 = lastTermText[pos]; |
| + final char c2 = termText[pos + start]; |
| + if (c1 < c2) |
| + return -1; |
| + else if (c1 > c2) |
| + return 1; |
| + pos++; |
| + } |
| + |
| + if (pos < lastTermTextLength) |
| + // Last term was longer |
| + return 1; |
| + else if (pos < length) |
| + // Last term was shorter |
| + return -1; |
| + else |
| + return 0; |
| + } |
| + |
| + /** Adds a new <<fieldNumber, termText>, TermInfo> pair to the set. |
| Term must be lexicographically greater than all previous Terms added. |
| TermInfo pointers must be positive and greater than all previous.*/ |
| - final void add(Term term, TermInfo ti) |
| - throws CorruptIndexException, IOException { |
| - if (!isIndex && term.compareTo(lastTerm) <= 0) |
| - throw new CorruptIndexException("term out of order (\"" + term + |
| - "\".compareTo(\"" + lastTerm + "\") <= 0)"); |
| - if (ti.freqPointer < lastTi.freqPointer) |
| - throw new CorruptIndexException("freqPointer out of order (" + ti.freqPointer + |
| - " < " + lastTi.freqPointer + ")"); |
| - if (ti.proxPointer < lastTi.proxPointer) |
| - throw new CorruptIndexException("proxPointer out of order (" + ti.proxPointer + |
| - " < " + lastTi.proxPointer + ")"); |
| + void add(int fieldNumber, char[] termText, int termTextStart, int termTextLength, TermInfo ti) |
| + throws IOException { |
| |
| + assert compareToLastTerm(fieldNumber, termText, termTextStart, termTextLength) < 0 || (isIndex && termTextLength == 0 && lastTermTextLength == 0); |
| + assert ti.freqPointer >= lastTi.freqPointer: "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")"; |
| + assert ti.proxPointer >= lastTi.proxPointer: "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")"; |
| + |
| if (!isIndex && size % indexInterval == 0) |
| - other.add(lastTerm, lastTi); // add an index term |
| + other.add(lastFieldNumber, lastTermText, 0, lastTermTextLength, lastTi); // add an index term |
| |
| - writeTerm(term); // write term |
| + writeTerm(fieldNumber, termText, termTextStart, termTextLength); // write term |
| + |
| output.writeVInt(ti.docFreq); // write doc freq |
| output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers |
| output.writeVLong(ti.proxPointer - lastTi.proxPointer); |
| @@ -125,28 +162,38 @@ |
| lastIndexPointer = other.output.getFilePointer(); // write pointer |
| } |
| |
| + if (lastTermText.length < termTextLength) |
| + lastTermText = new char[(int) (termTextLength*1.25)]; |
| + System.arraycopy(termText, termTextStart, lastTermText, 0, termTextLength); |
| + lastTermTextLength = termTextLength; |
| + lastFieldNumber = fieldNumber; |
| + |
| lastTi.set(ti); |
| size++; |
| } |
| |
| - private final void writeTerm(Term term) |
| + private void writeTerm(int fieldNumber, char[] termText, int termTextStart, int termTextLength) |
| throws IOException { |
| - int start = StringHelper.stringDifference(lastTerm.text, term.text); |
| - int length = term.text.length() - start; |
| |
| - output.writeVInt(start); // write shared prefix length |
| - output.writeVInt(length); // write delta length |
| - output.writeChars(term.text, start, length); // write delta chars |
| + // Compute prefix in common with last term: |
| + int start = 0; |
| + final int limit = termTextLength < lastTermTextLength ? termTextLength : lastTermTextLength; |
| + while(start < limit) { |
| + if (termText[termTextStart+start] != lastTermText[start]) |
| + break; |
| + start++; |
| + } |
| |
| - output.writeVInt(fieldInfos.fieldNumber(term.field)); // write field num |
| + int length = termTextLength - start; |
| |
| - lastTerm = term; |
| + output.writeVInt(start); // write shared prefix length |
| + output.writeVInt(length); // write delta length |
| + output.writeChars(termText, start+termTextStart, length); // write delta chars |
| + output.writeVInt(fieldNumber); // write field num |
| } |
| |
| - |
| - |
| /** Called to complete TermInfos creation. */ |
| - final void close() throws IOException { |
| + void close() throws IOException { |
| output.seek(4); // write size after format |
| output.writeLong(size); |
| output.close(); |