| Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java |
| =================================================================== |
| --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java (revision 1591787) |
| +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java (working copy) |
| @@ -81,7 +81,7 @@ |
| continue; |
| } |
| |
| - final TokenStream stream = field.tokenStream(analyzer); |
| + final TokenStream stream = field.tokenStream(analyzer, null); |
| // reset the TokenStream to the first token |
| stream.reset(); |
| |
| Index: lucene/core/src/java/org/apache/lucene/document/Field.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/document/Field.java (revision 1591787) |
| +++ lucene/core/src/java/org/apache/lucene/document/Field.java (working copy) |
| @@ -74,8 +74,6 @@ |
| * customize how it's tokenized */ |
| protected TokenStream tokenStream; |
| |
| - private transient TokenStream internalTokenStream; |
| - |
| /** |
| * Field's boost |
| * @see #boost() |
| @@ -494,7 +492,7 @@ |
| } |
| |
| @Override |
| - public TokenStream tokenStream(Analyzer analyzer) throws IOException { |
| + public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException { |
| if (!fieldType().indexed()) { |
| return null; |
| } |
| @@ -501,12 +499,12 @@ |
| |
| final NumericType numericType = fieldType().numericType(); |
| if (numericType != null) { |
| - if (!(internalTokenStream instanceof NumericTokenStream)) { |
| + if (!(reuse instanceof NumericTokenStream && ((NumericTokenStream)reuse).getPrecisionStep() == type.numericPrecisionStep())) { |
| // lazy init the TokenStream as it is heavy to instantiate |
| // (attributes,...) if not needed (stored field loading) |
| - internalTokenStream = new NumericTokenStream(type.numericPrecisionStep()); |
| + reuse = new NumericTokenStream(type.numericPrecisionStep()); |
| } |
| - final NumericTokenStream nts = (NumericTokenStream) internalTokenStream; |
| + final NumericTokenStream nts = (NumericTokenStream) reuse; |
| // initialize value in TokenStream |
| final Number val = (Number) fieldsData; |
| switch (numericType) { |
| @@ -525,7 +523,7 @@ |
| default: |
| throw new AssertionError("Should never get here"); |
| } |
| - return internalTokenStream; |
| + return reuse; |
| } |
| |
| if (!fieldType().tokenized()) { |
| @@ -532,13 +530,13 @@ |
| if (stringValue() == null) { |
| throw new IllegalArgumentException("Non-Tokenized Fields must have a String value"); |
| } |
| - if (!(internalTokenStream instanceof StringTokenStream)) { |
| + if (!(reuse instanceof StringTokenStream)) { |
| // lazy init the TokenStream as it is heavy to instantiate |
| // (attributes,...) if not needed (stored field loading) |
| - internalTokenStream = new StringTokenStream(); |
| + reuse = new StringTokenStream(); |
| } |
| - ((StringTokenStream) internalTokenStream).setValue(stringValue()); |
| - return internalTokenStream; |
| + ((StringTokenStream) reuse).setValue(stringValue()); |
| + return reuse; |
| } |
| |
| if (tokenStream != null) { |
| Index: lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java (revision 1591807) |
| +++ lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java (working copy) |
| @@ -511,6 +511,9 @@ |
| |
| // Lazy init'd: |
| NumericDocValuesWriter norms; |
| + |
| + // reused |
| + TokenStream tokenStream; |
| |
| public PerField(FieldInfo fieldInfo, boolean invert) { |
| this.fieldInfo = fieldInfo; |
| @@ -574,7 +577,7 @@ |
| */ |
| boolean aborting = false; |
| boolean succeededInProcessingField = false; |
| - try (TokenStream stream = field.tokenStream(docState.analyzer)) { |
| + try (TokenStream stream = tokenStream = field.tokenStream(docState.analyzer, tokenStream)) { |
| // reset the TokenStream to the first token |
| stream.reset(); |
| invertState.setAttributeSource(stream); |
| Index: lucene/core/src/java/org/apache/lucene/index/IndexDocument.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/IndexDocument.java (revision 1591787) |
| +++ lucene/core/src/java/org/apache/lucene/index/IndexDocument.java (working copy) |
| @@ -24,8 +24,8 @@ |
| public interface IndexDocument { |
| |
| /** Obtains all indexable fields in document */ |
| - public Iterable<IndexableField> indexableFields(); |
| + public Iterable<? extends IndexableField> indexableFields(); |
| |
| /** Obtains all storable fields in document */ |
| - public Iterable<StorableField> storableFields(); |
| + public Iterable<? extends StorableField> storableFields(); |
| } |
| Index: lucene/core/src/java/org/apache/lucene/index/IndexableField.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/IndexableField.java (revision 1591787) |
| +++ lucene/core/src/java/org/apache/lucene/index/IndexableField.java (working copy) |
| @@ -42,11 +42,17 @@ |
| * implementations should use the given Analyzer to create the TokenStreams. |
| * |
| * @param analyzer Analyzer that should be used to create the TokenStreams from |
| + * @param reuse TokenStream for a previous instance of this field <b>name</b>. This allows |
| + * custom field types (like StringField and NumericField) that do not use |
| + * the analyzer to still have good performance. Note: the passed-in type |
| + * may be inappropriate, for example if you mix up different types of Fields |
| + * for the same field name. So its the responsibility of the implementation to |
| + * check. |
| * @return TokenStream value for indexing the document. Should always return |
| * a non-null value if the field is to be indexed |
| * @throws IOException Can be thrown while creating the TokenStream |
| */ |
| - public TokenStream tokenStream(Analyzer analyzer) throws IOException; |
| + public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException; |
| |
| /** |
| * Returns the field's index-time boost. |
| Index: lucene/core/src/test/org/apache/lucene/index/TestFieldReuse.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestFieldReuse.java (revision 0) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestFieldReuse.java (working copy) |
| @@ -0,0 +1,178 @@ |
| +package org.apache.lucene.index; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| +import java.util.Collections; |
| + |
| +import org.apache.lucene.analysis.Analyzer; |
| +import org.apache.lucene.analysis.BaseTokenStreamTestCase; |
| +import org.apache.lucene.analysis.CannedTokenStream; |
| +import org.apache.lucene.analysis.NumericTokenStream; |
| +import org.apache.lucene.analysis.NumericTokenStream.NumericTermAttribute; |
| +import org.apache.lucene.analysis.Token; |
| +import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.document.Field; |
| +import org.apache.lucene.document.IntField; |
| +import org.apache.lucene.document.StringField; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.util.NumericUtils; |
| + |
| +/** test tokenstream reuse by DefaultIndexingChain */ |
| +public class TestFieldReuse extends BaseTokenStreamTestCase { |
| + |
| + public void testStringField() throws IOException { |
| + StringField stringField = new StringField("foo", "bar", Field.Store.NO); |
| + |
| + // passing null |
| + TokenStream ts = stringField.tokenStream(null, null); |
| + assertTokenStreamContents(ts, |
| + new String[] { "bar" }, |
| + new int[] { 0 }, |
| + new int[] { 3 } |
| + ); |
| + |
| + // now reuse previous stream |
| + stringField = new StringField("foo", "baz", Field.Store.NO); |
| + TokenStream ts2 = stringField.tokenStream(null, ts); |
| + assertSame(ts, ts); |
| + assertTokenStreamContents(ts, |
| + new String[] { "baz" }, |
| + new int[] { 0 }, |
| + new int[] { 3 } |
| + ); |
| + |
| + // pass a bogus stream and ensure its still ok |
| + stringField = new StringField("foo", "beer", Field.Store.NO); |
| + TokenStream bogus = new NumericTokenStream(); |
| + ts = stringField.tokenStream(null, bogus); |
| + assertNotSame(ts, bogus); |
| + assertTokenStreamContents(ts, |
| + new String[] { "beer" }, |
| + new int[] { 0 }, |
| + new int[] { 4 } |
| + ); |
| + } |
| + |
| + public void testNumericReuse() throws IOException { |
| + IntField intField = new IntField("foo", 5, Field.Store.NO); |
| + |
| + // passing null |
| + TokenStream ts = intField.tokenStream(null, null); |
| + assertTrue(ts instanceof NumericTokenStream); |
| + assertEquals(NumericUtils.PRECISION_STEP_DEFAULT, ((NumericTokenStream)ts).getPrecisionStep()); |
| + assertNumericContents(5, ts); |
| + |
| + // now reuse previous stream |
| + intField = new IntField("foo", 20, Field.Store.NO); |
| + TokenStream ts2 = intField.tokenStream(null, ts); |
| + assertSame(ts, ts2); |
| + assertNumericContents(20, ts); |
| + |
| + // pass a bogus stream and ensure its still ok |
| + intField = new IntField("foo", 2343, Field.Store.NO); |
| + TokenStream bogus = new CannedTokenStream(new Token("bogus", 0, 5)); |
| + ts = intField.tokenStream(null, bogus); |
| + assertNotSame(bogus, ts); |
| + assertNumericContents(2343, ts); |
| + |
| + // pass another bogus stream (numeric, but different precision step!) |
| + intField = new IntField("foo", 42, Field.Store.NO); |
| + assert 3 != NumericUtils.PRECISION_STEP_DEFAULT; |
| + bogus = new NumericTokenStream(3); |
| + ts = intField.tokenStream(null, bogus); |
| + assertNotSame(bogus, ts); |
| + assertNumericContents(42, ts); |
| + } |
| + |
| + static class MyField implements IndexableField { |
| + TokenStream lastSeen; |
| + TokenStream lastReturned; |
| + |
| + @Override |
| + public String name() { |
| + return "foo"; |
| + } |
| + |
| + @Override |
| + public IndexableFieldType fieldType() { |
| + return StringField.TYPE_NOT_STORED; |
| + } |
| + |
| + @Override |
| + public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException { |
| + lastSeen = reuse; |
| + return lastReturned = new CannedTokenStream(new Token("unimportant", 0, 10)); |
| + } |
| + |
| + @Override |
| + public float boost() { |
| + return 1; |
| + } |
| + } |
| + |
| + public void testIndexWriterActuallyReuses() throws IOException { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + IndexWriter iw = new IndexWriter(dir, iwc); |
| + final MyField field1 = new MyField(); |
| + iw.addDocument(new IndexDocument() { |
| + @Override |
| + public Iterable<? extends IndexableField> indexableFields() { |
| + return Collections.singletonList(field1); |
| + } |
| + @Override |
| + public Iterable<StorableField> storableFields() { |
| + return Collections.emptyList(); |
| + } |
| + }); |
| + TokenStream previous = field1.lastReturned; |
| + assertNotNull(previous); |
| + |
| + final MyField field2 = new MyField(); |
| + iw.addDocument(new IndexDocument() { |
| + @Override |
| + public Iterable<? extends IndexableField> indexableFields() { |
| + return Collections.singletonList(field2); |
| + } |
| + @Override |
| + public Iterable<StorableField> storableFields() { |
| + return Collections.emptyList(); |
| + } |
| + }); |
| + assertSame(previous, field2.lastSeen); |
| + iw.shutdown(); |
| + dir.close(); |
| + } |
| + |
| + private void assertNumericContents(int value, TokenStream ts) throws IOException { |
| + assertTrue(ts instanceof NumericTokenStream); |
| + NumericTermAttribute numericAtt = ts.getAttribute(NumericTermAttribute.class); |
| + ts.reset(); |
| + boolean seen = false; |
| + while (ts.incrementToken()) { |
| + if (numericAtt.getShift() == 0) { |
| + assertEquals(value, numericAtt.getRawValue()); |
| + seen = true; |
| + } |
| + } |
| + ts.end(); |
| + ts.close(); |
| + assertTrue(seen); |
| + } |
| +} |
| |
| Property changes on: lucene/core/src/test/org/apache/lucene/index/TestFieldReuse.java |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| ## -0,0 +1 ## |
| +native |
| \ No newline at end of property |
| Index: lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (revision 1591787) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (working copy) |
| @@ -1602,7 +1602,7 @@ |
| } |
| |
| @Override |
| - public TokenStream tokenStream(Analyzer analyzer) throws IOException { |
| + public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException { |
| return null; |
| } |
| }); |
| Index: lucene/core/src/test/org/apache/lucene/index/TestIndexableField.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestIndexableField.java (revision 1591787) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestIndexableField.java (working copy) |
| @@ -154,7 +154,7 @@ |
| } |
| |
| @Override |
| - public TokenStream tokenStream(Analyzer analyzer) throws IOException { |
| + public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException { |
| return readerValue() != null ? analyzer.tokenStream(name(), readerValue()) : |
| analyzer.tokenStream(name(), new StringReader(stringValue())); |
| } |