| Index: CHANGES.txt |
| =================================================================== |
| --- CHANGES.txt (revision 793530) |
| +++ CHANGES.txt (working copy) |
| @@ -135,6 +135,13 @@ |
| true in all Lucene releases before 2.3, but was broken in 2.3 and |
| 2.4, and is now fixed in 2.9. (Mike McCandless) |
| |
| +11. LUCENE-1678: The addition of Analyzer.reusableTokenStream |
| + accidentally broke back compatibility of external analyzers that |
| + subclassed core analyzers that implemented tokenStream but not |
| + reusableTokenStream. This is now fixed, such that if |
| + reusableTokenStream is invoked on such a subclass, that method |
| + will forcefully fallback to tokenStream. (Mike McCandless) |
| + |
| API Changes |
| |
| 1. LUCENE-1419: Add expert API to set custom indexing chain. This API is |
| Index: src/test/org/apache/lucene/analysis/TestAnalyzers.java |
| =================================================================== |
| --- src/test/org/apache/lucene/analysis/TestAnalyzers.java (revision 793530) |
| +++ src/test/org/apache/lucene/analysis/TestAnalyzers.java (working copy) |
| @@ -19,8 +19,10 @@ |
| |
| import java.io.IOException; |
| import java.io.StringReader; |
| +import java.io.Reader; |
| |
| import org.apache.lucene.analysis.standard.StandardTokenizer; |
| +import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; |
| import org.apache.lucene.analysis.tokenattributes.TermAttribute; |
| import org.apache.lucene.index.Payload; |
| @@ -130,6 +132,21 @@ |
| x = StandardTokenizer.CJ; |
| String[] y = StandardTokenizer.TOKEN_TYPES; |
| } |
| + |
| + private static class MyStandardAnalyzer extends StandardAnalyzer { |
| + public TokenStream tokenStream(String field, Reader reader) { |
| + return new WhitespaceAnalyzer().tokenStream(field, reader); |
| + } |
| + } |
| + |
| + public void testSubclassOverridingOnlyTokenStream() throws Throwable { |
| + Analyzer a = new MyStandardAnalyzer(); |
| + TokenStream ts = a.reusableTokenStream("field", new StringReader("the")); |
| + // StandardAnalyzer will discard "the" (it's a |
| + // stopword), by my subclass will not: |
| + assertTrue(ts.incrementToken()); |
| + assertFalse(ts.incrementToken()); |
| + } |
| } |
| |
| class PayloadSetter extends TokenFilter { |
| Index: src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java |
| =================================================================== |
| --- src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (revision 793530) |
| +++ src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (working copy) |
| @@ -44,7 +44,6 @@ |
| */ |
| public class StandardAnalyzer extends Analyzer { |
| private Set stopSet; |
| - private Version matchVersion; |
| |
| /** |
| * Specifies whether deprecated acronyms should be replaced with HOST type. |
| @@ -262,7 +261,7 @@ |
| } |
| |
| private final void init(Version matchVersion) { |
| - this.matchVersion = matchVersion; |
| + setOverridesTokenStreamMethod(StandardAnalyzer.class); |
| if (matchVersion.onOrAfter(Version.LUCENE_29)) { |
| enableStopPositionIncrements = true; |
| } else { |
| @@ -314,6 +313,12 @@ |
| |
| /** @deprecated Use {@link #tokenStream} instead */ |
| public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { |
| + if (overridesTokenStreamMethod) { |
| + // LUCENE-1678: force fallback to tokenStream() if we |
| + // have been subclassed and that subclass overrides |
| + // tokenStream but not reusableTokenStream |
| + return tokenStream(fieldName, reader); |
| + } |
| SavedStreams streams = (SavedStreams) getPreviousTokenStream(); |
| if (streams == null) { |
| streams = new SavedStreams(); |
| Index: src/java/org/apache/lucene/analysis/Analyzer.java |
| =================================================================== |
| --- src/java/org/apache/lucene/analysis/Analyzer.java (revision 793530) |
| +++ src/java/org/apache/lucene/analysis/Analyzer.java (working copy) |
| @@ -19,6 +19,7 @@ |
| |
| import java.io.Reader; |
| import java.io.IOException; |
| +import java.lang.reflect.Method; |
| |
| import org.apache.lucene.util.CloseableThreadLocal; |
| import org.apache.lucene.store.AlreadyClosedException; |
| @@ -32,7 +33,8 @@ |
| */ |
| public abstract class Analyzer { |
| /** Creates a TokenStream which tokenizes all the text in the provided |
| - * Reader. Must be able to handle null field name for backward compatibility. |
| + * Reader. Must be able to handle null field name for |
| + * backward compatibility. |
| */ |
| public abstract TokenStream tokenStream(String fieldName, Reader reader); |
| |
| @@ -79,7 +81,30 @@ |
| } |
| } |
| |
| + protected boolean overridesTokenStreamMethod; |
| |
| + /** @deprecated This is only present to preserve |
| + * back-compat of classes that subclass a core analyzer |
| + * and override tokenStream but not reusableTokenStream */ |
| + protected void setOverridesTokenStreamMethod(Class baseClass) { |
| + |
| + final Class[] params = new Class[2]; |
| + params[0] = String.class; |
| + params[1] = Reader.class; |
| + |
| + try { |
| + Method m = this.getClass().getMethod("tokenStream", params); |
| + if (m != null) { |
| + overridesTokenStreamMethod = m.getDeclaringClass() != baseClass; |
| + } else { |
| + overridesTokenStreamMethod = false; |
| + } |
| + } catch (NoSuchMethodException nsme) { |
| + overridesTokenStreamMethod = false; |
| + } |
| + } |
| + |
| + |
| /** |
| * Invoked before indexing a Fieldable instance if |
| * terms have already been added to that field. This allows custom |
| Index: src/java/org/apache/lucene/analysis/KeywordAnalyzer.java |
| =================================================================== |
| --- src/java/org/apache/lucene/analysis/KeywordAnalyzer.java (revision 793530) |
| +++ src/java/org/apache/lucene/analysis/KeywordAnalyzer.java (working copy) |
| @@ -25,18 +25,27 @@ |
| * for data like zip codes, ids, and some product names. |
| */ |
| public class KeywordAnalyzer extends Analyzer { |
| + public KeywordAnalyzer() { |
| + setOverridesTokenStreamMethod(KeywordAnalyzer.class); |
| + } |
| public TokenStream tokenStream(String fieldName, |
| final Reader reader) { |
| return new KeywordTokenizer(reader); |
| } |
| public TokenStream reusableTokenStream(String fieldName, |
| final Reader reader) throws IOException { |
| + if (overridesTokenStreamMethod) { |
| + // LUCENE-1678: force fallback to tokenStream() if we |
| + // have been subclassed and that subclass overrides |
| + // tokenStream but not reusableTokenStream |
| + return tokenStream(fieldName, reader); |
| + } |
| Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream(); |
| if (tokenizer == null) { |
| tokenizer = new KeywordTokenizer(reader); |
| setPreviousTokenStream(tokenizer); |
| } else |
| - tokenizer.reset(reader); |
| + tokenizer.reset(reader); |
| return tokenizer; |
| } |
| } |
| Index: src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java |
| =================================================================== |
| --- src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java (revision 793530) |
| +++ src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java (working copy) |
| @@ -55,6 +55,7 @@ |
| */ |
| public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) { |
| this.defaultAnalyzer = defaultAnalyzer; |
| + setOverridesTokenStreamMethod(PerFieldAnalyzerWrapper.class); |
| } |
| |
| /** |
| @@ -77,6 +78,12 @@ |
| } |
| |
| public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { |
| + if (overridesTokenStreamMethod) { |
| + // LUCENE-1678: force fallback to tokenStream() if we |
| + // have been subclassed and that subclass overrides |
| + // tokenStream but not reusableTokenStream |
| + return tokenStream(fieldName, reader); |
| + } |
| Analyzer analyzer = (Analyzer) analyzerMap.get(fieldName); |
| if (analyzer == null) |
| analyzer = defaultAnalyzer; |