Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 793530)
+++ CHANGES.txt (working copy)
@@ -135,6 +135,13 @@
true in all Lucene releases before 2.3, but was broken in 2.3 and
2.4, and is now fixed in 2.9. (Mike McCandless)
+11. LUCENE-1678: The addition of Analyzer.reusableTokenStream
+ accidentally broke back compatibility of external analyzers that
+ subclassed core analyzers that implemented tokenStream but not
+ reusableTokenStream. This is now fixed, such that if
+ reusableTokenStream is invoked on such a subclass, that method
+ will forcefully fall back to tokenStream. (Mike McCandless)
+
API Changes
1. LUCENE-1419: Add expert API to set custom indexing chain. This API is
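
For context, the break described in the CHANGES entry above affects external analyzers of roughly this shape; a minimal sketch against the 2.9 analysis API (the class name LowercaseOnlyAnalyzer is hypothetical and not part of the patch):

import java.io.Reader;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

// An external analyzer written before reusableTokenStream existed:
// it overrides tokenStream only.  Before this patch, calling
// reusableTokenStream on it silently used StandardAnalyzer's cached
// stream and ignored this override; with the patch, the call falls
// back to tokenStream below.
public class LowercaseOnlyAnalyzer extends StandardAnalyzer {
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return new LowerCaseTokenizer(reader);
  }
}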
Index: src/test/org/apache/lucene/analysis/TestAnalyzers.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestAnalyzers.java (revision 793530)
+++ src/test/org/apache/lucene/analysis/TestAnalyzers.java (working copy)
@@ -19,8 +19,10 @@
import java.io.IOException;
import java.io.StringReader;
+import java.io.Reader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Payload;
@@ -130,6 +132,21 @@
x = StandardTokenizer.CJ;
String[] y = StandardTokenizer.TOKEN_TYPES;
}
+
+ private static class MyStandardAnalyzer extends StandardAnalyzer {
+ public TokenStream tokenStream(String field, Reader reader) {
+ return new WhitespaceAnalyzer().tokenStream(field, reader);
+ }
+ }
+
+ public void testSubclassOverridingOnlyTokenStream() throws Throwable {
+ Analyzer a = new MyStandardAnalyzer();
+ TokenStream ts = a.reusableTokenStream("field", new StringReader("the"));
+ // StandardAnalyzer will discard "the" (it's a
+ // stopword), but my subclass will not:
+ assertTrue(ts.incrementToken());
+ assertFalse(ts.incrementToken());
+ }
}
class PayloadSetter extends TokenFilter {
Index: src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (revision 793530)
+++ src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (working copy)
@@ -44,7 +44,6 @@
*/
public class StandardAnalyzer extends Analyzer {
private Set stopSet;
- private Version matchVersion;
/**
* Specifies whether deprecated acronyms should be replaced with HOST type.
@@ -262,7 +261,7 @@
}
private final void init(Version matchVersion) {
- this.matchVersion = matchVersion;
+ setOverridesTokenStreamMethod(StandardAnalyzer.class);
if (matchVersion.onOrAfter(Version.LUCENE_29)) {
enableStopPositionIncrements = true;
} else {
@@ -314,6 +313,12 @@
/** @deprecated Use {@link #tokenStream} instead */
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+ if (overridesTokenStreamMethod) {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return tokenStream(fieldName, reader);
+ }
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
Index: src/java/org/apache/lucene/analysis/Analyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/Analyzer.java (revision 793530)
+++ src/java/org/apache/lucene/analysis/Analyzer.java (working copy)
@@ -19,6 +19,7 @@
import java.io.Reader;
import java.io.IOException;
+import java.lang.reflect.Method;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.store.AlreadyClosedException;
@@ -32,7 +33,8 @@
*/
public abstract class Analyzer {
/** Creates a TokenStream which tokenizes all the text in the provided
- * Reader. Must be able to handle null field name for backward compatibility.
+ * Reader. Must be able to handle null field name for
+ * backward compatibility.
*/
public abstract TokenStream tokenStream(String fieldName, Reader reader);
@@ -79,7 +81,30 @@
}
}
+ protected boolean overridesTokenStreamMethod;
+ /** @deprecated This is only present to preserve
+ * back-compat of classes that subclass a core analyzer
+ * and override tokenStream but not reusableTokenStream */
+ protected void setOverridesTokenStreamMethod(Class baseClass) {
+
+ final Class[] params = new Class[2];
+ params[0] = String.class;
+ params[1] = Reader.class;
+
+ try {
+ Method m = this.getClass().getMethod("tokenStream", params);
+ if (m != null) {
+ overridesTokenStreamMethod = m.getDeclaringClass() != baseClass;
+ } else {
+ overridesTokenStreamMethod = false;
+ }
+ } catch (NoSuchMethodException nsme) {
+ overridesTokenStreamMethod = false;
+ }
+ }
+
+
/**
* Invoked before indexing a Fieldable instance if
* terms have already been added to that field. This allows custom
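
The reflection check above relies on Class.getMethod returning the most-derived public declaration of tokenStream, so comparing its declaring class against the core base class reveals whether a subclass overrides it. A standalone sketch of that behavior (all class names here are hypothetical, not part of the patch):

import java.io.Reader;
import java.lang.reflect.Method;

public class OverrideCheckDemo {
  static class Base {
    public Object tokenStream(String field, Reader reader) { return null; }
  }
  static class Overriding extends Base {
    public Object tokenStream(String field, Reader reader) { return null; }
  }
  static class Inheriting extends Base {}

  public static void main(String[] args) throws Exception {
    Class[] params = new Class[] { String.class, Reader.class };
    Method m1 = Overriding.class.getMethod("tokenStream", params);
    Method m2 = Inheriting.class.getMethod("tokenStream", params);
    // true: Overriding declares its own tokenStream
    System.out.println(m1.getDeclaringClass() != Base.class);
    // false: Inheriting only inherits Base's tokenStream
    System.out.println(m2.getDeclaringClass() != Base.class);
  }
}

Note that Class.getMethod throws NoSuchMethodException rather than returning null when no such public method exists, so the declaring-class comparison is what actually distinguishes the two cases.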
Index: src/java/org/apache/lucene/analysis/KeywordAnalyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/KeywordAnalyzer.java (revision 793530)
+++ src/java/org/apache/lucene/analysis/KeywordAnalyzer.java (working copy)
@@ -25,18 +25,27 @@
* for data like zip codes, ids, and some product names.
*/
public class KeywordAnalyzer extends Analyzer {
+ public KeywordAnalyzer() {
+ setOverridesTokenStreamMethod(KeywordAnalyzer.class);
+ }
public TokenStream tokenStream(String fieldName,
final Reader reader) {
return new KeywordTokenizer(reader);
}
public TokenStream reusableTokenStream(String fieldName,
final Reader reader) throws IOException {
+ if (overridesTokenStreamMethod) {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return tokenStream(fieldName, reader);
+ }
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
if (tokenizer == null) {
tokenizer = new KeywordTokenizer(reader);
setPreviousTokenStream(tokenizer);
} else
- tokenizer.reset(reader);
+ tokenizer.reset(reader);
return tokenizer;
}
}
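
The same two-step pattern (record the override in the constructor, then check the flag before reusing a cached stream) recurs in each core analyzer touched by this patch. A hedged sketch of how it composes in a reuse-capable analyzer, using a hypothetical whitespace-based class that is not part of the patch:

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;

public class SimpleReusableAnalyzer extends Analyzer {
  public SimpleReusableAnalyzer() {
    // step 1: note whether a subclass overrides tokenStream
    setOverridesTokenStreamMethod(SimpleReusableAnalyzer.class);
  }
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return new WhitespaceTokenizer(reader);
  }
  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    // step 2: if a subclass replaced tokenStream but not this
    // method, honor its override instead of reusing our stream
    if (overridesTokenStreamMethod) {
      return tokenStream(fieldName, reader);
    }
    Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
    if (tokenizer == null) {
      tokenizer = new WhitespaceTokenizer(reader);
      setPreviousTokenStream(tokenizer);
    } else {
      tokenizer.reset(reader);
    }
    return tokenizer;
  }
}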
Index: src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java
===================================================================
--- src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java (revision 793530)
+++ src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java (working copy)
@@ -55,6 +55,7 @@
*/
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) {
this.defaultAnalyzer = defaultAnalyzer;
+ setOverridesTokenStreamMethod(PerFieldAnalyzerWrapper.class);
}
/**
@@ -77,6 +78,12 @@
}
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+ if (overridesTokenStreamMethod) {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return tokenStream(fieldName, reader);
+ }
Analyzer analyzer = (Analyzer) analyzerMap.get(fieldName);
if (analyzer == null)
analyzer = defaultAnalyzer;
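
Seen from the caller's side, the wrapper case behaves the same way; a minimal usage sketch (LoggingWrapper, the field name, and the sample text are made up for illustration):

import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;

public class WrapperFallbackDemo {
  // A wrapper subclass that customizes tokenStream only.
  static class LoggingWrapper extends PerFieldAnalyzerWrapper {
    LoggingWrapper(Analyzer defaultAnalyzer) {
      super(defaultAnalyzer);
    }
    public TokenStream tokenStream(String fieldName, Reader reader) {
      System.out.println("analyzing field: " + fieldName);
      return super.tokenStream(fieldName, reader);
    }
  }

  public static void main(String[] args) throws Exception {
    Analyzer a = new LoggingWrapper(new WhitespaceAnalyzer());
    // With the patch, this call detects the override and routes
    // through LoggingWrapper.tokenStream rather than the wrapper's
    // cached per-field streams.
    TokenStream ts = a.reusableTokenStream("body", new StringReader("hello world"));
    while (ts.incrementToken()) {}  // consume the stream
  }
}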