diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
index a697cce..c04f5a8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
@@ -146,6 +146,12 @@ public final class CustomAnalyzer extends Analyzer {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
     TokenStream result = in;
+    // tokenizers can return a tokenfilter if the tokenizer does normalization,
+    // although this is really bogus/abstraction violation...
+    if (tokenizer instanceof MultiTermAwareComponent) {
+      TokenFilterFactory filter = (TokenFilterFactory) ((MultiTermAwareComponent) tokenizer).getMultiTermComponent();
+      result = filter.create(result);
+    }
     for (TokenFilterFactory filter : tokenFilters) {
       if (filter instanceof MultiTermAwareComponent) {
         filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
index d9ea43c..8614ee1 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
@@ -31,9 +31,9 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
 import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory;
-import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
 import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
 import org.apache.lucene.analysis.core.LowerCaseTokenizer;
+import org.apache.lucene.analysis.core.LowerCaseTokenizerFactory;
 import org.apache.lucene.analysis.core.StopFilterFactory;
 import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
 import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
@@ -431,7 +431,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
 
     @Override
     public AbstractAnalysisFactory getMultiTermComponent() {
-      return new KeywordTokenizerFactory(getOriginalArgs());
+      return new DummyTokenFilterFactory(Collections.emptyMap());
     }
 
   }
@@ -499,5 +499,13 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
         .build();
     assertEquals(new BytesRef("e f c"), analyzer.normalize("dummy", "a b c"));
   }
+
+  /** test normalize where the TokenizerFactory returns a filter to normalize the text */
+  public void testNormalizationWithLowerCaseTokenizer() throws IOException {
+    CustomAnalyzer analyzer1 = CustomAnalyzer.builder()
+        .withTokenizer(LowerCaseTokenizerFactory.class, Collections.emptyMap())
+        .build();
+    assertEquals(new BytesRef("abc"), analyzer1.normalize("dummy", "ABC"));
+  }
 
 }