| commit 6c90079c80102d3eaa314921e06ff5d12353634e |
| Author: Gueust <jeanbaptiste.lespiau@gmail.com> |
| Date: Sat Jan 30 02:07:44 2016 +0100 |
| |
| Add a class to debug a TokenFilter |
| |
| When testing a TokenFilter, one may need to print the tokens that this |
| TokenFilter produces for given input strings |
| |
| diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/TokenFilterDebugging.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/TokenFilterDebugging.java |
| new file mode 100644 |
| index 0000000..0484930 |
| --- /dev/null |
| +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/TokenFilterDebugging.java |
| @@ -0,0 +1,49 @@ |
| +package org.apache.lucene.analysis; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +/** Tools to debug TokenFilters by printing the results of the tokenization */ |
| +public final class TokenFilterDebugging { |
| + |
| +  /** |
| +   * Print the result of the given TokenFilter on the string input, after being |
| +   * analyzed by MockTokenizer.WHITESPACE; one line per token goes to System.out. |
| +   * @param tokenFilter The TokenFilter to debug |
| +   * @param input The string to be analyzed by the given tokenFilter |
| +   * @throws IOException if the token stream reports an I/O error |
| +   */ |
| +  public static void debugTokenFilter(TokenFilter tokenFilter, final String input) throws IOException { |
| +    Analyzer analyzer = new Analyzer() { |
| +      @Override |
| +      public TokenStreamComponents createComponents(String field) { |
| +        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); |
| +        // NOTE(review): tokenFilter is not constructed over this tokenizer here; confirm it consumes it |
| +        return new TokenStreamComponents(tokenizer, tokenFilter); |
| +      } |
| +    }; |
| +    try (Analyzer a = analyzer; TokenStream ts = a.tokenStream("Dummy token stream", input)) { |
| +      ts.reset(); |
| +      while (ts.incrementToken()) { |
| +        System.out.println(ts.reflectAsString(false)); |
| +      } |
| +      ts.end(); // finish the reset/incrementToken/end/close consumer workflow |
| +      System.out.flush(); |
| +    } // resources close in reverse order: ts first, then the analyzer, even on failure |
| +  } |
| +} |
| diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java |
| index 6c7c725..63a26ec 100644 |
| --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java |
| +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java |
| @@ -29,9 +29,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; |
| // TODO: rename to OffsetsXXXTF? ie we only validate |
| // offsets (now anyway...) |
| |
| -// TODO: also make a DebuggingTokenFilter, that just prints |
| -// all att values that come through it... |
| - |
| // TODO: BTSTC should just append this to the chain |
| // instead of checking itself: |
| |