commit 6c90079c80102d3eaa314921e06ff5d12353634e
Author: Gueust <jeanbaptiste.lespiau@gmail.com>
Date: Sat Jan 30 02:07:44 2016 +0100
Add a class to debug a TokenFilter
When testing a TokenFilter, one may need to print the result of running
input strings through that TokenFilter's tokenization chain.
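Example usage from a test (a minimal sketch; LowerCaseFilter is only an
illustrative filter, not part of this patch):

  // build the chain to debug on top of the whitespace tokenizer that
  // debugTokenFilter creates internally, and print every emitted token
  TokenFilterDebugging.debugTokenFilter(
      ts -> new LowerCaseFilter(ts), "Some INPUT text");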
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/TokenFilterDebugging.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/TokenFilterDebugging.java
new file mode 100644
index 0000000..0484930
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/TokenFilterDebugging.java
@@ -0,0 +1,53 @@
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.function.Function;
+
+/** Tools to debug TokenFilters by printing the results of the tokenization */
+public final class TokenFilterDebugging {
+
+  /**
+   * Prints the tokens produced for the string input by the filter chain that
+   * filterFactory builds on top of a MockTokenizer.WHITESPACE tokenizer.
+   *
+   * @param filterFactory Wraps the whitespace tokenizer with the TokenFilter(s) to debug
+   * @param input The string to be analyzed by the resulting chain
+   */
+  public static void debugTokenFilter(Function<TokenStream, TokenStream> filterFactory, final String input) throws IOException {
+
+    Analyzer a = new Analyzer() {
+      @Override
+      public TokenStreamComponents createComponents(String field) {
+        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        // chain the filter(s) under test on top of the freshly created tokenizer
+        return new TokenStreamComponents(tokenizer, filterFactory.apply(tokenizer));
+      }
+    };
+    TokenStream ts = a.tokenStream("Dummy token stream", input);
+    ts.reset();
+    while (ts.incrementToken()) {
+      System.out.println(ts.reflectAsString(false));
+    }
+    ts.end();
+    ts.close();
+    System.out.flush();
+    a.close();
+  }
+}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
index 6c7c725..63a26ec 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
@@ -29,9 +29,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
// TODO: rename to OffsetsXXXTF? ie we only validate
// offsets (now anyway...)
-// TODO: also make a DebuggingTokenFilter, that just prints
-// all att values that come through it...
-
// TODO: BTSTC should just append this to the chain
// instead of checking itself: