| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.analysis.ja; |
| |
| |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.io.LineNumberReader; |
| import java.nio.charset.StandardCharsets; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.BaseTokenStreamTestCase; |
| import org.apache.lucene.analysis.Tokenizer; |
| import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; |
| |
| public class TestSearchMode extends BaseTokenStreamTestCase { |
| private final static String SEGMENTATION_FILENAME = "search-segmentation-tests.txt"; |
| private Analyzer analyzer; |
| |
| @Override |
| public void setUp() throws Exception { |
| super.setUp(); |
| analyzer = new Analyzer() { |
| @Override |
| protected TokenStreamComponents createComponents(String fieldName) { |
| Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), null, true, Mode.SEARCH); |
| return new TokenStreamComponents(tokenizer, tokenizer); |
| } |
| }; |
| } |
| |
| @Override |
| public void tearDown() throws Exception { |
| analyzer.close(); |
| super.tearDown(); |
| } |
| |
| /** Test search mode segmentation */ |
| public void testSearchSegmentation() throws IOException { |
| InputStream is = TestSearchMode.class.getResourceAsStream(SEGMENTATION_FILENAME); |
| if (is == null) { |
| throw new FileNotFoundException("Cannot find " + SEGMENTATION_FILENAME + " in test classpath"); |
| } |
| try { |
| LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8)); |
| String line = null; |
| while ((line = reader.readLine()) != null) { |
| // Remove comments |
| line = line.replaceAll("#.*$", ""); |
| // Skip empty lines or comment lines |
| if (line.trim().isEmpty()) { |
| continue; |
| } |
| if (VERBOSE) { |
| System.out.println("Line no. " + reader.getLineNumber() + ": " + line); |
| } |
| String[] fields = line.split("\t", 2); |
| String sourceText = fields[0]; |
| String[] expectedTokens = fields[1].split("\\s+"); |
| int[] expectedPosIncrs = new int[expectedTokens.length]; |
| int[] expectedPosLengths = new int[expectedTokens.length]; |
| for(int tokIDX=0;tokIDX<expectedTokens.length;tokIDX++) { |
| if (expectedTokens[tokIDX].endsWith("/0")) { |
| expectedTokens[tokIDX] = expectedTokens[tokIDX].replace("/0", ""); |
| expectedPosLengths[tokIDX] = expectedTokens.length-1; |
| } else { |
| expectedPosIncrs[tokIDX] = 1; |
| expectedPosLengths[tokIDX] = 1; |
| } |
| } |
| assertAnalyzesTo(analyzer, sourceText, expectedTokens, expectedPosIncrs); |
| } |
| } finally { |
| is.close(); |
| } |
| } |
| } |