lucene/luke/src/test/org/apache/lucene/luke/models/analysis/TestAnalysisImpl.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.lucene.luke.models.analysis;

 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.custom.CustomAnalyzer;
 import org.apache.lucene.luke.models.LukeException;
 import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Test;

 public class TestAnalysisImpl extends LuceneTestCase {

   @Test
   public void testGetPresetAnalyzerTypes() throws Exception {
     AnalysisImpl analysis = new AnalysisImpl();
     Collection<Class<? extends Analyzer>> analyerTypes = analysis.getPresetAnalyzerTypes();
     assertNotNull(analyerTypes);
     for (Class<? extends Analyzer> clazz : analyerTypes) {
       clazz.getConstructor().newInstance();
     }
   }

   @Test
   public void testGetAvailableCharFilters() {
     AnalysisImpl analysis = new AnalysisImpl();
     Collection<String> charFilters = analysis.getAvailableCharFilters();
     assertNotNull(charFilters);
   }

   @Test
   public void testGetAvailableTokenizers() {
     AnalysisImpl analysis = new AnalysisImpl();
     Collection<String> tokenizers = analysis.getAvailableTokenizers();
     assertNotNull(tokenizers);
   }

   @Test
   public void testGetAvailableTokenFilters() {
     AnalysisImpl analysis = new AnalysisImpl();
     Collection<String> tokenFilters = analysis.getAvailableTokenFilters();
     assertNotNull(tokenFilters);
   }

   @Test
   public void testAnalyze_preset() {
     AnalysisImpl analysis = new AnalysisImpl();
     String analyzerType = "org.apache.lucene.analysis.standard.StandardAnalyzer";
     Analyzer analyzer = analysis.createAnalyzerFromClassName(analyzerType);
     assertEquals(analyzerType, analyzer.getClass().getName());

     String text =
         "It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.";
     List<Analysis.Token> tokens = analysis.analyze(text);
     assertNotNull(tokens);
   }

   @Test
   public void testAnalyze_custom() {
     AnalysisImpl analysis = new AnalysisImpl();
     Map<String, String> tkParams = new HashMap<>();
     tkParams.put("maxTokenLen", "128");
     CustomAnalyzerConfig.Builder builder =
         new CustomAnalyzerConfig.Builder("keyword", tkParams)
             .addTokenFilterConfig("lowercase", Collections.emptyMap());
     CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
     assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
     assertEquals(
         "org.apache.lucene.analysis.core.KeywordTokenizerFactory",
         analyzer.getTokenizerFactory().getClass().getName());
     assertEquals(
         "org.apache.lucene.analysis.core.LowerCaseFilterFactory",
         analyzer.getTokenFilterFactories().get(0).getClass().getName());

     String text = "Apache Lucene";
     List<Analysis.Token> tokens = analysis.analyze(text);
     assertNotNull(tokens);
   }

   @Test
   public void testAnalyzer_custom_with_confdir() throws Exception {
     Path confDir = createTempDir("conf");
     Path stopFile = Files.createFile(Paths.get(confDir.toString(), "stop.txt"));
     Files.write(stopFile, "of\nthe\nby\nfor\n".getBytes(StandardCharsets.UTF_8));

     AnalysisImpl analysis = new AnalysisImpl();
     Map<String, String> tkParams = new HashMap<>();
     tkParams.put("maxTokenLen", "128");
     Map<String, String> tfParams = new HashMap<>();
     tfParams.put("ignoreCase", "true");
     tfParams.put("words", "stop.txt");
     tfParams.put("format", "wordset");
     CustomAnalyzerConfig.Builder builder =
         new CustomAnalyzerConfig.Builder("whitespace", tkParams)
             .configDir(confDir.toString())
             .addTokenFilterConfig("lowercase", Collections.emptyMap())
             .addTokenFilterConfig("stop", tfParams);
     CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
     assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
     assertEquals(
         "org.apache.lucene.analysis.core.WhitespaceTokenizerFactory",
         analyzer.getTokenizerFactory().getClass().getName());
     assertEquals(
         "org.apache.lucene.analysis.core.LowerCaseFilterFactory",
         analyzer.getTokenFilterFactories().get(0).getClass().getName());
     assertEquals(
         "org.apache.lucene.analysis.core.StopFilterFactory",
         analyzer.getTokenFilterFactories().get(1).getClass().getName());

     String text = "Government of the People, by the People, for the People";
     List<Analysis.Token> tokens = analysis.analyze(text);
     assertNotNull(tokens);
   }

   @Test(expected = LukeException.class)
   public void testAnalyze_not_set() {
     AnalysisImpl analysis = new AnalysisImpl();
     String text = "This test must fail.";
     analysis.analyze(text);
   }

   @Test(expected = LukeException.class)
   public void testAnalyzeStepByStep_preset() {
     AnalysisImpl analysis = new AnalysisImpl();
     String analyzerType = "org.apache.lucene.analysis.standard.StandardAnalyzer";
     Analyzer analyzer = analysis.createAnalyzerFromClassName(analyzerType);
     assertEquals(analyzerType, analyzer.getClass().getName());

     String text = "This test must fail.";
     analysis.analyzeStepByStep(text);
   }

   @Test
   public void testAnalyzeStepByStep_custom() {
     AnalysisImpl analysis = new AnalysisImpl();
     Map<String, String> tkParams = new HashMap<>();
     tkParams.put("maxTokenLen", "128");
     CustomAnalyzerConfig.Builder builder =
         new CustomAnalyzerConfig.Builder("keyword", tkParams)
             .addTokenFilterConfig("lowercase", Collections.emptyMap())
             .addCharFilterConfig("htmlstrip", Collections.emptyMap());
     CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
     assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
     assertEquals(
         "org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory",
         analyzer.getCharFilterFactories().get(0).getClass().getName());
     assertEquals(
         "org.apache.lucene.analysis.core.KeywordTokenizerFactory",
         analyzer.getTokenizerFactory().getClass().getName());
     assertEquals(
         "org.apache.lucene.analysis.core.LowerCaseFilterFactory",
         analyzer.getTokenFilterFactories().get(0).getClass().getName());

     String text = "Apache Lucene";
     Analysis.StepByStepResult result = analysis.analyzeStepByStep(text);
     assertNotNull(result);
     assertNotNull(result.getCharfilteredTexts());
     assertEquals(1, result.getCharfilteredTexts().size());
     assertEquals("htmlStrip", result.getCharfilteredTexts().get(0).getName());

     assertNotNull(result.getNamedTokens());
     assertEquals(2, result.getNamedTokens().size());
     // FIXME check each namedTokensList
     assertEquals("keyword", result.getNamedTokens().get(0).getName());
     assertEquals("lowercase", result.getNamedTokens().get(1).getName());
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.lucene.luke.models.analysis;

	import java.nio.charset.StandardCharsets;
	import java.nio.file.Files;
	import java.nio.file.Path;
	import java.nio.file.Paths;
	import java.util.Collection;
	import java.util.Collections;
	import java.util.HashMap;
	import java.util.List;
	import java.util.Map;
	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.analysis.custom.CustomAnalyzer;
	import org.apache.lucene.luke.models.LukeException;
	import org.apache.lucene.util.LuceneTestCase;
	import org.junit.Test;

	public class TestAnalysisImpl extends LuceneTestCase {

	@Test
	public void testGetPresetAnalyzerTypes() throws Exception {
	AnalysisImpl analysis = new AnalysisImpl();
	Collection<Class<? extends Analyzer>> analyerTypes = analysis.getPresetAnalyzerTypes();
	assertNotNull(analyerTypes);
	for (Class<? extends Analyzer> clazz : analyerTypes) {
	clazz.getConstructor().newInstance();
	}
	}

	@Test
	public void testGetAvailableCharFilters() {
	AnalysisImpl analysis = new AnalysisImpl();
	Collection<String> charFilters = analysis.getAvailableCharFilters();
	assertNotNull(charFilters);
	}

	@Test
	public void testGetAvailableTokenizers() {
	AnalysisImpl analysis = new AnalysisImpl();
	Collection<String> tokenizers = analysis.getAvailableTokenizers();
	assertNotNull(tokenizers);
	}

	@Test
	public void testGetAvailableTokenFilters() {
	AnalysisImpl analysis = new AnalysisImpl();
	Collection<String> tokenFilters = analysis.getAvailableTokenFilters();
	assertNotNull(tokenFilters);
	}

	@Test
	public void testAnalyze_preset() {
	AnalysisImpl analysis = new AnalysisImpl();
	String analyzerType = "org.apache.lucene.analysis.standard.StandardAnalyzer";
	Analyzer analyzer = analysis.createAnalyzerFromClassName(analyzerType);
	assertEquals(analyzerType, analyzer.getClass().getName());

	String text =
	"It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.";
	List<Analysis.Token> tokens = analysis.analyze(text);
	assertNotNull(tokens);
	}

	@Test
	public void testAnalyze_custom() {
	AnalysisImpl analysis = new AnalysisImpl();
	Map<String, String> tkParams = new HashMap<>();
	tkParams.put("maxTokenLen", "128");
	CustomAnalyzerConfig.Builder builder =
	new CustomAnalyzerConfig.Builder("keyword", tkParams)
	.addTokenFilterConfig("lowercase", Collections.emptyMap());
	CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
	assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
	assertEquals(
	"org.apache.lucene.analysis.core.KeywordTokenizerFactory",
	analyzer.getTokenizerFactory().getClass().getName());
	assertEquals(
	"org.apache.lucene.analysis.core.LowerCaseFilterFactory",
	analyzer.getTokenFilterFactories().get(0).getClass().getName());

	String text = "Apache Lucene";
	List<Analysis.Token> tokens = analysis.analyze(text);
	assertNotNull(tokens);
	}

	@Test
	public void testAnalyzer_custom_with_confdir() throws Exception {
	Path confDir = createTempDir("conf");
	Path stopFile = Files.createFile(Paths.get(confDir.toString(), "stop.txt"));
	Files.write(stopFile, "of\nthe\nby\nfor\n".getBytes(StandardCharsets.UTF_8));

	AnalysisImpl analysis = new AnalysisImpl();
	Map<String, String> tkParams = new HashMap<>();
	tkParams.put("maxTokenLen", "128");
	Map<String, String> tfParams = new HashMap<>();
	tfParams.put("ignoreCase", "true");
	tfParams.put("words", "stop.txt");
	tfParams.put("format", "wordset");
	CustomAnalyzerConfig.Builder builder =
	new CustomAnalyzerConfig.Builder("whitespace", tkParams)
	.configDir(confDir.toString())
	.addTokenFilterConfig("lowercase", Collections.emptyMap())
	.addTokenFilterConfig("stop", tfParams);
	CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
	assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
	assertEquals(
	"org.apache.lucene.analysis.core.WhitespaceTokenizerFactory",
	analyzer.getTokenizerFactory().getClass().getName());
	assertEquals(
	"org.apache.lucene.analysis.core.LowerCaseFilterFactory",
	analyzer.getTokenFilterFactories().get(0).getClass().getName());
	assertEquals(
	"org.apache.lucene.analysis.core.StopFilterFactory",
	analyzer.getTokenFilterFactories().get(1).getClass().getName());

	String text = "Government of the People, by the People, for the People";
	List<Analysis.Token> tokens = analysis.analyze(text);
	assertNotNull(tokens);
	}

	@Test(expected = LukeException.class)
	public void testAnalyze_not_set() {
	AnalysisImpl analysis = new AnalysisImpl();
	String text = "This test must fail.";
	analysis.analyze(text);
	}

	@Test(expected = LukeException.class)
	public void testAnalyzeStepByStep_preset() {
	AnalysisImpl analysis = new AnalysisImpl();
	String analyzerType = "org.apache.lucene.analysis.standard.StandardAnalyzer";
	Analyzer analyzer = analysis.createAnalyzerFromClassName(analyzerType);
	assertEquals(analyzerType, analyzer.getClass().getName());

	String text = "This test must fail.";
	analysis.analyzeStepByStep(text);
	}

	@Test
	public void testAnalyzeStepByStep_custom() {
	AnalysisImpl analysis = new AnalysisImpl();
	Map<String, String> tkParams = new HashMap<>();
	tkParams.put("maxTokenLen", "128");
	CustomAnalyzerConfig.Builder builder =
	new CustomAnalyzerConfig.Builder("keyword", tkParams)
	.addTokenFilterConfig("lowercase", Collections.emptyMap())
	.addCharFilterConfig("htmlstrip", Collections.emptyMap());
	CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
	assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
	assertEquals(
	"org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory",
	analyzer.getCharFilterFactories().get(0).getClass().getName());
	assertEquals(
	"org.apache.lucene.analysis.core.KeywordTokenizerFactory",
	analyzer.getTokenizerFactory().getClass().getName());
	assertEquals(
	"org.apache.lucene.analysis.core.LowerCaseFilterFactory",
	analyzer.getTokenFilterFactories().get(0).getClass().getName());

	String text = "Apache Lucene";
	Analysis.StepByStepResult result = analysis.analyzeStepByStep(text);
	assertNotNull(result);
	assertNotNull(result.getCharfilteredTexts());
	assertEquals(1, result.getCharfilteredTexts().size());
	assertEquals("htmlStrip", result.getCharfilteredTexts().get(0).getName());

	assertNotNull(result.getNamedTokens());
	assertEquals(2, result.getNamedTokens().size());
	// FIXME check each namedTokensList
	assertEquals("keyword", result.getNamedTokens().get(0).getName());
	assertEquals("lowercase", result.getNamedTokens().get(1).getName());
	}
	}