| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.analysis.hunspell; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.nio.file.Files; |
| import java.nio.file.Path; |
| import java.text.ParseException; |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.Map; |
| import org.apache.lucene.analysis.TokenFilterFactory; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FSDirectory; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.ResourceLoader; |
| import org.apache.lucene.util.ResourceLoaderAware; |
| |
| /** |
| * TokenFilterFactory that creates instances of {@link HunspellStemFilter}. Example config for |
| * British English: |
| * |
| * <pre class="prettyprint"> |
| * <filter class="solr.HunspellStemFilterFactory" |
| * dictionary="en_GB.dic,my_custom.dic" |
| * affix="en_GB.aff" |
| * ignoreCase="false" |
| * longestOnly="false" /></pre> |
| * |
| * Both parameters dictionary and affix are mandatory. Dictionaries for many languages are available |
| * through the OpenOffice project. |
| * |
| * <p>See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a> |
| * |
| * @lucene.experimental |
| * @since 3.5.0 |
| * @lucene.spi {@value #NAME} |
| */ |
| public class HunspellStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { |
| |
| /** SPI name */ |
| public static final String NAME = "hunspellStem"; |
| |
| private static final String PARAM_DICTIONARY = "dictionary"; |
| private static final String PARAM_AFFIX = "affix"; |
| // NOTE: this one is currently unused?: |
| private static final String PARAM_RECURSION_CAP = "recursionCap"; |
| private static final String PARAM_IGNORE_CASE = "ignoreCase"; |
| private static final String PARAM_LONGEST_ONLY = "longestOnly"; |
| |
| private final String dictionaryFiles; |
| private final String affixFile; |
| private final boolean ignoreCase; |
| private final boolean longestOnly; |
| private Dictionary dictionary; |
| |
| /** Creates a new HunspellStemFilterFactory */ |
| public HunspellStemFilterFactory(Map<String, String> args) { |
| super(args); |
| dictionaryFiles = require(args, PARAM_DICTIONARY); |
| affixFile = get(args, PARAM_AFFIX); |
| ignoreCase = getBoolean(args, PARAM_IGNORE_CASE, false); |
| longestOnly = getBoolean(args, PARAM_LONGEST_ONLY, false); |
| // this isnt necessary: we properly load all dictionaries. |
| // but recognize and ignore for back compat |
| getBoolean(args, "strictAffixParsing", true); |
| // this isn't necessary: multi-stage stripping is fixed and |
| // flags like COMPLEXPREFIXES in the data itself control this. |
| // but recognize and ignore for back compat |
| getInt(args, "recursionCap", 0); |
| if (!args.isEmpty()) { |
| throw new IllegalArgumentException("Unknown parameters: " + args); |
| } |
| } |
| |
| /** Default ctor for compatibility with SPI */ |
| public HunspellStemFilterFactory() { |
| throw defaultCtorException(); |
| } |
| |
| @Override |
| public void inform(ResourceLoader loader) throws IOException { |
| String dicts[] = dictionaryFiles.split(","); |
| |
| InputStream affix = null; |
| List<InputStream> dictionaries = new ArrayList<>(); |
| |
| try { |
| dictionaries = new ArrayList<>(); |
| for (String file : dicts) { |
| dictionaries.add(loader.openResource(file)); |
| } |
| affix = loader.openResource(affixFile); |
| |
| Path tempPath = Files.createTempDirectory(Dictionary.getDefaultTempDir(), "Hunspell"); |
| try (Directory tempDir = FSDirectory.open(tempPath)) { |
| this.dictionary = new Dictionary(tempDir, "hunspell", affix, dictionaries, ignoreCase); |
| } finally { |
| IOUtils.rm(tempPath); |
| } |
| } catch (ParseException e) { |
| throw new IOException( |
| "Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", |
| e); |
| } finally { |
| IOUtils.closeWhileHandlingException(affix); |
| IOUtils.closeWhileHandlingException(dictionaries); |
| } |
| } |
| |
| @Override |
| public TokenStream create(TokenStream tokenStream) { |
| return new HunspellStemFilter(tokenStream, dictionary, true, longestOnly); |
| } |
| } |