blob: 70a85900911b2cf65aff6dadefb5e1db3e0fed71 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
 * Abstract parent class for analysis factories {@link TokenizerFactory},
 * {@link TokenFilterFactory} and {@link CharFilterFactory}.
 * <p>
 * The typical lifecycle for a factory consumer is:
 * <ol>
 *   <li>Create factory via its constructor (or via XXXFactory.forName)
 *   <li>(Optional) If the factory uses resources such as files, {@link ResourceLoaderAware#inform(ResourceLoader)} is called to initialize those resources.
 *   <li>Consumer calls create() to obtain instances.
 * </ol>
 * <p>
 * All {@code get*} and {@code require*} argument accessors <em>consume</em> the
 * argument: the key is removed from the supplied map when it is read. The
 * untouched arguments remain available through {@link #getOriginalArgs()}.
 */
public abstract class AbstractAnalysisFactory {

  /** Name of the argument that carries the Lucene match version. */
  public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";

  /** Name of the argument that carries the concrete factory class name. */
  private static final String CLASS_NAME = "class";

  /** Matches one item of a whitespace- and/or comma-separated list. */
  private static final Pattern ITEM_PATTERN = Pattern.compile("[^,\\s]+");

  /** The original args, before any processing */
  private final Map<String,String> originalArgs;

  /** the luceneVersion arg */
  protected final Version luceneMatchVersion;

  /** whether the luceneMatchVersion arg is explicitly specified in the serialized schema */
  private boolean isExplicitLuceneMatchVersion = false;

  /**
   * Initialize this factory via a set of key-value pairs.
   * <p>
   * A snapshot of {@code args} is kept for {@link #getOriginalArgs()}; afterwards the
   * {@value #LUCENE_MATCH_VERSION_PARAM} and {@code class} entries are removed from
   * {@code args} itself.
   *
   * @param args the factory arguments; must be mutable
   * @throws IllegalArgumentException if the match version is present but cannot be parsed
   */
  protected AbstractAnalysisFactory(Map<String,String> args) {
    originalArgs = Collections.unmodifiableMap(new HashMap<>(args));
    // Read the map directly: calling the overridable get(...) from a constructor
    // would run subclass code before the subclass has been initialized.
    final String version = args.remove(LUCENE_MATCH_VERSION_PARAM);
    if (version == null) {
      luceneMatchVersion = Version.LATEST;
    } else {
      try {
        luceneMatchVersion = Version.parseLeniently(version);
      } catch (ParseException pe) {
        throw new IllegalArgumentException(pe);
      }
    }
    args.remove(CLASS_NAME); // consume the class arg
  }

  /**
   * @return an unmodifiable copy of the arguments as they were passed to the constructor
   */
  public final Map<String,String> getOriginalArgs() {
    return originalArgs;
  }

  /**
   * @return the match version parsed from the arguments, or {@code Version.LATEST} if none was given
   */
  public final Version getLuceneMatchVersion() {
    return this.luceneMatchVersion;
  }

  /**
   * Removes and returns a mandatory argument.
   *
   * @param args the argument map; the entry for {@code name} is removed
   * @param name the argument key
   * @return the argument value, never {@code null}
   * @throws IllegalArgumentException if the argument is missing
   */
  public String require(Map<String,String> args, String name) {
    final String s = args.remove(name);
    if (s == null) {
      throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
    }
    return s;
  }

  /**
   * Same as {@link #require(Map, String, Collection, boolean)} with case-sensitive matching.
   */
  public String require(Map<String,String> args, String name, Collection<String> allowedValues) {
    return require(args, name, allowedValues, true);
  }

  /**
   * Removes and returns a mandatory argument whose value must be one of {@code allowedValues}.
   *
   * @param args the argument map; the entry for {@code name} is removed
   * @param name the argument key
   * @param allowedValues the accepted values
   * @param caseSensitive whether the value is compared case-sensitively
   * @return the argument value as given (not normalized to the matching allowed value)
   * @throws IllegalArgumentException if the argument is missing or its value is not allowed
   */
  public String require(Map<String,String> args, String name, Collection<String> allowedValues, boolean caseSensitive) {
    final String s = args.remove(name);
    if (s == null) {
      throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
    }
    return checkAllowed(name, s, allowedValues, caseSensitive);
  }

  /**
   * Removes and returns an optional argument.
   *
   * @return the argument value, or {@code null} if it is absent
   */
  public String get(Map<String,String> args, String name) {
    return args.remove(name); // defaultVal = null
  }

  /**
   * Removes and returns an optional argument.
   *
   * @return the argument value, or {@code defaultVal} if it is absent
   */
  public String get(Map<String,String> args, String name, String defaultVal) {
    final String s = args.remove(name);
    return s == null ? defaultVal : s;
  }

  /**
   * Same as {@link #get(Map, String, Collection, String)} with a {@code null} default.
   */
  public String get(Map<String,String> args, String name, Collection<String> allowedValues) {
    return get(args, name, allowedValues, null); // defaultVal = null
  }

  /**
   * Same as {@link #get(Map, String, Collection, String, boolean)} with case-sensitive matching.
   */
  public String get(Map<String,String> args, String name, Collection<String> allowedValues, String defaultVal) {
    return get(args, name, allowedValues, defaultVal, true);
  }

  /**
   * Removes and returns an optional argument whose value, if present, must be one of
   * {@code allowedValues}.
   *
   * @param args the argument map; the entry for {@code name} is removed
   * @param name the argument key
   * @param allowedValues the accepted values
   * @param defaultVal returned as-is (without validation) when the argument is absent
   * @param caseSensitive whether the value is compared case-sensitively
   * @return the argument value as given, or {@code defaultVal} if it is absent
   * @throws IllegalArgumentException if the value is present but not allowed
   */
  public String get(Map<String,String> args, String name, Collection<String> allowedValues, String defaultVal, boolean caseSensitive) {
    final String s = args.remove(name);
    return s == null ? defaultVal : checkAllowed(name, s, allowedValues, caseSensitive);
  }

  /**
   * Returns {@code value} if it equals one of {@code allowedValues}, otherwise fails.
   *
   * @throws IllegalArgumentException if no allowed value matches
   */
  private static String checkAllowed(String name, String value, Collection<String> allowedValues, boolean caseSensitive) {
    for (String allowedValue : allowedValues) {
      final boolean matches = caseSensitive
          ? value.equals(allowedValue)
          : value.equalsIgnoreCase(allowedValue);
      if (matches) {
        return value;
      }
    }
    throw new IllegalArgumentException("Configuration Error: '" + name + "' value must be one of " + allowedValues);
  }

  /**
   * Removes a mandatory argument and parses it with {@link Integer#parseInt(String)}.
   *
   * @throws IllegalArgumentException if the argument is missing or is not a valid int
   */
  protected final int requireInt(Map<String,String> args, String name) {
    return Integer.parseInt(require(args, name));
  }

  /**
   * Removes an optional argument and parses it with {@link Integer#parseInt(String)}.
   *
   * @return the parsed value, or {@code defaultVal} if the argument is absent
   */
  protected final int getInt(Map<String,String> args, String name, int defaultVal) {
    final String s = args.remove(name);
    return s == null ? defaultVal : Integer.parseInt(s);
  }

  /**
   * Removes a mandatory argument and parses it with {@link Boolean#parseBoolean(String)}.
   *
   * @throws IllegalArgumentException if the argument is missing
   */
  protected final boolean requireBoolean(Map<String,String> args, String name) {
    return Boolean.parseBoolean(require(args, name));
  }

  /**
   * Removes an optional argument and parses it with {@link Boolean#parseBoolean(String)}.
   *
   * @return the parsed value, or {@code defaultVal} if the argument is absent
   */
  protected final boolean getBoolean(Map<String,String> args, String name, boolean defaultVal) {
    final String s = args.remove(name);
    return s == null ? defaultVal : Boolean.parseBoolean(s);
  }

  /**
   * Removes a mandatory argument and parses it with {@link Float#parseFloat(String)}.
   *
   * @throws IllegalArgumentException if the argument is missing or is not a valid float
   */
  protected final float requireFloat(Map<String,String> args, String name) {
    return Float.parseFloat(require(args, name));
  }

  /**
   * Removes an optional argument and parses it with {@link Float#parseFloat(String)}.
   *
   * @return the parsed value, or {@code defaultVal} if the argument is absent
   */
  protected final float getFloat(Map<String,String> args, String name, float defaultVal) {
    final String s = args.remove(name);
    return s == null ? defaultVal : Float.parseFloat(s);
  }

  /**
   * Removes a mandatory argument and returns its first character.
   * <p>
   * Unlike {@link #getChar(Map, String, char)}, a value longer than one character is
   * accepted and only its first character is used.
   *
   * @throws IllegalArgumentException if the argument is missing or is the empty string
   */
  public char requireChar(Map<String,String> args, String name) {
    final String s = require(args, name);
    if (s.isEmpty()) {
      // Fail with a configuration error instead of a raw StringIndexOutOfBoundsException.
      throw new IllegalArgumentException(name + " should be a char. \"" + s + "\" is invalid");
    }
    return s.charAt(0);
  }

  /**
   * Removes an optional argument that must be exactly one character long.
   *
   * @return the character, or {@code defaultValue} if the argument is absent
   * @throws IllegalArgumentException if the value is present but not exactly one character
   */
  public char getChar(Map<String,String> args, String name, char defaultValue) {
    final String s = args.remove(name);
    if (s == null) {
      return defaultValue;
    }
    if (s.length() != 1) {
      throw new IllegalArgumentException(name + " should be a char. \"" + s + "\" is invalid");
    }
    return s.charAt(0);
  }

  /** Returns whitespace- and/or comma-separated set of values, or null if none are found */
  public Set<String> getSet(Map<String,String> args, String name) {
    final String s = args.remove(name);
    if (s == null) {
      return null;
    }
    final Set<String> set = new HashSet<>();
    final Matcher matcher = ITEM_PATTERN.matcher(s);
    while (matcher.find()) {
      set.add(matcher.group());
    }
    // A value made only of separators yields no items; that is reported as null too.
    return set.isEmpty() ? null : set;
  }

  /**
   * Compiles a pattern for the value of the specified argument key <code>name</code>
   *
   * @throws IllegalArgumentException if the argument is missing or is not a valid regular expression
   */
  protected final Pattern getPattern(Map<String,String> args, String name) {
    try {
      return Pattern.compile(require(args, name));
    } catch (PatternSyntaxException e) {
      throw new IllegalArgumentException
        ("Configuration Error: '" + name + "' can not be parsed in " +
         this.getClass().getSimpleName(), e);
    }
  }

  /**
   * Returns as {@link CharArraySet} from wordFiles, which
   * can be a comma-separated list of filenames
   *
   * @return the combined word set, or {@code null} if {@code wordFiles} is {@code null}
   */
  protected final CharArraySet getWordSet(ResourceLoader loader,
      String wordFiles, boolean ignoreCase) throws IOException {
    final List<String> files = splitFileNames(wordFiles);
    if (files.isEmpty()) {
      return null;
    }
    // default stopwords list has 35 or so words, but maybe don't make it that
    // big to start
    final CharArraySet words = new CharArraySet(files.size() * 10, ignoreCase);
    for (String file : files) {
      final List<String> wlist = getLines(loader, file.trim());
      words.addAll(StopFilter.makeStopSet(wlist, ignoreCase));
    }
    return words;
  }

  /**
   * Returns the resource's lines (with content treated as UTF-8)
   */
  protected final List<String> getLines(ResourceLoader loader, String resource) throws IOException {
    return WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
  }

  /** same as {@link #getWordSet(ResourceLoader, String, boolean)},
   * except the input is in snowball format. */
  protected final CharArraySet getSnowballWordSet(ResourceLoader loader,
      String wordFiles, boolean ignoreCase) throws IOException {
    final List<String> files = splitFileNames(wordFiles);
    if (files.isEmpty()) {
      return null;
    }
    // default stopwords list has 35 or so words, but maybe don't make it that
    // big to start
    final CharArraySet words = new CharArraySet(files.size() * 10, ignoreCase);
    for (String file : files) {
      InputStream stream = null;
      Reader reader = null;
      try {
        stream = loader.openResource(file.trim());
        // REPORT makes the decoder fail on invalid UTF-8 instead of substituting characters.
        final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
        reader = new InputStreamReader(stream, decoder);
        WordlistLoader.getSnowballWordSet(reader, words);
      } finally {
        IOUtils.closeWhileHandlingException(reader, stream);
      }
    }
    return words;
  }

  /**
   * Splits file names separated by comma character.
   * File names can contain comma characters escaped by backslash '\'
   *
   * @param fileNames the string containing file names
   * @return a list of file names with the escaping backslashed removed
   */
  protected final List<String> splitFileNames(String fileNames) {
    return splitAt(',', fileNames);
  }

  /**
   * Splits a list at each occurrence of the given separator character.
   * List items can contain separator characters escaped by backslash '\'.
   * Whitespace is NOT trimmed from the returned list items.
   * <p>
   * As with {@link String#split(String)}, trailing empty items are dropped.
   *
   * @param separator the separator character; it is matched literally, so characters that
   *                  are special in regular expressions are safe to use
   * @param list the string containing the split list items
   * @return a list of items with the escaping backslashes removed, or an empty list if
   *         {@code list} is {@code null}
   */
  protected final List<String> splitAt(char separator, String list) {
    if (list == null) {
      return Collections.emptyList();
    }
    // Quote the separator so that regex metacharacters ('^', '\\', '[', ...) are taken literally.
    final String quotedSeparator = Pattern.quote(String.valueOf(separator));
    // Split only at separators that are not preceded by a backslash.
    final String[] items = list.split("(?<!\\\\)" + quotedSeparator);
    // A backslash directly in front of a separator is an escape and gets dropped.
    final Pattern escape = Pattern.compile("\\\\(?=" + quotedSeparator + ")");
    final List<String> result = new ArrayList<>(items.length);
    for (String item : items) {
      result.add(escape.matcher(item).replaceAll(""));
    }
    return result;
  }

  /**
   * @return the string used to specify the concrete class name in a serialized representation: the class arg.
   *         If the concrete class name was not specified via a class arg, returns {@code getClass().getName()}.
   */
  public String getClassArg() {
    final String className = originalArgs.get(CLASS_NAME);
    return className != null ? className : getClass().getName();
  }

  /**
   * @return whether the match version has been flagged as explicitly specified
   * @see #setExplicitLuceneMatchVersion(boolean)
   */
  public boolean isExplicitLuceneMatchVersion() {
    return isExplicitLuceneMatchVersion;
  }

  /**
   * Flags whether the match version was explicitly specified. The flag defaults to
   * {@code false} and is only changed through this setter.
   */
  public void setExplicitLuceneMatchVersion(boolean isExplicitLuceneMatchVersion) {
    this.isExplicitLuceneMatchVersion = isExplicitLuceneMatchVersion;
  }

  /**
   * Looks up SPI name (static "NAME" field) with appropriate modifiers.
   * Also it must be a String class and declared in the concrete class.
   * @return the SPI name
   * @throws NoSuchFieldException - if the "NAME" field is not defined.
   * @throws IllegalAccessException - if the "NAME" field is inaccessible.
   * @throws IllegalStateException - if the "NAME" field does not have appropriate modifiers or isn't a String field.
   */
  static String lookupSPIName(Class<? extends AbstractAnalysisFactory> service) throws NoSuchFieldException, IllegalAccessException, IllegalStateException {
    final Field field = service.getField("NAME");
    final int modifier = field.getModifiers();
    final boolean isConstant = Modifier.isStatic(modifier) && Modifier.isFinal(modifier);
    if (isConstant &&
        field.getType().equals(String.class) &&
        Objects.equals(field.getDeclaringClass(), service)) {
      return ((String) field.get(null));
    }
    throw new IllegalStateException("No SPI name defined.");
  }

  /**
   * Generate legacy SPI name derived from the class name: the simple class name with the
   * first matching suffix stripped, lower-cased with {@link Locale#ROOT}.
   *
   * @param service the factory class
   * @param suffixes the class name suffixes to try, in order
   * @return the SPI name, or {@code null} if the class name ends with none of the suffixes
   * @deprecated Derives the name from the class name; see {@link #lookupSPIName(Class)} for
   *             the lookup based on the static {@code NAME} field.
   */
  @Deprecated
  static String generateLegacySPIName(Class<? extends AbstractAnalysisFactory> service, String[] suffixes) {
    final String clazzName = service.getSimpleName();
    for (String suffix : suffixes) {
      if (clazzName.endsWith(suffix)) {
        return clazzName.substring(0, clazzName.length() - suffix.length()).toLowerCase(Locale.ROOT);
      }
    }
    return null;
  }
}