blob: f2050767f1bd722d67b81dc4ffb3374c344147bb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.phonetic;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.codec.language.Caverphone2;
import org.apache.commons.codec.language.Metaphone;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
public class TestPhoneticFilterFactory extends BaseTokenStreamTestCase {
/**
* Case: default
*/
public void testFactoryDefaults() throws IOException {
Map<String,String> args = new HashMap<>();
args.put(PhoneticFilterFactory.ENCODER, "Metaphone");
PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
factory.inform(new ClasspathResourceLoader(factory.getClass()));
assertTrue(factory.getEncoder() instanceof Metaphone);
assertTrue(factory.inject); // default
}
public void testInjectFalse() throws IOException {
Map<String,String> args = new HashMap<>();
args.put(PhoneticFilterFactory.ENCODER, "Metaphone");
args.put(PhoneticFilterFactory.INJECT, "false");
PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
factory.inform(new ClasspathResourceLoader(factory.getClass()));
assertFalse(factory.inject);
}
public void testMaxCodeLength() throws IOException {
Map<String,String> args = new HashMap<>();
args.put(PhoneticFilterFactory.ENCODER, "Metaphone");
args.put(PhoneticFilterFactory.MAX_CODE_LENGTH, "2");
PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
factory.inform(new ClasspathResourceLoader(factory.getClass()));
assertEquals(2, ((Metaphone) factory.getEncoder()).getMaxCodeLen());
}
/**
* Case: Failures and Exceptions
*/
public void testMissingEncoder() throws IOException {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
new PhoneticFilterFactory(new HashMap<String,String>());
});
assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'encoder'"));
}
public void testUnknownEncoder() throws IOException {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
Map<String,String> args = new HashMap<>();
args.put("encoder", "XXX");
PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
factory.inform(new ClasspathResourceLoader(factory.getClass()));
});
assertTrue(expected.getMessage().contains("Error loading encoder"));
}
public void testUnknownEncoderReflection() throws IOException {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
Map<String,String> args = new HashMap<>();
args.put("encoder", "org.apache.commons.codec.language.NonExistence");
PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
factory.inform(new ClasspathResourceLoader(factory.getClass()));
});
assertTrue(expected.getMessage().contains("Error loading encoder"));
}
/**
* Case: Reflection
*/
public void testFactoryReflection() throws IOException {
Map<String,String> args = new HashMap<>();
args.put(PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.Metaphone");
PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
factory.inform(new ClasspathResourceLoader(factory.getClass()));
assertTrue(factory.getEncoder() instanceof Metaphone);
assertTrue(factory.inject); // default
}
/**
* we use "Caverphone2" as it is registered in the REGISTRY as Caverphone,
* so this effectively tests reflection without package name
*/
public void testFactoryReflectionCaverphone2() throws IOException {
Map<String,String> args = new HashMap<>();
args.put(PhoneticFilterFactory.ENCODER, "Caverphone2");
PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
factory.inform(new ClasspathResourceLoader(factory.getClass()));
assertTrue(factory.getEncoder() instanceof Caverphone2);
assertTrue(factory.inject); // default
}
public void testFactoryReflectionCaverphone() throws IOException {
Map<String,String> args = new HashMap<>();
args.put(PhoneticFilterFactory.ENCODER, "Caverphone");
PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
factory.inform(new ClasspathResourceLoader(factory.getClass()));
assertTrue(factory.getEncoder() instanceof Caverphone2);
assertTrue(factory.inject); // default
}
public void testAlgorithms() throws Exception {
assertAlgorithm("Metaphone", "true", "aaa bbb ccc easgasg",
new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
assertAlgorithm("Metaphone", "false", "aaa bbb ccc easgasg",
new String[] { "A", "B", "KKK", "ESKS" });
assertAlgorithm("DoubleMetaphone", "true", "aaa bbb ccc easgasg",
new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
assertAlgorithm("DoubleMetaphone", "false", "aaa bbb ccc easgasg",
new String[] { "A", "PP", "KK", "ASKS" });
assertAlgorithm("Soundex", "true", "aaa bbb ccc easgasg",
new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
assertAlgorithm("Soundex", "false", "aaa bbb ccc easgasg",
new String[] { "A000", "B000", "C000", "E220" });
assertAlgorithm("RefinedSoundex", "true", "aaa bbb ccc easgasg",
new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
assertAlgorithm("RefinedSoundex", "false", "aaa bbb ccc easgasg",
new String[] { "A0", "B1", "C3", "E034034" });
assertAlgorithm("Caverphone", "true", "Darda Karleen Datha Carlene",
new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen",
"TTA1111111", "Datha", "KLN1111111", "Carlene" });
assertAlgorithm("Caverphone", "false", "Darda Karleen Datha Carlene",
new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });
assertAlgorithm("ColognePhonetic", "true", "Meier Schmitt Meir Schmidt",
new String[] { "67", "Meier", "862", "Schmitt",
"67", "Meir", "862", "Schmidt" });
assertAlgorithm("ColognePhonetic", "false", "Meier Schmitt Meir Schmidt",
new String[] { "67", "862", "67", "862" });
assertAlgorithm("Nysiis", "true", "Macintosh Knuth Bart Hurd",
new String[] { "MCANT", "Macintosh", "NAT", "Knuth",
"BAD", "Bart", "HAD", "Hurd" });
assertAlgorithm("Nysiis", "false", "Macintosh Knuth Bart Hurd",
new String[] { "MCANT", "NAT", "BAD", "HAD" });
}
/** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
new PhoneticFilterFactory(new HashMap<String,String>() {{
put("encoder", "Metaphone");
put("bogusArg", "bogusValue");
}});
});
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
static void assertAlgorithm(String algName, String inject, String input,
String[] expected) throws Exception {
Tokenizer tokenizer = whitespaceMockTokenizer(input);
Map<String,String> args = new HashMap<>();
args.put("encoder", algName);
args.put("inject", inject);
PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
factory.inform(new ClasspathResourceLoader(factory.getClass()));
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, expected);
}
}