// blob: 98e904eafd1dcdaa44eaef96b2eb9b7861f907b6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoderAbstractTest;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests the {@code ColognePhonetic} class.
 *
 * <p>Every source string passed to {@link #checkEncoding(String, String)} is recorded (upper-cased,
 * with upper-case umlauts folded to their base vowels) so that {@link #finishTests()} can verify
 * that each input-sequence condition listed in {@code MATCHES} is covered by at least one test
 * string.</p>
 *
 * <p>Non-ASCII characters in code are written as Unicode escapes so that the tests do not depend on
 * the encoding used to compile this file. Comments may still contain such characters, so keep this
 * file in UTF-8 encoding for proper Javadoc processing.</p>
 */
public class ColognePhoneticTest extends StringEncoderAbstractTest<ColognePhonetic> {

    /** Normalized source strings captured by {@link #checkEncoding(String, String)}. */
    private static final Set<String> TESTSET = new HashSet<>();

    /**
     * Character sequences to be tested by the code. Each entry is a regular expression that must
     * match, in full, at least one captured test string.
     */
    private static final String[] MATCHES = {
        ".*[AEIOUJY].*",         // A, E, I, J, O, U, Y
        ".*H.*",                 // H
        ".*B.*",                 // B
        ".*P[^H].*",             // P not before H
        ".*[DT][^CSZ].*",        // D,T not before C,S,Z
        ".*[FVW].*",             // F,V,W
        ".*PH.*",                // P before H
        ".*[GKQ].*",             // G,K,Q
        "C[AHKLOQRUX].*",        // Initial C before A, H, K, L, O, Q, R, U, X
        ".*[^SZ]C[AHKLOQRUX].*", // C before A, H, K, L, O, Q, R, U, X but not after S, Z
        ".*[^CKQ]X.*",           // X not after C,K,Q
        ".*L.*",                 // L
        ".*[MN].*",              // M,N
        ".*R.*",                 // R
        ".*[SZ].*",              // S,Z
        ".*[SZ]C.*",             // C after S,Z
        "C[^AHKLOQRUX].*",       // Initial C except before A, H, K, L, O, Q, R, U, X
        ".+C[^AHKLOQRUX].*",     // C except before A, H, K, L, O, Q, R, U, X
        ".*[DT][CSZ].*",         // D,T before C,S,Z
        ".*[CKQ]X.*",            // X after C,K,Q
    };

    /**
     * Tells whether any captured test string matches the given regular expression in full.
     *
     * @param re the regular expression describing one input condition
     * @return {@code true} if at least one captured string matches {@code re} entirely
     */
    private static boolean hasTestCase(final String re) {
        // Compile once per expression; String.matches would recompile it for every candidate.
        final Pattern pattern = Pattern.compile(re);
        for (final String s : TESTSET) {
            if (pattern.matcher(s).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks that all possible input sequence conditions are represented by the captured test
     * strings. Every uncovered expression is printed and also listed in the failure message.
     */
    @AfterClass
    public static void finishTests() {
        final StringBuilder missing = new StringBuilder();
        int errors = 0;
        for (final String m : MATCHES) {
            if (!hasTestCase(m)) {
                System.out.println(m + " has no test case");
                missing.append(' ').append(m);
                errors++;
            }
        }
        Assert.assertEquals("Not expecting any missing test cases:" + missing, 0, errors);
    }

    /**
     * Captures the test string for later checking by {@link #finishTests()}, then delegates to the
     * superclass implementation.
     *
     * @param expected the expected encoding
     * @param source the string to encode
     * @throws EncoderException if the superclass check throws it
     */
    @Override
    public void checkEncoding(final String expected, final String source) throws EncoderException {
        // Note that the German letter Eszett is converted to SS by toUpperCase, so we don't need to replace it.
        // The upper-case umlauts are given as Unicode escapes so this does not depend on the source encoding.
        TESTSET.add(source.toUpperCase(Locale.GERMAN)
            .replace('\u00C4', 'A')   // A-umlaut
            .replace('\u00D6', 'O')   // O-umlaut
            .replace('\u00DC', 'U')); // U-umlaut
        super.checkEncoding(expected, source);
    }

    @Override
    protected ColognePhonetic createStringEncoder() {
        return new ColognePhonetic();
    }

    /**
     * Ensures that the {@link #checkEncoding(String, String)} override still allows tests to fail.
     *
     * @throws EncoderException for some failure scenarios
     */
    @Test(expected = org.junit.ComparisonFailure.class)
    public void testCanFail() throws EncoderException {
        this.checkEncoding("/", "Fehler");
    }

    @Test
    public void testAabjoe() throws EncoderException {
        this.checkEncoding("01", "Aabjoe");
    }

    @Test
    public void testAaclan() throws EncoderException {
        this.checkEncoding("0856", "Aaclan");
    }

    /**
     * Tests [CODEC-122]
     *
     * @throws EncoderException for some failure scenarios
     */
    @Test
    public void testAychlmajrForCodec122() throws EncoderException {
        this.checkEncoding("04567", "Aychlmajr");
    }

    @Test
    public void testEdgeCases() throws EncoderException {
        // Each row is {source, expected code}.
        final String[][] data = {
            {"a", "0"},
            {"e", "0"},
            {"i", "0"},
            {"o", "0"},
            {"u", "0"},
            {"\u00E4", "0"}, // a-umlaut
            {"\u00F6", "0"}, // o-umlaut
            {"\u00FC", "0"}, // u-umlaut
            {"\u00DF", "8"}, // small sharp s
            {"aa", "0"},
            {"ha", "0"},
            {"h", ""},
            {"aha", "0"},
            {"b", "1"},
            {"p", "1"},
            {"ph", "3"},
            {"f", "3"},
            {"v", "3"},
            {"w", "3"},
            {"g", "4"},
            {"k", "4"},
            {"q", "4"},
            {"x", "48"},
            {"ax", "048"},
            {"cx", "48"},
            {"l", "5"},
            {"cl", "45"},
            {"acl", "085"},
            {"mn", "6"},
            {"{mn}", "6"}, // test chars above Z
            {"r", "7"}};
        this.checkEncodings(data);
    }

    @Test
    public void testExamples() throws EncoderException {
        // Each row is {source, expected code}.
        final String[][] data = {
            {"m\u00DCller", "657"}, // mÜller - why upper case U-umlaut?
            {"m\u00FCller", "657"}, // müller - add equivalent lower-case
            {"schmidt", "862"},
            {"schneider", "8627"},
            {"fischer", "387"},
            {"weber", "317"},
            {"wagner", "3467"},
            {"becker", "147"},
            {"hoffmann", "0366"},
            {"sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ?
            {"sch\u00e4fer", "837"}, // schäfer - add equivalent lower-case
            {"Breschnew", "17863"},
            {"Wikipedia", "3412"},
            {"peter", "127"},
            {"pharma", "376"},
            {"m\u00f6nchengladbach", "664645214"}, // mönchengladbach
            {"deutsch", "28"},
            {"deutz", "28"},
            {"hamburg", "06174"},
            {"hannover", "0637"},
            {"christstollen", "478256"},
            {"Xanthippe", "48621"},
            {"Zacharias", "8478"},
            {"Holzbau", "0581"},
            {"matsch", "68"},
            {"matz", "68"},
            {"Arbeitsamt", "071862"},
            {"Eberhard", "01772"},
            {"Eberhardt", "01772"},
            {"Celsius", "8588"},
            {"Ace", "08"},
            {"shch", "84"}, // CODEC-254
            {"xch", "484"}, // CODEC-255
            {"heithabu", "021"}};
        this.checkEncodings(data);
    }

    @Test
    public void testHyphen() throws EncoderException {
        final String[][] data = {
            {"bergisch-gladbach", "174845214"},
            {"M\u00fcller-L\u00fcdenscheidt", "65752682"}}; // Müller-Lüdenscheidt
        this.checkEncodings(data);
    }

    @Test
    public void testIsEncodeEquals() {
        // Each row holds two strings that must be reported as encode-equal.
        //@formatter:off
        final String[][] data = {
            {"Muller", "M\u00fcller"}, // Müller
            {"Meyer", "Mayr"},
            {"house", "house"},
            {"House", "house"},
            {"Haus", "house"},
            {"ganz", "Gans"},
            {"ganz", "G\u00e4nse"}, // Gänse
            {"Miyagi", "Miyako"}};
        //@formatter:on
        for (final String[] element : data) {
            final boolean encodeEqual = this.getStringEncoder().isEncodeEqual(element[1], element[0]);
            Assert.assertTrue(element[1] + " != " + element[0], encodeEqual);
        }
    }

    @Test
    public void testVariationsMella() throws EncoderException {
        final String[] data = {"mella", "milah", "moulla", "mellah", "muehle", "mule"};
        this.checkEncodingVariations("65", data);
    }

    @Test
    public void testVariationsMeyer() throws EncoderException {
        final String[] data = {"Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major"};
        this.checkEncodingVariations("67", data);
    }

    @Test
    public void testSpecialCharsBetweenSameLetters() throws EncoderException {
        final String[] data = {"Test test", "Testtest", "Test-test", "TesT#Test", "TesT?test"};
        this.checkEncodingVariations("28282", data);
    }

    /**
     * Allows command-line testing: encodes each argument and prints it next to its code.
     *
     * @param args the strings to encode
     */
    public static void main(final String[] args) {
        final ColognePhonetic coder = new ColognePhonetic();
        for (final String arg : args) {
            final String code = coder.encode(arg);
            System.out.println("'" + arg + "' = '" + code + "'");
        }
    }
}