// blob: c955b189fd12a6df5134f07aa68d7ec3b88f8e28 [file] [log] [blame] (repository-viewer header, not part of the Java source)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import org.apache.commons.codec.StringEncoderAbstractTest;
import org.junit.Test;
/**
 * Series of tests for the Match Rating Approach algorithm as implemented by
 * {@link MatchRatingApproachEncoder}.
 *
 * <p>
 * Test names generally follow the pattern
 * {@code GeneralMetadataOnTheTestArea_ActualTestValues_ExpectedResult}.
 * An unusual input value is indicated by the term "corner case".
 * </p>
 *
 * <p>
 * The tests are grouped into three regions: the encoder's support methods,
 * {@code encode(String)}, and {@code isEncodeEquals(String, String)} comparisons.
 * Every test obtains the encoder under test through {@code getStringEncoder()};
 * instances are supplied by {@link #createStringEncoder()}.
 * </p>
 */
public class MatchRatingApproachEncoderTest extends StringEncoderAbstractTest<MatchRatingApproachEncoder> {

    // ********** BEGIN REGION - TEST SUPPORT METHODS

    // ----- removeAccents -----

    @Test
    public final void testAccentRemoval_AllLower_SuccessfullyRemoved() {
        assertEquals("aeiou", this.getStringEncoder().removeAccents("áéíóú"));
    }

    @Test
    public final void testAccentRemoval_WithSpaces_SuccessfullyRemovedAndSpacesInvariant() {
        assertEquals("ae io u", this.getStringEncoder().removeAccents("áé íó ú"));
    }

    @Test
    public final void testAccentRemoval_UpperandLower_SuccessfullyRemovedAndCaseInvariant() {
        assertEquals("AeiOuu", this.getStringEncoder().removeAccents("ÁeíÓuu"));
    }

    @Test
    public final void testAccentRemoval_MixedWithUnusualChars_SuccessfullyRemovedAndUnusualcharactersInvariant() {
        assertEquals("A-e'i.,o&u", this.getStringEncoder().removeAccents("Á-e'í.,ó&ú"));
    }

    /** German, Spanish and French accented letters; note that the expected value keeps 'ß' unchanged. */
    @Test
    public final void testAccentRemoval_GerSpanFrenMix_SuccessfullyRemoved() {
        assertEquals("aeoußAEOUnNa", this.getStringEncoder().removeAccents("äëöüßÄËÖÜñÑà"));
    }

    @Test
    public final void testAccentRemoval_ComprehensiveAccentMix_AllSuccessfullyRemoved() {
        assertEquals("E,E,E,E,U,U,I,I,A,A,O,e,e,e,e,u,u,i,i,a,a,o,c",
                this.getStringEncoder().removeAccents("È,É,Ê,Ë,Û,Ù,Ï,Î,À,Â,Ô,è,é,ê,ë,û,ù,ï,î,à,â,ô,ç"));
    }

    @Test
    public final void testAccentRemovalNormalString_NoChange() {
        assertEquals("Colorless green ideas sleep furiously",
                this.getStringEncoder().removeAccents("Colorless green ideas sleep furiously"));
    }

    /** An empty input is expected to come back as an empty string. */
    @Test
    public final void testAccentRemoval_NINO_NoChange() {
        assertEquals("", this.getStringEncoder().removeAccents(""));
    }

    /** A {@code null} input is expected to be passed through as {@code null}. */
    @Test
    public final void testAccentRemoval_NullValue_ReturnNullSuccessfully() {
        // assertNull states the intent directly instead of assertEquals(null, ...).
        assertNull(this.getStringEncoder().removeAccents(null));
    }

    // ----- removeDoubleConsonants -----

    @Test
    public final void testRemoveSingleDoubleConsonants_BUBLE_RemovedSuccessfully() {
        assertEquals("BUBLE", this.getStringEncoder().removeDoubleConsonants("BUBBLE"));
    }

    @Test
    public final void testRemoveDoubleConsonants_MISSISSIPPI_RemovedSuccessfully() {
        assertEquals("MISISIPI", this.getStringEncoder().removeDoubleConsonants("MISSISSIPPI"));
    }

    /** A doubled vowel ("EE") is expected to be left untouched. */
    @Test
    public final void testRemoveDoubleDoubleVowel_BEETLE_NotRemoved() {
        assertEquals("BEETLE", this.getStringEncoder().removeDoubleConsonants("BEETLE"));
    }

    // ----- isVowel -----

    @Test
    public final void testIsVowel_CapitalA_ReturnsTrue() {
        assertTrue(this.getStringEncoder().isVowel("A"));
    }

    @Test
    public final void testIsVowel_SmallD_ReturnsFalse() {
        assertFalse(this.getStringEncoder().isVowel("d"));
    }

    // ----- removeVowels -----

    /** The expected values in this group all keep the leading vowel of the input. */
    @Test
    public final void testRemoveVowel_ALESSANDRA_Returns_ALSSNDR() {
        assertEquals("ALSSNDR", this.getStringEncoder().removeVowels("ALESSANDRA"));
    }

    @Test
    public final void testRemoveVowel__AIDAN_Returns_ADN() {
        assertEquals("ADN", this.getStringEncoder().removeVowels("AIDAN"));
    }

    @Test
    public final void testRemoveVowel__DECLAN_Returns_DCLN() {
        assertEquals("DCLN", this.getStringEncoder().removeVowels("DECLAN"));
    }

    // ----- getFirst3Last3 -----

    /** NOTE: despite "ALEXANDER" in the test name, the input used here is "Alexzander". */
    @Test
    public final void testGetFirstLast3__ALEXANDER_Returns_Aleder() {
        assertEquals("Aleder", this.getStringEncoder().getFirst3Last3("Alexzander"));
    }

    /** A four-letter input is expected to be returned unchanged. */
    @Test
    public final void testGetFirstLast3_PETE_Returns_PETE() {
        assertEquals("PETE", this.getStringEncoder().getFirst3Last3("PETE"));
    }

    // ----- leftToRightThenRightToLeftProcessing -----

    @Test
    public final void testleftTorightThenRightToLeft_ALEXANDER_ALEXANDRA_Returns4() {
        assertEquals(4, this.getStringEncoder().leftToRightThenRightToLeftProcessing("ALEXANDER", "ALEXANDRA"));
    }

    @Test
    public final void testleftTorightThenRightToLeft_EINSTEIN_MICHAELA_Returns0() {
        assertEquals(0, this.getStringEncoder().leftToRightThenRightToLeftProcessing("EINSTEIN", "MICHAELA"));
    }

    // ----- getMinRating -----
    // NOTE(review): the int argument is presumably the combined length of the two encoded
    // names being compared - confirm against MatchRatingApproachEncoder.getMinRating.

    /** NOTE: same assertion as {@link #testgetMinRating_7_Returns4_Successfully()}; kept so test names stay stable. */
    @Test
    public final void testGetMinRating_7_Return4_Successfully() {
        assertEquals(4, this.getStringEncoder().getMinRating(7));
    }

    @Test
    public final void testGetMinRating_1_Returns5_Successfully() {
        assertEquals(5, this.getStringEncoder().getMinRating(1));
    }

    @Test
    public final void testGetMinRating_2_Returns5_Successfully() {
        assertEquals(5, this.getStringEncoder().getMinRating(2));
    }

    @Test
    public final void testgetMinRating_5_Returns4_Successfully() {
        assertEquals(4, this.getStringEncoder().getMinRating(5));
    }

    /** NOTE: exact duplicate of {@link #testgetMinRating_5_Returns4_Successfully()}; kept so test names stay stable. */
    @Test
    public final void testgetMinRating_5_Returns4_Successfully2() {
        assertEquals(4, this.getStringEncoder().getMinRating(5));
    }

    @Test
    public final void testgetMinRating_6_Returns4_Successfully() {
        assertEquals(4, this.getStringEncoder().getMinRating(6));
    }

    @Test
    public final void testgetMinRating_7_Returns4_Successfully() {
        assertEquals(4, this.getStringEncoder().getMinRating(7));
    }

    @Test
    public final void testgetMinRating_8_Returns3_Successfully() {
        assertEquals(3, this.getStringEncoder().getMinRating(8));
    }

    @Test
    public final void testgetMinRating_10_Returns3_Successfully() {
        assertEquals(3, this.getStringEncoder().getMinRating(10));
    }

    @Test
    public final void testgetMinRating_11_Returns_3_Successfully() {
        assertEquals(3, this.getStringEncoder().getMinRating(11));
    }

    @Test
    public final void testGetMinRating_13_Returns_1_Successfully() {
        assertEquals(1, this.getStringEncoder().getMinRating(13));
    }

    // ----- cleanName -----

    /** Mixed case, an accent, punctuation and spaces are expected to collapse to plain upper-case letters. */
    @Test
    public final void testcleanName_SuccessfullyClean() {
        assertEquals("THISISATEST", this.getStringEncoder().cleanName("This-ís a t.,es &t"));
    }

    @Test
    public final void testisVowel_SingleVowel_ReturnsTrue() {
        assertTrue(this.getStringEncoder().isVowel("I"));
    }

    // ----- isEncodeEquals corner cases: empty, blank, null and single-letter names never match -----

    @Test
    public final void testisEncodeEquals_CornerCase_SecondNameNothing_ReturnsFalse() {
        assertFalse(this.getStringEncoder().isEncodeEquals("test", ""));
    }

    @Test
    public final void testisEncodeEquals_CornerCase_FirstNameNothing_ReturnsFalse() {
        assertFalse(this.getStringEncoder().isEncodeEquals("", "test"));
    }

    @Test
    public final void testisEncodeEquals_CornerCase_SecondNameJustSpace_ReturnsFalse() {
        assertFalse(this.getStringEncoder().isEncodeEquals("test", " "));
    }

    @Test
    public final void testisEncodeEquals_CornerCase_FirstNameJustSpace_ReturnsFalse() {
        assertFalse(this.getStringEncoder().isEncodeEquals(" ", "test"));
    }

    @Test
    public final void testisEncodeEquals_CornerCase_SecondNameNull_ReturnsFalse() {
        assertFalse(this.getStringEncoder().isEncodeEquals("test", null));
    }

    @Test
    public final void testisEncodeEquals_CornerCase_FirstNameNull_ReturnsFalse() {
        assertFalse(this.getStringEncoder().isEncodeEquals(null, "test"));
    }

    @Test
    public final void testisEncodeEquals_CornerCase_FirstNameJust1Letter_ReturnsFalse() {
        assertFalse(this.getStringEncoder().isEncodeEquals("t", "test"));
    }

    @Test
    public final void testisEncodeEqualsSecondNameJust1Letter_ReturnsFalse() {
        assertFalse(this.getStringEncoder().isEncodeEquals("test", "t"));
    }

    // ***** END REGION - TEST SUPPORT METHODS

    // ***** BEGIN REGION - TEST GET MRA ENCODING

    @Test
    public final void testGetEncoding_HARPER_HRPR() {
        assertEquals("HRPR", this.getStringEncoder().encode("HARPER"));
    }

    @Test
    public final void testGetEncoding_SMITH_to_SMTH() {
        assertEquals("SMTH", this.getStringEncoder().encode("Smith"));
    }

    @Test
    public final void testGetEncoding_SMYTH_to_SMYTH() {
        assertEquals("SMYTH", this.getStringEncoder().encode("Smyth"));
    }

    // Blank, empty, null and single-letter inputs are all expected to encode to "".

    @Test
    public final void testGetEncoding_Space_to_Nothing() {
        assertEquals("", this.getStringEncoder().encode(" "));
    }

    @Test
    public final void testGetEncoding_NoSpace_to_Nothing() {
        assertEquals("", this.getStringEncoder().encode(""));
    }

    @Test
    public final void testGetEncoding_Null_to_Nothing() {
        assertEquals("", this.getStringEncoder().encode(null));
    }

    @Test
    public final void testGetEncoding_One_Letter_to_Nothing() {
        assertEquals("", this.getStringEncoder().encode("E"));
    }

    @Test
    public final void testCompareNameNullSpace_ReturnsFalseSuccessfully() {
        assertFalse(this.getStringEncoder().isEncodeEquals(null, " "));
    }

    /** NOTE: despite "ReturnsFalse" in the test name, identical names are asserted to match (true). */
    @Test
    public final void testCompareNameSameNames_ReturnsFalseSuccessfully() {
        assertTrue(this.getStringEncoder().isEncodeEquals("John", "John"));
    }

    // ***** END REGION - TEST GET MRA ENCODING

    // ***** BEGIN REGION - TEST GET MRA COMPARISONS

    @Test
    public final void testCompare_SMITH_SMYTH_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("smith", "smyth"));
    }

    @Test
    public final void testCompare_BURNS_BOURNE_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Burns", "Bourne"));
    }

    @Test
    public final void testCompare_ShortNames_AL_ED_WorksButNoMatch() {
        assertFalse(this.getStringEncoder().isEncodeEquals("Al", "Ed"));
    }

    @Test
    public final void testCompare_CATHERINE_KATHRYN_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Catherine", "Kathryn"));
    }

    @Test
    public final void testCompare_BRIAN_BRYAN_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Brian", "Bryan"));
    }

    @Test
    public final void testCompare_SEAN_SHAUN_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Séan", "Shaun"));
    }

    @Test
    public final void testCompare_COLM_COLIN_WithAccentsAndSymbolsAndSpaces_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Cólm. ", "C-olín"));
    }

    @Test
    public final void testCompare_STEPHEN_STEVEN_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Steven"));
    }

    @Test
    public final void testCompare_STEVEN_STEFAN_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Steven", "Stefan"));
    }

    @Test
    public final void testCompare_STEPHEN_STEFAN_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Stefan"));
    }

    @Test
    public final void testCompare_SAM_SAMUEL_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Sam", "Samuel"));
    }

    @Test
    public final void testCompare_MICKY_MICHAEL_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Micky", "Michael"));
    }

    @Test
    public final void testCompare_OONA_OONAGH_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Oona", "Oonagh"));
    }

    @Test
    public final void testCompare_SOPHIE_SOFIA_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Sophie", "Sofia"));
    }

    @Test
    public final void testCompare_FRANCISZEK_FRANCES_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Franciszek", "Frances"));
    }

    @Test
    public final void testCompare_TOMASZ_TOM_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Tomasz", "tom"));
    }

    /** NOTE: the test name says "CARK", but the names actually compared are "Kl" and "Karl". */
    @Test
    public final void testCompare_SmallInput_CARK_Kl_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Kl", "Karl"));
    }

    @Test
    public final void testCompareNameToSingleLetter_KARL_C_DoesNotMatch() {
        assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "C"));
    }

    /** NOTE: the test name says "ZAKARIA", but the second name actually compared is "Zacharia". */
    @Test
    public final void testCompare_ZACH_ZAKARIA_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Zach", "Zacharia"));
    }

    @Test
    public final void testCompare_KARL_ALESSANDRO_DoesNotMatch() {
        assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "Alessandro"));
    }

    /** Pins the current (non-matching) result for a pair the author considered a desirable match. */
    @Test
    public final void testCompare_Forenames_UNA_OONAGH_ShouldSuccessfullyMatchButDoesNot() {
        assertFalse(this.getStringEncoder().isEncodeEquals("Úna", "Oonagh")); // Disappointing
    }

    // ***** Begin Region - Test Comparisons - Surnames

    @Test
    public final void testCompare_Surname_OSULLIVAN_OSUILLEABHAIN_SuccessfulMatch() {
        assertTrue(this.getStringEncoder().isEncodeEquals("O'Sullivan", "Ó ' Súilleabháin"));
    }

    @Test
    public final void testCompare_LongSurnames_MORIARTY_OMUIRCHEARTAIGH_DoesNotSuccessfulMatch() {
        assertFalse(this.getStringEncoder().isEncodeEquals("Moriarty", "OMuircheartaigh"));
    }

    /** NOTE: the arguments are passed in the opposite order to the one suggested by the test name. */
    @Test
    public final void testCompare_LongSurnames_OMUIRCHEARTAIGH_OMIREADHAIGH_SuccessfulMatch() {
        assertTrue(this.getStringEncoder().isEncodeEquals("o'muireadhaigh", "Ó 'Muircheartaigh "));
    }

    @Test
    public final void testCompare_Surname_COOPERFLYNN_SUPERLYN_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Cooper-Flynn", "Super-Lyn"));
    }

    @Test
    public final void testCompare_Surname_HAILEY_HALLEY_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Hailey", "Halley"));
    }

    // **** BEGIN YIDDISH/SLAVIC SECTION ****

    @Test
    public final void testCompare_Surname_AUERBACH_UHRBACH_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Auerbach", "Uhrbach"));
    }

    @Test
    public final void testCompare_Surname_MOSKOWITZ_MOSKOVITZ_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Moskowitz", "Moskovitz"));
    }

    @Test
    public final void testCompare_Surname_LIPSHITZ_LIPPSZYC_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("LIPSHITZ", "LIPPSZYC"));
    }

    @Test
    public final void testCompare_Surname_LEWINSKY_LEVINSKI_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("LEWINSKY", "LEVINSKI"));
    }

    @Test
    public final void testCompare_Surname_SZLAMAWICZ_SHLAMOVITZ_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("SZLAMAWICZ", "SHLAMOVITZ"));
    }

    /** The names are deliberately written with embedded spaces. */
    @Test
    public final void testCompare_Surname_ROSOCHOWACIEC_ROSOKHOVATSETS_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("R o s o ch o w a c ie c", " R o s o k ho v a ts e ts"));
    }

    /** The names are deliberately written with leading and embedded spaces. */
    @Test
    public final void testCompare_Surname_PRZEMYSL_PSHEMESHIL_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals(" P rz e m y s l", " P sh e m e sh i l"));
    }

    // **** END YIDDISH/SLAVIC SECTION ****

    @Test
    public final void testCompare_PETERSON_PETERS_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Peterson", "Peters"));
    }

    @Test
    public final void testCompare_MCGOWAN_MCGEOGHEGAN_SuccessfullyMatched() {
        assertTrue(this.getStringEncoder().isEncodeEquals("McGowan", "Mc Geoghegan"));
    }

    @Test
    public final void testCompare_SurnamesCornerCase_MURPHY_Space_NoMatch() {
        assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", " "));
    }

    @Test
    public final void testCompare_SurnamesCornerCase_MURPHY_NoSpace_NoMatch() {
        assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", ""));
    }

    @Test
    public final void testCompare_SurnameCornerCase_Nulls_NoMatch() {
        assertFalse(this.getStringEncoder().isEncodeEquals(null, null));
    }

    @Test
    public final void testCompare_Surnames_MURPHY_LYNCH_NoMatchExpected() {
        assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", "Lynch"));
    }

    @Test
    public final void testCompare_Forenames_SEAN_JOHN_MatchExpected() {
        assertTrue(this.getStringEncoder().isEncodeEquals("Sean", "John"));
    }

    @Test
    public final void testCompare_Forenames_SEAN_PETE_NoMatchExpected() {
        assertFalse(this.getStringEncoder().isEncodeEquals("Sean", "Pete"));
    }

    // ***** End Region - Test Comparisons - Surnames

    // ***** END REGION - TEST GET MRA COMPARISONS

    /**
     * Supplies the encoder instance exercised by the tests in this class.
     *
     * @return a new {@link MatchRatingApproachEncoder}
     */
    @Override
    protected MatchRatingApproachEncoder createStringEncoder() {
        return new MatchRatingApproachEncoder();
    }
}