001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.language; 019 020 import static org.junit.Assert.assertEquals; 021 import static org.junit.Assert.assertFalse; 022 import static org.junit.Assert.assertTrue; 023 024 import org.apache.commons.codec.StringEncoderAbstractTest; 025 import org.junit.Test; 026 027 /** 028 * Series of tests for the Match Rating Approach algorithm. 029 * 030 * General naming nomenclature for the test is of the form: 031 * GeneralMetadataOnTheTestArea_ActualTestValues_ExpectedResult 032 * 033 * An unusual value is indicated by the term "corner case" 034 */ 035 public class MatchRatingApproachEncoderTest extends StringEncoderAbstractTest<MatchRatingApproachEncoder> { 036 037 // ********** BEGIN REGION - TEST SUPPORT METHODS 038 039 @Test 040 public final void testAccentRemoval_AllLower_SuccessfullyRemoved() { 041 assertEquals("aeiou", this.getStringEncoder().removeAccents("áéÃóú")); 042 } 043 044 @Test 045 public final void testAccentRemoval_WithSpaces_SuccessfullyRemovedAndSpacesInvariant() { 046 assertEquals("ae io u", this.getStringEncoder().removeAccents("áé Ãó ú")); 047 } 048 049 @Test 050 public final void testAccentRemoval_UpperandLower_SuccessfullyRemovedAndCaseInvariant() { 051 assertEquals("AeiOuu", this.getStringEncoder().removeAccents("Ã?eÃÓuu")); 052 } 053 054 @Test 055 public final void testAccentRemoval_MixedWithUnusualChars_SuccessfullyRemovedAndUnusualcharactersInvariant() { 056 assertEquals("A-e'i.,o&u", this.getStringEncoder().removeAccents("Ã?-e'Ã.,ó&ú")); 057 } 058 059 @Test 060 public final void testAccentRemoval_GerSpanFrenMix_SuccessfullyRemoved() { 061 assertEquals("aeoußAEOUnNa", this.getStringEncoder().removeAccents("äëöüßÄËÖÜñÑà ")); 062 } 063 064 @Test 065 public final void testAccentRemoval_ComprehensiveAccentMix_AllSuccessfullyRemoved() { 066 assertEquals("E,E,E,E,U,U,I,I,A,A,O,e,e,e,e,u,u,i,i,a,a,o,c", 067 this.getStringEncoder().removeAccents("È,É,Ê,Ë,Û,Ù,Ã?,ÃŽ,À,Â,Ô,è,é,ê,ë,û,ù,ï,î,à ,â,ô,ç")); 068 } 069 070 @Test 071 public final void testAccentRemovalNormalString_NoChange() { 072 assertEquals("Colorless green ideas sleep furiously", this.getStringEncoder().removeAccents("Colorless green ideas sleep furiously")); 073 } 074 075 @Test 076 public final void testAccentRemoval_NINO_NoChange() { 077 assertEquals("", this.getStringEncoder().removeAccents("")); 078 } 079 080 @Test 081 public final void testAccentRemoval_NullValue_ReturnNullSuccessfully() { 082 assertEquals(null, this.getStringEncoder().removeAccents(null)); 083 } 084 085 @Test 086 public final void testRemoveSingleDoubleConsonants_BUBLE_RemovedSuccessfully() { 087 assertEquals("BUBLE", this.getStringEncoder().removeDoubleConsonants("BUBBLE")); 088 } 089 090 @Test 091 public final void testRemoveDoubleConsonants_MISSISSIPPI_RemovedSuccessfully() { 092 assertEquals("MISISIPI", this.getStringEncoder().removeDoubleConsonants("MISSISSIPPI")); 093 } 094 095 @Test 096 public final void testRemoveDoubleDoubleVowel_BEETLE_NotRemoved() { 097 assertEquals("BEETLE", this.getStringEncoder().removeDoubleConsonants("BEETLE")); 098 } 099 100 @Test 101 public final void testIsVowel_CapitalA_ReturnsTrue() { 102 assertTrue(this.getStringEncoder().isVowel("A")); 103 } 104 105 @Test 106 public final void testIsVowel_SmallD_ReturnsFalse() { 107 assertFalse(this.getStringEncoder().isVowel("d")); 108 } 109 110 @Test 111 public final void testRemoveVowel_ALESSANDRA_Returns_ALSSNDR() { 112 assertEquals("ALSSNDR", this.getStringEncoder().removeVowels("ALESSANDRA")); 113 } 114 115 @Test 116 public final void testRemoveVowel__AIDAN_Returns_ADN() { 117 assertEquals("ADN", this.getStringEncoder().removeVowels("AIDAN")); 118 } 119 120 @Test 121 public final void testRemoveVowel__DECLAN_Returns_DCLN() { 122 assertEquals("DCLN", this.getStringEncoder().removeVowels("DECLAN")); 123 } 124 125 @Test 126 public final void testGetFirstLast3__ALEXANDER_Returns_Aleder() { 127 assertEquals("Aleder", this.getStringEncoder().getFirst3Last3("Alexzander")); 128 } 129 130 @Test 131 public final void testGetFirstLast3_PETE_Returns_PETE() { 132 assertEquals("PETE", this.getStringEncoder().getFirst3Last3("PETE")); 133 } 134 135 @Test 136 public final void testleftTorightThenRightToLeft_ALEXANDER_ALEXANDRA_Returns4() { 137 assertEquals(4, this.getStringEncoder().leftToRightThenRightToLeftProcessing("ALEXANDER", "ALEXANDRA")); 138 } 139 140 @Test 141 public final void testleftTorightThenRightToLeft_EINSTEIN_MICHAELA_Returns0() { 142 assertEquals(0, this.getStringEncoder().leftToRightThenRightToLeftProcessing("EINSTEIN", "MICHAELA")); 143 } 144 145 @Test 146 public final void testGetMinRating_7_Return4_Successfully() { 147 assertEquals(4, this.getStringEncoder().getMinRating(7)); 148 } 149 150 @Test 151 public final void testGetMinRating_1_Returns5_Successfully() { 152 assertEquals(5, this.getStringEncoder().getMinRating(1)); 153 } 154 155 @Test 156 public final void testGetMinRating_2_Returns5_Successfully() { 157 assertEquals(5, this.getStringEncoder().getMinRating(2)); 158 } 159 160 @Test 161 public final void testgetMinRating_5_Returns4_Successfully(){ 162 assertEquals(4, this.getStringEncoder().getMinRating(5)); 163 } 164 165 @Test 166 public final void testgetMinRating_5_Returns4_Successfully2(){ 167 assertEquals(4, this.getStringEncoder().getMinRating(5)); 168 } 169 170 @Test 171 public final void testgetMinRating_6_Returns4_Successfully(){ 172 assertEquals(4, this.getStringEncoder().getMinRating(6)); 173 } 174 175 @Test 176 public final void testgetMinRating_7_Returns4_Successfully(){ 177 assertEquals(4, this.getStringEncoder().getMinRating(7)); 178 } 179 180 @Test 181 public final void testgetMinRating_8_Returns3_Successfully(){ 182 assertEquals(3, this.getStringEncoder().getMinRating(8)); 183 } 184 185 @Test 186 public final void testgetMinRating_10_Returns3_Successfully(){ 187 assertEquals(3, this.getStringEncoder().getMinRating(10)); 188 } 189 190 @Test 191 public final void testgetMinRating_11_Returns_3_Successfully(){ 192 assertEquals(3, this.getStringEncoder().getMinRating(11)); 193 } 194 195 @Test 196 public final void testGetMinRating_13_Returns_1_Successfully() { 197 assertEquals(1, this.getStringEncoder().getMinRating(13)); 198 } 199 200 @Test 201 public final void testcleanName_SuccessfullyClean() { 202 assertEquals("THISISATEST", this.getStringEncoder().cleanName("This-Ãs a t.,es &t")); 203 } 204 205 @Test 206 public final void testisVowel_SingleVowel_ReturnsTrue() { 207 assertTrue(this.getStringEncoder().isVowel(("I"))); 208 } 209 210 @Test 211 public final void testisEncodeEquals_CornerCase_SecondNameNothing_ReturnsFalse() { 212 assertFalse(this.getStringEncoder().isEncodeEquals("test", "")); 213 } 214 215 @Test 216 public final void testisEncodeEquals_CornerCase_FirstNameNothing_ReturnsFalse() { 217 assertFalse(this.getStringEncoder().isEncodeEquals("", "test")); 218 } 219 220 @Test 221 public final void testisEncodeEquals_CornerCase_SecondNameJustSpace_ReturnsFalse() { 222 assertFalse(this.getStringEncoder().isEncodeEquals("test", " ")); 223 } 224 225 @Test 226 public final void testisEncodeEquals_CornerCase_FirstNameJustSpace_ReturnsFalse() { 227 assertFalse(this.getStringEncoder().isEncodeEquals(" ", "test")); 228 } 229 230 @Test 231 public final void testisEncodeEquals_CornerCase_SecondNameNull_ReturnsFalse() { 232 assertFalse(this.getStringEncoder().isEncodeEquals("test", null)); 233 } 234 235 @Test 236 public final void testisEncodeEquals_CornerCase_FirstNameNull_ReturnsFalse() { 237 assertFalse(this.getStringEncoder().isEncodeEquals(null, "test")); 238 } 239 240 @Test 241 public final void testisEncodeEquals_CornerCase_FirstNameJust1Letter_ReturnsFalse() { 242 assertFalse(this.getStringEncoder().isEncodeEquals("t", "test")); 243 } 244 245 @Test 246 public final void testisEncodeEqualsSecondNameJust1Letter_ReturnsFalse() { 247 assertFalse(this.getStringEncoder().isEncodeEquals("test", "t")); 248 } 249 250 // ***** END REGION - TEST SUPPORT METHODS 251 252 // ***** BEGIN REGION - TEST GET MRA ENCODING 253 254 @Test 255 public final void testGetEncoding_HARPER_HRPR() { 256 assertEquals("HRPR", this.getStringEncoder().encode("HARPER")); 257 } 258 259 @Test 260 public final void testGetEncoding_SMITH_to_SMTH() { 261 assertEquals("SMTH", this.getStringEncoder().encode("Smith")); 262 } 263 264 @Test 265 public final void testGetEncoding_SMYTH_to_SMYTH() { 266 assertEquals("SMYTH", this.getStringEncoder().encode("Smyth")); 267 } 268 269 @Test 270 public final void testGetEncoding_Space_to_Nothing() { 271 assertEquals("", this.getStringEncoder().encode(" ")); 272 } 273 274 @Test 275 public final void testGetEncoding_NoSpace_to_Nothing() { 276 assertEquals("", this.getStringEncoder().encode("")); 277 } 278 279 @Test 280 public final void testGetEncoding_Null_to_Nothing() { 281 assertEquals("", this.getStringEncoder().encode(null)); 282 } 283 284 @Test 285 public final void testGetEncoding_One_Letter_to_Nothing() { 286 assertEquals("", this.getStringEncoder().encode("E")); 287 } 288 289 @Test 290 public final void testCompareNameNullSpace_ReturnsFalseSuccessfully() { 291 assertFalse(getStringEncoder().isEncodeEquals(null, " ")); 292 } 293 294 @Test 295 public final void testCompareNameSameNames_ReturnsFalseSuccessfully() { 296 assertTrue(getStringEncoder().isEncodeEquals("John", "John")); 297 } 298 299 // ***** END REGION - TEST GET MRA ENCODING 300 301 // ***** BEGIN REGION - TEST GET MRA COMPARISONS 302 303 @Test 304 public final void testCompare_SMITH_SMYTH_SuccessfullyMatched() { 305 assertTrue(this.getStringEncoder().isEncodeEquals("smith", "smyth")); 306 } 307 308 @Test 309 public final void testCompare_BURNS_BOURNE_SuccessfullyMatched() { 310 assertTrue(this.getStringEncoder().isEncodeEquals("Burns", "Bourne")); 311 } 312 313 @Test 314 public final void testCompare_ShortNames_AL_ED_WorksButNoMatch() { 315 assertFalse(this.getStringEncoder().isEncodeEquals("Al", "Ed")); 316 } 317 318 @Test 319 public final void testCompare_CATHERINE_KATHRYN_SuccessfullyMatched() { 320 assertTrue(this.getStringEncoder().isEncodeEquals("Catherine", "Kathryn")); 321 } 322 323 @Test 324 public final void testCompare_BRIAN_BRYAN_SuccessfullyMatched() { 325 assertTrue(this.getStringEncoder().isEncodeEquals("Brian", "Bryan")); 326 } 327 328 @Test 329 public final void testCompare_SEAN_SHAUN_SuccessfullyMatched() { 330 assertTrue(this.getStringEncoder().isEncodeEquals("Séan", "Shaun")); 331 } 332 333 @Test 334 public final void testCompare_COLM_COLIN_WithAccentsAndSymbolsAndSpaces_SuccessfullyMatched() { 335 assertTrue(this.getStringEncoder().isEncodeEquals("Cólm. ", "C-olÃn")); 336 } 337 338 @Test 339 public final void testCompare_STEPHEN_STEVEN_SuccessfullyMatched() { 340 assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Steven")); 341 } 342 343 @Test 344 public final void testCompare_STEVEN_STEFAN_SuccessfullyMatched() { 345 assertTrue(this.getStringEncoder().isEncodeEquals("Steven", "Stefan")); 346 } 347 348 @Test 349 public final void testCompare_STEPHEN_STEFAN_SuccessfullyMatched() { 350 assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Stefan")); 351 } 352 353 @Test 354 public final void testCompare_SAM_SAMUEL_SuccessfullyMatched() { 355 assertTrue(this.getStringEncoder().isEncodeEquals("Sam", "Samuel")); 356 } 357 358 @Test 359 public final void testCompare_MICKY_MICHAEL_SuccessfullyMatched() { 360 assertTrue(this.getStringEncoder().isEncodeEquals("Micky", "Michael")); 361 } 362 363 @Test 364 public final void testCompare_OONA_OONAGH_SuccessfullyMatched() { 365 assertTrue(this.getStringEncoder().isEncodeEquals("Oona", "Oonagh")); 366 } 367 368 @Test 369 public final void testCompare_SOPHIE_SOFIA_SuccessfullyMatched() { 370 assertTrue(this.getStringEncoder().isEncodeEquals("Sophie", "Sofia")); 371 } 372 373 @Test 374 public final void testCompare_FRANCISZEK_FRANCES_SuccessfullyMatched() { 375 assertTrue(this.getStringEncoder().isEncodeEquals("Franciszek", "Frances")); 376 } 377 378 @Test 379 public final void testCompare_TOMASZ_TOM_SuccessfullyMatched() { 380 assertTrue(this.getStringEncoder().isEncodeEquals("Tomasz", "tom")); 381 } 382 383 @Test 384 public final void testCompare_SmallInput_CARK_Kl_SuccessfullyMatched() { 385 assertTrue(this.getStringEncoder().isEncodeEquals("Kl", "Karl")); 386 } 387 388 @Test 389 public final void testCompareNameToSingleLetter_KARL_C_DoesNotMatch() { 390 assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "C")); 391 } 392 393 @Test 394 public final void testCompare_ZACH_ZAKARIA_SuccessfullyMatched() { 395 assertTrue(this.getStringEncoder().isEncodeEquals("Zach", "Zacharia")); 396 } 397 398 @Test 399 public final void testCompare_KARL_ALESSANDRO_DoesNotMatch() { 400 assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "Alessandro")); 401 } 402 403 @Test 404 public final void testCompare_Forenames_UNA_OONAGH_ShouldSuccessfullyMatchButDoesNot() { 405 assertFalse(this.getStringEncoder().isEncodeEquals("Úna", "Oonagh")); // Disappointing 406 } 407 408 // ***** Begin Region - Test Get Encoding - Surnames 409 410 @Test 411 public final void testCompare_Surname_OSULLIVAN_OSUILLEABHAIN_SuccessfulMatch() { 412 assertTrue(this.getStringEncoder().isEncodeEquals("O'Sullivan", "Ó ' Súilleabháin")); 413 } 414 415 @Test 416 public final void testCompare_LongSurnames_MORIARTY_OMUIRCHEARTAIGH_DoesNotSuccessfulMatch() { 417 assertFalse(this.getStringEncoder().isEncodeEquals("Moriarty", "OMuircheartaigh")); 418 } 419 420 @Test 421 public final void testCompare_LongSurnames_OMUIRCHEARTAIGH_OMIREADHAIGH_SuccessfulMatch() { 422 assertTrue(this.getStringEncoder().isEncodeEquals("o'muireadhaigh", "Ó 'Muircheartaigh ")); 423 } 424 425 @Test 426 public final void testCompare_Surname_COOPERFLYNN_SUPERLYN_SuccessfullyMatched() { 427 assertTrue(this.getStringEncoder().isEncodeEquals("Cooper-Flynn", "Super-Lyn")); 428 } 429 430 @Test 431 public final void testCompare_Surname_HAILEY_HALLEY_SuccessfullyMatched() { 432 assertTrue(this.getStringEncoder().isEncodeEquals("Hailey", "Halley")); 433 } 434 435 // **** BEGIN YIDDISH/SLAVIC SECTION **** 436 437 @Test 438 public final void testCompare_Surname_AUERBACH_UHRBACH_SuccessfullyMatched() { 439 assertTrue(this.getStringEncoder().isEncodeEquals("Auerbach", "Uhrbach")); 440 } 441 442 @Test 443 public final void testCompare_Surname_MOSKOWITZ_MOSKOVITZ_SuccessfullyMatched() { 444 assertTrue(this.getStringEncoder().isEncodeEquals("Moskowitz", "Moskovitz")); 445 } 446 447 @Test 448 public final void testCompare_Surname_LIPSHITZ_LIPPSZYC_SuccessfullyMatched() { 449 assertTrue(this.getStringEncoder().isEncodeEquals("LIPSHITZ", "LIPPSZYC")); 450 } 451 452 @Test 453 public final void testCompare_Surname_LEWINSKY_LEVINSKI_SuccessfullyMatched() { 454 assertTrue(this.getStringEncoder().isEncodeEquals("LEWINSKY", "LEVINSKI")); 455 } 456 457 @Test 458 public final void testCompare_Surname_SZLAMAWICZ_SHLAMOVITZ_SuccessfullyMatched() { 459 assertTrue(this.getStringEncoder().isEncodeEquals("SZLAMAWICZ", "SHLAMOVITZ")); 460 } 461 462 @Test 463 public final void testCompare_Surname_ROSOCHOWACIEC_ROSOKHOVATSETS_SuccessfullyMatched() { 464 assertTrue(this.getStringEncoder().isEncodeEquals("R o s o ch o w a c ie c", " R o s o k ho v a ts e ts")); 465 } 466 467 @Test 468 public final void testCompare_Surname_PRZEMYSL_PSHEMESHIL_SuccessfullyMatched() { 469 assertTrue(this.getStringEncoder().isEncodeEquals(" P rz e m y s l", " P sh e m e sh i l")); 470 } 471 472 // **** END YIDDISH/SLAVIC SECTION **** 473 474 @Test 475 public final void testCompare_PETERSON_PETERS_SuccessfullyMatched() { 476 assertTrue(this.getStringEncoder().isEncodeEquals("Peterson", "Peters")); 477 } 478 479 @Test 480 public final void testCompare_MCGOWAN_MCGEOGHEGAN_SuccessfullyMatched() { 481 assertTrue(this.getStringEncoder().isEncodeEquals("McGowan", "Mc Geoghegan")); 482 } 483 484 @Test 485 public final void testCompare_SurnamesCornerCase_MURPHY_Space_NoMatch() { 486 assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", " ")); 487 } 488 489 @Test 490 public final void testCompare_SurnamesCornerCase_MURPHY_NoSpace_NoMatch() { 491 assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", "")); 492 } 493 494 @Test 495 public final void testCompare_SurnameCornerCase_Nulls_NoMatch() { 496 assertFalse(this.getStringEncoder().isEncodeEquals(null, null)); 497 } 498 499 @Test 500 public final void testCompare_Surnames_MURPHY_LYNCH_NoMatchExpected() { 501 assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", "Lynch")); 502 } 503 504 @Test 505 public final void testCompare_Forenames_SEAN_JOHN_MatchExpected() { 506 assertTrue(this.getStringEncoder().isEncodeEquals("Sean", "John")); 507 } 508 509 @Test 510 public final void testCompare_Forenames_SEAN_PETE_NoMatchExpected() { 511 assertFalse(this.getStringEncoder().isEncodeEquals("Sean", "Pete")); 512 } 513 514 @Override 515 protected MatchRatingApproachEncoder createStringEncoder() { 516 return new MatchRatingApproachEncoder(); 517 } 518 519 // ***** END REGION - TEST GET MRA COMPARISONS 520 521 }