001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.language;
019
020 import static org.junit.Assert.assertEquals;
021 import static org.junit.Assert.assertFalse;
022 import static org.junit.Assert.assertTrue;
023
024 import org.apache.commons.codec.StringEncoderAbstractTest;
025 import org.junit.Test;
026
027 /**
028 * Series of tests for the Match Rating Approach algorithm.
029 *
030 * General naming nomenclature for the test is of the form:
031 * GeneralMetadataOnTheTestArea_ActualTestValues_ExpectedResult
032 *
033 * An unusual value is indicated by the term "corner case"
034 */
035 public class MatchRatingApproachEncoderTest extends StringEncoderAbstractTest<MatchRatingApproachEncoder> {
036
037 // ********** BEGIN REGION - TEST SUPPORT METHODS
038
039 @Test
040 public final void testAccentRemoval_AllLower_SuccessfullyRemoved() {
041 assertEquals("aeiou", this.getStringEncoder().removeAccents("áéÃóú"));
042 }
043
044 @Test
045 public final void testAccentRemoval_WithSpaces_SuccessfullyRemovedAndSpacesInvariant() {
046 assertEquals("ae io u", this.getStringEncoder().removeAccents("áé Ãó ú"));
047 }
048
049 @Test
050 public final void testAccentRemoval_UpperandLower_SuccessfullyRemovedAndCaseInvariant() {
051 assertEquals("AeiOuu", this.getStringEncoder().removeAccents("Ã?eÃÓuu"));
052 }
053
054 @Test
055 public final void testAccentRemoval_MixedWithUnusualChars_SuccessfullyRemovedAndUnusualcharactersInvariant() {
056 assertEquals("A-e'i.,o&u", this.getStringEncoder().removeAccents("Ã?-e'Ã.,ó&ú"));
057 }
058
059 @Test
060 public final void testAccentRemoval_GerSpanFrenMix_SuccessfullyRemoved() {
061 assertEquals("aeoußAEOUnNa", this.getStringEncoder().removeAccents("äëöüßÄËÖÜñÑà "));
062 }
063
064 @Test
065 public final void testAccentRemoval_ComprehensiveAccentMix_AllSuccessfullyRemoved() {
066 assertEquals("E,E,E,E,U,U,I,I,A,A,O,e,e,e,e,u,u,i,i,a,a,o,c",
067 this.getStringEncoder().removeAccents("È,É,Ê,Ë,Û,Ù,�,Î,À,Â,Ô,è,é,ê,ë,û,ù,ï,î,à ,â,ô,ç"));
068 }
069
070 @Test
071 public final void testAccentRemovalNormalString_NoChange() {
072 assertEquals("Colorless green ideas sleep furiously", this.getStringEncoder().removeAccents("Colorless green ideas sleep furiously"));
073 }
074
075 @Test
076 public final void testAccentRemoval_NINO_NoChange() {
077 assertEquals("", this.getStringEncoder().removeAccents(""));
078 }
079
080 @Test
081 public final void testAccentRemoval_NullValue_ReturnNullSuccessfully() {
082 assertEquals(null, this.getStringEncoder().removeAccents(null));
083 }
084
085 @Test
086 public final void testRemoveSingleDoubleConsonants_BUBLE_RemovedSuccessfully() {
087 assertEquals("BUBLE", this.getStringEncoder().removeDoubleConsonants("BUBBLE"));
088 }
089
090 @Test
091 public final void testRemoveDoubleConsonants_MISSISSIPPI_RemovedSuccessfully() {
092 assertEquals("MISISIPI", this.getStringEncoder().removeDoubleConsonants("MISSISSIPPI"));
093 }
094
095 @Test
096 public final void testRemoveDoubleDoubleVowel_BEETLE_NotRemoved() {
097 assertEquals("BEETLE", this.getStringEncoder().removeDoubleConsonants("BEETLE"));
098 }
099
100 @Test
101 public final void testIsVowel_CapitalA_ReturnsTrue() {
102 assertTrue(this.getStringEncoder().isVowel("A"));
103 }
104
105 @Test
106 public final void testIsVowel_SmallD_ReturnsFalse() {
107 assertFalse(this.getStringEncoder().isVowel("d"));
108 }
109
110 @Test
111 public final void testRemoveVowel_ALESSANDRA_Returns_ALSSNDR() {
112 assertEquals("ALSSNDR", this.getStringEncoder().removeVowels("ALESSANDRA"));
113 }
114
115 @Test
116 public final void testRemoveVowel__AIDAN_Returns_ADN() {
117 assertEquals("ADN", this.getStringEncoder().removeVowels("AIDAN"));
118 }
119
120 @Test
121 public final void testRemoveVowel__DECLAN_Returns_DCLN() {
122 assertEquals("DCLN", this.getStringEncoder().removeVowels("DECLAN"));
123 }
124
125 @Test
126 public final void testGetFirstLast3__ALEXANDER_Returns_Aleder() {
127 assertEquals("Aleder", this.getStringEncoder().getFirst3Last3("Alexzander"));
128 }
129
130 @Test
131 public final void testGetFirstLast3_PETE_Returns_PETE() {
132 assertEquals("PETE", this.getStringEncoder().getFirst3Last3("PETE"));
133 }
134
135 @Test
136 public final void testleftTorightThenRightToLeft_ALEXANDER_ALEXANDRA_Returns4() {
137 assertEquals(4, this.getStringEncoder().leftToRightThenRightToLeftProcessing("ALEXANDER", "ALEXANDRA"));
138 }
139
140 @Test
141 public final void testleftTorightThenRightToLeft_EINSTEIN_MICHAELA_Returns0() {
142 assertEquals(0, this.getStringEncoder().leftToRightThenRightToLeftProcessing("EINSTEIN", "MICHAELA"));
143 }
144
145 @Test
146 public final void testGetMinRating_7_Return4_Successfully() {
147 assertEquals(4, this.getStringEncoder().getMinRating(7));
148 }
149
150 @Test
151 public final void testGetMinRating_1_Returns5_Successfully() {
152 assertEquals(5, this.getStringEncoder().getMinRating(1));
153 }
154
155 @Test
156 public final void testGetMinRating_2_Returns5_Successfully() {
157 assertEquals(5, this.getStringEncoder().getMinRating(2));
158 }
159
160 @Test
161 public final void testgetMinRating_5_Returns4_Successfully(){
162 assertEquals(4, this.getStringEncoder().getMinRating(5));
163 }
164
165 @Test
166 public final void testgetMinRating_5_Returns4_Successfully2(){
167 assertEquals(4, this.getStringEncoder().getMinRating(5));
168 }
169
170 @Test
171 public final void testgetMinRating_6_Returns4_Successfully(){
172 assertEquals(4, this.getStringEncoder().getMinRating(6));
173 }
174
175 @Test
176 public final void testgetMinRating_7_Returns4_Successfully(){
177 assertEquals(4, this.getStringEncoder().getMinRating(7));
178 }
179
180 @Test
181 public final void testgetMinRating_8_Returns3_Successfully(){
182 assertEquals(3, this.getStringEncoder().getMinRating(8));
183 }
184
185 @Test
186 public final void testgetMinRating_10_Returns3_Successfully(){
187 assertEquals(3, this.getStringEncoder().getMinRating(10));
188 }
189
190 @Test
191 public final void testgetMinRating_11_Returns_3_Successfully(){
192 assertEquals(3, this.getStringEncoder().getMinRating(11));
193 }
194
195 @Test
196 public final void testGetMinRating_13_Returns_1_Successfully() {
197 assertEquals(1, this.getStringEncoder().getMinRating(13));
198 }
199
200 @Test
201 public final void testcleanName_SuccessfullyClean() {
202 assertEquals("THISISATEST", this.getStringEncoder().cleanName("This-Ãs a t.,es &t"));
203 }
204
205 @Test
206 public final void testisVowel_SingleVowel_ReturnsTrue() {
207 assertTrue(this.getStringEncoder().isVowel(("I")));
208 }
209
210 @Test
211 public final void testisEncodeEquals_CornerCase_SecondNameNothing_ReturnsFalse() {
212 assertFalse(this.getStringEncoder().isEncodeEquals("test", ""));
213 }
214
215 @Test
216 public final void testisEncodeEquals_CornerCase_FirstNameNothing_ReturnsFalse() {
217 assertFalse(this.getStringEncoder().isEncodeEquals("", "test"));
218 }
219
220 @Test
221 public final void testisEncodeEquals_CornerCase_SecondNameJustSpace_ReturnsFalse() {
222 assertFalse(this.getStringEncoder().isEncodeEquals("test", " "));
223 }
224
225 @Test
226 public final void testisEncodeEquals_CornerCase_FirstNameJustSpace_ReturnsFalse() {
227 assertFalse(this.getStringEncoder().isEncodeEquals(" ", "test"));
228 }
229
230 @Test
231 public final void testisEncodeEquals_CornerCase_SecondNameNull_ReturnsFalse() {
232 assertFalse(this.getStringEncoder().isEncodeEquals("test", null));
233 }
234
235 @Test
236 public final void testisEncodeEquals_CornerCase_FirstNameNull_ReturnsFalse() {
237 assertFalse(this.getStringEncoder().isEncodeEquals(null, "test"));
238 }
239
240 @Test
241 public final void testisEncodeEquals_CornerCase_FirstNameJust1Letter_ReturnsFalse() {
242 assertFalse(this.getStringEncoder().isEncodeEquals("t", "test"));
243 }
244
245 @Test
246 public final void testisEncodeEqualsSecondNameJust1Letter_ReturnsFalse() {
247 assertFalse(this.getStringEncoder().isEncodeEquals("test", "t"));
248 }
249
250 // ***** END REGION - TEST SUPPORT METHODS
251
252 // ***** BEGIN REGION - TEST GET MRA ENCODING
253
254 @Test
255 public final void testGetEncoding_HARPER_HRPR() {
256 assertEquals("HRPR", this.getStringEncoder().encode("HARPER"));
257 }
258
259 @Test
260 public final void testGetEncoding_SMITH_to_SMTH() {
261 assertEquals("SMTH", this.getStringEncoder().encode("Smith"));
262 }
263
264 @Test
265 public final void testGetEncoding_SMYTH_to_SMYTH() {
266 assertEquals("SMYTH", this.getStringEncoder().encode("Smyth"));
267 }
268
269 @Test
270 public final void testGetEncoding_Space_to_Nothing() {
271 assertEquals("", this.getStringEncoder().encode(" "));
272 }
273
274 @Test
275 public final void testGetEncoding_NoSpace_to_Nothing() {
276 assertEquals("", this.getStringEncoder().encode(""));
277 }
278
279 @Test
280 public final void testGetEncoding_Null_to_Nothing() {
281 assertEquals("", this.getStringEncoder().encode(null));
282 }
283
284 @Test
285 public final void testGetEncoding_One_Letter_to_Nothing() {
286 assertEquals("", this.getStringEncoder().encode("E"));
287 }
288
289 @Test
290 public final void testCompareNameNullSpace_ReturnsFalseSuccessfully() {
291 assertFalse(getStringEncoder().isEncodeEquals(null, " "));
292 }
293
294 @Test
295 public final void testCompareNameSameNames_ReturnsFalseSuccessfully() {
296 assertTrue(getStringEncoder().isEncodeEquals("John", "John"));
297 }
298
299 // ***** END REGION - TEST GET MRA ENCODING
300
301 // ***** BEGIN REGION - TEST GET MRA COMPARISONS
302
303 @Test
304 public final void testCompare_SMITH_SMYTH_SuccessfullyMatched() {
305 assertTrue(this.getStringEncoder().isEncodeEquals("smith", "smyth"));
306 }
307
308 @Test
309 public final void testCompare_BURNS_BOURNE_SuccessfullyMatched() {
310 assertTrue(this.getStringEncoder().isEncodeEquals("Burns", "Bourne"));
311 }
312
313 @Test
314 public final void testCompare_ShortNames_AL_ED_WorksButNoMatch() {
315 assertFalse(this.getStringEncoder().isEncodeEquals("Al", "Ed"));
316 }
317
318 @Test
319 public final void testCompare_CATHERINE_KATHRYN_SuccessfullyMatched() {
320 assertTrue(this.getStringEncoder().isEncodeEquals("Catherine", "Kathryn"));
321 }
322
323 @Test
324 public final void testCompare_BRIAN_BRYAN_SuccessfullyMatched() {
325 assertTrue(this.getStringEncoder().isEncodeEquals("Brian", "Bryan"));
326 }
327
328 @Test
329 public final void testCompare_SEAN_SHAUN_SuccessfullyMatched() {
330 assertTrue(this.getStringEncoder().isEncodeEquals("Séan", "Shaun"));
331 }
332
333 @Test
334 public final void testCompare_COLM_COLIN_WithAccentsAndSymbolsAndSpaces_SuccessfullyMatched() {
335 assertTrue(this.getStringEncoder().isEncodeEquals("Cólm. ", "C-olÃn"));
336 }
337
338 @Test
339 public final void testCompare_STEPHEN_STEVEN_SuccessfullyMatched() {
340 assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Steven"));
341 }
342
343 @Test
344 public final void testCompare_STEVEN_STEFAN_SuccessfullyMatched() {
345 assertTrue(this.getStringEncoder().isEncodeEquals("Steven", "Stefan"));
346 }
347
348 @Test
349 public final void testCompare_STEPHEN_STEFAN_SuccessfullyMatched() {
350 assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Stefan"));
351 }
352
353 @Test
354 public final void testCompare_SAM_SAMUEL_SuccessfullyMatched() {
355 assertTrue(this.getStringEncoder().isEncodeEquals("Sam", "Samuel"));
356 }
357
358 @Test
359 public final void testCompare_MICKY_MICHAEL_SuccessfullyMatched() {
360 assertTrue(this.getStringEncoder().isEncodeEquals("Micky", "Michael"));
361 }
362
363 @Test
364 public final void testCompare_OONA_OONAGH_SuccessfullyMatched() {
365 assertTrue(this.getStringEncoder().isEncodeEquals("Oona", "Oonagh"));
366 }
367
368 @Test
369 public final void testCompare_SOPHIE_SOFIA_SuccessfullyMatched() {
370 assertTrue(this.getStringEncoder().isEncodeEquals("Sophie", "Sofia"));
371 }
372
373 @Test
374 public final void testCompare_FRANCISZEK_FRANCES_SuccessfullyMatched() {
375 assertTrue(this.getStringEncoder().isEncodeEquals("Franciszek", "Frances"));
376 }
377
378 @Test
379 public final void testCompare_TOMASZ_TOM_SuccessfullyMatched() {
380 assertTrue(this.getStringEncoder().isEncodeEquals("Tomasz", "tom"));
381 }
382
383 @Test
384 public final void testCompare_SmallInput_CARK_Kl_SuccessfullyMatched() {
385 assertTrue(this.getStringEncoder().isEncodeEquals("Kl", "Karl"));
386 }
387
388 @Test
389 public final void testCompareNameToSingleLetter_KARL_C_DoesNotMatch() {
390 assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "C"));
391 }
392
393 @Test
394 public final void testCompare_ZACH_ZAKARIA_SuccessfullyMatched() {
395 assertTrue(this.getStringEncoder().isEncodeEquals("Zach", "Zacharia"));
396 }
397
398 @Test
399 public final void testCompare_KARL_ALESSANDRO_DoesNotMatch() {
400 assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "Alessandro"));
401 }
402
403 @Test
404 public final void testCompare_Forenames_UNA_OONAGH_ShouldSuccessfullyMatchButDoesNot() {
405 assertFalse(this.getStringEncoder().isEncodeEquals("Úna", "Oonagh")); // Disappointing
406 }
407
408 // ***** Begin Region - Test Get Encoding - Surnames
409
410 @Test
411 public final void testCompare_Surname_OSULLIVAN_OSUILLEABHAIN_SuccessfulMatch() {
412 assertTrue(this.getStringEncoder().isEncodeEquals("O'Sullivan", "Ó ' Súilleabháin"));
413 }
414
415 @Test
416 public final void testCompare_LongSurnames_MORIARTY_OMUIRCHEARTAIGH_DoesNotSuccessfulMatch() {
417 assertFalse(this.getStringEncoder().isEncodeEquals("Moriarty", "OMuircheartaigh"));
418 }
419
420 @Test
421 public final void testCompare_LongSurnames_OMUIRCHEARTAIGH_OMIREADHAIGH_SuccessfulMatch() {
422 assertTrue(this.getStringEncoder().isEncodeEquals("o'muireadhaigh", "Ó 'Muircheartaigh "));
423 }
424
425 @Test
426 public final void testCompare_Surname_COOPERFLYNN_SUPERLYN_SuccessfullyMatched() {
427 assertTrue(this.getStringEncoder().isEncodeEquals("Cooper-Flynn", "Super-Lyn"));
428 }
429
430 @Test
431 public final void testCompare_Surname_HAILEY_HALLEY_SuccessfullyMatched() {
432 assertTrue(this.getStringEncoder().isEncodeEquals("Hailey", "Halley"));
433 }
434
435 // **** BEGIN YIDDISH/SLAVIC SECTION ****
436
437 @Test
438 public final void testCompare_Surname_AUERBACH_UHRBACH_SuccessfullyMatched() {
439 assertTrue(this.getStringEncoder().isEncodeEquals("Auerbach", "Uhrbach"));
440 }
441
442 @Test
443 public final void testCompare_Surname_MOSKOWITZ_MOSKOVITZ_SuccessfullyMatched() {
444 assertTrue(this.getStringEncoder().isEncodeEquals("Moskowitz", "Moskovitz"));
445 }
446
447 @Test
448 public final void testCompare_Surname_LIPSHITZ_LIPPSZYC_SuccessfullyMatched() {
449 assertTrue(this.getStringEncoder().isEncodeEquals("LIPSHITZ", "LIPPSZYC"));
450 }
451
452 @Test
453 public final void testCompare_Surname_LEWINSKY_LEVINSKI_SuccessfullyMatched() {
454 assertTrue(this.getStringEncoder().isEncodeEquals("LEWINSKY", "LEVINSKI"));
455 }
456
457 @Test
458 public final void testCompare_Surname_SZLAMAWICZ_SHLAMOVITZ_SuccessfullyMatched() {
459 assertTrue(this.getStringEncoder().isEncodeEquals("SZLAMAWICZ", "SHLAMOVITZ"));
460 }
461
462 @Test
463 public final void testCompare_Surname_ROSOCHOWACIEC_ROSOKHOVATSETS_SuccessfullyMatched() {
464 assertTrue(this.getStringEncoder().isEncodeEquals("R o s o ch o w a c ie c", " R o s o k ho v a ts e ts"));
465 }
466
467 @Test
468 public final void testCompare_Surname_PRZEMYSL_PSHEMESHIL_SuccessfullyMatched() {
469 assertTrue(this.getStringEncoder().isEncodeEquals(" P rz e m y s l", " P sh e m e sh i l"));
470 }
471
472 // **** END YIDDISH/SLAVIC SECTION ****
473
474 @Test
475 public final void testCompare_PETERSON_PETERS_SuccessfullyMatched() {
476 assertTrue(this.getStringEncoder().isEncodeEquals("Peterson", "Peters"));
477 }
478
479 @Test
480 public final void testCompare_MCGOWAN_MCGEOGHEGAN_SuccessfullyMatched() {
481 assertTrue(this.getStringEncoder().isEncodeEquals("McGowan", "Mc Geoghegan"));
482 }
483
484 @Test
485 public final void testCompare_SurnamesCornerCase_MURPHY_Space_NoMatch() {
486 assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", " "));
487 }
488
489 @Test
490 public final void testCompare_SurnamesCornerCase_MURPHY_NoSpace_NoMatch() {
491 assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", ""));
492 }
493
494 @Test
495 public final void testCompare_SurnameCornerCase_Nulls_NoMatch() {
496 assertFalse(this.getStringEncoder().isEncodeEquals(null, null));
497 }
498
499 @Test
500 public final void testCompare_Surnames_MURPHY_LYNCH_NoMatchExpected() {
501 assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", "Lynch"));
502 }
503
504 @Test
505 public final void testCompare_Forenames_SEAN_JOHN_MatchExpected() {
506 assertTrue(this.getStringEncoder().isEncodeEquals("Sean", "John"));
507 }
508
509 @Test
510 public final void testCompare_Forenames_SEAN_PETE_NoMatchExpected() {
511 assertFalse(this.getStringEncoder().isEncodeEquals("Sean", "Pete"));
512 }
513
514 @Override
515 protected MatchRatingApproachEncoder createStringEncoder() {
516 return new MatchRatingApproachEncoder();
517 }
518
519 // ***** END REGION - TEST GET MRA COMPARISONS
520
521 }