View Javadoc
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  
18  package org.apache.commons.codec.language;
19  
20  import static org.junit.jupiter.api.Assertions.assertEquals;
21  import static org.junit.jupiter.api.Assertions.assertFalse;
22  import static org.junit.jupiter.api.Assertions.assertNull;
23  import static org.junit.jupiter.api.Assertions.assertTrue;
24  
25  import org.apache.commons.codec.AbstractStringEncoderTest;
26  import org.junit.jupiter.api.Test;
27  
28  /**
29   * Series of tests for the Match Rating Approach algorithm.
30   *
31   * General naming nomenclature for the test is of the form:
32   * GeneralMetadataOnTheTestArea_ActualTestValues_ExpectedResult
33   *
34   * An unusual value is indicated by the term "corner case"
35   */
36  class MatchRatingApproachEncoderTest extends AbstractStringEncoderTest<MatchRatingApproachEncoder> {
37  
38      @Override
39      protected MatchRatingApproachEncoder createStringEncoder() {
40          return new MatchRatingApproachEncoder();
41      }
42  
43      @Test
44      final void testAccentRemoval_AllLower_SuccessfullyRemoved() {
45          assertEquals("aeiou", getStringEncoder().removeAccents("áéíóú"));
46      }
47  
48      @Test
49      final void testAccentRemoval_ComprehensiveAccentMix_AllSuccessfullyRemoved() {
50          assertEquals("E,E,E,E,U,U,I,I,A,A,O,e,e,e,e,u,u,i,i,a,a,o,c", getStringEncoder().removeAccents("È,É,Ê,Ë,Û,Ù,Ï,Î,À,Â,Ô,è,é,ê,ë,û,ù,ï,î,à,â,ô,ç"));
51      }
52  
53      @Test
54      final void testAccentRemoval_GerSpanFrenMix_SuccessfullyRemoved() {
55          assertEquals("aeoußAEOUnNa", getStringEncoder().removeAccents("äëöüßÄËÖÜñÑà"));
56      }
57  
58      @Test
59      final void testAccentRemoval_MixedWithUnusualChars_SuccessfullyRemovedAndUnusualCharactersInvariant() {
60          assertEquals("A-e'i.,o&u", getStringEncoder().removeAccents("Á-e'í.,ó&ú"));
61      }
62  
63      @Test
64      final void testAccentRemoval_NINO_NoChange() {
65          assertEquals("", getStringEncoder().removeAccents(""));
66      }
67  
68      @Test
69      final void testAccentRemoval_NullValue_ReturnNullSuccessfully() {
70          assertNull(getStringEncoder().removeAccents(null));
71      }
72  
73      @Test
74      final void testAccentRemoval_UpperAndLower_SuccessfullyRemovedAndCaseInvariant() {
75          assertEquals("AeiOuu", getStringEncoder().removeAccents("ÁeíÓuu"));
76      }
77  
78      @Test
79      final void testAccentRemoval_WithSpaces_SuccessfullyRemovedAndSpacesInvariant() {
80          assertEquals("ae io  u", getStringEncoder().removeAccents("áé íó  ú"));
81      }
82  
83      @Test
84      final void testAccentRemovalNormalString_NoChange() {
85          assertEquals("Colorless green ideas sleep furiously", getStringEncoder().removeAccents("Colorless green ideas sleep furiously"));
86      }
87  
88      @Test
89      final void testCleanNameSuccessfullyClean() {
90          assertEquals("THISISATEST", getStringEncoder().cleanName("This-ís   a t.,es &t"));
91      }
92  
93      @Test
94      final void testCompare_BRIAN_BRYAN_SuccessfullyMatched() {
95          assertTrue(getStringEncoder().isEncodeEquals("Brian", "Bryan"));
96      }
97  
98      @Test
99      final void testCompare_BURNS_BOURNE_SuccessfullyMatched() {
100         assertTrue(getStringEncoder().isEncodeEquals("Burns", "Bourne"));
101     }
102 
103     @Test
104     final void testCompare_CATHERINE_KATHRYN_SuccessfullyMatched() {
105         assertTrue(getStringEncoder().isEncodeEquals("Catherine", "Kathryn"));
106     }
107 
108     @Test
109     final void testCompare_COLM_COLIN_WithAccentsAndSymbolsAndSpaces_SuccessfullyMatched() {
110         assertTrue(getStringEncoder().isEncodeEquals("Cólm.   ", "C-olín"));
111     }
112 
113     @Test
114     final void testCompare_Forenames_SEAN_JOHN_MatchExpected() {
115         assertTrue(getStringEncoder().isEncodeEquals("Sean", "John"));
116     }
117 
118     @Test
119     final void testCompare_Forenames_SEAN_PETE_NoMatchExpected() {
120         assertFalse(getStringEncoder().isEncodeEquals("Sean", "Pete"));
121     }
122 
123     @Test
124     final void testCompare_Forenames_UNA_OONAGH_ShouldSuccessfullyMatchButDoesNot() {
125         assertFalse(getStringEncoder().isEncodeEquals("Úna", "Oonagh")); // Disappointing
126     }
127 
128     @Test
129     final void testCompare_FRANCISZEK_FRANCES_SuccessfullyMatched() {
130         assertTrue(getStringEncoder().isEncodeEquals("Franciszek", "Frances"));
131     }
132 
133     @Test
134     final void testCompare_KARL_ALESSANDRO_DoesNotMatch() {
135         assertFalse(getStringEncoder().isEncodeEquals("Karl", "Alessandro"));
136     }
137 
138     @Test
139     final void testCompare_LongSurnames_MORIARTY_OMUIRCHEARTAIGH_DoesNotSuccessfulMatch() {
140         assertFalse(getStringEncoder().isEncodeEquals("Moriarty", "OMuircheartaigh"));
141     }
142 
143     @Test
144     final void testCompare_LongSurnames_OMUIRCHEARTAIGH_OMIREADHAIGH_SuccessfulMatch() {
145         assertTrue(getStringEncoder().isEncodeEquals("o'muireadhaigh", "Ó 'Muircheartaigh "));
146     }
147 
148     @Test
149     final void testCompare_MCGOWAN_MCGEOGHEGAN_SuccessfullyMatched() {
150         assertTrue(getStringEncoder().isEncodeEquals("McGowan", "Mc Geoghegan"));
151     }
152 
153     @Test
154     final void testCompare_MICKY_MICHAEL_SuccessfullyMatched() {
155         assertTrue(getStringEncoder().isEncodeEquals("Micky", "Michael"));
156     }
157 
158     @Test
159     final void testCompare_OONA_OONAGH_SuccessfullyMatched() {
160         assertTrue(getStringEncoder().isEncodeEquals("Oona", "Oonagh"));
161     }
162 
163     @Test
164     final void testCompare_PETERSON_PETERS_SuccessfullyMatched() {
165         assertTrue(getStringEncoder().isEncodeEquals("Peterson", "Peters"));
166     }
167 
168     @Test
169     final void testCompare_SAM_SAMUEL_SuccessfullyMatched() {
170         assertTrue(getStringEncoder().isEncodeEquals("Sam", "Samuel"));
171     }
172 
173     @Test
174     final void testCompare_SEAN_SHAUN_SuccessfullyMatched() {
175         assertTrue(getStringEncoder().isEncodeEquals("Séan", "Shaun"));
176     }
177 
178     @Test
179     final void testCompare_ShortNames_AL_ED_WorksButNoMatch() {
180         assertFalse(getStringEncoder().isEncodeEquals("Al", "Ed"));
181     }
182 
183     @Test
184     final void testCompare_SmallInput_CARK_Kl_SuccessfullyMatched() {
185         assertTrue(getStringEncoder().isEncodeEquals("Kl", "Karl"));
186     }
187 
188     @Test
189     final void testCompare_SMITH_SMYTH_SuccessfullyMatched() {
190         assertTrue(getStringEncoder().isEncodeEquals("smith", "smyth"));
191     }
192 
193     @Test
194     final void testCompare_SOPHIE_SOFIA_SuccessfullyMatched() {
195         assertTrue(getStringEncoder().isEncodeEquals("Sophie", "Sofia"));
196     }
197 
198     @Test
199     final void testCompare_STEPHEN_STEFAN_SuccessfullyMatched() {
200         assertTrue(getStringEncoder().isEncodeEquals("Stephen", "Stefan"));
201     }
202 
203     @Test
204     final void testCompare_STEPHEN_STEVEN_SuccessfullyMatched() {
205         assertTrue(getStringEncoder().isEncodeEquals("Stephen", "Steven"));
206     }
207 
208     @Test
209     final void testCompare_STEVEN_STEFAN_SuccessfullyMatched() {
210         assertTrue(getStringEncoder().isEncodeEquals("Steven", "Stefan"));
211     }
212 
213     @Test
214     final void testCompare_Surname_AUERBACH_UHRBACH_SuccessfullyMatched() {
215         assertTrue(getStringEncoder().isEncodeEquals("Auerbach", "Uhrbach"));
216     }
217 
218     @Test
219     final void testCompare_Surname_COOPERFLYNN_SUPERLYN_SuccessfullyMatched() {
220         assertTrue(getStringEncoder().isEncodeEquals("Cooper-Flynn", "Super-Lyn"));
221     }
222 
223     @Test
224     final void testCompare_Surname_HAILEY_HALLEY_SuccessfullyMatched() {
225         assertTrue(getStringEncoder().isEncodeEquals("Hailey", "Halley"));
226     }
227 
228     @Test
229     final void testCompare_Surname_LEWINSKY_LEVINSKI_SuccessfullyMatched() {
230         assertTrue(getStringEncoder().isEncodeEquals("LEWINSKY", "LEVINSKI"));
231     }
232 
233     @Test
234     final void testCompare_Surname_LIPSHITZ_LIPPSZYC_SuccessfullyMatched() {
235         assertTrue(getStringEncoder().isEncodeEquals("LIPSHITZ", "LIPPSZYC"));
236     }
237 
238     @Test
239     final void testCompare_Surname_MOSKOWITZ_MOSKOVITZ_SuccessfullyMatched() {
240         assertTrue(getStringEncoder().isEncodeEquals("Moskowitz", "Moskovitz"));
241     }
242 
243     @Test
244     final void testCompare_Surname_OSULLIVAN_OSUILLEABHAIN_SuccessfulMatch() {
245         assertTrue(getStringEncoder().isEncodeEquals("O'Sullivan", "Ó ' Súilleabháin"));
246     }
247 
248     @Test
249     final void testCompare_Surname_PRZEMYSL_PSHEMESHIL_SuccessfullyMatched() {
250         assertTrue(getStringEncoder().isEncodeEquals(" P rz e m y s l", " P sh e m e sh i l"));
251     }
252 
253     @Test
254     final void testCompare_Surname_ROSOCHOWACIEC_ROSOKHOVATSETS_SuccessfullyMatched() {
255         assertTrue(getStringEncoder().isEncodeEquals("R o s o ch o w a c ie c", " R o s o k ho v a ts e ts"));
256     }
257 
258     @Test
259     final void testCompare_Surname_SZLAMAWICZ_SHLAMOVITZ_SuccessfullyMatched() {
260         assertTrue(getStringEncoder().isEncodeEquals("SZLAMAWICZ", "SHLAMOVITZ"));
261     }
262 
263     @Test
264     final void testCompare_SurnameCornerCase_Nulls_NoMatch() {
265         assertFalse(getStringEncoder().isEncodeEquals(null, null));
266     }
267 
268     @Test
269     final void testCompare_Surnames_MURPHY_LYNCH_NoMatchExpected() {
270         assertFalse(getStringEncoder().isEncodeEquals("Murphy", "Lynch"));
271     }
272 
273     @Test
274     final void testCompare_SurnamesCornerCase_MURPHY_NoSpace_NoMatch() {
275         assertFalse(getStringEncoder().isEncodeEquals("Murphy", ""));
276     }
277 
278     @Test
279     final void testCompare_SurnamesCornerCase_MURPHY_Space_NoMatch() {
280         assertFalse(getStringEncoder().isEncodeEquals("Murphy", " "));
281     }
282 
283     @Test
284     final void testCompare_TOMASZ_TOM_SuccessfullyMatched() {
285         assertTrue(getStringEncoder().isEncodeEquals("Tomasz", "tom"));
286     }
287 
288     @Test
289     final void testCompare_ZACH_ZAKARIA_SuccessfullyMatched() {
290         assertTrue(getStringEncoder().isEncodeEquals("Zach", "Zacharia"));
291     }
292 
293     @Test
294     final void testCompareNameNullSpace_ReturnsFalseSuccessfully() {
295         assertFalse(getStringEncoder().isEncodeEquals(null, " "));
296     }
297 
298     @Test
299     final void testCompareNameSameNames_ReturnsFalseSuccessfully() {
300         assertTrue(getStringEncoder().isEncodeEquals("John", "John"));
301     }
302 
303     @Test
304     final void testCompareNameToSingleLetter_KARL_C_DoesNotMatch() {
305         assertFalse(getStringEncoder().isEncodeEquals("Karl", "C"));
306     }
307 
308     @Test
309     final void testCompareWithWhitespace() {
310         // sanity check
311         assertTrue(getStringEncoder().isEncodeEquals("Brian", "Bryan"));
312         // whitespace
313         assertTrue(getStringEncoder().isEncodeEquals(" Brian", "Bryan"));
314         assertTrue(getStringEncoder().isEncodeEquals("Brian ", "Bryan"));
315         assertTrue(getStringEncoder().isEncodeEquals(" Brian ", "Bryan"));
316         assertTrue(getStringEncoder().isEncodeEquals("Brian", " Bryan"));
317         assertTrue(getStringEncoder().isEncodeEquals("Brian", "Bryan "));
318         assertTrue(getStringEncoder().isEncodeEquals("Brian", " Bryan "));
319     }
320 
321     @Test
322     final void testGetEncoding_HARPER_HRPR() {
323         assertEquals("HRPR", getStringEncoder().encode("HARPER"));
324     }
325 
326     @Test
327     final void testGetEncoding_NoSpace_to_Nothing() {
328         assertEquals("", getStringEncoder().encode(""));
329     }
330 
331     @Test
332     final void testGetEncoding_Null_to_Nothing() {
333         assertEquals("", getStringEncoder().encode(null));
334     }
335 
336     @Test
337     final void testGetEncoding_One_Letter_to_Nothing() {
338         assertEquals("", getStringEncoder().encode("E"));
339     }
340 
341     @Test
342     final void testGetEncoding_SMITH_to_SMTH() {
343         assertEquals("SMTH", getStringEncoder().encode("Smith"));
344     }
345 
346     @Test
347     final void testGetEncoding_SMYTH_to_SMYTH() {
348         assertEquals("SMYTH", getStringEncoder().encode("Smyth"));
349     }
350 
351     @Test
352     final void testGetEncoding_Space_to_Nothing() {
353         assertEquals("", getStringEncoder().encode(" "));
354     }
355 
356     @Test
357     final void testGetFirstLast3__ALEXANDER_Returns_Aleder() {
358         assertEquals("Aleder", getStringEncoder().getFirst3Last3("Alexzander"));
359     }
360 
361     @Test
362     final void testGetFirstLast3_PETE_Returns_PETE() {
363         assertEquals("PETE", getStringEncoder().getFirst3Last3("PETE"));
364     }
365 
366     @Test
367     final void testGetMinRating_1_Returns5_Successfully() {
368         assertEquals(5, getStringEncoder().getMinRating(1));
369     }
370 
371     @Test
372     final void testgetMinRating_10_Returns3_Successfully() {
373         assertEquals(3, getStringEncoder().getMinRating(10));
374     }
375 
376     @Test
377     final void testgetMinRating_11_Returns_3_Successfully() {
378         assertEquals(3, getStringEncoder().getMinRating(11));
379     }
380 
381     @Test
382     final void testGetMinRating_13_Returns_1_Successfully() {
383         assertEquals(1, getStringEncoder().getMinRating(13));
384     }
385 
386     @Test
387     final void testGetMinRating_2_Returns5_Successfully() {
388         assertEquals(5, getStringEncoder().getMinRating(2));
389     }
390 
391     @Test
392     final void testgetMinRating_5_Returns4_Successfully() {
393         assertEquals(4, getStringEncoder().getMinRating(5));
394     }
395 
396     @Test
397     final void testgetMinRating_5_Returns4_Successfully2() {
398         assertEquals(4, getStringEncoder().getMinRating(5));
399     }
400 
401     @Test
402     final void testgetMinRating_6_Returns4_Successfully() {
403         assertEquals(4, getStringEncoder().getMinRating(6));
404     }
405 
406     @Test
407     final void testGetMinRating_7_Return4_Successfully() {
408         assertEquals(4, getStringEncoder().getMinRating(7));
409     }
410 
411     // ***** Begin Region - Test Get Encoding - Surnames
412 
413     @Test
414     final void testgetMinRating_7_Returns4_Successfully() {
415         assertEquals(4, getStringEncoder().getMinRating(7));
416     }
417 
418     @Test
419     final void testgetMinRating_8_Returns3_Successfully() {
420         assertEquals(3, getStringEncoder().getMinRating(8));
421     }
422 
423     @Test
424     final void testIsEncodeEquals_CornerCase_FirstNameJust1Letter_ReturnsFalse() {
425         assertFalse(getStringEncoder().isEncodeEquals("t", "test"));
426     }
427 
428     @Test
429     final void testIsEncodeEquals_CornerCase_FirstNameJustSpace_ReturnsFalse() {
430         assertFalse(getStringEncoder().isEncodeEquals(" ", "test"));
431     }
432 
433     @Test
434     final void testIsEncodeEquals_CornerCase_FirstNameNothing_ReturnsFalse() {
435         assertFalse(getStringEncoder().isEncodeEquals("", "test"));
436     }
437 
438     @Test
439     final void testIsEncodeEquals_CornerCase_FirstNameNull_ReturnsFalse() {
440         assertFalse(getStringEncoder().isEncodeEquals(null, "test"));
441     }
442 
443     @Test
444     final void testIsEncodeEquals_CornerCase_SecondNameJustSpace_ReturnsFalse() {
445         assertFalse(getStringEncoder().isEncodeEquals("test", " "));
446     }
447 
448     @Test
449     final void testIsEncodeEquals_CornerCase_SecondNameNothing_ReturnsFalse() {
450         assertFalse(getStringEncoder().isEncodeEquals("test", ""));
451     }
452 
453     @Test
454     final void testIsEncodeEquals_CornerCase_SecondNameNull_ReturnsFalse() {
455         assertFalse(getStringEncoder().isEncodeEquals("test", null));
456     }
457 
458     @Test
459     final void testIsEncodeEqualsSecondNameJust1Letter_ReturnsFalse() {
460         assertFalse(getStringEncoder().isEncodeEquals("test", "t"));
461     }
462 
463     @Test
464     final void testIsVowel_CapitalA_ReturnsTrue() {
465         assertTrue(getStringEncoder().isVowel("A"));
466     }
467 
468     @Test
469     final void testIsVowel_SingleVowel_ReturnsTrue() {
470         assertTrue(getStringEncoder().isVowel("I"));
471     }
472 
473     @Test
474     final void testIsVowel_SmallD_ReturnsFalse() {
475         assertFalse(getStringEncoder().isVowel("d"));
476     }
477 
478     @Test
479     final void testLeftToRightThenRightToLeft_ALEXANDER_ALEXANDRA_Returns4() {
480         assertEquals(4, getStringEncoder().leftToRightThenRightToLeftProcessing("ALEXANDER", "ALEXANDRA"));
481     }
482 
483     @Test
484     final void testLeftToRightThenRightToLeft_EINSTEIN_MICHAELA_Returns0() {
485         assertEquals(0, getStringEncoder().leftToRightThenRightToLeftProcessing("EINSTEIN", "MICHAELA"));
486     }
487 
488     @Test
489     final void testPunctuationOnly() {
490         assertEquals(getStringEncoder().encode(".,-"), "");
491     }
492 
493     @Test
494     final void testRemoveDoubleConsonants_MISSISSIPPI_RemovedSuccessfully() {
495         assertEquals("MISISIPI", getStringEncoder().removeDoubleConsonants("MISSISSIPPI"));
496     }
497 
498     @Test
499     final void testRemoveDoubleDoubleVowel_BEETLE_NotRemoved() {
500         assertEquals("BEETLE", getStringEncoder().removeDoubleConsonants("BEETLE"));
501     }
502 
503     @Test
504     final void testRemoveSingleDoubleConsonants_BUBLE_RemovedSuccessfully() {
505         assertEquals("BUBLE", getStringEncoder().removeDoubleConsonants("BUBBLE"));
506     }
507 
508     @Test
509     final void testRemoveVowel__AIDAN_Returns_ADN() {
510         assertEquals("ADN", getStringEncoder().removeVowels("AIDAN"));
511     }
512 
513     @Test
514     final void testRemoveVowel__DECLAN_Returns_DCLN() {
515         assertEquals("DCLN", getStringEncoder().removeVowels("DECLAN"));
516     }
517 
518     // ***** END REGION - TEST GET MRA COMPARISONS
519 
520     @Test
521     final void testRemoveVowel_ALESSANDRA_Returns_ALSSNDR() {
522         assertEquals("ALSSNDR", getStringEncoder().removeVowels("ALESSANDRA"));
523     }
524 
525     @Test
526     final void testVowelAndPunctuationOnly() {
527         assertEquals(getStringEncoder().encode("uoiea.,-AEIOU"), "U");
528     }
529 
530     @Test
531     final void testVowelOnly() {
532         assertEquals(getStringEncoder().encode("aeiouAEIOU"), "A");
533     }
534 }