View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language;
19  
20  import static org.junit.jupiter.api.Assertions.assertEquals;
21  import static org.junit.jupiter.api.Assertions.assertFalse;
22  import static org.junit.jupiter.api.Assertions.assertNull;
23  import static org.junit.jupiter.api.Assertions.assertTrue;
24  
25  import org.apache.commons.codec.AbstractStringEncoderTest;
26  import org.junit.jupiter.api.Test;
27  
28  /**
29   * Series of tests for the Match Rating Approach algorithm.
30   *
31   * General naming nomenclature for the test is of the form:
32   * GeneralMetadataOnTheTestArea_ActualTestValues_ExpectedResult
33   *
34   * An unusual value is indicated by the term "corner case"
35   */
36  public class MatchRatingApproachEncoderTest extends AbstractStringEncoderTest<MatchRatingApproachEncoder> {
37  
38      @Override
39      protected MatchRatingApproachEncoder createStringEncoder() {
40          return new MatchRatingApproachEncoder();
41      }
42  
43      @Test
44      public final void testAccentRemoval_AllLower_SuccessfullyRemoved() {
45          assertEquals("aeiou", this.getStringEncoder().removeAccents("áéíóú"));
46      }
47  
48      @Test
49      public final void testAccentRemoval_ComprehensiveAccentMix_AllSuccessfullyRemoved() {
50          assertEquals("E,E,E,E,U,U,I,I,A,A,O,e,e,e,e,u,u,i,i,a,a,o,c",
51                  this.getStringEncoder().removeAccents("È,É,Ê,Ë,Û,Ù,Ï,Î,À,Â,Ô,è,é,ê,ë,û,ù,ï,î,à,â,ô,ç"));
52      }
53  
54      @Test
55      public final void testAccentRemoval_GerSpanFrenMix_SuccessfullyRemoved() {
56          assertEquals("aeoußAEOUnNa", this.getStringEncoder().removeAccents("äëöüßÄËÖÜñÑà"));
57      }
58  
59      @Test
60      public final void testAccentRemoval_MixedWithUnusualChars_SuccessfullyRemovedAndUnusualCharactersInvariant() {
61          assertEquals("A-e'i.,o&u", this.getStringEncoder().removeAccents("Á-e'í.,ó&ú"));
62      }
63  
64      @Test
65      public final void testAccentRemoval_NINO_NoChange() {
66          assertEquals("", this.getStringEncoder().removeAccents(""));
67      }
68  
69      @Test
70      public final void testAccentRemoval_NullValue_ReturnNullSuccessfully() {
71          assertNull(this.getStringEncoder().removeAccents(null));
72      }
73  
74      @Test
75      public final void testAccentRemoval_UpperAndLower_SuccessfullyRemovedAndCaseInvariant() {
76          assertEquals("AeiOuu", this.getStringEncoder().removeAccents("ÁeíÓuu"));
77      }
78  
79      @Test
80      public final void testAccentRemoval_WithSpaces_SuccessfullyRemovedAndSpacesInvariant() {
81          assertEquals("ae io  u", this.getStringEncoder().removeAccents("áé íó  ú"));
82      }
83  
84      @Test
85      public final void testAccentRemovalNormalString_NoChange() {
86          assertEquals("Colorless green ideas sleep furiously", this.getStringEncoder().removeAccents("Colorless green ideas sleep furiously"));
87      }
88  
89      @Test
90      public final void testCleanName_SuccessfullyClean() {
91          assertEquals("THISISATEST", this.getStringEncoder().cleanName("This-ís   a t.,es &t"));
92      }
93  
94      @Test
95      public final void testCompare_BRIAN_BRYAN_SuccessfullyMatched() {
96          assertTrue(this.getStringEncoder().isEncodeEquals("Brian", "Bryan"));
97      }
98  
99      @Test
100     public final void testCompare_BURNS_BOURNE_SuccessfullyMatched() {
101         assertTrue(this.getStringEncoder().isEncodeEquals("Burns", "Bourne"));
102     }
103 
104     @Test
105     public final void testCompare_CATHERINE_KATHRYN_SuccessfullyMatched() {
106         assertTrue(this.getStringEncoder().isEncodeEquals("Catherine", "Kathryn"));
107     }
108 
109     @Test
110     public final void testCompare_COLM_COLIN_WithAccentsAndSymbolsAndSpaces_SuccessfullyMatched() {
111         assertTrue(this.getStringEncoder().isEncodeEquals("Cólm.   ", "C-olín"));
112     }
113 
114     @Test
115     public final void testCompare_Forenames_SEAN_JOHN_MatchExpected() {
116         assertTrue(this.getStringEncoder().isEncodeEquals("Sean", "John"));
117     }
118 
119     @Test
120     public final void testCompare_Forenames_SEAN_PETE_NoMatchExpected() {
121         assertFalse(this.getStringEncoder().isEncodeEquals("Sean", "Pete"));
122     }
123 
124     @Test
125     public final void testCompare_Forenames_UNA_OONAGH_ShouldSuccessfullyMatchButDoesNot() {
126         assertFalse(this.getStringEncoder().isEncodeEquals("Úna", "Oonagh")); // Disappointing
127     }
128 
129     @Test
130     public final void testCompare_FRANCISZEK_FRANCES_SuccessfullyMatched() {
131         assertTrue(this.getStringEncoder().isEncodeEquals("Franciszek", "Frances"));
132     }
133 
134     @Test
135     public final void testCompare_KARL_ALESSANDRO_DoesNotMatch() {
136         assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "Alessandro"));
137     }
138 
139     @Test
140     public final void testCompare_LongSurnames_MORIARTY_OMUIRCHEARTAIGH_DoesNotSuccessfulMatch() {
141         assertFalse(this.getStringEncoder().isEncodeEquals("Moriarty", "OMuircheartaigh"));
142     }
143 
144     @Test
145     public final void testCompare_LongSurnames_OMUIRCHEARTAIGH_OMIREADHAIGH_SuccessfulMatch() {
146         assertTrue(this.getStringEncoder().isEncodeEquals("o'muireadhaigh", "Ó 'Muircheartaigh "));
147     }
148 
149     @Test
150     public final void testCompare_MCGOWAN_MCGEOGHEGAN_SuccessfullyMatched() {
151         assertTrue(this.getStringEncoder().isEncodeEquals("McGowan", "Mc Geoghegan"));
152     }
153 
154     @Test
155     public final void testCompare_MICKY_MICHAEL_SuccessfullyMatched() {
156         assertTrue(this.getStringEncoder().isEncodeEquals("Micky", "Michael"));
157     }
158 
159     @Test
160     public final void testCompare_OONA_OONAGH_SuccessfullyMatched() {
161         assertTrue(this.getStringEncoder().isEncodeEquals("Oona", "Oonagh"));
162     }
163 
164     @Test
165     public final void testCompare_PETERSON_PETERS_SuccessfullyMatched() {
166         assertTrue(this.getStringEncoder().isEncodeEquals("Peterson", "Peters"));
167     }
168 
169     @Test
170     public final void testCompare_SAM_SAMUEL_SuccessfullyMatched() {
171         assertTrue(this.getStringEncoder().isEncodeEquals("Sam", "Samuel"));
172     }
173 
174     @Test
175     public final void testCompare_SEAN_SHAUN_SuccessfullyMatched() {
176         assertTrue(this.getStringEncoder().isEncodeEquals("Séan", "Shaun"));
177     }
178 
179     @Test
180     public final void testCompare_ShortNames_AL_ED_WorksButNoMatch() {
181         assertFalse(this.getStringEncoder().isEncodeEquals("Al", "Ed"));
182     }
183 
184     @Test
185     public final void testCompare_SmallInput_CARK_Kl_SuccessfullyMatched() {
186         assertTrue(this.getStringEncoder().isEncodeEquals("Kl", "Karl"));
187     }
188 
189     @Test
190     public final void testCompare_SMITH_SMYTH_SuccessfullyMatched() {
191         assertTrue(this.getStringEncoder().isEncodeEquals("smith", "smyth"));
192     }
193 
194     @Test
195     public final void testCompare_SOPHIE_SOFIA_SuccessfullyMatched() {
196         assertTrue(this.getStringEncoder().isEncodeEquals("Sophie", "Sofia"));
197     }
198 
199     @Test
200     public final void testCompare_STEPHEN_STEFAN_SuccessfullyMatched() {
201         assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Stefan"));
202     }
203 
204     @Test
205     public final void testCompare_STEPHEN_STEVEN_SuccessfullyMatched() {
206         assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Steven"));
207     }
208 
209     @Test
210     public final void testCompare_STEVEN_STEFAN_SuccessfullyMatched() {
211         assertTrue(this.getStringEncoder().isEncodeEquals("Steven", "Stefan"));
212     }
213 
214     @Test
215     public final void testCompare_Surname_AUERBACH_UHRBACH_SuccessfullyMatched() {
216         assertTrue(this.getStringEncoder().isEncodeEquals("Auerbach", "Uhrbach"));
217     }
218 
219     @Test
220     public final void testCompare_Surname_COOPERFLYNN_SUPERLYN_SuccessfullyMatched() {
221         assertTrue(this.getStringEncoder().isEncodeEquals("Cooper-Flynn", "Super-Lyn"));
222     }
223 
224     @Test
225     public final void testCompare_Surname_HAILEY_HALLEY_SuccessfullyMatched() {
226         assertTrue(this.getStringEncoder().isEncodeEquals("Hailey", "Halley"));
227     }
228 
229     @Test
230     public final void testCompare_Surname_LEWINSKY_LEVINSKI_SuccessfullyMatched() {
231         assertTrue(this.getStringEncoder().isEncodeEquals("LEWINSKY", "LEVINSKI"));
232     }
233 
234     @Test
235     public final void testCompare_Surname_LIPSHITZ_LIPPSZYC_SuccessfullyMatched() {
236         assertTrue(this.getStringEncoder().isEncodeEquals("LIPSHITZ", "LIPPSZYC"));
237     }
238 
239     @Test
240     public final void testCompare_Surname_MOSKOWITZ_MOSKOVITZ_SuccessfullyMatched() {
241         assertTrue(this.getStringEncoder().isEncodeEquals("Moskowitz", "Moskovitz"));
242     }
243 
244     @Test
245     public final void testCompare_Surname_OSULLIVAN_OSUILLEABHAIN_SuccessfulMatch() {
246         assertTrue(this.getStringEncoder().isEncodeEquals("O'Sullivan", "Ó ' Súilleabháin"));
247     }
248 
249     @Test
250     public final void testCompare_Surname_PRZEMYSL_PSHEMESHIL_SuccessfullyMatched() {
251         assertTrue(this.getStringEncoder().isEncodeEquals(" P rz e m y s l", " P sh e m e sh i l"));
252     }
253 
254     @Test
255     public final void testCompare_Surname_ROSOCHOWACIEC_ROSOKHOVATSETS_SuccessfullyMatched() {
256         assertTrue(this.getStringEncoder().isEncodeEquals("R o s o ch o w a c ie c", " R o s o k ho v a ts e ts"));
257     }
258 
259     @Test
260     public final void testCompare_Surname_SZLAMAWICZ_SHLAMOVITZ_SuccessfullyMatched() {
261         assertTrue(this.getStringEncoder().isEncodeEquals("SZLAMAWICZ", "SHLAMOVITZ"));
262     }
263 
264     @Test
265     public final void testCompare_SurnameCornerCase_Nulls_NoMatch() {
266         assertFalse(this.getStringEncoder().isEncodeEquals(null, null));
267     }
268 
269     @Test
270     public final void testCompare_Surnames_MURPHY_LYNCH_NoMatchExpected() {
271         assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", "Lynch"));
272     }
273 
274     @Test
275     public final void testCompare_SurnamesCornerCase_MURPHY_NoSpace_NoMatch() {
276         assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", ""));
277     }
278 
279     @Test
280     public final void testCompare_SurnamesCornerCase_MURPHY_Space_NoMatch() {
281         assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", " "));
282     }
283 
284     @Test
285     public final void testCompare_TOMASZ_TOM_SuccessfullyMatched() {
286         assertTrue(this.getStringEncoder().isEncodeEquals("Tomasz", "tom"));
287     }
288 
289     @Test
290     public final void testCompare_ZACH_ZAKARIA_SuccessfullyMatched() {
291         assertTrue(this.getStringEncoder().isEncodeEquals("Zach", "Zacharia"));
292     }
293 
294     @Test
295     public final void testCompareNameNullSpace_ReturnsFalseSuccessfully() {
296         assertFalse(getStringEncoder().isEncodeEquals(null, " "));
297     }
298 
299     @Test
300     public final void testCompareNameSameNames_ReturnsFalseSuccessfully() {
301         assertTrue(getStringEncoder().isEncodeEquals("John", "John"));
302     }
303 
304     @Test
305     public final void testCompareNameToSingleLetter_KARL_C_DoesNotMatch() {
306         assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "C"));
307     }
308 
309     @Test
310     public final void testGetEncoding_HARPER_HRPR() {
311         assertEquals("HRPR", this.getStringEncoder().encode("HARPER"));
312     }
313 
314     @Test
315     public final void testGetEncoding_NoSpace_to_Nothing() {
316         assertEquals("", this.getStringEncoder().encode(""));
317     }
318 
319     @Test
320     public final void testGetEncoding_Null_to_Nothing() {
321         assertEquals("", this.getStringEncoder().encode(null));
322     }
323 
324     @Test
325     public final void testGetEncoding_One_Letter_to_Nothing() {
326         assertEquals("", this.getStringEncoder().encode("E"));
327     }
328 
329     @Test
330     public final void testGetEncoding_SMITH_to_SMTH() {
331         assertEquals("SMTH", this.getStringEncoder().encode("Smith"));
332     }
333 
334     @Test
335     public final void testGetEncoding_SMYTH_to_SMYTH() {
336         assertEquals("SMYTH", this.getStringEncoder().encode("Smyth"));
337     }
338 
339     @Test
340     public final void testGetEncoding_Space_to_Nothing() {
341         assertEquals("", this.getStringEncoder().encode(" "));
342     }
343 
344     @Test
345     public final void testGetFirstLast3__ALEXANDER_Returns_Aleder() {
346         assertEquals("Aleder", this.getStringEncoder().getFirst3Last3("Alexzander"));
347     }
348 
349     @Test
350     public final void testGetFirstLast3_PETE_Returns_PETE() {
351         assertEquals("PETE", this.getStringEncoder().getFirst3Last3("PETE"));
352     }
353 
354     @Test
355     public final void testGetMinRating_1_Returns5_Successfully() {
356         assertEquals(5, this.getStringEncoder().getMinRating(1));
357     }
358 
359     @Test
360     public final void testgetMinRating_10_Returns3_Successfully(){
361         assertEquals(3, this.getStringEncoder().getMinRating(10));
362     }
363 
364     @Test
365     public final void testgetMinRating_11_Returns_3_Successfully(){
366         assertEquals(3, this.getStringEncoder().getMinRating(11));
367     }
368 
369     @Test
370     public final void testGetMinRating_13_Returns_1_Successfully() {
371         assertEquals(1, this.getStringEncoder().getMinRating(13));
372     }
373 
374     @Test
375     public final void testGetMinRating_2_Returns5_Successfully() {
376         assertEquals(5, this.getStringEncoder().getMinRating(2));
377     }
378 
379     @Test
380     public final void testgetMinRating_5_Returns4_Successfully(){
381         assertEquals(4, this.getStringEncoder().getMinRating(5));
382     }
383 
384     @Test
385     public final void testgetMinRating_5_Returns4_Successfully2(){
386         assertEquals(4, this.getStringEncoder().getMinRating(5));
387     }
388 
389     @Test
390     public final void testgetMinRating_6_Returns4_Successfully(){
391         assertEquals(4, this.getStringEncoder().getMinRating(6));
392     }
393 
394     @Test
395     public final void testGetMinRating_7_Return4_Successfully() {
396         assertEquals(4, this.getStringEncoder().getMinRating(7));
397     }
398 
399     // ***** Begin Region - Test Get Encoding - Surnames
400 
401     @Test
402     public final void testgetMinRating_7_Returns4_Successfully(){
403         assertEquals(4, this.getStringEncoder().getMinRating(7));
404     }
405 
406     @Test
407     public final void testgetMinRating_8_Returns3_Successfully(){
408         assertEquals(3, this.getStringEncoder().getMinRating(8));
409     }
410 
411     @Test
412     public final void testIsEncodeEquals_CornerCase_FirstNameJust1Letter_ReturnsFalse() {
413         assertFalse(this.getStringEncoder().isEncodeEquals("t", "test"));
414     }
415 
416     @Test
417     public final void testIsEncodeEquals_CornerCase_FirstNameJustSpace_ReturnsFalse() {
418         assertFalse(this.getStringEncoder().isEncodeEquals(" ", "test"));
419     }
420 
421     @Test
422     public final void testIsEncodeEquals_CornerCase_FirstNameNothing_ReturnsFalse() {
423         assertFalse(this.getStringEncoder().isEncodeEquals("", "test"));
424     }
425 
426     @Test
427     public final void testIsEncodeEquals_CornerCase_FirstNameNull_ReturnsFalse() {
428         assertFalse(this.getStringEncoder().isEncodeEquals(null, "test"));
429     }
430 
431     @Test
432     public final void testIsEncodeEquals_CornerCase_SecondNameJustSpace_ReturnsFalse() {
433         assertFalse(this.getStringEncoder().isEncodeEquals("test", " "));
434     }
435 
436     @Test
437     public final void testIsEncodeEquals_CornerCase_SecondNameNothing_ReturnsFalse() {
438         assertFalse(this.getStringEncoder().isEncodeEquals("test", ""));
439     }
440 
441     @Test
442     public final void testIsEncodeEquals_CornerCase_SecondNameNull_ReturnsFalse() {
443         assertFalse(this.getStringEncoder().isEncodeEquals("test", null));
444     }
445 
446     @Test
447     public final void testIsEncodeEqualsSecondNameJust1Letter_ReturnsFalse() {
448         assertFalse(this.getStringEncoder().isEncodeEquals("test", "t"));
449     }
450 
451     @Test
452     public final void testIsVowel_CapitalA_ReturnsTrue() {
453         assertTrue(this.getStringEncoder().isVowel("A"));
454     }
455 
456     @Test
457     public final void testIsVowel_SingleVowel_ReturnsTrue() {
458         assertTrue(this.getStringEncoder().isVowel("I"));
459     }
460 
461     @Test
462     public final void testIsVowel_SmallD_ReturnsFalse() {
463         assertFalse(this.getStringEncoder().isVowel("d"));
464     }
465 
466     @Test
467     public final void testLeftToRightThenRightToLeft_ALEXANDER_ALEXANDRA_Returns4() {
468         assertEquals(4, this.getStringEncoder().leftToRightThenRightToLeftProcessing("ALEXANDER", "ALEXANDRA"));
469     }
470 
471     @Test
472     public final void testLeftToRightThenRightToLeft_EINSTEIN_MICHAELA_Returns0() {
473         assertEquals(0, this.getStringEncoder().leftToRightThenRightToLeftProcessing("EINSTEIN", "MICHAELA"));
474     }
475 
476     @Test
477     public final void testPunctuationOnly() {
478         assertEquals(this.getStringEncoder().encode(".,-"), "");
479     }
480 
481     @Test
482     public final void testRemoveDoubleConsonants_MISSISSIPPI_RemovedSuccessfully() {
483         assertEquals("MISISIPI", this.getStringEncoder().removeDoubleConsonants("MISSISSIPPI"));
484     }
485 
486     @Test
487     public final void testRemoveDoubleDoubleVowel_BEETLE_NotRemoved() {
488         assertEquals("BEETLE", this.getStringEncoder().removeDoubleConsonants("BEETLE"));
489     }
490 
491     @Test
492     public final void testRemoveSingleDoubleConsonants_BUBLE_RemovedSuccessfully() {
493         assertEquals("BUBLE", this.getStringEncoder().removeDoubleConsonants("BUBBLE"));
494     }
495 
496     @Test
497     public final void testRemoveVowel__AIDAN_Returns_ADN() {
498         assertEquals("ADN", this.getStringEncoder().removeVowels("AIDAN"));
499     }
500 
501     @Test
502     public final void testRemoveVowel__DECLAN_Returns_DCLN() {
503         assertEquals("DCLN", this.getStringEncoder().removeVowels("DECLAN"));
504     }
505 
506     // ***** END REGION - TEST GET MRA COMPARISONS
507 
508     @Test
509     public final void testRemoveVowel_ALESSANDRA_Returns_ALSSNDR() {
510         assertEquals("ALSSNDR", this.getStringEncoder().removeVowels("ALESSANDRA"));
511     }
512 
513     @Test
514     public final void testVowelAndPunctuationOnly() {
515         assertEquals(this.getStringEncoder().encode("uoiea.,-AEIOU"), "U");
516     }
517 
518     @Test
519     public final void testVowelOnly() {
520         assertEquals(this.getStringEncoder().encode("aeiouAEIOU"), "A");
521     }
522 }