/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.language;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import org.apache.commons.codec.StringEncoderAbstractTest;
import org.junit.Test;

027/**
028 * Series of tests for the Match Rating Approach algorithm.
029 *
030 * General naming nomenclature for the test is of the form:
031 * GeneralMetadataOnTheTestArea_ActualTestValues_ExpectedResult
032 *
033 * An unusual value is indicated by the term "corner case"
034 */
035public class MatchRatingApproachEncoderTest extends StringEncoderAbstractTest<MatchRatingApproachEncoder> {
036
037    // ********** BEGIN REGION - TEST SUPPORT METHODS
038
039    @Test
040    public final void testAccentRemoval_AllLower_SuccessfullyRemoved() {
041        assertEquals("aeiou", this.getStringEncoder().removeAccents("�����"));
042    }
043
044    @Test
045    public final void testAccentRemoval_WithSpaces_SuccessfullyRemovedAndSpacesInvariant() {
046        assertEquals("ae io  u", this.getStringEncoder().removeAccents("�� ��  �"));
047    }
048
049    @Test
050    public final void testAccentRemoval_UpperandLower_SuccessfullyRemovedAndCaseInvariant() {
051        assertEquals("AeiOuu", this.getStringEncoder().removeAccents("�e��uu"));
052    }
053
054    @Test
055    public final void testAccentRemoval_MixedWithUnusualChars_SuccessfullyRemovedAndUnusualcharactersInvariant() {
056        assertEquals("A-e'i.,o&u", this.getStringEncoder().removeAccents("�-e'�.,�&�"));
057    }
058
059    @Test
060    public final void testAccentRemoval_GerSpanFrenMix_SuccessfullyRemoved() {
061        assertEquals("aeou�AEOUnNa", this.getStringEncoder().removeAccents("������������"));
062    }
063
064    @Test
065    public final void testAccentRemoval_ComprehensiveAccentMix_AllSuccessfullyRemoved() {
066        assertEquals("E,E,E,E,U,U,I,I,A,A,O,e,e,e,e,u,u,i,i,a,a,o,c",
067                this.getStringEncoder().removeAccents("�,�,�,�,�,�,�,�,�,�,�,�,�,�,�,�,�,�,�,�,�,�,�"));
068    }
069
070    @Test
071    public final void testAccentRemovalNormalString_NoChange() {
072        assertEquals("Colorless green ideas sleep furiously", this.getStringEncoder().removeAccents("Colorless green ideas sleep furiously"));
073    }
074
075    @Test
076    public final void testAccentRemoval_NINO_NoChange() {
077        assertEquals("", this.getStringEncoder().removeAccents(""));
078    }
079    
080    @Test
081    public final void testAccentRemoval_NullValue_ReturnNullSuccessfully() {
082        assertEquals(null, this.getStringEncoder().removeAccents(null));
083    }
084
085    @Test
086    public final void testRemoveSingleDoubleConsonants_BUBLE_RemovedSuccessfully() {
087        assertEquals("BUBLE", this.getStringEncoder().removeDoubleConsonants("BUBBLE"));
088    }
089
090    @Test
091    public final void testRemoveDoubleConsonants_MISSISSIPPI_RemovedSuccessfully() {
092        assertEquals("MISISIPI", this.getStringEncoder().removeDoubleConsonants("MISSISSIPPI"));
093    }
094
095    @Test
096    public final void testRemoveDoubleDoubleVowel_BEETLE_NotRemoved() {
097        assertEquals("BEETLE", this.getStringEncoder().removeDoubleConsonants("BEETLE"));
098    }
099
100    @Test
101    public final void testIsVowel_CapitalA_ReturnsTrue() {
102        assertTrue(this.getStringEncoder().isVowel("A"));
103    }
104
105    @Test
106    public final void testIsVowel_SmallD_ReturnsFalse() {
107        assertFalse(this.getStringEncoder().isVowel("d"));
108    }
109
110    @Test
111    public final void testRemoveVowel_ALESSANDRA_Returns_ALSSNDR() {
112        assertEquals("ALSSNDR", this.getStringEncoder().removeVowels("ALESSANDRA"));
113    }
114
115    @Test
116    public final void testRemoveVowel__AIDAN_Returns_ADN() {
117        assertEquals("ADN", this.getStringEncoder().removeVowels("AIDAN"));
118    }
119
120    @Test
121    public final void testRemoveVowel__DECLAN_Returns_DCLN() {
122        assertEquals("DCLN", this.getStringEncoder().removeVowels("DECLAN"));
123    }
124
125    @Test
126    public final void testGetFirstLast3__ALEXANDER_Returns_Aleder() {
127        assertEquals("Aleder", this.getStringEncoder().getFirst3Last3("Alexzander"));
128    }
129
130    @Test
131    public final void testGetFirstLast3_PETE_Returns_PETE() {
132        assertEquals("PETE", this.getStringEncoder().getFirst3Last3("PETE"));
133    }
134
135    @Test
136    public final void testleftTorightThenRightToLeft_ALEXANDER_ALEXANDRA_Returns4() {
137        assertEquals(4, this.getStringEncoder().leftToRightThenRightToLeftProcessing("ALEXANDER", "ALEXANDRA"));
138    }
139
140    @Test
141    public final void testleftTorightThenRightToLeft_EINSTEIN_MICHAELA_Returns0() {
142        assertEquals(0, this.getStringEncoder().leftToRightThenRightToLeftProcessing("EINSTEIN", "MICHAELA"));
143    }
144
145    @Test
146    public final void testGetMinRating_7_Return4_Successfully() {
147        assertEquals(4, this.getStringEncoder().getMinRating(7));
148    }
149    
150    @Test
151    public final void testGetMinRating_1_Returns5_Successfully() {
152        assertEquals(5, this.getStringEncoder().getMinRating(1));
153    }
154
155    @Test
156    public final void testGetMinRating_2_Returns5_Successfully() {
157        assertEquals(5, this.getStringEncoder().getMinRating(2));
158    }
159    
160    @Test
161    public final void testgetMinRating_5_Returns4_Successfully(){
162        assertEquals(4, this.getStringEncoder().getMinRating(5)); 
163    }
164    
165    @Test
166    public final void testgetMinRating_5_Returns4_Successfully2(){
167        assertEquals(4, this.getStringEncoder().getMinRating(5)); 
168    }
169    
170    @Test
171    public final void testgetMinRating_6_Returns4_Successfully(){
172        assertEquals(4, this.getStringEncoder().getMinRating(6)); 
173    }
174    
175    @Test
176    public final void testgetMinRating_7_Returns4_Successfully(){
177        assertEquals(4, this.getStringEncoder().getMinRating(7)); 
178    }
179    
180    @Test
181    public final void testgetMinRating_8_Returns3_Successfully(){
182        assertEquals(3, this.getStringEncoder().getMinRating(8));
183    }
184    
185    @Test
186    public final void testgetMinRating_10_Returns3_Successfully(){
187        assertEquals(3, this.getStringEncoder().getMinRating(10)); 
188    }
189    
190    @Test
191    public final void testgetMinRating_11_Returns_3_Successfully(){
192        assertEquals(3, this.getStringEncoder().getMinRating(11)); 
193    }
194
195    @Test
196    public final void testGetMinRating_13_Returns_1_Successfully() {
197        assertEquals(1, this.getStringEncoder().getMinRating(13));
198    }
199    
200    @Test
201    public final void testcleanName_SuccessfullyClean() {
202        assertEquals("THISISATEST", this.getStringEncoder().cleanName("This-�s   a t.,es &t"));
203    }
204    
205    @Test
206    public final void testisVowel_SingleVowel_ReturnsTrue() {
207        assertTrue(this.getStringEncoder().isVowel(("I")));
208    }
209    
210    @Test
211    public final void testisEncodeEquals_CornerCase_SecondNameNothing_ReturnsFalse() {
212        assertFalse(this.getStringEncoder().isEncodeEquals("test", ""));
213    }
214
215    @Test
216    public final void testisEncodeEquals_CornerCase_FirstNameNothing_ReturnsFalse() {
217        assertFalse(this.getStringEncoder().isEncodeEquals("", "test"));
218    }
219    
220    @Test
221    public final void testisEncodeEquals_CornerCase_SecondNameJustSpace_ReturnsFalse() {
222        assertFalse(this.getStringEncoder().isEncodeEquals("test", " "));
223    }
224
225    @Test
226    public final void testisEncodeEquals_CornerCase_FirstNameJustSpace_ReturnsFalse() {
227        assertFalse(this.getStringEncoder().isEncodeEquals(" ", "test"));
228    }
229    
230    @Test
231    public final void testisEncodeEquals_CornerCase_SecondNameNull_ReturnsFalse() {
232        assertFalse(this.getStringEncoder().isEncodeEquals("test", null));
233    }
234
235    @Test
236    public final void testisEncodeEquals_CornerCase_FirstNameNull_ReturnsFalse() {
237        assertFalse(this.getStringEncoder().isEncodeEquals(null, "test"));
238    }
239    
240    @Test
241    public final void testisEncodeEquals_CornerCase_FirstNameJust1Letter_ReturnsFalse() {
242        assertFalse(this.getStringEncoder().isEncodeEquals("t", "test"));
243    }
244
245    @Test
246    public final void testisEncodeEqualsSecondNameJust1Letter_ReturnsFalse() {
247        assertFalse(this.getStringEncoder().isEncodeEquals("test", "t"));
248    }
249    
250    // ***** END REGION - TEST SUPPORT METHODS
251
252    // ***** BEGIN REGION - TEST GET MRA ENCODING
253
254    @Test
255    public final void testGetEncoding_HARPER_HRPR() {
256        assertEquals("HRPR", this.getStringEncoder().encode("HARPER"));
257    }
258
259    @Test
260    public final void testGetEncoding_SMITH_to_SMTH() {
261        assertEquals("SMTH", this.getStringEncoder().encode("Smith"));
262    }
263
264    @Test
265    public final void testGetEncoding_SMYTH_to_SMYTH() {
266        assertEquals("SMYTH", this.getStringEncoder().encode("Smyth"));
267    }
268
269    @Test
270    public final void testGetEncoding_Space_to_Nothing() {
271        assertEquals("", this.getStringEncoder().encode(" "));
272    }
273
274    @Test
275    public final void testGetEncoding_NoSpace_to_Nothing() {
276        assertEquals("", this.getStringEncoder().encode(""));
277    }
278
279    @Test
280    public final void testGetEncoding_Null_to_Nothing() {
281        assertEquals("", this.getStringEncoder().encode(null));
282    }
283
284    @Test
285    public final void testGetEncoding_One_Letter_to_Nothing() {
286        assertEquals("", this.getStringEncoder().encode("E"));
287    }
288    
289    @Test
290    public final void testCompareNameNullSpace_ReturnsFalseSuccessfully() {
291        assertFalse(getStringEncoder().isEncodeEquals(null, " "));
292    }
293    
294    @Test
295    public final void testCompareNameSameNames_ReturnsFalseSuccessfully() {
296        assertTrue(getStringEncoder().isEncodeEquals("John", "John"));
297    }
298
299    // ***** END REGION - TEST GET MRA ENCODING
300
301    // ***** BEGIN REGION - TEST GET MRA COMPARISONS
302
303    @Test
304    public final void testCompare_SMITH_SMYTH_SuccessfullyMatched() {
305        assertTrue(this.getStringEncoder().isEncodeEquals("smith", "smyth"));
306    }
307
308    @Test
309    public final void testCompare_BURNS_BOURNE_SuccessfullyMatched() {
310        assertTrue(this.getStringEncoder().isEncodeEquals("Burns", "Bourne"));
311    }
312
313    @Test
314    public final void testCompare_ShortNames_AL_ED_WorksButNoMatch() {
315        assertFalse(this.getStringEncoder().isEncodeEquals("Al", "Ed"));
316    }
317
318    @Test
319    public final void testCompare_CATHERINE_KATHRYN_SuccessfullyMatched() {
320        assertTrue(this.getStringEncoder().isEncodeEquals("Catherine", "Kathryn"));
321    }
322
323    @Test
324    public final void testCompare_BRIAN_BRYAN_SuccessfullyMatched() {
325        assertTrue(this.getStringEncoder().isEncodeEquals("Brian", "Bryan"));
326    }
327
328    @Test
329    public final void testCompare_SEAN_SHAUN_SuccessfullyMatched() {
330        assertTrue(this.getStringEncoder().isEncodeEquals("S�an", "Shaun"));
331    }
332
333    @Test
334    public final void testCompare_COLM_COLIN_WithAccentsAndSymbolsAndSpaces_SuccessfullyMatched() {
335        assertTrue(this.getStringEncoder().isEncodeEquals("C�lm.   ", "C-ol�n"));
336    }
337
338    @Test
339    public final void testCompare_STEPHEN_STEVEN_SuccessfullyMatched() {
340        assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Steven"));
341    }
342
343    @Test
344    public final void testCompare_STEVEN_STEFAN_SuccessfullyMatched() {
345        assertTrue(this.getStringEncoder().isEncodeEquals("Steven", "Stefan"));
346    }
347
348    @Test
349    public final void testCompare_STEPHEN_STEFAN_SuccessfullyMatched() {
350        assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Stefan"));
351    }
352
353    @Test
354    public final void testCompare_SAM_SAMUEL_SuccessfullyMatched() {
355        assertTrue(this.getStringEncoder().isEncodeEquals("Sam", "Samuel"));
356    }
357
358    @Test
359    public final void testCompare_MICKY_MICHAEL_SuccessfullyMatched() {
360        assertTrue(this.getStringEncoder().isEncodeEquals("Micky", "Michael"));
361    }
362
363    @Test
364    public final void testCompare_OONA_OONAGH_SuccessfullyMatched() {
365        assertTrue(this.getStringEncoder().isEncodeEquals("Oona", "Oonagh"));
366    }
367
368    @Test
369    public final void testCompare_SOPHIE_SOFIA_SuccessfullyMatched() {
370        assertTrue(this.getStringEncoder().isEncodeEquals("Sophie", "Sofia"));
371    }
372
373    @Test
374    public final void testCompare_FRANCISZEK_FRANCES_SuccessfullyMatched() {
375        assertTrue(this.getStringEncoder().isEncodeEquals("Franciszek", "Frances"));
376    }
377
378    @Test
379    public final void testCompare_TOMASZ_TOM_SuccessfullyMatched() {
380        assertTrue(this.getStringEncoder().isEncodeEquals("Tomasz", "tom"));
381    }
382
383    @Test
384    public final void testCompare_SmallInput_CARK_Kl_SuccessfullyMatched() {
385        assertTrue(this.getStringEncoder().isEncodeEquals("Kl", "Karl"));
386    }
387
388    @Test
389    public final void testCompareNameToSingleLetter_KARL_C_DoesNotMatch() {
390        assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "C"));
391    }
392
393    @Test
394    public final void testCompare_ZACH_ZAKARIA_SuccessfullyMatched() {
395        assertTrue(this.getStringEncoder().isEncodeEquals("Zach", "Zacharia"));
396    }
397
398    @Test
399    public final void testCompare_KARL_ALESSANDRO_DoesNotMatch() {
400        assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "Alessandro"));
401    }
402
403    @Test
404    public final void testCompare_Forenames_UNA_OONAGH_ShouldSuccessfullyMatchButDoesNot() {
405        assertFalse(this.getStringEncoder().isEncodeEquals("�na", "Oonagh")); // Disappointing
406    }
407
408    // ***** Begin Region - Test Get Encoding - Surnames
409
410    @Test
411    public final void testCompare_Surname_OSULLIVAN_OSUILLEABHAIN_SuccessfulMatch() {
412        assertTrue(this.getStringEncoder().isEncodeEquals("O'Sullivan", "� ' S�illeabh�in"));
413    }
414
415    @Test
416    public final void testCompare_LongSurnames_MORIARTY_OMUIRCHEARTAIGH_DoesNotSuccessfulMatch() {
417        assertFalse(this.getStringEncoder().isEncodeEquals("Moriarty", "OMuircheartaigh"));
418    }
419
420    @Test
421    public final void testCompare_LongSurnames_OMUIRCHEARTAIGH_OMIREADHAIGH_SuccessfulMatch() {
422        assertTrue(this.getStringEncoder().isEncodeEquals("o'muireadhaigh", "� 'Muircheartaigh "));
423    }
424
425    @Test
426    public final void testCompare_Surname_COOPERFLYNN_SUPERLYN_SuccessfullyMatched() {
427        assertTrue(this.getStringEncoder().isEncodeEquals("Cooper-Flynn", "Super-Lyn"));
428    }
429
430    @Test
431    public final void testCompare_Surname_HAILEY_HALLEY_SuccessfullyMatched() {
432        assertTrue(this.getStringEncoder().isEncodeEquals("Hailey", "Halley"));
433    }
434
435    // **** BEGIN YIDDISH/SLAVIC SECTION ****
436
437    @Test
438    public final void testCompare_Surname_AUERBACH_UHRBACH_SuccessfullyMatched() {
439        assertTrue(this.getStringEncoder().isEncodeEquals("Auerbach", "Uhrbach"));
440    }
441
442    @Test
443    public final void testCompare_Surname_MOSKOWITZ_MOSKOVITZ_SuccessfullyMatched() {
444        assertTrue(this.getStringEncoder().isEncodeEquals("Moskowitz", "Moskovitz"));
445    }
446
447    @Test
448    public final void testCompare_Surname_LIPSHITZ_LIPPSZYC_SuccessfullyMatched() {
449        assertTrue(this.getStringEncoder().isEncodeEquals("LIPSHITZ", "LIPPSZYC"));
450    }
451
452    @Test
453    public final void testCompare_Surname_LEWINSKY_LEVINSKI_SuccessfullyMatched() {
454        assertTrue(this.getStringEncoder().isEncodeEquals("LEWINSKY", "LEVINSKI"));
455    }
456
457    @Test
458    public final void testCompare_Surname_SZLAMAWICZ_SHLAMOVITZ_SuccessfullyMatched() {
459        assertTrue(this.getStringEncoder().isEncodeEquals("SZLAMAWICZ", "SHLAMOVITZ"));
460    }
461
462    @Test
463    public final void testCompare_Surname_ROSOCHOWACIEC_ROSOKHOVATSETS_SuccessfullyMatched() {
464        assertTrue(this.getStringEncoder().isEncodeEquals("R o s o ch o w a c ie c", " R o s o k ho v a ts e ts"));
465    }
466
467    @Test
468    public final void testCompare_Surname_PRZEMYSL_PSHEMESHIL_SuccessfullyMatched() {
469        assertTrue(this.getStringEncoder().isEncodeEquals(" P rz e m y s l", " P sh e m e sh i l"));
470    }
471
472    // **** END YIDDISH/SLAVIC SECTION ****
473
474    @Test
475    public final void testCompare_PETERSON_PETERS_SuccessfullyMatched() {
476        assertTrue(this.getStringEncoder().isEncodeEquals("Peterson", "Peters"));
477    }
478
479    @Test
480    public final void testCompare_MCGOWAN_MCGEOGHEGAN_SuccessfullyMatched() {
481        assertTrue(this.getStringEncoder().isEncodeEquals("McGowan", "Mc Geoghegan"));
482    }
483
484    @Test
485    public final void testCompare_SurnamesCornerCase_MURPHY_Space_NoMatch() {
486        assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", " "));
487    }
488
489    @Test
490    public final void testCompare_SurnamesCornerCase_MURPHY_NoSpace_NoMatch() {
491        assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", ""));
492    }
493
494    @Test
495    public final void testCompare_SurnameCornerCase_Nulls_NoMatch() {
496        assertFalse(this.getStringEncoder().isEncodeEquals(null, null));
497    }
498
499    @Test
500    public final void testCompare_Surnames_MURPHY_LYNCH_NoMatchExpected() {
501        assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", "Lynch"));
502    }
503
504    @Test
505    public final void testCompare_Forenames_SEAN_JOHN_MatchExpected() {
506        assertTrue(this.getStringEncoder().isEncodeEquals("Sean", "John"));
507    }
508
509    @Test
510    public final void testCompare_Forenames_SEAN_PETE_NoMatchExpected() {
511        assertFalse(this.getStringEncoder().isEncodeEquals("Sean", "Pete"));
512    }
513
514    @Override
515    protected MatchRatingApproachEncoder createStringEncoder() {
516        return new MatchRatingApproachEncoder();
517    }
518
519    // ***** END REGION - TEST GET MRA COMPARISONS
520
521}