/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.language;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import org.apache.commons.codec.StringEncoderAbstractTest;
import org.junit.Test;

027    /**
028     * Series of tests for the Match Rating Approach algorithm.
029     *
030     * General naming nomenclature for the test is of the form:
031     * GeneralMetadataOnTheTestArea_ActualTestValues_ExpectedResult
032     *
033     * An unusual value is indicated by the term "corner case"
034     */
035    public class MatchRatingApproachEncoderTest extends StringEncoderAbstractTest<MatchRatingApproachEncoder> {
036    
037        // ********** BEGIN REGION - TEST SUPPORT METHODS
038    
039        @Test
040        public final void testAccentRemoval_AllLower_SuccessfullyRemoved() {
041            assertEquals("aeiou", this.getStringEncoder().removeAccents("áéíóú"));
042        }
043    
044        @Test
045        public final void testAccentRemoval_WithSpaces_SuccessfullyRemovedAndSpacesInvariant() {
046            assertEquals("ae io  u", this.getStringEncoder().removeAccents("áé íó  ú"));
047        }
048    
049        @Test
050        public final void testAccentRemoval_UpperandLower_SuccessfullyRemovedAndCaseInvariant() {
051            assertEquals("AeiOuu", this.getStringEncoder().removeAccents("�eíÓuu"));
052        }
053    
054        @Test
055        public final void testAccentRemoval_MixedWithUnusualChars_SuccessfullyRemovedAndUnusualcharactersInvariant() {
056            assertEquals("A-e'i.,o&u", this.getStringEncoder().removeAccents("�-e'í.,ó&ú"));
057        }
058    
059        @Test
060        public final void testAccentRemoval_GerSpanFrenMix_SuccessfullyRemoved() {
061            assertEquals("aeoußAEOUnNa", this.getStringEncoder().removeAccents("äëöüßÄËÖÜñÑà"));
062        }
063    
064        @Test
065        public final void testAccentRemoval_ComprehensiveAccentMix_AllSuccessfullyRemoved() {
066            assertEquals("E,E,E,E,U,U,I,I,A,A,O,e,e,e,e,u,u,i,i,a,a,o,c",
067                    this.getStringEncoder().removeAccents("È,É,Ê,Ë,Û,Ù,�,Î,À,Â,Ô,è,é,ê,ë,û,ù,ï,î,à,â,ô,ç"));
068        }
069    
070        @Test
071        public final void testAccentRemovalNormalString_NoChange() {
072            assertEquals("Colorless green ideas sleep furiously", this.getStringEncoder().removeAccents("Colorless green ideas sleep furiously"));
073        }
074    
075        @Test
076        public final void testAccentRemoval_NINO_NoChange() {
077            assertEquals("", this.getStringEncoder().removeAccents(""));
078        }
079            
080            @Test
081        public final void testAccentRemoval_NullValue_ReturnNullSuccessfully() {
082            assertEquals(null, this.getStringEncoder().removeAccents(null));
083        }
084    
085        @Test
086        public final void testRemoveSingleDoubleConsonants_BUBLE_RemovedSuccessfully() {
087            assertEquals("BUBLE", this.getStringEncoder().removeDoubleConsonants("BUBBLE"));
088        }
089    
090        @Test
091        public final void testRemoveDoubleConsonants_MISSISSIPPI_RemovedSuccessfully() {
092            assertEquals("MISISIPI", this.getStringEncoder().removeDoubleConsonants("MISSISSIPPI"));
093        }
094    
095        @Test
096        public final void testRemoveDoubleDoubleVowel_BEETLE_NotRemoved() {
097            assertEquals("BEETLE", this.getStringEncoder().removeDoubleConsonants("BEETLE"));
098        }
099    
100        @Test
101        public final void testIsVowel_CapitalA_ReturnsTrue() {
102            assertTrue(this.getStringEncoder().isVowel("A"));
103        }
104    
105        @Test
106        public final void testIsVowel_SmallD_ReturnsFalse() {
107            assertFalse(this.getStringEncoder().isVowel("d"));
108        }
109    
110        @Test
111        public final void testRemoveVowel_ALESSANDRA_Returns_ALSSNDR() {
112            assertEquals("ALSSNDR", this.getStringEncoder().removeVowels("ALESSANDRA"));
113        }
114    
115        @Test
116        public final void testRemoveVowel__AIDAN_Returns_ADN() {
117            assertEquals("ADN", this.getStringEncoder().removeVowels("AIDAN"));
118        }
119    
120        @Test
121        public final void testRemoveVowel__DECLAN_Returns_DCLN() {
122            assertEquals("DCLN", this.getStringEncoder().removeVowels("DECLAN"));
123        }
124    
125        @Test
126        public final void testGetFirstLast3__ALEXANDER_Returns_Aleder() {
127            assertEquals("Aleder", this.getStringEncoder().getFirst3Last3("Alexzander"));
128        }
129    
130        @Test
131        public final void testGetFirstLast3_PETE_Returns_PETE() {
132            assertEquals("PETE", this.getStringEncoder().getFirst3Last3("PETE"));
133        }
134    
135        @Test
136        public final void testleftTorightThenRightToLeft_ALEXANDER_ALEXANDRA_Returns4() {
137            assertEquals(4, this.getStringEncoder().leftToRightThenRightToLeftProcessing("ALEXANDER", "ALEXANDRA"));
138        }
139    
140        @Test
141        public final void testleftTorightThenRightToLeft_EINSTEIN_MICHAELA_Returns0() {
142            assertEquals(0, this.getStringEncoder().leftToRightThenRightToLeftProcessing("EINSTEIN", "MICHAELA"));
143        }
144    
145        @Test
146        public final void testGetMinRating_7_Return4_Successfully() {
147            assertEquals(4, this.getStringEncoder().getMinRating(7));
148        }
149        
150        @Test
151        public final void testGetMinRating_1_Returns5_Successfully() {
152            assertEquals(5, this.getStringEncoder().getMinRating(1));
153        }
154    
155        @Test
156        public final void testGetMinRating_2_Returns5_Successfully() {
157            assertEquals(5, this.getStringEncoder().getMinRating(2));
158        }
159        
160        @Test
161        public final void testgetMinRating_5_Returns4_Successfully(){
162            assertEquals(4, this.getStringEncoder().getMinRating(5)); 
163        }
164        
165        @Test
166        public final void testgetMinRating_5_Returns4_Successfully2(){
167            assertEquals(4, this.getStringEncoder().getMinRating(5)); 
168        }
169        
170        @Test
171        public final void testgetMinRating_6_Returns4_Successfully(){
172            assertEquals(4, this.getStringEncoder().getMinRating(6)); 
173        }
174        
175        @Test
176        public final void testgetMinRating_7_Returns4_Successfully(){
177            assertEquals(4, this.getStringEncoder().getMinRating(7)); 
178        }
179        
180        @Test
181        public final void testgetMinRating_8_Returns3_Successfully(){
182            assertEquals(3, this.getStringEncoder().getMinRating(8));
183        }
184        
185        @Test
186        public final void testgetMinRating_10_Returns3_Successfully(){
187            assertEquals(3, this.getStringEncoder().getMinRating(10)); 
188        }
189        
190        @Test
191        public final void testgetMinRating_11_Returns_3_Successfully(){
192            assertEquals(3, this.getStringEncoder().getMinRating(11)); 
193        }
194    
195        @Test
196        public final void testGetMinRating_13_Returns_1_Successfully() {
197            assertEquals(1, this.getStringEncoder().getMinRating(13));
198        }
199        
200        @Test
201        public final void testcleanName_SuccessfullyClean() {
202            assertEquals("THISISATEST", this.getStringEncoder().cleanName("This-ís   a t.,es &t"));
203        }
204        
205        @Test
206        public final void testisVowel_SingleVowel_ReturnsTrue() {
207            assertTrue(this.getStringEncoder().isVowel(("I")));
208        }
209        
210        @Test
211        public final void testisEncodeEquals_CornerCase_SecondNameNothing_ReturnsFalse() {
212            assertFalse(this.getStringEncoder().isEncodeEquals("test", ""));
213        }
214    
215        @Test
216        public final void testisEncodeEquals_CornerCase_FirstNameNothing_ReturnsFalse() {
217            assertFalse(this.getStringEncoder().isEncodeEquals("", "test"));
218        }
219        
220        @Test
221        public final void testisEncodeEquals_CornerCase_SecondNameJustSpace_ReturnsFalse() {
222            assertFalse(this.getStringEncoder().isEncodeEquals("test", " "));
223        }
224    
225        @Test
226        public final void testisEncodeEquals_CornerCase_FirstNameJustSpace_ReturnsFalse() {
227            assertFalse(this.getStringEncoder().isEncodeEquals(" ", "test"));
228        }
229        
230        @Test
231        public final void testisEncodeEquals_CornerCase_SecondNameNull_ReturnsFalse() {
232            assertFalse(this.getStringEncoder().isEncodeEquals("test", null));
233        }
234    
235        @Test
236        public final void testisEncodeEquals_CornerCase_FirstNameNull_ReturnsFalse() {
237            assertFalse(this.getStringEncoder().isEncodeEquals(null, "test"));
238        }
239        
240        @Test
241        public final void testisEncodeEquals_CornerCase_FirstNameJust1Letter_ReturnsFalse() {
242            assertFalse(this.getStringEncoder().isEncodeEquals("t", "test"));
243        }
244    
245        @Test
246        public final void testisEncodeEqualsSecondNameJust1Letter_ReturnsFalse() {
247            assertFalse(this.getStringEncoder().isEncodeEquals("test", "t"));
248        }
249        
250        // ***** END REGION - TEST SUPPORT METHODS
251    
252        // ***** BEGIN REGION - TEST GET MRA ENCODING
253    
254        @Test
255        public final void testGetEncoding_HARPER_HRPR() {
256            assertEquals("HRPR", this.getStringEncoder().encode("HARPER"));
257        }
258    
259        @Test
260        public final void testGetEncoding_SMITH_to_SMTH() {
261            assertEquals("SMTH", this.getStringEncoder().encode("Smith"));
262        }
263    
264        @Test
265        public final void testGetEncoding_SMYTH_to_SMYTH() {
266            assertEquals("SMYTH", this.getStringEncoder().encode("Smyth"));
267        }
268    
269        @Test
270        public final void testGetEncoding_Space_to_Nothing() {
271            assertEquals("", this.getStringEncoder().encode(" "));
272        }
273    
274        @Test
275        public final void testGetEncoding_NoSpace_to_Nothing() {
276            assertEquals("", this.getStringEncoder().encode(""));
277        }
278    
279        @Test
280        public final void testGetEncoding_Null_to_Nothing() {
281            assertEquals("", this.getStringEncoder().encode(null));
282        }
283    
284        @Test
285        public final void testGetEncoding_One_Letter_to_Nothing() {
286            assertEquals("", this.getStringEncoder().encode("E"));
287        }
288            
289            @Test
290        public final void testCompareNameNullSpace_ReturnsFalseSuccessfully() {
291            assertFalse(getStringEncoder().isEncodeEquals(null, " "));
292        }
293        
294        @Test
295        public final void testCompareNameSameNames_ReturnsFalseSuccessfully() {
296            assertTrue(getStringEncoder().isEncodeEquals("John", "John"));
297        }
298    
299        // ***** END REGION - TEST GET MRA ENCODING
300    
301        // ***** BEGIN REGION - TEST GET MRA COMPARISONS
302    
303        @Test
304        public final void testCompare_SMITH_SMYTH_SuccessfullyMatched() {
305            assertTrue(this.getStringEncoder().isEncodeEquals("smith", "smyth"));
306        }
307    
308        @Test
309        public final void testCompare_BURNS_BOURNE_SuccessfullyMatched() {
310            assertTrue(this.getStringEncoder().isEncodeEquals("Burns", "Bourne"));
311        }
312    
313        @Test
314        public final void testCompare_ShortNames_AL_ED_WorksButNoMatch() {
315            assertFalse(this.getStringEncoder().isEncodeEquals("Al", "Ed"));
316        }
317    
318        @Test
319        public final void testCompare_CATHERINE_KATHRYN_SuccessfullyMatched() {
320            assertTrue(this.getStringEncoder().isEncodeEquals("Catherine", "Kathryn"));
321        }
322    
323        @Test
324        public final void testCompare_BRIAN_BRYAN_SuccessfullyMatched() {
325            assertTrue(this.getStringEncoder().isEncodeEquals("Brian", "Bryan"));
326        }
327    
328        @Test
329        public final void testCompare_SEAN_SHAUN_SuccessfullyMatched() {
330            assertTrue(this.getStringEncoder().isEncodeEquals("Séan", "Shaun"));
331        }
332    
333        @Test
334        public final void testCompare_COLM_COLIN_WithAccentsAndSymbolsAndSpaces_SuccessfullyMatched() {
335            assertTrue(this.getStringEncoder().isEncodeEquals("Cólm.   ", "C-olín"));
336        }
337    
338        @Test
339        public final void testCompare_STEPHEN_STEVEN_SuccessfullyMatched() {
340            assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Steven"));
341        }
342    
343        @Test
344        public final void testCompare_STEVEN_STEFAN_SuccessfullyMatched() {
345            assertTrue(this.getStringEncoder().isEncodeEquals("Steven", "Stefan"));
346        }
347    
348        @Test
349        public final void testCompare_STEPHEN_STEFAN_SuccessfullyMatched() {
350            assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Stefan"));
351        }
352    
353        @Test
354        public final void testCompare_SAM_SAMUEL_SuccessfullyMatched() {
355            assertTrue(this.getStringEncoder().isEncodeEquals("Sam", "Samuel"));
356        }
357    
358        @Test
359        public final void testCompare_MICKY_MICHAEL_SuccessfullyMatched() {
360            assertTrue(this.getStringEncoder().isEncodeEquals("Micky", "Michael"));
361        }
362    
363        @Test
364        public final void testCompare_OONA_OONAGH_SuccessfullyMatched() {
365            assertTrue(this.getStringEncoder().isEncodeEquals("Oona", "Oonagh"));
366        }
367    
368        @Test
369        public final void testCompare_SOPHIE_SOFIA_SuccessfullyMatched() {
370            assertTrue(this.getStringEncoder().isEncodeEquals("Sophie", "Sofia"));
371        }
372    
373        @Test
374        public final void testCompare_FRANCISZEK_FRANCES_SuccessfullyMatched() {
375            assertTrue(this.getStringEncoder().isEncodeEquals("Franciszek", "Frances"));
376        }
377    
378        @Test
379        public final void testCompare_TOMASZ_TOM_SuccessfullyMatched() {
380            assertTrue(this.getStringEncoder().isEncodeEquals("Tomasz", "tom"));
381        }
382    
383        @Test
384        public final void testCompare_SmallInput_CARK_Kl_SuccessfullyMatched() {
385            assertTrue(this.getStringEncoder().isEncodeEquals("Kl", "Karl"));
386        }
387    
388        @Test
389        public final void testCompareNameToSingleLetter_KARL_C_DoesNotMatch() {
390            assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "C"));
391        }
392    
393        @Test
394        public final void testCompare_ZACH_ZAKARIA_SuccessfullyMatched() {
395            assertTrue(this.getStringEncoder().isEncodeEquals("Zach", "Zacharia"));
396        }
397    
398        @Test
399        public final void testCompare_KARL_ALESSANDRO_DoesNotMatch() {
400            assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "Alessandro"));
401        }
402    
403        @Test
404        public final void testCompare_Forenames_UNA_OONAGH_ShouldSuccessfullyMatchButDoesNot() {
405            assertFalse(this.getStringEncoder().isEncodeEquals("Úna", "Oonagh")); // Disappointing
406        }
407    
408        // ***** Begin Region - Test Get Encoding - Surnames
409    
410        @Test
411        public final void testCompare_Surname_OSULLIVAN_OSUILLEABHAIN_SuccessfulMatch() {
412            assertTrue(this.getStringEncoder().isEncodeEquals("O'Sullivan", "Ó ' Súilleabháin"));
413        }
414    
415        @Test
416        public final void testCompare_LongSurnames_MORIARTY_OMUIRCHEARTAIGH_DoesNotSuccessfulMatch() {
417            assertFalse(this.getStringEncoder().isEncodeEquals("Moriarty", "OMuircheartaigh"));
418        }
419    
420        @Test
421        public final void testCompare_LongSurnames_OMUIRCHEARTAIGH_OMIREADHAIGH_SuccessfulMatch() {
422            assertTrue(this.getStringEncoder().isEncodeEquals("o'muireadhaigh", "Ó 'Muircheartaigh "));
423        }
424    
425        @Test
426        public final void testCompare_Surname_COOPERFLYNN_SUPERLYN_SuccessfullyMatched() {
427            assertTrue(this.getStringEncoder().isEncodeEquals("Cooper-Flynn", "Super-Lyn"));
428        }
429    
430        @Test
431        public final void testCompare_Surname_HAILEY_HALLEY_SuccessfullyMatched() {
432            assertTrue(this.getStringEncoder().isEncodeEquals("Hailey", "Halley"));
433        }
434    
435        // **** BEGIN YIDDISH/SLAVIC SECTION ****
436    
437        @Test
438        public final void testCompare_Surname_AUERBACH_UHRBACH_SuccessfullyMatched() {
439            assertTrue(this.getStringEncoder().isEncodeEquals("Auerbach", "Uhrbach"));
440        }
441    
442        @Test
443        public final void testCompare_Surname_MOSKOWITZ_MOSKOVITZ_SuccessfullyMatched() {
444            assertTrue(this.getStringEncoder().isEncodeEquals("Moskowitz", "Moskovitz"));
445        }
446    
447        @Test
448        public final void testCompare_Surname_LIPSHITZ_LIPPSZYC_SuccessfullyMatched() {
449            assertTrue(this.getStringEncoder().isEncodeEquals("LIPSHITZ", "LIPPSZYC"));
450        }
451    
452        @Test
453        public final void testCompare_Surname_LEWINSKY_LEVINSKI_SuccessfullyMatched() {
454            assertTrue(this.getStringEncoder().isEncodeEquals("LEWINSKY", "LEVINSKI"));
455        }
456    
457        @Test
458        public final void testCompare_Surname_SZLAMAWICZ_SHLAMOVITZ_SuccessfullyMatched() {
459            assertTrue(this.getStringEncoder().isEncodeEquals("SZLAMAWICZ", "SHLAMOVITZ"));
460        }
461    
462        @Test
463        public final void testCompare_Surname_ROSOCHOWACIEC_ROSOKHOVATSETS_SuccessfullyMatched() {
464            assertTrue(this.getStringEncoder().isEncodeEquals("R o s o ch o w a c ie c", " R o s o k ho v a ts e ts"));
465        }
466    
467        @Test
468        public final void testCompare_Surname_PRZEMYSL_PSHEMESHIL_SuccessfullyMatched() {
469            assertTrue(this.getStringEncoder().isEncodeEquals(" P rz e m y s l", " P sh e m e sh i l"));
470        }
471    
472        // **** END YIDDISH/SLAVIC SECTION ****
473    
474        @Test
475        public final void testCompare_PETERSON_PETERS_SuccessfullyMatched() {
476            assertTrue(this.getStringEncoder().isEncodeEquals("Peterson", "Peters"));
477        }
478    
479        @Test
480        public final void testCompare_MCGOWAN_MCGEOGHEGAN_SuccessfullyMatched() {
481            assertTrue(this.getStringEncoder().isEncodeEquals("McGowan", "Mc Geoghegan"));
482        }
483    
484        @Test
485        public final void testCompare_SurnamesCornerCase_MURPHY_Space_NoMatch() {
486            assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", " "));
487        }
488    
489        @Test
490        public final void testCompare_SurnamesCornerCase_MURPHY_NoSpace_NoMatch() {
491            assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", ""));
492        }
493    
494        @Test
495        public final void testCompare_SurnameCornerCase_Nulls_NoMatch() {
496            assertFalse(this.getStringEncoder().isEncodeEquals(null, null));
497        }
498    
499        @Test
500        public final void testCompare_Surnames_MURPHY_LYNCH_NoMatchExpected() {
501            assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", "Lynch"));
502        }
503    
504        @Test
505        public final void testCompare_Forenames_SEAN_JOHN_MatchExpected() {
506            assertTrue(this.getStringEncoder().isEncodeEquals("Sean", "John"));
507        }
508    
509        @Test
510        public final void testCompare_Forenames_SEAN_PETE_NoMatchExpected() {
511            assertFalse(this.getStringEncoder().isEncodeEquals("Sean", "Pete"));
512        }
513    
514        @Override
515        protected MatchRatingApproachEncoder createStringEncoder() {
516            return new MatchRatingApproachEncoder();
517        }
518    
519        // ***** END REGION - TEST GET MRA COMPARISONS
520    
521    }