1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.codec.language;
18
19 import static org.junit.jupiter.api.Assertions.assertEquals;
20
21 import org.apache.commons.codec.AbstractStringEncoderTest;
22 import org.apache.commons.codec.EncoderException;
23 import org.junit.jupiter.api.Test;
24
25
26
27
28
29
30
31 public class DaitchMokotoffSoundexTest extends AbstractStringEncoderTest<DaitchMokotoffSoundex> {
32
33 @Override
34 protected DaitchMokotoffSoundex createStringEncoder() {
35 return new DaitchMokotoffSoundex();
36 }
37
38 private String encode(final String source) {
39 return getStringEncoder().encode(source);
40 }
41
42 private String soundex(final String source) {
43 return getStringEncoder().soundex(source);
44 }
45
46 @Test
47 public void testAccentedCharacterFolding() {
48 assertEquals("294795", soundex("Straßburg"));
49 assertEquals("294795", soundex("Strasburg"));
50
51 assertEquals("095600", soundex("Éregon"));
52 assertEquals("095600", soundex("Eregon"));
53 }
54
55 @Test
56 public void testAdjacentCodes() {
57
58
59
60
61 assertEquals("054800", soundex("AKSSOL"));
62
63
64
65
66
67 assertEquals("547830|545783|594783|594578", soundex("GERSCHFELD"));
68 }
69
70 public void testEncodeBasic() {
71
72 assertEquals("097400", encode("AUERBACH"));
73 assertEquals("097400", encode("OHRBACH"));
74 assertEquals("874400", encode("LIPSHITZ"));
75 assertEquals("874400", encode("LIPPSZYC"));
76 assertEquals("876450", encode("LEWINSKY"));
77 assertEquals("876450", encode("LEVINSKI"));
78 assertEquals("486740", encode("SZLAMAWICZ"));
79 assertEquals("486740", encode("SHLAMOVITZ"));
80 }
81
82 @Test
83 public void testEncodeIgnoreApostrophes() throws EncoderException {
84 this.checkEncodingVariations("079600", new String[] { "OBrien", "'OBrien", "O'Brien", "OB'rien", "OBr'ien",
85 "OBri'en", "OBrie'n", "OBrien'" });
86 }
87
88
89
90
91
92 @Test
93 public void testEncodeIgnoreHyphens() throws EncoderException {
94 this.checkEncodingVariations("565463", new String[] { "KINGSMITH", "-KINGSMITH", "K-INGSMITH", "KI-NGSMITH",
95 "KIN-GSMITH", "KING-SMITH", "KINGS-MITH", "KINGSM-ITH", "KINGSMI-TH", "KINGSMIT-H", "KINGSMITH-" });
96 }
97
98 @Test
99 public void testEncodeIgnoreTrimmable() {
100 assertEquals("746536", encode(" \t\n\r Washington \t\n\r "));
101 assertEquals("746536", encode("Washington"));
102 }
103
104
105
106
107 @Test
108 public void testSoundexBasic() {
109 assertEquals("583600", soundex("GOLDEN"));
110 assertEquals("087930", soundex("Alpert"));
111 assertEquals("791900", soundex("Breuer"));
112 assertEquals("579000", soundex("Haber"));
113 assertEquals("665600", soundex("Mannheim"));
114 assertEquals("664000", soundex("Mintz"));
115 assertEquals("370000", soundex("Topf"));
116 assertEquals("586660", soundex("Kleinmann"));
117 assertEquals("769600", soundex("Ben Aron"));
118
119 assertEquals("097400|097500", soundex("AUERBACH"));
120 assertEquals("097400|097500", soundex("OHRBACH"));
121 assertEquals("874400", soundex("LIPSHITZ"));
122 assertEquals("874400|874500", soundex("LIPPSZYC"));
123 assertEquals("876450", soundex("LEWINSKY"));
124 assertEquals("876450", soundex("LEVINSKI"));
125 assertEquals("486740", soundex("SZLAMAWICZ"));
126 assertEquals("486740", soundex("SHLAMOVITZ"));
127 }
128
129
130
131
132 @Test
133 public void testSoundexBasic2() {
134 assertEquals("467000|567000", soundex("Ceniow"));
135 assertEquals("467000", soundex("Tsenyuv"));
136 assertEquals("587400|587500", soundex("Holubica"));
137 assertEquals("587400", soundex("Golubitsa"));
138 assertEquals("746480|794648", soundex("Przemysl"));
139 assertEquals("746480", soundex("Pshemeshil"));
140 assertEquals("944744|944745|944754|944755|945744|945745|945754|945755", soundex("Rosochowaciec"));
141 assertEquals("945744", soundex("Rosokhovatsets"));
142 }
143
144
145
146
147 @Test
148 public void testSoundexBasic3() {
149 assertEquals("734000|739400", soundex("Peters"));
150 assertEquals("734600|739460", soundex("Peterson"));
151 assertEquals("645740", soundex("Moskowitz"));
152 assertEquals("645740", soundex("Moskovitz"));
153 assertEquals("154600|145460|454600|445460", soundex("Jackson"));
154 assertEquals("154654|154645|154644|145465|145464|454654|454645|454644|445465|445464",
155 soundex("Jackson-Jackson"));
156 }
157
158 @Test
159 public void testSpecialRomanianCharacters() {
160 assertEquals("364000|464000", soundex("ţamas"));
161 assertEquals("364000|464000", soundex("țamas"));
162 }
163
164 }