1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language;
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 public class Caverphone2 extends AbstractCaverphone {
34
35 private static final String TEN_1 = "1111111111";
36
37
38
39
40
41
42
43
44 @Override
45 public String encode(final String source) {
46 String txt = source;
47 if (txt == null || txt.length() == 0) {
48 return TEN_1;
49 }
50
51
52 txt = txt.toLowerCase(java.util.Locale.ENGLISH);
53
54
55 txt = txt.replaceAll("[^a-z]", "");
56
57
58 txt = txt.replaceAll("e$", "");
59
60
61 txt = txt.replaceAll("^cough", "cou2f");
62 txt = txt.replaceAll("^rough", "rou2f");
63 txt = txt.replaceAll("^tough", "tou2f");
64 txt = txt.replaceAll("^enough", "enou2f");
65 txt = txt.replaceAll("^trough", "trou2f");
66
67 txt = txt.replaceAll("^gn", "2n");
68
69
70 txt = txt.replaceAll("mb$", "m2");
71
72
73 txt = txt.replaceAll("cq", "2q");
74 txt = txt.replaceAll("ci", "si");
75 txt = txt.replaceAll("ce", "se");
76 txt = txt.replaceAll("cy", "sy");
77 txt = txt.replaceAll("tch", "2ch");
78 txt = txt.replaceAll("c", "k");
79 txt = txt.replaceAll("q", "k");
80 txt = txt.replaceAll("x", "k");
81 txt = txt.replaceAll("v", "f");
82 txt = txt.replaceAll("dg", "2g");
83 txt = txt.replaceAll("tio", "sio");
84 txt = txt.replaceAll("tia", "sia");
85 txt = txt.replaceAll("d", "t");
86 txt = txt.replaceAll("ph", "fh");
87 txt = txt.replaceAll("b", "p");
88 txt = txt.replaceAll("sh", "s2");
89 txt = txt.replaceAll("z", "s");
90 txt = txt.replaceAll("^[aeiou]", "A");
91 txt = txt.replaceAll("[aeiou]", "3");
92 txt = txt.replaceAll("j", "y");
93 txt = txt.replaceAll("^y3", "Y3");
94 txt = txt.replaceAll("^y", "A");
95 txt = txt.replaceAll("y", "3");
96 txt = txt.replaceAll("3gh3", "3kh3");
97 txt = txt.replaceAll("gh", "22");
98 txt = txt.replaceAll("g", "k");
99 txt = txt.replaceAll("s+", "S");
100 txt = txt.replaceAll("t+", "T");
101 txt = txt.replaceAll("p+", "P");
102 txt = txt.replaceAll("k+", "K");
103 txt = txt.replaceAll("f+", "F");
104 txt = txt.replaceAll("m+", "M");
105 txt = txt.replaceAll("n+", "N");
106 txt = txt.replaceAll("w3", "W3");
107 txt = txt.replaceAll("wh3", "Wh3");
108 txt = txt.replaceAll("w$", "3");
109 txt = txt.replaceAll("w", "2");
110 txt = txt.replaceAll("^h", "A");
111 txt = txt.replaceAll("h", "2");
112 txt = txt.replaceAll("r3", "R3");
113 txt = txt.replaceAll("r$", "3");
114 txt = txt.replaceAll("r", "2");
115 txt = txt.replaceAll("l3", "L3");
116 txt = txt.replaceAll("l$", "3");
117 txt = txt.replaceAll("l", "2");
118
119
120 txt = txt.replaceAll("2", "");
121 txt = txt.replaceAll("3$", "A");
122 txt = txt.replaceAll("3", "");
123
124
125 txt = txt + TEN_1;
126
127
128 return txt.substring(0, TEN_1.length());
129 }
130
131 }