View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language.bm;
19  
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTimeout;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.time.Duration;
import java.util.Locale;

import org.apache.commons.codec.AbstractStringEncoderTest;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
import org.junit.jupiter.api.Test;
33  
34  /**
35   * Tests BeiderMorseEncoder.
36   */
37  public class BeiderMorseEncoderTest extends AbstractStringEncoderTest<StringEncoder> {
38      private static final char[] TEST_CHARS = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'o', 'u' };
39  
40      private void assertNotEmpty(final BeiderMorseEncoder bmpm, final String value) throws EncoderException {
41          assertNotEquals("", bmpm.encode(value), value);
42      }
43  
44      private BeiderMorseEncoder createGenericApproxEncoder() {
45          final BeiderMorseEncoder encoder = new BeiderMorseEncoder();
46          encoder.setNameType(NameType.GENERIC);
47          encoder.setRuleType(RuleType.APPROX);
48          return encoder;
49      }
50  
51      @Override
52      protected StringEncoder createStringEncoder() {
53          return new BeiderMorseEncoder();
54      }
55  
56      /**
57       * Tests we do not blow up.
58       *
59       * @throws EncoderException for some failure scenarios     */
60      @Test
61      public void testAllChars() throws EncoderException {
62          final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
63          for (char c = Character.MIN_VALUE; c < Character.MAX_VALUE; c++) {
64              bmpm.encode(Character.toString(c));
65          }
66      }
67  
68      @Test
69      public void testAsciiEncodeNotEmpty1Letter() throws EncoderException {
70          final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
71          for (char c = 'a'; c <= 'z'; c++) {
72              final String value = Character.toString(c);
73              final String valueU = value.toUpperCase();
74              assertNotEmpty(bmpm, value);
75              assertNotEmpty(bmpm, valueU);
76          }
77      }
78  
79      @Test
80      public void testAsciiEncodeNotEmpty2Letters() throws EncoderException {
81          final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
82          for (char c1 = 'a'; c1 <= 'z'; c1++) {
83              for (char c2 = 'a'; c2 <= 'z'; c2++) {
84                  final String value = new String(new char[] { c1, c2 });
85                  final String valueU = value.toUpperCase();
86                  assertNotEmpty(bmpm, value);
87                  assertNotEmpty(bmpm, valueU);
88              }
89          }
90      }
91  
92      @Test
93      public void testEncodeAtzNotEmpty() throws EncoderException {
94          final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
95          //String[] names = { "ácz", "átz", "Ignácz", "Ignátz", "Ignác" };
96          final String[] names = { "\u00e1cz", "\u00e1tz", "Ign\u00e1cz", "Ign\u00e1tz", "Ign\u00e1c" };
97          for (final String name : names) {
98              assertNotEmpty(bmpm, name);
99          }
100     }
101 
102     /**
103      * Tests https://issues.apache.org/jira/browse/CODEC-125?focusedCommentId=13071566&page=com.atlassian.jira.plugin.system.issuetabpanels:
104      * comment-tabpanel#comment-13071566
105      *
106      * @throws EncoderException for some failure scenarios     */
107     @Test
108     public void testEncodeGna() throws EncoderException {
109         final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
110         bmpm.encode("gna");
111     }
112 
113     @Test
114     public void testInvalidLangIllegalArgumentException() {
115         assertThrows(IllegalArgumentException.class, () -> Rule.getInstance(NameType.GENERIC, RuleType.APPROX, "noSuchLanguage"));
116     }
117 
118     @Test
119     public void testInvalidLangIllegalStateException() {
120         assertThrows(IllegalArgumentException.class, () -> Lang.loadFromResource("thisIsAMadeUpResourceName", Languages.getInstance(NameType.GENERIC)));
121     }
122 
123     @Test
124     public void testInvalidLanguageIllegalArgumentException() {
125         assertThrows(IllegalArgumentException.class, () -> Languages.getInstance("thereIsNoSuchLanguage"));
126     }
127 
128     @Test
129     public void testLongestEnglishSurname() {
130         final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
131         assertTimeout(Duration.ofMillis(10000L), () -> bmpm.encode("MacGhilleseatheanaich"));
132     }
133 
134     @Test
135     public void testNegativeIndexForRuleMatchIndexOutOfBoundsException() {
136         final Rule r = new Rule("a", "", "", new Rule.Phoneme("", Languages.ANY_LANGUAGE));
137         assertThrows(IndexOutOfBoundsException.class, () -> r.patternAndContextMatches("bob", -1));
138     }
139 
140     @Test
141     public void testOOM() throws EncoderException {
142         final String phrase = "200697900'-->&#1913348150;</  bceaeef >aadaabcf\"aedfbff<!--\'-->?>cae" +
143             "cfaaa><?&#<!--</script>&lang&fc;aadeaf?>>&bdquo<    cc =\"abff\"    /></   afe  >" +
144             "<script><!-- f(';<    cf aefbeef = \"bfabadcf\" ebbfeedd = fccabeb >";
145 
146         final BeiderMorseEncoder encoder = new BeiderMorseEncoder();
147         encoder.setNameType(NameType.GENERIC);
148         encoder.setRuleType(RuleType.EXACT);
149         encoder.setMaxPhonemes(10);
150 
151         final String phonemes = encoder.encode(phrase);
152         assertFalse(phonemes.isEmpty());
153 
154         final String[] phonemeArr = phonemes.split("\\|");
155         assertTrue(phonemeArr.length <= 10);
156     }
157 
158     @Test
159     public void testSetConcat() {
160         final BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
161         bmpm.setConcat(false);
162         assertFalse(bmpm.isConcat(), "Should be able to set concat to false");
163     }
164 
165     @Test
166     public void testSetNameTypeAsh() {
167         final BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
168         bmpm.setNameType(NameType.ASHKENAZI);
169         assertEquals(NameType.ASHKENAZI, bmpm.getNameType(), "Name type should have been set to ash");
170     }
171 
172     @Test
173     public void testSetRuleTypeExact() {
174         final BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
175         bmpm.setRuleType(RuleType.EXACT);
176         assertEquals(RuleType.EXACT, bmpm.getRuleType(), "Rule type should have been set to exact");
177     }
178 
179     @Test
180     public void testSetRuleTypeToRulesIllegalArgumentException() {
181         final BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
182         assertThrows(IllegalArgumentException.class, () -> bmpm.setRuleType(RuleType.RULES));
183     }
184 
185     /**
186      * (Un)luckily, the worse performing test because of the data in the test characters.
187      *
188      * @throws EncoderException for some failure scenarios
189      */
190     @Test /* timeout = 20000L */
191     public void testSpeedCheck() throws EncoderException {
192         final BeiderMorseEncoder bmpm = this.createGenericApproxEncoder();
193         final StringBuilder stringBuffer = new StringBuilder();
194         stringBuffer.append(TEST_CHARS[0]);
195         for (int i = 0, j = 1; i < 40; i++, j++) {
196             if (j == TEST_CHARS.length) {
197                 j = 0;
198             }
199             bmpm.encode(stringBuffer.toString());
200             stringBuffer.append(TEST_CHARS[j]);
201         }
202     }
203 
204     @Test
205     public void testSpeedCheck2() throws EncoderException {
206         final BeiderMorseEncoder bmpm = this.createGenericApproxEncoder();
207         final String phrase = "ItstheendoftheworldasweknowitandIfeelfine";
208 
209         for (int i = 1; i <= phrase.length(); i++) {
210             bmpm.encode(phrase.subSequence(0, i));
211         }
212     }
213 
214     @Test
215     public void testSpeedCheck3() throws EncoderException {
216         final BeiderMorseEncoder bmpm = this.createGenericApproxEncoder();
217         final String phrase = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
218 
219         for (int i = 1; i <= phrase.length(); i++) {
220             bmpm.encode(phrase.subSequence(0, i));
221         }
222     }
223 }