View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language.bm;
19  
20  import static org.junit.jupiter.api.Assertions.assertEquals;
21  import static org.junit.jupiter.api.Assertions.assertFalse;
22  import static org.junit.jupiter.api.Assertions.assertNotEquals;
23  import static org.junit.jupiter.api.Assertions.assertThrows;
24  import static org.junit.jupiter.api.Assertions.assertTimeout;
25  import static org.junit.jupiter.api.Assertions.assertTrue;
26  
27  import java.time.Duration;
28  import java.util.regex.Pattern;
29  
30  import org.apache.commons.codec.AbstractStringEncoderTest;
31  import org.apache.commons.codec.EncoderException;
32  import org.apache.commons.codec.StringEncoder;
33  import org.junit.jupiter.api.Test;
34  
35  /**
36   * Tests BeiderMorseEncoder.
37   */
38  class BeiderMorseEncoderTest extends AbstractStringEncoderTest<StringEncoder> {
39  
40      private static final Pattern PIPE_PATTERN = Pattern.compile("\\|");
41  
42      private static final char[] TEST_CHARS = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'o', 'u' };
43  
44      private void assertNotEmpty(final BeiderMorseEncoder bmpm, final String value) throws EncoderException {
45          assertNotEquals("", bmpm.encode(value), value);
46      }
47  
48      private BeiderMorseEncoder createGenericApproxEncoder() {
49          final BeiderMorseEncoder encoder = new BeiderMorseEncoder();
50          encoder.setNameType(NameType.GENERIC);
51          encoder.setRuleType(RuleType.APPROX);
52          return encoder;
53      }
54  
55      @Override
56      protected StringEncoder createStringEncoder() {
57          return new BeiderMorseEncoder();
58      }
59  
60      /**
61       * Tests we do not blow up.
62       *
63       * @throws EncoderException for some failure scenarios     */
64      @Test
65      void testAllChars() throws EncoderException {
66          final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
67          for (char c = Character.MIN_VALUE; c < Character.MAX_VALUE; c++) {
68              bmpm.encode(Character.toString(c));
69          }
70      }
71  
72      @Test
73      void testAsciiEncodeNotEmpty1Letter() throws EncoderException {
74          final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
75          for (char c = 'a'; c <= 'z'; c++) {
76              final String value = Character.toString(c);
77              final String valueU = value.toUpperCase();
78              assertNotEmpty(bmpm, value);
79              assertNotEmpty(bmpm, valueU);
80          }
81      }
82  
83      @Test
84      void testAsciiEncodeNotEmpty2Letters() throws EncoderException {
85          final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
86          for (char c1 = 'a'; c1 <= 'z'; c1++) {
87              for (char c2 = 'a'; c2 <= 'z'; c2++) {
88                  final String value = new String(new char[] { c1, c2 });
89                  final String valueU = value.toUpperCase();
90                  assertNotEmpty(bmpm, value);
91                  assertNotEmpty(bmpm, valueU);
92              }
93          }
94      }
95  
96      @Test
97      void testEncodeAtzNotEmpty() throws EncoderException {
98          final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
99          //String[] names = { "ácz", "átz", "Ignácz", "Ignátz", "Ignác" };
100         final String[] names = { "\u00e1cz", "\u00e1tz", "Ign\u00e1cz", "Ign\u00e1tz", "Ign\u00e1c" };
101         for (final String name : names) {
102             assertNotEmpty(bmpm, name);
103         }
104     }
105 
106     /**
107      * Tests https://issues.apache.org/jira/browse/CODEC-125?focusedCommentId=13071566&page=com.atlassian.jira.plugin.system.issuetabpanels:
108      * comment-tabpanel#comment-13071566
109      *
110      * @throws EncoderException for some failure scenarios     */
111     @Test
112     void testEncodeGna() throws EncoderException {
113         final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
114         bmpm.encode("gna");
115     }
116 
117     @Test
118     void testInvalidLangIllegalArgumentException() {
119         assertThrows(IllegalArgumentException.class, () -> Rule.getInstance(NameType.GENERIC, RuleType.APPROX, "noSuchLanguage"));
120     }
121 
122     @Test
123     void testInvalidLangIllegalStateException() {
124         assertThrows(IllegalArgumentException.class, () -> Lang.loadFromResource("thisIsAMadeUpResourceName", Languages.getInstance(NameType.GENERIC)));
125     }
126 
127     @Test
128     void testInvalidLanguageIllegalArgumentException() {
129         assertThrows(IllegalArgumentException.class, () -> Languages.getInstance("thereIsNoSuchLanguage"));
130     }
131 
132     @Test
133     void testLongestEnglishSurname() {
134         final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
135         assertTimeout(Duration.ofMillis(10000L), () -> bmpm.encode("MacGhilleseatheanaich"));
136     }
137 
138     @Test
139     void testNegativeIndexForRuleMatchIndexOutOfBoundsException() {
140         final Rule r = new Rule("a", "", "", new Rule.Phoneme("", Languages.ANY_LANGUAGE));
141         assertThrows(IndexOutOfBoundsException.class, () -> r.patternAndContextMatches("bob", -1));
142     }
143 
144     @Test
145     void testOOM() throws EncoderException {
146         final String phrase = "200697900'-->&#1913348150;</  bceaeef >aadaabcf\"aedfbff<!--\'-->?>cae" +
147             "cfaaa><?&#<!--</script>&lang&fc;aadeaf?>>&bdquo<    cc =\"abff\"    /></   afe  >" +
148             "<script><!-- f(';<    cf aefbeef = \"bfabadcf\" ebbfeedd = fccabeb >";
149 
150         final BeiderMorseEncoder encoder = new BeiderMorseEncoder();
151         encoder.setNameType(NameType.GENERIC);
152         encoder.setRuleType(RuleType.EXACT);
153         encoder.setMaxPhonemes(10);
154 
155         final String phonemes = encoder.encode(phrase);
156         assertFalse(phonemes.isEmpty());
157 
158         final String[] phonemeArr = PIPE_PATTERN.split(phonemes);
159         assertTrue(phonemeArr.length <= 10);
160     }
161 
162     @Test
163     void testSetConcat() {
164         final BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
165         bmpm.setConcat(false);
166         assertFalse(bmpm.isConcat(), "Should be able to set concat to false");
167     }
168 
169     @Test
170     void testSetNameTypeAsh() {
171         final BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
172         bmpm.setNameType(NameType.ASHKENAZI);
173         assertEquals(NameType.ASHKENAZI, bmpm.getNameType(), "Name type should have been set to ash");
174     }
175 
176     @Test
177     void testSetRuleTypeExact() {
178         final BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
179         bmpm.setRuleType(RuleType.EXACT);
180         assertEquals(RuleType.EXACT, bmpm.getRuleType(), "Rule type should have been set to exact");
181     }
182 
183     @Test
184     void testSetRuleTypeToRulesIllegalArgumentException() {
185         final BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
186         assertThrows(IllegalArgumentException.class, () -> bmpm.setRuleType(RuleType.RULES));
187     }
188 
189     /**
190      * (Un)luckily, the worse performing test because of the data in the test characters.
191      *
192      * @throws EncoderException for some failure scenarios
193      */
194     @Test /* timeout = 20000L */
195     void testSpeedCheck() throws EncoderException {
196         final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
197         final StringBuilder stringBuffer = new StringBuilder();
198         stringBuffer.append(TEST_CHARS[0]);
199         for (int i = 0, j = 1; i < 40; i++, j++) {
200             if (j == TEST_CHARS.length) {
201                 j = 0;
202             }
203             bmpm.encode(stringBuffer.toString());
204             stringBuffer.append(TEST_CHARS[j]);
205         }
206     }
207 
208     @Test
209     void testSpeedCheck2() throws EncoderException {
210         final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
211         final String phrase = "ItstheendoftheworldasweknowitandIfeelfine";
212 
213         for (int i = 1; i <= phrase.length(); i++) {
214             bmpm.encode(phrase.subSequence(0, i));
215         }
216     }
217 
218     @Test
219     void testSpeedCheck3() throws EncoderException {
220         final BeiderMorseEncoder bmpm = createGenericApproxEncoder();
221         final String phrase = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
222 
223         for (int i = 1; i <= phrase.length(); i++) {
224             bmpm.encode(phrase.subSequence(0, i));
225         }
226     }
227 }