1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language.bm;
19
20 import static org.junit.jupiter.api.Assertions.assertEquals;
21 import static org.junit.jupiter.api.Assertions.assertTrue;
22
23 import java.util.stream.Stream;
24
25 import org.junit.jupiter.params.ParameterizedTest;
26 import org.junit.jupiter.params.provider.Arguments;
27 import org.junit.jupiter.params.provider.MethodSource;
28
29
30
31
32 public class PhoneticEngineTest {
33
34 private static final Integer TEN = Integer.valueOf(10);
35
36 public static Stream<Arguments> data() {
37 return Stream.of(
38 Arguments.of("Renault", "rinD|rinDlt|rina|rinalt|rino|rinolt|rinu|rinult", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, TEN),
39 Arguments.of("Renault", "rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinolt|rinult", NameType.ASHKENAZI, RuleType.APPROX, Boolean.TRUE, TEN),
40 Arguments.of("Renault", "rinDlt", NameType.ASHKENAZI, RuleType.APPROX, Boolean.TRUE, Integer.valueOf(1)),
41 Arguments.of("Renault", "rinDlt", NameType.SEPHARDIC, RuleType.APPROX, Boolean.TRUE, TEN),
42 Arguments.of("SntJohn-Smith", "sntjonsmit", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN),
43 Arguments.of("d'ortley", "(ortlaj|ortlej)-(dortlaj|dortlej)", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN),
44 Arguments.of("van helsing", "(elSink|elsink|helSink|helsink|helzink|xelsink)-(banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink)", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
45 Arguments.of("Judenburg", "iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, TEN)
46 );
47 }
48
49 public static Stream<Arguments> invalidData() {
50 return Stream.of(
51 Arguments.of("bar", "bar|bor|var|vor", NameType.ASHKENAZI, RuleType.APPROX, Boolean.FALSE, TEN),
52 Arguments.of("al", "|al", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
53 Arguments.of("da", "da|di", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
54 Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN)
55 );
56 }
57
58
59 @ParameterizedTest
60 @MethodSource("data")
61 public void testEncode(final String name, final String phoneticExpected, final NameType nameType,
62 final RuleType ruleType, final boolean concat, final int maxPhonemes) {
63 final PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes);
64
65 final String phoneticActual = engine.encode(name);
66
67 assertEquals(phoneticExpected, phoneticActual, "phoneme incorrect");
68
69 if (concat) {
70 final String[] split = phoneticActual.split("\\|");
71 assertTrue(split.length <= maxPhonemes);
72 } else {
73 final String[] words = phoneticActual.split("-");
74 for (final String word : words) {
75 final String[] split = word.split("\\|");
76 assertTrue(split.length <= maxPhonemes);
77 }
78 }
79 }
80
81 @ParameterizedTest
82 @MethodSource("invalidData")
83 public void testInvalidEncode(final String input, final String phoneticExpected, final NameType nameType,
84 final RuleType ruleType, final boolean concat, final int maxPhonemes) {
85 final PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes);
86
87 assertEquals(engine.encode(input), phoneticExpected);
88 }
89 }