1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language.bm;
19
20 import static org.junit.jupiter.api.Assertions.assertEquals;
21 import static org.junit.jupiter.api.Assertions.assertTrue;
22
23 import java.util.regex.Pattern;
24 import java.util.stream.Stream;
25
26 import org.junit.jupiter.params.ParameterizedTest;
27 import org.junit.jupiter.params.provider.Arguments;
28 import org.junit.jupiter.params.provider.MethodSource;
29
30
31
32
33 class PhoneticEngineTest {
34
35 private static final Integer TEN = Integer.valueOf(10);
36
37 private static final Pattern PIPE_PATTERN = Pattern.compile("\\|");
38 private static final Pattern MINUS_PATTERN = Pattern.compile("-");
39
40 public static Stream<Arguments> data() {
41
42 return Stream.of(
43 Arguments.of("Renault", "rinD|rinDlt|rina|rinalt|rino|rinolt|rinu|rinult", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, TEN),
44 Arguments.of("Renault", "rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinolt|rinult", NameType.ASHKENAZI, RuleType.APPROX, Boolean.TRUE, TEN),
45 Arguments.of("Renault", "rinDlt", NameType.ASHKENAZI, RuleType.APPROX, Boolean.TRUE, Integer.valueOf(1)),
46 Arguments.of("Renault", "rinDlt", NameType.SEPHARDIC, RuleType.APPROX, Boolean.TRUE, TEN),
47 Arguments.of("SntJohn-Smith", "sntjonsmit", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN),
48 Arguments.of("d'ortley", "(ortlaj|ortlej)-(dortlaj|dortlej)", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN),
49 Arguments.of("van helsing", "(elSink|elsink|helSink|helsink|helzink|xelsink)-(banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink)", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
50 Arguments.of("Judenburg", "iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, TEN),
51 Arguments.of("Judenburg", "iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, Integer.MAX_VALUE)
52 );
53
54 }
55
56 public static Stream<Arguments> invalidData() {
57
58 return Stream.of(
59 Arguments.of("bar", "bar|bor|var|vor", NameType.ASHKENAZI, RuleType.APPROX, Boolean.FALSE, TEN),
60 Arguments.of("al", "|al", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
61 Arguments.of("da", "da|di", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
62 Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
63 Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, Integer.MAX_VALUE)
64 );
65
66 }
67
68
69 @ParameterizedTest
70 @MethodSource("data")
71 void testEncode(final String name, final String phoneticExpected, final NameType nameType,
72 final RuleType ruleType, final boolean concat, final int maxPhonemes) {
73 final PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes);
74
75 final String phoneticActual = engine.encode(name);
76
77 assertEquals(phoneticExpected, phoneticActual, "phoneme incorrect");
78
79 if (concat) {
80 final String[] split = PIPE_PATTERN.split(phoneticActual);
81 assertTrue(split.length <= maxPhonemes);
82 } else {
83 final String[] words = MINUS_PATTERN.split(phoneticActual);
84 for (final String word : words) {
85 final String[] split = PIPE_PATTERN.split(word);
86 assertTrue(split.length <= maxPhonemes);
87 }
88 }
89 }
90
91 @ParameterizedTest
92 @MethodSource("invalidData")
93 void testInvalidEncode(final String input, final String phoneticExpected, final NameType nameType,
94 final RuleType ruleType, final boolean concat, final int maxPhonemes) {
95 final PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes);
96
97 assertEquals(engine.encode(input), phoneticExpected);
98 }
99 }