001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.language.bm;
019
020 import static org.junit.Assert.assertEquals;
021 import static org.junit.Assert.assertTrue;
022
023 import java.util.Arrays;
024 import java.util.List;
025
026 import org.junit.Test;
027 import org.junit.runner.RunWith;
028 import org.junit.runners.Parameterized;
029
030 /**
031 * Tests PhoneticEngine.
032 *
033 * @since 1.6
034 */
035 @RunWith(Parameterized.class)
036 public class PhoneticEngineTest {
037
038 private static final Integer TEN = Integer.valueOf(10);
039
040 @Parameterized.Parameters
041 public static List<Object[]> data() {
042 return Arrays
043 .asList(new Object[] { "Renault", "rinD|rinDlt|rina|rinalt|rino|rinolt|rinu|rinult", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, TEN },
044 new Object[] { "Renault", "rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult", NameType.ASHKENAZI, RuleType.APPROX, Boolean.TRUE, TEN },
045 new Object[] { "Renault", "rYnDlt", NameType.ASHKENAZI, RuleType.APPROX, Boolean.TRUE, Integer.valueOf(1) },
046 new Object[] { "Renault", "rinDlt", NameType.SEPHARDIC, RuleType.APPROX, Boolean.TRUE, TEN },
047 new Object[] { "SntJohn-Smith", "sntjonsmit", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN },
048 new Object[] { "d'ortley", "(ortlaj|ortlej)-(dortlaj|dortlej)", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN },
049 new Object[] {
050 "van helsing",
051 "(elSink|elsink|helSink|helsink|helzink|xelsink)-(banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink)",
052 NameType.GENERIC,
053 RuleType.EXACT,
054 Boolean.FALSE, TEN });
055 }
056
057 private final boolean concat;
058 private final String name;
059 private final NameType nameType;
060 private final String phoneticExpected;
061 private final RuleType ruleType;
062 private final int maxPhonemes;
063
064 public PhoneticEngineTest(final String name, final String phoneticExpected, final NameType nameType,
065 final RuleType ruleType, final boolean concat, final int maxPhonemes) {
066 this.name = name;
067 this.phoneticExpected = phoneticExpected;
068 this.nameType = nameType;
069 this.ruleType = ruleType;
070 this.concat = concat;
071 this.maxPhonemes = maxPhonemes;
072 }
073
074 @Test(timeout = 10000L)
075 public void testEncode() {
076 final PhoneticEngine engine = new PhoneticEngine(this.nameType, this.ruleType, this.concat, this.maxPhonemes);
077
078 final String phoneticActual = engine.encode(this.name);
079
080 //System.err.println("expecting: " + this.phoneticExpected);
081 //System.err.println("actual: " + phoneticActual);
082 assertEquals("phoneme incorrect", this.phoneticExpected, phoneticActual);
083
084 if (this.concat) {
085 final String[] split = phoneticActual.split("\\|");
086 assertTrue(split.length <= this.maxPhonemes);
087 } else {
088 final String[] words = phoneticActual.split("-");
089 for (final String word : words) {
090 final String[] split = word.split("\\|");
091 assertTrue(split.length <= this.maxPhonemes);
092 }
093 }
094 }
095 }