001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.language.bm;
019
020 import static org.junit.Assert.assertTrue;
021
022 import java.util.Arrays;
023 import java.util.List;
024
025 import org.junit.Test;
026 import org.junit.runner.RunWith;
027 import org.junit.runners.Parameterized;
028
029 /**
030 * Tests guessLanguages API.
031 *
032 * @since 1.6
033 */
034 @RunWith(Parameterized.class)
035 public class LanguageGuessingTest {
036
037 private static String EXACT = "exact";
038 private static String ONE_OF = "one of";
039
040 @Parameterized.Parameters
041 public static List<Object[]> data() {
042 return Arrays.asList(new Object[][] {
043 { "Renault", "french", EXACT },
044 { "Mickiewicz", "polish", EXACT },
045 { "Thompson", "english", ONE_OF }, // this also hits german and greeklatin
046 { "Nu\u00f1ez", "spanish", EXACT }, // Nuñez
047 { "Carvalho", "portuguese", EXACT },
048 { "\u010capek", "czech", EXACT }, // Čapek
049 { "Sjneijder", "dutch", EXACT },
050 { "Klausewitz", "german", EXACT },
051 { "K\u00fc\u00e7\u00fck", "turkish", EXACT }, // Küçük
052 { "Giacometti", "italian", EXACT },
053 { "Nagy", "hungarian", EXACT },
054 { "Ceau\u015fescu", "romanian", EXACT }, // Ceauşescu
055 { "Angelopoulos", "greeklatin", EXACT },
056 { "\u0391\u03b3\u03b3\u03b5\u03bb\u03cc\u03c0\u03bf\u03c5\u03bb\u03bf\u03c2", "greek", EXACT }, // Αγγελόπουλος
057 { "\u041f\u0443\u0448\u043a\u0438\u043d", "cyrillic", EXACT }, // Пушкин
058 { "\u05db\u05d4\u05df", "hebrew", EXACT }, // כהן
059 { "\u00e1cz", "any", EXACT }, // ácz
060 { "\u00e1tz", "any", EXACT } }); // átz
061 }
062
063 private final String exactness;
064
065 private final Lang lang = Lang.instance(NameType.GENERIC);
066 private final String language;
067 private final String name;
068
069 public LanguageGuessingTest(final String name, final String language, final String exactness) {
070 this.name = name;
071 this.language = language;
072 this.exactness = exactness;
073 }
074
075 @Test
076 public void testLanguageGuessing() {
077 final Languages.LanguageSet guesses = this.lang.guessLanguages(this.name);
078
079 assertTrue("language predicted for name '" + this.name + "' is wrong: " + guesses + " should contain '" + this.language + "'",
080 guesses.contains(this.language));
081
082 }
083 }