001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019import java.util.ArrayList;
020import java.util.Arrays;
021import java.util.Collections;
022import java.util.Comparator;
023import java.util.LinkedHashSet;
024import java.util.List;
025import java.util.Locale;
026import java.util.Set;
027import java.util.concurrent.ConcurrentHashMap;
028import java.util.concurrent.ConcurrentMap;
029import java.util.function.Predicate;
030import java.util.stream.Collectors;
031
032/**
033 * Operations to assist when working with a {@link Locale}.
034 *
035 * <p>This class tries to handle {@code null} input gracefully.
036 * An exception will not be thrown for a {@code null} input.
037 * Each method documents its behavior in more detail.</p>
038 *
039 * @since 2.2
040 */
041public class LocaleUtils {
042
043    /**
044     * Avoids synchronization, inits on demand.
045     */
046    private static final class SyncAvoid {
047
048        /** Private unmodifiable and sorted list of available locales. */
049        private static final List<Locale> AVAILABLE_LOCALE_ULIST;
050
051        /** Private unmodifiable set of available locales. */
052        private static final Set<Locale> AVAILABLE_LOCALE_USET;
053
054        static {
055            AVAILABLE_LOCALE_ULIST = Collections
056                    .unmodifiableList(Arrays.asList(ArraySorter.sort(Locale.getAvailableLocales(), Comparator.comparing(Locale::toString))));
057            AVAILABLE_LOCALE_USET = Collections.unmodifiableSet(new LinkedHashSet<>(AVAILABLE_LOCALE_ULIST));
058        }
059    }
060
061    /**
062     * The underscore character {@code '}{@value}{@code '}.
063     */
064    private static final char UNDERSCORE = '_';
065
066    /**
067     * The undetermined language {@value}.
068     * <p>
069     * If a language is empty, or not <em>well-formed</am> (for example "a" or "e2"), {@link Locale#toLanguageTag()} will return {@code "und"} (Undetermined).
070     * </p>
071     *
072     * @see Locale#toLanguageTag()
073     */
074    private static final String UNDETERMINED = "und";
075
076    /**
077     * The dash character {@code '}{@value}{@code '}.
078     */
079    private static final char DASH = '-';
080
081    /**
082     * Concurrent map of language locales by country.
083     */
084    private static final ConcurrentMap<String, List<Locale>> cLanguagesByCountry = new ConcurrentHashMap<>();
085
086    /**
087     * Concurrent map of country locales by language.
088     */
089    private static final ConcurrentMap<String, List<Locale>> cCountriesByLanguage = new ConcurrentHashMap<>();
090
091    /**
092     * Obtains an unmodifiable and sorted list of installed locales.
093     *
094     * <p>This method is a wrapper around {@link Locale#getAvailableLocales()}.
095     * It is more efficient, as the JDK method must create a new array each
096     * time it is called.</p>
097     *
098     * @return the unmodifiable and sorted list of available locales
099     */
100    public static List<Locale> availableLocaleList() {
101        return SyncAvoid.AVAILABLE_LOCALE_ULIST;
102    }
103
104    private static List<Locale> availableLocaleList(final Predicate<Locale> predicate) {
105        return availableLocaleList().stream().filter(predicate).collect(Collectors.toList());
106    }
107
108    /**
109     * Obtains an unmodifiable set of installed locales.
110     *
111     * <p>This method is a wrapper around {@link Locale#getAvailableLocales()}.
112     * It is more efficient, as the JDK method must create a new array each
113     * time it is called.</p>
114     *
115     * @return the unmodifiable set of available locales
116     */
117    public static Set<Locale> availableLocaleSet() {
118        return SyncAvoid.AVAILABLE_LOCALE_USET;
119    }
120
121    /**
122     * Obtains the list of countries supported for a given language.
123     *
124     * <p>This method takes a language code and searches to find the
125     * countries available for that language. Variant locales are removed.</p>
126     *
127     * @param languageCode  the 2 letter language code, null returns empty
128     * @return an unmodifiable List of Locale objects, not null
129     */
130    public static List<Locale> countriesByLanguage(final String languageCode) {
131        if (languageCode == null) {
132            return Collections.emptyList();
133        }
134        return cCountriesByLanguage.computeIfAbsent(languageCode, lc -> Collections.unmodifiableList(
135            availableLocaleList(locale -> languageCode.equals(locale.getLanguage()) && !locale.getCountry().isEmpty() && locale.getVariant().isEmpty())));
136    }
137
138    /**
139     * Checks if the locale specified is in the set of available locales.
140     *
141     * @param locale the Locale object to check if it is available
142     * @return true if the locale is a known locale
143     */
144    public static boolean isAvailableLocale(final Locale locale) {
145        return availableLocaleSet().contains(locale);
146    }
147
148    /**
149     * Tests whether the given String is a ISO 3166 alpha-2 country code.
150     *
151     * @param str the String to check
152     * @return true, is the given String is a ISO 3166 compliant country code.
153     */
154    private static boolean isISO3166CountryCode(final String str) {
155        return StringUtils.isAllUpperCase(str) && str.length() == 2;
156    }
157
158    /**
159     * Tests whether the given String is a ISO 639 compliant language code.
160     *
161     * @param str the String to check.
162     * @return true, if the given String is a ISO 639 compliant language code.
163     */
164    private static boolean isISO639LanguageCode(final String str) {
165        return StringUtils.isAllLowerCase(str) && (str.length() == 2 || str.length() == 3);
166    }
167
168    /**
169     * Tests whether a Locale's language is undetermined.
170     * <p>
171     * A Locale's language tag is undetermined if it's value is {@code "und"}. If a language is empty, or not well-formed (for example, "a" or "e2"), it will be
172     * equal to {@code "und"}.
173     * </p>
174     *
175     * @param locale the locale to test.
176     * @return whether a Locale's language is undetermined.
177     * @see Locale#toLanguageTag()
178     * @since 3.14.0
179     */
180    public static boolean isLanguageUndetermined(final Locale locale) {
181        return locale == null || UNDETERMINED.equals(locale.toLanguageTag());
182    }
183
184    /**
185     * TestsNo whether the given String is a UN M.49 numeric area code.
186     *
187     * @param str the String to check
188     * @return true, is the given String is a UN M.49 numeric area code.
189     */
190    private static boolean isNumericAreaCode(final String str) {
191        return StringUtils.isNumeric(str) && str.length() == 3;
192    }
193
194    /**
195     * Obtains the list of languages supported for a given country.
196     *
197     * <p>This method takes a country code and searches to find the
198     * languages available for that country. Variant locales are removed.</p>
199     *
200     * @param countryCode  the 2-letter country code, null returns empty
201     * @return an unmodifiable List of Locale objects, not null
202     */
203    public static List<Locale> languagesByCountry(final String countryCode) {
204        if (countryCode == null) {
205            return Collections.emptyList();
206        }
207        return cLanguagesByCountry.computeIfAbsent(countryCode,
208            k -> Collections.unmodifiableList(availableLocaleList(locale -> countryCode.equals(locale.getCountry()) && locale.getVariant().isEmpty())));
209    }
210
211    /**
212     * Obtains the list of locales to search through when performing
213     * a locale search.
214     *
215     * <pre>
216     * localeLookupList(Locale("fr", "CA", "xxx"))
217     *   = [Locale("fr", "CA", "xxx"), Locale("fr", "CA"), Locale("fr")]
218     * </pre>
219     *
220     * @param locale  the locale to start from
221     * @return the unmodifiable list of Locale objects, 0 being locale, not null
222     */
223    public static List<Locale> localeLookupList(final Locale locale) {
224        return localeLookupList(locale, locale);
225    }
226
227    /**
228     * Obtains the list of locales to search through when performing
229     * a locale search.
230     *
231     * <pre>
232     * localeLookupList(Locale("fr", "CA", "xxx"), Locale("en"))
233     *   = [Locale("fr", "CA", "xxx"), Locale("fr", "CA"), Locale("fr"), Locale("en"]
234     * </pre>
235     *
236     * <p>The result list begins with the most specific locale, then the
237     * next more general and so on, finishing with the default locale.
238     * The list will never contain the same locale twice.</p>
239     *
240     * @param locale  the locale to start from, null returns empty list
241     * @param defaultLocale  the default locale to use if no other is found
242     * @return the unmodifiable list of Locale objects, 0 being locale, not null
243     */
244    public static List<Locale> localeLookupList(final Locale locale, final Locale defaultLocale) {
245        final List<Locale> list = new ArrayList<>(4);
246        if (locale != null) {
247            list.add(locale);
248            if (!locale.getVariant().isEmpty()) {
249                list.add(new Locale(locale.getLanguage(), locale.getCountry()));
250            }
251            if (!locale.getCountry().isEmpty()) {
252                list.add(new Locale(locale.getLanguage(), StringUtils.EMPTY));
253            }
254            if (!list.contains(defaultLocale)) {
255                list.add(defaultLocale);
256            }
257        }
258        return Collections.unmodifiableList(list);
259    }
260
261    /**
262     * Tries to parse a Locale from the given String.
263     * <p>
264     * See {@link Locale} for the format.
265     * </p>
266     *
267     * @param str the String to parse as a Locale.
268     * @return a Locale parsed from the given String.
269     * @throws IllegalArgumentException if the given String cannot be parsed.
270     * @see Locale
271     */
272    private static Locale parseLocale(final String str) {
273        if (isISO639LanguageCode(str)) {
274            return new Locale(str);
275        }
276        final int limit = 3;
277        final char separator = str.indexOf(UNDERSCORE) != -1 ? UNDERSCORE : DASH;
278        final String[] segments = str.split(String.valueOf(separator), 3);
279        final String language = segments[0];
280        if (segments.length == 2) {
281            final String country = segments[1];
282            if (isISO639LanguageCode(language) && isISO3166CountryCode(country) || isNumericAreaCode(country)) {
283                return new Locale(language, country);
284            }
285        } else if (segments.length == limit) {
286            final String country = segments[1];
287            final String variant = segments[2];
288            if (isISO639LanguageCode(language) &&
289                    (country.isEmpty() || isISO3166CountryCode(country) || isNumericAreaCode(country)) &&
290                    !variant.isEmpty()) {
291                return new Locale(language, country, variant);
292            }
293        }
294        throw new IllegalArgumentException("Invalid locale format: " + str);
295    }
296
297    /**
298     * Returns the given locale if non-{@code null}, otherwise {@link Locale#getDefault()}.
299     *
300     * @param locale a locale or {@code null}.
301     * @return the given locale if non-{@code null}, otherwise {@link Locale#getDefault()}.
302     * @since 3.12.0
303     */
304    public static Locale toLocale(final Locale locale) {
305        return locale != null ? locale : Locale.getDefault();
306    }
307
308    /**
309     * Converts a String to a Locale.
310     *
311     * <p>This method takes the string format of a locale and creates the
312     * locale object from it.</p>
313     *
314     * <pre>
315     *   LocaleUtils.toLocale("")           = new Locale("", "")
316     *   LocaleUtils.toLocale("en")         = new Locale("en", "")
317     *   LocaleUtils.toLocale("en_GB")      = new Locale("en", "GB")
318     *   LocaleUtils.toLocale("en-GB")      = new Locale("en", "GB")
319     *   LocaleUtils.toLocale("en_001")     = new Locale("en", "001")
320     *   LocaleUtils.toLocale("en_GB_xxx")  = new Locale("en", "GB", "xxx")   (#)
321     * </pre>
322     *
323     * <p>(#) The behavior of the JDK variant constructor changed between JDK1.3 and JDK1.4.
324     * In JDK1.3, the constructor upper cases the variant, in JDK1.4, it doesn't.
325     * Thus, the result from getVariant() may vary depending on your JDK.</p>
326     *
327     * <p>This method validates the input strictly.
328     * The language code must be lowercase.
329     * The country code must be uppercase.
330     * The separator must be an underscore or a dash.
331     * The length must be correct.
332     * </p>
333     *
334     * @param str  the locale String to convert, null returns null
335     * @return a Locale, null if null input
336     * @throws IllegalArgumentException if the string is an invalid format
337     * @see Locale#forLanguageTag(String)
338     */
339    public static Locale toLocale(final String str) {
340        if (str == null) {
341            // TODO Should this return the default locale?
342            return null;
343        }
344        if (str.isEmpty()) { // LANG-941 - JDK 8 introduced an empty locale where all fields are blank
345            return new Locale(StringUtils.EMPTY, StringUtils.EMPTY);
346        }
347        if (str.contains("#")) { // LANG-879 - Cannot handle Java 7 script & extensions
348            throw new IllegalArgumentException("Invalid locale format: " + str);
349        }
350        final int len = str.length();
351        if (len < 2) {
352            throw new IllegalArgumentException("Invalid locale format: " + str);
353        }
354        final char ch0 = str.charAt(0);
355        if (ch0 == UNDERSCORE || ch0 == DASH) {
356            if (len < 3) {
357                throw new IllegalArgumentException("Invalid locale format: " + str);
358            }
359            final char ch1 = str.charAt(1);
360            final char ch2 = str.charAt(2);
361            if (!Character.isUpperCase(ch1) || !Character.isUpperCase(ch2)) {
362                throw new IllegalArgumentException("Invalid locale format: " + str);
363            }
364            if (len == 3) {
365                return new Locale(StringUtils.EMPTY, str.substring(1, 3));
366            }
367            if (len < 5) {
368                throw new IllegalArgumentException("Invalid locale format: " + str);
369            }
370            if (str.charAt(3) != ch0) {
371                throw new IllegalArgumentException("Invalid locale format: " + str);
372            }
373            return new Locale(StringUtils.EMPTY, str.substring(1, 3), str.substring(4));
374        }
375
376        return parseLocale(str);
377    }
378
379    /**
380     * {@link LocaleUtils} instances should NOT be constructed in standard programming.
381     * Instead, the class should be used as {@code LocaleUtils.toLocale("en_GB");}.
382     *
383     * <p>This constructor is public to permit tools that require a JavaBean instance
384     * to operate.</p>
385     *
386     * @deprecated TODO Make private in 4.0.
387     */
388    @Deprecated
389    public LocaleUtils() {
390        // empty
391    }
392
393}