001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.language.bm;
019
020import java.util.Collections;
021import java.util.EnumMap;
022import java.util.HashSet;
023import java.util.Map;
024import java.util.NoSuchElementException;
025import java.util.Scanner;
026import java.util.Set;
027import java.util.stream.Collectors;
028
029import org.apache.commons.codec.Resources;
030
031/**
032 * Language codes.
033 * <p>
034 * Language codes are typically loaded from resource files. These are UTF-8
035 * encoded text files. They are systematically named following the pattern:
036 * </p>
 * <blockquote>org/apache/commons/codec/language/bm/{@link NameType#getName()}_languages.txt</blockquote>
039 * <p>
040 * The format of these resources is the following:
041 * </p>
042 * <ul>
043 * <li><strong>Language:</strong> a single string containing no whitespace</li>
044 * <li><strong>End-of-line comments:</strong> Any occurrence of '//' will cause all text
045 * following on that line to be discarded as a comment.</li>
046 * <li><strong>Multi-line comments:</strong> Any line starting with '/*' will start
047 * multi-line commenting mode. This will skip all content until a line ending in
048 * '*' and '/' is found.</li>
049 * <li><strong>Blank lines:</strong> All blank lines will be skipped.</li>
050 * </ul>
051 * <p>
052 * Ported from language.php
053 * </p>
054 * <p>
055 * This class is immutable and thread-safe.
056 * </p>
057 *
058 * @since 1.6
059 */
060public class Languages {
061    // Implementation note: This class is divided into two sections. The first part
062    // is a static factory interface that
063    // exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in
064    // NameType.* as a list of supported
065    // languages, and a second part that provides instance methods for accessing
066    // this set for supported languages.
067
068    /**
069     * A set of languages.
070     */
071    public abstract static class LanguageSet {
072
073        /**
074         * Gets a language set for the given languages.
075         *
076         * @param languages a language set.
077         * @return a LanguageSet.
078         */
079        public static LanguageSet from(final Set<String> languages) {
080            return languages.isEmpty() ? NO_LANGUAGES : new SomeLanguages(languages);
081        }
082
083        /**
084         * Constructs a new instance for subclasses.
085         */
086        public LanguageSet() {
087            // empty
088        }
089
090        /**
091         * Tests whether this instance contains the given value.
092         *
093         * @param language the value to test.
094         * @return whether this instance contains the given value.
095         */
096        public abstract boolean contains(String language);
097
098        /**
099         * Gets any of this instance's element.
100         *
101         * @return any of this instance's element.
102         */
103        public abstract String getAny();
104
105        /**
106         * Tests whether this instance is empty.
107         *
108         * @return whether this instance is empty.
109         */
110        public abstract boolean isEmpty();
111
112        /**
113         * Tests whether this instance contains a single element.
114         *
115         * @return whether this instance contains a single element.
116         */
117        public abstract boolean isSingleton();
118
119        abstract LanguageSet merge(LanguageSet other);
120
121        /**
122         * Returns an instance restricted to this instances and the given values'.
123         *
124         * @param other The other instance.
125         * @return an instance restricted to this instances and the given values'.
126         */
127        public abstract LanguageSet restrictTo(LanguageSet other);
128    }
129
130    /**
131     * Some languages, explicitly enumerated.
132     */
133    public static final class SomeLanguages extends LanguageSet {
134        private final Set<String> languages;
135
136        private SomeLanguages(final Set<String> languages) {
137            this.languages = Collections.unmodifiableSet(languages);
138        }
139
140        @Override
141        public boolean contains(final String language) {
142            return this.languages.contains(language);
143        }
144
145        @Override
146        public String getAny() {
147            return this.languages.iterator().next();
148        }
149
150        /**
151         * Gets the language strings
152         *
153         * @return the languages strings.
154         */
155        public Set<String> getLanguages() {
156            return this.languages;
157        }
158
159        @Override
160        public boolean isEmpty() {
161            return this.languages.isEmpty();
162        }
163
164        @Override
165        public boolean isSingleton() {
166            return this.languages.size() == 1;
167        }
168
169        @Override
170        public LanguageSet merge(final LanguageSet other) {
171            if (other == NO_LANGUAGES) {
172                return this;
173            }
174            if (other == ANY_LANGUAGE) {
175                return other;
176            }
177            final SomeLanguages someLanguages = (SomeLanguages) other;
178            final Set<String> set = new HashSet<>(languages);
179            set.addAll(someLanguages.languages);
180            return from(set);
181        }
182
183        @Override
184        public LanguageSet restrictTo(final LanguageSet other) {
185            if (other == NO_LANGUAGES) {
186                return other;
187            }
188            if (other == ANY_LANGUAGE) {
189                return this;
190            }
191            final SomeLanguages someLanguages = (SomeLanguages) other;
192            return from(languages.stream().filter(lang -> someLanguages.languages.contains(lang)).collect(Collectors.toSet()));
193        }
194
195        @Override
196        public String toString() {
197            return "Languages(" + languages.toString() + ")";
198        }
199
200    }
201
    /**
     * Marker for any language.
     */
    public static final String ANY = "any";

    // One Languages instance per NameType; filled by the static initializer of this class
    // and read by getInstance(NameType).
    private static final Map<NameType, Languages> LANGUAGES = new EnumMap<>(NameType.class);
208
209    /**
210     * No languages at all.
211     */
212    public static final LanguageSet NO_LANGUAGES = new LanguageSet() {
213
214        @Override
215        public boolean contains(final String language) {
216            return false;
217        }
218
219        @Override
220        public String getAny() {
221            throw new NoSuchElementException("Can't fetch any language from the empty language set.");
222        }
223
224        @Override
225        public boolean isEmpty() {
226            return true;
227        }
228
229        @Override
230        public boolean isSingleton() {
231            return false;
232        }
233
234        @Override
235        public LanguageSet merge(final LanguageSet other) {
236            return other;
237        }
238
239        @Override
240        public LanguageSet restrictTo(final LanguageSet other) {
241            return this;
242        }
243
244        @Override
245        public String toString() {
246            return "NO_LANGUAGES";
247        }
248    };
249
250    /**
251     * Any/all languages.
252     */
253    public static final LanguageSet ANY_LANGUAGE = new LanguageSet() {
254
255        @Override
256        public boolean contains(final String language) {
257            return true;
258        }
259
260        @Override
261        public String getAny() {
262            throw new NoSuchElementException("Can't fetch any language from the any language set.");
263        }
264
265        @Override
266        public boolean isEmpty() {
267            return false;
268        }
269
270        @Override
271        public boolean isSingleton() {
272            return false;
273        }
274
275        @Override
276        public LanguageSet merge(final LanguageSet other) {
277            return other;
278        }
279
280        @Override
281        public LanguageSet restrictTo(final LanguageSet other) {
282            return other;
283        }
284
285        @Override
286        public String toString() {
287            return "ANY_LANGUAGE";
288        }
289    };
290
291    static {
292        for (final NameType s : NameType.values()) {
293            LANGUAGES.put(s, getInstance(langResourceName(s)));
294        }
295    }
296
297    /**
298     * Gets an instance for the given name type.
299     *
300     * @param nameType The name type to lookup.
301     * @return an instance for the given name type.
302     */
303    public static Languages getInstance(final NameType nameType) {
304        return LANGUAGES.get(nameType);
305    }
306
307    /**
308     * Gets a new instance for the given resource name.
309     *
310     * @param languagesResourceName the resource name to lookup.
311     * @return a new instance.
312     */
313    public static Languages getInstance(final String languagesResourceName) {
314        // read languages list
315        final Set<String> ls = new HashSet<>();
316        try (Scanner lsScanner = new Scanner(Resources.getInputStream(languagesResourceName),
317                ResourceConstants.ENCODING)) {
318            boolean inExtendedComment = false;
319            while (lsScanner.hasNextLine()) {
320                final String line = lsScanner.nextLine().trim();
321                if (inExtendedComment) {
322                    if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
323                        inExtendedComment = false;
324                    }
325                } else if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
326                    inExtendedComment = true;
327                } else if (!line.isEmpty()) {
328                    ls.add(line);
329                }
330            }
331            return new Languages(Collections.unmodifiableSet(ls));
332        }
333    }
334
335    private static String langResourceName(final NameType nameType) {
336        return String.format("/org/apache/commons/codec/language/bm/%s_languages.txt", nameType.getName());
337    }
338
    // The language names held by this instance; stored exactly as passed to the constructor.
    private final Set<String> languages;

    // Private: instances are created through getInstance(String), which passes an unmodifiable set.
    private Languages(final Set<String> languages) {
        this.languages = languages;
    }
344
345    /**
346     * Gets the language set.
347     *
348     * @return the language set.
349     */
350    public Set<String> getLanguages() {
351        return this.languages;
352    }
353}