001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.language.bm;
019
020import java.util.Collections;
021import java.util.EnumMap;
022import java.util.HashSet;
023import java.util.Map;
024import java.util.NoSuchElementException;
025import java.util.Scanner;
026import java.util.Set;
027import java.util.stream.Collectors;
028
029import org.apache.commons.codec.Resources;
030
031/**
032 * Language codes.
033 * <p>
034 * Language codes are typically loaded from resource files. These are UTF-8
035 * encoded text files. They are systematically named following the pattern:
036 * </p>
 * <blockquote>org/apache/commons/codec/language/bm/${{@link NameType#getName()}}_languages.txt</blockquote>
039 * <p>
040 * The format of these resources is the following:
041 * </p>
042 * <ul>
043 * <li><b>Language:</b> a single string containing no whitespace</li>
044 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text
045 * following on that line to be discarded as a comment.</li>
046 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start
047 * multi-line commenting mode. This will skip all content until a line ending in
048 * '*' and '/' is found.</li>
049 * <li><b>Blank lines:</b> All blank lines will be skipped.</li>
050 * </ul>
051 * <p>
052 * Ported from language.php
053 * </p>
054 * <p>
055 * This class is immutable and thread-safe.
056 * </p>
057 *
058 * @since 1.6
059 */
060public class Languages {
    // Implementation note: This class has two parts. The static part is a factory
    // that loads org/apache/commons/codec/language/bm/%s_languages.txt, one resource
    // for each %s in NameType.*, as the set of supported languages. The instance
    // part provides access to that set of supported languages.
067
068    /**
069     * A set of languages.
070     */
071    public static abstract class LanguageSet {
072
073        public static LanguageSet from(final Set<String> langs) {
074            return langs.isEmpty() ? NO_LANGUAGES : new SomeLanguages(langs);
075        }
076
077        public abstract boolean contains(String language);
078
079        public abstract String getAny();
080
081        public abstract boolean isEmpty();
082
083        public abstract boolean isSingleton();
084
085        abstract LanguageSet merge(LanguageSet other);
086
087        public abstract LanguageSet restrictTo(LanguageSet other);
088    }
089
090    /**
091     * Some languages, explicitly enumerated.
092     */
093    public static final class SomeLanguages extends LanguageSet {
094        private final Set<String> languages;
095
096        private SomeLanguages(final Set<String> languages) {
097            this.languages = Collections.unmodifiableSet(languages);
098        }
099
100        @Override
101        public boolean contains(final String language) {
102            return this.languages.contains(language);
103        }
104
105        @Override
106        public String getAny() {
107            return this.languages.iterator().next();
108        }
109
110        public Set<String> getLanguages() {
111            return this.languages;
112        }
113
114        @Override
115        public boolean isEmpty() {
116            return this.languages.isEmpty();
117        }
118
119        @Override
120        public boolean isSingleton() {
121            return this.languages.size() == 1;
122        }
123
124        @Override
125        public LanguageSet merge(final LanguageSet other) {
126            if (other == NO_LANGUAGES) {
127                return this;
128            }
129            if (other == ANY_LANGUAGE) {
130                return other;
131            }
132            final SomeLanguages someLanguages = (SomeLanguages) other;
133            final Set<String> set = new HashSet<>(languages);
134            set.addAll(someLanguages.languages);
135            return from(set);
136        }
137
138        @Override
139        public LanguageSet restrictTo(final LanguageSet other) {
140            if (other == NO_LANGUAGES) {
141                return other;
142            }
143            if (other == ANY_LANGUAGE) {
144                return this;
145            }
146            final SomeLanguages someLanguages = (SomeLanguages) other;
147            return from(languages.stream().filter(lang -> someLanguages.languages.contains(lang)).collect(Collectors.toSet()));
148        }
149
150        @Override
151        public String toString() {
152            return "Languages(" + languages.toString() + ")";
153        }
154
155    }
156
    /**
     * The language name {@code "any"}.
     */
    // NOTE(review): not referenced in this file; presumably the name used for "any language" elsewhere - confirm.
    public static final String ANY = "any";

    /** One {@link Languages} instance for each {@link NameType}; filled by the static initializer below. */
    private static final Map<NameType, Languages> LANGUAGES = new EnumMap<>(NameType.class);
160
161    /**
162     * No languages at all.
163     */
164    public static final LanguageSet NO_LANGUAGES = new LanguageSet() {
165        @Override
166        public boolean contains(final String language) {
167            return false;
168        }
169
170        @Override
171        public String getAny() {
172            throw new NoSuchElementException("Can't fetch any language from the empty language set.");
173        }
174
175        @Override
176        public boolean isEmpty() {
177            return true;
178        }
179
180        @Override
181        public boolean isSingleton() {
182            return false;
183        }
184
185        @Override
186        public LanguageSet merge(final LanguageSet other) {
187            return other;
188        }
189
190        @Override
191        public LanguageSet restrictTo(final LanguageSet other) {
192            return this;
193        }
194
195        @Override
196        public String toString() {
197            return "NO_LANGUAGES";
198        }
199    };
200
201    /**
202     * Any/all languages.
203     */
204    public static final LanguageSet ANY_LANGUAGE = new LanguageSet() {
205        @Override
206        public boolean contains(final String language) {
207            return true;
208        }
209
210        @Override
211        public String getAny() {
212            throw new NoSuchElementException("Can't fetch any language from the any language set.");
213        }
214
215        @Override
216        public boolean isEmpty() {
217            return false;
218        }
219
220        @Override
221        public boolean isSingleton() {
222            return false;
223        }
224
225        @Override
226        public LanguageSet merge(final LanguageSet other) {
227            return other;
228        }
229
230        @Override
231        public LanguageSet restrictTo(final LanguageSet other) {
232            return other;
233        }
234
235        @Override
236        public String toString() {
237            return "ANY_LANGUAGE";
238        }
239    };
240
241    static {
242        for (final NameType s : NameType.values()) {
243            LANGUAGES.put(s, getInstance(langResourceName(s)));
244        }
245    }
246
247    public static Languages getInstance(final NameType nameType) {
248        return LANGUAGES.get(nameType);
249    }
250
251    public static Languages getInstance(final String languagesResourceName) {
252        // read languages list
253        final Set<String> ls = new HashSet<>();
254        try (final Scanner lsScanner = new Scanner(Resources.getInputStream(languagesResourceName),
255                ResourceConstants.ENCODING)) {
256            boolean inExtendedComment = false;
257            while (lsScanner.hasNextLine()) {
258                final String line = lsScanner.nextLine().trim();
259                if (inExtendedComment) {
260                    if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
261                        inExtendedComment = false;
262                    }
263                } else if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
264                    inExtendedComment = true;
265                } else if (!line.isEmpty()) {
266                    ls.add(line);
267                }
268            }
269            return new Languages(Collections.unmodifiableSet(ls));
270        }
271    }
272
273    private static String langResourceName(final NameType nameType) {
274        return String.format("org/apache/commons/codec/language/bm/%s_languages.txt", nameType.getName());
275    }
276
277    private final Set<String> languages;
278
279    private Languages(final Set<String> languages) {
280        this.languages = languages;
281    }
282
283    public Set<String> getLanguages() {
284        return this.languages;
285    }
286}