/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.language.bm;

import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.commons.codec.Resources;

/**
 * Language codes.
 * <p>
 * Language codes are typically loaded from resource files. These are UTF-8
 * encoded text files. They are systematically named following the pattern:
 * </p>
 * <blockquote>org/apache/commons/codec/language/bm/{@link NameType#getName()}_languages.txt</blockquote>
 * <p>
 * The format of these resources is the following:
 * </p>
 * <ul>
 * <li><strong>Language:</strong> a single string containing no whitespace</li>
 * <li><strong>End-of-line comments:</strong> Any occurrence of '//' will cause all text
 * following on that line to be discarded as a comment.</li>
 * <li><strong>Multi-line comments:</strong> Any line starting with '/*' will start
 * multi-line commenting mode. This will skip all content until a line ending in
 * '*' and '/' is found.</li>
 * <li><strong>Blank lines:</strong> All blank lines will be skipped.</li>
 * </ul>
 * <p>
 * Ported from language.php
 * </p>
 * <p>
 * This class is immutable and thread-safe.
 * </p>
 *
 * @since 1.6
 */
public class Languages {
    // Implementation note: This class is divided into two sections. The first part
    // is a static factory interface that
    // exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in
    // NameType.* as a list of supported
    // languages, and a second part that provides instance methods for accessing
    // this set for supported languages.

    /**
     * A set of languages.
     */
    public abstract static class LanguageSet {

        /**
         * Gets a language set for the given languages.
         * <p>
         * The given set is copied (preserving its iteration order), so later changes to
         * it do not affect the returned instance. An empty input yields
         * {@link Languages#NO_LANGUAGES}.
         * </p>
         *
         * @param languages a language set.
         * @return a LanguageSet.
         */
        public static LanguageSet from(final Set<String> languages) {
            if (languages.isEmpty()) {
                return NO_LANGUAGES;
            }
            // Snapshot the caller's set: wrapping it directly would let the caller mutate
            // a supposedly immutable LanguageSet. LinkedHashSet keeps the source's
            // iteration order, so getAny() and toString() behave as they would on the input.
            return new SomeLanguages(new LinkedHashSet<>(languages));
        }

        /**
         * Constructs a new instance for subclasses.
         */
        public LanguageSet() {
            // empty
        }

        /**
         * Tests whether this instance contains the given value.
         *
         * @param language the value to test.
         * @return whether this instance contains the given value.
         */
        public abstract boolean contains(String language);

        /**
         * Gets any one of this instance's elements.
         *
         * @return any one of this instance's elements.
         */
        public abstract String getAny();

        /**
         * Tests whether this instance is empty.
         *
         * @return whether this instance is empty.
         */
        public abstract boolean isEmpty();

        /**
         * Tests whether this instance contains a single element.
         *
         * @return whether this instance contains a single element.
         */
        public abstract boolean isSingleton();

        /**
         * Combines this instance with another one.
         *
         * @param other The other instance.
         * @return the combined instance.
         */
        abstract LanguageSet merge(LanguageSet other);

        /**
         * Returns an instance restricted to the values common to this instance and the given one.
         *
         * @param other The other instance.
         * @return an instance restricted to the values common to this instance and the given one.
         */
        public abstract LanguageSet restrictTo(LanguageSet other);
    }

    /**
     * Some languages, explicitly enumerated.
     */
    public static final class SomeLanguages extends LanguageSet {

        /**
         * Wraps a set that was built inside this class and is not shared with any caller,
         * so no defensive copy is required.
         *
         * @param owned a privately owned set.
         * @return {@link Languages#NO_LANGUAGES} if the set is empty, otherwise a new instance backed by it.
         */
        private static LanguageSet ofOwned(final Set<String> owned) {
            return owned.isEmpty() ? NO_LANGUAGES : new SomeLanguages(owned);
        }

        private final Set<String> languages;

        private SomeLanguages(final Set<String> languages) {
            this.languages = Collections.unmodifiableSet(languages);
        }

        @Override
        public boolean contains(final String language) {
            return this.languages.contains(language);
        }

        @Override
        public String getAny() {
            return this.languages.iterator().next();
        }

        /**
         * Gets the language strings.
         *
         * @return the language strings, as an unmodifiable set.
         */
        public Set<String> getLanguages() {
            return this.languages;
        }

        @Override
        public boolean isEmpty() {
            return this.languages.isEmpty();
        }

        @Override
        public boolean isSingleton() {
            return this.languages.size() == 1;
        }

        /**
         * Returns the union of this instance and the given one.
         * <p>
         * Apart from the {@link Languages#NO_LANGUAGES} and {@link Languages#ANY_LANGUAGE}
         * sentinels, {@code other} must be a {@link SomeLanguages}; anything else fails
         * with a {@link ClassCastException}.
         * </p>
         */
        @Override
        public LanguageSet merge(final LanguageSet other) {
            if (other == NO_LANGUAGES) {
                return this;
            }
            if (other == ANY_LANGUAGE) {
                return other;
            }
            final SomeLanguages that = (SomeLanguages) other;
            final Set<String> union = new HashSet<>(languages);
            union.addAll(that.languages);
            return ofOwned(union);
        }

        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            if (other == NO_LANGUAGES) {
                return other;
            }
            if (other == ANY_LANGUAGE) {
                return this;
            }
            // Only membership in 'other' is needed, so no downcast is required here.
            final Set<String> common = languages.stream().filter(other::contains).collect(Collectors.toSet());
            return ofOwned(common);
        }

        @Override
        public String toString() {
            return "Languages(" + languages.toString() + ")";
        }

    }

    /**
     * Marker for any language.
     */
    public static final String ANY = "any";

    /** Marker that starts an end-of-line comment in a languages resource. */
    private static final String END_OF_LINE_COMMENT = "//";

    private static final Map<NameType, Languages> LANGUAGES = new EnumMap<>(NameType.class);

    /**
     * No languages at all.
     */
    public static final LanguageSet NO_LANGUAGES = new LanguageSet() {

        @Override
        public boolean contains(final String language) {
            return false;
        }

        @Override
        public String getAny() {
            throw new NoSuchElementException("Can't fetch any language from the empty language set.");
        }

        @Override
        public boolean isEmpty() {
            return true;
        }

        @Override
        public boolean isSingleton() {
            return false;
        }

        @Override
        public LanguageSet merge(final LanguageSet other) {
            return other;
        }

        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            return this;
        }

        @Override
        public String toString() {
            return "NO_LANGUAGES";
        }
    };

    /**
     * Any/all languages.
     */
    public static final LanguageSet ANY_LANGUAGE = new LanguageSet() {

        @Override
        public boolean contains(final String language) {
            return true;
        }

        @Override
        public String getAny() {
            throw new NoSuchElementException("Can't fetch any language from the any language set.");
        }

        @Override
        public boolean isEmpty() {
            return false;
        }

        @Override
        public boolean isSingleton() {
            return false;
        }

        @Override
        public LanguageSet merge(final LanguageSet other) {
            // NOTE(review): this returns 'other', whereas SomeLanguages.merge(ANY_LANGUAGE)
            // returns ANY_LANGUAGE, so merge is not symmetric. Left as-is because callers
            // may rely on the current result - confirm the intended semantics.
            return other;
        }

        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            return other;
        }

        @Override
        public String toString() {
            return "ANY_LANGUAGE";
        }
    };

    static {
        // Eagerly load the languages resource of every name type.
        for (final NameType s : NameType.values()) {
            LANGUAGES.put(s, getInstance(langResourceName(s)));
        }
    }

    /**
     * Gets an instance for the given name type.
     *
     * @param nameType The name type to lookup.
     * @return an instance for the given name type.
     */
    public static Languages getInstance(final NameType nameType) {
        return LANGUAGES.get(nameType);
    }

    /**
     * Gets a new instance for the given resource name.
     * <p>
     * Each line is trimmed. Lines inside a multi-line comment are skipped, end-of-line
     * comments are removed, and whatever non-empty text remains is recorded as a language.
     * </p>
     *
     * @param languagesResourceName the resource name to lookup.
     * @return a new instance.
     */
    public static Languages getInstance(final String languagesResourceName) {
        // read languages list
        final Set<String> ls = new HashSet<>();
        try (Scanner lsScanner = new Scanner(Resources.getInputStream(languagesResourceName),
                ResourceConstants.ENCODING)) {
            boolean inExtendedComment = false;
            while (lsScanner.hasNextLine()) {
                final String rawLine = lsScanner.nextLine().trim();
                if (inExtendedComment) {
                    // Stay in comment mode until a line ends with the closing marker.
                    if (rawLine.endsWith(ResourceConstants.EXT_CMT_END)) {
                        inExtendedComment = false;
                    }
                } else if (rawLine.startsWith(ResourceConstants.EXT_CMT_START)) {
                    inExtendedComment = true;
                } else {
                    final String language = stripEndOfLineComment(rawLine);
                    if (!language.isEmpty()) {
                        ls.add(language);
                    }
                }
            }
            return new Languages(Collections.unmodifiableSet(ls));
        }
    }

    private static String langResourceName(final NameType nameType) {
        return String.format("/org/apache/commons/codec/language/bm/%s_languages.txt", nameType.getName());
    }

    /**
     * Removes an end-of-line comment from a line, if there is one.
     *
     * @param line an already trimmed line.
     * @return the trimmed text before the first comment marker, or the line itself if it has none.
     */
    private static String stripEndOfLineComment(final String line) {
        final int commentStart = line.indexOf(END_OF_LINE_COMMENT);
        return commentStart < 0 ? line : line.substring(0, commentStart).trim();
    }

    private final Set<String> languages;

    private Languages(final Set<String> languages) {
        this.languages = languages;
    }

    /**
     * Gets the language set.
     *
     * @return the language set.
     */
    public Set<String> getLanguages() {
        return this.languages;
    }
}