001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io; 018 019import java.nio.charset.Charset; 020import java.nio.charset.StandardCharsets; 021import java.util.Collections; 022import java.util.SortedMap; 023import java.util.TreeMap; 024 025/** 026 * Charsets required of every implementation of the Java platform. 027 * 028 * From the Java documentation <a href="https://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html"> 029 * Standard charsets</a>: 030 * <p> 031 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult 032 * the release documentation for your implementation to see if any other encodings are supported. Consult the release 033 * documentation for your implementation to see if any other encodings are supported. </cite> 034 * </p> 035 * 036 * <ul> 037 * <li><code>US-ASCII</code><br> 038 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li> 039 * <li><code>ISO-8859-1</code><br> 040 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li> 041 * <li><code>UTF-8</code><br> 042 * Eight-bit Unicode Transformation Format.</li> 043 * <li><code>UTF-16BE</code><br> 044 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li> 045 * <li><code>UTF-16LE</code><br> 046 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li> 047 * <li><code>UTF-16</code><br> 048 * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order 049 * accepted on input, big-endian used on output.)</li> 050 * </ul> 051 * 052 * @see <a href="https://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 053 * @since 2.3 054 * 055 */ 056public class Charsets { 057 // 058 // This class should only contain Charset instances for required encodings. This guarantees that it will load 059 // correctly and without delay on all Java platforms. 060 // 061 062 /** 063 * Constructs a sorted map from canonical charset names to charset objects required of every implementation of the 064 * Java platform. 065 * <p> 066 * From the Java documentation <a href="https://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html"> 067 * Standard charsets</a>: 068 * </p> 069 * 070 * @return An immutable, case-insensitive map from canonical charset names to charset objects. 071 * @see Charset#availableCharsets() 072 * @since 2.5 073 */ 074 public static SortedMap<String, Charset> requiredCharsets() { 075 // maybe cache? 076 final TreeMap<String, Charset> m = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); 077 m.put(StandardCharsets.ISO_8859_1.name(), StandardCharsets.ISO_8859_1); 078 m.put(StandardCharsets.US_ASCII.name(), StandardCharsets.US_ASCII); 079 m.put(StandardCharsets.UTF_16.name(), StandardCharsets.UTF_16); 080 m.put(StandardCharsets.UTF_16BE.name(), StandardCharsets.UTF_16BE); 081 m.put(StandardCharsets.UTF_16LE.name(), StandardCharsets.UTF_16LE); 082 m.put(StandardCharsets.UTF_8.name(), StandardCharsets.UTF_8); 083 return Collections.unmodifiableSortedMap(m); 084 } 085 086 /** 087 * Returns the given Charset or the default Charset if the given Charset is null. 088 * 089 * @param charset 090 * A charset or null. 091 * @return the given Charset or the default Charset if the given Charset is null 092 */ 093 public static Charset toCharset(final Charset charset) { 094 return charset == null ? Charset.defaultCharset() : charset; 095 } 096 097 /** 098 * Returns a Charset for the named charset. If the name is null, return the default Charset. 099 * 100 * @param charsetName 101 * The name of the requested charset, may be null. 102 * @return a Charset for the named charset 103 * @throws java.nio.charset.UnsupportedCharsetException 104 * If the named charset is unavailable 105 */ 106 public static Charset toCharset(final String charsetName) { 107 return charsetName == null ? Charset.defaultCharset() : Charset.forName(charsetName); 108 } 109 110 /** 111 * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. 112 * <p> 113 * Every implementation of the Java platform is required to support this character encoding. 114 * </p> 115 * 116 * @see <a href="https://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 117 * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets} 118 */ 119 @Deprecated 120 public static final Charset ISO_8859_1 = StandardCharsets.ISO_8859_1; 121 122 /** 123 * <p> 124 * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set. 125 * </p> 126 * <p> 127 * Every implementation of the Java platform is required to support this character encoding. 128 * </p> 129 * 130 * @see <a href="https://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 131 * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets} 132 */ 133 @Deprecated 134 public static final Charset US_ASCII = StandardCharsets.US_ASCII; 135 136 /** 137 * <p> 138 * Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark 139 * (either order accepted on input, big-endian used on output) 140 * </p> 141 * <p> 142 * Every implementation of the Java platform is required to support this character encoding. 143 * </p> 144 * 145 * @see <a href="https://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 146 * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets} 147 */ 148 @Deprecated 149 public static final Charset UTF_16 = StandardCharsets.UTF_16; 150 151 /** 152 * <p> 153 * Sixteen-bit Unicode Transformation Format, big-endian byte order. 154 * </p> 155 * <p> 156 * Every implementation of the Java platform is required to support this character encoding. 157 * </p> 158 * 159 * @see <a href="https://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 160 * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets} 161 */ 162 @Deprecated 163 public static final Charset UTF_16BE = StandardCharsets.UTF_16BE; 164 165 /** 166 * <p> 167 * Sixteen-bit Unicode Transformation Format, little-endian byte order. 168 * </p> 169 * <p> 170 * Every implementation of the Java platform is required to support this character encoding. 171 * </p> 172 * 173 * @see <a href="https://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 174 * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets} 175 */ 176 @Deprecated 177 public static final Charset UTF_16LE = StandardCharsets.UTF_16LE; 178 179 /** 180 * <p> 181 * Eight-bit Unicode Transformation Format. 182 * </p> 183 * <p> 184 * Every implementation of the Java platform is required to support this character encoding. 185 * </p> 186 * 187 * @see <a href="https://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 188 * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets} 189 */ 190 @Deprecated 191 public static final Charset UTF_8 = StandardCharsets.UTF_8; 192}