1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.io;
18
19 import java.nio.charset.Charset;
20 import java.nio.charset.UnsupportedCharsetException;
21 import java.util.Collections;
22 import java.util.SortedMap;
23 import java.util.TreeMap;
24
25 /**
26 * Charsets required of every implementation of the Java platform.
27 *
28 * From the Java documentation <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
29 * Standard charsets</a>:
30 * <p>
 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult
 * the release documentation for your implementation to see if any other encodings are supported.</cite>
34 * </p>
35 *
 * <ul>
 * <li><code>US-ASCII</code><br>
 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li>
 * <li><code>ISO-8859-1</code><br>
 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 * <li><code>UTF-8</code><br>
 * Eight-bit Unicode Transformation Format.</li>
 * <li><code>UTF-16BE</code><br>
 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 * <li><code>UTF-16LE</code><br>
 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 * <li><code>UTF-16</code><br>
 * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
 * accepted on input, big-endian used on output.)</li>
 * </ul>
51 *
52 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
53 * @since 2.3
54 * @version $Id: Charsets.java 1415850 2012-11-30 20:51:39Z ggregory $
55 */
public class Charsets {
    //
    // This class should only contain Charset instances for required encodings. This guarantees that it will load
    // correctly and without delay on all Java platforms.
    //
    // NOTE: the charset constants are declared first on purpose. Static initializers run in textual order, and the
    // cached REQUIRED_CHARSETS map below is built from these constants, so they must already be assigned.
    //

    /**
     * <p>
     * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

    /**
     * <p>
     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset US_ASCII = Charset.forName("US-ASCII");

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark
     * (either order accepted on input, big-endian used on output).
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_16 = Charset.forName("UTF-16");

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_16BE = Charset.forName("UTF-16BE");

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_16LE = Charset.forName("UTF-16LE");

    /**
     * <p>
     * Eight-bit Unicode Transformation Format.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * Immutable, case-insensitive map of the required charsets keyed by canonical name. Built exactly once during
     * class initialization; must stay declared after the charset constants it is populated from.
     */
    private static final SortedMap<String, Charset> REQUIRED_CHARSETS = buildRequiredCharsets();

    /**
     * Builds the unmodifiable, case-insensitively ordered map of the required charsets.
     *
     * @return an unmodifiable sorted map from canonical charset name to charset
     */
    private static SortedMap<String, Charset> buildRequiredCharsets() {
        final SortedMap<String, Charset> byName = new TreeMap<String, Charset>(String.CASE_INSENSITIVE_ORDER);
        byName.put(ISO_8859_1.name(), ISO_8859_1);
        byName.put(US_ASCII.name(), US_ASCII);
        byName.put(UTF_16.name(), UTF_16);
        byName.put(UTF_16BE.name(), UTF_16BE);
        byName.put(UTF_16LE.name(), UTF_16LE);
        byName.put(UTF_8.name(), UTF_8);
        return Collections.unmodifiableSortedMap(byName);
    }

    /**
     * Returns a sorted map from canonical charset names to the charset objects required of every implementation of
     * the Java platform (the constants declared in this class).
     * <p>
     * The returned map is created once and shared between callers; it is unmodifiable, so any attempt to change it
     * throws {@link UnsupportedOperationException}.
     * </p>
     *
     * @return An immutable, case-insensitive map from canonical charset names to charset objects.
     * @see Charset#availableCharsets()
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @since 2.5
     */
    public static SortedMap<String, Charset> requiredCharsets() {
        return REQUIRED_CHARSETS;
    }

    /**
     * Returns the given Charset or the default Charset if the given Charset is null.
     *
     * @param charset
     *            A charset or null.
     * @return the given Charset or the default Charset if the given Charset is null
     */
    public static Charset toCharset(final Charset charset) {
        return charset == null ? Charset.defaultCharset() : charset;
    }

    /**
     * Returns a Charset for the named charset. If the name is null, return the default Charset.
     *
     * @param charset
     *            The name of the requested charset, may be null.
     * @return a Charset for the named charset
     * @throws java.nio.charset.IllegalCharsetNameException
     *             If the given charset name is illegal
     * @throws UnsupportedCharsetException
     *             If the named charset is unavailable
     */
    public static Charset toCharset(final String charset) {
        return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
    }
}