/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.SortedMap;
import java.util.TreeMap;

/**
 * Charsets required of every implementation of the Java platform.
 *
 * From the Java documentation <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
 * Standard charsets</a>:
 * <p>
 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult
 * the release documentation for your implementation to see if any other encodings are supported.</cite>
 * </p>
 *
 * <ul>
 * <li><code>US-ASCII</code><br>
 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li>
 * <li><code>ISO-8859-1</code><br>
 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 * <li><code>UTF-8</code><br>
 * Eight-bit Unicode Transformation Format.</li>
 * <li><code>UTF-16BE</code><br>
 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 * <li><code>UTF-16LE</code><br>
 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 * <li><code>UTF-16</code><br>
 * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
 * accepted on input, big-endian used on output.)</li>
 * </ul>
 *
 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @since 2.3
 * @version $Id: Charsets.java 1686747 2015-06-21 18:44:49Z krosenvold $
 */
public class Charsets {
    //
    // This class should only contain Charset instances for required encodings. This guarantees that it will load
    // correctly and without delay on all Java platforms.
    //

    /**
     * <p>
     * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets}
     */
    @Deprecated
    public static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

    /**
     * <p>
     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets}
     */
    @Deprecated
    public static final Charset US_ASCII = Charset.forName("US-ASCII");

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark
     * (either order accepted on input, big-endian used on output).
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets}
     */
    @Deprecated
    public static final Charset UTF_16 = Charset.forName("UTF-16");

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets}
     */
    @Deprecated
    public static final Charset UTF_16BE = Charset.forName("UTF-16BE");

    /**
     * <p>
     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets}
     */
    @Deprecated
    public static final Charset UTF_16LE = Charset.forName("UTF-16LE");

    /**
     * <p>
     * Eight-bit Unicode Transformation Format.
     * </p>
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @deprecated Use Java 7's {@link java.nio.charset.StandardCharsets}
     */
    @Deprecated
    public static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * Immutable, case-insensitive view of the required charsets keyed by canonical name.
     * <p>
     * Static initializers run in textual order, so this field MUST stay below the charset constants it reads;
     * moving it above them would populate the map with {@code null} values.
     * </p>
     */
    private static final SortedMap<String, Charset> REQUIRED_CHARSETS = buildRequiredCharsets();

    /**
     * Returns a sorted map from canonical charset names to the charset objects required of every implementation of
     * the Java platform.
     * <p>
     * The map is built once, when this class is initialized, and the same unmodifiable instance is returned by every
     * call. Keys are compared with {@link String#CASE_INSENSITIVE_ORDER}, so lookups ignore case.
     * </p>
     *
     * @return An immutable, case-insensitive map from canonical charset names to charset objects.
     * @see Charset#availableCharsets()
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @since 2.5
     */
    public static SortedMap<String, Charset> requiredCharsets() {
        // TODO Re-implement on Java 7 to use java.nio.charset.StandardCharsets
        return REQUIRED_CHARSETS;
    }

    /**
     * Returns the given Charset or the default Charset if the given Charset is null.
     *
     * @param charset
     *            A charset or null.
     * @return the given Charset or the default Charset if the given Charset is null
     */
    public static Charset toCharset(final Charset charset) {
        return charset == null ? Charset.defaultCharset() : charset;
    }

    /**
     * Returns a Charset for the named charset. If the name is null, return the default Charset.
     *
     * @param charset
     *            The name of the requested charset, may be null.
     * @return a Charset for the named charset
     * @throws java.nio.charset.IllegalCharsetNameException
     *             If the given charset name is illegal
     * @throws java.nio.charset.UnsupportedCharsetException
     *             If the named charset is unavailable
     */
    public static Charset toCharset(final String charset) {
        return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
    }

    /**
     * Builds the map exposed by {@link #requiredCharsets()}: one entry per required charset, keyed by the charset's
     * canonical name, ordered case-insensitively and wrapped so that it cannot be modified.
     *
     * @return the unmodifiable map of required charsets
     */
    private static SortedMap<String, Charset> buildRequiredCharsets() {
        final SortedMap<String, Charset> byName = new TreeMap<String, Charset>(String.CASE_INSENSITIVE_ORDER);
        byName.put(ISO_8859_1.name(), ISO_8859_1);
        byName.put(US_ASCII.name(), US_ASCII);
        byName.put(UTF_16.name(), UTF_16);
        byName.put(UTF_16BE.name(), UTF_16BE);
        byName.put(UTF_16LE.name(), UTF_16LE);
        byName.put(UTF_8.name(), UTF_8);
        return Collections.unmodifiableSortedMap(byName);
    }
}