/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.codec;

import java.nio.charset.Charset;

/**
 * Charsets required of every implementation of the Java platform.
 *
 * From the Java documentation <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 * charsets</a>:
 * <p>
 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the
 * release documentation for your implementation to see if any other encodings are supported.</cite>
 * </p>
 *
 * <ul>
 * <li><code>US-ASCII</code><br>
 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li>
 * <li><code>ISO-8859-1</code><br>
 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 * <li><code>UTF-8</code><br>
 * Eight-bit Unicode Transformation Format.</li>
 * <li><code>UTF-16BE</code><br>
 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 * <li><code>UTF-16LE</code><br>
 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 * <li><code>UTF-16</code><br>
 * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
 * accepted on input, big-endian used on output.)</li>
 * </ul>
 *
 * This perhaps would best belong in the Commons Lang project. Even if a similar class is defined in Commons Lang, it is
 * not foreseen that Commons Codec would be made to depend on Commons Lang.
 *
 * <p>
 * This class is immutable and thread-safe.
 * </p>
 *
 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @since 1.7
 * @version $Id: CharEncoding.java 1173287 2011-09-20 18:16:19Z ggregory $
 */
public class Charsets {

    //
    // This class should only contain Charset instances for required encodings. This guarantees that it will load
    // correctly and without delay on all Java platforms.
    //

    /**
     * Returns the given Charset or the default Charset if the given Charset is null.
     *
     * @param charset
     *            A charset or null.
     * @return the given Charset or the default Charset if the given Charset is null
     */
    public static Charset toCharset(final Charset charset) {
        return charset == null ? Charset.defaultCharset() : charset;
    }

    /**
     * Returns a Charset for the named charset. If the name is null, return the default Charset.
     *
     * @param charset
     *            The name of the requested charset, may be null.
     * @return a Charset for the named charset
     * @throws java.nio.charset.IllegalCharsetNameException
     *             If the given charset name is illegal
     * @throws java.nio.charset.UnsupportedCharsetException
     *             If the named charset is unavailable
     */
    public static Charset toCharset(final String charset) {
        return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
    }

    /**
     * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#ISO_8859_1} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset ISO_8859_1 = Charset.forName(CharEncoding.ISO_8859_1);

    /**
     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#US_ASCII} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset US_ASCII = Charset.forName(CharEncoding.US_ASCII);

    /**
     * Sixteen-bit Unicode Transformation Format, the byte order specified by a mandatory initial byte-order mark
     * (either order accepted on input, big-endian used on output)
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#UTF_16} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_16 = Charset.forName(CharEncoding.UTF_16);

    /**
     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#UTF_16BE} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_16BE = Charset.forName(CharEncoding.UTF_16BE);

    /**
     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#UTF_16LE} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_16LE = Charset.forName(CharEncoding.UTF_16LE);

    /**
     * Eight-bit Unicode Transformation Format.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#UTF_8} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_8 = Charset.forName(CharEncoding.UTF_8);
}