001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.io;
018
019 import java.io.Serializable;
020
/**
 * Byte Order Mark (BOM) representation - see {@link org.apache.commons.io.input.BOMInputStream}.
 * <p>
 * A BOM pairs a charset name with the byte sequence that identifies that charset at the start
 * of a stream. Instances are immutable: the byte values are copied on construction and are only
 * ever handed out as copies.
 *
 * @see org.apache.commons.io.input.BOMInputStream
 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
 * (Non-Normative)</a>
 * @version $Id: ByteOrderMark.java 1304166 2012-03-23 02:35:48Z ggregory $
 * @since 2.0
 */
public class ByteOrderMark implements Serializable {

    private static final long serialVersionUID = 1L;

    /** UTF-8 BOM */
    public static final ByteOrderMark UTF_8 = new ByteOrderMark("UTF-8", 0xEF, 0xBB, 0xBF);

    /** UTF-16BE BOM (Big-Endian) */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);

    /** UTF-16LE BOM (Little-Endian) */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian)
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian).
     * <p>
     * This is U+FEFF with the least significant byte first. Its first two bytes are the same
     * as the {@link #UTF_16LE} BOM.
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);

    /** Name of the charset this BOM identifies; never null or empty. */
    private final String charsetName;

    /** The BOM's byte values, one per element; never null or empty and never exposed directly. */
    private final int[] bytes;

    /**
     * Construct a new BOM.
     *
     * @param charsetName The name of the charset the BOM represents
     * @param bytes The BOM's bytes
     * @throws IllegalArgumentException if the charsetName is null or
     * zero length, or if the bytes are null or zero length
     */
    public ByteOrderMark(String charsetName, int... bytes) {
        if (charsetName == null || charsetName.length() == 0) {
            throw new IllegalArgumentException("No charsetName specified");
        }
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("No bytes specified");
        }
        this.charsetName = charsetName;
        // Defensive copy: later changes to the caller's array must not affect this BOM.
        this.bytes = bytes.clone();
    }

    /**
     * Return the name of the {@link java.nio.charset.Charset} the BOM represents.
     *
     * @return the character set name
     */
    public String getCharsetName() {
        return charsetName;
    }

    /**
     * Return the length of the BOM's bytes.
     *
     * @return the length of the BOM's bytes
     */
    public int length() {
        return bytes.length;
    }

    /**
     * The byte at the specified position.
     *
     * @param pos The position
     * @return The specified byte, as the int value that was passed to the constructor
     * @throws ArrayIndexOutOfBoundsException if pos is negative or not less than {@link #length()}
     */
    public int get(int pos) {
        return bytes[pos];
    }

    /**
     * Return a copy of the BOM's bytes.
     * <p>
     * Each stored int is narrowed to a byte, so values above 0x7F come back as negative bytes.
     *
     * @return a copy of the BOM's bytes
     */
    public byte[] getBytes() {
        byte[] copy = new byte[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            copy[i] = (byte) bytes[i];
        }
        return copy;
    }

    /**
     * Indicates if this BOM's bytes equals another.
     * <p>
     * Only the bytes are compared; the charset name does not take part in the comparison.
     *
     * @param obj The object to compare to
     * @return true if the bom's bytes are equal, otherwise
     * false
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof ByteOrderMark)) {
            return false;
        }
        ByteOrderMark bom = (ByteOrderMark) obj;
        if (bytes.length != bom.length()) {
            return false;
        }
        for (int i = 0; i < bytes.length; i++) {
            if (bytes[i] != bom.get(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Return the hashcode for this BOM.
     *
     * @return the hashcode for this BOM.
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        // Seed with the ByteOrderMark class itself rather than getClass(): equals() accepts any
        // instanceof ByteOrderMark, so a subclass instance that is equal must hash the same.
        int hashCode = ByteOrderMark.class.hashCode();
        for (int b : bytes) {
            hashCode += b;
        }
        return hashCode;
    }

    /**
     * Provide a String representation of the BOM.
     *
     * @return a String of the form <code>SimpleClassName[charsetName: 0xNN,0xNN,...]</code>
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append(getClass().getSimpleName());
        builder.append('[');
        builder.append(charsetName);
        builder.append(": ");
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                builder.append(",");
            }
            builder.append("0x");
            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase());
        }
        builder.append(']');
        return builder.toString();
    }

}