001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.Serializable;
020import java.util.Locale;
021
/**
 * Byte Order Mark (BOM) representation - see {@link org.apache.commons.io.input.BOMInputStream}.
 * <p>
 * An instance pairs a charset name with the sequence of byte values that form the BOM for that
 * charset. The byte values are copied on construction and never exposed directly, so instances
 * are immutable.
 * </p>
 *
 * @see org.apache.commons.io.input.BOMInputStream
 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
 *      (Non-Normative)</a>
 * @since 2.0
 */
public class ByteOrderMark implements Serializable {

    private static final long serialVersionUID = 1L;

    /** UTF-8 BOM. */
    public static final ByteOrderMark UTF_8    = new ByteOrderMark("UTF-8", 0xEF, 0xBB, 0xBF);

    /** UTF-16BE BOM (Big-Endian). */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);

    /** UTF-16LE BOM (Little-Endian). */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian).
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian).
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);

    /**
     * Unicode BOM character; external form depends on the encoding.
     *
     * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
     * @since 2.5
     */
    public static final char UTF_BOM = '\uFEFF';

    /** Name of the charset this BOM identifies; never null or empty. */
    private final String charsetName;

    /** The BOM's byte values, held as ints; a private copy that is never handed out. */
    private final int[] bytes;

    /**
     * Constructs a new BOM.
     * <p>
     * The given values are copied, so later changes to the caller's array do not affect this instance.
     * </p>
     *
     * @param charsetName The name of the charset the BOM represents
     * @param bytes The BOM's bytes
     * @throws IllegalArgumentException if the charsetName is null or
     * zero length
     * @throws IllegalArgumentException if the bytes are null or zero
     * length
     */
    public ByteOrderMark(final String charsetName, final int... bytes) {
        if (charsetName == null || charsetName.isEmpty()) {
            throw new IllegalArgumentException("No charsetName specified");
        }
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("No bytes specified");
        }
        this.charsetName = charsetName;
        // Defensive copy: the varargs array may still be referenced by the caller.
        this.bytes = bytes.clone();
    }

    /**
     * Gets the name of the {@link java.nio.charset.Charset} the BOM represents.
     *
     * @return the character set name
     */
    public String getCharsetName() {
        return charsetName;
    }

    /**
     * Gets the length of the BOM's bytes.
     *
     * @return the length of the BOM's bytes
     */
    public int length() {
        return bytes.length;
    }

    /**
     * Gets the byte at the specified position.
     *
     * @param pos The position
     * @return The specified byte, as the int value given at construction
     * @throws ArrayIndexOutOfBoundsException if {@code pos} is negative or not less than {@link #length()}
     */
    public int get(final int pos) {
        return bytes[pos];
    }

    /**
     * Gets a copy of the BOM's bytes.
     * <p>
     * Each stored int value is narrowed to a {@code byte}; the returned array is a fresh copy
     * that the caller may modify freely.
     * </p>
     *
     * @return a copy of the BOM's bytes
     */
    public byte[] getBytes() {
        final byte[] copy = new byte[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            copy[i] = (byte) bytes[i];
        }
        return copy;
    }

    /**
     * Indicates if this BOM's bytes equals another.
     * <p>
     * Only the byte values are compared; the charset name does not take part in the comparison.
     * </p>
     *
     * @param obj The object to compare to
     * @return true if the bom's bytes are equal, otherwise
     * false
     */
    @Override
    public boolean equals(final Object obj) {
        if (!(obj instanceof ByteOrderMark)) {
            return false;
        }
        final ByteOrderMark bom = (ByteOrderMark) obj;
        if (bytes.length != bom.length()) {
            return false;
        }
        for (int i = 0; i < bytes.length; i++) {
            if (bytes[i] != bom.get(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Computes the hashcode for this BOM.
     * <p>
     * The value is derived from the byte values only, mirroring {@link #equals(Object)}.
     * </p>
     *
     * @return the hashcode for this BOM.
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        // Seed from this class rather than getClass(): equals() accepts any ByteOrderMark
        // (including subclasses), so equal instances must not hash by their runtime class.
        int hashCode = ByteOrderMark.class.hashCode();
        for (final int b : bytes) {
            hashCode += b;
        }
        return hashCode;
    }

    /**
     * Converts this instance to a String representation of the BOM.
     * <p>
     * The format is the simple class name followed by the charset name and the bytes in
     * upper-case hexadecimal, for example {@code ByteOrderMark[UTF-8: 0xEF,0xBB,0xBF]}.
     * </p>
     *
     * @return the String representation of this BOM
     */
    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        builder.append(getClass().getSimpleName());
        builder.append('[');
        builder.append(charsetName);
        builder.append(": ");
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                builder.append(",");
            }
            builder.append("0x");
            // Mask to the low 8 bits so each value prints as a single unsigned byte.
            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT));
        }
        builder.append(']');
        return builder.toString();
    }

}