001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.io;
018    
019    import java.io.Serializable;
020    
/**
 * Byte Order Mark (BOM) representation - see {@link org.apache.commons.io.input.BOMInputStream}.
 * <p>
 * A BOM pairs a charset name with the sequence of byte values that marks that charset at the
 * start of a stream. The state held by this class is fixed at construction time: the
 * constructor stores a private copy of the supplied values and {@link #getBytes()} hands out a
 * fresh copy on every call.
 * <p>
 * Equality and hash code are based on the byte values only; the charset name is not compared.
 *
 * @see org.apache.commons.io.input.BOMInputStream
 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
 *      (Non-Normative)</a>
 * @version $Id: ByteOrderMark.java 1310398 2012-04-06 15:18:47Z ggregory $
 * @since 2.0
 */
public class ByteOrderMark implements Serializable {

    private static final long serialVersionUID = 1L;

    /** UTF-8 BOM */
    public static final ByteOrderMark UTF_8    = new ByteOrderMark("UTF-8",    0xEF, 0xBB, 0xBF);

    /** UTF-16BE BOM (Big-Endian) */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);

    /** UTF-16LE BOM (Little-Endian) */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian)
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian)
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);

    /** Name of the charset this BOM stands for; never null or empty. */
    private final String charsetName;

    /** The BOM values, one int per byte; private copy, never null or empty. */
    private final int[] bytes;

    /**
     * Construct a new BOM.
     *
     * @param charsetName The name of the charset the BOM represents
     * @param bytes The BOM's bytes
     * @throws IllegalArgumentException if the charsetName is null or
     * zero length
     * @throws IllegalArgumentException if the bytes are null or zero
     * length
     */
    public ByteOrderMark(String charsetName, int... bytes) {
        if (charsetName == null || charsetName.length() == 0) {
            throw new IllegalArgumentException("No charsetName specified");
        }
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("No bytes specified");
        }
        this.charsetName = charsetName;
        // Defensive copy: later changes to the caller's array must not affect this BOM.
        this.bytes = bytes.clone();
    }

    /**
     * Return the name of the {@link java.nio.charset.Charset} the BOM represents.
     *
     * @return the character set name
     */
    public String getCharsetName() {
        return charsetName;
    }

    /**
     * Return the length of the BOM's bytes.
     *
     * @return the length of the BOM's bytes
     */
    public int length() {
        return bytes.length;
    }

    /**
     * The byte at the specified position, returned as the int value that was passed to the
     * constructor (no narrowing or masking is applied).
     *
     * @param pos The position
     * @return The specified byte
     * @throws ArrayIndexOutOfBoundsException if pos is negative or not less than
     * {@link #length()}
     */
    public int get(int pos) {
        return bytes[pos];
    }

    /**
     * Return a copy of the BOM's bytes. Each stored int value is narrowed to a byte with a
     * cast, and a new array is created on every call.
     *
     * @return a copy of the BOM's bytes
     */
    public byte[] getBytes() {
        byte[] copy = new byte[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            copy[i] = (byte)bytes[i];
        }
        return copy;
    }

    /**
     * Indicates if this BOM's bytes equals another. Only the byte values are compared; the
     * charset name plays no part in equality.
     *
     * @param obj The object to compare to
     * @return true if the bom's bytes are equal, otherwise
     * false
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof ByteOrderMark)) {
            return false;
        }
        // Compare the stored arrays directly so that equals() and hashCode() are always
        // computed from the same state.
        int[] otherBytes = ((ByteOrderMark) obj).bytes;
        if (bytes.length != otherBytes.length) {
            return false;
        }
        for (int i = 0; i < bytes.length; i++) {
            if (bytes[i] != otherBytes[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Return the hashcode for this BOM. Like {@link #equals(Object)}, it depends only on
     * the byte values.
     *
     * @return the hashcode for this BOM.
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        // Seed with the fixed ByteOrderMark class rather than getClass(): equals() accepts
        // subclass instances via instanceof, so objects that compare equal must not get
        // different hash codes just because their runtime classes differ.
        int hashCode = ByteOrderMark.class.hashCode();
        for (int b : bytes) {
            hashCode += b;
        }
        return hashCode;
    }

    /**
     * Provide a String representation of the BOM, made of the simple class name, the charset
     * name and the byte values in upper-case hexadecimal, for example
     * <code>ByteOrderMark[UTF-8: 0xEF,0xBB,0xBF]</code>. Values are not zero-padded.
     *
     * @return a String representation of this BOM
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append(getClass().getSimpleName());
        builder.append('[');
        builder.append(charsetName);
        builder.append(": ");
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                builder.append(",");
            }
            builder.append("0x");
            // Mask to the low 8 bits so each entry prints as a single byte value.
            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase());
        }
        builder.append(']');
        return builder.toString();
    }

}