View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io;
18  
19  import java.io.Serializable;
20  
/**
 * Byte Order Mark (BOM) representation - see {@link org.apache.commons.io.input.BOMInputStream}.
 * <p>
 * Each instance pairs a charset name with the sequence of byte values that form the BOM for that
 * charset. The byte values are copied on construction and {@link #getBytes()} hands out a fresh
 * array, so callers can never modify the values held by an instance.
 *
 * @see org.apache.commons.io.input.BOMInputStream
 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
 *      (Non-Normative)</a>
 * @version $Id: ByteOrderMark.java 1471767 2013-04-24 23:24:19Z sebb $
 * @since 2.0
 */
public class ByteOrderMark implements Serializable {

    private static final long serialVersionUID = 1L;

    /** UTF-8 BOM */
    public static final ByteOrderMark UTF_8    = new ByteOrderMark("UTF-8",    0xEF, 0xBB, 0xBF);

    /** UTF-16BE BOM (Big-Endian) */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);

    /** UTF-16LE BOM (Little-Endian) */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian)
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian)
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);

    /**
     * Unicode BOM character; external form depends on the encoding.
     * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
     * @since 2.5
     */
    public static final char UTF_BOM = '\uFEFF';

    /** Name of the charset this BOM identifies; never null or empty. */
    private final String charsetName;

    /** The BOM's byte values, held as ints; never null or empty. */
    private final int[] bytes;

    /**
     * Construct a new BOM.
     * <p>
     * The supplied values are copied, so later changes to the caller's array do not affect
     * this instance.
     *
     * @param charsetName The name of the charset the BOM represents
     * @param bytes The BOM's bytes
     * @throws IllegalArgumentException if the charsetName is null or
     * zero length
     * @throws IllegalArgumentException if the bytes are null or zero
     * length
     */
    public ByteOrderMark(final String charsetName, final int... bytes) {
        if (charsetName == null || charsetName.length() == 0) {
            throw new IllegalArgumentException("No charsetName specified");
        }
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("No bytes specified");
        }
        this.charsetName = charsetName;
        // Defensive copy: the varargs array may be owned (and later mutated) by the caller.
        this.bytes = bytes.clone();
    }

    /**
     * Return the name of the {@link java.nio.charset.Charset} the BOM represents.
     *
     * @return the character set name
     */
    public String getCharsetName() {
        return charsetName;
    }

    /**
     * Return the length of the BOM's bytes.
     *
     * @return the length of the BOM's bytes
     */
    public int length() {
        return bytes.length;
    }

    /**
     * The byte at the specified position.
     *
     * @param pos The position
     * @return The specified byte, exactly as the int value passed to the constructor
     * @throws ArrayIndexOutOfBoundsException if {@code pos} is negative or not less than
     * {@link #length()}
     */
    public int get(final int pos) {
        return bytes[pos];
    }

    /**
     * Return a copy of the BOM's bytes.
     * <p>
     * Each stored int is narrowed to a {@code byte}, so values above {@code 0x7F} appear as
     * negative bytes in the returned array.
     *
     * @return a copy of the BOM's bytes
     */
    public byte[] getBytes() {
        final byte[] copy = new byte[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            copy[i] = (byte) bytes[i];
        }
        return copy;
    }

    /**
     * Indicates if this BOM's bytes equals another.
     * <p>
     * Only the byte values are compared; the charset name does not take part in the comparison.
     *
     * @param obj The object to compare to
     * @return true if the bom's bytes are equal, otherwise
     * false
     */
    @Override
    public boolean equals(final Object obj) {
        if (!(obj instanceof ByteOrderMark)) {
            return false;
        }
        final ByteOrderMark bom = (ByteOrderMark) obj;
        if (bytes.length != bom.length()) {
            return false;
        }
        for (int i = 0; i < bytes.length; i++) {
            if (bytes[i] != bom.get(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Return the hashcode for this BOM.
     * <p>
     * The value is derived from the byte values only, matching {@link #equals(Object)}.
     *
     * @return the hashcode for this BOM.
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        // Seed with ByteOrderMark.class rather than getClass(): equals() accepts subclass
        // instances, so objects that compare equal must not differ by their runtime class here.
        int hashCode = ByteOrderMark.class.hashCode();
        for (final int b : bytes) {
            hashCode += b;
        }
        return hashCode;
    }

    /**
     * Provide a String representation of the BOM.
     *
     * @return a string of the form {@code SimpleClassName[charsetName: 0xNN,0xNN,...]} where each
     * byte is shown as upper-case hexadecimal
     */
    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        builder.append(getClass().getSimpleName());
        builder.append('[');
        builder.append(charsetName);
        builder.append(": ");
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                builder.append(',');
            }
            builder.append("0x");
            // Locale.ROOT keeps the hex digits independent of the default locale's casing rules.
            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(java.util.Locale.ROOT));
        }
        builder.append(']');
        return builder.toString();
    }

}
191 }