001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io; 018 019import java.io.Serializable; 020import java.util.Locale; 021 022/** 023 * Byte Order Mark (BOM) representation - see {@link org.apache.commons.io.input.BOMInputStream}. 024 * 025 * @see org.apache.commons.io.input.BOMInputStream 026 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a> 027 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings 028 * (Non-Normative)</a> 029 * @since 2.0 030 */ 031public class ByteOrderMark implements Serializable { 032 033 private static final long serialVersionUID = 1L; 034 035 /** UTF-8 BOM */ 036 public static final ByteOrderMark UTF_8 = new ByteOrderMark("UTF-8", 0xEF, 0xBB, 0xBF); 037 038 /** UTF-16BE BOM (Big-Endian) */ 039 public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF); 040 041 /** UTF-16LE BOM (Little-Endian) */ 042 public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE); 043 044 /** 045 * UTF-32BE BOM (Big-Endian) 046 * @since 2.2 047 */ 048 public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF); 049 050 /** 051 * UTF-32LE BOM (Little-Endian) 052 * @since 2.2 053 */ 054 public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00); 055 056 /** 057 * Unicode BOM character; external form depends on the encoding. 058 * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a> 059 * @since 2.5 060 */ 061 public static final char UTF_BOM = '\uFEFF'; 062 063 private final String charsetName; 064 private final int[] bytes; 065 066 /** 067 * Construct a new BOM. 068 * 069 * @param charsetName The name of the charset the BOM represents 070 * @param bytes The BOM's bytes 071 * @throws IllegalArgumentException if the charsetName is null or 072 * zero length 073 * @throws IllegalArgumentException if the bytes are null or zero 074 * length 075 */ 076 public ByteOrderMark(final String charsetName, final int... bytes) { 077 if (charsetName == null || charsetName.isEmpty()) { 078 throw new IllegalArgumentException("No charsetName specified"); 079 } 080 if (bytes == null || bytes.length == 0) { 081 throw new IllegalArgumentException("No bytes specified"); 082 } 083 this.charsetName = charsetName; 084 this.bytes = new int[bytes.length]; 085 System.arraycopy(bytes, 0, this.bytes, 0, bytes.length); 086 } 087 088 /** 089 * Return the name of the {@link java.nio.charset.Charset} the BOM represents. 090 * 091 * @return the character set name 092 */ 093 public String getCharsetName() { 094 return charsetName; 095 } 096 097 /** 098 * Return the length of the BOM's bytes. 099 * 100 * @return the length of the BOM's bytes 101 */ 102 public int length() { 103 return bytes.length; 104 } 105 106 /** 107 * The byte at the specified position. 108 * 109 * @param pos The position 110 * @return The specified byte 111 */ 112 public int get(final int pos) { 113 return bytes[pos]; 114 } 115 116 /** 117 * Return a copy of the BOM's bytes. 118 * 119 * @return a copy of the BOM's bytes 120 */ 121 public byte[] getBytes() { 122 final byte[] copy = new byte[bytes.length]; 123 for (int i = 0; i < bytes.length; i++) { 124 copy[i] = (byte)bytes[i]; 125 } 126 return copy; 127 } 128 129 /** 130 * Indicates if this BOM's bytes equals another. 131 * 132 * @param obj The object to compare to 133 * @return true if the bom's bytes are equal, otherwise 134 * false 135 */ 136 @Override 137 public boolean equals(final Object obj) { 138 if (!(obj instanceof ByteOrderMark)) { 139 return false; 140 } 141 final ByteOrderMark bom = (ByteOrderMark)obj; 142 if (bytes.length != bom.length()) { 143 return false; 144 } 145 for (int i = 0; i < bytes.length; i++) { 146 if (bytes[i] != bom.get(i)) { 147 return false; 148 } 149 } 150 return true; 151 } 152 153 /** 154 * Return the hashcode for this BOM. 155 * 156 * @return the hashcode for this BOM. 157 * @see java.lang.Object#hashCode() 158 */ 159 @Override 160 public int hashCode() { 161 int hashCode = getClass().hashCode(); 162 for (final int b : bytes) { 163 hashCode += b; 164 } 165 return hashCode; 166 } 167 168 /** 169 * Provide a String representation of the BOM. 170 * 171 * @return the length of the BOM's bytes 172 */ 173 @Override 174 public String toString() { 175 final StringBuilder builder = new StringBuilder(); 176 builder.append(getClass().getSimpleName()); 177 builder.append('['); 178 builder.append(charsetName); 179 builder.append(": "); 180 for (int i = 0; i < bytes.length; i++) { 181 if (i > 0) { 182 builder.append(","); 183 } 184 builder.append("0x"); 185 builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT)); 186 } 187 builder.append(']'); 188 return builder.toString(); 189 } 190 191}