001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.io; 018 019 import java.io.Serializable; 020 021 /** 022 * Byte Order Mark (BOM) representation - see {@link org.apache.commons.io.input.BOMInputStream}. 023 * 024 * @see org.apache.commons.io.input.BOMInputStream 025 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a> 026 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings 027 * (Non-Normative)</a> 028 * @version $Id: ByteOrderMark.java 1347571 2012-06-07 11:13:53Z sebb $ 029 * @since 2.0 030 */ 031 public class ByteOrderMark implements Serializable { 032 033 private static final long serialVersionUID = 1L; 034 035 /** UTF-8 BOM */ 036 public static final ByteOrderMark UTF_8 = new ByteOrderMark("UTF-8", 0xEF, 0xBB, 0xBF); 037 038 /** UTF-16BE BOM (Big-Endian) */ 039 public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF); 040 041 /** UTF-16LE BOM (Little-Endian) */ 042 public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE); 043 044 /** 045 * UTF-32BE BOM (Big-Endian) 046 * @since 2.2 047 */ 048 public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF); 049 050 /** 051 * UTF-32LE BOM (Little-Endian) 052 * @since 2.2 053 */ 054 public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00); 055 056 private final String charsetName; 057 private final int[] bytes; 058 059 /** 060 * Construct a new BOM. 061 * 062 * @param charsetName The name of the charset the BOM represents 063 * @param bytes The BOM's bytes 064 * @throws IllegalArgumentException if the charsetName is null or 065 * zero length 066 * @throws IllegalArgumentException if the bytes are null or zero 067 * length 068 */ 069 public ByteOrderMark(String charsetName, int... bytes) { 070 if (charsetName == null || charsetName.length() == 0) { 071 throw new IllegalArgumentException("No charsetName specified"); 072 } 073 if (bytes == null || bytes.length == 0) { 074 throw new IllegalArgumentException("No bytes specified"); 075 } 076 this.charsetName = charsetName; 077 this.bytes = new int[bytes.length]; 078 System.arraycopy(bytes, 0, this.bytes, 0, bytes.length); 079 } 080 081 /** 082 * Return the name of the {@link java.nio.charset.Charset} the BOM represents. 083 * 084 * @return the character set name 085 */ 086 public String getCharsetName() { 087 return charsetName; 088 } 089 090 /** 091 * Return the length of the BOM's bytes. 092 * 093 * @return the length of the BOM's bytes 094 */ 095 public int length() { 096 return bytes.length; 097 } 098 099 /** 100 * The byte at the specified position. 101 * 102 * @param pos The position 103 * @return The specified byte 104 */ 105 public int get(int pos) { 106 return bytes[pos]; 107 } 108 109 /** 110 * Return a copy of the BOM's bytes. 111 * 112 * @return a copy of the BOM's bytes 113 */ 114 public byte[] getBytes() { 115 byte[] copy = new byte[bytes.length]; 116 for (int i = 0; i < bytes.length; i++) { 117 copy[i] = (byte)bytes[i]; 118 } 119 return copy; 120 } 121 122 /** 123 * Indicates if this BOM's bytes equals another. 124 * 125 * @param obj The object to compare to 126 * @return true if the bom's bytes are equal, otherwise 127 * false 128 */ 129 @Override 130 public boolean equals(Object obj) { 131 if (!(obj instanceof ByteOrderMark)) { 132 return false; 133 } 134 ByteOrderMark bom = (ByteOrderMark)obj; 135 if (bytes.length != bom.length()) { 136 return false; 137 } 138 for (int i = 0; i < bytes.length; i++) { 139 if (bytes[i] != bom.get(i)) { 140 return false; 141 } 142 } 143 return true; 144 } 145 146 /** 147 * Return the hashcode for this BOM. 148 * 149 * @return the hashcode for this BOM. 150 * @see java.lang.Object#hashCode() 151 */ 152 @Override 153 public int hashCode() { 154 int hashCode = getClass().hashCode(); 155 for (int b : bytes) { 156 hashCode += b; 157 } 158 return hashCode; 159 } 160 161 /** 162 * Provide a String representation of the BOM. 163 * 164 * @return the length of the BOM's bytes 165 */ 166 @Override 167 public String toString() { 168 StringBuilder builder = new StringBuilder(); 169 builder.append(getClass().getSimpleName()); 170 builder.append('['); 171 builder.append(charsetName); 172 builder.append(": "); 173 for (int i = 0; i < bytes.length; i++) { 174 if (i > 0) { 175 builder.append(","); 176 } 177 builder.append("0x"); 178 builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase()); 179 } 180 builder.append(']'); 181 return builder.toString(); 182 } 183 184 }