ByteOrderMark.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.commons.io;
- import java.io.Serializable;
- import java.nio.charset.StandardCharsets;
- import java.util.Locale;
- import java.util.Objects;
- /**
- * Byte Order Mark (BOM) representation. See {@link org.apache.commons.io.input.BOMInputStream}.
- * <p>
- * We define the follow BOM constants:
- * </p>
- * <ul>
- * <li>{@link #UTF_16BE}</li>
- * <li>{@link #UTF_16LE}</li>
- * <li>{@link #UTF_32BE}</li>
- * <li>{@link #UTF_32LE}</li>
- * <li>{@link #UTF_8}</li>
- * </ul>
- * <h2>Deprecating Serialization</h2>
- * <p>
- * <em>Serialization is deprecated and will be removed in 3.0.</em>
- * </p>
- *
- * @see org.apache.commons.io.input.BOMInputStream
- * @see <a href="https://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
- * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
- * (Non-Normative)</a>
- * @since 2.0
- */
- public class ByteOrderMark implements Serializable {
- private static final long serialVersionUID = 1L;
- /**
- * UTF-8 BOM.
- * <p>
- * This BOM is:
- * </p>
- * <pre>
- * 0xEF 0xBB 0xBF
- * </pre>
- */
- public static final ByteOrderMark UTF_8 = new ByteOrderMark(StandardCharsets.UTF_8.name(), 0xEF, 0xBB, 0xBF);
- /**
- * UTF-16BE BOM (Big-Endian).
- * <p>
- * This BOM is:
- * </p>
- * <pre>
- * 0xFE 0xFF
- * </pre>
- */
- public static final ByteOrderMark UTF_16BE = new ByteOrderMark(StandardCharsets.UTF_16BE.name(), 0xFE, 0xFF);
- /**
- * UTF-16LE BOM (Little-Endian).
- * <p>
- * This BOM is:
- * </p>
- * <pre>
- * 0xFF 0xFE
- * </pre>
- */
- public static final ByteOrderMark UTF_16LE = new ByteOrderMark(StandardCharsets.UTF_16LE.name(), 0xFF, 0xFE);
- /**
- * UTF-32BE BOM (Big-Endian).
- * <p>
- * This BOM is:
- * </p>
- * <pre>
- * 0x00 0x00 0xFE 0xFF
- * </pre>
- *
- * @since 2.2
- */
- public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);
- /**
- * UTF-32LE BOM (Little-Endian).
- * <p>
- * This BOM is:
- * </p>
- * <pre>
- * 0xFF 0xFE 0x00 0x00
- * </pre>
- *
- * @since 2.2
- */
- public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);
- /**
- * Unicode BOM character; external form depends on the encoding.
- *
- * @see <a href="https://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
- * @since 2.5
- */
- public static final char UTF_BOM = '\uFEFF';
- /**
- * Charset name.
- */
- private final String charsetName;
- /**
- * Bytes.
- */
- private final int[] bytes;
- /**
- * Constructs a new instance.
- *
- * @param charsetName The name of the charset the BOM represents
- * @param bytes The BOM's bytes
- * @throws IllegalArgumentException if the charsetName is zero length
- * @throws IllegalArgumentException if the bytes are zero length
- */
- public ByteOrderMark(final String charsetName, final int... bytes) {
- Objects.requireNonNull(charsetName, "charsetName");
- Objects.requireNonNull(bytes, "bytes");
- if (charsetName.isEmpty()) {
- throw new IllegalArgumentException("No charsetName specified");
- }
- if (bytes.length == 0) {
- throw new IllegalArgumentException("No bytes specified");
- }
- this.charsetName = charsetName;
- this.bytes = bytes.clone();
- }
- /**
- * Indicates if this instance's bytes equals another.
- *
- * @param obj The object to compare to
- * @return true if the bom's bytes are equal, otherwise
- * false
- */
- @Override
- public boolean equals(final Object obj) {
- if (!(obj instanceof ByteOrderMark)) {
- return false;
- }
- final ByteOrderMark bom = (ByteOrderMark) obj;
- if (bytes.length != bom.length()) {
- return false;
- }
- for (int i = 0; i < bytes.length; i++) {
- if (bytes[i] != bom.get(i)) {
- return false;
- }
- }
- return true;
- }
- /**
- * Gets the byte at the specified position.
- *
- * @param pos The position
- * @return The specified byte
- */
- public int get(final int pos) {
- return bytes[pos];
- }
- /**
- * Gets a copy of the BOM's bytes.
- *
- * @return a copy of the BOM's bytes
- */
- public byte[] getBytes() {
- final byte[] copy = IOUtils.byteArray(bytes.length);
- for (int i = 0; i < bytes.length; i++) {
- copy[i] = (byte) bytes[i];
- }
- return copy;
- }
- /**
- * Gets the name of the {@link java.nio.charset.Charset} the BOM represents.
- *
- * @return the character set name
- */
- public String getCharsetName() {
- return charsetName;
- }
- /**
- * Computes the hash code for this BOM.
- *
- * @return the hash code for this BOM.
- * @see Object#hashCode()
- */
- @Override
- public int hashCode() {
- int hashCode = getClass().hashCode();
- for (final int b : bytes) {
- hashCode += b;
- }
- return hashCode;
- }
- /**
- * Gets the length of the BOM's bytes.
- *
- * @return the length of the BOM's bytes
- */
- public int length() {
- return bytes.length;
- }
- /**
- * Converts this instance to a String representation of the BOM.
- *
- * @return the length of the BOM's bytes
- */
- @Override
- public String toString() {
- final StringBuilder builder = new StringBuilder();
- builder.append(getClass().getSimpleName());
- builder.append('[');
- builder.append(charsetName);
- builder.append(": ");
- for (int i = 0; i < bytes.length; i++) {
- if (i > 0) {
- builder.append(",");
- }
- builder.append("0x");
- builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT));
- }
- builder.append(']');
- return builder.toString();
- }
- }