Coverage Report - org.apache.commons.io.input.BOMInputStream
 
Classes in this File Line Coverage Branch Coverage Complexity
BOMInputStream
100%
72/72
100%
56/56
2.833
BOMInputStream$1
100%
8/8
100%
4/4
2.833
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  * 
 9  
  *      http://www.apache.org/licenses/LICENSE-2.0
 10  
  * 
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 package org.apache.commons.io.input;
 18  
 
 19  
 import java.io.IOException;
 20  
 import java.io.InputStream;
 21  
 import java.util.Arrays;
 22  
 import java.util.Comparator;
 23  
 import java.util.List;
 24  
 
 25  
 import org.apache.commons.io.ByteOrderMark;
 26  
 
 27  
 /**
 28  
  * This class is used to wrap a stream that includes an encoded {@link ByteOrderMark} as its first bytes.
 29  
  * 
 30  
  * This class detects these bytes and, if required, can automatically skip them and return the subsequent byte as the
 31  
  * first byte in the stream.
 32  
  * 
 33  
  * The {@link ByteOrderMark} implementation has the following pre-defined BOMs:
 34  
  * <ul>
 35  
  * <li>UTF-8 - {@link ByteOrderMark#UTF_8}</li>
 36  
  * <li>UTF-16BE - {@link ByteOrderMark#UTF_16LE}</li>
 37  
  * <li>UTF-16LE - {@link ByteOrderMark#UTF_16BE}</li>
 38  
  * <li>UTF-32BE - {@link ByteOrderMark#UTF_32LE}</li>
 39  
  * <li>UTF-32LE - {@link ByteOrderMark#UTF_32BE}</li>
 40  
  * </ul>
 41  
  * 
 42  
  * 
 43  
  * <h3>Example 1 - Detect and exclude a UTF-8 BOM</h3>
 44  
  * 
 45  
  * <pre>
 46  
  * BOMInputStream bomIn = new BOMInputStream(in);
 47  
  * if (bomIn.hasBOM()) {
 48  
  *     // has a UTF-8 BOM
 49  
  * }
 50  
  * </pre>
 51  
  * 
 52  
  * <h3>Example 2 - Detect a UTF-8 BOM (but don't exclude it)</h3>
 53  
  * 
 54  
  * <pre>
 55  
  * boolean include = true;
 56  
  * BOMInputStream bomIn = new BOMInputStream(in, include);
 57  
  * if (bomIn.hasBOM()) {
 58  
  *     // has a UTF-8 BOM
 59  
  * }
 60  
  * </pre>
 61  
  * 
 62  
  * <h3>Example 3 - Detect Multiple BOMs</h3>
 63  
  * 
 64  
  * <pre>
 65  
  * BOMInputStream bomIn = new BOMInputStream(in, 
 66  
  *   ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
 67  
  *   ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE
 68  
  *   );
 69  
  * if (bomIn.hasBOM() == false) {
 70  
  *     // No BOM found
 71  
  * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
 72  
  *     // has a UTF-16LE BOM
 73  
  * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
 74  
  *     // has a UTF-16BE BOM
 75  
  * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32LE)) {
 76  
  *     // has a UTF-32LE BOM
 77  
  * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32BE)) {
 78  
  *     // has a UTF-32BE BOM
 79  
  * }
 80  
  * </pre>
 81  
  * 
 82  
  * @see org.apache.commons.io.ByteOrderMark
 83  
  * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia - Byte Order Mark</a>
 84  
  * @version $Id: BOMInputStream.java 1415850 2012-11-30 20:51:39Z ggregory $
 85  
  * @since 2.0
 86  
  */
 87  
 public class BOMInputStream extends ProxyInputStream {
 88  
     private final boolean include;
 89  
     /**
 90  
      * BOMs are sorted from longest to shortest.
 91  
      */
 92  
     private final List<ByteOrderMark> boms;
 93  
     private ByteOrderMark byteOrderMark;
 94  
     private int[] firstBytes;
 95  
     private int fbLength;
 96  
     private int fbIndex;
 97  
     private int markFbIndex;
 98  
     private boolean markedAtStart;
 99  
 
 100  
     /**
 101  
      * Constructs a new BOM InputStream that excludes a {@link ByteOrderMark#UTF_8} BOM.
 102  
      * 
 103  
      * @param delegate
 104  
      *            the InputStream to delegate to
 105  
      */
 106  
     public BOMInputStream(final InputStream delegate) {
 107  52
         this(delegate, false, ByteOrderMark.UTF_8);
 108  52
     }
 109  
 
 110  
     /**
 111  
      * Constructs a new BOM InputStream that detects a a {@link ByteOrderMark#UTF_8} and optionally includes it.
 112  
      * 
 113  
      * @param delegate
 114  
      *            the InputStream to delegate to
 115  
      * @param include
 116  
      *            true to include the UTF-8 BOM or false to exclude it
 117  
      */
 118  
     public BOMInputStream(final InputStream delegate, final boolean include) {
 119  4
         this(delegate, include, ByteOrderMark.UTF_8);
 120  4
     }
 121  
 
 122  
     /**
 123  
      * Constructs a new BOM InputStream that excludes the specified BOMs.
 124  
      * 
 125  
      * @param delegate
 126  
      *            the InputStream to delegate to
 127  
      * @param boms
 128  
      *            The BOMs to detect and exclude
 129  
      */
 130  
     public BOMInputStream(final InputStream delegate, final ByteOrderMark... boms) {
 131  22
         this(delegate, false, boms);
 132  22
     }
 133  
 
 134  
     /**
 135  
      * Compares ByteOrderMark objects in descending length order.
 136  
      */
 137  4580
     private static final Comparator<ByteOrderMark> ByteOrderMarkLengthComparator = new Comparator<ByteOrderMark>() {
 138  
 
 139  
         public int compare(final ByteOrderMark bom1, final ByteOrderMark bom2) {
 140  4574
             final int len1 = bom1.length();
 141  4574
             final int len2 = bom2.length();
 142  4574
             if (len1 > len2) {
 143  62
                 return -1;
 144  
             }
 145  4512
             if (len2 > len1) {
 146  1494
                 return 1;
 147  
             }
 148  3018
             return 0;
 149  
         }
 150  
     };
 151  
 
 152  
     /**
 153  
      * Constructs a new BOM InputStream that detects the specified BOMs and optionally includes them.
 154  
      * 
 155  
      * @param delegate
 156  
      *            the InputStream to delegate to
 157  
      * @param include
 158  
      *            true to include the specified BOMs or false to exclude them
 159  
      * @param boms
 160  
      *            The BOMs to detect and optionally exclude
 161  
      */
 162  
     public BOMInputStream(final InputStream delegate, final boolean include, final ByteOrderMark... boms) {
 163  1086
         super(delegate);
 164  1086
         if (boms == null || boms.length == 0) {
 165  4
             throw new IllegalArgumentException("No BOMs specified");
 166  
         }
 167  1082
         this.include = include;
 168  
         // Sort the BOMs to match the longest BOM first because some BOMs have the same starting two bytes.
 169  1082
         Arrays.sort(boms, ByteOrderMarkLengthComparator);
 170  1082
         this.boms = Arrays.asList(boms);
 171  
 
 172  1082
     }
 173  
 
 174  
     /**
 175  
      * Indicates whether the stream contains one of the specified BOMs.
 176  
      * 
 177  
      * @return true if the stream has one of the specified BOMs, otherwise false if it does not
 178  
      * @throws IOException
 179  
      *             if an error reading the first bytes of the stream occurs
 180  
      */
 181  
     public boolean hasBOM() throws IOException {
 182  24
         return getBOM() != null;
 183  
     }
 184  
 
 185  
     /**
 186  
      * Indicates whether the stream contains the specified BOM.
 187  
      * 
 188  
      * @param bom
 189  
      *            The BOM to check for
 190  
      * @return true if the stream has the specified BOM, otherwise false if it does not
 191  
      * @throws IllegalArgumentException
 192  
      *             if the BOM is not one the stream is configured to detect
 193  
      * @throws IOException
 194  
      *             if an error reading the first bytes of the stream occurs
 195  
      */
 196  
     public boolean hasBOM(final ByteOrderMark bom) throws IOException {
 197  36
         if (!boms.contains(bom)) {
 198  10
             throw new IllegalArgumentException("Stream not configure to detect " + bom);
 199  
         }
 200  26
         return byteOrderMark != null && getBOM().equals(bom);
 201  
     }
 202  
 
 203  
     /**
 204  
      * Return the BOM (Byte Order Mark).
 205  
      * 
 206  
      * @return The BOM or null if none
 207  
      * @throws IOException
 208  
      *             if an error reading the first bytes of the stream occurs
 209  
      */
 210  
     public ByteOrderMark getBOM() throws IOException {
 211  12446
         if (firstBytes == null) {
 212  1084
             fbLength = 0;
 213  
             // BOMs are sorted from longest to shortest
 214  1084
             final int maxBomSize = boms.get(0).length();
 215  1084
             firstBytes = new int[maxBomSize];
 216  
             // Read first maxBomSize bytes
 217  6908
             for (int i = 0; i < firstBytes.length; i++) {
 218  6274
                 firstBytes[i] = in.read();
 219  6274
                 fbLength++;
 220  6274
                 if (firstBytes[i] < 0) {
 221  450
                     break;
 222  
                 }
 223  
             }
 224  
             // match BOM in firstBytes
 225  1084
             byteOrderMark = find();
 226  1084
             if (byteOrderMark != null) {
 227  426
                 if (!include) {
 228  190
                     if (byteOrderMark.length() < firstBytes.length) {
 229  96
                         fbIndex = byteOrderMark.length();
 230  
                     } else {
 231  94
                         fbLength = 0;
 232  
                     }
 233  
                 }
 234  
             }
 235  
         }
 236  12446
         return byteOrderMark;
 237  
     }
 238  
 
 239  
     /**
 240  
      * Return the BOM charset Name - {@link ByteOrderMark#getCharsetName()}.
 241  
      * 
 242  
      * @return The BOM charset Name or null if no BOM found
 243  
      * @throws IOException
 244  
      *             if an error reading the first bytes of the stream occurs
 245  
      * 
 246  
      */
 247  
     public String getBOMCharsetName() throws IOException {
 248  1004
         getBOM();
 249  1004
         return byteOrderMark == null ? null : byteOrderMark.getCharsetName();
 250  
     }
 251  
 
 252  
     /**
 253  
      * This method reads and either preserves or skips the first bytes in the stream. It behaves like the single-byte
 254  
      * <code>read()</code> method, either returning a valid byte or -1 to indicate that the initial bytes have been
 255  
      * processed already.
 256  
      * 
 257  
      * @return the byte read (excluding BOM) or -1 if the end of stream
 258  
      * @throws IOException
 259  
      *             if an I/O error occurs
 260  
      */
 261  
     private int readFirstBytes() throws IOException {
 262  11374
         getBOM();
 263  11374
         return fbIndex < fbLength ? firstBytes[fbIndex++] : -1;
 264  
     }
 265  
 
 266  
     /**
 267  
      * Find a BOM with the specified bytes.
 268  
      * 
 269  
      * @return The matched BOM or null if none matched
 270  
      */
 271  
     private ByteOrderMark find() {
 272  1084
         for (final ByteOrderMark bom : boms) {
 273  4576
             if (matches(bom)) {
 274  426
                 return bom;
 275  
             }
 276  4150
         }
 277  658
         return null;
 278  
     }
 279  
 
 280  
     /**
 281  
      * Check if the bytes match a BOM.
 282  
      * 
 283  
      * @param bom
 284  
      *            The BOM
 285  
      * @return true if the bytes match the bom, otherwise false
 286  
      */
 287  
     private boolean matches(final ByteOrderMark bom) {
 288  
         // if (bom.length() != fbLength) {
 289  
         // return false;
 290  
         // }
 291  
         // firstBytes may be bigger than the BOM bytes
 292  7442
         for (int i = 0; i < bom.length(); i++) {
 293  7016
             if (bom.get(i) != firstBytes[i]) {
 294  4150
                 return false;
 295  
             }
 296  
         }
 297  426
         return true;
 298  
     }
 299  
 
 300  
     // ----------------------------------------------------------------------------
 301  
     // Implementation of InputStream
 302  
     // ----------------------------------------------------------------------------
 303  
 
 304  
     /**
 305  
      * Invokes the delegate's <code>read()</code> method, detecting and optionally skipping BOM.
 306  
      * 
 307  
      * @return the byte read (excluding BOM) or -1 if the end of stream
 308  
      * @throws IOException
 309  
      *             if an I/O error occurs
 310  
      */
 311  
     @Override
 312  
     public int read() throws IOException {
 313  6350
         final int b = readFirstBytes();
 314  6350
         return b >= 0 ? b : in.read();
 315  
     }
 316  
 
 317  
     /**
 318  
      * Invokes the delegate's <code>read(byte[], int, int)</code> method, detecting and optionally skipping BOM.
 319  
      * 
 320  
      * @param buf
 321  
      *            the buffer to read the bytes into
 322  
      * @param off
 323  
      *            The start offset
 324  
      * @param len
 325  
      *            The number of bytes to read (excluding BOM)
 326  
      * @return the number of bytes read or -1 if the end of stream
 327  
      * @throws IOException
 328  
      *             if an I/O error occurs
 329  
      */
 330  
     @Override
 331  
     public int read(final byte[] buf, int off, int len) throws IOException {
 332  1594
         int firstCount = 0;
 333  1594
         int b = 0;
 334  6612
         while (len > 0 && b >= 0) {
 335  5018
             b = readFirstBytes();
 336  5018
             if (b >= 0) {
 337  3870
                 buf[off++] = (byte) (b & 0xFF);
 338  3870
                 len--;
 339  3870
                 firstCount++;
 340  
             }
 341  
         }
 342  1594
         final int secondCount = in.read(buf, off, len);
 343  1594
         return secondCount < 0 ? firstCount > 0 ? firstCount : -1 : firstCount + secondCount;
 344  
     }
 345  
 
 346  
     /**
 347  
      * Invokes the delegate's <code>read(byte[])</code> method, detecting and optionally skipping BOM.
 348  
      * 
 349  
      * @param buf
 350  
      *            the buffer to read the bytes into
 351  
      * @return the number of bytes read (excluding BOM) or -1 if the end of stream
 352  
      * @throws IOException
 353  
      *             if an I/O error occurs
 354  
      */
 355  
     @Override
 356  
     public int read(final byte[] buf) throws IOException {
 357  578
         return read(buf, 0, buf.length);
 358  
     }
 359  
 
 360  
     /**
 361  
      * Invokes the delegate's <code>mark(int)</code> method.
 362  
      * 
 363  
      * @param readlimit
 364  
      *            read ahead limit
 365  
      */
 366  
     @Override
 367  
     public synchronized void mark(final int readlimit) {
 368  476
         markFbIndex = fbIndex;
 369  476
         markedAtStart = firstBytes == null;
 370  476
         in.mark(readlimit);
 371  476
     }
 372  
 
 373  
     /**
 374  
      * Invokes the delegate's <code>reset()</code> method.
 375  
      * 
 376  
      * @throws IOException
 377  
      *             if an I/O error occurs
 378  
      */
 379  
     @Override
 380  
     public synchronized void reset() throws IOException {
 381  476
         fbIndex = markFbIndex;
 382  476
         if (markedAtStart) {
 383  8
             firstBytes = null;
 384  
         }
 385  
 
 386  476
         in.reset();
 387  476
     }
 388  
 
 389  
     /**
 390  
      * Invokes the delegate's <code>skip(long)</code> method, detecting and optionallyskipping BOM.
 391  
      * 
 392  
      * @param n
 393  
      *            the number of bytes to skip
 394  
      * @return the number of bytes to skipped or -1 if the end of stream
 395  
      * @throws IOException
 396  
      *             if an I/O error occurs
 397  
      */
 398  
     @Override
 399  
     public long skip(long n) throws IOException {
 400  8
         while (n > 0 && readFirstBytes() >= 0) {
 401  4
             n--;
 402  
         }
 403  4
         return in.skip(n);
 404  
     }
 405  
 }