Coverage Report - org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
 
Classes in this File Line Coverage Branch Coverage Complexity
GzipCompressorInputStream
83%
90/108
68%
49/72
7,125
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one
 3  
  * or more contributor license agreements.  See the NOTICE file
 4  
  * distributed with this work for additional information
 5  
  * regarding copyright ownership.  The ASF licenses this file
 6  
  * to you under the Apache License, Version 2.0 (the
 7  
  * "License"); you may not use this file except in compliance
 8  
  * with the License.  You may obtain a copy of the License at
 9  
  *
 10  
  * http://www.apache.org/licenses/LICENSE-2.0
 11  
  *
 12  
  * Unless required by applicable law or agreed to in writing,
 13  
  * software distributed under the License is distributed on an
 14  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15  
  * KIND, either express or implied.  See the License for the
 16  
  * specific language governing permissions and limitations
 17  
  * under the License.
 18  
  */
 19  
 package org.apache.commons.compress.compressors.gzip;
 20  
 
 21  
 import java.io.IOException;
 22  
 import java.io.EOFException;
 23  
 import java.io.InputStream;
 24  
 import java.io.DataInputStream;
 25  
 import java.io.BufferedInputStream;
 26  
 import java.util.zip.DataFormatException;
 27  
 import java.util.zip.Inflater;
 28  
 import java.util.zip.CRC32;
 29  
 
 30  
 import org.apache.commons.compress.compressors.CompressorInputStream;
 31  
 
 32  
 /**
 33  
  * Input stream that decompresses .gz files.
 34  
  * This supports decompressing concatenated .gz files which is important
 35  
  * when decompressing standalone .gz files.
 36  
  * <p>
 37  
  * {@link java.util.zip.GZIPInputStream} doesn't decompress concatenated .gz
 38  
  * files: it stops after the first member and silently ignores the rest.
 39  
  * It doesn't leave the read position to point to the beginning of the next
 40  
  * member, which makes it difficult to work around the lack of concatenation
 41  
  * support.
 42  
  * <p>
 43  
  * Instead of using <code>GZIPInputStream</code>, this class has its own .gz
 44  
  * container format decoder. The actual decompression is done with
 45  
  * {@link java.util.zip.Inflater}.
 46  
  */
 47  2
 public class GzipCompressorInputStream extends CompressorInputStream {
 48  
     // Header flags
 49  
     // private static final int FTEXT = 0x01; // Uninteresting for us
 50  
     private static final int FHCRC = 0x02;
 51  
     private static final int FEXTRA = 0x04;
 52  
     private static final int FNAME = 0x08;
 53  
     private static final int FCOMMENT = 0x10;
 54  
     private static final int FRESERVED = 0xE0;
 55  
 
 56  
     // Compressed input stream, possibly wrapped in a BufferedInputStream
 57  
     private final InputStream in;
 58  
 
 59  
     // True if decompressing multimember streams.
 60  
     private final boolean decompressConcatenated;
 61  
 
 62  
     // Buffer to hold the input data
 63  12
     private final byte[] buf = new byte[8192];
 64  
 
 65  
     // Amount of data in buf.
 66  12
     private int bufUsed = 0;
 67  
 
 68  
     // Decompressor
 69  12
     private Inflater inf = new Inflater(true);
 70  
 
 71  
     // CRC32 from uncompressed data
 72  12
     private final CRC32 crc = new CRC32();
 73  
 
 74  
     private int memberSize;
 75  
 
 76  
     // True once everything has been decompressed
 77  12
     private boolean endReached = false;
 78  
 
 79  
     // used in no-arg read method
 80  12
     private final byte[] oneByte = new byte[1];
 81  
 
 82  
     /**
 83  
      * Constructs a new input stream that decompresses gzip-compressed data
 84  
      * from the specified input stream.
 85  
      * <p>
 86  
      * This is equivalent to
 87  
      * <code>GzipCompressorInputStream(inputStream, false)</code> and thus
 88  
      * will not decompress concatenated .gz files.
 89  
      *
 90  
      * @param inputStream  the InputStream from which this object should
 91  
      *                     be created of
 92  
      *
 93  
      * @throws IOException if the stream could not be created
 94  
      */
 95  
     public GzipCompressorInputStream(InputStream inputStream)
 96  
             throws IOException {
 97  8
         this(inputStream, false);
 98  8
     }
 99  
 
 100  
     /**
 101  
      * Constructs a new input stream that decompresses gzip-compressed data
 102  
      * from the specified input stream.
 103  
      * <p>
 104  
      * If <code>decompressConcatenated</code> is {@code false}:
 105  
      * This decompressor might read more input than it will actually use.
 106  
      * If <code>inputStream</code> supports <code>mark</code> and
 107  
      * <code>reset</code>, then the input position will be adjusted
 108  
      * so that it is right after the last byte of the compressed stream.
 109  
      * If <code>mark</code> isn't supported, the input position will be
 110  
      * undefined.
 111  
      *
 112  
      * @param inputStream  the InputStream from which this object should
 113  
      *                     be created of
 114  
      * @param decompressConcatenated
 115  
      *                     if true, decompress until the end of the input;
 116  
      *                     if false, stop after the first .gz member
 117  
      *
 118  
      * @throws IOException if the stream could not be created
 119  
      */
 120  
     public GzipCompressorInputStream(InputStream inputStream,
 121  
                                      boolean decompressConcatenated)
 122  12
             throws IOException {
 123  
         // Mark support is strictly needed for concatenated files only,
 124  
         // but it's simpler if it is always available.
 125  12
         if (inputStream.markSupported()) {
 126  4
             in = inputStream;
 127  
         } else {
 128  8
             in = new BufferedInputStream(inputStream);
 129  
         }
 130  
 
 131  12
         this.decompressConcatenated = decompressConcatenated;
 132  12
         init(true);
 133  12
     }
 134  
 
 135  
     private boolean init(boolean isFirstMember) throws IOException {
 136  16
         assert isFirstMember || decompressConcatenated;
 137  
 
 138  
         // Check the magic bytes without a possibility of EOFException.
 139  16
         int magic0 = in.read();
 140  16
         int magic1 = in.read();
 141  
 
 142  
         // If end of input was reached after decompressing at least
 143  
         // one .gz member, we have reached the end of the file successfully.
 144  16
         if (magic0 == -1 && !isFirstMember) {
 145  2
             return false;
 146  
         }
 147  
 
 148  14
         if (magic0 != 31 || magic1 != 139) {
 149  0
             throw new IOException(isFirstMember
 150  
                                   ? "Input is not in the .gz format"
 151  
                                   : "Garbage after a valid .gz stream");
 152  
         }
 153  
 
 154  
         // Parsing the rest of the header may throw EOFException.
 155  14
         DataInputStream inData = new DataInputStream(in);
 156  14
         int method = inData.readUnsignedByte();
 157  14
         if (method != 8) {
 158  0
             throw new IOException("Unsupported compression method "
 159  
                                   + method + " in the .gz header");
 160  
         }
 161  
 
 162  14
         int flg = inData.readUnsignedByte();
 163  14
         if ((flg & FRESERVED) != 0) {
 164  0
             throw new IOException(
 165  
                     "Reserved flags are set in the .gz header");
 166  
         }
 167  
 
 168  14
         inData.readInt(); // mtime, ignored
 169  14
         inData.readUnsignedByte(); // extra flags, ignored
 170  14
         inData.readUnsignedByte(); // operating system, ignored
 171  
 
 172  
         // Extra field, ignored
 173  14
         if ((flg & FEXTRA) != 0) {
 174  0
             int xlen = inData.readUnsignedByte();
 175  0
             xlen |= inData.readUnsignedByte() << 8;
 176  
 
 177  
             // This isn't as efficient as calling in.skip would be,
 178  
             // but it's lazier to handle unexpected end of input this way.
 179  
             // Most files don't have an extra field anyway.
 180  0
             while (xlen-- > 0) {
 181  0
                 inData.readUnsignedByte();
 182  
             }
 183  
         }
 184  
 
 185  
         // Original file name, ignored
 186  14
         if ((flg & FNAME) != 0) {
 187  6
             readToNull(inData);
 188  
         }
 189  
 
 190  
         // Comment, ignored
 191  14
         if ((flg & FCOMMENT) != 0) {
 192  0
             readToNull(inData);
 193  
         }
 194  
 
 195  
         // Header "CRC16" which is actually a truncated CRC32 (which isn't
 196  
         // as good as real CRC16). I don't know if any encoder implementation
 197  
         // sets this, so it's not worth trying to verify it. GNU gzip 1.4
 198  
         // doesn't support this field, but zlib seems to be able to at least
 199  
         // skip over it.
 200  14
         if ((flg & FHCRC) != 0) {
 201  0
             inData.readShort();
 202  
         }
 203  
 
 204  
         // Reset
 205  14
         inf.reset();
 206  14
         crc.reset();
 207  14
         memberSize = 0;
 208  
 
 209  14
         return true;
 210  
     }
 211  
 
 212  
     private void readToNull(DataInputStream inData) throws IOException {
 213  12
         while (inData.readUnsignedByte() != 0x00) {}
 214  6
     }
 215  
 
 216  
     /** {@inheritDoc} */
 217  
     @Override
 218  
     public int read() throws IOException {
 219  10
         return read(oneByte, 0, 1) == -1 ? -1 : (oneByte[0] & 0xFF);
 220  
     }
 221  
 
 222  
     /**
 223  
      * {@inheritDoc}
 224  
      *
 225  
      * @since 1.1
 226  
      */
 227  
     @Override
 228  
     public int read(byte[] b, int off, int len) throws IOException {
 229  22
         if (endReached) {
 230  6
             return -1;
 231  
         }
 232  
 
 233  16
         int size = 0;
 234  
 
 235  22
         while (len > 0) {
 236  16
             if (inf.needsInput()) {
 237  
                 // Remember the current position because we may need to
 238  
                 // rewind after reading too much input.
 239  12
                 in.mark(buf.length);
 240  
 
 241  12
                 bufUsed = in.read(buf);
 242  12
                 if (bufUsed == -1) {
 243  0
                     throw new EOFException();
 244  
                 }
 245  
 
 246  12
                 inf.setInput(buf, 0, bufUsed);
 247  
             }
 248  
 
 249  
             int ret;
 250  
             try {
 251  16
                 ret = inf.inflate(b, off, len);
 252  0
             } catch (DataFormatException e) {
 253  0
                 throw new IOException("Gzip-compressed data is corrupt");
 254  16
             }
 255  
 
 256  16
             crc.update(b, off, ret);
 257  16
             memberSize += ret;
 258  16
             off += ret;
 259  16
             len -= ret;
 260  16
             size += ret;
 261  16
             count(ret);
 262  
 
 263  16
             if (inf.finished()) {
 264  
                 // We may have read too many bytes. Rewind the read
 265  
                 // position to match the actual amount used.
 266  
                 //
 267  
                 // NOTE: The "if" is there just in case. Since we used
 268  
                 // in.mark earler, it should always skip enough.
 269  12
                 in.reset();
 270  
 
 271  12
                 int skipAmount = bufUsed - inf.getRemaining();
 272  12
                 if (in.skip(skipAmount) != skipAmount) {
 273  0
                     throw new IOException();
 274  
                 }
 275  
 
 276  12
                 bufUsed = 0;
 277  
 
 278  12
                 DataInputStream inData = new DataInputStream(in);
 279  
 
 280  
                 // CRC32
 281  12
                 long crcStored = 0;
 282  60
                 for (int i = 0; i < 4; ++i) {
 283  48
                     crcStored |= (long)inData.readUnsignedByte() << (i * 8);
 284  
                 }
 285  
 
 286  12
                 if (crcStored != crc.getValue()) {
 287  0
                     throw new IOException("Gzip-compressed data is corrupt "
 288  
                                           + "(CRC32 error)");
 289  
                 }
 290  
 
 291  
                 // Uncompressed size modulo 2^32 (ISIZE in the spec)
 292  12
                 int isize = 0;
 293  58
                 for (int i = 0; i < 4; ++i) {
 294  48
                     isize |= inData.readUnsignedByte() << (i * 8);
 295  
                 }
 296  
 
 297  10
                 if (isize != memberSize) {
 298  0
                     throw new IOException("Gzip-compressed data is corrupt"
 299  
                                           + "(uncompressed size mismatch)");
 300  
                 }
 301  
 
 302  
                 // See if this is the end of the file.
 303  10
                 if (!decompressConcatenated || !init(false)) {
 304  8
                     inf.end();
 305  8
                     inf = null;
 306  8
                     endReached = true;
 307  8
                     return size == 0 ? -1 : size;
 308  
                 }
 309  
             }
 310  6
         }
 311  
 
 312  6
         return size;
 313  
     }
 314  
 
 315  
     /**
 316  
      * Checks if the signature matches what is expected for a .gz file.
 317  
      *
 318  
      * @param signature the bytes to check
 319  
      * @param length    the number of bytes to check
 320  
      * @return          true if this is a .gz stream, false otherwise
 321  
      *
 322  
      * @since 1.1
 323  
      */
 324  
     public static boolean matches(byte[] signature, int length) {
 325  
 
 326  2
         if (length < 2) {
 327  0
             return false;
 328  
         }
 329  
 
 330  2
         if (signature[0] != 31) {
 331  0
             return false;
 332  
         }
 333  
 
 334  2
         if (signature[1] != -117) {
 335  0
             return false;
 336  
         }
 337  
 
 338  2
         return true;
 339  
     }
 340  
 
 341  
     /**
 342  
      * Closes the input stream (unless it is System.in).
 343  
      *
 344  
      * @since 1.2
 345  
      */
 346  
     @Override
 347  
     public void close() throws IOException {
 348  10
         if (inf != null) {
 349  2
             inf.end();
 350  2
             inf = null;
 351  
         }
 352  
 
 353  10
         if (this.in != System.in) {
 354  10
             this.in.close();
 355  
         }
 356  10
     }
 357  
 }