001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.util.zip.CRC32;
028import java.util.zip.DataFormatException;
029import java.util.zip.Inflater;
030import java.util.zip.ZipEntry;
031import java.util.zip.ZipException;
032
033import org.apache.commons.compress.archivers.ArchiveEntry;
034import org.apache.commons.compress.archivers.ArchiveInputStream;
035
036import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
037import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
038import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
039import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
040
041/**
042 * Implements an input stream that can read Zip archives.
043 *
044 * <p>Note that {@link ZipArchiveEntry#getSize()} may return -1 if the
045 * DEFLATE algorithm is used, as the size information is not available
046 * from the header.</p>
047 *
048 * <p>The {@link ZipFile} class is preferred when reading from files.</p>
049 *
050 * <p>As of Apache Commons Compress it transparently supports Zip64
051 * extensions and thus individual entries and archives larger than 4
052 * GB or with more than 65536 entries.</p>
053 *
054 * @see ZipFile
055 * @NotThreadSafe
056 */
057public class ZipArchiveInputStream extends ArchiveInputStream {
058
059    /**
060     * The zip encoding to use for filenames and the file comment.
061     */
062    private final ZipEncoding zipEncoding;
063
064    /**
065     * Whether to look for and use Unicode extra fields.
066     */
067    private final boolean useUnicodeExtraFields;
068
069    /**
070     * Wrapped stream, will always be a PushbackInputStream.
071     */
072    private final InputStream in;
073
074    /**
075     * Inflater used for all deflated entries.
076     */
077    private final Inflater inf = new Inflater(true);
078
079    /**
080     * Calculates checksums for all entries.
081     */
082    private final CRC32 crc = new CRC32();
083
084    /**
085     * Buffer used to read from the wrapped stream.
086     */
087    private final Buffer buf = new Buffer();
088    /**
089     * The entry that is currently being read.
090     */
091    private CurrentEntry current = null;
092    /**
093     * Whether the stream has been closed.
094     */
095    private boolean closed = false;
096    /**
097     * Whether the stream has reached the central directory - and thus
098     * found all entries.
099     */
100    private boolean hitCentralDirectory = false;
101    /**
102     * When reading a stored entry that uses the data descriptor this
103     * stream has to read the full entry and caches it.  This is the
104     * cache.
105     */
106    private ByteArrayInputStream lastStoredEntry = null;
107
108    /**
109     * Whether the stream will try to read STORED entries that use a
110     * data descriptor.
111     */
112    private boolean allowStoredEntriesWithDataDescriptor = false;
113
114    private static final int LFH_LEN = 30;
115    /*
116      local file header signature     4 bytes  (0x04034b50)
117      version needed to extract       2 bytes
118      general purpose bit flag        2 bytes
119      compression method              2 bytes
120      last mod file time              2 bytes
121      last mod file date              2 bytes
122      crc-32                          4 bytes
123      compressed size                 4 bytes
124      uncompressed size               4 bytes
125      file name length                2 bytes
126      extra field length              2 bytes
127    */
128
129    private static final int CFH_LEN = 46;
130    /*
131        central file header signature   4 bytes  (0x02014b50)
132        version made by                 2 bytes
133        version needed to extract       2 bytes
134        general purpose bit flag        2 bytes
135        compression method              2 bytes
136        last mod file time              2 bytes
137        last mod file date              2 bytes
138        crc-32                          4 bytes
139        compressed size                 4 bytes
140        uncompressed size               4 bytes
141        file name length                2 bytes
142        extra field length              2 bytes
143        file comment length             2 bytes
144        disk number start               2 bytes
145        internal file attributes        2 bytes
146        external file attributes        4 bytes
147        relative offset of local header 4 bytes
148    */
149
150    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
151
152    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
153    private final byte[] LFH_BUF = new byte[LFH_LEN];
154    private final byte[] SKIP_BUF = new byte[1024];
155    private final byte[] SHORT_BUF = new byte[SHORT];
156    private final byte[] WORD_BUF = new byte[WORD];
157    private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];
158
159    private int entriesRead = 0;
160
/**
 * Creates a ZIP reading stream on top of the given stream, passing
 * {@link ZipEncodingHelper#UTF8} as the file name encoding to the
 * two-argument constructor.
 *
 * @param inputStream the stream to read the archive from
 */
public ZipArchiveInputStream(InputStream inputStream) {
    this(inputStream, ZipEncodingHelper.UTF8);
}
164
/**
 * Creates a ZIP reading stream on top of the given stream with the
 * use of Unicode extra fields switched on.
 *
 * @param inputStream the stream to read the archive from
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @since 1.5
 */
public ZipArchiveInputStream(InputStream inputStream, String encoding) {
    this(inputStream, encoding, true);
}
173
/**
 * Creates a ZIP reading stream on top of the given stream that does
 * not try to read STORED entries that use a data descriptor.
 *
 * @param inputStream the stream to read the archive from
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 */
public ZipArchiveInputStream(InputStream inputStream,
                             String encoding,
                             boolean useUnicodeExtraFields) {
    this(inputStream, encoding, useUnicodeExtraFields, false);
}
185
/**
 * Creates a ZIP reading stream on top of the given stream.
 *
 * <p>The given stream is wrapped into a {@link PushbackInputStream}
 * whose push-back capacity equals the length of the internal read
 * buffer.</p>
 *
 * @param inputStream the stream to read the archive from
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 * @param allowStoredEntriesWithDataDescriptor whether the stream
 * will try to read STORED entries that use a data descriptor
 * @since 1.1
 */
public ZipArchiveInputStream(InputStream inputStream,
                             String encoding,
                             boolean useUnicodeExtraFields,
                             boolean allowStoredEntriesWithDataDescriptor) {
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.useUnicodeExtraFields = useUnicodeExtraFields;
    this.allowStoredEntriesWithDataDescriptor =
        allowStoredEntriesWithDataDescriptor;
    // sized like the read buffer so a buffer's worth of bytes can be unread
    this.in = new PushbackInputStream(inputStream, buf.buf.length);
}
205
206    public ZipArchiveEntry getNextZipEntry() throws IOException {
207        boolean firstEntry = true;
208        if (closed || hitCentralDirectory) {
209            return null;
210        }
211        if (current != null) {
212            closeEntry();
213            firstEntry = false;
214        }
215
216        try {
217            if (firstEntry) {
218                // split archives have a special signature before the
219                // first local file header - look for it and fail with
220                // the appropriate error message if this is a split
221                // archive.
222                readFirstLocalFileHeader(LFH_BUF);
223            } else {
224                readFully(LFH_BUF);
225            }
226        } catch (EOFException e) {
227            return null;
228        }
229
230        ZipLong sig = new ZipLong(LFH_BUF);
231        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
232            hitCentralDirectory = true;
233            skipRemainderOfArchive();
234        }
235        if (!sig.equals(ZipLong.LFH_SIG)) {
236            return null;
237        }
238
239        int off = WORD;
240        current = new CurrentEntry();
241
242        int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
243        off += SHORT;
244        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT)
245                                  & ZipFile.NIBLET_MASK);
246
247        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
248        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
249        final ZipEncoding entryEncoding =
250            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
251        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
252        current.entry.setGeneralPurposeBit(gpFlag);
253
254        off += SHORT;
255
256        current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
257        off += SHORT;
258
259        long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
260        current.entry.setTime(time);
261        off += WORD;
262
263        ZipLong size = null, cSize = null;
264        if (!current.hasDataDescriptor) {
265            current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
266            off += WORD;
267
268            cSize = new ZipLong(LFH_BUF, off);
269            off += WORD;
270
271            size = new ZipLong(LFH_BUF, off);
272            off += WORD;
273        } else {
274            off += 3 * WORD;
275        }
276
277        int fileNameLen = ZipShort.getValue(LFH_BUF, off);
278
279        off += SHORT;
280
281        int extraLen = ZipShort.getValue(LFH_BUF, off);
282        off += SHORT;
283
284        byte[] fileName = new byte[fileNameLen];
285        readFully(fileName);
286        current.entry.setName(entryEncoding.decode(fileName), fileName);
287
288        byte[] extraData = new byte[extraLen];
289        readFully(extraData);
290        current.entry.setExtra(extraData);
291
292        if (!hasUTF8Flag && useUnicodeExtraFields) {
293            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName,
294                                                     null);
295        }
296
297        processZip64Extra(size, cSize);
298        entriesRead++;
299        return current.entry;
300    }
301
302    /**
303     * Fills the given array with the first local file header and
304     * deals with splitting/spanning markers that may prefix the first
305     * LFH.
306     */
307    private void readFirstLocalFileHeader(byte[] lfh) throws IOException {
308        readFully(lfh);
309        ZipLong sig = new ZipLong(lfh);
310        if (sig.equals(ZipLong.DD_SIG)) {
311            throw new
312                UnsupportedZipFeatureException(UnsupportedZipFeatureException
313                                               .Feature.SPLITTING);
314        }
315        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
316            // The archive is not really split as only one segment was
317            // needed in the end.  Just skip over the marker.
318            byte[] missedLfhBytes = new byte[4];
319            readFully(missedLfhBytes);
320            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
321            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
322        }
323    }
324
325    /**
326     * Records whether a Zip64 extra is present and sets the size
327     * information from it if sizes are 0xFFFFFFFF and the entry
328     * doesn't use a data descriptor.
329     */
330    private void processZip64Extra(ZipLong size, ZipLong cSize) {
331        Zip64ExtendedInformationExtraField z64 =
332            (Zip64ExtendedInformationExtraField)
333            current.entry.getExtraField(Zip64ExtendedInformationExtraField
334                                        .HEADER_ID);
335        current.usesZip64 = z64 != null;
336        if (!current.hasDataDescriptor) {
337            if (current.usesZip64 && (cSize.equals(ZipLong.ZIP64_MAGIC)
338                                      || size.equals(ZipLong.ZIP64_MAGIC))
339                ) {
340                current.entry.setCompressedSize(z64.getCompressedSize() // z64 cannot be null here
341                                                .getLongValue());
342                current.entry.setSize(z64.getSize().getLongValue());
343            } else {
344                current.entry.setCompressedSize(cSize.getValue());
345                current.entry.setSize(size.getValue());
346            }
347        }
348    }
349
350    /** {@inheritDoc} */
351    @Override
352    public ArchiveEntry getNextEntry() throws IOException {
353        return getNextZipEntry();
354    }
355
356    /**
357     * Whether this class is able to read the given entry.
358     *
359     * <p>May return false if it is set up to use encryption or a
360     * compression method that hasn't been implemented yet.</p>
361     * @since 1.1
362     */
363    @Override
364    public boolean canReadEntryData(ArchiveEntry ae) {
365        if (ae instanceof ZipArchiveEntry) {
366            ZipArchiveEntry ze = (ZipArchiveEntry) ae;
367            return ZipUtil.canHandleEntryData(ze)
368                && supportsDataDescriptorFor(ze);
369
370        }
371        return false;
372    }
373
374    @Override
375    public int read(byte[] buffer, int start, int length) throws IOException {
376        if (closed) {
377            throw new IOException("The stream is closed");
378        }
379        if (inf.finished() || current == null) {
380            return -1;
381        }
382
383        // avoid int overflow, check null buffer
384        if (start <= buffer.length && length >= 0 && start >= 0
385            && buffer.length - start >= length) {
386            ZipUtil.checkRequestedFeatures(current.entry);
387            if (!supportsDataDescriptorFor(current.entry)) {
388                throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException
389                                                         .Feature
390                                                         .DATA_DESCRIPTOR,
391                                                         current.entry);
392            }
393
394            if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
395                return readStored(buffer, start, length);
396            }
397            return readDeflated(buffer, start, length);
398        }
399        throw new ArrayIndexOutOfBoundsException();
400    }
401
402    /**
403     * Implementation of read for STORED entries.
404     */
405    private int readStored(byte[] buffer, int start, int length)
406        throws IOException {
407
408        if (current.hasDataDescriptor) {
409            if (lastStoredEntry == null) {
410                readStoredEntry();
411            }
412            return lastStoredEntry.read(buffer, start, length);
413        }
414
415        long csize = current.entry.getSize();
416        if (current.bytesRead >= csize) {
417            return -1;
418        }
419
420        if (buf.offsetInBuffer >= buf.lengthOfLastRead) {
421            buf.offsetInBuffer = 0;
422            if ((buf.lengthOfLastRead = in.read(buf.buf)) == -1) {
423                return -1;
424            }
425            count(buf.lengthOfLastRead);
426            current.bytesReadFromStream += buf.lengthOfLastRead;
427        }
428
429        int availableBytesInBuffer = buf.lengthOfLastRead - buf.offsetInBuffer;
430        int toRead = Math.min(availableBytesInBuffer, length);
431        if ((csize - current.bytesRead) < toRead) {
432            // if it is smaller than toRead then it fits into an int
433            toRead = (int) (csize - current.bytesRead);
434        }
435        System.arraycopy(buf.buf, buf.offsetInBuffer, buffer, start, toRead);
436        buf.offsetInBuffer += toRead;
437        current.bytesRead += toRead;
438        crc.update(buffer, start, toRead);
439        return toRead;
440    }
441
442    /**
443     * Implementation of read for DEFLATED entries.
444     */
445    private int readDeflated(byte[] buffer, int start, int length)
446        throws IOException {
447        int read = readFromInflater(buffer, start, length);
448        if (read == 0) {
449            if (inf.finished()) {
450                return -1;
451            } else if (inf.needsDictionary()) {
452                throw new ZipException("This archive needs a preset dictionary"
453                                       + " which is not supported by Commons"
454                                       + " Compress.");
455            } else if (buf.lengthOfLastRead == -1) {
456                throw new IOException("Truncated ZIP file");
457            }
458        }
459        crc.update(buffer, start, read);
460        return read;
461    }
462
463    /**
464     * Potentially reads more bytes to fill the inflater's buffer and
465     * reads from it.
466     */
467    private int readFromInflater(byte[] buffer, int start, int length)
468        throws IOException {
469        int read = 0;
470        do {
471            if (inf.needsInput()) {
472                fill();
473                if (buf.lengthOfLastRead > 0) {
474                    current.bytesReadFromStream += buf.lengthOfLastRead;
475                } else {
476                    break;
477                }
478            }
479            try {
480                read = inf.inflate(buffer, start, length);
481            } catch (DataFormatException e) {
482                throw new ZipException(e.getMessage());
483            }
484        } while (read == 0 && inf.needsInput());
485        return read;
486    }
487
488    @Override
489    public void close() throws IOException {
490        if (!closed) {
491            closed = true;
492            in.close();
493            inf.end();
494        }
495    }
496
497    /**
498     * Skips over and discards value bytes of data from this input
499     * stream.
500     *
501     * <p>This implementation may end up skipping over some smaller
502     * number of bytes, possibly 0, if and only if it reaches the end
503     * of the underlying stream.</p>
504     *
505     * <p>The actual number of bytes skipped is returned.</p>
506     *
507     * @param value the number of bytes to be skipped.
508     * @return the actual number of bytes skipped.
509     * @throws IOException - if an I/O error occurs.
510     * @throws IllegalArgumentException - if value is negative.
511     */
512    @Override
513    public long skip(long value) throws IOException {
514        if (value >= 0) {
515            long skipped = 0;
516            while (skipped < value) {
517                long rem = value - skipped;
518                int x = read(SKIP_BUF, 0,
519                             (int) (SKIP_BUF.length > rem ? rem
520                                    : SKIP_BUF.length));
521                if (x == -1) {
522                    return skipped;
523                }
524                skipped += x;
525            }
526            return skipped;
527        }
528        throw new IllegalArgumentException();
529    }
530
531    /**
532     * Checks if the signature matches what is expected for a zip file.
533     * Does not currently handle self-extracting zips which may have arbitrary
534     * leading content.
535     *
536     * @param signature
537     *            the bytes to check
538     * @param length
539     *            the number of bytes to check
540     * @return true, if this stream is a zip archive stream, false otherwise
541     */
542    public static boolean matches(byte[] signature, int length) {
543        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
544            return false;
545        }
546
547        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
548            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
549            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
550            || checksig(signature,
551                        ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
552    }
553
554    private static boolean checksig(byte[] signature, byte[] expected){
555        for (int i = 0; i < expected.length; i++) {
556            if (signature[i] != expected[i]) {
557                return false;
558            }
559        }
560        return true;
561    }
562
563    /**
564     * Closes the current ZIP archive entry and positions the underlying
565     * stream to the beginning of the next entry. All per-entry variables
566     * and data structures are cleared.
567     * <p>
568     * If the compressed size of this entry is included in the entry header,
569     * then any outstanding bytes are simply skipped from the underlying
570     * stream without uncompressing them. This allows an entry to be safely
571     * closed even if the compression method is unsupported.
572     * <p>
573     * In case we don't know the compressed size of this entry or have
574     * already buffered too much data from the underlying stream to support
575     * uncompression, then the uncompression process is completed and the
576     * end position of the stream is adjusted based on the result of that
577     * process.
578     *
579     * @throws IOException if an error occurs
580     */
581    private void closeEntry() throws IOException {
582        if (closed) {
583            throw new IOException("The stream is closed");
584        }
585        if (current == null) {
586            return;
587        }
588
589        // Ensure all entry bytes are read
590        if (current.bytesReadFromStream <= current.entry.getCompressedSize()
591            && !current.hasDataDescriptor) {
592            drainCurrentEntryData();
593        } else {
594            skip(Long.MAX_VALUE);
595
596            long inB =
597                current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
598                ? getBytesInflated() : current.bytesRead;
599
600            // this is at most a single read() operation and can't
601            // exceed the range of int
602            int diff = (int) (current.bytesReadFromStream - inB);
603
604            // Pushback any required bytes
605            if (diff > 0) {
606                pushback(buf.buf, buf.lengthOfLastRead - diff, diff);
607            }
608        }
609
610        if (lastStoredEntry == null && current.hasDataDescriptor) {
611            readDataDescriptor();
612        }
613
614        inf.reset();
615        buf.reset();
616        crc.reset();
617        current = null;
618        lastStoredEntry = null;
619    }
620
621    /**
622     * Read all data of the current entry from the underlying stream
623     * that hasn't been read, yet.
624     */
625    private void drainCurrentEntryData() throws IOException {
626        long remaining = current.entry.getCompressedSize()
627            - current.bytesReadFromStream;
628        while (remaining > 0) {
629            long n = in.read(buf.buf, 0, (int) Math.min(buf.buf.length,
630                                                        remaining));
631            if (n < 0) {
632                throw new EOFException(
633                                       "Truncated ZIP entry: " + current.entry.getName());
634            } else {
635                count(n);
636                remaining -= n;
637            }
638        }
639    }
640
641    /**
642     * Get the number of bytes Inflater has actually processed.
643     *
644     * <p>for Java &lt; Java7 the getBytes* methods in
645     * Inflater/Deflater seem to return unsigned ints rather than
646     * longs that start over with 0 at 2^32.</p>
647     *
648     * <p>The stream knows how many bytes it has read, but not how
649     * many the Inflater actually consumed - it should be between the
650     * total number of bytes read for the entry and the total number
651     * minus the last read operation.  Here we just try to make the
652     * value close enough to the bytes we've read by assuming the
653     * number of bytes consumed must be smaller than (or equal to) the
654     * number of bytes read but not smaller by more than 2^32.</p>
655     */
656    private long getBytesInflated() {
657        long inB = inf.getBytesRead();
658        if (current.bytesReadFromStream >= TWO_EXP_32) {
659            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
660                inB += TWO_EXP_32;
661            }
662        }
663        return inB;
664    }
665
666    private void fill() throws IOException {
667        if (closed) {
668            throw new IOException("The stream is closed");
669        }
670        if ((buf.lengthOfLastRead = in.read(buf.buf)) > 0) {
671            count(buf.lengthOfLastRead);
672            inf.setInput(buf.buf, 0, buf.lengthOfLastRead);
673        }
674    }
675
676    private void readFully(byte[] b) throws IOException {
677        int count = 0, x = 0;
678        while (count != b.length) {
679            count += x = in.read(b, count, b.length - count);
680            if (x == -1) {
681                throw new EOFException();
682            }
683            count(x);
684        }
685    }
686
687    private void readDataDescriptor() throws IOException {
688        readFully(WORD_BUF);
689        ZipLong val = new ZipLong(WORD_BUF);
690        if (ZipLong.DD_SIG.equals(val)) {
691            // data descriptor with signature, skip sig
692            readFully(WORD_BUF);
693            val = new ZipLong(WORD_BUF);
694        }
695        current.entry.setCrc(val.getValue());
696
697        // if there is a ZIP64 extra field, sizes are eight bytes
698        // each, otherwise four bytes each.  Unfortunately some
699        // implementations - namely Java7 - use eight bytes without
700        // using a ZIP64 extra field -
701        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
702
703        // just read 16 bytes and check whether bytes nine to twelve
704        // look like one of the signatures of what could follow a data
705        // descriptor (ignoring archive decryption headers for now).
706        // If so, push back eight bytes and assume sizes are four
707        // bytes, otherwise sizes are eight bytes each.
708        readFully(TWO_DWORD_BUF);
709        ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
710        if (potentialSig.equals(ZipLong.CFH_SIG)
711            || potentialSig.equals(ZipLong.LFH_SIG)) {
712            pushback(TWO_DWORD_BUF, DWORD, DWORD);
713            current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
714            current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
715        } else {
716            current.entry
717                .setCompressedSize(ZipEightByteInteger
718                                   .getLongValue(TWO_DWORD_BUF));
719            current.entry.setSize(ZipEightByteInteger
720                                  .getLongValue(TWO_DWORD_BUF, DWORD));
721        }
722    }
723
724    /**
725     * Whether this entry requires a data descriptor this library can work with.
726     *
727     * @return true if allowStoredEntriesWithDataDescriptor is true,
728     * the entry doesn't require any data descriptor or the method is
729     * DEFLATED.
730     */
731    private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
732        return allowStoredEntriesWithDataDescriptor ||
733            !entry.getGeneralPurposeBit().usesDataDescriptor()
734            || entry.getMethod() == ZipEntry.DEFLATED;
735    }
736
737    /**
738     * Caches a stored entry that uses the data descriptor.
739     *
740     * <ul>
741     *   <li>Reads a stored entry until the signature of a local file
742     *     header, central directory header or data descriptor has been
743     *     found.</li>
744     *   <li>Stores all entry data in lastStoredEntry.</p>
745     *   <li>Rewinds the stream to position at the data
746     *     descriptor.</li>
747     *   <li>reads the data descriptor</li>
748     * </ul>
749     *
750     * <p>After calling this method the entry should know its size,
751     * the entry's data is cached and the stream is positioned at the
752     * next local file or central directory header.</p>
753     */
754    private void readStoredEntry() throws IOException {
755        ByteArrayOutputStream bos = new ByteArrayOutputStream();
756        int off = 0;
757        boolean done = false;
758
759        // length of DD without signature
760        int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
761
762        while (!done) {
763            int r = in.read(buf.buf, off,
764                            ZipArchiveOutputStream.BUFFER_SIZE - off);
765            if (r <= 0) {
766                // read the whole archive without ever finding a
767                // central directory
768                throw new IOException("Truncated ZIP file");
769            }
770            if (r + off < 4) {
771                // buf is too small to check for a signature, loop
772                off += r;
773                continue;
774            }
775
776            done = bufferContainsSignature(bos, off, r, ddLen);
777            if (!done) {
778                off = cacheBytesRead(bos, off, r, ddLen);
779            }
780        }
781
782        byte[] b = bos.toByteArray();
783        lastStoredEntry = new ByteArrayInputStream(b);
784    }
785
786    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
787    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
788    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
789
790    /**
791     * Checks whether the current buffer contains the signature of a
792     * &quot;data decsriptor&quot;, &quot;local file header&quot; or
793     * &quot;central directory entry&quot;.
794     *
795     * <p>If it contains such a signature, reads the data descriptor
796     * and positions the stream right after the data descriptor.</p>
797     */
798    private boolean bufferContainsSignature(ByteArrayOutputStream bos,
799                                            int offset, int lastRead,
800                                            int expectedDDLen)
801        throws IOException {
802        boolean done = false;
803        int readTooMuch = 0;
804        for (int i = 0; !done && i < lastRead - 4; i++) {
805            if (buf.buf[i] == LFH[0] && buf.buf[i + 1] == LFH[1]) {
806                if ((buf.buf[i + 2] == LFH[2] && buf.buf[i + 3] == LFH[3])
807                    || (buf.buf[i] == CFH[2] && buf.buf[i + 3] == CFH[3])) {
808                    // found a LFH or CFH:
809                    readTooMuch = offset + lastRead - i - expectedDDLen;
810                    done = true;
811                }
812                else if (buf.buf[i + 2] == DD[2] && buf.buf[i + 3] == DD[3]) {
813                    // found DD:
814                    readTooMuch = offset + lastRead - i;
815                    done = true;
816                }
817                if (done) {
818                    // * push back bytes read in excess as well as the data
819                    //   descriptor
820                    // * copy the remaining bytes to cache
821                    // * read data descriptor
822                    pushback(buf.buf, offset + lastRead - readTooMuch,
823                             readTooMuch);
824                    bos.write(buf.buf, 0, i);
825                    readDataDescriptor();
826                }
827            }
828        }
829        return done;
830    }
831
832    /**
833     * If the last read bytes could hold a data descriptor and an
834     * incomplete signature then save the last bytes to the front of
835     * the buffer and cache everything in front of the potential data
836     * descriptor into the given ByteArrayOutputStream.
837     *
838     * <p>Data descriptor plus incomplete signature (3 bytes in the
839     * worst case) can be 20 bytes max.</p>
840     */
841    private int cacheBytesRead(ByteArrayOutputStream bos, int offset,
842                               int lastRead, int expecteDDLen) {
843        final int cacheable = offset + lastRead - expecteDDLen - 3;
844        if (cacheable > 0) {
845            bos.write(buf.buf, 0, cacheable);
846            System.arraycopy(buf.buf, cacheable, buf.buf, 0,
847                             expecteDDLen + 3);
848            offset = expecteDDLen + 3;
849        } else {
850            offset += lastRead;
851        }
852        return offset;
853    }
854
855    private void pushback(byte[] buf, int offset, int length)
856        throws IOException {
857        ((PushbackInputStream) in).unread(buf, offset, length);
858        pushedBackBytes(length);
859    }
860
861    // End of Central Directory Record
862    //   end of central dir signature    4 bytes  (0x06054b50)
863    //   number of this disk             2 bytes
864    //   number of the disk with the
865    //   start of the central directory  2 bytes
866    //   total number of entries in the
867    //   central directory on this disk  2 bytes
868    //   total number of entries in
869    //   the central directory           2 bytes
870    //   size of the central directory   4 bytes
871    //   offset of start of central
872    //   directory with respect to
873    //   the starting disk number        4 bytes
874    //   .ZIP file comment length        2 bytes
875    //   .ZIP file comment       (variable size)
876    //
877
878    /**
879     * Reads the stream until it find the "End of central directory
880     * record" and consumes it as well.
881     */
882    private void skipRemainderOfArchive() throws IOException {
883        // skip over central directory. One LFH has been read too much
884        // already.  The calculation discounts file names and extra
885        // data so it will be too short.
886        realSkip(entriesRead * CFH_LEN - LFH_LEN);
887        findEocdRecord();
888        realSkip(ZipFile.MIN_EOCD_SIZE
889                 - WORD /* signature */ - SHORT /* comment len */);
890        readFully(SHORT_BUF);
891        // file comment
892        realSkip(ZipShort.getValue(SHORT_BUF));
893    }
894
895    /**
896     * Reads forward until the signature of the &quot;End of central
897     * directory&quot; recod is found.
898     */
899    private void findEocdRecord() throws IOException {
900        int currentByte = -1;
901        boolean skipReadCall = false;
902        while (skipReadCall || (currentByte = readOneByte()) > -1) {
903            skipReadCall = false;
904            if (!isFirstByteOfEocdSig(currentByte)) {
905                continue;
906            }
907            currentByte = readOneByte();
908            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
909                if (currentByte == -1) {
910                    break;
911                }
912                skipReadCall = isFirstByteOfEocdSig(currentByte);
913                continue;
914            }
915            currentByte = readOneByte();
916            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
917                if (currentByte == -1) {
918                    break;
919                }
920                skipReadCall = isFirstByteOfEocdSig(currentByte);
921                continue;
922            }
923            currentByte = readOneByte();
924            if (currentByte == -1
925                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
926                break;
927            }
928            skipReadCall = isFirstByteOfEocdSig(currentByte);
929        }
930    }
931
932    /**
933     * Skips bytes by reading from the underlying stream rather than
934     * the (potentially inflating) archive stream - which {@link
935     * #skip} would do.
936     *
937     * Also updates bytes-read counter.
938     */
939    private void realSkip(long value) throws IOException {
940        if (value >= 0) {
941            long skipped = 0;
942            while (skipped < value) {
943                long rem = value - skipped;
944                int x = in.read(SKIP_BUF, 0,
945                                (int) (SKIP_BUF.length > rem ? rem
946                                       : SKIP_BUF.length));
947                if (x == -1) {
948                    return;
949                }
950                count(x);
951                skipped += x;
952            }
953            return;
954        }
955        throw new IllegalArgumentException();
956    }
957
958    /**
959     * Reads bytes by reading from the underlying stream rather than
960     * the (potentially inflating) archive stream - which {@link
961     * #read} would do.
962     *
963     * Also updates bytes-read counter.
964     */
965    private int readOneByte() throws IOException {
966        int b = in.read();
967        if (b != -1) {
968            count(1);
969        }
970        return b;
971    }
972
973    private boolean isFirstByteOfEocdSig(int b) {
974        return b == ZipArchiveOutputStream.EOCD_SIG[0];
975    }
976
977    /**
978     * Structure collecting information for the entry that is
979     * currently being read.
980     */
981    private static final class CurrentEntry {
982        /**
983         * Current ZIP entry.
984         */
985        private final ZipArchiveEntry entry = new ZipArchiveEntry();
986        /**
987         * Does the entry use a data descriptor?
988         */
989        private boolean hasDataDescriptor;
990        /**
991         * Does the entry have a ZIP64 extended information extra field.
992         */
993        private boolean usesZip64;
994        /**
995         * Number of bytes of entry content read by the client if the
996         * entry is STORED.
997         */
998        private long bytesRead;
999        /**
1000         * Number of bytes of entry content read so from the stream.
1001         *
1002         * <p>This may be more than the actual entry's length as some
1003         * stuff gets buffered up and needs to be pushed back when the
1004         * end of the entry has been reached.</p>
1005         */
1006        private long bytesReadFromStream;
1007    }
1008
1009    /**
1010     * Contains a temporary buffer used to read from the wrapped
1011     * stream together with some information needed for internal
1012     * housekeeping.
1013     */
1014    private static final class Buffer {
1015        /**
1016         * Buffer used as temporary buffer when reading from the stream.
1017         */
1018        private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE];
1019        /**
1020         * {@link #buf buf} may contain data the client hasnt read, yet,
1021         * this is the first byte that hasn't been read so far.
1022         */
1023        private int offsetInBuffer = 0;
1024        /**
1025         * Number of bytes read from the wrapped stream into {@link #buf
1026         * buf} with the last read operation.
1027         */
1028        private int lengthOfLastRead = 0;
1029        /**
1030         * Reset internal housekeeping.
1031         */
1032        private void reset() {
1033            offsetInBuffer = lengthOfLastRead = 0;
1034        }
1035    }
1036}