001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.zip;
019
020import java.io.BufferedInputStream;
021import java.io.Closeable;
022import java.io.EOFException;
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.nio.ByteBuffer;
027import java.nio.channels.FileChannel;
028import java.nio.channels.SeekableByteChannel;
029import java.nio.file.Files;
030import java.nio.file.StandardOpenOption;
031import java.util.Arrays;
032import java.util.Collections;
033import java.util.Comparator;
034import java.util.Enumeration;
035import java.util.EnumSet;
036import java.util.HashMap;
037import java.util.LinkedList;
038import java.util.List;
039import java.util.Map;
040import java.util.zip.Inflater;
041import java.util.zip.InflaterInputStream;
042import java.util.zip.ZipException;
043
044import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
045import org.apache.commons.compress.utils.IOUtils;
046
047import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
048import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
049import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
050import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
051import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
052
053/**
 * Replacement for <code>java.util.zip.ZipFile</code>.
055 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives.  Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
062 *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway.  Like
 * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the
 * covers and supports compressed and uncompressed entries.  As of
 * Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
070 *
071 * <p>The method signatures mimic the ones of
072 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
073 *
074 * <ul>
075 *   <li>There is no getName method.</li>
076 *   <li>entries has been renamed to getEntries.</li>
077 *   <li>getEntries and getEntry return
078 *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
079 *   instances.</li>
080 *   <li>close is allowed to throw IOException.</li>
081 * </ul>
082 *
083 */
084public class ZipFile implements Closeable {
    /** Initial capacity handed to the name lookup map. */
    private static final int HASH_SIZE = 509;
    /** Mask that keeps the lowest four bits of a value. */
    static final int NIBLET_MASK = 0x0f;
    /** Width of one byte in bits, used as a shift distance. */
    static final int BYTE_SHIFT = 8;
    // Symbolic names for the indices 0 to 3; no use of them is visible in
    // this part of the file.
    private static final int POS_0 = 0;
    private static final int POS_1 = 1;
    private static final int POS_2 = 2;
    private static final int POS_3 = 3;
092
    /**
     * List of entries in the order they appear inside the central
     * directory.
     */
    private final List<ZipArchiveEntry> entries =
        new LinkedList<>();

    /**
     * Maps an entry name to the list of all entries carrying that name.
     */
    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
        new HashMap<>(HASH_SIZE);

    /**
     * The encoding to use for filenames and the file comment, exactly
     * as it has been handed to the constructor.
     *
     * <p>For a list of possible values see <a
     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
     * The constructors that don't take an encoding pass UTF-8.</p>
     */
    private final String encoding;

    /**
     * The zip encoding to use for filenames and the file comment,
     * obtained from {@link #encoding}.
     */
    private final ZipEncoding zipEncoding;

    /**
     * Name of the actual source; printed by {@link #finalize()} when an
     * unclosed instance is cleaned up.
     */
    private final String archiveName;

    /**
     * The actual data source.
     */
    private final SeekableByteChannel archive;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /**
     * Whether the file is closed.
     *
     * <p>Starts out as true, is set to false by the constructor once
     * the archive has been read successfully and is set to true again
     * by {@link #close()}.</p>
     */
    private volatile boolean closed = true;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // each ByteBuffer below wraps the byte array of the matching name, so
    // filling the buffer fills the array
    private final byte[] dwordBuf = new byte[DWORD];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] cfhBuf = new byte[CFH_LEN];
    private final byte[] shortBuf = new byte[SHORT];
    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
148
149    /**
150     * Opens the given file for reading, assuming "UTF8" for file names.
151     *
152     * @param f the archive.
153     *
154     * @throws IOException if an error occurs while reading the file.
155     */
156    public ZipFile(final File f) throws IOException {
157        this(f, ZipEncodingHelper.UTF8);
158    }
159
160    /**
161     * Opens the given file for reading, assuming "UTF8".
162     *
163     * @param name name of the archive.
164     *
165     * @throws IOException if an error occurs while reading the file.
166     */
167    public ZipFile(final String name) throws IOException {
168        this(new File(name), ZipEncodingHelper.UTF8);
169    }
170
171    /**
172     * Opens the given file for reading, assuming the specified
173     * encoding for file names, scanning unicode extra fields.
174     *
175     * @param name name of the archive.
176     * @param encoding the encoding to use for file names, use null
177     * for the platform's default encoding
178     *
179     * @throws IOException if an error occurs while reading the file.
180     */
181    public ZipFile(final String name, final String encoding) throws IOException {
182        this(new File(name), encoding, true);
183    }
184
    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names and scanning for unicode extra fields.
     *
     * <p>Delegates to the three-argument constructor with
     * {@code useUnicodeExtraFields} set to {@code true}.</p>
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f, final String encoding) throws IOException {
        this(f, encoding, true);
    }
198
199    /**
200     * Opens the given file for reading, assuming the specified
201     * encoding for file names.
202     *
203     * @param f the archive.
204     * @param encoding the encoding to use for file names, use null
205     * for the platform's default encoding
206     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
207     * Extra Fields (if present) to set the file names.
208     *
209     * @throws IOException if an error occurs while reading the file.
210     */
211    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
212        throws IOException {
213        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
214             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true);
215    }
216
217    /**
218     * Opens the given channel for reading, assuming "UTF8" for file names.
219     *
220     * <p>{@link
221     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
222     * allows you to read from an in-memory archive.</p>
223     *
224     * @param channel the archive.
225     *
226     * @throws IOException if an error occurs while reading the file.
227     * @since 1.13
228     */
229    public ZipFile(final SeekableByteChannel channel)
230            throws IOException {
231        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
232    }
233
234    /**
235     * Opens the given channel for reading, assuming the specified
236     * encoding for file names.
237     *
238     * <p>{@link
239     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
240     * allows you to read from an in-memory archive.</p>
241     *
242     * @param channel the archive.
243     * @param encoding the encoding to use for file names, use null
244     * for the platform's default encoding
245     *
246     * @throws IOException if an error occurs while reading the file.
247     * @since 1.13
248     */
249    public ZipFile(final SeekableByteChannel channel, final String encoding)
250        throws IOException {
251        this(channel, "unknown archive", encoding, true);
252    }
253
    /**
     * Opens the given channel for reading, assuming the specified
     * encoding for file names.
     *
     * <p>The channel has been supplied by the caller, so it is not
     * closed by this constructor if reading the archive fails.</p>
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the archive.
     * @param archiveName name of the archive, used for error messages only.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     *
     * @throws IOException if an error occurs while reading the file.
     * @since 1.13
     */
    public ZipFile(final SeekableByteChannel channel, final String archiveName,
                   final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
        this(channel, archiveName, encoding, useUnicodeExtraFields, false);
    }
277
278    private ZipFile(final SeekableByteChannel channel, final String archiveName,
279                    final String encoding, final boolean useUnicodeExtraFields,
280                    final boolean closeOnError)
281        throws IOException {
282        this.archiveName = archiveName;
283        this.encoding = encoding;
284        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
285        this.useUnicodeExtraFields = useUnicodeExtraFields;
286        archive = channel;
287        boolean success = false;
288        try {
289            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
290                populateFromCentralDirectory();
291            resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
292            success = true;
293        } finally {
294            closed = !success;
295            if (!success && closeOnError) {
296                IOUtils.closeQuietly(archive);
297            }
298        }
299    }
300
    /**
     * The encoding to use for filenames and the file comment, as it
     * has been passed to the constructor.
     *
     * @return null if using the platform's default character encoding.
     */
    public String getEncoding() {
        return encoding;
    }
309
    /**
     * Closes the archive by closing the underlying channel.
     *
     * @throws IOException if an error occurs closing the archive.
     */
    @Override
    public void close() throws IOException {
        // The flag is set before the channel is closed, so finalize()
        // will not attempt another close even if closing fails below.
        // Apart from this method only the constructor writes the flag;
        // it is volatile and no further synchronization is used.
        closed = true;

        archive.close();
    }
323
    /**
     * Closes a ZipFile quietly: throws no I/O fault and does nothing
     * on a null parameter.
     *
     * @param zipfile file to close, can be null
     */
    public static void closeQuietly(final ZipFile zipfile) {
        IOUtils.closeQuietly(zipfile);
    }
332
    /**
     * Returns all entries.
     *
     * <p>Entries will be returned in the same order they appear
     * within the archive's central directory.</p>
     *
     * @return all entries as {@link ZipArchiveEntry} instances
     */
    public Enumeration<ZipArchiveEntry> getEntries() {
        // the enumeration is backed by the internal list, no copy is made
        return Collections.enumeration(entries);
    }
344
345    /**
346     * Returns all entries in physical order.
347     *
348     * <p>Entries will be returned in the same order their contents
349     * appear within the archive.</p>
350     *
351     * @return all entries as {@link ZipArchiveEntry} instances
352     *
353     * @since 1.1
354     */
355    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
356        final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]);
357        Arrays.sort(allEntries, offsetComparator);
358        return Collections.enumeration(Arrays.asList(allEntries));
359    }
360
361    /**
362     * Returns a named entry - or {@code null} if no entry by
363     * that name exists.
364     *
365     * <p>If multiple entries with the same name exist the first entry
366     * in the archive's central directory by that name is
367     * returned.</p>
368     *
369     * @param name name of the entry.
370     * @return the ZipArchiveEntry corresponding to the given name - or
371     * {@code null} if not present.
372     */
373    public ZipArchiveEntry getEntry(final String name) {
374        final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
375        return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
376    }
377
378    /**
379     * Returns all named entries in the same order they appear within
380     * the archive's central directory.
381     *
382     * @param name name of the entry.
383     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
384     * given name
385     * @since 1.6
386     */
387    public Iterable<ZipArchiveEntry> getEntries(final String name) {
388        final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
389        return entriesOfThatName != null ? entriesOfThatName
390            : Collections.<ZipArchiveEntry>emptyList();
391    }
392
393    /**
394     * Returns all named entries in the same order their contents
395     * appear within the archive.
396     *
397     * @param name name of the entry.
398     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
399     * given name
400     * @since 1.6
401     */
402    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
403        ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
404        if (nameMap.containsKey(name)) {
405            entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
406            Arrays.sort(entriesOfThatName, offsetComparator);
407        }
408        return Arrays.asList(entriesOfThatName);
409    }
410
    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.  The
     * decision is delegated to {@code ZipUtil.canHandleEntryData}.</p>
     *
     * @since 1.1
     * @param ze the entry
     * @return whether this class is able to read the given entry.
     */
    public boolean canReadEntryData(final ZipArchiveEntry ze) {
        return ZipUtil.canHandleEntryData(ze);
    }
423
424    /**
425     * Expose the raw stream of the archive entry (compressed form).
426     *
427     * <p>This method does not relate to how/if we understand the payload in the
428     * stream, since we really only intend to move it on to somewhere else.</p>
429     *
430     * @param ze The entry to get the stream for
431     * @return The raw input stream containing (possibly) compressed data.
432     * @since 1.11
433     */
434    public InputStream getRawInputStream(final ZipArchiveEntry ze) {
435        if (!(ze instanceof Entry)) {
436            return null;
437        }
438        final long start = ze.getDataOffset();
439        return createBoundedInputStream(start, ze.getCompressedSize());
440    }
441
442
443    /**
444     * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream.
445     * Compression and all other attributes will be as in this file.
446     * <p>This method transfers entries based on the central directory of the zip file.</p>
447     *
448     * @param target The zipArchiveOutputStream to write the entries to
449     * @param predicate A predicate that selects which entries to write
450     * @throws IOException on error
451     */
452    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
453            throws IOException {
454        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
455        while (src.hasMoreElements()) {
456            final ZipArchiveEntry entry = src.nextElement();
457            if (predicate.test( entry)) {
458                target.addRawArchiveEntry(entry, getRawInputStream(entry));
459            }
460        }
461    }
462
463    /**
464     * Returns an InputStream for reading the contents of the given entry.
465     *
466     * @param ze the entry to get the stream for.
467     * @return a stream to read the entry from.
468     * @throws IOException if unable to create an input stream from the zipentry
469     * @throws ZipException if the zipentry uses an unsupported feature
470     */
471    public InputStream getInputStream(final ZipArchiveEntry ze)
472        throws IOException, ZipException {
473        if (!(ze instanceof Entry)) {
474            return null;
475        }
476        // cast valididty is checked just above
477        ZipUtil.checkRequestedFeatures(ze);
478        final long start = ze.getDataOffset();
479        // doesn't get closed if the method is not supported, but doesn't hold any resources either
480        final BoundedInputStream bis =
481            createBoundedInputStream(start, ze.getCompressedSize()); //NOSONAR
482        switch (ZipMethod.getMethodByCode(ze.getMethod())) {
483            case STORED:
484                return bis;
485            case UNSHRINKING:
486                return new UnshrinkingInputStream(bis);
487            case IMPLODING:
488                return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
489                        ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis));
490            case DEFLATED:
491                bis.addDummy();
492                final Inflater inflater = new Inflater(true);
493                return new InflaterInputStream(bis, inflater) {
494                    @Override
495                    public void close() throws IOException {
496                        try {
497                            super.close();
498                        } finally {
499                            inflater.end();
500                        }
501                    }
502                };
503            case BZIP2:
504                return new BZip2CompressorInputStream(bis);
505            case AES_ENCRYPTED:
506            case ENHANCED_DEFLATED:
507            case EXPANDING_LEVEL_1:
508            case EXPANDING_LEVEL_2:
509            case EXPANDING_LEVEL_3:
510            case EXPANDING_LEVEL_4:
511            case JPEG:
512            case LZMA:
513            case PKWARE_IMPLODING:
514            case PPMD:
515            case TOKENIZATION:
516            case UNKNOWN:
517            case WAVPACK:
518            default:
519                throw new ZipException("Found unsupported compression method "
520                                       + ze.getMethod());
521        }
522    }
523
524    /**
525     * <p>
526     * Convenience method to return the entry's content as a String if isUnixSymlink()
527     * returns true for it, otherwise returns null.
528     * </p>
529     *
530     * <p>This method assumes the symbolic link's file name uses the
531     * same encoding that as been specified for this ZipFile.</p>
532     *
533     * @param entry ZipArchiveEntry object that represents the symbolic link
534     * @return entry's content as a String
535     * @throws IOException problem with content's input stream
536     * @since 1.5
537     */
538    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
539        if (entry != null && entry.isUnixSymlink()) {
540            try (InputStream in = getInputStream(entry)) {
541                return zipEncoding.decode(IOUtils.toByteArray(in));
542            }
543        }
544        return null;
545    }
546
547    /**
548     * Ensures that the close method of this zipfile is called when
549     * there are no more references to it.
550     * @see #close()
551     */
552    @Override
553    protected void finalize() throws Throwable {
554        try {
555            if (!closed) {
556                System.err.println("Cleaning up unclosed ZipFile for archive "
557                                   + archiveName);
558                close();
559            }
560        } finally {
561            super.finalize();
562        }
563    }
564
    /**
     * Length of a "central directory" entry structure without file
     * name, extra fields or comment.
     *
     * <p>The four byte signature in front of the structure is not
     * part of this length either; it is read separately.</p>
     */
    private static final int CFH_LEN =
        /* version made by                 */ SHORT
        /* version needed to extract       */ + SHORT
        /* general purpose bit flag        */ + SHORT
        /* compression method              */ + SHORT
        /* last mod file time              */ + SHORT
        /* last mod file date              */ + SHORT
        /* crc-32                          */ + WORD
        /* compressed size                 */ + WORD
        /* uncompressed size               */ + WORD
        /* filename length                 */ + SHORT
        /* extra field length              */ + SHORT
        /* file comment length             */ + SHORT
        /* disk number start               */ + SHORT
        /* internal file attributes        */ + SHORT
        /* external file attributes        */ + WORD
        /* relative offset of local header */ + WORD;

    /**
     * Signature of a central directory entry as a number, so that it
     * can be compared to a value read from the archive.
     */
    private static final long CFH_SIG =
        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
589
590    /**
591     * Reads the central directory of the given archive and populates
592     * the internal tables with ZipArchiveEntry instances.
593     *
594     * <p>The ZipArchiveEntrys will know all data that can be obtained from
595     * the central directory alone, but not the data that requires the
596     * local file header or additional data to be read.</p>
597     *
598     * @return a map of zipentries that didn't have the language
599     * encoding flag set when read.
600     */
601    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
602        throws IOException {
603        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
604            new HashMap<>();
605
606        positionAtCentralDirectory();
607
608        wordBbuf.rewind();
609        IOUtils.readFully(archive, wordBbuf);
610        long sig = ZipLong.getValue(wordBuf);
611
612        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
613            throw new IOException("central directory is empty, can't expand"
614                                  + " corrupt archive.");
615        }
616
617        while (sig == CFH_SIG) {
618            readCentralDirectoryEntry(noUTF8Flag);
619            wordBbuf.rewind();
620            IOUtils.readFully(archive, wordBbuf);
621            sig = ZipLong.getValue(wordBuf);
622        }
623        return noUTF8Flag;
624    }
625
626    /**
627     * Reads an individual entry of the central directory, creats an
628     * ZipArchiveEntry from it and adds it to the global maps.
629     *
630     * @param noUTF8Flag map used to collect entries that don't have
631     * their UTF-8 flag set and whose name will be set by data read
632     * from the local file header later.  The current entry may be
633     * added to this map.
634     */
635    private void
636        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
637        throws IOException {
638        cfhBbuf.rewind();
639        IOUtils.readFully(archive, cfhBbuf);
640        int off = 0;
641        final Entry ze = new Entry();
642
643        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
644        off += SHORT;
645        ze.setVersionMadeBy(versionMadeBy);
646        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
647
648        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
649        off += SHORT; // version required
650
651        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
652        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
653        final ZipEncoding entryEncoding =
654            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
655        ze.setGeneralPurposeBit(gpFlag);
656        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
657
658        off += SHORT;
659
660        //noinspection MagicConstant
661        ze.setMethod(ZipShort.getValue(cfhBuf, off));
662        off += SHORT;
663
664        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
665        ze.setTime(time);
666        off += WORD;
667
668        ze.setCrc(ZipLong.getValue(cfhBuf, off));
669        off += WORD;
670
671        ze.setCompressedSize(ZipLong.getValue(cfhBuf, off));
672        off += WORD;
673
674        ze.setSize(ZipLong.getValue(cfhBuf, off));
675        off += WORD;
676
677        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
678        off += SHORT;
679
680        final int extraLen = ZipShort.getValue(cfhBuf, off);
681        off += SHORT;
682
683        final int commentLen = ZipShort.getValue(cfhBuf, off);
684        off += SHORT;
685
686        final int diskStart = ZipShort.getValue(cfhBuf, off);
687        off += SHORT;
688
689        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
690        off += SHORT;
691
692        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
693        off += WORD;
694
695        final byte[] fileName = new byte[fileNameLen];
696        IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
697        ze.setName(entryEncoding.decode(fileName), fileName);
698
699        // LFH offset,
700        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
701        // data offset will be filled later
702        entries.add(ze);
703
704        final byte[] cdExtraData = new byte[extraLen];
705        IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
706        ze.setCentralDirectoryExtra(cdExtraData);
707
708        setSizesAndOffsetFromZip64Extra(ze, diskStart);
709
710        final byte[] comment = new byte[commentLen];
711        IOUtils.readFully(archive, ByteBuffer.wrap(comment));
712        ze.setComment(entryEncoding.decode(comment));
713
714        if (!hasUTF8Flag && useUnicodeExtraFields) {
715            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
716        }
717    }
718
719    /**
720     * If the entry holds a Zip64 extended information extra field,
721     * read sizes from there if the entry's sizes are set to
722     * 0xFFFFFFFFF, do the same for the offset of the local file
723     * header.
724     *
725     * <p>Ensures the Zip64 extra either knows both compressed and
726     * uncompressed size or neither of both as the internal logic in
727     * ExtraFieldUtils forces the field to create local header data
728     * even if they are never used - and here a field with only one
729     * size would be invalid.</p>
730     */
731    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
732                                                 final int diskStart)
733        throws IOException {
734        final Zip64ExtendedInformationExtraField z64 =
735            (Zip64ExtendedInformationExtraField)
736            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
737        if (z64 != null) {
738            final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
739            final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
740            final boolean hasRelativeHeaderOffset =
741                ze.getLocalHeaderOffset() == ZIP64_MAGIC;
742            z64.reparseCentralDirectoryData(hasUncompressedSize,
743                                            hasCompressedSize,
744                                            hasRelativeHeaderOffset,
745                                            diskStart == ZIP64_MAGIC_SHORT);
746
747            if (hasUncompressedSize) {
748                ze.setSize(z64.getSize().getLongValue());
749            } else if (hasCompressedSize) {
750                z64.setSize(new ZipEightByteInteger(ze.getSize()));
751            }
752
753            if (hasCompressedSize) {
754                ze.setCompressedSize(z64.getCompressedSize().getLongValue());
755            } else if (hasUncompressedSize) {
756                z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
757            }
758
759            if (hasRelativeHeaderOffset) {
760                ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
761            }
762        }
763    }
764
    /**
     * Length of the "End of central directory record" - which is
     * supposed to be the last structure of the archive - without file
     * comment.
     */
    static final int MIN_EOCD_SIZE =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD
        /* offset of start of central      */
        /* directory with respect to       */
        /* the starting disk number        */ + WORD
        /* zipfile comment length          */ + SHORT;

    /**
     * Maximum length of the "End of central directory record" with a
     * file comment: the minimum length plus the longest comment the
     * comment length field allows for.
     */
    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
        /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;

    /**
     * Offset of the field that holds the location of the first
     * central directory entry inside the "End of central directory
     * record" relative to the start of the "End of central directory
     * record".
     *
     * <p>This is the sum of the lengths of all fields in front of
     * that field.</p>
     */
    private static final int CFD_LOCATOR_OFFSET =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD;
808
809    /**
810     * Length of the "Zip64 end of central directory locator" - which
811     * should be right in front of the "end of central directory
812     * record" if one is present at all.
813     */
814    private static final int ZIP64_EOCDL_LENGTH =
815        /* zip64 end of central dir locator sig */ WORD
816        /* number of the disk with the start    */
817        /* start of the zip64 end of            */
818        /* central directory                    */ + WORD
819        /* relative offset of the zip64         */
820        /* end of central directory record      */ + DWORD
821        /* total number of disks                */ + WORD;
822
823    /**
824     * Offset of the field that holds the location of the "Zip64 end
825     * of central directory record" inside the "Zip64 end of central
826     * directory locator" relative to the start of the "Zip64 end of
827     * central directory locator".
828     */
829    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
830        /* zip64 end of central dir locator sig */ WORD
831        /* number of the disk with the start    */
832        /* start of the zip64 end of            */
833        /* central directory                    */ + WORD;
834
835    /**
836     * Offset of the field that holds the location of the first
837     * central directory entry inside the "Zip64 end of central
838     * directory record" relative to the start of the "Zip64 end of
839     * central directory record".
840     */
841    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
842        /* zip64 end of central dir        */
843        /* signature                       */ WORD
844        /* size of zip64 end of central    */
845        /* directory record                */ + DWORD
846        /* version made by                 */ + SHORT
847        /* version needed to extract       */ + SHORT
848        /* number of this disk             */ + WORD
849        /* number of the disk with the     */
850        /* start of the central directory  */ + WORD
851        /* total number of entries in the  */
852        /* central directory on this disk  */ + DWORD
853        /* total number of entries in the  */
854        /* central directory               */ + DWORD
855        /* size of the central directory   */ + DWORD;
856
857    /**
858     * Searches for either the &quot;Zip64 end of central directory
859     * locator&quot; or the &quot;End of central dir record&quot;, parses
860     * it and positions the stream at the first central directory
861     * record.
862     */
863    private void positionAtCentralDirectory()
864        throws IOException {
865        positionAtEndOfCentralDirectoryRecord();
866        boolean found = false;
867        final boolean searchedForZip64EOCD =
868            archive.position() > ZIP64_EOCDL_LENGTH;
869        if (searchedForZip64EOCD) {
870            archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
871            wordBbuf.rewind();
872            IOUtils.readFully(archive, wordBbuf);
873            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
874                                  wordBuf);
875        }
876        if (!found) {
877            // not a ZIP64 archive
878            if (searchedForZip64EOCD) {
879                skipBytes(ZIP64_EOCDL_LENGTH - WORD);
880            }
881            positionAtCentralDirectory32();
882        } else {
883            positionAtCentralDirectory64();
884        }
885    }
886
887    /**
888     * Parses the &quot;Zip64 end of central directory locator&quot;,
889     * finds the &quot;Zip64 end of central directory record&quot; using the
890     * parsed information, parses that and positions the stream at the
891     * first central directory record.
892     *
893     * Expects stream to be positioned right behind the &quot;Zip64
894     * end of central directory locator&quot;'s signature.
895     */
896    private void positionAtCentralDirectory64()
897        throws IOException {
898        skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
899                  - WORD /* signature has already been read */);
900        dwordBbuf.rewind();
901        IOUtils.readFully(archive, dwordBbuf);
902        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
903        wordBbuf.rewind();
904        IOUtils.readFully(archive, wordBbuf);
905        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
906            throw new ZipException("archive's ZIP64 end of central "
907                                   + "directory locator is corrupt.");
908        }
909        skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
910                  - WORD /* signature has already been read */);
911        dwordBbuf.rewind();
912        IOUtils.readFully(archive, dwordBbuf);
913        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
914    }
915
916    /**
917     * Parses the &quot;End of central dir record&quot; and positions
918     * the stream at the first central directory record.
919     *
920     * Expects stream to be positioned at the beginning of the
921     * &quot;End of central dir record&quot;.
922     */
923    private void positionAtCentralDirectory32()
924        throws IOException {
925        skipBytes(CFD_LOCATOR_OFFSET);
926        wordBbuf.rewind();
927        IOUtils.readFully(archive, wordBbuf);
928        archive.position(ZipLong.getValue(wordBuf));
929    }
930
931    /**
932     * Searches for the and positions the stream at the start of the
933     * &quot;End of central dir record&quot;.
934     */
935    private void positionAtEndOfCentralDirectoryRecord()
936        throws IOException {
937        final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
938                                             ZipArchiveOutputStream.EOCD_SIG);
939        if (!found) {
940            throw new ZipException("archive is not a ZIP archive");
941        }
942    }
943
944    /**
945     * Searches the archive backwards from minDistance to maxDistance
946     * for the given signature, positions the RandomaccessFile right
947     * at the signature if it has been found.
948     */
949    private boolean tryToLocateSignature(final long minDistanceFromEnd,
950                                         final long maxDistanceFromEnd,
951                                         final byte[] sig) throws IOException {
952        boolean found = false;
953        long off = archive.size() - minDistanceFromEnd;
954        final long stopSearching =
955            Math.max(0L, archive.size() - maxDistanceFromEnd);
956        if (off >= 0) {
957            for (; off >= stopSearching; off--) {
958                archive.position(off);
959                try {
960                    wordBbuf.rewind();
961                    IOUtils.readFully(archive, wordBbuf);
962                    wordBbuf.flip();
963                } catch (EOFException ex) {
964                    break;
965                }
966                int curr = wordBbuf.get();
967                if (curr == sig[POS_0]) {
968                    curr = wordBbuf.get();
969                    if (curr == sig[POS_1]) {
970                        curr = wordBbuf.get();
971                        if (curr == sig[POS_2]) {
972                            curr = wordBbuf.get();
973                            if (curr == sig[POS_3]) {
974                                found = true;
975                                break;
976                            }
977                        }
978                    }
979                }
980            }
981        }
982        if (found) {
983            archive.position(off);
984        }
985        return found;
986    }
987
988    /**
989     * Skips the given number of bytes or throws an EOFException if
990     * skipping failed.
991     */ 
992    private void skipBytes(final int count) throws IOException {
993        long currentPosition = archive.position();
994        long newPosition = currentPosition + count;
995        if (newPosition > archive.size()) {
996            throw new EOFException();
997        }
998        archive.position(newPosition);
999    }
1000
1001    /**
1002     * Number of bytes in local file header up to the &quot;length of
1003     * filename&quot; entry.
1004     */
1005    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
1006        /* local file header signature     */ WORD
1007        /* version needed to extract       */ + SHORT
1008        /* general purpose bit flag        */ + SHORT
1009        /* compression method              */ + SHORT
1010        /* last mod file time              */ + SHORT
1011        /* last mod file date              */ + SHORT
1012        /* crc-32                          */ + WORD
1013        /* compressed size                 */ + WORD
1014        /* uncompressed size               */ + (long) WORD;
1015
1016    /**
1017     * Walks through all recorded entries and adds the data available
1018     * from the local file header.
1019     *
1020     * <p>Also records the offsets for the data to read from the
1021     * entries.</p>
1022     */
1023    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1024                                            entriesWithoutUTF8Flag)
1025        throws IOException {
1026        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1027            // entries is filled in populateFromCentralDirectory and
1028            // never modified
1029            final Entry ze = (Entry) zipArchiveEntry;
1030            final long offset = ze.getLocalHeaderOffset();
1031            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1032            wordBbuf.rewind();
1033            IOUtils.readFully(archive, wordBbuf);
1034            wordBbuf.flip();
1035            wordBbuf.get(shortBuf);
1036            final int fileNameLen = ZipShort.getValue(shortBuf);
1037            wordBbuf.get(shortBuf);
1038            final int extraFieldLen = ZipShort.getValue(shortBuf);
1039            skipBytes(fileNameLen);
1040            final byte[] localExtraData = new byte[extraFieldLen];
1041            IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData));
1042            ze.setExtra(localExtraData);
1043            ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1044                + SHORT + SHORT + fileNameLen + extraFieldLen);
1045            ze.setStreamContiguous(true);
1046
1047            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1048                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1049                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1050                                                         nc.comment);
1051            }
1052
1053            final String name = ze.getName();
1054            LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
1055            if (entriesOfThatName == null) {
1056                entriesOfThatName = new LinkedList<>();
1057                nameMap.put(name, entriesOfThatName);
1058            }
1059            entriesOfThatName.addLast(ze);
1060        }
1061    }
1062
1063    /**
1064     * Checks whether the archive starts with a LFH.  If it doesn't,
1065     * it may be an empty archive.
1066     */
1067    private boolean startsWithLocalFileHeader() throws IOException {
1068        archive.position(0);
1069        wordBbuf.rewind();
1070        IOUtils.readFully(archive, wordBbuf);
1071        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1072    }
1073
1074    /**
1075     * Creates new BoundedInputStream, according to implementation of
1076     * underlying archive channel.
1077     */
1078    private BoundedInputStream createBoundedInputStream(long start, long remaining) {
1079        return archive instanceof FileChannel ?
1080            new BoundedFileChannelInputStream(start, remaining) :
1081            new BoundedInputStream(start, remaining);
1082    }
1083
1084    /**
1085     * InputStream that delegates requests to the underlying
1086     * SeekableByteChannel, making sure that only bytes from a certain
1087     * range can be read.
1088     */
1089    private class BoundedInputStream extends InputStream {
1090        private ByteBuffer singleByteBuffer;
1091        private final long end;
1092        private long loc;
1093        private boolean addDummy = false;
1094
1095        BoundedInputStream(final long start, final long remaining) {
1096            this.end = start+remaining;
1097            if (this.end < start) {
1098                // check for potential vulnerability due to overflow
1099                throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining);
1100            }
1101            loc = start;
1102        }
1103
1104        @Override
1105        public synchronized int read() throws IOException {
1106            if (loc >= end) {
1107                if (loc == end && addDummy) {
1108                    addDummy = false;
1109                    return 0;
1110                }
1111                return -1;
1112            }
1113            if (singleByteBuffer == null) {
1114                singleByteBuffer = ByteBuffer.allocate(1);
1115            }
1116            else {
1117                singleByteBuffer.rewind();
1118            }
1119            int read = read(loc, singleByteBuffer);
1120            if (read < 0) {
1121                return read;
1122            }
1123            loc++;
1124            return singleByteBuffer.get() & 0xff;
1125        }
1126
1127        @Override
1128        public synchronized int read(final byte[] b, final int off, int len) throws IOException {
1129            if (len <= 0) {
1130                return 0;
1131            }
1132
1133            if (len > end-loc) {
1134                if (loc >= end) {
1135                    if (loc == end && addDummy) {
1136                        addDummy = false;
1137                        b[off] = 0;
1138                        return 1;
1139                    }
1140                    return -1;
1141                }
1142                len = (int)(end-loc);
1143            }
1144
1145            ByteBuffer buf;
1146            buf = ByteBuffer.wrap(b, off, len);
1147            int ret = read(loc, buf);
1148            if (ret > 0) {
1149                loc += ret;
1150                return ret;
1151            }
1152            return ret;
1153        }
1154
1155        protected int read(long pos, ByteBuffer buf) throws IOException {
1156            int read;
1157            synchronized (archive) {
1158                archive.position(pos);
1159                read = archive.read(buf);
1160            }
1161            buf.flip();
1162            return read;
1163        }
1164
1165        synchronized void addDummy() {
1166            this.addDummy = true;
1167        }
1168    }
1169
1170    /**
1171     * Lock-free implementation of BoundedInputStream. The
1172     * implementation uses positioned reads on the underlying archive
1173     * file channel and therefore performs significantly faster in
1174     * concurrent environment.
1175     */
1176    private class BoundedFileChannelInputStream extends BoundedInputStream {
1177        private final FileChannel archive;
1178
1179        BoundedFileChannelInputStream(final long start, final long remaining) {
1180            super(start, remaining);
1181            archive = (FileChannel)ZipFile.this.archive;
1182        }
1183
1184        @Override
1185        protected int read(long pos, ByteBuffer buf) throws IOException {
1186            int read = archive.read(buf, pos);
1187            buf.flip();
1188            return read;
1189        }
1190    }
1191
1192    private static final class NameAndComment {
1193        private final byte[] name;
1194        private final byte[] comment;
1195        private NameAndComment(final byte[] name, final byte[] comment) {
1196            this.name = name;
1197            this.comment = comment;
1198        }
1199    }
1200
1201    /**
1202     * Compares two ZipArchiveEntries based on their offset within the archive.
1203     *
1204     * <p>Won't return any meaningful results if one of the entries
1205     * isn't part of the archive at all.</p>
1206     *
1207     * @since 1.1
1208     */
1209    private final Comparator<ZipArchiveEntry> offsetComparator =
1210        new Comparator<ZipArchiveEntry>() {
1211        @Override
1212        public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) {
1213            if (e1 == e2) {
1214                return 0;
1215            }
1216
1217            final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
1218            final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
1219            if (ent1 == null) {
1220                return 1;
1221            }
1222            if (ent2 == null) {
1223                return -1;
1224            }
1225            final long val = (ent1.getLocalHeaderOffset()
1226                        - ent2.getLocalHeaderOffset());
1227            return val == 0 ? 0 : val < 0 ? -1 : +1;
1228        }
1229    };
1230
1231    /**
1232     * Extends ZipArchiveEntry to store the offset within the archive.
1233     */
1234    private static class Entry extends ZipArchiveEntry {
1235
1236        Entry() {
1237        }
1238
1239        @Override
1240        public int hashCode() {
1241            return 3 * super.hashCode()
1242                + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32);
1243        }
1244
1245        @Override
1246        public boolean equals(final Object other) {
1247            if (super.equals(other)) {
1248                // super.equals would return false if other were not an Entry
1249                final Entry otherEntry = (Entry) other;
1250                return getLocalHeaderOffset()
1251                        == otherEntry.getLocalHeaderOffset()
1252                    && getDataOffset()
1253                        == otherEntry.getDataOffset();
1254            }
1255            return false;
1256        }
1257    }
1258}