001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.zip;
019
020import java.io.EOFException;
021import java.io.File;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.RandomAccessFile;
025import java.util.Arrays;
026import java.util.Collections;
027import java.util.Comparator;
028import java.util.Enumeration;
029import java.util.HashMap;
030import java.util.LinkedHashMap;
031import java.util.Map;
032import java.util.zip.Inflater;
033import java.util.zip.InflaterInputStream;
034import java.util.zip.ZipEntry;
035import java.util.zip.ZipException;
036
037import org.apache.commons.compress.utils.IOUtils;
038
039import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
040import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
041import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
042import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
043import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
044
045/**
 * Replacement for <code>java.util.zip.ZipFile</code>.
047 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives.  Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
054 *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway.  Like
 * <code>java.util.zip.ZipFile</code>, it uses RandomAccessFile under the
 * covers and supports compressed and uncompressed entries.  As of
 * Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
062 *
063 * <p>The method signatures mimic the ones of
064 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
065 *
066 * <ul>
067 *   <li>There is no getName method.</li>
068 *   <li>entries has been renamed to getEntries.</li>
069 *   <li>getEntries and getEntry return
070 *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
071 *   instances.</li>
072 *   <li>close is allowed to throw IOException.</li>
073 * </ul>
074 *
075 */
076public class ZipFile {
    /** Initial capacity handed to the two entry maps of this class. */
    private static final int HASH_SIZE = 509;
    /**
     * Mask keeping the lowest four bits of a value; applied to the
     * shifted "version made by" field to obtain the platform.
     */
    static final int NIBLET_MASK = 0x0f;
    /**
     * Number of bits to shift in order to move the high byte of a
     * two byte value into the low byte.
     */
    static final int BYTE_SHIFT = 8;
    // Indexes of the four individual bytes of a signature, used when
    // the archive is scanned byte by byte for a signature.
    private static final int POS_0 = 0;
    private static final int POS_1 = 1;
    private static final int POS_2 = 2;
    private static final int POS_3 = 3;
084
    /**
     * Maps each ZipArchiveEntry to its {@link OffsetEntry}, which
     * records the offset of the local file header and the offset of
     * the start of the entry's data.
     *
     * <p>A LinkedHashMap is used so that iteration follows the order
     * in which the entries have been added while reading the central
     * directory.</p>
     */
    private final Map<ZipArchiveEntry, OffsetEntry> entries =
        new LinkedHashMap<ZipArchiveEntry, OffsetEntry>(HASH_SIZE);

    /**
     * Maps entry names to ZipArchiveEntrys, name -> actual entry.
     *
     * <p>Used for lookups by name; if several entries share a name
     * the one added last replaces the earlier ones in this map.</p>
     */
    private final Map<String, ZipArchiveEntry> nameMap =
        new HashMap<String, ZipArchiveEntry>(HASH_SIZE);
097
    /**
     * Pair of file offsets recorded for a single entry; both values
     * are -1 until they have been determined.
     */
    private static final class OffsetEntry {
        /** Offset of the entry's local file header as read from the central directory. */
        private long headerOffset = -1;
        /** Offset at which the entry's data starts; used as start of the entry's input stream. */
        private long dataOffset = -1;
    }
102
    /**
     * The name of the encoding to use for filenames and the file
     * comment, exactly as it has been passed to the constructor.
     *
     * <p>For a list of possible values see <a
     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
     * The constructors that don't take an encoding use UTF-8,
     * <code>null</code> stands for the platform's default
     * encoding.</p>
     */
    private final String encoding;

    /**
     * The zip encoding to use for filenames and the file comment,
     * obtained from {@link #encoding} via ZipEncodingHelper.
     */
    private final ZipEncoding zipEncoding;

    /**
     * Absolute path of the archive file, only used for diagnostic
     * output.
     */
    private final String archiveName;

    /**
     * The actual data source, opened in read-only mode.
     */
    private final RandomAccessFile archive;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /**
     * Whether the file is closed; set by close() and when the
     * constructor fails, read by finalize().
     */
    private boolean closed;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // Each buffer is sized for one kind of fixed-length field and is
    // overwritten by every read that uses it.
    private final byte[] DWORD_BUF = new byte[DWORD];
    private final byte[] WORD_BUF = new byte[WORD];
    private final byte[] CFH_BUF = new byte[CFH_LEN];
    private final byte[] SHORT_BUF = new byte[SHORT];
142
143    /**
144     * Opens the given file for reading, assuming "UTF8" for file names.
145     *
146     * @param f the archive.
147     *
148     * @throws IOException if an error occurs while reading the file.
149     */
150    public ZipFile(File f) throws IOException {
151        this(f, ZipEncodingHelper.UTF8);
152    }
153
154    /**
155     * Opens the given file for reading, assuming "UTF8".
156     *
157     * @param name name of the archive.
158     *
159     * @throws IOException if an error occurs while reading the file.
160     */
161    public ZipFile(String name) throws IOException {
162        this(new File(name), ZipEncodingHelper.UTF8);
163    }
164
165    /**
166     * Opens the given file for reading, assuming the specified
167     * encoding for file names, scanning unicode extra fields.
168     *
169     * @param name name of the archive.
170     * @param encoding the encoding to use for file names, use null
171     * for the platform's default encoding
172     *
173     * @throws IOException if an error occurs while reading the file.
174     */
175    public ZipFile(String name, String encoding) throws IOException {
176        this(new File(name), encoding, true);
177    }
178
179    /**
180     * Opens the given file for reading, assuming the specified
181     * encoding for file names and scanning for unicode extra fields.
182     *
183     * @param f the archive.
184     * @param encoding the encoding to use for file names, use null
185     * for the platform's default encoding
186     *
187     * @throws IOException if an error occurs while reading the file.
188     */
189    public ZipFile(File f, String encoding) throws IOException {
190        this(f, encoding, true);
191    }
192
193    /**
194     * Opens the given file for reading, assuming the specified
195     * encoding for file names.
196     *
197     * @param f the archive.
198     * @param encoding the encoding to use for file names, use null
199     * for the platform's default encoding
200     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
201     * Extra Fields (if present) to set the file names.
202     *
203     * @throws IOException if an error occurs while reading the file.
204     */
205    public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
206        throws IOException {
207        this.archiveName = f.getAbsolutePath();
208        this.encoding = encoding;
209        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
210        this.useUnicodeExtraFields = useUnicodeExtraFields;
211        archive = new RandomAccessFile(f, "r");
212        boolean success = false;
213        try {
214            Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
215                populateFromCentralDirectory();
216            resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
217            success = true;
218        } finally {
219            if (!success) {
220                try {
221                    closed = true;
222                    archive.close();
223                } catch (IOException e2) { // NOPMD
224                    // swallow, throw the original exception instead
225                }
226            }
227        }
228    }
229
230    /**
231     * The encoding to use for filenames and the file comment.
232     *
233     * @return null if using the platform's default character encoding.
234     */
235    public String getEncoding() {
236        return encoding;
237    }
238
239    /**
240     * Closes the archive.
241     * @throws IOException if an error occurs closing the archive.
242     */
243    public void close() throws IOException {
244        // this flag is only written here and read in finalize() which
245        // can never be run in parallel.
246        // no synchronization needed.
247        closed = true;
248
249        archive.close();
250    }
251
252    /**
253     * close a zipfile quietly; throw no io fault, do nothing
254     * on a null parameter
255     * @param zipfile file to close, can be null
256     */
257    public static void closeQuietly(ZipFile zipfile) {
258        if (zipfile != null) {
259            try {
260                zipfile.close();
261            } catch (IOException e) { // NOPMD
262                //ignore, that's why the method is called "quietly"
263            }
264        }
265    }
266
267    /**
268     * Returns all entries.
269     *
270     * <p>Entries will be returned in the same order they appear
271     * within the archive's central directory.</p>
272     *
273     * @return all entries as {@link ZipArchiveEntry} instances
274     */
275    public Enumeration<ZipArchiveEntry> getEntries() {
276        return Collections.enumeration(entries.keySet());
277    }
278
279    /**
280     * Returns all entries in physical order.
281     *
282     * <p>Entries will be returned in the same order their contents
283     * appear within the archive.</p>
284     *
285     * @return all entries as {@link ZipArchiveEntry} instances
286     *
287     * @since 1.1
288     */
289    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
290        ZipArchiveEntry[] allEntries =
291            entries.keySet().toArray(new ZipArchiveEntry[0]);
292        Arrays.sort(allEntries, OFFSET_COMPARATOR);
293        return Collections.enumeration(Arrays.asList(allEntries));
294    }
295
296    /**
297     * Returns a named entry - or {@code null} if no entry by
298     * that name exists.
299     * @param name name of the entry.
300     * @return the ZipArchiveEntry corresponding to the given name - or
301     * {@code null} if not present.
302     */
303    public ZipArchiveEntry getEntry(String name) {
304        return nameMap.get(name);
305    }
306
307    /**
308     * Whether this class is able to read the given entry.
309     *
310     * <p>May return false if it is set up to use encryption or a
311     * compression method that hasn't been implemented yet.</p>
312     * @since 1.1
313     */
314    public boolean canReadEntryData(ZipArchiveEntry ze) {
315        return ZipUtil.canHandleEntryData(ze);
316    }
317
318    /**
319     * Returns an InputStream for reading the contents of the given entry.
320     *
321     * @param ze the entry to get the stream for.
322     * @return a stream to read the entry from.
323     * @throws IOException if unable to create an input stream from the zipentry
324     * @throws ZipException if the zipentry uses an unsupported feature
325     */
326    public InputStream getInputStream(ZipArchiveEntry ze)
327        throws IOException, ZipException {
328        OffsetEntry offsetEntry = entries.get(ze);
329        if (offsetEntry == null) {
330            return null;
331        }
332        ZipUtil.checkRequestedFeatures(ze);
333        long start = offsetEntry.dataOffset;
334        BoundedInputStream bis =
335            new BoundedInputStream(start, ze.getCompressedSize());
336        switch (ze.getMethod()) {
337            case ZipEntry.STORED:
338                return bis;
339            case ZipEntry.DEFLATED:
340                bis.addDummy();
341                final Inflater inflater = new Inflater(true);
342                return new InflaterInputStream(bis, inflater) {
343                    @Override
344                    public void close() throws IOException {
345                        super.close();
346                        inflater.end();
347                    }
348                };
349            default:
350                throw new ZipException("Found unsupported compression method "
351                                       + ze.getMethod());
352        }
353    }
354
355    /**
356     * <p>
357     * Convenience method to return the entry's content as a String if isUnixSymlink()
358     * returns true for it, otherwise returns null.
359     * </p>
360     *
361     * <p>This method assumes the symbolic link's file name uses the
362     * same encoding that as been specified for this ZipFile.</p>
363     *
364     * @param entry ZipArchiveEntry object that represents the symbolic link
365     * @return entry's content as a String
366     * @throws IOException problem with content's input stream
367     * @since 1.5
368     */
369    public String getUnixSymlink(ZipArchiveEntry entry) throws IOException {
370        if (entry != null && entry.isUnixSymlink()) {
371            InputStream in = null;
372            try {
373                in = getInputStream(entry);
374                byte[] symlinkBytes = IOUtils.toByteArray(in);
375                return zipEncoding.decode(symlinkBytes);
376            } finally {
377                if (in != null) {
378                    in.close();
379                }
380            }
381        } else {
382            return null;
383        }
384    }
385
386    /**
387     * Ensures that the close method of this zipfile is called when
388     * there are no more references to it.
389     * @see #close()
390     */
391    @Override
392    protected void finalize() throws Throwable {
393        try {
394            if (!closed) {
395                System.err.println("Cleaning up unclosed ZipFile for archive "
396                                   + archiveName);
397                close();
398            }
399        } finally {
400            super.finalize();
401        }
402    }
403
    /**
     * Length of a "central directory" entry structure without
     * signature, file name, extra fields or comment.
     *
     * <p>This is the size of the fixed-length part that is read into
     * a buffer in one go after the signature has been consumed.</p>
     */
    private static final int CFH_LEN =
        /* version made by                 */ SHORT
        /* version needed to extract       */ + SHORT
        /* general purpose bit flag        */ + SHORT
        /* compression method              */ + SHORT
        /* last mod file time              */ + SHORT
        /* last mod file date              */ + SHORT
        /* crc-32                          */ + WORD
        /* compressed size                 */ + WORD
        /* uncompressed size               */ + WORD
        /* filename length                 */ + SHORT
        /* extra field length              */ + SHORT
        /* file comment length             */ + SHORT
        /* disk number start               */ + SHORT
        /* internal file attributes        */ + SHORT
        /* external file attributes        */ + WORD
        /* relative offset of local header */ + WORD;

    /**
     * Signature of a central directory entry as a long, compared to
     * the signatures read while walking the central directory.
     */
    private static final long CFH_SIG =
        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
428
429    /**
430     * Reads the central directory of the given archive and populates
431     * the internal tables with ZipArchiveEntry instances.
432     *
433     * <p>The ZipArchiveEntrys will know all data that can be obtained from
434     * the central directory alone, but not the data that requires the
435     * local file header or additional data to be read.</p>
436     *
437     * @return a map of zipentries that didn't have the language
438     * encoding flag set when read.
439     */
440    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
441        throws IOException {
442        HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
443            new HashMap<ZipArchiveEntry, NameAndComment>();
444
445        positionAtCentralDirectory();
446
447        archive.readFully(WORD_BUF);
448        long sig = ZipLong.getValue(WORD_BUF);
449
450        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
451            throw new IOException("central directory is empty, can't expand"
452                                  + " corrupt archive.");
453        }
454
455        while (sig == CFH_SIG) {
456            readCentralDirectoryEntry(noUTF8Flag);
457            archive.readFully(WORD_BUF);
458            sig = ZipLong.getValue(WORD_BUF);
459        }
460        return noUTF8Flag;
461    }
462
    /**
     * Reads an individual entry of the central directory, creates a
     * ZipArchiveEntry from it and adds it to the global maps.
     *
     * <p>Expects the archive to be positioned right behind the
     * entry's signature.  The fixed-length part is read into a shared
     * buffer and decoded field by field, the variable-length parts
     * (file name, extra fields, comment) are read afterwards in the
     * order in which they are stored.</p>
     *
     * @param noUTF8Flag map used to collect entries that don't have
     * their UTF-8 flag set and whose name will be set by data read
     * from the local file header later.  The current entry may be
     * added to this map.
     * @throws IOException if reading from the archive fails.
     */
    private void
        readCentralDirectoryEntry(Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        archive.readFully(CFH_BUF);
        // off tracks the position of the next field inside CFH_BUF
        int off = 0;
        ZipArchiveEntry ze = new ZipArchiveEntry();

        int versionMadeBy = ZipShort.getValue(CFH_BUF, off);
        off += SHORT;
        // the platform is taken from the low nibble of the high byte
        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);

        off += SHORT; // skip version info

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(CFH_BUF, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        // the flag overrides the encoding configured for this ZipFile
        final ZipEncoding entryEncoding =
            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        ze.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        ze.setMethod(ZipShort.getValue(CFH_BUF, off));
        off += SHORT;

        // last mod file time and date are read as a single four byte value
        long time = ZipUtil.dosToJavaTime(ZipLong.getValue(CFH_BUF, off));
        ze.setTime(time);
        off += WORD;

        ze.setCrc(ZipLong.getValue(CFH_BUF, off));
        off += WORD;

        ze.setCompressedSize(ZipLong.getValue(CFH_BUF, off));
        off += WORD;

        ze.setSize(ZipLong.getValue(CFH_BUF, off));
        off += WORD;

        int fileNameLen = ZipShort.getValue(CFH_BUF, off);
        off += SHORT;

        int extraLen = ZipShort.getValue(CFH_BUF, off);
        off += SHORT;

        int commentLen = ZipShort.getValue(CFH_BUF, off);
        off += SHORT;

        int diskStart = ZipShort.getValue(CFH_BUF, off);
        off += SHORT;

        ze.setInternalAttributes(ZipShort.getValue(CFH_BUF, off));
        off += SHORT;

        ze.setExternalAttributes(ZipLong.getValue(CFH_BUF, off));
        off += WORD;

        // the name is set before the entry is used as a map key below
        byte[] fileName = new byte[fileNameLen];
        archive.readFully(fileName);
        ze.setName(entryEncoding.decode(fileName), fileName);

        // LFH offset,
        OffsetEntry offset = new OffsetEntry();
        offset.headerOffset = ZipLong.getValue(CFH_BUF, off);
        // data offset will be filled later
        entries.put(ze, offset);

        nameMap.put(ze.getName(), ze);

        byte[] cdExtraData = new byte[extraLen];
        archive.readFully(cdExtraData);
        ze.setCentralDirectoryExtra(cdExtraData);

        // must run after the extra fields have been set as it looks
        // for the Zip64 extra field of the entry
        setSizesAndOffsetFromZip64Extra(ze, offset, diskStart);

        byte[] comment = new byte[commentLen];
        archive.readFully(comment);
        ze.setComment(entryEncoding.decode(comment));

        // keep the raw bytes of name and comment for entries without
        // UTF-8 flag if Unicode extra fields are to be used
        if (!hasUTF8Flag && useUnicodeExtraFields) {
            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
        }
    }
553
554    /**
555     * If the entry holds a Zip64 extended information extra field,
556     * read sizes from there if the entry's sizes are set to
557     * 0xFFFFFFFFF, do the same for the offset of the local file
558     * header.
559     *
560     * <p>Ensures the Zip64 extra either knows both compressed and
561     * uncompressed size or neither of both as the internal logic in
562     * ExtraFieldUtils forces the field to create local header data
563     * even if they are never used - and here a field with only one
564     * size would be invalid.</p>
565     */
566    private void setSizesAndOffsetFromZip64Extra(ZipArchiveEntry ze,
567                                                 OffsetEntry offset,
568                                                 int diskStart)
569        throws IOException {
570        Zip64ExtendedInformationExtraField z64 =
571            (Zip64ExtendedInformationExtraField)
572            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
573        if (z64 != null) {
574            boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
575            boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
576            boolean hasRelativeHeaderOffset =
577                offset.headerOffset == ZIP64_MAGIC;
578            z64.reparseCentralDirectoryData(hasUncompressedSize,
579                                            hasCompressedSize,
580                                            hasRelativeHeaderOffset,
581                                            diskStart == ZIP64_MAGIC_SHORT);
582
583            if (hasUncompressedSize) {
584                ze.setSize(z64.getSize().getLongValue());
585            } else if (hasCompressedSize) {
586                z64.setSize(new ZipEightByteInteger(ze.getSize()));
587            }
588
589            if (hasCompressedSize) {
590                ze.setCompressedSize(z64.getCompressedSize().getLongValue());
591            } else if (hasUncompressedSize) {
592                z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
593            }
594
595            if (hasRelativeHeaderOffset) {
596                offset.headerOffset =
597                    z64.getRelativeHeaderOffset().getLongValue();
598            }
599        }
600    }
601
    /**
     * Length of the "End of central directory record" - which is
     * supposed to be the last structure of the archive - without file
     * comment.
     *
     * <p>Used as the smallest distance from the end of the archive
     * at which the record's signature is searched for.</p>
     */
    static final int MIN_EOCD_SIZE =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD
        /* offset of start of central      */
        /* directory with respect to       */
        /* the starting disk number        */ + WORD
        /* zipfile comment length          */ + SHORT;

    /**
     * Maximum length of the "End of central directory record" with a
     * file comment.
     *
     * <p>Used as the largest distance from the end of the archive
     * at which the record's signature is searched for.</p>
     */
    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
        /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;

    /**
     * Offset of the field that holds the location of the first
     * central directory entry inside the "End of central directory
     * record" relative to the start of the "End of central directory
     * record".
     */
    private static final int CFD_LOCATOR_OFFSET =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD;

    /**
     * Length of the "Zip64 end of central directory locator" - which
     * should be right in front of the "end of central directory
     * record" if one is present at all.
     */
    private static final int ZIP64_EOCDL_LENGTH =
        /* zip64 end of central dir locator sig */ WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + WORD
        /* relative offset of the zip64         */
        /* end of central directory record      */ + DWORD
        /* total number of disks                */ + WORD;

    /**
     * Offset of the field that holds the location of the "Zip64 end
     * of central directory record" inside the "Zip64 end of central
     * directory locator" relative to the start of the "Zip64 end of
     * central directory locator".
     */
    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
        /* zip64 end of central dir locator sig */ WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + WORD;

    /**
     * Offset of the field that holds the location of the first
     * central directory entry inside the "Zip64 end of central
     * directory record" relative to the start of the "Zip64 end of
     * central directory record".
     */
    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
        /* zip64 end of central dir        */
        /* signature                       */ WORD
        /* size of zip64 end of central    */
        /* directory record                */ + DWORD
        /* version made by                 */ + SHORT
        /* version needed to extract       */ + SHORT
        /* number of this disk             */ + WORD
        /* number of the disk with the     */
        /* start of the central directory  */ + WORD
        /* total number of entries in the  */
        /* central directory on this disk  */ + DWORD
        /* total number of entries in the  */
        /* central directory               */ + DWORD
        /* size of the central directory   */ + DWORD;
693
694    /**
695     * Searches for either the &quot;Zip64 end of central directory
696     * locator&quot; or the &quot;End of central dir record&quot;, parses
697     * it and positions the stream at the first central directory
698     * record.
699     */
700    private void positionAtCentralDirectory()
701        throws IOException {
702        positionAtEndOfCentralDirectoryRecord();
703        boolean found = false;
704        boolean searchedForZip64EOCD =
705            archive.getFilePointer() > ZIP64_EOCDL_LENGTH;
706        if (searchedForZip64EOCD) {
707            archive.seek(archive.getFilePointer() - ZIP64_EOCDL_LENGTH);
708            archive.readFully(WORD_BUF);
709            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
710                                  WORD_BUF);
711        }
712        if (!found) {
713            // not a ZIP64 archive
714            if (searchedForZip64EOCD) {
715                skipBytes(ZIP64_EOCDL_LENGTH - WORD);
716            }
717            positionAtCentralDirectory32();
718        } else {
719            positionAtCentralDirectory64();
720        }
721    }
722
723    /**
724     * Parses the &quot;Zip64 end of central directory locator&quot;,
725     * finds the &quot;Zip64 end of central directory record&quot; using the
726     * parsed information, parses that and positions the stream at the
727     * first central directory record.
728     *
729     * Expects stream to be positioned right behind the &quot;Zip64
730     * end of central directory locator&quot;'s signature.
731     */
732    private void positionAtCentralDirectory64()
733        throws IOException {
734        skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
735                  - WORD /* signature has already been read */);
736        archive.readFully(DWORD_BUF);
737        archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF));
738        archive.readFully(WORD_BUF);
739        if (!Arrays.equals(WORD_BUF, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
740            throw new ZipException("archive's ZIP64 end of central "
741                                   + "directory locator is corrupt.");
742        }
743        skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
744                  - WORD /* signature has already been read */);
745        archive.readFully(DWORD_BUF);
746        archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF));
747    }
748
749    /**
750     * Parses the &quot;End of central dir record&quot; and positions
751     * the stream at the first central directory record.
752     *
753     * Expects stream to be positioned at the beginning of the
754     * &quot;End of central dir record&quot;.
755     */
756    private void positionAtCentralDirectory32()
757        throws IOException {
758        skipBytes(CFD_LOCATOR_OFFSET);
759        archive.readFully(WORD_BUF);
760        archive.seek(ZipLong.getValue(WORD_BUF));
761    }
762
763    /**
764     * Searches for the and positions the stream at the start of the
765     * &quot;End of central dir record&quot;.
766     */
767    private void positionAtEndOfCentralDirectoryRecord()
768        throws IOException {
769        boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
770                                             ZipArchiveOutputStream.EOCD_SIG);
771        if (!found) {
772            throw new ZipException("archive is not a ZIP archive");
773        }
774    }
775
776    /**
777     * Searches the archive backwards from minDistance to maxDistance
778     * for the given signature, positions the RandomaccessFile right
779     * at the signature if it has been found.
780     */
781    private boolean tryToLocateSignature(long minDistanceFromEnd,
782                                         long maxDistanceFromEnd,
783                                         byte[] sig) throws IOException {
784        boolean found = false;
785        long off = archive.length() - minDistanceFromEnd;
786        final long stopSearching =
787            Math.max(0L, archive.length() - maxDistanceFromEnd);
788        if (off >= 0) {
789            for (; off >= stopSearching; off--) {
790                archive.seek(off);
791                int curr = archive.read();
792                if (curr == -1) {
793                    break;
794                }
795                if (curr == sig[POS_0]) {
796                    curr = archive.read();
797                    if (curr == sig[POS_1]) {
798                        curr = archive.read();
799                        if (curr == sig[POS_2]) {
800                            curr = archive.read();
801                            if (curr == sig[POS_3]) {
802                                found = true;
803                                break;
804                            }
805                        }
806                    }
807                }
808            }
809        }
810        if (found) {
811            archive.seek(off);
812        }
813        return found;
814    }
815
816    /**
817     * Skips the given number of bytes or throws an EOFException if
818     * skipping failed.
819     */ 
820    private void skipBytes(final int count) throws IOException {
821        int totalSkipped = 0;
822        while (totalSkipped < count) {
823            int skippedNow = archive.skipBytes(count - totalSkipped);
824            if (skippedNow <= 0) {
825                throw new EOFException();
826            }
827            totalSkipped += skippedNow;
828        }
829    }
830
831    /**
832     * Number of bytes in local file header up to the &quot;length of
833     * filename&quot; entry.
834     */
835    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
836        /* local file header signature     */ WORD
837        /* version needed to extract       */ + SHORT
838        /* general purpose bit flag        */ + SHORT
839        /* compression method              */ + SHORT
840        /* last mod file time              */ + SHORT
841        /* last mod file date              */ + SHORT
842        /* crc-32                          */ + WORD
843        /* compressed size                 */ + WORD
844        /* uncompressed size               */ + WORD;
845
846    /**
847     * Walks through all recorded entries and adds the data available
848     * from the local file header.
849     *
850     * <p>Also records the offsets for the data to read from the
851     * entries.</p>
852     */
853    private void resolveLocalFileHeaderData(Map<ZipArchiveEntry, NameAndComment>
854                                            entriesWithoutUTF8Flag)
855        throws IOException {
856        // changing the name of a ZipArchiveEntry is going to change
857        // the hashcode - see COMPRESS-164
858        // Map needs to be reconstructed in order to keep central
859        // directory order
860        Map<ZipArchiveEntry, OffsetEntry> origMap =
861            new LinkedHashMap<ZipArchiveEntry, OffsetEntry>(entries);
862        entries.clear();
863        for (Map.Entry<ZipArchiveEntry, OffsetEntry> ent : origMap.entrySet()) {
864            ZipArchiveEntry ze = ent.getKey();
865            OffsetEntry offsetEntry = ent.getValue();
866            long offset = offsetEntry.headerOffset;
867            archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
868            archive.readFully(SHORT_BUF);
869            int fileNameLen = ZipShort.getValue(SHORT_BUF);
870            archive.readFully(SHORT_BUF);
871            int extraFieldLen = ZipShort.getValue(SHORT_BUF);
872            int lenToSkip = fileNameLen;
873            while (lenToSkip > 0) {
874                int skipped = archive.skipBytes(lenToSkip);
875                if (skipped <= 0) {
876                    throw new IOException("failed to skip file name in"
877                                          + " local file header");
878                }
879                lenToSkip -= skipped;
880            }
881            byte[] localExtraData = new byte[extraFieldLen];
882            archive.readFully(localExtraData);
883            ze.setExtra(localExtraData);
884            offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
885                + SHORT + SHORT + fileNameLen + extraFieldLen;
886
887            if (entriesWithoutUTF8Flag.containsKey(ze)) {
888                String orig = ze.getName();
889                NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
890                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
891                                                         nc.comment);
892                if (!orig.equals(ze.getName())) {
893                    nameMap.remove(orig);
894                    nameMap.put(ze.getName(), ze);
895                }
896            }
897            entries.put(ze, offsetEntry);
898        }
899    }
900
901    /**
902     * Checks whether the archive starts with a LFH.  If it doesn't,
903     * it may be an empty archive.
904     */
905    private boolean startsWithLocalFileHeader() throws IOException {
906        archive.seek(0);
907        archive.readFully(WORD_BUF);
908        return Arrays.equals(WORD_BUF, ZipArchiveOutputStream.LFH_SIG);
909    }
910
911    /**
912     * InputStream that delegates requests to the underlying
913     * RandomAccessFile, making sure that only bytes from a certain
914     * range can be read.
915     */
916    private class BoundedInputStream extends InputStream {
917        private long remaining;
918        private long loc;
919        private boolean addDummyByte = false;
920
921        BoundedInputStream(long start, long remaining) {
922            this.remaining = remaining;
923            loc = start;
924        }
925
926        @Override
927        public int read() throws IOException {
928            if (remaining-- <= 0) {
929                if (addDummyByte) {
930                    addDummyByte = false;
931                    return 0;
932                }
933                return -1;
934            }
935            synchronized (archive) {
936                archive.seek(loc++);
937                return archive.read();
938            }
939        }
940
941        @Override
942        public int read(byte[] b, int off, int len) throws IOException {
943            if (remaining <= 0) {
944                if (addDummyByte) {
945                    addDummyByte = false;
946                    b[off] = 0;
947                    return 1;
948                }
949                return -1;
950            }
951
952            if (len <= 0) {
953                return 0;
954            }
955
956            if (len > remaining) {
957                len = (int) remaining;
958            }
959            int ret = -1;
960            synchronized (archive) {
961                archive.seek(loc);
962                ret = archive.read(b, off, len);
963            }
964            if (ret > 0) {
965                loc += ret;
966                remaining -= ret;
967            }
968            return ret;
969        }
970
971        /**
972         * Inflater needs an extra dummy byte for nowrap - see
973         * Inflater's javadocs.
974         */
975        void addDummy() {
976            addDummyByte = true;
977        }
978    }
979
980    private static final class NameAndComment {
981        private final byte[] name;
982        private final byte[] comment;
983        private NameAndComment(byte[] name, byte[] comment) {
984            this.name = name;
985            this.comment = comment;
986        }
987    }
988
989    /**
990     * Compares two ZipArchiveEntries based on their offset within the archive.
991     *
992     * <p>Won't return any meaningful results if one of the entries
993     * isn't part of the archive at all.</p>
994     *
995     * @since 1.1
996     */
997    private final Comparator<ZipArchiveEntry> OFFSET_COMPARATOR =
998        new Comparator<ZipArchiveEntry>() {
999        public int compare(ZipArchiveEntry e1, ZipArchiveEntry e2) {
1000            if (e1 == e2) {
1001                return 0;
1002            }
1003
1004            OffsetEntry off1 = entries.get(e1);
1005            OffsetEntry off2 = entries.get(e2);
1006            if (off1 == null) {
1007                return 1;
1008            }
1009            if (off2 == null) {
1010                return -1;
1011            }
1012            long val = (off1.headerOffset - off2.headerOffset);
1013            return val == 0 ? 0 : val < 0 ? -1 : +1;
1014        }
1015    };
1016}