View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   *
17   */
18  package org.apache.commons.compress.archivers.zip;
19  
20  import java.io.BufferedInputStream;
21  import java.io.Closeable;
22  import java.io.EOFException;
23  import java.io.File;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.io.RandomAccessFile;
27  import java.util.Arrays;
28  import java.util.Collections;
29  import java.util.Comparator;
30  import java.util.Enumeration;
31  import java.util.HashMap;
32  import java.util.LinkedList;
33  import java.util.List;
34  import java.util.Map;
35  import java.util.zip.Inflater;
36  import java.util.zip.InflaterInputStream;
37  import java.util.zip.ZipException;
38  
39  import org.apache.commons.compress.utils.IOUtils;
40  
41  import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
42  import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
43  import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
44  import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
45  import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
46  
47  /**
 * Replacement for <code>java.util.zip.ZipFile</code>.
49   *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives.  Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
56   *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway.  Like
 * <code>java.util.zip.ZipFile</code>, it uses RandomAccessFile under the
 * covers and supports compressed and uncompressed entries.  As of
 * Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
64   *
65   * <p>The method signatures mimic the ones of
66   * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
67   *
68   * <ul>
69   *   <li>There is no getName method.</li>
70   *   <li>entries has been renamed to getEntries.</li>
71   *   <li>getEntries and getEntry return
72   *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
73   *   instances.</li>
74   *   <li>close is allowed to throw IOException.</li>
75   * </ul>
76   *
77   */
78  public class ZipFile implements Closeable {
79      private static final int HASH_SIZE = 509;
80      static final int NIBLET_MASK = 0x0f;
81      static final int BYTE_SHIFT = 8;
82      private static final int POS_0 = 0;
83      private static final int POS_1 = 1;
84      private static final int POS_2 = 2;
85      private static final int POS_3 = 3;
86  
87      /**
88       * List of entries in the order they appear inside the central
89       * directory.
90       */
91      private final List<ZipArchiveEntry> entries =
92          new LinkedList<ZipArchiveEntry>();
93  
94      /**
95       * Maps String to list of ZipArchiveEntrys, name -> actual entries.
96       */
97      private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
98          new HashMap<String, LinkedList<ZipArchiveEntry>>(HASH_SIZE);
99  
100     private static final class OffsetEntry {
101         private long headerOffset = -1;
102         private long dataOffset = -1;
103     }
104 
105     /**
106      * The encoding to use for filenames and the file comment.
107      *
108      * <p>For a list of possible values see <a
109      * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
110      * Defaults to UTF-8.</p>
111      */
112     private final String encoding;
113 
114     /**
115      * The zip encoding to use for filenames and the file comment.
116      */
117     private final ZipEncoding zipEncoding;
118 
119     /**
120      * File name of actual source.
121      */
122     private final String archiveName;
123 
124     /**
125      * The actual data source.
126      */
127     private final RandomAccessFile archive;
128 
129     /**
130      * Whether to look for and use Unicode extra fields.
131      */
132     private final boolean useUnicodeExtraFields;
133 
134     /**
135      * Whether the file is closed.
136      */
137     private boolean closed;
138 
139     // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
140     private final byte[] DWORD_BUF = new byte[DWORD];
141     private final byte[] WORD_BUF = new byte[WORD];
142     private final byte[] CFH_BUF = new byte[CFH_LEN];
143     private final byte[] SHORT_BUF = new byte[SHORT];
144 
/**
 * Opens the given file for reading, decoding file names as UTF-8
 * and scanning for unicode extra fields.
 *
 * @param f the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(File f) throws IOException {
    this(f, ZipEncodingHelper.UTF8, true);
}
155 
/**
 * Opens the file of the given name for reading, decoding file names
 * as UTF-8 and scanning for unicode extra fields.
 *
 * @param name name of the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(String name) throws IOException {
    this(new File(name), ZipEncodingHelper.UTF8, true);
}
166 
/**
 * Opens the file of the given name for reading, decoding file names
 * with the specified encoding and scanning for unicode extra fields.
 *
 * @param name name of the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(String name, String encoding) throws IOException {
    this(new File(name), encoding);
}
180 
/**
 * Opens the given file for reading, decoding file names with the
 * specified encoding and scanning for unicode extra fields.
 *
 * @param f the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(File f, String encoding) throws IOException {
    // unicode extra fields are always scanned by this variant
    this(f, encoding, true);
}
194 
195     /**
196      * Opens the given file for reading, assuming the specified
197      * encoding for file names.
198      *
199      * @param f the archive.
200      * @param encoding the encoding to use for file names, use null
201      * for the platform's default encoding
202      * @param useUnicodeExtraFields whether to use InfoZIP Unicode
203      * Extra Fields (if present) to set the file names.
204      *
205      * @throws IOException if an error occurs while reading the file.
206      */
207     public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
208         throws IOException {
209         this.archiveName = f.getAbsolutePath();
210         this.encoding = encoding;
211         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
212         this.useUnicodeExtraFields = useUnicodeExtraFields;
213         archive = new RandomAccessFile(f, "r");
214         boolean success = false;
215         try {
216             Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
217                 populateFromCentralDirectory();
218             resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
219             success = true;
220         } finally {
221             if (!success) {
222                 closed = true;
223                 IOUtils.closeQuietly(archive);
224             }
225         }
226     }
227 
228     /**
229      * The encoding to use for filenames and the file comment.
230      *
231      * @return null if using the platform's default character encoding.
232      */
233     public String getEncoding() {
234         return encoding;
235     }
236 
237     /**
238      * Closes the archive.
239      * @throws IOException if an error occurs closing the archive.
240      */
241     public void close() throws IOException {
242         // this flag is only written here and read in finalize() which
243         // can never be run in parallel.
244         // no synchronization needed.
245         closed = true;
246 
247         archive.close();
248     }
249 
250     /**
251      * close a zipfile quietly; throw no io fault, do nothing
252      * on a null parameter
253      * @param zipfile file to close, can be null
254      */
255     public static void closeQuietly(ZipFile zipfile) {
256         IOUtils.closeQuietly(zipfile);
257     }
258 
259     /**
260      * Returns all entries.
261      *
262      * <p>Entries will be returned in the same order they appear
263      * within the archive's central directory.</p>
264      *
265      * @return all entries as {@link ZipArchiveEntry} instances
266      */
267     public Enumeration<ZipArchiveEntry> getEntries() {
268         return Collections.enumeration(entries);
269     }
270 
271     /**
272      * Returns all entries in physical order.
273      *
274      * <p>Entries will be returned in the same order their contents
275      * appear within the archive.</p>
276      *
277      * @return all entries as {@link ZipArchiveEntry} instances
278      *
279      * @since 1.1
280      */
281     public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
282         ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[0]);
283         Arrays.sort(allEntries, OFFSET_COMPARATOR);
284         return Collections.enumeration(Arrays.asList(allEntries));
285     }
286 
287     /**
288      * Returns a named entry - or {@code null} if no entry by
289      * that name exists.
290      *
291      * <p>If multiple entries with the same name exist the first entry
292      * in the archive's central directory by that name is
293      * returned.</p>
294      *
295      * @param name name of the entry.
296      * @return the ZipArchiveEntry corresponding to the given name - or
297      * {@code null} if not present.
298      */
299     public ZipArchiveEntry getEntry(String name) {
300         LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
301         return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
302     }
303 
304     /**
305      * Returns all named entries in the same order they appear within
306      * the archive's central directory.
307      *
308      * @param name name of the entry.
309      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
310      * given name
311      * @since 1.6
312      */
313     public Iterable<ZipArchiveEntry> getEntries(String name) {
314         List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
315         return entriesOfThatName != null ? entriesOfThatName
316             : Collections.<ZipArchiveEntry>emptyList();
317     }
318 
319     /**
320      * Returns all named entries in the same order their contents
321      * appear within the archive.
322      *
323      * @param name name of the entry.
324      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
325      * given name
326      * @since 1.6
327      */
328     public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(String name) {
329         ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
330         if (nameMap.containsKey(name)) {
331             entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
332             Arrays.sort(entriesOfThatName, OFFSET_COMPARATOR);
333         }
334         return Arrays.asList(entriesOfThatName);
335     }
336 
337     /**
338      * Whether this class is able to read the given entry.
339      *
340      * <p>May return false if it is set up to use encryption or a
341      * compression method that hasn't been implemented yet.</p>
342      * @since 1.1
343      */
344     public boolean canReadEntryData(ZipArchiveEntry ze) {
345         return ZipUtil.canHandleEntryData(ze);
346     }
347 
348     /**
349      * Returns an InputStream for reading the contents of the given entry.
350      *
351      * @param ze the entry to get the stream for.
352      * @return a stream to read the entry from.
353      * @throws IOException if unable to create an input stream from the zipentry
354      * @throws ZipException if the zipentry uses an unsupported feature
355      */
356     public InputStream getInputStream(ZipArchiveEntry ze)
357         throws IOException, ZipException {
358         if (!(ze instanceof Entry)) {
359             return null;
360         }
361         // cast valididty is checked just above
362         OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry();
363         ZipUtil.checkRequestedFeatures(ze);
364         long start = offsetEntry.dataOffset;
365         BoundedInputStream bis =
366             new BoundedInputStream(start, ze.getCompressedSize());
367         switch (ZipMethod.getMethodByCode(ze.getMethod())) {
368             case STORED:
369                 return bis;
370             case UNSHRINKING:
371                 return new UnshrinkingInputStream(bis);
372             case IMPLODING:
373                 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
374                         ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis));
375             case DEFLATED:
376                 bis.addDummy();
377                 final Inflater inflater = new Inflater(true);
378                 return new InflaterInputStream(bis, inflater) {
379                     @Override
380                     public void close() throws IOException {
381                         super.close();
382                         inflater.end();
383                     }
384                 };
385             default:
386                 throw new ZipException("Found unsupported compression method "
387                                        + ze.getMethod());
388         }
389     }
390 
391     /**
392      * <p>
393      * Convenience method to return the entry's content as a String if isUnixSymlink()
394      * returns true for it, otherwise returns null.
395      * </p>
396      *
397      * <p>This method assumes the symbolic link's file name uses the
398      * same encoding that as been specified for this ZipFile.</p>
399      *
400      * @param entry ZipArchiveEntry object that represents the symbolic link
401      * @return entry's content as a String
402      * @throws IOException problem with content's input stream
403      * @since 1.5
404      */
405     public String getUnixSymlink(ZipArchiveEntry entry) throws IOException {
406         if (entry != null && entry.isUnixSymlink()) {
407             InputStream in = null;
408             try {
409                 in = getInputStream(entry);
410                 byte[] symlinkBytes = IOUtils.toByteArray(in);
411                 return zipEncoding.decode(symlinkBytes);
412             } finally {
413                 if (in != null) {
414                     in.close();
415                 }
416             }
417         } else {
418             return null;
419         }
420     }
421 
422     /**
423      * Ensures that the close method of this zipfile is called when
424      * there are no more references to it.
425      * @see #close()
426      */
427     @Override
428     protected void finalize() throws Throwable {
429         try {
430             if (!closed) {
431                 System.err.println("Cleaning up unclosed ZipFile for archive "
432                                    + archiveName);
433                 close();
434             }
435         } finally {
436             super.finalize();
437         }
438     }
439 
440     /**
441      * Length of a "central directory" entry structure without file
442      * name, extra fields or comment.
443      */
444     private static final int CFH_LEN =
445         /* version made by                 */ SHORT
446         /* version needed to extract       */ + SHORT
447         /* general purpose bit flag        */ + SHORT
448         /* compression method              */ + SHORT
449         /* last mod file time              */ + SHORT
450         /* last mod file date              */ + SHORT
451         /* crc-32                          */ + WORD
452         /* compressed size                 */ + WORD
453         /* uncompressed size               */ + WORD
454         /* filename length                 */ + SHORT
455         /* extra field length              */ + SHORT
456         /* file comment length             */ + SHORT
457         /* disk number start               */ + SHORT
458         /* internal file attributes        */ + SHORT
459         /* external file attributes        */ + WORD
460         /* relative offset of local header */ + WORD;
461 
462     private static final long CFH_SIG =
463         ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
464 
465     /**
466      * Reads the central directory of the given archive and populates
467      * the internal tables with ZipArchiveEntry instances.
468      *
469      * <p>The ZipArchiveEntrys will know all data that can be obtained from
470      * the central directory alone, but not the data that requires the
471      * local file header or additional data to be read.</p>
472      *
473      * @return a map of zipentries that didn't have the language
474      * encoding flag set when read.
475      */
476     private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
477         throws IOException {
478         HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
479             new HashMap<ZipArchiveEntry, NameAndComment>();
480 
481         positionAtCentralDirectory();
482 
483         archive.readFully(WORD_BUF);
484         long sig = ZipLong.getValue(WORD_BUF);
485 
486         if (sig != CFH_SIG && startsWithLocalFileHeader()) {
487             throw new IOException("central directory is empty, can't expand"
488                                   + " corrupt archive.");
489         }
490 
491         while (sig == CFH_SIG) {
492             readCentralDirectoryEntry(noUTF8Flag);
493             archive.readFully(WORD_BUF);
494             sig = ZipLong.getValue(WORD_BUF);
495         }
496         return noUTF8Flag;
497     }
498 
499     /**
500      * Reads an individual entry of the central directory, creats an
501      * ZipArchiveEntry from it and adds it to the global maps.
502      *
503      * @param noUTF8Flag map used to collect entries that don't have
504      * their UTF-8 flag set and whose name will be set by data read
505      * from the local file header later.  The current entry may be
506      * added to this map.
507      */
508     private void
509         readCentralDirectoryEntry(Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
510         throws IOException {
511         archive.readFully(CFH_BUF);
512         int off = 0;
513         OffsetEntry offset = new OffsetEntry();
514         Entry ze = new Entry(offset);
515 
516         int versionMadeBy = ZipShort.getValue(CFH_BUF, off);
517         off += SHORT;
518         ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
519 
520         off += SHORT; // skip version info
521 
522         final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(CFH_BUF, off);
523         final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
524         final ZipEncoding entryEncoding =
525             hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
526         ze.setGeneralPurposeBit(gpFlag);
527 
528         off += SHORT;
529 
530         ze.setMethod(ZipShort.getValue(CFH_BUF, off));
531         off += SHORT;
532 
533         long time = ZipUtil.dosToJavaTime(ZipLong.getValue(CFH_BUF, off));
534         ze.setTime(time);
535         off += WORD;
536 
537         ze.setCrc(ZipLong.getValue(CFH_BUF, off));
538         off += WORD;
539 
540         ze.setCompressedSize(ZipLong.getValue(CFH_BUF, off));
541         off += WORD;
542 
543         ze.setSize(ZipLong.getValue(CFH_BUF, off));
544         off += WORD;
545 
546         int fileNameLen = ZipShort.getValue(CFH_BUF, off);
547         off += SHORT;
548 
549         int extraLen = ZipShort.getValue(CFH_BUF, off);
550         off += SHORT;
551 
552         int commentLen = ZipShort.getValue(CFH_BUF, off);
553         off += SHORT;
554 
555         int diskStart = ZipShort.getValue(CFH_BUF, off);
556         off += SHORT;
557 
558         ze.setInternalAttributes(ZipShort.getValue(CFH_BUF, off));
559         off += SHORT;
560 
561         ze.setExternalAttributes(ZipLong.getValue(CFH_BUF, off));
562         off += WORD;
563 
564         byte[] fileName = new byte[fileNameLen];
565         archive.readFully(fileName);
566         ze.setName(entryEncoding.decode(fileName), fileName);
567 
568         // LFH offset,
569         offset.headerOffset = ZipLong.getValue(CFH_BUF, off);
570         // data offset will be filled later
571         entries.add(ze);
572 
573         byte[] cdExtraData = new byte[extraLen];
574         archive.readFully(cdExtraData);
575         ze.setCentralDirectoryExtra(cdExtraData);
576 
577         setSizesAndOffsetFromZip64Extra(ze, offset, diskStart);
578 
579         byte[] comment = new byte[commentLen];
580         archive.readFully(comment);
581         ze.setComment(entryEncoding.decode(comment));
582 
583         if (!hasUTF8Flag && useUnicodeExtraFields) {
584             noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
585         }
586     }
587 
588     /**
589      * If the entry holds a Zip64 extended information extra field,
590      * read sizes from there if the entry's sizes are set to
591      * 0xFFFFFFFFF, do the same for the offset of the local file
592      * header.
593      *
594      * <p>Ensures the Zip64 extra either knows both compressed and
595      * uncompressed size or neither of both as the internal logic in
596      * ExtraFieldUtils forces the field to create local header data
597      * even if they are never used - and here a field with only one
598      * size would be invalid.</p>
599      */
600     private void setSizesAndOffsetFromZip64Extra(ZipArchiveEntry ze,
601                                                  OffsetEntry offset,
602                                                  int diskStart)
603         throws IOException {
604         Zip64ExtendedInformationExtraField z64 =
605             (Zip64ExtendedInformationExtraField)
606             ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
607         if (z64 != null) {
608             boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
609             boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
610             boolean hasRelativeHeaderOffset =
611                 offset.headerOffset == ZIP64_MAGIC;
612             z64.reparseCentralDirectoryData(hasUncompressedSize,
613                                             hasCompressedSize,
614                                             hasRelativeHeaderOffset,
615                                             diskStart == ZIP64_MAGIC_SHORT);
616 
617             if (hasUncompressedSize) {
618                 ze.setSize(z64.getSize().getLongValue());
619             } else if (hasCompressedSize) {
620                 z64.setSize(new ZipEightByteInteger(ze.getSize()));
621             }
622 
623             if (hasCompressedSize) {
624                 ze.setCompressedSize(z64.getCompressedSize().getLongValue());
625             } else if (hasUncompressedSize) {
626                 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
627             }
628 
629             if (hasRelativeHeaderOffset) {
630                 offset.headerOffset =
631                     z64.getRelativeHeaderOffset().getLongValue();
632             }
633         }
634     }
635 
636     /**
637      * Length of the "End of central directory record" - which is
638      * supposed to be the last structure of the archive - without file
639      * comment.
640      */
641     static final int MIN_EOCD_SIZE =
642         /* end of central dir signature    */ WORD
643         /* number of this disk             */ + SHORT
644         /* number of the disk with the     */
645         /* start of the central directory  */ + SHORT
646         /* total number of entries in      */
647         /* the central dir on this disk    */ + SHORT
648         /* total number of entries in      */
649         /* the central dir                 */ + SHORT
650         /* size of the central directory   */ + WORD
651         /* offset of start of central      */
652         /* directory with respect to       */
653         /* the starting disk number        */ + WORD
654         /* zipfile comment length          */ + SHORT;
655 
656     /**
657      * Maximum length of the "End of central directory record" with a
658      * file comment.
659      */
660     private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
661         /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
662 
663     /**
664      * Offset of the field that holds the location of the first
665      * central directory entry inside the "End of central directory
666      * record" relative to the start of the "End of central directory
667      * record".
668      */
669     private static final int CFD_LOCATOR_OFFSET =
670         /* end of central dir signature    */ WORD
671         /* number of this disk             */ + SHORT
672         /* number of the disk with the     */
673         /* start of the central directory  */ + SHORT
674         /* total number of entries in      */
675         /* the central dir on this disk    */ + SHORT
676         /* total number of entries in      */
677         /* the central dir                 */ + SHORT
678         /* size of the central directory   */ + WORD;
679 
680     /**
681      * Length of the "Zip64 end of central directory locator" - which
682      * should be right in front of the "end of central directory
683      * record" if one is present at all.
684      */
685     private static final int ZIP64_EOCDL_LENGTH =
686         /* zip64 end of central dir locator sig */ WORD
687         /* number of the disk with the start    */
688         /* start of the zip64 end of            */
689         /* central directory                    */ + WORD
690         /* relative offset of the zip64         */
691         /* end of central directory record      */ + DWORD
692         /* total number of disks                */ + WORD;
693 
694     /**
695      * Offset of the field that holds the location of the "Zip64 end
696      * of central directory record" inside the "Zip64 end of central
697      * directory locator" relative to the start of the "Zip64 end of
698      * central directory locator".
699      */
700     private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
701         /* zip64 end of central dir locator sig */ WORD
702         /* number of the disk with the start    */
703         /* start of the zip64 end of            */
704         /* central directory                    */ + WORD;
705 
706     /**
707      * Offset of the field that holds the location of the first
708      * central directory entry inside the "Zip64 end of central
709      * directory record" relative to the start of the "Zip64 end of
710      * central directory record".
711      */
712     private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
713         /* zip64 end of central dir        */
714         /* signature                       */ WORD
715         /* size of zip64 end of central    */
716         /* directory record                */ + DWORD
717         /* version made by                 */ + SHORT
718         /* version needed to extract       */ + SHORT
719         /* number of this disk             */ + WORD
720         /* number of the disk with the     */
721         /* start of the central directory  */ + WORD
722         /* total number of entries in the  */
723         /* central directory on this disk  */ + DWORD
724         /* total number of entries in the  */
725         /* central directory               */ + DWORD
726         /* size of the central directory   */ + DWORD;
727 
728     /**
729      * Searches for either the &quot;Zip64 end of central directory
730      * locator&quot; or the &quot;End of central dir record&quot;, parses
731      * it and positions the stream at the first central directory
732      * record.
733      */
734     private void positionAtCentralDirectory()
735         throws IOException {
736         positionAtEndOfCentralDirectoryRecord();
737         boolean found = false;
738         boolean searchedForZip64EOCD =
739             archive.getFilePointer() > ZIP64_EOCDL_LENGTH;
740         if (searchedForZip64EOCD) {
741             archive.seek(archive.getFilePointer() - ZIP64_EOCDL_LENGTH);
742             archive.readFully(WORD_BUF);
743             found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
744                                   WORD_BUF);
745         }
746         if (!found) {
747             // not a ZIP64 archive
748             if (searchedForZip64EOCD) {
749                 skipBytes(ZIP64_EOCDL_LENGTH - WORD);
750             }
751             positionAtCentralDirectory32();
752         } else {
753             positionAtCentralDirectory64();
754         }
755     }
756 
757     /**
758      * Parses the &quot;Zip64 end of central directory locator&quot;,
759      * finds the &quot;Zip64 end of central directory record&quot; using the
760      * parsed information, parses that and positions the stream at the
761      * first central directory record.
762      *
763      * Expects stream to be positioned right behind the &quot;Zip64
764      * end of central directory locator&quot;'s signature.
765      */
766     private void positionAtCentralDirectory64()
767         throws IOException {
768         skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
769                   - WORD /* signature has already been read */);
770         archive.readFully(DWORD_BUF);
771         archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF));
772         archive.readFully(WORD_BUF);
773         if (!Arrays.equals(WORD_BUF, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
774             throw new ZipException("archive's ZIP64 end of central "
775                                    + "directory locator is corrupt.");
776         }
777         skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
778                   - WORD /* signature has already been read */);
779         archive.readFully(DWORD_BUF);
780         archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF));
781     }
782 
783     /**
784      * Parses the &quot;End of central dir record&quot; and positions
785      * the stream at the first central directory record.
786      *
787      * Expects stream to be positioned at the beginning of the
788      * &quot;End of central dir record&quot;.
789      */
790     private void positionAtCentralDirectory32()
791         throws IOException {
792         skipBytes(CFD_LOCATOR_OFFSET);
793         archive.readFully(WORD_BUF);
794         archive.seek(ZipLong.getValue(WORD_BUF));
795     }
796 
797     /**
798      * Searches for the and positions the stream at the start of the
799      * &quot;End of central dir record&quot;.
800      */
801     private void positionAtEndOfCentralDirectoryRecord()
802         throws IOException {
803         boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
804                                              ZipArchiveOutputStream.EOCD_SIG);
805         if (!found) {
806             throw new ZipException("archive is not a ZIP archive");
807         }
808     }
809 
810     /**
811      * Searches the archive backwards from minDistance to maxDistance
812      * for the given signature, positions the RandomaccessFile right
813      * at the signature if it has been found.
814      */
815     private boolean tryToLocateSignature(long minDistanceFromEnd,
816                                          long maxDistanceFromEnd,
817                                          byte[] sig) throws IOException {
818         boolean found = false;
819         long off = archive.length() - minDistanceFromEnd;
820         final long stopSearching =
821             Math.max(0L, archive.length() - maxDistanceFromEnd);
822         if (off >= 0) {
823             for (; off >= stopSearching; off--) {
824                 archive.seek(off);
825                 int curr = archive.read();
826                 if (curr == -1) {
827                     break;
828                 }
829                 if (curr == sig[POS_0]) {
830                     curr = archive.read();
831                     if (curr == sig[POS_1]) {
832                         curr = archive.read();
833                         if (curr == sig[POS_2]) {
834                             curr = archive.read();
835                             if (curr == sig[POS_3]) {
836                                 found = true;
837                                 break;
838                             }
839                         }
840                     }
841                 }
842             }
843         }
844         if (found) {
845             archive.seek(off);
846         }
847         return found;
848     }
849 
850     /**
851      * Skips the given number of bytes or throws an EOFException if
852      * skipping failed.
853      */ 
854     private void skipBytes(final int count) throws IOException {
855         int totalSkipped = 0;
856         while (totalSkipped < count) {
857             int skippedNow = archive.skipBytes(count - totalSkipped);
858             if (skippedNow <= 0) {
859                 throw new EOFException();
860             }
861             totalSkipped += skippedNow;
862         }
863     }
864 
865     /**
866      * Number of bytes in local file header up to the &quot;length of
867      * filename&quot; entry.
868      */
869     private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
870         /* local file header signature     */ WORD
871         /* version needed to extract       */ + SHORT
872         /* general purpose bit flag        */ + SHORT
873         /* compression method              */ + SHORT
874         /* last mod file time              */ + SHORT
875         /* last mod file date              */ + SHORT
876         /* crc-32                          */ + WORD
877         /* compressed size                 */ + WORD
878         /* uncompressed size               */ + WORD;
879 
880     /**
881      * Walks through all recorded entries and adds the data available
882      * from the local file header.
883      *
884      * <p>Also records the offsets for the data to read from the
885      * entries.</p>
886      */
887     private void resolveLocalFileHeaderData(Map<ZipArchiveEntry, NameAndComment>
888                                             entriesWithoutUTF8Flag)
889         throws IOException {
890         for (ZipArchiveEntry zipArchiveEntry : entries) {
891             // entries is filled in populateFromCentralDirectory and
892             // never modified
893             Entry ze = (Entry) zipArchiveEntry;
894             OffsetEntry offsetEntry = ze.getOffsetEntry();
895             long offset = offsetEntry.headerOffset;
896             archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
897             archive.readFully(SHORT_BUF);
898             int fileNameLen = ZipShort.getValue(SHORT_BUF);
899             archive.readFully(SHORT_BUF);
900             int extraFieldLen = ZipShort.getValue(SHORT_BUF);
901             int lenToSkip = fileNameLen;
902             while (lenToSkip > 0) {
903                 int skipped = archive.skipBytes(lenToSkip);
904                 if (skipped <= 0) {
905                     throw new IOException("failed to skip file name in"
906                                           + " local file header");
907                 }
908                 lenToSkip -= skipped;
909             }
910             byte[] localExtraData = new byte[extraFieldLen];
911             archive.readFully(localExtraData);
912             ze.setExtra(localExtraData);
913             offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
914                 + SHORT + SHORT + fileNameLen + extraFieldLen;
915 
916             if (entriesWithoutUTF8Flag.containsKey(ze)) {
917                 NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
918                 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
919                                                          nc.comment);
920             }
921 
922             String name = ze.getName();
923             LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
924             if (entriesOfThatName == null) {
925                 entriesOfThatName = new LinkedList<ZipArchiveEntry>();
926                 nameMap.put(name, entriesOfThatName);
927             }
928             entriesOfThatName.addLast(ze);
929         }
930     }
931 
932     /**
933      * Checks whether the archive starts with a LFH.  If it doesn't,
934      * it may be an empty archive.
935      */
936     private boolean startsWithLocalFileHeader() throws IOException {
937         archive.seek(0);
938         archive.readFully(WORD_BUF);
939         return Arrays.equals(WORD_BUF, ZipArchiveOutputStream.LFH_SIG);
940     }
941 
942     /**
943      * InputStream that delegates requests to the underlying
944      * RandomAccessFile, making sure that only bytes from a certain
945      * range can be read.
946      */
947     private class BoundedInputStream extends InputStream {
948         private long remaining;
949         private long loc;
950         private boolean addDummyByte = false;
951 
952         BoundedInputStream(long start, long remaining) {
953             this.remaining = remaining;
954             loc = start;
955         }
956 
957         @Override
958         public int read() throws IOException {
959             if (remaining-- <= 0) {
960                 if (addDummyByte) {
961                     addDummyByte = false;
962                     return 0;
963                 }
964                 return -1;
965             }
966             synchronized (archive) {
967                 archive.seek(loc++);
968                 return archive.read();
969             }
970         }
971 
972         @Override
973         public int read(byte[] b, int off, int len) throws IOException {
974             if (remaining <= 0) {
975                 if (addDummyByte) {
976                     addDummyByte = false;
977                     b[off] = 0;
978                     return 1;
979                 }
980                 return -1;
981             }
982 
983             if (len <= 0) {
984                 return 0;
985             }
986 
987             if (len > remaining) {
988                 len = (int) remaining;
989             }
990             int ret = -1;
991             synchronized (archive) {
992                 archive.seek(loc);
993                 ret = archive.read(b, off, len);
994             }
995             if (ret > 0) {
996                 loc += ret;
997                 remaining -= ret;
998             }
999             return ret;
1000         }
1001 
1002         /**
1003          * Inflater needs an extra dummy byte for nowrap - see
1004          * Inflater's javadocs.
1005          */
1006         void addDummy() {
1007             addDummyByte = true;
1008         }
1009     }
1010 
1011     private static final class NameAndComment {
1012         private final byte[] name;
1013         private final byte[] comment;
1014         private NameAndComment(byte[] name, byte[] comment) {
1015             this.name = name;
1016             this.comment = comment;
1017         }
1018     }
1019 
1020     /**
1021      * Compares two ZipArchiveEntries based on their offset within the archive.
1022      *
1023      * <p>Won't return any meaningful results if one of the entries
1024      * isn't part of the archive at all.</p>
1025      *
1026      * @since 1.1
1027      */
1028     private final Comparator<ZipArchiveEntry> OFFSET_COMPARATOR =
1029         new Comparator<ZipArchiveEntry>() {
1030         public int compare(ZipArchiveEntry e1, ZipArchiveEntry e2) {
1031             if (e1 == e2) {
1032                 return 0;
1033             }
1034 
1035             Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
1036             Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
1037             if (ent1 == null) {
1038                 return 1;
1039             }
1040             if (ent2 == null) {
1041                 return -1;
1042             }
1043             long val = (ent1.getOffsetEntry().headerOffset
1044                         - ent2.getOffsetEntry().headerOffset);
1045             return val == 0 ? 0 : val < 0 ? -1 : +1;
1046         }
1047     };
1048 
1049     /**
1050      * Extends ZipArchiveEntry to store the offset within the archive.
1051      */
1052     private static class Entry extends ZipArchiveEntry {
1053 
1054         private final OffsetEntry offsetEntry;
1055 
1056         Entry(OffsetEntry offset) {
1057             this.offsetEntry = offset;
1058         }
1059 
1060         OffsetEntry getOffsetEntry() {
1061             return offsetEntry;
1062         }
1063 
1064         @Override
1065         public int hashCode() {
1066             return 3 * super.hashCode()
1067                 + (int) (offsetEntry.headerOffset % Integer.MAX_VALUE);
1068         }
1069 
1070         @Override
1071         public boolean equals(Object other) {
1072             if (super.equals(other)) {
1073                 // super.equals would return false if other were not an Entry
1074                 Entry otherEntry = (Entry) other;
1075                 return offsetEntry.headerOffset
1076                         == otherEntry.offsetEntry.headerOffset
1077                     && offsetEntry.dataOffset
1078                         == otherEntry.offsetEntry.dataOffset;
1079             }
1080             return false;
1081         }
1082     }
1083 }