View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   *
17   */
18  package org.apache.commons.compress.archivers.zip;
19  
20  import java.io.BufferedInputStream;
21  import java.io.Closeable;
22  import java.io.EOFException;
23  import java.io.File;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.nio.ByteBuffer;
27  import java.nio.channels.SeekableByteChannel;
28  import java.nio.file.Files;
29  import java.nio.file.StandardOpenOption;
30  import java.util.Arrays;
31  import java.util.Collections;
32  import java.util.Comparator;
33  import java.util.Enumeration;
34  import java.util.EnumSet;
35  import java.util.HashMap;
36  import java.util.LinkedList;
37  import java.util.List;
38  import java.util.Map;
39  import java.util.zip.Inflater;
40  import java.util.zip.InflaterInputStream;
41  import java.util.zip.ZipException;
42  
43  import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
44  import org.apache.commons.compress.utils.IOUtils;
45  
46  import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
47  import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
48  import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
49  import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
50  import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
51  
52  /**
 * Replacement for <code>java.util.zip.ZipFile</code>.
 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives.  Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
61   *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway.  It uses a
 * SeekableByteChannel under the covers and, like
 * <code>java.util.zip.ZipFile</code>, supports compressed and
 * uncompressed entries.  As of Apache Commons Compress 1.3 it also
 * transparently supports Zip64 extensions and thus individual entries
 * and archives larger than 4 GB or with more than 65536 entries.</p>
69   *
70   * <p>The method signatures mimic the ones of
71   * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
72   *
73   * <ul>
74   *   <li>There is no getName method.</li>
75   *   <li>entries has been renamed to getEntries.</li>
76   *   <li>getEntries and getEntry return
77   *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
78   *   instances.</li>
79   *   <li>close is allowed to throw IOException.</li>
80   * </ul>
81   *
82   */
83  public class ZipFile implements Closeable {
84      private static final int HASH_SIZE = 509;
85      static final int NIBLET_MASK = 0x0f;
86      static final int BYTE_SHIFT = 8;
87      private static final int POS_0 = 0;
88      private static final int POS_1 = 1;
89      private static final int POS_2 = 2;
90      private static final int POS_3 = 3;
91  
92      /**
93       * List of entries in the order they appear inside the central
94       * directory.
95       */
96      private final List<ZipArchiveEntry> entries =
97          new LinkedList<>();
98  
99      /**
100      * Maps String to list of ZipArchiveEntrys, name -> actual entries.
101      */
102     private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
103         new HashMap<>(HASH_SIZE);
104 
105     private static final class OffsetEntry {
106         private long headerOffset = -1;
107         private long dataOffset = -1;
108     }
109 
110     /**
111      * The encoding to use for filenames and the file comment.
112      *
113      * <p>For a list of possible values see <a
114      * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
115      * Defaults to UTF-8.</p>
116      */
117     private final String encoding;
118 
119     /**
120      * The zip encoding to use for filenames and the file comment.
121      */
122     private final ZipEncoding zipEncoding;
123 
124     /**
125      * File name of actual source.
126      */
127     private final String archiveName;
128 
129     /**
130      * The actual data source.
131      */
132     private final SeekableByteChannel archive;
133 
134     /**
135      * Whether to look for and use Unicode extra fields.
136      */
137     private final boolean useUnicodeExtraFields;
138 
139     /**
140      * Whether the file is closed.
141      */
142     private volatile boolean closed = true;
143 
144     // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
145     private final byte[] dwordBuf = new byte[DWORD];
146     private final byte[] wordBuf = new byte[WORD];
147     private final byte[] cfhBuf = new byte[CFH_LEN];
148     private final byte[] shortBuf = new byte[SHORT];
149     private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
150     private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
151     private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
152 
153     /**
154      * Opens the given file for reading, assuming "UTF8" for file names.
155      *
156      * @param f the archive.
157      *
158      * @throws IOException if an error occurs while reading the file.
159      */
160     public ZipFile(final File f) throws IOException {
161         this(f, ZipEncodingHelper.UTF8);
162     }
163 
164     /**
165      * Opens the given file for reading, assuming "UTF8".
166      *
167      * @param name name of the archive.
168      *
169      * @throws IOException if an error occurs while reading the file.
170      */
171     public ZipFile(final String name) throws IOException {
172         this(new File(name), ZipEncodingHelper.UTF8);
173     }
174 
175     /**
176      * Opens the given file for reading, assuming the specified
177      * encoding for file names, scanning unicode extra fields.
178      *
179      * @param name name of the archive.
180      * @param encoding the encoding to use for file names, use null
181      * for the platform's default encoding
182      *
183      * @throws IOException if an error occurs while reading the file.
184      */
185     public ZipFile(final String name, final String encoding) throws IOException {
186         this(new File(name), encoding, true);
187     }
188 
189     /**
190      * Opens the given file for reading, assuming the specified
191      * encoding for file names and scanning for unicode extra fields.
192      *
193      * @param f the archive.
194      * @param encoding the encoding to use for file names, use null
195      * for the platform's default encoding
196      *
197      * @throws IOException if an error occurs while reading the file.
198      */
199     public ZipFile(final File f, final String encoding) throws IOException {
200         this(f, encoding, true);
201     }
202 
203     /**
204      * Opens the given file for reading, assuming the specified
205      * encoding for file names.
206      *
207      * @param f the archive.
208      * @param encoding the encoding to use for file names, use null
209      * for the platform's default encoding
210      * @param useUnicodeExtraFields whether to use InfoZIP Unicode
211      * Extra Fields (if present) to set the file names.
212      *
213      * @throws IOException if an error occurs while reading the file.
214      */
215     public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
216         throws IOException {
217         this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
218              f.getAbsolutePath(), encoding, useUnicodeExtraFields, true);
219     }
220 
221     /**
222      * Opens the given channel for reading, assuming "UTF8" for file names.
223      *
224      * <p>{@link
225      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
226      * allows you to read from an in-memory archive.</p>
227      *
228      * @param channel the archive.
229      *
230      * @throws IOException if an error occurs while reading the file.
231      * @since 1.13
232      */
233     public ZipFile(final SeekableByteChannel channel)
234             throws IOException {
235         this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
236     }
237 
238     /**
239      * Opens the given channel for reading, assuming the specified
240      * encoding for file names.
241      *
242      * <p>{@link
243      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
244      * allows you to read from an in-memory archive.</p>
245      *
246      * @param channel the archive.
247      * @param encoding the encoding to use for file names, use null
248      * for the platform's default encoding
249      *
250      * @throws IOException if an error occurs while reading the file.
251      * @since 1.13
252      */
253     public ZipFile(final SeekableByteChannel channel, final String encoding)
254         throws IOException {
255         this(channel, "unknown archive", encoding, true);
256     }
257 
258     /**
259      * Opens the given channel for reading, assuming the specified
260      * encoding for file names.
261      *
262      * <p>{@link
263      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
264      * allows you to read from an in-memory archive.</p>
265      *
266      * @param channel the archive.
267      * @param archiveName name of the archive, used for error messages only.
268      * @param encoding the encoding to use for file names, use null
269      * for the platform's default encoding
270      * @param useUnicodeExtraFields whether to use InfoZIP Unicode
271      * Extra Fields (if present) to set the file names.
272      *
273      * @throws IOException if an error occurs while reading the file.
274      * @since 1.13
275      */
276     public ZipFile(final SeekableByteChannel channel, final String archiveName,
277                    final String encoding, final boolean useUnicodeExtraFields)
278         throws IOException {
279         this(channel, archiveName, encoding, useUnicodeExtraFields, false);
280     }
281 
282     private ZipFile(final SeekableByteChannel channel, final String archiveName,
283                     final String encoding, final boolean useUnicodeExtraFields,
284                     final boolean closeOnError)
285         throws IOException {
286         this.archiveName = archiveName;
287         this.encoding = encoding;
288         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
289         this.useUnicodeExtraFields = useUnicodeExtraFields;
290         archive = channel;
291         boolean success = false;
292         try {
293             final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
294                 populateFromCentralDirectory();
295             resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
296             success = true;
297         } finally {
298             closed = !success;
299             if (!success && closeOnError) {
300                 IOUtils.closeQuietly(archive);
301             }
302         }
303     }
304 
305     /**
306      * The encoding to use for filenames and the file comment.
307      *
308      * @return null if using the platform's default character encoding.
309      */
310     public String getEncoding() {
311         return encoding;
312     }
313 
314     /**
315      * Closes the archive.
316      * @throws IOException if an error occurs closing the archive.
317      */
318     @Override
319     public void close() throws IOException {
320         // this flag is only written here and read in finalize() which
321         // can never be run in parallel.
322         // no synchronization needed.
323         closed = true;
324 
325         archive.close();
326     }
327 
328     /**
329      * close a zipfile quietly; throw no io fault, do nothing
330      * on a null parameter
331      * @param zipfile file to close, can be null
332      */
333     public static void closeQuietly(final ZipFile zipfile) {
334         IOUtils.closeQuietly(zipfile);
335     }
336 
337     /**
338      * Returns all entries.
339      *
340      * <p>Entries will be returned in the same order they appear
341      * within the archive's central directory.</p>
342      *
343      * @return all entries as {@link ZipArchiveEntry} instances
344      */
345     public Enumeration<ZipArchiveEntry> getEntries() {
346         return Collections.enumeration(entries);
347     }
348 
349     /**
350      * Returns all entries in physical order.
351      *
352      * <p>Entries will be returned in the same order their contents
353      * appear within the archive.</p>
354      *
355      * @return all entries as {@link ZipArchiveEntry} instances
356      *
357      * @since 1.1
358      */
359     public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
360         final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]);
361         Arrays.sort(allEntries, offsetComparator);
362         return Collections.enumeration(Arrays.asList(allEntries));
363     }
364 
365     /**
366      * Returns a named entry - or {@code null} if no entry by
367      * that name exists.
368      *
369      * <p>If multiple entries with the same name exist the first entry
370      * in the archive's central directory by that name is
371      * returned.</p>
372      *
373      * @param name name of the entry.
374      * @return the ZipArchiveEntry corresponding to the given name - or
375      * {@code null} if not present.
376      */
377     public ZipArchiveEntry getEntry(final String name) {
378         final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
379         return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
380     }
381 
382     /**
383      * Returns all named entries in the same order they appear within
384      * the archive's central directory.
385      *
386      * @param name name of the entry.
387      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
388      * given name
389      * @since 1.6
390      */
391     public Iterable<ZipArchiveEntry> getEntries(final String name) {
392         final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
393         return entriesOfThatName != null ? entriesOfThatName
394             : Collections.<ZipArchiveEntry>emptyList();
395     }
396 
397     /**
398      * Returns all named entries in the same order their contents
399      * appear within the archive.
400      *
401      * @param name name of the entry.
402      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
403      * given name
404      * @since 1.6
405      */
406     public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
407         ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
408         if (nameMap.containsKey(name)) {
409             entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
410             Arrays.sort(entriesOfThatName, offsetComparator);
411         }
412         return Arrays.asList(entriesOfThatName);
413     }
414 
415     /**
416      * Whether this class is able to read the given entry.
417      *
418      * <p>May return false if it is set up to use encryption or a
419      * compression method that hasn't been implemented yet.</p>
420      * @since 1.1
421      * @param ze the entry
422      * @return whether this class is able to read the given entry.
423      */
424     public boolean canReadEntryData(final ZipArchiveEntry ze) {
425         return ZipUtil.canHandleEntryData(ze);
426     }
427 
428     /**
429      * Expose the raw stream of the archive entry (compressed form).
430      *
431      * <p>This method does not relate to how/if we understand the payload in the
432      * stream, since we really only intend to move it on to somewhere else.</p>
433      *
434      * @param ze The entry to get the stream for
435      * @return The raw input stream containing (possibly) compressed data.
436      * @since 1.11
437      */
438     public InputStream getRawInputStream(final ZipArchiveEntry ze) {
439         if (!(ze instanceof Entry)) {
440             return null;
441         }
442         final OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry();
443         final long start = offsetEntry.dataOffset;
444         return new BoundedInputStream(start, ze.getCompressedSize());
445     }
446 
447 
448     /**
449      * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream.
450      * Compression and all other attributes will be as in this file.
451      * <p>This method transfers entries based on the central directory of the zip file.</p>
452      *
453      * @param target The zipArchiveOutputStream to write the entries to
454      * @param predicate A predicate that selects which entries to write
455      * @throws IOException on error
456      */
457     public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
458             throws IOException {
459         final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
460         while (src.hasMoreElements()) {
461             final ZipArchiveEntry entry = src.nextElement();
462             if (predicate.test( entry)) {
463                 target.addRawArchiveEntry(entry, getRawInputStream(entry));
464             }
465         }
466     }
467 
468     /**
469      * Returns an InputStream for reading the contents of the given entry.
470      *
471      * @param ze the entry to get the stream for.
472      * @return a stream to read the entry from.
473      * @throws IOException if unable to create an input stream from the zipentry
474      * @throws ZipException if the zipentry uses an unsupported feature
475      */
476     public InputStream getInputStream(final ZipArchiveEntry ze)
477         throws IOException, ZipException {
478         if (!(ze instanceof Entry)) {
479             return null;
480         }
481         // cast valididty is checked just above
482         final OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry();
483         ZipUtil.checkRequestedFeatures(ze);
484         final long start = offsetEntry.dataOffset;
485         // doesn't get closed if the method is not supported, but doesn't hold any resources either
486         final BoundedInputStream bis =
487             new BoundedInputStream(start, ze.getCompressedSize()); //NOSONAR
488         switch (ZipMethod.getMethodByCode(ze.getMethod())) {
489             case STORED:
490                 return bis;
491             case UNSHRINKING:
492                 return new UnshrinkingInputStream(bis);
493             case IMPLODING:
494                 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
495                         ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis));
496             case DEFLATED:
497                 bis.addDummy();
498                 final Inflater inflater = new Inflater(true);
499                 return new InflaterInputStream(bis, inflater) {
500                     @Override
501                     public void close() throws IOException {
502                         try {
503                             super.close();
504                         } finally {
505                             inflater.end();
506                         }
507                     }
508                 };
509             case BZIP2:
510                 return new BZip2CompressorInputStream(bis);
511             case AES_ENCRYPTED:
512             case ENHANCED_DEFLATED:
513             case EXPANDING_LEVEL_1:
514             case EXPANDING_LEVEL_2:
515             case EXPANDING_LEVEL_3:
516             case EXPANDING_LEVEL_4:
517             case JPEG:
518             case LZMA:
519             case PKWARE_IMPLODING:
520             case PPMD:
521             case TOKENIZATION:
522             case UNKNOWN:
523             case WAVPACK:
524             default:
525                 throw new ZipException("Found unsupported compression method "
526                                        + ze.getMethod());
527         }
528     }
529 
530     /**
531      * <p>
532      * Convenience method to return the entry's content as a String if isUnixSymlink()
533      * returns true for it, otherwise returns null.
534      * </p>
535      *
536      * <p>This method assumes the symbolic link's file name uses the
537      * same encoding that as been specified for this ZipFile.</p>
538      *
539      * @param entry ZipArchiveEntry object that represents the symbolic link
540      * @return entry's content as a String
541      * @throws IOException problem with content's input stream
542      * @since 1.5
543      */
544     public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
545         if (entry != null && entry.isUnixSymlink()) {
546             try (InputStream in = getInputStream(entry)) {
547                 return zipEncoding.decode(IOUtils.toByteArray(in));
548             }
549         }
550         return null;
551     }
552 
553     /**
554      * Ensures that the close method of this zipfile is called when
555      * there are no more references to it.
556      * @see #close()
557      */
558     @Override
559     protected void finalize() throws Throwable {
560         try {
561             if (!closed) {
562                 System.err.println("Cleaning up unclosed ZipFile for archive "
563                                    + archiveName);
564                 close();
565             }
566         } finally {
567             super.finalize();
568         }
569     }
570 
571     /**
572      * Length of a "central directory" entry structure without file
573      * name, extra fields or comment.
574      */
575     private static final int CFH_LEN =
576         /* version made by                 */ SHORT
577         /* version needed to extract       */ + SHORT
578         /* general purpose bit flag        */ + SHORT
579         /* compression method              */ + SHORT
580         /* last mod file time              */ + SHORT
581         /* last mod file date              */ + SHORT
582         /* crc-32                          */ + WORD
583         /* compressed size                 */ + WORD
584         /* uncompressed size               */ + WORD
585         /* filename length                 */ + SHORT
586         /* extra field length              */ + SHORT
587         /* file comment length             */ + SHORT
588         /* disk number start               */ + SHORT
589         /* internal file attributes        */ + SHORT
590         /* external file attributes        */ + WORD
591         /* relative offset of local header */ + WORD;
592 
593     private static final long CFH_SIG =
594         ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
595 
596     /**
597      * Reads the central directory of the given archive and populates
598      * the internal tables with ZipArchiveEntry instances.
599      *
600      * <p>The ZipArchiveEntrys will know all data that can be obtained from
601      * the central directory alone, but not the data that requires the
602      * local file header or additional data to be read.</p>
603      *
604      * @return a map of zipentries that didn't have the language
605      * encoding flag set when read.
606      */
607     private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
608         throws IOException {
609         final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
610             new HashMap<>();
611 
612         positionAtCentralDirectory();
613 
614         wordBbuf.rewind();
615         IOUtils.readFully(archive, wordBbuf);
616         long sig = ZipLong.getValue(wordBuf);
617 
618         if (sig != CFH_SIG && startsWithLocalFileHeader()) {
619             throw new IOException("central directory is empty, can't expand"
620                                   + " corrupt archive.");
621         }
622 
623         while (sig == CFH_SIG) {
624             readCentralDirectoryEntry(noUTF8Flag);
625             wordBbuf.rewind();
626             IOUtils.readFully(archive, wordBbuf);
627             sig = ZipLong.getValue(wordBuf);
628         }
629         return noUTF8Flag;
630     }
631 
632     /**
633      * Reads an individual entry of the central directory, creats an
634      * ZipArchiveEntry from it and adds it to the global maps.
635      *
636      * @param noUTF8Flag map used to collect entries that don't have
637      * their UTF-8 flag set and whose name will be set by data read
638      * from the local file header later.  The current entry may be
639      * added to this map.
640      */
641     private void
642         readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
643         throws IOException {
644         cfhBbuf.rewind();
645         IOUtils.readFully(archive, cfhBbuf);
646         int off = 0;
647         final OffsetEntry offset = new OffsetEntry();
648         final Entry ze = new Entry(offset);
649 
650         final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
651         off += SHORT;
652         ze.setVersionMadeBy(versionMadeBy);
653         ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
654 
655         ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
656         off += SHORT; // version required
657 
658         final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
659         final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
660         final ZipEncoding entryEncoding =
661             hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
662         ze.setGeneralPurposeBit(gpFlag);
663         ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
664 
665         off += SHORT;
666 
667         //noinspection MagicConstant
668         ze.setMethod(ZipShort.getValue(cfhBuf, off));
669         off += SHORT;
670 
671         final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
672         ze.setTime(time);
673         off += WORD;
674 
675         ze.setCrc(ZipLong.getValue(cfhBuf, off));
676         off += WORD;
677 
678         ze.setCompressedSize(ZipLong.getValue(cfhBuf, off));
679         off += WORD;
680 
681         ze.setSize(ZipLong.getValue(cfhBuf, off));
682         off += WORD;
683 
684         final int fileNameLen = ZipShort.getValue(cfhBuf, off);
685         off += SHORT;
686 
687         final int extraLen = ZipShort.getValue(cfhBuf, off);
688         off += SHORT;
689 
690         final int commentLen = ZipShort.getValue(cfhBuf, off);
691         off += SHORT;
692 
693         final int diskStart = ZipShort.getValue(cfhBuf, off);
694         off += SHORT;
695 
696         ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
697         off += SHORT;
698 
699         ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
700         off += WORD;
701 
702         final byte[] fileName = new byte[fileNameLen];
703         IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
704         ze.setName(entryEncoding.decode(fileName), fileName);
705 
706         // LFH offset,
707         offset.headerOffset = ZipLong.getValue(cfhBuf, off);
708         // data offset will be filled later
709         entries.add(ze);
710 
711         final byte[] cdExtraData = new byte[extraLen];
712         IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
713         ze.setCentralDirectoryExtra(cdExtraData);
714 
715         setSizesAndOffsetFromZip64Extra(ze, offset, diskStart);
716 
717         final byte[] comment = new byte[commentLen];
718         IOUtils.readFully(archive, ByteBuffer.wrap(comment));
719         ze.setComment(entryEncoding.decode(comment));
720 
721         if (!hasUTF8Flag && useUnicodeExtraFields) {
722             noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
723         }
724     }
725 
726     /**
727      * If the entry holds a Zip64 extended information extra field,
728      * read sizes from there if the entry's sizes are set to
729      * 0xFFFFFFFFF, do the same for the offset of the local file
730      * header.
731      *
732      * <p>Ensures the Zip64 extra either knows both compressed and
733      * uncompressed size or neither of both as the internal logic in
734      * ExtraFieldUtils forces the field to create local header data
735      * even if they are never used - and here a field with only one
736      * size would be invalid.</p>
737      */
738     private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
739                                                  final OffsetEntry offset,
740                                                  final int diskStart)
741         throws IOException {
742         final Zip64ExtendedInformationExtraField z64 =
743             (Zip64ExtendedInformationExtraField)
744             ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
745         if (z64 != null) {
746             final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
747             final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
748             final boolean hasRelativeHeaderOffset =
749                 offset.headerOffset == ZIP64_MAGIC;
750             z64.reparseCentralDirectoryData(hasUncompressedSize,
751                                             hasCompressedSize,
752                                             hasRelativeHeaderOffset,
753                                             diskStart == ZIP64_MAGIC_SHORT);
754 
755             if (hasUncompressedSize) {
756                 ze.setSize(z64.getSize().getLongValue());
757             } else if (hasCompressedSize) {
758                 z64.setSize(new ZipEightByteInteger(ze.getSize()));
759             }
760 
761             if (hasCompressedSize) {
762                 ze.setCompressedSize(z64.getCompressedSize().getLongValue());
763             } else if (hasUncompressedSize) {
764                 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
765             }
766 
767             if (hasRelativeHeaderOffset) {
768                 offset.headerOffset =
769                     z64.getRelativeHeaderOffset().getLongValue();
770             }
771         }
772     }
773 
774     /**
775      * Length of the "End of central directory record" - which is
776      * supposed to be the last structure of the archive - without file
777      * comment.
778      */
779     static final int MIN_EOCD_SIZE =
780         /* end of central dir signature    */ WORD
781         /* number of this disk             */ + SHORT
782         /* number of the disk with the     */
783         /* start of the central directory  */ + SHORT
784         /* total number of entries in      */
785         /* the central dir on this disk    */ + SHORT
786         /* total number of entries in      */
787         /* the central dir                 */ + SHORT
788         /* size of the central directory   */ + WORD
789         /* offset of start of central      */
790         /* directory with respect to       */
791         /* the starting disk number        */ + WORD
792         /* zipfile comment length          */ + SHORT;
793 
794     /**
795      * Maximum length of the "End of central directory record" with a
796      * file comment.
797      */
798     private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
799         /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
800 
801     /**
802      * Offset of the field that holds the location of the first
803      * central directory entry inside the "End of central directory
804      * record" relative to the start of the "End of central directory
805      * record".
806      */
807     private static final int CFD_LOCATOR_OFFSET =
808         /* end of central dir signature    */ WORD
809         /* number of this disk             */ + SHORT
810         /* number of the disk with the     */
811         /* start of the central directory  */ + SHORT
812         /* total number of entries in      */
813         /* the central dir on this disk    */ + SHORT
814         /* total number of entries in      */
815         /* the central dir                 */ + SHORT
816         /* size of the central directory   */ + WORD;
817 
818     /**
819      * Length of the "Zip64 end of central directory locator" - which
820      * should be right in front of the "end of central directory
821      * record" if one is present at all.
822      */
823     private static final int ZIP64_EOCDL_LENGTH =
824         /* zip64 end of central dir locator sig */ WORD
825         /* number of the disk with the start    */
826         /* start of the zip64 end of            */
827         /* central directory                    */ + WORD
828         /* relative offset of the zip64         */
829         /* end of central directory record      */ + DWORD
830         /* total number of disks                */ + WORD;
831 
832     /**
833      * Offset of the field that holds the location of the "Zip64 end
834      * of central directory record" inside the "Zip64 end of central
835      * directory locator" relative to the start of the "Zip64 end of
836      * central directory locator".
837      */
838     private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
839         /* zip64 end of central dir locator sig */ WORD
840         /* number of the disk with the start    */
841         /* start of the zip64 end of            */
842         /* central directory                    */ + WORD;
843 
844     /**
845      * Offset of the field that holds the location of the first
846      * central directory entry inside the "Zip64 end of central
847      * directory record" relative to the start of the "Zip64 end of
848      * central directory record".
849      */
850     private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
851         /* zip64 end of central dir        */
852         /* signature                       */ WORD
853         /* size of zip64 end of central    */
854         /* directory record                */ + DWORD
855         /* version made by                 */ + SHORT
856         /* version needed to extract       */ + SHORT
857         /* number of this disk             */ + WORD
858         /* number of the disk with the     */
859         /* start of the central directory  */ + WORD
860         /* total number of entries in the  */
861         /* central directory on this disk  */ + DWORD
862         /* total number of entries in the  */
863         /* central directory               */ + DWORD
864         /* size of the central directory   */ + DWORD;
865 
866     /**
867      * Searches for either the &quot;Zip64 end of central directory
868      * locator&quot; or the &quot;End of central dir record&quot;, parses
869      * it and positions the stream at the first central directory
870      * record.
871      */
872     private void positionAtCentralDirectory()
873         throws IOException {
874         positionAtEndOfCentralDirectoryRecord();
875         boolean found = false;
876         final boolean searchedForZip64EOCD =
877             archive.position() > ZIP64_EOCDL_LENGTH;
878         if (searchedForZip64EOCD) {
879             archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
880             wordBbuf.rewind();
881             IOUtils.readFully(archive, wordBbuf);
882             found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
883                                   wordBuf);
884         }
885         if (!found) {
886             // not a ZIP64 archive
887             if (searchedForZip64EOCD) {
888                 skipBytes(ZIP64_EOCDL_LENGTH - WORD);
889             }
890             positionAtCentralDirectory32();
891         } else {
892             positionAtCentralDirectory64();
893         }
894     }
895 
896     /**
897      * Parses the &quot;Zip64 end of central directory locator&quot;,
898      * finds the &quot;Zip64 end of central directory record&quot; using the
899      * parsed information, parses that and positions the stream at the
900      * first central directory record.
901      *
902      * Expects stream to be positioned right behind the &quot;Zip64
903      * end of central directory locator&quot;'s signature.
904      */
905     private void positionAtCentralDirectory64()
906         throws IOException {
907         skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
908                   - WORD /* signature has already been read */);
909         dwordBbuf.rewind();
910         IOUtils.readFully(archive, dwordBbuf);
911         archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
912         wordBbuf.rewind();
913         IOUtils.readFully(archive, wordBbuf);
914         if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
915             throw new ZipException("archive's ZIP64 end of central "
916                                    + "directory locator is corrupt.");
917         }
918         skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
919                   - WORD /* signature has already been read */);
920         dwordBbuf.rewind();
921         IOUtils.readFully(archive, dwordBbuf);
922         archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
923     }
924 
925     /**
926      * Parses the &quot;End of central dir record&quot; and positions
927      * the stream at the first central directory record.
928      *
929      * Expects stream to be positioned at the beginning of the
930      * &quot;End of central dir record&quot;.
931      */
932     private void positionAtCentralDirectory32()
933         throws IOException {
934         skipBytes(CFD_LOCATOR_OFFSET);
935         wordBbuf.rewind();
936         IOUtils.readFully(archive, wordBbuf);
937         archive.position(ZipLong.getValue(wordBuf));
938     }
939 
940     /**
941      * Searches for the and positions the stream at the start of the
942      * &quot;End of central dir record&quot;.
943      */
944     private void positionAtEndOfCentralDirectoryRecord()
945         throws IOException {
946         final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
947                                              ZipArchiveOutputStream.EOCD_SIG);
948         if (!found) {
949             throw new ZipException("archive is not a ZIP archive");
950         }
951     }
952 
953     /**
954      * Searches the archive backwards from minDistance to maxDistance
955      * for the given signature, positions the RandomaccessFile right
956      * at the signature if it has been found.
957      */
958     private boolean tryToLocateSignature(final long minDistanceFromEnd,
959                                          final long maxDistanceFromEnd,
960                                          final byte[] sig) throws IOException {
961         boolean found = false;
962         long off = archive.size() - minDistanceFromEnd;
963         final long stopSearching =
964             Math.max(0L, archive.size() - maxDistanceFromEnd);
965         if (off >= 0) {
966             for (; off >= stopSearching; off--) {
967                 archive.position(off);
968                 try {
969                     wordBbuf.rewind();
970                     IOUtils.readFully(archive, wordBbuf);
971                     wordBbuf.flip();
972                 } catch (EOFException ex) {
973                     break;
974                 }
975                 int curr = wordBbuf.get();
976                 if (curr == sig[POS_0]) {
977                     curr = wordBbuf.get();
978                     if (curr == sig[POS_1]) {
979                         curr = wordBbuf.get();
980                         if (curr == sig[POS_2]) {
981                             curr = wordBbuf.get();
982                             if (curr == sig[POS_3]) {
983                                 found = true;
984                                 break;
985                             }
986                         }
987                     }
988                 }
989             }
990         }
991         if (found) {
992             archive.position(off);
993         }
994         return found;
995     }
996 
997     /**
998      * Skips the given number of bytes or throws an EOFException if
999      * skipping failed.
1000      */ 
1001     private void skipBytes(final int count) throws IOException {
1002         long currentPosition = archive.position();
1003         long newPosition = currentPosition + count;
1004         if (newPosition > archive.size()) {
1005             throw new EOFException();
1006         }
1007         archive.position(newPosition);
1008     }
1009 
1010     /**
1011      * Number of bytes in local file header up to the &quot;length of
1012      * filename&quot; entry.
1013      */
1014     private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
1015         /* local file header signature     */ WORD
1016         /* version needed to extract       */ + SHORT
1017         /* general purpose bit flag        */ + SHORT
1018         /* compression method              */ + SHORT
1019         /* last mod file time              */ + SHORT
1020         /* last mod file date              */ + SHORT
1021         /* crc-32                          */ + WORD
1022         /* compressed size                 */ + WORD
1023         /* uncompressed size               */ + (long) WORD;
1024 
1025     /**
1026      * Walks through all recorded entries and adds the data available
1027      * from the local file header.
1028      *
1029      * <p>Also records the offsets for the data to read from the
1030      * entries.</p>
1031      */
1032     private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1033                                             entriesWithoutUTF8Flag)
1034         throws IOException {
1035         for (final ZipArchiveEntry zipArchiveEntry : entries) {
1036             // entries is filled in populateFromCentralDirectory and
1037             // never modified
1038             final Entry ze = (Entry) zipArchiveEntry;
1039             final OffsetEntry offsetEntry = ze.getOffsetEntry();
1040             final long offset = offsetEntry.headerOffset;
1041             archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1042             wordBbuf.rewind();
1043             IOUtils.readFully(archive, wordBbuf);
1044             wordBbuf.flip();
1045             wordBbuf.get(shortBuf);
1046             final int fileNameLen = ZipShort.getValue(shortBuf);
1047             wordBbuf.get(shortBuf);
1048             final int extraFieldLen = ZipShort.getValue(shortBuf);
1049             skipBytes(fileNameLen);
1050             final byte[] localExtraData = new byte[extraFieldLen];
1051             IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData));
1052             ze.setExtra(localExtraData);
1053             offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1054                 + SHORT + SHORT + fileNameLen + extraFieldLen;
1055 
1056             if (entriesWithoutUTF8Flag.containsKey(ze)) {
1057                 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1058                 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1059                                                          nc.comment);
1060             }
1061 
1062             final String name = ze.getName();
1063             LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
1064             if (entriesOfThatName == null) {
1065                 entriesOfThatName = new LinkedList<>();
1066                 nameMap.put(name, entriesOfThatName);
1067             }
1068             entriesOfThatName.addLast(ze);
1069         }
1070     }
1071 
1072     /**
1073      * Checks whether the archive starts with a LFH.  If it doesn't,
1074      * it may be an empty archive.
1075      */
1076     private boolean startsWithLocalFileHeader() throws IOException {
1077         archive.position(0);
1078         wordBbuf.rewind();
1079         IOUtils.readFully(archive, wordBbuf);
1080         return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1081     }
1082 
1083     /**
1084      * InputStream that delegates requests to the underlying
1085      * SeekableByteChannel, making sure that only bytes from a certain
1086      * range can be read.
1087      */
1088     private class BoundedInputStream extends InputStream {
1089         private static final int MAX_BUF_LEN = 8192;
1090         private final ByteBuffer buffer;
1091         private long remaining;
1092         private long loc;
1093         private boolean addDummyByte = false;
1094 
1095         BoundedInputStream(final long start, final long remaining) {
1096             this.remaining = remaining;
1097             loc = start;
1098             if (remaining < MAX_BUF_LEN && remaining > 0) {
1099                 buffer = ByteBuffer.allocate((int) remaining);
1100             } else {
1101                 buffer = ByteBuffer.allocate(MAX_BUF_LEN);
1102             }
1103         }
1104 
1105         @Override
1106         public int read() throws IOException {
1107             if (remaining-- <= 0) {
1108                 if (addDummyByte) {
1109                     addDummyByte = false;
1110                     return 0;
1111                 }
1112                 return -1;
1113             }
1114             synchronized (archive) {
1115                 archive.position(loc++);
1116                 int read = read(1);
1117                 if (read < 0) {
1118                     return read;
1119                 }
1120                 return buffer.get() & 0xff;
1121             }
1122         }
1123 
1124         @Override
1125         public int read(final byte[] b, final int off, int len) throws IOException {
1126             if (remaining <= 0) {
1127                 if (addDummyByte) {
1128                     addDummyByte = false;
1129                     b[off] = 0;
1130                     return 1;
1131                 }
1132                 return -1;
1133             }
1134 
1135             if (len <= 0) {
1136                 return 0;
1137             }
1138 
1139             if (len > remaining) {
1140                 len = (int) remaining;
1141             }
1142             ByteBuffer buf;
1143             int ret = -1;
1144             synchronized (archive) {
1145                 archive.position(loc);
1146                 if (len <= buffer.capacity()) {
1147                     buf = buffer;
1148                     ret = read(len);
1149                 } else {
1150                     buf = ByteBuffer.allocate(len);
1151                     ret = archive.read(buf);
1152                     buf.flip();
1153                 }
1154             }
1155             if (ret > 0) {
1156                 buf.get(b, off, ret);
1157                 loc += ret;
1158                 remaining -= ret;
1159             }
1160             return ret;
1161         }
1162 
1163         private int read(int len) throws IOException {
1164             buffer.rewind().limit(len);
1165             int read = archive.read(buffer);
1166             buffer.flip();
1167             return read;
1168         }
1169 
1170         /**
1171          * Inflater needs an extra dummy byte for nowrap - see
1172          * Inflater's javadocs.
1173          */
1174         void addDummy() {
1175             addDummyByte = true;
1176         }
1177     }
1178 
1179     private static final class NameAndComment {
1180         private final byte[] name;
1181         private final byte[] comment;
1182         private NameAndComment(final byte[] name, final byte[] comment) {
1183             this.name = name;
1184             this.comment = comment;
1185         }
1186     }
1187 
1188     /**
1189      * Compares two ZipArchiveEntries based on their offset within the archive.
1190      *
1191      * <p>Won't return any meaningful results if one of the entries
1192      * isn't part of the archive at all.</p>
1193      *
1194      * @since 1.1
1195      */
1196     private final Comparator<ZipArchiveEntry> offsetComparator =
1197         new Comparator<ZipArchiveEntry>() {
1198         @Override
1199         public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) {
1200             if (e1 == e2) {
1201                 return 0;
1202             }
1203 
1204             final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
1205             final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
1206             if (ent1 == null) {
1207                 return 1;
1208             }
1209             if (ent2 == null) {
1210                 return -1;
1211             }
1212             final long val = (ent1.getOffsetEntry().headerOffset
1213                         - ent2.getOffsetEntry().headerOffset);
1214             return val == 0 ? 0 : val < 0 ? -1 : +1;
1215         }
1216     };
1217 
1218     /**
1219      * Extends ZipArchiveEntry to store the offset within the archive.
1220      */
1221     private static class Entry extends ZipArchiveEntry {
1222 
1223         private final OffsetEntry offsetEntry;
1224 
1225         Entry(final OffsetEntry offset) {
1226             this.offsetEntry = offset;
1227         }
1228 
1229         OffsetEntry getOffsetEntry() {
1230             return offsetEntry;
1231         }
1232 
1233         @Override
1234         public int hashCode() {
1235             return 3 * super.hashCode()
1236                 + (int) (offsetEntry.headerOffset % Integer.MAX_VALUE);
1237         }
1238 
1239         @Override
1240         public boolean equals(final Object other) {
1241             if (super.equals(other)) {
1242                 // super.equals would return false if other were not an Entry
1243                 final Entry otherEntry = (Entry) other;
1244                 return offsetEntry.headerOffset
1245                         == otherEntry.offsetEntry.headerOffset
1246                     && offsetEntry.dataOffset
1247                         == otherEntry.offsetEntry.dataOffset;
1248             }
1249             return false;
1250         }
1251     }
1252 }