View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   *
17   */
18  package org.apache.commons.compress.archivers.zip;
19  
20  import java.io.BufferedInputStream;
21  import java.io.ByteArrayInputStream;
22  import java.io.Closeable;
23  import java.io.EOFException;
24  import java.io.File;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.io.SequenceInputStream;
28  import java.nio.ByteBuffer;
29  import java.nio.channels.FileChannel;
30  import java.nio.channels.SeekableByteChannel;
31  import java.nio.file.Files;
32  import java.nio.file.StandardOpenOption;
33  import java.util.Arrays;
34  import java.util.Collections;
35  import java.util.Comparator;
36  import java.util.Enumeration;
37  import java.util.EnumSet;
38  import java.util.HashMap;
39  import java.util.LinkedList;
40  import java.util.List;
41  import java.util.Map;
42  import java.util.zip.Inflater;
43  import java.util.zip.InflaterInputStream;
44  import java.util.zip.ZipException;
45  
46  import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
47  import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
48  import org.apache.commons.compress.utils.IOUtils;
49  
50  import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
51  import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
52  import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
53  import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
54  import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
55  
56  /**
57   * Replacement for <code>java.util.zip.ZipFile</code>.
58   *
59   * <p>This class adds support for file name encodings other than UTF-8
60   * (which is required to work on ZIP files created by native zip tools)
61   * and is able to skip a preamble like the one found in
62   * self-extracting archives.  Furthermore it returns instances of
63   * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
64   * instead of <code>java.util.zip.ZipEntry</code>.</p>
65   *
66   * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
67   * have to reimplement all methods anyway.  Like
68   * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the
69   * covers and supports compressed and uncompressed entries.  As of
70   * Apache Commons Compress 1.3 it also transparently supports Zip64
71   * extensions and thus individual entries and archives larger than 4
72   * GB or with more than 65536 entries.</p>
73   *
74   * <p>The method signatures mimic the ones of
75   * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
76   *
77   * <ul>
78   *   <li>There is no getName method.</li>
79   *   <li>entries has been renamed to getEntries.</li>
80   *   <li>getEntries and getEntry return
81   *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
82   *   instances.</li>
83   *   <li>close is allowed to throw IOException.</li>
84   * </ul>
85   *
86   */
87  public class ZipFile implements Closeable {
88      private static final int HASH_SIZE = 509;
89      static final int NIBLET_MASK = 0x0f;
90      static final int BYTE_SHIFT = 8;
91      private static final int POS_0 = 0;
92      private static final int POS_1 = 1;
93      private static final int POS_2 = 2;
94      private static final int POS_3 = 3;
95      private static final byte[] ONE_ZERO_BYTE = new byte[1];
96  
97      /**
98       * List of entries in the order they appear inside the central
99       * directory.
100      */
101     private final List<ZipArchiveEntry> entries =
102         new LinkedList<>();
103 
104     /**
105      * Maps String to list of ZipArchiveEntrys, name -> actual entries.
106      */
107     private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
108         new HashMap<>(HASH_SIZE);
109 
110     /**
111      * The encoding to use for filenames and the file comment.
112      *
113      * <p>For a list of possible values see <a
114      * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
115      * Defaults to UTF-8.</p>
116      */
117     private final String encoding;
118 
119     /**
120      * The zip encoding to use for filenames and the file comment.
121      */
122     private final ZipEncoding zipEncoding;
123 
124     /**
125      * File name of actual source.
126      */
127     private final String archiveName;
128 
129     /**
130      * The actual data source.
131      */
132     private final SeekableByteChannel archive;
133 
134     /**
135      * Whether to look for and use Unicode extra fields.
136      */
137     private final boolean useUnicodeExtraFields;
138 
139     /**
140      * Whether the file is closed.
141      */
142     private volatile boolean closed = true;
143 
144     // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
145     private final byte[] dwordBuf = new byte[DWORD];
146     private final byte[] wordBuf = new byte[WORD];
147     private final byte[] cfhBuf = new byte[CFH_LEN];
148     private final byte[] shortBuf = new byte[SHORT];
149     private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
150     private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
151     private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
152 
153     /**
154      * Opens the given file for reading, assuming "UTF8" for file names.
155      *
156      * @param f the archive.
157      *
158      * @throws IOException if an error occurs while reading the file.
159      */
160     public ZipFile(final File f) throws IOException {
161         this(f, ZipEncodingHelper.UTF8);
162     }
163 
164     /**
165      * Opens the given file for reading, assuming "UTF8".
166      *
167      * @param name name of the archive.
168      *
169      * @throws IOException if an error occurs while reading the file.
170      */
171     public ZipFile(final String name) throws IOException {
172         this(new File(name), ZipEncodingHelper.UTF8);
173     }
174 
175     /**
176      * Opens the given file for reading, assuming the specified
177      * encoding for file names, scanning unicode extra fields.
178      *
179      * @param name name of the archive.
180      * @param encoding the encoding to use for file names, use null
181      * for the platform's default encoding
182      *
183      * @throws IOException if an error occurs while reading the file.
184      */
185     public ZipFile(final String name, final String encoding) throws IOException {
186         this(new File(name), encoding, true);
187     }
188 
189     /**
190      * Opens the given file for reading, assuming the specified
191      * encoding for file names and scanning for unicode extra fields.
192      *
193      * @param f the archive.
194      * @param encoding the encoding to use for file names, use null
195      * for the platform's default encoding
196      *
197      * @throws IOException if an error occurs while reading the file.
198      */
199     public ZipFile(final File f, final String encoding) throws IOException {
200         this(f, encoding, true);
201     }
202 
203     /**
204      * Opens the given file for reading, assuming the specified
205      * encoding for file names.
206      *
207      * @param f the archive.
208      * @param encoding the encoding to use for file names, use null
209      * for the platform's default encoding
210      * @param useUnicodeExtraFields whether to use InfoZIP Unicode
211      * Extra Fields (if present) to set the file names.
212      *
213      * @throws IOException if an error occurs while reading the file.
214      */
215     public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
216         throws IOException {
217         this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
218              f.getAbsolutePath(), encoding, useUnicodeExtraFields, true);
219     }
220 
221     /**
222      * Opens the given channel for reading, assuming "UTF8" for file names.
223      *
224      * <p>{@link
225      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
226      * allows you to read from an in-memory archive.</p>
227      *
228      * @param channel the archive.
229      *
230      * @throws IOException if an error occurs while reading the file.
231      * @since 1.13
232      */
233     public ZipFile(final SeekableByteChannel channel)
234             throws IOException {
235         this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
236     }
237 
238     /**
239      * Opens the given channel for reading, assuming the specified
240      * encoding for file names.
241      *
242      * <p>{@link
243      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
244      * allows you to read from an in-memory archive.</p>
245      *
246      * @param channel the archive.
247      * @param encoding the encoding to use for file names, use null
248      * for the platform's default encoding
249      *
250      * @throws IOException if an error occurs while reading the file.
251      * @since 1.13
252      */
253     public ZipFile(final SeekableByteChannel channel, final String encoding)
254         throws IOException {
255         this(channel, "unknown archive", encoding, true);
256     }
257 
258     /**
259      * Opens the given channel for reading, assuming the specified
260      * encoding for file names.
261      *
262      * <p>{@link
263      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
264      * allows you to read from an in-memory archive.</p>
265      *
266      * @param channel the archive.
267      * @param archiveName name of the archive, used for error messages only.
268      * @param encoding the encoding to use for file names, use null
269      * for the platform's default encoding
270      * @param useUnicodeExtraFields whether to use InfoZIP Unicode
271      * Extra Fields (if present) to set the file names.
272      *
273      * @throws IOException if an error occurs while reading the file.
274      * @since 1.13
275      */
276     public ZipFile(final SeekableByteChannel channel, final String archiveName,
277                    final String encoding, final boolean useUnicodeExtraFields)
278         throws IOException {
279         this(channel, archiveName, encoding, useUnicodeExtraFields, false);
280     }
281 
282     private ZipFile(final SeekableByteChannel channel, final String archiveName,
283                     final String encoding, final boolean useUnicodeExtraFields,
284                     final boolean closeOnError)
285         throws IOException {
286         this.archiveName = archiveName;
287         this.encoding = encoding;
288         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
289         this.useUnicodeExtraFields = useUnicodeExtraFields;
290         archive = channel;
291         boolean success = false;
292         try {
293             final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
294                 populateFromCentralDirectory();
295             resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
296             success = true;
297         } finally {
298             closed = !success;
299             if (!success && closeOnError) {
300                 IOUtils.closeQuietly(archive);
301             }
302         }
303     }
304 
305     /**
306      * The encoding to use for filenames and the file comment.
307      *
308      * @return null if using the platform's default character encoding.
309      */
310     public String getEncoding() {
311         return encoding;
312     }
313 
314     /**
315      * Closes the archive.
316      * @throws IOException if an error occurs closing the archive.
317      */
318     @Override
319     public void close() throws IOException {
320         // this flag is only written here and read in finalize() which
321         // can never be run in parallel.
322         // no synchronization needed.
323         closed = true;
324 
325         archive.close();
326     }
327 
328     /**
329      * close a zipfile quietly; throw no io fault, do nothing
330      * on a null parameter
331      * @param zipfile file to close, can be null
332      */
333     public static void closeQuietly(final ZipFile zipfile) {
334         IOUtils.closeQuietly(zipfile);
335     }
336 
337     /**
338      * Returns all entries.
339      *
340      * <p>Entries will be returned in the same order they appear
341      * within the archive's central directory.</p>
342      *
343      * @return all entries as {@link ZipArchiveEntry} instances
344      */
345     public Enumeration<ZipArchiveEntry> getEntries() {
346         return Collections.enumeration(entries);
347     }
348 
349     /**
350      * Returns all entries in physical order.
351      *
352      * <p>Entries will be returned in the same order their contents
353      * appear within the archive.</p>
354      *
355      * @return all entries as {@link ZipArchiveEntry} instances
356      *
357      * @since 1.1
358      */
359     public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
360         final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]);
361         Arrays.sort(allEntries, offsetComparator);
362         return Collections.enumeration(Arrays.asList(allEntries));
363     }
364 
365     /**
366      * Returns a named entry - or {@code null} if no entry by
367      * that name exists.
368      *
369      * <p>If multiple entries with the same name exist the first entry
370      * in the archive's central directory by that name is
371      * returned.</p>
372      *
373      * @param name name of the entry.
374      * @return the ZipArchiveEntry corresponding to the given name - or
375      * {@code null} if not present.
376      */
377     public ZipArchiveEntry getEntry(final String name) {
378         final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
379         return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
380     }
381 
382     /**
383      * Returns all named entries in the same order they appear within
384      * the archive's central directory.
385      *
386      * @param name name of the entry.
387      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
388      * given name
389      * @since 1.6
390      */
391     public Iterable<ZipArchiveEntry> getEntries(final String name) {
392         final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
393         return entriesOfThatName != null ? entriesOfThatName
394             : Collections.<ZipArchiveEntry>emptyList();
395     }
396 
397     /**
398      * Returns all named entries in the same order their contents
399      * appear within the archive.
400      *
401      * @param name name of the entry.
402      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
403      * given name
404      * @since 1.6
405      */
406     public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
407         ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
408         if (nameMap.containsKey(name)) {
409             entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
410             Arrays.sort(entriesOfThatName, offsetComparator);
411         }
412         return Arrays.asList(entriesOfThatName);
413     }
414 
415     /**
416      * Whether this class is able to read the given entry.
417      *
418      * <p>May return false if it is set up to use encryption or a
419      * compression method that hasn't been implemented yet.</p>
420      * @since 1.1
421      * @param ze the entry
422      * @return whether this class is able to read the given entry.
423      */
424     public boolean canReadEntryData(final ZipArchiveEntry ze) {
425         return ZipUtil.canHandleEntryData(ze);
426     }
427 
428     /**
429      * Expose the raw stream of the archive entry (compressed form).
430      *
431      * <p>This method does not relate to how/if we understand the payload in the
432      * stream, since we really only intend to move it on to somewhere else.</p>
433      *
434      * @param ze The entry to get the stream for
435      * @return The raw input stream containing (possibly) compressed data.
436      * @since 1.11
437      */
438     public InputStream getRawInputStream(final ZipArchiveEntry ze) {
439         if (!(ze instanceof Entry)) {
440             return null;
441         }
442         final long start = ze.getDataOffset();
443         return createBoundedInputStream(start, ze.getCompressedSize());
444     }
445 
446 
447     /**
448      * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream.
449      * Compression and all other attributes will be as in this file.
450      * <p>This method transfers entries based on the central directory of the zip file.</p>
451      *
452      * @param target The zipArchiveOutputStream to write the entries to
453      * @param predicate A predicate that selects which entries to write
454      * @throws IOException on error
455      */
456     public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
457             throws IOException {
458         final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
459         while (src.hasMoreElements()) {
460             final ZipArchiveEntry entry = src.nextElement();
461             if (predicate.test( entry)) {
462                 target.addRawArchiveEntry(entry, getRawInputStream(entry));
463             }
464         }
465     }
466 
467     /**
468      * Returns an InputStream for reading the contents of the given entry.
469      *
470      * @param ze the entry to get the stream for.
471      * @return a stream to read the entry from.
472      * @throws IOException if unable to create an input stream from the zipentry
473      */
474     public InputStream getInputStream(final ZipArchiveEntry ze)
475         throws IOException {
476         if (!(ze instanceof Entry)) {
477             return null;
478         }
479         // cast validity is checked just above
480         ZipUtil.checkRequestedFeatures(ze);
481         final long start = ze.getDataOffset();
482 
483         // doesn't get closed if the method is not supported - which
484         // should never happen because of the checkRequestedFeatures
485         // call above
486         final InputStream is =
487             new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR
488         switch (ZipMethod.getMethodByCode(ze.getMethod())) {
489             case STORED:
490                 return is;
491             case UNSHRINKING:
492                 return new UnshrinkingInputStream(is);
493             case IMPLODING:
494                 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
495                         ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
496             case DEFLATED:
497                 final Inflater inflater = new Inflater(true);
498                 // Inflater with nowrap=true has this odd contract for a zero padding
499                 // byte following the data stream; this used to be zlib's requirement
500                 // and has been fixed a long time ago, but the contract persists so
501                 // we comply.
502                 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
503                 return new InflaterInputStream(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)),
504                     inflater) {
505                     @Override
506                     public void close() throws IOException {
507                         try {
508                             super.close();
509                         } finally {
510                             inflater.end();
511                         }
512                     }
513                 };
514             case BZIP2:
515                 return new BZip2CompressorInputStream(is);
516             case ENHANCED_DEFLATED:
517                 return new Deflate64CompressorInputStream(is);
518             case AES_ENCRYPTED:
519             case EXPANDING_LEVEL_1:
520             case EXPANDING_LEVEL_2:
521             case EXPANDING_LEVEL_3:
522             case EXPANDING_LEVEL_4:
523             case JPEG:
524             case LZMA:
525             case PKWARE_IMPLODING:
526             case PPMD:
527             case TOKENIZATION:
528             case UNKNOWN:
529             case WAVPACK:
530             case XZ:
531             default:
532                 throw new ZipException("Found unsupported compression method "
533                                        + ze.getMethod());
534         }
535     }
536 
537     /**
538      * <p>
539      * Convenience method to return the entry's content as a String if isUnixSymlink()
540      * returns true for it, otherwise returns null.
541      * </p>
542      *
543      * <p>This method assumes the symbolic link's file name uses the
544      * same encoding that as been specified for this ZipFile.</p>
545      *
546      * @param entry ZipArchiveEntry object that represents the symbolic link
547      * @return entry's content as a String
548      * @throws IOException problem with content's input stream
549      * @since 1.5
550      */
551     public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
552         if (entry != null && entry.isUnixSymlink()) {
553             try (InputStream in = getInputStream(entry)) {
554                 return zipEncoding.decode(IOUtils.toByteArray(in));
555             }
556         }
557         return null;
558     }
559 
560     /**
561      * Ensures that the close method of this zipfile is called when
562      * there are no more references to it.
563      * @see #close()
564      */
565     @Override
566     protected void finalize() throws Throwable {
567         try {
568             if (!closed) {
569                 System.err.println("Cleaning up unclosed ZipFile for archive "
570                                    + archiveName);
571                 close();
572             }
573         } finally {
574             super.finalize();
575         }
576     }
577 
578     /**
579      * Length of a "central directory" entry structure without file
580      * name, extra fields or comment.
581      */
582     private static final int CFH_LEN =
583         /* version made by                 */ SHORT
584         /* version needed to extract       */ + SHORT
585         /* general purpose bit flag        */ + SHORT
586         /* compression method              */ + SHORT
587         /* last mod file time              */ + SHORT
588         /* last mod file date              */ + SHORT
589         /* crc-32                          */ + WORD
590         /* compressed size                 */ + WORD
591         /* uncompressed size               */ + WORD
592         /* filename length                 */ + SHORT
593         /* extra field length              */ + SHORT
594         /* file comment length             */ + SHORT
595         /* disk number start               */ + SHORT
596         /* internal file attributes        */ + SHORT
597         /* external file attributes        */ + WORD
598         /* relative offset of local header */ + WORD;
599 
600     private static final long CFH_SIG =
601         ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
602 
603     /**
604      * Reads the central directory of the given archive and populates
605      * the internal tables with ZipArchiveEntry instances.
606      *
607      * <p>The ZipArchiveEntrys will know all data that can be obtained from
608      * the central directory alone, but not the data that requires the
609      * local file header or additional data to be read.</p>
610      *
611      * @return a map of zipentries that didn't have the language
612      * encoding flag set when read.
613      */
614     private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
615         throws IOException {
616         final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
617             new HashMap<>();
618 
619         positionAtCentralDirectory();
620 
621         wordBbuf.rewind();
622         IOUtils.readFully(archive, wordBbuf);
623         long sig = ZipLong.getValue(wordBuf);
624 
625         if (sig != CFH_SIG && startsWithLocalFileHeader()) {
626             throw new IOException("central directory is empty, can't expand"
627                                   + " corrupt archive.");
628         }
629 
630         while (sig == CFH_SIG) {
631             readCentralDirectoryEntry(noUTF8Flag);
632             wordBbuf.rewind();
633             IOUtils.readFully(archive, wordBbuf);
634             sig = ZipLong.getValue(wordBuf);
635         }
636         return noUTF8Flag;
637     }
638 
639     /**
640      * Reads an individual entry of the central directory, creats an
641      * ZipArchiveEntry from it and adds it to the global maps.
642      *
643      * @param noUTF8Flag map used to collect entries that don't have
644      * their UTF-8 flag set and whose name will be set by data read
645      * from the local file header later.  The current entry may be
646      * added to this map.
647      */
648     private void
649         readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
650         throws IOException {
651         cfhBbuf.rewind();
652         IOUtils.readFully(archive, cfhBbuf);
653         int off = 0;
654         final Entry ze = new Entry();
655 
656         final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
657         off += SHORT;
658         ze.setVersionMadeBy(versionMadeBy);
659         ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
660 
661         ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
662         off += SHORT; // version required
663 
664         final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
665         final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
666         final ZipEncoding entryEncoding =
667             hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
668         if (hasUTF8Flag) {
669             ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
670         }
671         ze.setGeneralPurposeBit(gpFlag);
672         ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
673 
674         off += SHORT;
675 
676         //noinspection MagicConstant
677         ze.setMethod(ZipShort.getValue(cfhBuf, off));
678         off += SHORT;
679 
680         final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
681         ze.setTime(time);
682         off += WORD;
683 
684         ze.setCrc(ZipLong.getValue(cfhBuf, off));
685         off += WORD;
686 
687         ze.setCompressedSize(ZipLong.getValue(cfhBuf, off));
688         off += WORD;
689 
690         ze.setSize(ZipLong.getValue(cfhBuf, off));
691         off += WORD;
692 
693         final int fileNameLen = ZipShort.getValue(cfhBuf, off);
694         off += SHORT;
695 
696         final int extraLen = ZipShort.getValue(cfhBuf, off);
697         off += SHORT;
698 
699         final int commentLen = ZipShort.getValue(cfhBuf, off);
700         off += SHORT;
701 
702         final int diskStart = ZipShort.getValue(cfhBuf, off);
703         off += SHORT;
704 
705         ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
706         off += SHORT;
707 
708         ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
709         off += WORD;
710 
711         final byte[] fileName = new byte[fileNameLen];
712         IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
713         ze.setName(entryEncoding.decode(fileName), fileName);
714 
715         // LFH offset,
716         ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
717         // data offset will be filled later
718         entries.add(ze);
719 
720         final byte[] cdExtraData = new byte[extraLen];
721         IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
722         ze.setCentralDirectoryExtra(cdExtraData);
723 
724         setSizesAndOffsetFromZip64Extra(ze, diskStart);
725 
726         final byte[] comment = new byte[commentLen];
727         IOUtils.readFully(archive, ByteBuffer.wrap(comment));
728         ze.setComment(entryEncoding.decode(comment));
729 
730         if (!hasUTF8Flag && useUnicodeExtraFields) {
731             noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
732         }
733     }
734 
735     /**
736      * If the entry holds a Zip64 extended information extra field,
737      * read sizes from there if the entry's sizes are set to
738      * 0xFFFFFFFFF, do the same for the offset of the local file
739      * header.
740      *
741      * <p>Ensures the Zip64 extra either knows both compressed and
742      * uncompressed size or neither of both as the internal logic in
743      * ExtraFieldUtils forces the field to create local header data
744      * even if they are never used - and here a field with only one
745      * size would be invalid.</p>
746      */
747     private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
748                                                  final int diskStart)
749         throws IOException {
750         final Zip64ExtendedInformationExtraField z64 =
751             (Zip64ExtendedInformationExtraField)
752             ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
753         if (z64 != null) {
754             final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
755             final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
756             final boolean hasRelativeHeaderOffset =
757                 ze.getLocalHeaderOffset() == ZIP64_MAGIC;
758             z64.reparseCentralDirectoryData(hasUncompressedSize,
759                                             hasCompressedSize,
760                                             hasRelativeHeaderOffset,
761                                             diskStart == ZIP64_MAGIC_SHORT);
762 
763             if (hasUncompressedSize) {
764                 ze.setSize(z64.getSize().getLongValue());
765             } else if (hasCompressedSize) {
766                 z64.setSize(new ZipEightByteInteger(ze.getSize()));
767             }
768 
769             if (hasCompressedSize) {
770                 ze.setCompressedSize(z64.getCompressedSize().getLongValue());
771             } else if (hasUncompressedSize) {
772                 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
773             }
774 
775             if (hasRelativeHeaderOffset) {
776                 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
777             }
778         }
779     }
780 
781     /**
782      * Length of the "End of central directory record" - which is
783      * supposed to be the last structure of the archive - without file
784      * comment.
785      */
786     static final int MIN_EOCD_SIZE =
787         /* end of central dir signature    */ WORD
788         /* number of this disk             */ + SHORT
789         /* number of the disk with the     */
790         /* start of the central directory  */ + SHORT
791         /* total number of entries in      */
792         /* the central dir on this disk    */ + SHORT
793         /* total number of entries in      */
794         /* the central dir                 */ + SHORT
795         /* size of the central directory   */ + WORD
796         /* offset of start of central      */
797         /* directory with respect to       */
798         /* the starting disk number        */ + WORD
799         /* zipfile comment length          */ + SHORT;
800 
801     /**
802      * Maximum length of the "End of central directory record" with a
803      * file comment.
804      */
805     private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
806         /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
807 
808     /**
809      * Offset of the field that holds the location of the first
810      * central directory entry inside the "End of central directory
811      * record" relative to the start of the "End of central directory
812      * record".
813      */
814     private static final int CFD_LOCATOR_OFFSET =
815         /* end of central dir signature    */ WORD
816         /* number of this disk             */ + SHORT
817         /* number of the disk with the     */
818         /* start of the central directory  */ + SHORT
819         /* total number of entries in      */
820         /* the central dir on this disk    */ + SHORT
821         /* total number of entries in      */
822         /* the central dir                 */ + SHORT
823         /* size of the central directory   */ + WORD;
824 
825     /**
826      * Length of the "Zip64 end of central directory locator" - which
827      * should be right in front of the "end of central directory
828      * record" if one is present at all.
829      */
830     private static final int ZIP64_EOCDL_LENGTH =
831         /* zip64 end of central dir locator sig */ WORD
832         /* number of the disk with the start    */
833         /* start of the zip64 end of            */
834         /* central directory                    */ + WORD
835         /* relative offset of the zip64         */
836         /* end of central directory record      */ + DWORD
837         /* total number of disks                */ + WORD;
838 
839     /**
840      * Offset of the field that holds the location of the "Zip64 end
841      * of central directory record" inside the "Zip64 end of central
842      * directory locator" relative to the start of the "Zip64 end of
843      * central directory locator".
844      */
845     private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
846         /* zip64 end of central dir locator sig */ WORD
847         /* number of the disk with the start    */
848         /* start of the zip64 end of            */
849         /* central directory                    */ + WORD;
850 
851     /**
852      * Offset of the field that holds the location of the first
853      * central directory entry inside the "Zip64 end of central
854      * directory record" relative to the start of the "Zip64 end of
855      * central directory record".
856      */
857     private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
858         /* zip64 end of central dir        */
859         /* signature                       */ WORD
860         /* size of zip64 end of central    */
861         /* directory record                */ + DWORD
862         /* version made by                 */ + SHORT
863         /* version needed to extract       */ + SHORT
864         /* number of this disk             */ + WORD
865         /* number of the disk with the     */
866         /* start of the central directory  */ + WORD
867         /* total number of entries in the  */
868         /* central directory on this disk  */ + DWORD
869         /* total number of entries in the  */
870         /* central directory               */ + DWORD
871         /* size of the central directory   */ + DWORD;
872 
873     /**
874      * Searches for either the &quot;Zip64 end of central directory
875      * locator&quot; or the &quot;End of central dir record&quot;, parses
876      * it and positions the stream at the first central directory
877      * record.
878      */
879     private void positionAtCentralDirectory()
880         throws IOException {
881         positionAtEndOfCentralDirectoryRecord();
882         boolean found = false;
883         final boolean searchedForZip64EOCD =
884             archive.position() > ZIP64_EOCDL_LENGTH;
885         if (searchedForZip64EOCD) {
886             archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
887             wordBbuf.rewind();
888             IOUtils.readFully(archive, wordBbuf);
889             found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
890                                   wordBuf);
891         }
892         if (!found) {
893             // not a ZIP64 archive
894             if (searchedForZip64EOCD) {
895                 skipBytes(ZIP64_EOCDL_LENGTH - WORD);
896             }
897             positionAtCentralDirectory32();
898         } else {
899             positionAtCentralDirectory64();
900         }
901     }
902 
903     /**
904      * Parses the &quot;Zip64 end of central directory locator&quot;,
905      * finds the &quot;Zip64 end of central directory record&quot; using the
906      * parsed information, parses that and positions the stream at the
907      * first central directory record.
908      *
909      * Expects stream to be positioned right behind the &quot;Zip64
910      * end of central directory locator&quot;'s signature.
911      */
912     private void positionAtCentralDirectory64()
913         throws IOException {
914         skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
915                   - WORD /* signature has already been read */);
916         dwordBbuf.rewind();
917         IOUtils.readFully(archive, dwordBbuf);
918         archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
919         wordBbuf.rewind();
920         IOUtils.readFully(archive, wordBbuf);
921         if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
922             throw new ZipException("archive's ZIP64 end of central "
923                                    + "directory locator is corrupt.");
924         }
925         skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
926                   - WORD /* signature has already been read */);
927         dwordBbuf.rewind();
928         IOUtils.readFully(archive, dwordBbuf);
929         archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
930     }
931 
932     /**
933      * Parses the &quot;End of central dir record&quot; and positions
934      * the stream at the first central directory record.
935      *
936      * Expects stream to be positioned at the beginning of the
937      * &quot;End of central dir record&quot;.
938      */
939     private void positionAtCentralDirectory32()
940         throws IOException {
941         skipBytes(CFD_LOCATOR_OFFSET);
942         wordBbuf.rewind();
943         IOUtils.readFully(archive, wordBbuf);
944         archive.position(ZipLong.getValue(wordBuf));
945     }
946 
947     /**
948      * Searches for the and positions the stream at the start of the
949      * &quot;End of central dir record&quot;.
950      */
951     private void positionAtEndOfCentralDirectoryRecord()
952         throws IOException {
953         final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
954                                              ZipArchiveOutputStream.EOCD_SIG);
955         if (!found) {
956             throw new ZipException("archive is not a ZIP archive");
957         }
958     }
959 
960     /**
961      * Searches the archive backwards from minDistance to maxDistance
962      * for the given signature, positions the RandomaccessFile right
963      * at the signature if it has been found.
964      */
965     private boolean tryToLocateSignature(final long minDistanceFromEnd,
966                                          final long maxDistanceFromEnd,
967                                          final byte[] sig) throws IOException {
968         boolean found = false;
969         long off = archive.size() - minDistanceFromEnd;
970         final long stopSearching =
971             Math.max(0L, archive.size() - maxDistanceFromEnd);
972         if (off >= 0) {
973             for (; off >= stopSearching; off--) {
974                 archive.position(off);
975                 try {
976                     wordBbuf.rewind();
977                     IOUtils.readFully(archive, wordBbuf);
978                     wordBbuf.flip();
979                 } catch (EOFException ex) {
980                     break;
981                 }
982                 int curr = wordBbuf.get();
983                 if (curr == sig[POS_0]) {
984                     curr = wordBbuf.get();
985                     if (curr == sig[POS_1]) {
986                         curr = wordBbuf.get();
987                         if (curr == sig[POS_2]) {
988                             curr = wordBbuf.get();
989                             if (curr == sig[POS_3]) {
990                                 found = true;
991                                 break;
992                             }
993                         }
994                     }
995                 }
996             }
997         }
998         if (found) {
999             archive.position(off);
1000         }
1001         return found;
1002     }
1003 
1004     /**
1005      * Skips the given number of bytes or throws an EOFException if
1006      * skipping failed.
1007      */
1008     private void skipBytes(final int count) throws IOException {
1009         long currentPosition = archive.position();
1010         long newPosition = currentPosition + count;
1011         if (newPosition > archive.size()) {
1012             throw new EOFException();
1013         }
1014         archive.position(newPosition);
1015     }
1016 
1017     /**
1018      * Number of bytes in local file header up to the &quot;length of
1019      * filename&quot; entry.
1020      */
1021     private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
1022         /* local file header signature     */ WORD
1023         /* version needed to extract       */ + SHORT
1024         /* general purpose bit flag        */ + SHORT
1025         /* compression method              */ + SHORT
1026         /* last mod file time              */ + SHORT
1027         /* last mod file date              */ + SHORT
1028         /* crc-32                          */ + WORD
1029         /* compressed size                 */ + WORD
1030         /* uncompressed size               */ + (long) WORD;
1031 
1032     /**
1033      * Walks through all recorded entries and adds the data available
1034      * from the local file header.
1035      *
1036      * <p>Also records the offsets for the data to read from the
1037      * entries.</p>
1038      */
1039     private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1040                                             entriesWithoutUTF8Flag)
1041         throws IOException {
1042         for (final ZipArchiveEntry zipArchiveEntry : entries) {
1043             // entries is filled in populateFromCentralDirectory and
1044             // never modified
1045             final Entry ze = (Entry) zipArchiveEntry;
1046             final long offset = ze.getLocalHeaderOffset();
1047             archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1048             wordBbuf.rewind();
1049             IOUtils.readFully(archive, wordBbuf);
1050             wordBbuf.flip();
1051             wordBbuf.get(shortBuf);
1052             final int fileNameLen = ZipShort.getValue(shortBuf);
1053             wordBbuf.get(shortBuf);
1054             final int extraFieldLen = ZipShort.getValue(shortBuf);
1055             skipBytes(fileNameLen);
1056             final byte[] localExtraData = new byte[extraFieldLen];
1057             IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData));
1058             ze.setExtra(localExtraData);
1059             ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1060                 + SHORT + SHORT + fileNameLen + extraFieldLen);
1061             ze.setStreamContiguous(true);
1062 
1063             if (entriesWithoutUTF8Flag.containsKey(ze)) {
1064                 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1065                 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1066                                                          nc.comment);
1067             }
1068 
1069             final String name = ze.getName();
1070             LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
1071             if (entriesOfThatName == null) {
1072                 entriesOfThatName = new LinkedList<>();
1073                 nameMap.put(name, entriesOfThatName);
1074             }
1075             entriesOfThatName.addLast(ze);
1076         }
1077     }
1078 
1079     /**
1080      * Checks whether the archive starts with a LFH.  If it doesn't,
1081      * it may be an empty archive.
1082      */
1083     private boolean startsWithLocalFileHeader() throws IOException {
1084         archive.position(0);
1085         wordBbuf.rewind();
1086         IOUtils.readFully(archive, wordBbuf);
1087         return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1088     }
1089 
1090     /**
1091      * Creates new BoundedInputStream, according to implementation of
1092      * underlying archive channel.
1093      */
1094     private BoundedInputStream createBoundedInputStream(long start, long remaining) {
1095         return archive instanceof FileChannel ?
1096             new BoundedFileChannelInputStream(start, remaining) :
1097             new BoundedInputStream(start, remaining);
1098     }
1099 
1100     /**
1101      * InputStream that delegates requests to the underlying
1102      * SeekableByteChannel, making sure that only bytes from a certain
1103      * range can be read.
1104      */
1105     private class BoundedInputStream extends InputStream {
1106         private ByteBuffer singleByteBuffer;
1107         private final long end;
1108         private long loc;
1109 
1110         BoundedInputStream(final long start, final long remaining) {
1111             this.end = start+remaining;
1112             if (this.end < start) {
1113                 // check for potential vulnerability due to overflow
1114                 throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining);
1115             }
1116             loc = start;
1117         }
1118 
1119         @Override
1120         public synchronized int read() throws IOException {
1121             if (loc >= end) {
1122                 return -1;
1123             }
1124             if (singleByteBuffer == null) {
1125                 singleByteBuffer = ByteBuffer.allocate(1);
1126             }
1127             else {
1128                 singleByteBuffer.rewind();
1129             }
1130             int read = read(loc, singleByteBuffer);
1131             if (read < 0) {
1132                 return read;
1133             }
1134             loc++;
1135             return singleByteBuffer.get() & 0xff;
1136         }
1137 
1138         @Override
1139         public synchronized int read(final byte[] b, final int off, int len) throws IOException {
1140             if (len <= 0) {
1141                 return 0;
1142             }
1143 
1144             if (len > end-loc) {
1145                 if (loc >= end) {
1146                     return -1;
1147                 }
1148                 len = (int)(end-loc);
1149             }
1150 
1151             ByteBuffer buf;
1152             buf = ByteBuffer.wrap(b, off, len);
1153             int ret = read(loc, buf);
1154             if (ret > 0) {
1155                 loc += ret;
1156                 return ret;
1157             }
1158             return ret;
1159         }
1160 
1161         protected int read(long pos, ByteBuffer buf) throws IOException {
1162             int read;
1163             synchronized (archive) {
1164                 archive.position(pos);
1165                 read = archive.read(buf);
1166             }
1167             buf.flip();
1168             return read;
1169         }
1170     }
1171 
1172     /**
1173      * Lock-free implementation of BoundedInputStream. The
1174      * implementation uses positioned reads on the underlying archive
1175      * file channel and therefore performs significantly faster in
1176      * concurrent environment.
1177      */
1178     private class BoundedFileChannelInputStream extends BoundedInputStream {
1179         private final FileChannel archive;
1180 
1181         BoundedFileChannelInputStream(final long start, final long remaining) {
1182             super(start, remaining);
1183             archive = (FileChannel)ZipFile.this.archive;
1184         }
1185 
1186         @Override
1187         protected int read(long pos, ByteBuffer buf) throws IOException {
1188             int read = archive.read(buf, pos);
1189             buf.flip();
1190             return read;
1191         }
1192     }
1193 
1194     private static final class NameAndComment {
1195         private final byte[] name;
1196         private final byte[] comment;
1197         private NameAndComment(final byte[] name, final byte[] comment) {
1198             this.name = name;
1199             this.comment = comment;
1200         }
1201     }
1202 
1203     /**
1204      * Compares two ZipArchiveEntries based on their offset within the archive.
1205      *
1206      * <p>Won't return any meaningful results if one of the entries
1207      * isn't part of the archive at all.</p>
1208      *
1209      * @since 1.1
1210      */
1211     private final Comparator<ZipArchiveEntry> offsetComparator =
1212         new Comparator<ZipArchiveEntry>() {
1213         @Override
1214         public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) {
1215             if (e1 == e2) {
1216                 return 0;
1217             }
1218 
1219             final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
1220             final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
1221             if (ent1 == null) {
1222                 return 1;
1223             }
1224             if (ent2 == null) {
1225                 return -1;
1226             }
1227             final long val = (ent1.getLocalHeaderOffset()
1228                         - ent2.getLocalHeaderOffset());
1229             return val == 0 ? 0 : val < 0 ? -1 : +1;
1230         }
1231     };
1232 
1233     /**
1234      * Extends ZipArchiveEntry to store the offset within the archive.
1235      */
1236     private static class Entry extends ZipArchiveEntry {
1237 
1238         Entry() {
1239         }
1240 
1241         @Override
1242         public int hashCode() {
1243             return 3 * super.hashCode()
1244                 + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32);
1245         }
1246 
1247         @Override
1248         public boolean equals(final Object other) {
1249             if (super.equals(other)) {
1250                 // super.equals would return false if other were not an Entry
1251                 final Entry otherEntry = (Entry) other;
1252                 return getLocalHeaderOffset()
1253                         == otherEntry.getLocalHeaderOffset()
1254                     && getDataOffset()
1255                         == otherEntry.getDataOffset();
1256             }
1257             return false;
1258         }
1259     }
1260 }