View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   *
17   */
18  package org.apache.commons.compress.archivers.zip;
19  
20  import java.io.BufferedInputStream;
21  import java.io.Closeable;
22  import java.io.EOFException;
23  import java.io.File;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.nio.ByteBuffer;
27  import java.nio.channels.FileChannel;
28  import java.nio.channels.SeekableByteChannel;
29  import java.nio.file.Files;
30  import java.nio.file.StandardOpenOption;
31  import java.util.Arrays;
32  import java.util.Collections;
33  import java.util.Comparator;
34  import java.util.Enumeration;
35  import java.util.EnumSet;
36  import java.util.HashMap;
37  import java.util.LinkedList;
38  import java.util.List;
39  import java.util.Map;
40  import java.util.zip.Inflater;
41  import java.util.zip.InflaterInputStream;
42  import java.util.zip.ZipException;
43  
44  import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
45  import org.apache.commons.compress.utils.IOUtils;
46  
47  import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
48  import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
49  import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
50  import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
51  import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
52  
53  /**
54   * Replacement for <code>java.util.zip.ZipFile</code>.
55   *
56   * <p>This class adds support for file name encodings other than UTF-8
57   * (which is required to work on ZIP files created by native zip tools)
58   * and is able to skip a preamble like the one found in self
59   * extracting archives.  Furthermore it returns instances of
60   * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
61   * instead of <code>java.util.zip.ZipEntry</code>.</p>
62   *
63   * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
64   * have to reimplement all methods anyway.  Like
65   * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the
66   * covers and supports compressed and uncompressed entries.  As of
67   * Apache Commons Compress 1.3 it also transparently supports Zip64
68   * extensions and thus individual entries and archives larger than 4
69   * GB or with more than 65536 entries.</p>
70   *
71   * <p>The method signatures mimic the ones of
72   * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
73   *
74   * <ul>
75   *   <li>There is no getName method.</li>
76   *   <li>entries has been renamed to getEntries.</li>
77   *   <li>getEntries and getEntry return
78   *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
79   *   instances.</li>
80   *   <li>close is allowed to throw IOException.</li>
81   * </ul>
82   *
83   */
84  public class ZipFile implements Closeable {
85      private static final int HASH_SIZE = 509;
86      static final int NIBLET_MASK = 0x0f;
87      static final int BYTE_SHIFT = 8;
88      private static final int POS_0 = 0;
89      private static final int POS_1 = 1;
90      private static final int POS_2 = 2;
91      private static final int POS_3 = 3;
92  
93      /**
94       * List of entries in the order they appear inside the central
95       * directory.
96       */
97      private final List<ZipArchiveEntry> entries =
98          new LinkedList<>();
99  
100     /**
101      * Maps String to list of ZipArchiveEntrys, name -> actual entries.
102      */
103     private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
104         new HashMap<>(HASH_SIZE);
105 
106     /**
107      * The encoding to use for filenames and the file comment.
108      *
109      * <p>For a list of possible values see <a
110      * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
111      * Defaults to UTF-8.</p>
112      */
113     private final String encoding;
114 
115     /**
116      * The zip encoding to use for filenames and the file comment.
117      */
118     private final ZipEncoding zipEncoding;
119 
120     /**
121      * File name of actual source.
122      */
123     private final String archiveName;
124 
125     /**
126      * The actual data source.
127      */
128     private final SeekableByteChannel archive;
129 
130     /**
131      * Whether to look for and use Unicode extra fields.
132      */
133     private final boolean useUnicodeExtraFields;
134 
135     /**
136      * Whether the file is closed.
137      */
138     private volatile boolean closed = true;
139 
140     // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
141     private final byte[] dwordBuf = new byte[DWORD];
142     private final byte[] wordBuf = new byte[WORD];
143     private final byte[] cfhBuf = new byte[CFH_LEN];
144     private final byte[] shortBuf = new byte[SHORT];
145     private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
146     private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
147     private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
148 
149     /**
150      * Opens the given file for reading, assuming "UTF8" for file names.
151      *
152      * @param f the archive.
153      *
154      * @throws IOException if an error occurs while reading the file.
155      */
156     public ZipFile(final File f) throws IOException {
157         this(f, ZipEncodingHelper.UTF8);
158     }
159 
160     /**
161      * Opens the given file for reading, assuming "UTF8".
162      *
163      * @param name name of the archive.
164      *
165      * @throws IOException if an error occurs while reading the file.
166      */
167     public ZipFile(final String name) throws IOException {
168         this(new File(name), ZipEncodingHelper.UTF8);
169     }
170 
171     /**
172      * Opens the given file for reading, assuming the specified
173      * encoding for file names, scanning unicode extra fields.
174      *
175      * @param name name of the archive.
176      * @param encoding the encoding to use for file names, use null
177      * for the platform's default encoding
178      *
179      * @throws IOException if an error occurs while reading the file.
180      */
181     public ZipFile(final String name, final String encoding) throws IOException {
182         this(new File(name), encoding, true);
183     }
184 
185     /**
186      * Opens the given file for reading, assuming the specified
187      * encoding for file names and scanning for unicode extra fields.
188      *
189      * @param f the archive.
190      * @param encoding the encoding to use for file names, use null
191      * for the platform's default encoding
192      *
193      * @throws IOException if an error occurs while reading the file.
194      */
195     public ZipFile(final File f, final String encoding) throws IOException {
196         this(f, encoding, true);
197     }
198 
199     /**
200      * Opens the given file for reading, assuming the specified
201      * encoding for file names.
202      *
203      * @param f the archive.
204      * @param encoding the encoding to use for file names, use null
205      * for the platform's default encoding
206      * @param useUnicodeExtraFields whether to use InfoZIP Unicode
207      * Extra Fields (if present) to set the file names.
208      *
209      * @throws IOException if an error occurs while reading the file.
210      */
211     public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
212         throws IOException {
213         this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
214              f.getAbsolutePath(), encoding, useUnicodeExtraFields, true);
215     }
216 
217     /**
218      * Opens the given channel for reading, assuming "UTF8" for file names.
219      *
220      * <p>{@link
221      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
222      * allows you to read from an in-memory archive.</p>
223      *
224      * @param channel the archive.
225      *
226      * @throws IOException if an error occurs while reading the file.
227      * @since 1.13
228      */
229     public ZipFile(final SeekableByteChannel channel)
230             throws IOException {
231         this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
232     }
233 
234     /**
235      * Opens the given channel for reading, assuming the specified
236      * encoding for file names.
237      *
238      * <p>{@link
239      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
240      * allows you to read from an in-memory archive.</p>
241      *
242      * @param channel the archive.
243      * @param encoding the encoding to use for file names, use null
244      * for the platform's default encoding
245      *
246      * @throws IOException if an error occurs while reading the file.
247      * @since 1.13
248      */
249     public ZipFile(final SeekableByteChannel channel, final String encoding)
250         throws IOException {
251         this(channel, "unknown archive", encoding, true);
252     }
253 
254     /**
255      * Opens the given channel for reading, assuming the specified
256      * encoding for file names.
257      *
258      * <p>{@link
259      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
260      * allows you to read from an in-memory archive.</p>
261      *
262      * @param channel the archive.
263      * @param archiveName name of the archive, used for error messages only.
264      * @param encoding the encoding to use for file names, use null
265      * for the platform's default encoding
266      * @param useUnicodeExtraFields whether to use InfoZIP Unicode
267      * Extra Fields (if present) to set the file names.
268      *
269      * @throws IOException if an error occurs while reading the file.
270      * @since 1.13
271      */
272     public ZipFile(final SeekableByteChannel channel, final String archiveName,
273                    final String encoding, final boolean useUnicodeExtraFields)
274         throws IOException {
275         this(channel, archiveName, encoding, useUnicodeExtraFields, false);
276     }
277 
278     private ZipFile(final SeekableByteChannel channel, final String archiveName,
279                     final String encoding, final boolean useUnicodeExtraFields,
280                     final boolean closeOnError)
281         throws IOException {
282         this.archiveName = archiveName;
283         this.encoding = encoding;
284         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
285         this.useUnicodeExtraFields = useUnicodeExtraFields;
286         archive = channel;
287         boolean success = false;
288         try {
289             final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
290                 populateFromCentralDirectory();
291             resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
292             success = true;
293         } finally {
294             closed = !success;
295             if (!success && closeOnError) {
296                 IOUtils.closeQuietly(archive);
297             }
298         }
299     }
300 
301     /**
302      * The encoding to use for filenames and the file comment.
303      *
304      * @return null if using the platform's default character encoding.
305      */
306     public String getEncoding() {
307         return encoding;
308     }
309 
310     /**
311      * Closes the archive.
312      * @throws IOException if an error occurs closing the archive.
313      */
314     @Override
315     public void close() throws IOException {
316         // this flag is only written here and read in finalize() which
317         // can never be run in parallel.
318         // no synchronization needed.
319         closed = true;
320 
321         archive.close();
322     }
323 
324     /**
325      * close a zipfile quietly; throw no io fault, do nothing
326      * on a null parameter
327      * @param zipfile file to close, can be null
328      */
329     public static void closeQuietly(final ZipFile zipfile) {
330         IOUtils.closeQuietly(zipfile);
331     }
332 
333     /**
334      * Returns all entries.
335      *
336      * <p>Entries will be returned in the same order they appear
337      * within the archive's central directory.</p>
338      *
339      * @return all entries as {@link ZipArchiveEntry} instances
340      */
341     public Enumeration<ZipArchiveEntry> getEntries() {
342         return Collections.enumeration(entries);
343     }
344 
345     /**
346      * Returns all entries in physical order.
347      *
348      * <p>Entries will be returned in the same order their contents
349      * appear within the archive.</p>
350      *
351      * @return all entries as {@link ZipArchiveEntry} instances
352      *
353      * @since 1.1
354      */
355     public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
356         final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]);
357         Arrays.sort(allEntries, offsetComparator);
358         return Collections.enumeration(Arrays.asList(allEntries));
359     }
360 
361     /**
362      * Returns a named entry - or {@code null} if no entry by
363      * that name exists.
364      *
365      * <p>If multiple entries with the same name exist the first entry
366      * in the archive's central directory by that name is
367      * returned.</p>
368      *
369      * @param name name of the entry.
370      * @return the ZipArchiveEntry corresponding to the given name - or
371      * {@code null} if not present.
372      */
373     public ZipArchiveEntry getEntry(final String name) {
374         final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
375         return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
376     }
377 
378     /**
379      * Returns all named entries in the same order they appear within
380      * the archive's central directory.
381      *
382      * @param name name of the entry.
383      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
384      * given name
385      * @since 1.6
386      */
387     public Iterable<ZipArchiveEntry> getEntries(final String name) {
388         final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
389         return entriesOfThatName != null ? entriesOfThatName
390             : Collections.<ZipArchiveEntry>emptyList();
391     }
392 
393     /**
394      * Returns all named entries in the same order their contents
395      * appear within the archive.
396      *
397      * @param name name of the entry.
398      * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
399      * given name
400      * @since 1.6
401      */
402     public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
403         ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
404         if (nameMap.containsKey(name)) {
405             entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
406             Arrays.sort(entriesOfThatName, offsetComparator);
407         }
408         return Arrays.asList(entriesOfThatName);
409     }
410 
411     /**
412      * Whether this class is able to read the given entry.
413      *
414      * <p>May return false if it is set up to use encryption or a
415      * compression method that hasn't been implemented yet.</p>
416      * @since 1.1
417      * @param ze the entry
418      * @return whether this class is able to read the given entry.
419      */
420     public boolean canReadEntryData(final ZipArchiveEntry ze) {
421         return ZipUtil.canHandleEntryData(ze);
422     }
423 
424     /**
425      * Expose the raw stream of the archive entry (compressed form).
426      *
427      * <p>This method does not relate to how/if we understand the payload in the
428      * stream, since we really only intend to move it on to somewhere else.</p>
429      *
430      * @param ze The entry to get the stream for
431      * @return The raw input stream containing (possibly) compressed data.
432      * @since 1.11
433      */
434     public InputStream getRawInputStream(final ZipArchiveEntry ze) {
435         if (!(ze instanceof Entry)) {
436             return null;
437         }
438         final long start = ze.getDataOffset();
439         return createBoundedInputStream(start, ze.getCompressedSize());
440     }
441 
442 
443     /**
444      * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream.
445      * Compression and all other attributes will be as in this file.
446      * <p>This method transfers entries based on the central directory of the zip file.</p>
447      *
448      * @param target The zipArchiveOutputStream to write the entries to
449      * @param predicate A predicate that selects which entries to write
450      * @throws IOException on error
451      */
452     public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
453             throws IOException {
454         final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
455         while (src.hasMoreElements()) {
456             final ZipArchiveEntry entry = src.nextElement();
457             if (predicate.test( entry)) {
458                 target.addRawArchiveEntry(entry, getRawInputStream(entry));
459             }
460         }
461     }
462 
463     /**
464      * Returns an InputStream for reading the contents of the given entry.
465      *
466      * @param ze the entry to get the stream for.
467      * @return a stream to read the entry from.
468      * @throws IOException if unable to create an input stream from the zipentry
469      * @throws ZipException if the zipentry uses an unsupported feature
470      */
471     public InputStream getInputStream(final ZipArchiveEntry ze)
472         throws IOException, ZipException {
473         if (!(ze instanceof Entry)) {
474             return null;
475         }
476         // cast valididty is checked just above
477         ZipUtil.checkRequestedFeatures(ze);
478         final long start = ze.getDataOffset();
479         // doesn't get closed if the method is not supported, but doesn't hold any resources either
480         final BoundedInputStream bis =
481             createBoundedInputStream(start, ze.getCompressedSize()); //NOSONAR
482         switch (ZipMethod.getMethodByCode(ze.getMethod())) {
483             case STORED:
484                 return bis;
485             case UNSHRINKING:
486                 return new UnshrinkingInputStream(bis);
487             case IMPLODING:
488                 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
489                         ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis));
490             case DEFLATED:
491                 bis.addDummy();
492                 final Inflater inflater = new Inflater(true);
493                 return new InflaterInputStream(bis, inflater) {
494                     @Override
495                     public void close() throws IOException {
496                         try {
497                             super.close();
498                         } finally {
499                             inflater.end();
500                         }
501                     }
502                 };
503             case BZIP2:
504                 return new BZip2CompressorInputStream(bis);
505             case AES_ENCRYPTED:
506             case ENHANCED_DEFLATED:
507             case EXPANDING_LEVEL_1:
508             case EXPANDING_LEVEL_2:
509             case EXPANDING_LEVEL_3:
510             case EXPANDING_LEVEL_4:
511             case JPEG:
512             case LZMA:
513             case PKWARE_IMPLODING:
514             case PPMD:
515             case TOKENIZATION:
516             case UNKNOWN:
517             case WAVPACK:
518             default:
519                 throw new ZipException("Found unsupported compression method "
520                                        + ze.getMethod());
521         }
522     }
523 
524     /**
525      * <p>
526      * Convenience method to return the entry's content as a String if isUnixSymlink()
527      * returns true for it, otherwise returns null.
528      * </p>
529      *
530      * <p>This method assumes the symbolic link's file name uses the
531      * same encoding that as been specified for this ZipFile.</p>
532      *
533      * @param entry ZipArchiveEntry object that represents the symbolic link
534      * @return entry's content as a String
535      * @throws IOException problem with content's input stream
536      * @since 1.5
537      */
538     public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
539         if (entry != null && entry.isUnixSymlink()) {
540             try (InputStream in = getInputStream(entry)) {
541                 return zipEncoding.decode(IOUtils.toByteArray(in));
542             }
543         }
544         return null;
545     }
546 
547     /**
548      * Ensures that the close method of this zipfile is called when
549      * there are no more references to it.
550      * @see #close()
551      */
552     @Override
553     protected void finalize() throws Throwable {
554         try {
555             if (!closed) {
556                 System.err.println("Cleaning up unclosed ZipFile for archive "
557                                    + archiveName);
558                 close();
559             }
560         } finally {
561             super.finalize();
562         }
563     }
564 
565     /**
566      * Length of a "central directory" entry structure without file
567      * name, extra fields or comment.
568      */
569     private static final int CFH_LEN =
570         /* version made by                 */ SHORT
571         /* version needed to extract       */ + SHORT
572         /* general purpose bit flag        */ + SHORT
573         /* compression method              */ + SHORT
574         /* last mod file time              */ + SHORT
575         /* last mod file date              */ + SHORT
576         /* crc-32                          */ + WORD
577         /* compressed size                 */ + WORD
578         /* uncompressed size               */ + WORD
579         /* filename length                 */ + SHORT
580         /* extra field length              */ + SHORT
581         /* file comment length             */ + SHORT
582         /* disk number start               */ + SHORT
583         /* internal file attributes        */ + SHORT
584         /* external file attributes        */ + WORD
585         /* relative offset of local header */ + WORD;
586 
587     private static final long CFH_SIG =
588         ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
589 
590     /**
591      * Reads the central directory of the given archive and populates
592      * the internal tables with ZipArchiveEntry instances.
593      *
594      * <p>The ZipArchiveEntrys will know all data that can be obtained from
595      * the central directory alone, but not the data that requires the
596      * local file header or additional data to be read.</p>
597      *
598      * @return a map of zipentries that didn't have the language
599      * encoding flag set when read.
600      */
601     private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
602         throws IOException {
603         final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
604             new HashMap<>();
605 
606         positionAtCentralDirectory();
607 
608         wordBbuf.rewind();
609         IOUtils.readFully(archive, wordBbuf);
610         long sig = ZipLong.getValue(wordBuf);
611 
612         if (sig != CFH_SIG && startsWithLocalFileHeader()) {
613             throw new IOException("central directory is empty, can't expand"
614                                   + " corrupt archive.");
615         }
616 
617         while (sig == CFH_SIG) {
618             readCentralDirectoryEntry(noUTF8Flag);
619             wordBbuf.rewind();
620             IOUtils.readFully(archive, wordBbuf);
621             sig = ZipLong.getValue(wordBuf);
622         }
623         return noUTF8Flag;
624     }
625 
626     /**
627      * Reads an individual entry of the central directory, creats an
628      * ZipArchiveEntry from it and adds it to the global maps.
629      *
630      * @param noUTF8Flag map used to collect entries that don't have
631      * their UTF-8 flag set and whose name will be set by data read
632      * from the local file header later.  The current entry may be
633      * added to this map.
634      */
635     private void
636         readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
637         throws IOException {
638         cfhBbuf.rewind();
639         IOUtils.readFully(archive, cfhBbuf);
640         int off = 0;
641         final Entry ze = new Entry();
642 
643         final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
644         off += SHORT;
645         ze.setVersionMadeBy(versionMadeBy);
646         ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
647 
648         ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
649         off += SHORT; // version required
650 
651         final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
652         final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
653         final ZipEncoding entryEncoding =
654             hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
655         ze.setGeneralPurposeBit(gpFlag);
656         ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
657 
658         off += SHORT;
659 
660         //noinspection MagicConstant
661         ze.setMethod(ZipShort.getValue(cfhBuf, off));
662         off += SHORT;
663 
664         final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
665         ze.setTime(time);
666         off += WORD;
667 
668         ze.setCrc(ZipLong.getValue(cfhBuf, off));
669         off += WORD;
670 
671         ze.setCompressedSize(ZipLong.getValue(cfhBuf, off));
672         off += WORD;
673 
674         ze.setSize(ZipLong.getValue(cfhBuf, off));
675         off += WORD;
676 
677         final int fileNameLen = ZipShort.getValue(cfhBuf, off);
678         off += SHORT;
679 
680         final int extraLen = ZipShort.getValue(cfhBuf, off);
681         off += SHORT;
682 
683         final int commentLen = ZipShort.getValue(cfhBuf, off);
684         off += SHORT;
685 
686         final int diskStart = ZipShort.getValue(cfhBuf, off);
687         off += SHORT;
688 
689         ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
690         off += SHORT;
691 
692         ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
693         off += WORD;
694 
695         final byte[] fileName = new byte[fileNameLen];
696         IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
697         ze.setName(entryEncoding.decode(fileName), fileName);
698 
699         // LFH offset,
700         ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
701         // data offset will be filled later
702         entries.add(ze);
703 
704         final byte[] cdExtraData = new byte[extraLen];
705         IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
706         ze.setCentralDirectoryExtra(cdExtraData);
707 
708         setSizesAndOffsetFromZip64Extra(ze, diskStart);
709 
710         final byte[] comment = new byte[commentLen];
711         IOUtils.readFully(archive, ByteBuffer.wrap(comment));
712         ze.setComment(entryEncoding.decode(comment));
713 
714         if (!hasUTF8Flag && useUnicodeExtraFields) {
715             noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
716         }
717     }
718 
719     /**
720      * If the entry holds a Zip64 extended information extra field,
721      * read sizes from there if the entry's sizes are set to
722      * 0xFFFFFFFFF, do the same for the offset of the local file
723      * header.
724      *
725      * <p>Ensures the Zip64 extra either knows both compressed and
726      * uncompressed size or neither of both as the internal logic in
727      * ExtraFieldUtils forces the field to create local header data
728      * even if they are never used - and here a field with only one
729      * size would be invalid.</p>
730      */
731     private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
732                                                  final int diskStart)
733         throws IOException {
734         final Zip64ExtendedInformationExtraField z64 =
735             (Zip64ExtendedInformationExtraField)
736             ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
737         if (z64 != null) {
738             final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
739             final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
740             final boolean hasRelativeHeaderOffset =
741                 ze.getLocalHeaderOffset() == ZIP64_MAGIC;
742             z64.reparseCentralDirectoryData(hasUncompressedSize,
743                                             hasCompressedSize,
744                                             hasRelativeHeaderOffset,
745                                             diskStart == ZIP64_MAGIC_SHORT);
746 
747             if (hasUncompressedSize) {
748                 ze.setSize(z64.getSize().getLongValue());
749             } else if (hasCompressedSize) {
750                 z64.setSize(new ZipEightByteInteger(ze.getSize()));
751             }
752 
753             if (hasCompressedSize) {
754                 ze.setCompressedSize(z64.getCompressedSize().getLongValue());
755             } else if (hasUncompressedSize) {
756                 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
757             }
758 
759             if (hasRelativeHeaderOffset) {
760                 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
761             }
762         }
763     }
764 
765     /**
766      * Length of the "End of central directory record" - which is
767      * supposed to be the last structure of the archive - without file
768      * comment.
769      */
770     static final int MIN_EOCD_SIZE =
771         /* end of central dir signature    */ WORD
772         /* number of this disk             */ + SHORT
773         /* number of the disk with the     */
774         /* start of the central directory  */ + SHORT
775         /* total number of entries in      */
776         /* the central dir on this disk    */ + SHORT
777         /* total number of entries in      */
778         /* the central dir                 */ + SHORT
779         /* size of the central directory   */ + WORD
780         /* offset of start of central      */
781         /* directory with respect to       */
782         /* the starting disk number        */ + WORD
783         /* zipfile comment length          */ + SHORT;
784 
785     /**
786      * Maximum length of the "End of central directory record" with a
787      * file comment.
788      */
789     private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
790         /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
791 
792     /**
793      * Offset of the field that holds the location of the first
794      * central directory entry inside the "End of central directory
795      * record" relative to the start of the "End of central directory
796      * record".
797      */
798     private static final int CFD_LOCATOR_OFFSET =
799         /* end of central dir signature    */ WORD
800         /* number of this disk             */ + SHORT
801         /* number of the disk with the     */
802         /* start of the central directory  */ + SHORT
803         /* total number of entries in      */
804         /* the central dir on this disk    */ + SHORT
805         /* total number of entries in      */
806         /* the central dir                 */ + SHORT
807         /* size of the central directory   */ + WORD;
808 
809     /**
810      * Length of the "Zip64 end of central directory locator" - which
811      * should be right in front of the "end of central directory
812      * record" if one is present at all.
813      */
814     private static final int ZIP64_EOCDL_LENGTH =
815         /* zip64 end of central dir locator sig */ WORD
816         /* number of the disk with the start    */
817         /* start of the zip64 end of            */
818         /* central directory                    */ + WORD
819         /* relative offset of the zip64         */
820         /* end of central directory record      */ + DWORD
821         /* total number of disks                */ + WORD;
822 
823     /**
824      * Offset of the field that holds the location of the "Zip64 end
825      * of central directory record" inside the "Zip64 end of central
826      * directory locator" relative to the start of the "Zip64 end of
827      * central directory locator".
828      */
829     private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
830         /* zip64 end of central dir locator sig */ WORD
831         /* number of the disk with the start    */
832         /* start of the zip64 end of            */
833         /* central directory                    */ + WORD;
834 
835     /**
836      * Offset of the field that holds the location of the first
837      * central directory entry inside the "Zip64 end of central
838      * directory record" relative to the start of the "Zip64 end of
839      * central directory record".
840      */
841     private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
842         /* zip64 end of central dir        */
843         /* signature                       */ WORD
844         /* size of zip64 end of central    */
845         /* directory record                */ + DWORD
846         /* version made by                 */ + SHORT
847         /* version needed to extract       */ + SHORT
848         /* number of this disk             */ + WORD
849         /* number of the disk with the     */
850         /* start of the central directory  */ + WORD
851         /* total number of entries in the  */
852         /* central directory on this disk  */ + DWORD
853         /* total number of entries in the  */
854         /* central directory               */ + DWORD
855         /* size of the central directory   */ + DWORD;
856 
857     /**
858      * Searches for either the &quot;Zip64 end of central directory
859      * locator&quot; or the &quot;End of central dir record&quot;, parses
860      * it and positions the stream at the first central directory
861      * record.
862      */
863     private void positionAtCentralDirectory()
864         throws IOException {
865         positionAtEndOfCentralDirectoryRecord();
866         boolean found = false;
867         final boolean searchedForZip64EOCD =
868             archive.position() > ZIP64_EOCDL_LENGTH;
869         if (searchedForZip64EOCD) {
870             archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
871             wordBbuf.rewind();
872             IOUtils.readFully(archive, wordBbuf);
873             found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
874                                   wordBuf);
875         }
876         if (!found) {
877             // not a ZIP64 archive
878             if (searchedForZip64EOCD) {
879                 skipBytes(ZIP64_EOCDL_LENGTH - WORD);
880             }
881             positionAtCentralDirectory32();
882         } else {
883             positionAtCentralDirectory64();
884         }
885     }
886 
887     /**
888      * Parses the &quot;Zip64 end of central directory locator&quot;,
889      * finds the &quot;Zip64 end of central directory record&quot; using the
890      * parsed information, parses that and positions the stream at the
891      * first central directory record.
892      *
893      * Expects stream to be positioned right behind the &quot;Zip64
894      * end of central directory locator&quot;'s signature.
895      */
896     private void positionAtCentralDirectory64()
897         throws IOException {
898         skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
899                   - WORD /* signature has already been read */);
900         dwordBbuf.rewind();
901         IOUtils.readFully(archive, dwordBbuf);
902         archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
903         wordBbuf.rewind();
904         IOUtils.readFully(archive, wordBbuf);
905         if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
906             throw new ZipException("archive's ZIP64 end of central "
907                                    + "directory locator is corrupt.");
908         }
909         skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
910                   - WORD /* signature has already been read */);
911         dwordBbuf.rewind();
912         IOUtils.readFully(archive, dwordBbuf);
913         archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
914     }
915 
916     /**
917      * Parses the &quot;End of central dir record&quot; and positions
918      * the stream at the first central directory record.
919      *
920      * Expects stream to be positioned at the beginning of the
921      * &quot;End of central dir record&quot;.
922      */
923     private void positionAtCentralDirectory32()
924         throws IOException {
925         skipBytes(CFD_LOCATOR_OFFSET);
926         wordBbuf.rewind();
927         IOUtils.readFully(archive, wordBbuf);
928         archive.position(ZipLong.getValue(wordBuf));
929     }
930 
931     /**
932      * Searches for the and positions the stream at the start of the
933      * &quot;End of central dir record&quot;.
934      */
935     private void positionAtEndOfCentralDirectoryRecord()
936         throws IOException {
937         final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
938                                              ZipArchiveOutputStream.EOCD_SIG);
939         if (!found) {
940             throw new ZipException("archive is not a ZIP archive");
941         }
942     }
943 
944     /**
945      * Searches the archive backwards from minDistance to maxDistance
946      * for the given signature, positions the RandomaccessFile right
947      * at the signature if it has been found.
948      */
949     private boolean tryToLocateSignature(final long minDistanceFromEnd,
950                                          final long maxDistanceFromEnd,
951                                          final byte[] sig) throws IOException {
952         boolean found = false;
953         long off = archive.size() - minDistanceFromEnd;
954         final long stopSearching =
955             Math.max(0L, archive.size() - maxDistanceFromEnd);
956         if (off >= 0) {
957             for (; off >= stopSearching; off--) {
958                 archive.position(off);
959                 try {
960                     wordBbuf.rewind();
961                     IOUtils.readFully(archive, wordBbuf);
962                     wordBbuf.flip();
963                 } catch (EOFException ex) {
964                     break;
965                 }
966                 int curr = wordBbuf.get();
967                 if (curr == sig[POS_0]) {
968                     curr = wordBbuf.get();
969                     if (curr == sig[POS_1]) {
970                         curr = wordBbuf.get();
971                         if (curr == sig[POS_2]) {
972                             curr = wordBbuf.get();
973                             if (curr == sig[POS_3]) {
974                                 found = true;
975                                 break;
976                             }
977                         }
978                     }
979                 }
980             }
981         }
982         if (found) {
983             archive.position(off);
984         }
985         return found;
986     }
987 
988     /**
989      * Skips the given number of bytes or throws an EOFException if
990      * skipping failed.
991      */ 
992     private void skipBytes(final int count) throws IOException {
993         long currentPosition = archive.position();
994         long newPosition = currentPosition + count;
995         if (newPosition > archive.size()) {
996             throw new EOFException();
997         }
998         archive.position(newPosition);
999     }
1000 
1001     /**
1002      * Number of bytes in local file header up to the &quot;length of
1003      * filename&quot; entry.
1004      */
1005     private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
1006         /* local file header signature     */ WORD
1007         /* version needed to extract       */ + SHORT
1008         /* general purpose bit flag        */ + SHORT
1009         /* compression method              */ + SHORT
1010         /* last mod file time              */ + SHORT
1011         /* last mod file date              */ + SHORT
1012         /* crc-32                          */ + WORD
1013         /* compressed size                 */ + WORD
1014         /* uncompressed size               */ + (long) WORD;
1015 
1016     /**
1017      * Walks through all recorded entries and adds the data available
1018      * from the local file header.
1019      *
1020      * <p>Also records the offsets for the data to read from the
1021      * entries.</p>
1022      */
1023     private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1024                                             entriesWithoutUTF8Flag)
1025         throws IOException {
1026         for (final ZipArchiveEntry zipArchiveEntry : entries) {
1027             // entries is filled in populateFromCentralDirectory and
1028             // never modified
1029             final Entry ze = (Entry) zipArchiveEntry;
1030             final long offset = ze.getLocalHeaderOffset();
1031             archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1032             wordBbuf.rewind();
1033             IOUtils.readFully(archive, wordBbuf);
1034             wordBbuf.flip();
1035             wordBbuf.get(shortBuf);
1036             final int fileNameLen = ZipShort.getValue(shortBuf);
1037             wordBbuf.get(shortBuf);
1038             final int extraFieldLen = ZipShort.getValue(shortBuf);
1039             skipBytes(fileNameLen);
1040             final byte[] localExtraData = new byte[extraFieldLen];
1041             IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData));
1042             ze.setExtra(localExtraData);
1043             ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1044                 + SHORT + SHORT + fileNameLen + extraFieldLen);
1045             ze.setStreamContiguous(true);
1046 
1047             if (entriesWithoutUTF8Flag.containsKey(ze)) {
1048                 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1049                 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1050                                                          nc.comment);
1051             }
1052 
1053             final String name = ze.getName();
1054             LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
1055             if (entriesOfThatName == null) {
1056                 entriesOfThatName = new LinkedList<>();
1057                 nameMap.put(name, entriesOfThatName);
1058             }
1059             entriesOfThatName.addLast(ze);
1060         }
1061     }
1062 
1063     /**
1064      * Checks whether the archive starts with a LFH.  If it doesn't,
1065      * it may be an empty archive.
1066      */
1067     private boolean startsWithLocalFileHeader() throws IOException {
1068         archive.position(0);
1069         wordBbuf.rewind();
1070         IOUtils.readFully(archive, wordBbuf);
1071         return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1072     }
1073 
1074     /**
1075      * Creates new BoundedInputStream, according to implementation of
1076      * underlying archive channel.
1077      */
1078     private BoundedInputStream createBoundedInputStream(long start, long remaining) {
1079         return archive instanceof FileChannel ?
1080             new BoundedFileChannelInputStream(start, remaining) :
1081             new BoundedInputStream(start, remaining);
1082     }
1083 
1084     /**
1085      * InputStream that delegates requests to the underlying
1086      * SeekableByteChannel, making sure that only bytes from a certain
1087      * range can be read.
1088      */
1089     private class BoundedInputStream extends InputStream {
1090         private ByteBuffer singleByteBuffer;
1091         private final long end;
1092         private long loc;
1093         private boolean addDummy = false;
1094 
1095         BoundedInputStream(final long start, final long remaining) {
1096             this.end = start+remaining;
1097             if (this.end < start) {
1098                 // check for potential vulnerability due to overflow
1099                 throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining);
1100             }
1101             loc = start;
1102         }
1103 
1104         @Override
1105         public synchronized int read() throws IOException {
1106             if (loc >= end) {
1107                 if (loc == end && addDummy) {
1108                     addDummy = false;
1109                     return 0;
1110                 }
1111                 return -1;
1112             }
1113             if (singleByteBuffer == null) {
1114                 singleByteBuffer = ByteBuffer.allocate(1);
1115             }
1116             else {
1117                 singleByteBuffer.rewind();
1118             }
1119             int read = read(loc, singleByteBuffer);
1120             if (read < 0) {
1121                 return read;
1122             }
1123             loc++;
1124             return singleByteBuffer.get() & 0xff;
1125         }
1126 
1127         @Override
1128         public synchronized int read(final byte[] b, final int off, int len) throws IOException {
1129             if (len <= 0) {
1130                 return 0;
1131             }
1132 
1133             if (len > end-loc) {
1134                 if (loc >= end) {
1135                     if (loc == end && addDummy) {
1136                         addDummy = false;
1137                         b[off] = 0;
1138                         return 1;
1139                     }
1140                     return -1;
1141                 }
1142                 len = (int)(end-loc);
1143             }
1144 
1145             ByteBuffer buf;
1146             buf = ByteBuffer.wrap(b, off, len);
1147             int ret = read(loc, buf);
1148             if (ret > 0) {
1149                 loc += ret;
1150                 return ret;
1151             }
1152             return ret;
1153         }
1154 
1155         protected int read(long pos, ByteBuffer buf) throws IOException {
1156             int read;
1157             synchronized (archive) {
1158                 archive.position(pos);
1159                 read = archive.read(buf);
1160             }
1161             buf.flip();
1162             return read;
1163         }
1164 
1165         synchronized void addDummy() {
1166             this.addDummy = true;
1167         }
1168     }
1169 
1170     /**
1171      * Lock-free implementation of BoundedInputStream. The
1172      * implementation uses positioned reads on the underlying archive
1173      * file channel and therefore performs significantly faster in
1174      * concurrent environment.
1175      */
1176     private class BoundedFileChannelInputStream extends BoundedInputStream {
1177         private final FileChannel archive;
1178 
1179         BoundedFileChannelInputStream(final long start, final long remaining) {
1180             super(start, remaining);
1181             archive = (FileChannel)ZipFile.this.archive;
1182         }
1183 
1184         @Override
1185         protected int read(long pos, ByteBuffer buf) throws IOException {
1186             int read = archive.read(buf, pos);
1187             buf.flip();
1188             return read;
1189         }
1190     }
1191 
1192     private static final class NameAndComment {
1193         private final byte[] name;
1194         private final byte[] comment;
1195         private NameAndComment(final byte[] name, final byte[] comment) {
1196             this.name = name;
1197             this.comment = comment;
1198         }
1199     }
1200 
1201     /**
1202      * Compares two ZipArchiveEntries based on their offset within the archive.
1203      *
1204      * <p>Won't return any meaningful results if one of the entries
1205      * isn't part of the archive at all.</p>
1206      *
1207      * @since 1.1
1208      */
1209     private final Comparator<ZipArchiveEntry> offsetComparator =
1210         new Comparator<ZipArchiveEntry>() {
1211         @Override
1212         public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) {
1213             if (e1 == e2) {
1214                 return 0;
1215             }
1216 
1217             final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
1218             final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
1219             if (ent1 == null) {
1220                 return 1;
1221             }
1222             if (ent2 == null) {
1223                 return -1;
1224             }
1225             final long val = (ent1.getLocalHeaderOffset()
1226                         - ent2.getLocalHeaderOffset());
1227             return val == 0 ? 0 : val < 0 ? -1 : +1;
1228         }
1229     };
1230 
1231     /**
1232      * Extends ZipArchiveEntry to store the offset within the archive.
1233      */
1234     private static class Entry extends ZipArchiveEntry {
1235 
1236         Entry() {
1237         }
1238 
1239         @Override
1240         public int hashCode() {
1241             return 3 * super.hashCode()
1242                 + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32);
1243         }
1244 
1245         @Override
1246         public boolean equals(final Object other) {
1247             if (super.equals(other)) {
1248                 // super.equals would return false if other were not an Entry
1249                 final Entry otherEntry = (Entry) other;
1250                 return getLocalHeaderOffset()
1251                         == otherEntry.getLocalHeaderOffset()
1252                     && getDataOffset()
1253                         == otherEntry.getDataOffset();
1254             }
1255             return false;
1256         }
1257     }
1258 }