1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 */
18 package org.apache.commons.compress.archivers.zip;
19
20 import java.io.EOFException;
21 import java.io.File;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.RandomAccessFile;
25 import java.util.Arrays;
26 import java.util.Collections;
27 import java.util.Comparator;
28 import java.util.Enumeration;
29 import java.util.HashMap;
30 import java.util.LinkedHashMap;
31 import java.util.Map;
32 import java.util.zip.Inflater;
33 import java.util.zip.InflaterInputStream;
34 import java.util.zip.ZipEntry;
35 import java.util.zip.ZipException;
36
37 import org.apache.commons.compress.utils.IOUtils;
38
39 import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
40 import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
41 import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
42 import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
43 import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
44
/**
 * Replacement for <code>java.util.zip.ZipFile</code>.
 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives.  Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
 *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway.  Like
 * <code>java.util.zip.ZipFile</code>, it uses RandomAccessFile under the
 * covers and supports compressed and uncompressed entries.  As of
 * Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The method signatures mimic the ones of
 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:</p>
 *
 * <ul>
 *   <li>There is no getName method.</li>
 *   <li>entries has been renamed to getEntries.</li>
 *   <li>getEntries and getEntry return
 *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 *   instances.</li>
 *   <li>close is allowed to throw IOException.</li>
 * </ul>
 *
 */
76 public class ZipFile {
77 private static final int HASH_SIZE = 509;
78 static final int NIBLET_MASK = 0x0f;
79 static final int BYTE_SHIFT = 8;
80 private static final int POS_0 = 0;
81 private static final int POS_1 = 1;
82 private static final int POS_2 = 2;
83 private static final int POS_3 = 3;
84
85 /**
86 * Maps ZipArchiveEntrys to two longs, recording the offsets of
87 * the local file headers and the start of entry data.
88 */
89 private final Map<ZipArchiveEntry, OffsetEntry> entries =
90 new LinkedHashMap<ZipArchiveEntry, OffsetEntry>(HASH_SIZE);
91
92 /**
93 * Maps String to ZipArchiveEntrys, name -> actual entry.
94 */
95 private final Map<String, ZipArchiveEntry> nameMap =
96 new HashMap<String, ZipArchiveEntry>(HASH_SIZE);
97
98 private static final class OffsetEntry {
99 private long headerOffset = -1;
100 private long dataOffset = -1;
101 }
102
103 /**
104 * The encoding to use for filenames and the file comment.
105 *
106 * <p>For a list of possible values see <a
107 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
108 * Defaults to UTF-8.</p>
109 */
110 private final String encoding;
111
112 /**
113 * The zip encoding to use for filenames and the file comment.
114 */
115 private final ZipEncoding zipEncoding;
116
117 /**
118 * File name of actual source.
119 */
120 private final String archiveName;
121
122 /**
123 * The actual data source.
124 */
125 private final RandomAccessFile archive;
126
127 /**
128 * Whether to look for and use Unicode extra fields.
129 */
130 private final boolean useUnicodeExtraFields;
131
132 /**
133 * Whether the file is closed.
134 */
135 private boolean closed;
136
137 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
138 private final byte[] DWORD_BUF = new byte[DWORD];
139 private final byte[] WORD_BUF = new byte[WORD];
140 private final byte[] CFH_BUF = new byte[CFH_LEN];
141 private final byte[] SHORT_BUF = new byte[SHORT];
142
/**
 * Opens the given file for reading, using "UTF8" for file names
 * and scanning for unicode extra fields.
 *
 * @param f the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(File f) throws IOException {
    this(f, ZipEncodingHelper.UTF8, true);
}

/**
 * Opens the file of the given name for reading, using "UTF8" for
 * file names and scanning for unicode extra fields.
 *
 * @param name name of the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(String name) throws IOException {
    this(new File(name), ZipEncodingHelper.UTF8, true);
}

/**
 * Opens the file of the given name for reading, using the
 * specified encoding for file names and scanning for unicode
 * extra fields.
 *
 * @param name name of the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(String name, String encoding) throws IOException {
    this(new File(name), encoding);
}

/**
 * Opens the given file for reading, using the specified encoding
 * for file names and scanning for unicode extra fields.
 *
 * @param f the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(File f, String encoding) throws IOException {
    this(f, encoding, true);
}
192
193 /**
194 * Opens the given file for reading, assuming the specified
195 * encoding for file names.
196 *
197 * @param f the archive.
198 * @param encoding the encoding to use for file names, use null
199 * for the platform's default encoding
200 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
201 * Extra Fields (if present) to set the file names.
202 *
203 * @throws IOException if an error occurs while reading the file.
204 */
205 public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
206 throws IOException {
207 this.archiveName = f.getAbsolutePath();
208 this.encoding = encoding;
209 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
210 this.useUnicodeExtraFields = useUnicodeExtraFields;
211 archive = new RandomAccessFile(f, "r");
212 boolean success = false;
213 try {
214 Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
215 populateFromCentralDirectory();
216 resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
217 success = true;
218 } finally {
219 if (!success) {
220 try {
221 closed = true;
222 archive.close();
223 } catch (IOException e2) { // NOPMD
224 // swallow, throw the original exception instead
225 }
226 }
227 }
228 }
229
230 /**
231 * The encoding to use for filenames and the file comment.
232 *
233 * @return null if using the platform's default character encoding.
234 */
235 public String getEncoding() {
236 return encoding;
237 }
238
239 /**
240 * Closes the archive.
241 * @throws IOException if an error occurs closing the archive.
242 */
243 public void close() throws IOException {
244 // this flag is only written here and read in finalize() which
245 // can never be run in parallel.
246 // no synchronization needed.
247 closed = true;
248
249 archive.close();
250 }
251
252 /**
253 * close a zipfile quietly; throw no io fault, do nothing
254 * on a null parameter
255 * @param zipfile file to close, can be null
256 */
257 public static void closeQuietly(ZipFile zipfile) {
258 if (zipfile != null) {
259 try {
260 zipfile.close();
261 } catch (IOException e) { // NOPMD
262 //ignore, that's why the method is called "quietly"
263 }
264 }
265 }
266
267 /**
268 * Returns all entries.
269 *
270 * <p>Entries will be returned in the same order they appear
271 * within the archive's central directory.</p>
272 *
273 * @return all entries as {@link ZipArchiveEntry} instances
274 */
275 public Enumeration<ZipArchiveEntry> getEntries() {
276 return Collections.enumeration(entries.keySet());
277 }
278
279 /**
280 * Returns all entries in physical order.
281 *
282 * <p>Entries will be returned in the same order their contents
283 * appear within the archive.</p>
284 *
285 * @return all entries as {@link ZipArchiveEntry} instances
286 *
287 * @since 1.1
288 */
289 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
290 ZipArchiveEntry[] allEntries =
291 entries.keySet().toArray(new ZipArchiveEntry[0]);
292 Arrays.sort(allEntries, OFFSET_COMPARATOR);
293 return Collections.enumeration(Arrays.asList(allEntries));
294 }
295
296 /**
297 * Returns a named entry - or {@code null} if no entry by
298 * that name exists.
299 * @param name name of the entry.
300 * @return the ZipArchiveEntry corresponding to the given name - or
301 * {@code null} if not present.
302 */
303 public ZipArchiveEntry getEntry(String name) {
304 return nameMap.get(name);
305 }
306
307 /**
308 * Whether this class is able to read the given entry.
309 *
310 * <p>May return false if it is set up to use encryption or a
311 * compression method that hasn't been implemented yet.</p>
312 * @since 1.1
313 */
314 public boolean canReadEntryData(ZipArchiveEntry ze) {
315 return ZipUtil.canHandleEntryData(ze);
316 }
317
318 /**
319 * Returns an InputStream for reading the contents of the given entry.
320 *
321 * @param ze the entry to get the stream for.
322 * @return a stream to read the entry from.
323 * @throws IOException if unable to create an input stream from the zipentry
324 * @throws ZipException if the zipentry uses an unsupported feature
325 */
326 public InputStream getInputStream(ZipArchiveEntry ze)
327 throws IOException, ZipException {
328 OffsetEntry offsetEntry = entries.get(ze);
329 if (offsetEntry == null) {
330 return null;
331 }
332 ZipUtil.checkRequestedFeatures(ze);
333 long start = offsetEntry.dataOffset;
334 BoundedInputStream bis =
335 new BoundedInputStream(start, ze.getCompressedSize());
336 switch (ze.getMethod()) {
337 case ZipEntry.STORED:
338 return bis;
339 case ZipEntry.DEFLATED:
340 bis.addDummy();
341 final Inflater inflater = new Inflater(true);
342 return new InflaterInputStream(bis, inflater) {
343 @Override
344 public void close() throws IOException {
345 super.close();
346 inflater.end();
347 }
348 };
349 default:
350 throw new ZipException("Found unsupported compression method "
351 + ze.getMethod());
352 }
353 }
354
355 /**
356 * <p>
357 * Convenience method to return the entry's content as a String if isUnixSymlink()
358 * returns true for it, otherwise returns null.
359 * </p>
360 *
361 * <p>This method assumes the symbolic link's file name uses the
362 * same encoding that as been specified for this ZipFile.</p>
363 *
364 * @param entry ZipArchiveEntry object that represents the symbolic link
365 * @return entry's content as a String
366 * @throws IOException problem with content's input stream
367 * @since 1.5
368 */
369 public String getUnixSymlink(ZipArchiveEntry entry) throws IOException {
370 if (entry != null && entry.isUnixSymlink()) {
371 InputStream in = null;
372 try {
373 in = getInputStream(entry);
374 byte[] symlinkBytes = IOUtils.toByteArray(in);
375 return zipEncoding.decode(symlinkBytes);
376 } finally {
377 if (in != null) {
378 in.close();
379 }
380 }
381 } else {
382 return null;
383 }
384 }
385
386 /**
387 * Ensures that the close method of this zipfile is called when
388 * there are no more references to it.
389 * @see #close()
390 */
391 @Override
392 protected void finalize() throws Throwable {
393 try {
394 if (!closed) {
395 System.err.println("Cleaning up unclosed ZipFile for archive "
396 + archiveName);
397 close();
398 }
399 } finally {
400 super.finalize();
401 }
402 }
403
404 /**
405 * Length of a "central directory" entry structure without file
406 * name, extra fields or comment.
407 */
408 private static final int CFH_LEN =
409 /* version made by */ SHORT
410 /* version needed to extract */ + SHORT
411 /* general purpose bit flag */ + SHORT
412 /* compression method */ + SHORT
413 /* last mod file time */ + SHORT
414 /* last mod file date */ + SHORT
415 /* crc-32 */ + WORD
416 /* compressed size */ + WORD
417 /* uncompressed size */ + WORD
418 /* filename length */ + SHORT
419 /* extra field length */ + SHORT
420 /* file comment length */ + SHORT
421 /* disk number start */ + SHORT
422 /* internal file attributes */ + SHORT
423 /* external file attributes */ + WORD
424 /* relative offset of local header */ + WORD;
425
426 private static final long CFH_SIG =
427 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
428
429 /**
430 * Reads the central directory of the given archive and populates
431 * the internal tables with ZipArchiveEntry instances.
432 *
433 * <p>The ZipArchiveEntrys will know all data that can be obtained from
434 * the central directory alone, but not the data that requires the
435 * local file header or additional data to be read.</p>
436 *
437 * @return a map of zipentries that didn't have the language
438 * encoding flag set when read.
439 */
440 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
441 throws IOException {
442 HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
443 new HashMap<ZipArchiveEntry, NameAndComment>();
444
445 positionAtCentralDirectory();
446
447 archive.readFully(WORD_BUF);
448 long sig = ZipLong.getValue(WORD_BUF);
449
450 if (sig != CFH_SIG && startsWithLocalFileHeader()) {
451 throw new IOException("central directory is empty, can't expand"
452 + " corrupt archive.");
453 }
454
455 while (sig == CFH_SIG) {
456 readCentralDirectoryEntry(noUTF8Flag);
457 archive.readFully(WORD_BUF);
458 sig = ZipLong.getValue(WORD_BUF);
459 }
460 return noUTF8Flag;
461 }
462
463 /**
464 * Reads an individual entry of the central directory, creats an
465 * ZipArchiveEntry from it and adds it to the global maps.
466 *
467 * @param noUTF8Flag map used to collect entries that don't have
468 * their UTF-8 flag set and whose name will be set by data read
469 * from the local file header later. The current entry may be
470 * added to this map.
471 */
472 private void
473 readCentralDirectoryEntry(Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
474 throws IOException {
475 archive.readFully(CFH_BUF);
476 int off = 0;
477 ZipArchiveEntry ze = new ZipArchiveEntry();
478
479 int versionMadeBy = ZipShort.getValue(CFH_BUF, off);
480 off += SHORT;
481 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
482
483 off += SHORT; // skip version info
484
485 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(CFH_BUF, off);
486 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
487 final ZipEncoding entryEncoding =
488 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
489 ze.setGeneralPurposeBit(gpFlag);
490
491 off += SHORT;
492
493 ze.setMethod(ZipShort.getValue(CFH_BUF, off));
494 off += SHORT;
495
496 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(CFH_BUF, off));
497 ze.setTime(time);
498 off += WORD;
499
500 ze.setCrc(ZipLong.getValue(CFH_BUF, off));
501 off += WORD;
502
503 ze.setCompressedSize(ZipLong.getValue(CFH_BUF, off));
504 off += WORD;
505
506 ze.setSize(ZipLong.getValue(CFH_BUF, off));
507 off += WORD;
508
509 int fileNameLen = ZipShort.getValue(CFH_BUF, off);
510 off += SHORT;
511
512 int extraLen = ZipShort.getValue(CFH_BUF, off);
513 off += SHORT;
514
515 int commentLen = ZipShort.getValue(CFH_BUF, off);
516 off += SHORT;
517
518 int diskStart = ZipShort.getValue(CFH_BUF, off);
519 off += SHORT;
520
521 ze.setInternalAttributes(ZipShort.getValue(CFH_BUF, off));
522 off += SHORT;
523
524 ze.setExternalAttributes(ZipLong.getValue(CFH_BUF, off));
525 off += WORD;
526
527 byte[] fileName = new byte[fileNameLen];
528 archive.readFully(fileName);
529 ze.setName(entryEncoding.decode(fileName), fileName);
530
531 // LFH offset,
532 OffsetEntry offset = new OffsetEntry();
533 offset.headerOffset = ZipLong.getValue(CFH_BUF, off);
534 // data offset will be filled later
535 entries.put(ze, offset);
536
537 nameMap.put(ze.getName(), ze);
538
539 byte[] cdExtraData = new byte[extraLen];
540 archive.readFully(cdExtraData);
541 ze.setCentralDirectoryExtra(cdExtraData);
542
543 setSizesAndOffsetFromZip64Extra(ze, offset, diskStart);
544
545 byte[] comment = new byte[commentLen];
546 archive.readFully(comment);
547 ze.setComment(entryEncoding.decode(comment));
548
549 if (!hasUTF8Flag && useUnicodeExtraFields) {
550 noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
551 }
552 }
553
554 /**
555 * If the entry holds a Zip64 extended information extra field,
556 * read sizes from there if the entry's sizes are set to
557 * 0xFFFFFFFFF, do the same for the offset of the local file
558 * header.
559 *
560 * <p>Ensures the Zip64 extra either knows both compressed and
561 * uncompressed size or neither of both as the internal logic in
562 * ExtraFieldUtils forces the field to create local header data
563 * even if they are never used - and here a field with only one
564 * size would be invalid.</p>
565 */
566 private void setSizesAndOffsetFromZip64Extra(ZipArchiveEntry ze,
567 OffsetEntry offset,
568 int diskStart)
569 throws IOException {
570 Zip64ExtendedInformationExtraField z64 =
571 (Zip64ExtendedInformationExtraField)
572 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
573 if (z64 != null) {
574 boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
575 boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
576 boolean hasRelativeHeaderOffset =
577 offset.headerOffset == ZIP64_MAGIC;
578 z64.reparseCentralDirectoryData(hasUncompressedSize,
579 hasCompressedSize,
580 hasRelativeHeaderOffset,
581 diskStart == ZIP64_MAGIC_SHORT);
582
583 if (hasUncompressedSize) {
584 ze.setSize(z64.getSize().getLongValue());
585 } else if (hasCompressedSize) {
586 z64.setSize(new ZipEightByteInteger(ze.getSize()));
587 }
588
589 if (hasCompressedSize) {
590 ze.setCompressedSize(z64.getCompressedSize().getLongValue());
591 } else if (hasUncompressedSize) {
592 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
593 }
594
595 if (hasRelativeHeaderOffset) {
596 offset.headerOffset =
597 z64.getRelativeHeaderOffset().getLongValue();
598 }
599 }
600 }
601
602 /**
603 * Length of the "End of central directory record" - which is
604 * supposed to be the last structure of the archive - without file
605 * comment.
606 */
607 static final int MIN_EOCD_SIZE =
608 /* end of central dir signature */ WORD
609 /* number of this disk */ + SHORT
610 /* number of the disk with the */
611 /* start of the central directory */ + SHORT
612 /* total number of entries in */
613 /* the central dir on this disk */ + SHORT
614 /* total number of entries in */
615 /* the central dir */ + SHORT
616 /* size of the central directory */ + WORD
617 /* offset of start of central */
618 /* directory with respect to */
619 /* the starting disk number */ + WORD
620 /* zipfile comment length */ + SHORT;
621
622 /**
623 * Maximum length of the "End of central directory record" with a
624 * file comment.
625 */
626 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
627 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
628
629 /**
630 * Offset of the field that holds the location of the first
631 * central directory entry inside the "End of central directory
632 * record" relative to the start of the "End of central directory
633 * record".
634 */
635 private static final int CFD_LOCATOR_OFFSET =
636 /* end of central dir signature */ WORD
637 /* number of this disk */ + SHORT
638 /* number of the disk with the */
639 /* start of the central directory */ + SHORT
640 /* total number of entries in */
641 /* the central dir on this disk */ + SHORT
642 /* total number of entries in */
643 /* the central dir */ + SHORT
644 /* size of the central directory */ + WORD;
645
646 /**
647 * Length of the "Zip64 end of central directory locator" - which
648 * should be right in front of the "end of central directory
649 * record" if one is present at all.
650 */
651 private static final int ZIP64_EOCDL_LENGTH =
652 /* zip64 end of central dir locator sig */ WORD
653 /* number of the disk with the start */
654 /* start of the zip64 end of */
655 /* central directory */ + WORD
656 /* relative offset of the zip64 */
657 /* end of central directory record */ + DWORD
658 /* total number of disks */ + WORD;
659
660 /**
661 * Offset of the field that holds the location of the "Zip64 end
662 * of central directory record" inside the "Zip64 end of central
663 * directory locator" relative to the start of the "Zip64 end of
664 * central directory locator".
665 */
666 private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
667 /* zip64 end of central dir locator sig */ WORD
668 /* number of the disk with the start */
669 /* start of the zip64 end of */
670 /* central directory */ + WORD;
671
672 /**
673 * Offset of the field that holds the location of the first
674 * central directory entry inside the "Zip64 end of central
675 * directory record" relative to the start of the "Zip64 end of
676 * central directory record".
677 */
678 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
679 /* zip64 end of central dir */
680 /* signature */ WORD
681 /* size of zip64 end of central */
682 /* directory record */ + DWORD
683 /* version made by */ + SHORT
684 /* version needed to extract */ + SHORT
685 /* number of this disk */ + WORD
686 /* number of the disk with the */
687 /* start of the central directory */ + WORD
688 /* total number of entries in the */
689 /* central directory on this disk */ + DWORD
690 /* total number of entries in the */
691 /* central directory */ + DWORD
692 /* size of the central directory */ + DWORD;
693
694 /**
695 * Searches for either the "Zip64 end of central directory
696 * locator" or the "End of central dir record", parses
697 * it and positions the stream at the first central directory
698 * record.
699 */
700 private void positionAtCentralDirectory()
701 throws IOException {
702 positionAtEndOfCentralDirectoryRecord();
703 boolean found = false;
704 boolean searchedForZip64EOCD =
705 archive.getFilePointer() > ZIP64_EOCDL_LENGTH;
706 if (searchedForZip64EOCD) {
707 archive.seek(archive.getFilePointer() - ZIP64_EOCDL_LENGTH);
708 archive.readFully(WORD_BUF);
709 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
710 WORD_BUF);
711 }
712 if (!found) {
713 // not a ZIP64 archive
714 if (searchedForZip64EOCD) {
715 skipBytes(ZIP64_EOCDL_LENGTH - WORD);
716 }
717 positionAtCentralDirectory32();
718 } else {
719 positionAtCentralDirectory64();
720 }
721 }
722
723 /**
724 * Parses the "Zip64 end of central directory locator",
725 * finds the "Zip64 end of central directory record" using the
726 * parsed information, parses that and positions the stream at the
727 * first central directory record.
728 *
729 * Expects stream to be positioned right behind the "Zip64
730 * end of central directory locator"'s signature.
731 */
732 private void positionAtCentralDirectory64()
733 throws IOException {
734 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
735 - WORD /* signature has already been read */);
736 archive.readFully(DWORD_BUF);
737 archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF));
738 archive.readFully(WORD_BUF);
739 if (!Arrays.equals(WORD_BUF, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
740 throw new ZipException("archive's ZIP64 end of central "
741 + "directory locator is corrupt.");
742 }
743 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
744 - WORD /* signature has already been read */);
745 archive.readFully(DWORD_BUF);
746 archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF));
747 }
748
749 /**
750 * Parses the "End of central dir record" and positions
751 * the stream at the first central directory record.
752 *
753 * Expects stream to be positioned at the beginning of the
754 * "End of central dir record".
755 */
756 private void positionAtCentralDirectory32()
757 throws IOException {
758 skipBytes(CFD_LOCATOR_OFFSET);
759 archive.readFully(WORD_BUF);
760 archive.seek(ZipLong.getValue(WORD_BUF));
761 }
762
763 /**
764 * Searches for the and positions the stream at the start of the
765 * "End of central dir record".
766 */
767 private void positionAtEndOfCentralDirectoryRecord()
768 throws IOException {
769 boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
770 ZipArchiveOutputStream.EOCD_SIG);
771 if (!found) {
772 throw new ZipException("archive is not a ZIP archive");
773 }
774 }
775
776 /**
777 * Searches the archive backwards from minDistance to maxDistance
778 * for the given signature, positions the RandomaccessFile right
779 * at the signature if it has been found.
780 */
781 private boolean tryToLocateSignature(long minDistanceFromEnd,
782 long maxDistanceFromEnd,
783 byte[] sig) throws IOException {
784 boolean found = false;
785 long off = archive.length() - minDistanceFromEnd;
786 final long stopSearching =
787 Math.max(0L, archive.length() - maxDistanceFromEnd);
788 if (off >= 0) {
789 for (; off >= stopSearching; off--) {
790 archive.seek(off);
791 int curr = archive.read();
792 if (curr == -1) {
793 break;
794 }
795 if (curr == sig[POS_0]) {
796 curr = archive.read();
797 if (curr == sig[POS_1]) {
798 curr = archive.read();
799 if (curr == sig[POS_2]) {
800 curr = archive.read();
801 if (curr == sig[POS_3]) {
802 found = true;
803 break;
804 }
805 }
806 }
807 }
808 }
809 }
810 if (found) {
811 archive.seek(off);
812 }
813 return found;
814 }
815
816 /**
817 * Skips the given number of bytes or throws an EOFException if
818 * skipping failed.
819 */
820 private void skipBytes(final int count) throws IOException {
821 int totalSkipped = 0;
822 while (totalSkipped < count) {
823 int skippedNow = archive.skipBytes(count - totalSkipped);
824 if (skippedNow <= 0) {
825 throw new EOFException();
826 }
827 totalSkipped += skippedNow;
828 }
829 }
830
831 /**
832 * Number of bytes in local file header up to the "length of
833 * filename" entry.
834 */
835 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
836 /* local file header signature */ WORD
837 /* version needed to extract */ + SHORT
838 /* general purpose bit flag */ + SHORT
839 /* compression method */ + SHORT
840 /* last mod file time */ + SHORT
841 /* last mod file date */ + SHORT
842 /* crc-32 */ + WORD
843 /* compressed size */ + WORD
844 /* uncompressed size */ + WORD;
845
846 /**
847 * Walks through all recorded entries and adds the data available
848 * from the local file header.
849 *
850 * <p>Also records the offsets for the data to read from the
851 * entries.</p>
852 */
853 private void resolveLocalFileHeaderData(Map<ZipArchiveEntry, NameAndComment>
854 entriesWithoutUTF8Flag)
855 throws IOException {
856 // changing the name of a ZipArchiveEntry is going to change
857 // the hashcode - see COMPRESS-164
858 // Map needs to be reconstructed in order to keep central
859 // directory order
860 Map<ZipArchiveEntry, OffsetEntry> origMap =
861 new LinkedHashMap<ZipArchiveEntry, OffsetEntry>(entries);
862 entries.clear();
863 for (Map.Entry<ZipArchiveEntry, OffsetEntry> ent : origMap.entrySet()) {
864 ZipArchiveEntry ze = ent.getKey();
865 OffsetEntry offsetEntry = ent.getValue();
866 long offset = offsetEntry.headerOffset;
867 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
868 archive.readFully(SHORT_BUF);
869 int fileNameLen = ZipShort.getValue(SHORT_BUF);
870 archive.readFully(SHORT_BUF);
871 int extraFieldLen = ZipShort.getValue(SHORT_BUF);
872 int lenToSkip = fileNameLen;
873 while (lenToSkip > 0) {
874 int skipped = archive.skipBytes(lenToSkip);
875 if (skipped <= 0) {
876 throw new IOException("failed to skip file name in"
877 + " local file header");
878 }
879 lenToSkip -= skipped;
880 }
881 byte[] localExtraData = new byte[extraFieldLen];
882 archive.readFully(localExtraData);
883 ze.setExtra(localExtraData);
884 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
885 + SHORT + SHORT + fileNameLen + extraFieldLen;
886
887 if (entriesWithoutUTF8Flag.containsKey(ze)) {
888 String orig = ze.getName();
889 NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
890 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
891 nc.comment);
892 if (!orig.equals(ze.getName())) {
893 nameMap.remove(orig);
894 nameMap.put(ze.getName(), ze);
895 }
896 }
897 entries.put(ze, offsetEntry);
898 }
899 }
900
901 /**
902 * Checks whether the archive starts with a LFH. If it doesn't,
903 * it may be an empty archive.
904 */
905 private boolean startsWithLocalFileHeader() throws IOException {
906 archive.seek(0);
907 archive.readFully(WORD_BUF);
908 return Arrays.equals(WORD_BUF, ZipArchiveOutputStream.LFH_SIG);
909 }
910
911 /**
912 * InputStream that delegates requests to the underlying
913 * RandomAccessFile, making sure that only bytes from a certain
914 * range can be read.
915 */
916 private class BoundedInputStream extends InputStream {
917 private long remaining;
918 private long loc;
919 private boolean addDummyByte = false;
920
921 BoundedInputStream(long start, long remaining) {
922 this.remaining = remaining;
923 loc = start;
924 }
925
926 @Override
927 public int read() throws IOException {
928 if (remaining-- <= 0) {
929 if (addDummyByte) {
930 addDummyByte = false;
931 return 0;
932 }
933 return -1;
934 }
935 synchronized (archive) {
936 archive.seek(loc++);
937 return archive.read();
938 }
939 }
940
941 @Override
942 public int read(byte[] b, int off, int len) throws IOException {
943 if (remaining <= 0) {
944 if (addDummyByte) {
945 addDummyByte = false;
946 b[off] = 0;
947 return 1;
948 }
949 return -1;
950 }
951
952 if (len <= 0) {
953 return 0;
954 }
955
956 if (len > remaining) {
957 len = (int) remaining;
958 }
959 int ret = -1;
960 synchronized (archive) {
961 archive.seek(loc);
962 ret = archive.read(b, off, len);
963 }
964 if (ret > 0) {
965 loc += ret;
966 remaining -= ret;
967 }
968 return ret;
969 }
970
971 /**
972 * Inflater needs an extra dummy byte for nowrap - see
973 * Inflater's javadocs.
974 */
975 void addDummy() {
976 addDummyByte = true;
977 }
978 }
979
980 private static final class NameAndComment {
981 private final byte[] name;
982 private final byte[] comment;
983 private NameAndComment(byte[] name, byte[] comment) {
984 this.name = name;
985 this.comment = comment;
986 }
987 }
988
989 /**
990 * Compares two ZipArchiveEntries based on their offset within the archive.
991 *
992 * <p>Won't return any meaningful results if one of the entries
993 * isn't part of the archive at all.</p>
994 *
995 * @since 1.1
996 */
997 private final Comparator<ZipArchiveEntry> OFFSET_COMPARATOR =
998 new Comparator<ZipArchiveEntry>() {
999 public int compare(ZipArchiveEntry e1, ZipArchiveEntry e2) {
1000 if (e1 == e2) {
1001 return 0;
1002 }
1003
1004 OffsetEntry off1 = entries.get(e1);
1005 OffsetEntry off2 = entries.get(e2);
1006 if (off1 == null) {
1007 return 1;
1008 }
1009 if (off2 == null) {
1010 return -1;
1011 }
1012 long val = (off1.headerOffset - off2.headerOffset);
1013 return val == 0 ? 0 : val < 0 ? -1 : +1;
1014 }
1015 };
1016 }