View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  
18  package org.apache.commons.compress.archivers.zip;
19  
20  import java.io.File;
21  import java.io.IOException;
22  import java.io.Serializable;
23  import java.nio.ByteBuffer;
24  import java.nio.channels.SeekableByteChannel;
25  import java.nio.file.Files;
26  import java.nio.file.OpenOption;
27  import java.nio.file.Path;
28  import java.nio.file.StandardOpenOption;
29  import java.util.ArrayList;
30  import java.util.Arrays;
31  import java.util.Comparator;
32  import java.util.List;
33  import java.util.Objects;
34  import java.util.regex.Pattern;
35  import java.util.stream.Collectors;
36  import java.util.stream.Stream;
37  
38  import org.apache.commons.compress.archivers.ArchiveStreamFactory;
39  import org.apache.commons.compress.utils.FileNameUtils;
40  import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;
41  
42  /**
43   * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
44   * <p>
45   * If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of the archive.
46   * </p>
47   *
48   * @since 1.20
49   */
50  public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {
51  
52      private static final class ZipSplitSegmentComparator implements Comparator<Path>, Serializable {
53          private static final long serialVersionUID = 20200123L;
54  
55          @Override
56          public int compare(final Path file1, final Path file2) {
57              final String extension1 = FileNameUtils.getExtension(file1);
58              final String extension2 = FileNameUtils.getExtension(file2);
59  
60              if (!extension1.startsWith("z")) {
61                  return -1;
62              }
63  
64              if (!extension2.startsWith("z")) {
65                  return 1;
66              }
67  
68              final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1));
69              final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1));
70  
71              return splitSegmentNumber1.compareTo(splitSegmentNumber2);
72          }
73      }
74  
75      private static final Path[] EMPTY_PATH_ARRAY = {};
76      private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;
77  
78      /**
79       * Concatenates ZIP split files from the last segment(the extension SHOULD be .zip)
80       *
81       * @param lastSegmentFile the last segment of ZIP split files, note that the extension SHOULD be .zip
82       * @return SeekableByteChannel that concatenates all ZIP split files
83       * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
84       * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
85       */
86      public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException {
87          return buildFromLastSplitSegment(lastSegmentFile.toPath());
88      }
89  
90      /**
91       * Concatenates ZIP split files from the last segment (the extension MUST be .zip)
92       *
93       * @param lastSegmentPath the last segment of ZIP split files, note that the extension MUST be .zip
94       * @return SeekableByteChannel that concatenates all ZIP split files
95       * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip
96       * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
97       * @since 1.22
98       */
99      public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException {
100         final String extension = FileNameUtils.getExtension(lastSegmentPath);
101         if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
102             throw new IllegalArgumentException("The extension of last ZIP split segment should be .zip");
103         }
104 
105         final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent() : lastSegmentPath.getFileSystem().getPath(".");
106         final String fileBaseName = FileNameUtils.getBaseName(lastSegmentPath);
107         final ArrayList<Path> splitZipSegments;
108 
109         // ZIP split segments should be like z01,z02....z(n-1) based on the ZIP specification
110         final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+");
111         try (Stream<Path> walk = Files.walk(parent, 1)) {
112             splitZipSegments = walk.filter(Files::isRegularFile).filter(path -> pattern.matcher(path.getFileName().toString()).matches())
113                     .sorted(new ZipSplitSegmentComparator()).collect(Collectors.toCollection(ArrayList::new));
114         }
115 
116         return forPaths(lastSegmentPath, splitZipSegments);
117     }
118 
119     /**
120      * Concatenates the given files.
121      *
122      * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct order
123      *              (e.g. .z01, .z02... .z99, .zip)
124      * @return SeekableByteChannel that concatenates all provided files
125      * @throws NullPointerException if files is null
126      * @throws IOException          if opening a channel for one of the files fails
127      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
128      */
129     public static SeekableByteChannel forFiles(final File... files) throws IOException {
130         final List<Path> paths = new ArrayList<>();
131         for (final File f : Objects.requireNonNull(files, "files must not be null")) {
132             paths.add(f.toPath());
133         }
134 
135         return forPaths(paths.toArray(EMPTY_PATH_ARRAY));
136     }
137 
138     /**
139      * Concatenates the given files.
140      *
141      * @param lastSegmentFile the last segment of split ZIP segments, its extension should be .zip
142      * @param files           the files to concatenate except for the last segment, note these files should be added in correct order (e.g. .z01, .z02... .z99)
143      * @return SeekableByteChannel that concatenates all provided files
144      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
145      * @throws NullPointerException if files or lastSegmentFile is null
146      */
147     public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException {
148         Objects.requireNonNull(files, "files");
149         Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");
150 
151         final List<Path> filesList = new ArrayList<>();
152         files.forEach(f -> filesList.add(f.toPath()));
153 
154         return forPaths(lastSegmentFile.toPath(), filesList);
155     }
156 
157     /**
158      * Concatenates the given channels.
159      *
160      * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip) and these channels should be added
161      *                 in correct order (e.g. .z01, .z02... .z99, .zip)
162      * @return SeekableByteChannel that concatenates all provided channels
163      * @throws NullPointerException if channels is null
164      * @throws IOException          if reading channels fails
165      */
166     public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException {
167         if (Objects.requireNonNull(channels, "channels must not be null").length == 1) {
168             return channels[0];
169         }
170         return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
171     }
172 
173     /**
174      * Concatenates the given channels.
175      *
176      * @param lastSegmentChannel channel of the last segment of split ZIP segments, its extension should be .zip
177      * @param channels           the channels to concatenate except for the last segment, note these channels should be added in correct order (e.g. .z01,
178      *                           .z02... .z99)
179      * @return SeekableByteChannel that concatenates all provided channels
180      * @throws NullPointerException if lastSegmentChannel or channels is null
181      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
182      */
183     public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel, final Iterable<SeekableByteChannel> channels)
184             throws IOException {
185         Objects.requireNonNull(channels, "channels");
186         Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");
187 
188         final List<SeekableByteChannel> channelsList = new ArrayList<>();
189         channels.forEach(channelsList::add);
190         channelsList.add(lastSegmentChannel);
191 
192         return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0]));
193     }
194 
195     /**
196      * Concatenates the given file paths.
197      *
198      * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct
199      *              order (e.g.: .z01, .z02... .z99, .zip)
200      * @param openOptions the options to open paths (shared by all paths).
201      * @return SeekableByteChannel that concatenates all provided files
202      * @throws NullPointerException if files is null
203      * @throws IOException          if opening a channel for one of the files fails
204      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
205      * @since 1.22
206      */
207     public static SeekableByteChannel forPaths(final List<Path> paths, final OpenOption[] openOptions) throws IOException {
208         final List<SeekableByteChannel> channels = new ArrayList<>();
209         for (final Path path : Objects.requireNonNull(paths, "paths must not be null")) {
210             channels.add(Files.newByteChannel(path, openOptions));
211         }
212         if (channels.size() == 1) {
213             return channels.get(0);
214         }
215         return new ZipSplitReadOnlySeekableByteChannel(channels);
216     }
217 
218     /**
219      * Concatenates the given file paths.
220      *
221      * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct
222      *              order (e.g.: .z01, .z02... .z99, .zip)
223      * @return SeekableByteChannel that concatenates all provided files
224      * @throws NullPointerException if files is null
225      * @throws IOException          if opening a channel for one of the files fails
226      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
227      * @since 1.22
228      */
229     public static SeekableByteChannel forPaths(final Path... paths) throws IOException {
230         return forPaths(Arrays.asList(paths), new OpenOption[] { StandardOpenOption.READ });
231     }
232 
233     /**
234      * Concatenates the given file paths.
235      *
236      * @param lastSegmentPath the last segment path of split ZIP segments, its extension must be .zip
237      * @param paths           the file paths to concatenate except for the last segment, note these files should be added in correct order (e.g.: .z01, .z02...
238      *                        .z99)
239      * @return SeekableByteChannel that concatenates all provided files
240      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
241      * @throws NullPointerException if files or lastSegmentPath is null
242      * @since 1.22
243      */
244     public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException {
245         Objects.requireNonNull(paths, "paths");
246         Objects.requireNonNull(lastSegmentPath, "lastSegmentPath");
247 
248         final List<Path> filesList = new ArrayList<>();
249         paths.forEach(filesList::add);
250         filesList.add(lastSegmentPath);
251 
252         return forPaths(filesList.toArray(EMPTY_PATH_ARRAY));
253     }
254 
255     private final ByteBuffer zipSplitSignatureByteBuffer = ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);
256 
257     /**
258      * Concatenates the given channels.
259      *
260      * <p>
261      * The channels should be add in ascending order, e.g. z01, z02, ... z99, ZIP please note that the .zip file is the last segment and should be added as the
262      * last one in the channels
263      * </p>
264      *
265      * @param channels the channels to concatenate
266      * @throws NullPointerException if channels is null
267      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
268      */
269     public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels) throws IOException {
270         super(channels);
271 
272         // the first split ZIP segment should begin with ZIP split signature
273         assertSplitSignature(channels);
274     }
275 
276     /**
277      * Based on the ZIP specification:
278      *
279      * <p>
280      * 8.5.3 Spanned/Split archives created using PKZIP for Windows (V2.50 or greater), PKZIP Command Line (V2.50 or greater), or PKZIP Explorer will include a
281      * special spanning signature as the first 4 bytes of the first segment of the archive. This signature (0x08074b50) will be followed immediately by the
282      * local header signature for the first file in the archive.
283      * </p>
284      * <p>
285      * The first 4 bytes of the first ZIP split segment should be the ZIP split signature(0x08074B50)
286      * </p>
287      *
288      * @param channels channels to be validated
289      * @throws IOException
290      */
291     private void assertSplitSignature(final List<SeekableByteChannel> channels) throws IOException {
292         final SeekableByteChannel channel = channels.get(0);
293         // the ZIP split file signature is at the beginning of the first split segment
294         channel.position(0L);
295 
296         zipSplitSignatureByteBuffer.rewind();
297         channel.read(zipSplitSignatureByteBuffer);
298         final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array());
299         if (!signature.equals(ZipLong.DD_SIG)) {
300             channel.position(0L);
301             throw new IOException("The first ZIP split segment does not begin with split ZIP file signature");
302         }
303 
304         channel.position(0L);
305     }
306 }