View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.archivers.zip;
20  
21  import java.io.File;
22  import java.io.IOException;
23  import java.io.Serializable;
24  import java.nio.ByteBuffer;
25  import java.nio.channels.SeekableByteChannel;
26  import java.nio.file.Files;
27  import java.nio.file.OpenOption;
28  import java.nio.file.Path;
29  import java.nio.file.StandardOpenOption;
30  import java.util.ArrayList;
31  import java.util.Arrays;
32  import java.util.Comparator;
33  import java.util.List;
34  import java.util.Objects;
35  import java.util.regex.Pattern;
36  import java.util.stream.Collectors;
37  import java.util.stream.Stream;
38  
39  import org.apache.commons.compress.archivers.ArchiveStreamFactory;
40  import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;
41  import org.apache.commons.io.file.PathUtils;
42  
43  /**
44   * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
45   * <p>
46   * If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of the archive.
47   * </p>
48   *
49   * @since 1.20
50   */
51  public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {
52      private static final class ZipSplitSegmentComparator implements Comparator<Path>, Serializable {
53          private static final long serialVersionUID = 20200123L;
54  
55          @Override
56          public int compare(final Path file1, final Path file2) {
57              final String extension1 = PathUtils.getExtension(file1);
58              final String extension2 = PathUtils.getExtension(file2);
59              if (!extension1.startsWith("z")) {
60                  return -1;
61              }
62              if (!extension2.startsWith("z")) {
63                  return 1;
64              }
65              final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1));
66              final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1));
67              return splitSegmentNumber1.compareTo(splitSegmentNumber2);
68          }
69      }
70  
71      private static final Path[] EMPTY_PATH_ARRAY = {};
72      private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;
73  
74      /**
75       * Concatenates ZIP split files from the last segment(the extension SHOULD be .zip)
76       *
77       * @param lastSegmentFile the last segment of ZIP split files, note that the extension SHOULD be .zip
78       * @return SeekableByteChannel that concatenates all ZIP split files
79       * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
80       * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
81       */
82      public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException {
83          return buildFromLastSplitSegment(lastSegmentFile.toPath());
84      }
85  
86      /**
87       * Concatenates ZIP split files from the last segment (the extension MUST be .zip)
88       *
89       * @param lastSegmentPath the last segment of ZIP split files, note that the extension MUST be .zip
90       * @return SeekableByteChannel that concatenates all ZIP split files
91       * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip
92       * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
93       * @since 1.22
94       */
95      public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException {
96          final String extension = PathUtils.getExtension(lastSegmentPath);
97          if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
98              throw new IllegalArgumentException("The extension of last ZIP split segment should be .zip");
99          }
100         final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent() : lastSegmentPath.getFileSystem().getPath(".");
101         final String fileBaseName = PathUtils.getBaseName(lastSegmentPath);
102         final ArrayList<Path> splitZipSegments;
103         // ZIP split segments should be like z01,z02....z(n-1) based on the ZIP specification
104         final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+");
105         try (Stream<Path> walk = Files.walk(parent, 1)) {
106             splitZipSegments = walk.filter(Files::isRegularFile).filter(path -> pattern.matcher(path.getFileName().toString()).matches())
107                     .sorted(new ZipSplitSegmentComparator()).collect(Collectors.toCollection(ArrayList::new));
108         }
109         return forPaths(lastSegmentPath, splitZipSegments);
110     }
111 
112     /**
113      * Concatenates the given files.
114      *
115      * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct order
116      *              (for example .z01, .z02... .z99, .zip)
117      * @return SeekableByteChannel that concatenates all provided files
118      * @throws NullPointerException if files is null
119      * @throws IOException          if opening a channel for one of the files fails
120      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
121      */
122     public static SeekableByteChannel forFiles(final File... files) throws IOException {
123         final List<Path> paths = new ArrayList<>();
124         for (final File f : Objects.requireNonNull(files, "files")) {
125             paths.add(f.toPath());
126         }
127         return forPaths(paths.toArray(EMPTY_PATH_ARRAY));
128     }
129 
130     /**
131      * Concatenates the given files.
132      *
133      * @param lastSegmentFile the last segment of split ZIP segments, its extension should be .zip
134      * @param files           the files to concatenate except for the last segment, note these files should be added in correct order (for example .z01, .z02...
135      *                        .z99)
136      * @return SeekableByteChannel that concatenates all provided files
137      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
138      * @throws NullPointerException if files or lastSegmentFile is null
139      */
140     public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException {
141         Objects.requireNonNull(files, "files");
142         Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");
143         final List<Path> filesList = new ArrayList<>();
144         files.forEach(f -> filesList.add(f.toPath()));
145         return forPaths(lastSegmentFile.toPath(), filesList);
146     }
147 
148     /**
149      * Concatenates the given channels.
150      *
151      * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip) and these channels should be added
152      *                 in correct order (for example .z01, .z02... .z99, .zip)
153      * @return SeekableByteChannel that concatenates all provided channels
154      * @throws NullPointerException if channels is null
155      * @throws IOException          if reading channels fails
156      */
157     public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException {
158         if (Objects.requireNonNull(channels, "channels").length == 1) {
159             return channels[0];
160         }
161         return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
162     }
163 
164     /**
165      * Concatenates the given channels.
166      *
167      * @param lastSegmentChannel channel of the last segment of split ZIP segments, its extension should be .zip
168      * @param channels           the channels to concatenate except for the last segment, note these channels should be added in correct order (for example
169      *                           .z01, .z02... .z99)
170      * @return SeekableByteChannel that concatenates all provided channels
171      * @throws NullPointerException if lastSegmentChannel or channels is null
172      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
173      */
174     public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel, final Iterable<SeekableByteChannel> channels)
175             throws IOException {
176         Objects.requireNonNull(channels, "channels");
177         Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");
178         final List<SeekableByteChannel> channelsList = new ArrayList<>();
179         channels.forEach(channelsList::add);
180         channelsList.add(lastSegmentChannel);
181         return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0]));
182     }
183 
184     /**
185      * Concatenates the given file paths.
186      *
187      * @param paths       the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in
188      *                    correct order (for example: .z01, .z02... .z99, .zip)
189      * @param openOptions the options to open paths (shared by all paths).
190      * @return SeekableByteChannel that concatenates all provided files
191      * @throws NullPointerException if files is null
192      * @throws IOException          if opening a channel for one of the files fails
193      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
194      * @since 1.22
195      */
196     public static SeekableByteChannel forPaths(final List<Path> paths, final OpenOption[] openOptions) throws IOException {
197         final List<SeekableByteChannel> channels = new ArrayList<>();
198         for (final Path path : Objects.requireNonNull(paths, "paths")) {
199             channels.add(Files.newByteChannel(path, openOptions));
200         }
201         if (channels.size() == 1) {
202             return channels.get(0);
203         }
204         return new ZipSplitReadOnlySeekableByteChannel(channels);
205     }
206 
207     /**
208      * Concatenates the given file paths.
209      *
210      * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct
211      *              order (for example: .z01, .z02... .z99, .zip)
212      * @return SeekableByteChannel that concatenates all provided files
213      * @throws NullPointerException if files is null
214      * @throws IOException          if opening a channel for one of the files fails
215      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
216      * @since 1.22
217      */
218     public static SeekableByteChannel forPaths(final Path... paths) throws IOException {
219         return forPaths(Arrays.asList(paths), new OpenOption[] { StandardOpenOption.READ });
220     }
221 
222     /**
223      * Concatenates the given file paths.
224      *
225      * @param lastSegmentPath the last segment path of split ZIP segments, its extension must be .zip
226      * @param paths           the file paths to concatenate except for the last segment, note these files should be added in correct order (for example: .z01,
227      *                        .z02... .z99)
228      * @return SeekableByteChannel that concatenates all provided files
229      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
230      * @throws NullPointerException if files or lastSegmentPath is null
231      * @since 1.22
232      */
233     public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException {
234         Objects.requireNonNull(paths, "paths");
235         Objects.requireNonNull(lastSegmentPath, "lastSegmentPath");
236         final List<Path> filesList = new ArrayList<>();
237         paths.forEach(filesList::add);
238         filesList.add(lastSegmentPath);
239         return forPaths(filesList.toArray(EMPTY_PATH_ARRAY));
240     }
241 
242     private final ByteBuffer zipSplitSignatureByteBuffer = ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);
243 
244     /**
245      * Concatenates the given channels.
246      *
247      * <p>
248      * The channels should be add in ascending order, for example z01, z02, ... z99, ZIP please note that the .zip file is the last segment and should be added
249      * as the last one in the channels
250      * </p>
251      *
252      * @param channels the channels to concatenate
253      * @throws NullPointerException if channels is null
254      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
255      */
256     public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels) throws IOException {
257         super(channels);
258         // the first split ZIP segment should begin with ZIP split signature
259         assertSplitSignature(channels);
260     }
261 
262     /**
263      * Based on the ZIP specification:
264      *
265      * <p>
266      * 8.5.3 Spanned/Split archives created using PKZIP for Windows (V2.50 or greater), PKZIP Command Line (V2.50 or greater), or PKZIP Explorer will include a
267      * special spanning signature as the first 4 bytes of the first segment of the archive. This signature (0x08074b50) will be followed immediately by the
268      * local header signature for the first file in the archive.
269      * </p>
270      * <p>
271      * The first 4 bytes of the first ZIP split segment should be the ZIP split signature(0x08074B50)
272      * </p>
273      *
274      * @param channels channels to be validated
275      * @throws IOException if an I/O error occurs.
276      */
277     private void assertSplitSignature(final List<SeekableByteChannel> channels) throws IOException {
278         final SeekableByteChannel channel = channels.get(0);
279         // the ZIP split file signature is at the beginning of the first split segment
280         channel.position(0L);
281         zipSplitSignatureByteBuffer.rewind();
282         channel.read(zipSplitSignatureByteBuffer);
283         final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array());
284         if (!signature.equals(ZipLong.DD_SIG)) {
285             channel.position(0L);
286             throw new IOException("The first ZIP split segment does not begin with split ZIP file signature");
287         }
288         channel.position(0L);
289     }
290 }