1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * https://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.commons.compress.archivers.zip;
20
21 import java.io.File;
22 import java.io.IOException;
23 import java.io.Serializable;
24 import java.nio.ByteBuffer;
25 import java.nio.channels.SeekableByteChannel;
26 import java.nio.file.Files;
27 import java.nio.file.OpenOption;
28 import java.nio.file.Path;
29 import java.nio.file.StandardOpenOption;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.Comparator;
33 import java.util.List;
34 import java.util.Objects;
35 import java.util.regex.Pattern;
36 import java.util.stream.Collectors;
37 import java.util.stream.Stream;
38
39 import org.apache.commons.compress.archivers.ArchiveStreamFactory;
40 import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;
41 import org.apache.commons.io.file.PathUtils;
42
43 /**
44 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
45 * <p>
46 * If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of the archive.
47 * </p>
48 *
49 * @since 1.20
50 */
51 public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {
52 private static final class ZipSplitSegmentComparator implements Comparator<Path>, Serializable {
53 private static final long serialVersionUID = 20200123L;
54
55 @Override
56 public int compare(final Path file1, final Path file2) {
57 final String extension1 = PathUtils.getExtension(file1);
58 final String extension2 = PathUtils.getExtension(file2);
59 if (!extension1.startsWith("z")) {
60 return -1;
61 }
62 if (!extension2.startsWith("z")) {
63 return 1;
64 }
65 final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1));
66 final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1));
67 return splitSegmentNumber1.compareTo(splitSegmentNumber2);
68 }
69 }
70
71 private static final Path[] EMPTY_PATH_ARRAY = {};
72 private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;
73
74 /**
75 * Concatenates ZIP split files from the last segment(the extension SHOULD be .zip)
76 *
77 * @param lastSegmentFile the last segment of ZIP split files, note that the extension SHOULD be .zip
78 * @return SeekableByteChannel that concatenates all ZIP split files
79 * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
80 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive
81 */
82 public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException {
83 return buildFromLastSplitSegment(lastSegmentFile.toPath());
84 }
85
86 /**
87 * Concatenates ZIP split files from the last segment (the extension MUST be .zip)
88 *
89 * @param lastSegmentPath the last segment of ZIP split files, note that the extension MUST be .zip
90 * @return SeekableByteChannel that concatenates all ZIP split files
91 * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip
92 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive
93 * @since 1.22
94 */
95 public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException {
96 final String extension = PathUtils.getExtension(lastSegmentPath);
97 if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
98 throw new IllegalArgumentException("The extension of last ZIP split segment should be .zip");
99 }
100 final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent() : lastSegmentPath.getFileSystem().getPath(".");
101 final String fileBaseName = PathUtils.getBaseName(lastSegmentPath);
102 final ArrayList<Path> splitZipSegments;
103 // ZIP split segments should be like z01,z02....z(n-1) based on the ZIP specification
104 final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+");
105 try (Stream<Path> walk = Files.walk(parent, 1)) {
106 splitZipSegments = walk.filter(Files::isRegularFile).filter(path -> pattern.matcher(path.getFileName().toString()).matches())
107 .sorted(new ZipSplitSegmentComparator()).collect(Collectors.toCollection(ArrayList::new));
108 }
109 return forPaths(lastSegmentPath, splitZipSegments);
110 }
111
112 /**
113 * Concatenates the given files.
114 *
115 * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct order
116 * (for example .z01, .z02... .z99, .zip)
117 * @return SeekableByteChannel that concatenates all provided files
118 * @throws NullPointerException if files is null
119 * @throws IOException if opening a channel for one of the files fails
120 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive
121 */
122 public static SeekableByteChannel forFiles(final File... files) throws IOException {
123 final List<Path> paths = new ArrayList<>();
124 for (final File f : Objects.requireNonNull(files, "files")) {
125 paths.add(f.toPath());
126 }
127 return forPaths(paths.toArray(EMPTY_PATH_ARRAY));
128 }
129
130 /**
131 * Concatenates the given files.
132 *
133 * @param lastSegmentFile the last segment of split ZIP segments, its extension should be .zip
134 * @param files the files to concatenate except for the last segment, note these files should be added in correct order (for example .z01, .z02...
135 * .z99)
136 * @return SeekableByteChannel that concatenates all provided files
137 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive
138 * @throws NullPointerException if files or lastSegmentFile is null
139 */
140 public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException {
141 Objects.requireNonNull(files, "files");
142 Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");
143 final List<Path> filesList = new ArrayList<>();
144 files.forEach(f -> filesList.add(f.toPath()));
145 return forPaths(lastSegmentFile.toPath(), filesList);
146 }
147
148 /**
149 * Concatenates the given channels.
150 *
151 * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip) and these channels should be added
152 * in correct order (for example .z01, .z02... .z99, .zip)
153 * @return SeekableByteChannel that concatenates all provided channels
154 * @throws NullPointerException if channels is null
155 * @throws IOException if reading channels fails
156 */
157 public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException {
158 if (Objects.requireNonNull(channels, "channels").length == 1) {
159 return channels[0];
160 }
161 return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
162 }
163
164 /**
165 * Concatenates the given channels.
166 *
167 * @param lastSegmentChannel channel of the last segment of split ZIP segments, its extension should be .zip
168 * @param channels the channels to concatenate except for the last segment, note these channels should be added in correct order (for example
169 * .z01, .z02... .z99)
170 * @return SeekableByteChannel that concatenates all provided channels
171 * @throws NullPointerException if lastSegmentChannel or channels is null
172 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive
173 */
174 public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel, final Iterable<SeekableByteChannel> channels)
175 throws IOException {
176 Objects.requireNonNull(channels, "channels");
177 Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");
178 final List<SeekableByteChannel> channelsList = new ArrayList<>();
179 channels.forEach(channelsList::add);
180 channelsList.add(lastSegmentChannel);
181 return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0]));
182 }
183
184 /**
185 * Concatenates the given file paths.
186 *
187 * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in
188 * correct order (for example: .z01, .z02... .z99, .zip)
189 * @param openOptions the options to open paths (shared by all paths).
190 * @return SeekableByteChannel that concatenates all provided files
191 * @throws NullPointerException if files is null
192 * @throws IOException if opening a channel for one of the files fails
193 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive
194 * @since 1.22
195 */
196 public static SeekableByteChannel forPaths(final List<Path> paths, final OpenOption[] openOptions) throws IOException {
197 final List<SeekableByteChannel> channels = new ArrayList<>();
198 for (final Path path : Objects.requireNonNull(paths, "paths")) {
199 channels.add(Files.newByteChannel(path, openOptions));
200 }
201 if (channels.size() == 1) {
202 return channels.get(0);
203 }
204 return new ZipSplitReadOnlySeekableByteChannel(channels);
205 }
206
207 /**
208 * Concatenates the given file paths.
209 *
210 * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct
211 * order (for example: .z01, .z02... .z99, .zip)
212 * @return SeekableByteChannel that concatenates all provided files
213 * @throws NullPointerException if files is null
214 * @throws IOException if opening a channel for one of the files fails
215 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive
216 * @since 1.22
217 */
218 public static SeekableByteChannel forPaths(final Path... paths) throws IOException {
219 return forPaths(Arrays.asList(paths), new OpenOption[] { StandardOpenOption.READ });
220 }
221
222 /**
223 * Concatenates the given file paths.
224 *
225 * @param lastSegmentPath the last segment path of split ZIP segments, its extension must be .zip
226 * @param paths the file paths to concatenate except for the last segment, note these files should be added in correct order (for example: .z01,
227 * .z02... .z99)
228 * @return SeekableByteChannel that concatenates all provided files
229 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive
230 * @throws NullPointerException if files or lastSegmentPath is null
231 * @since 1.22
232 */
233 public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException {
234 Objects.requireNonNull(paths, "paths");
235 Objects.requireNonNull(lastSegmentPath, "lastSegmentPath");
236 final List<Path> filesList = new ArrayList<>();
237 paths.forEach(filesList::add);
238 filesList.add(lastSegmentPath);
239 return forPaths(filesList.toArray(EMPTY_PATH_ARRAY));
240 }
241
242 private final ByteBuffer zipSplitSignatureByteBuffer = ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);
243
/**
 * Concatenates the given channels.
 *
 * <p>
 * The channels must be added in ascending order, for example z01, z02, ... z99, ZIP. Please note that the .zip file is the last segment and must be
 * added as the last one in the channels.
 * </p>
 * <p>
 * After the superclass has been initialized, the first channel is checked for the ZIP split signature and construction fails if it is missing.
 * </p>
 *
 * @param channels the channels to concatenate
 * @throws NullPointerException if channels is null
 * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
 */
public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels) throws IOException {
    super(channels);
    // the first split ZIP segment should begin with ZIP split signature
    assertSplitSignature(channels);
}
261
262 /**
263 * Based on the ZIP specification:
264 *
265 * <p>
266 * 8.5.3 Spanned/Split archives created using PKZIP for Windows (V2.50 or greater), PKZIP Command Line (V2.50 or greater), or PKZIP Explorer will include a
267 * special spanning signature as the first 4 bytes of the first segment of the archive. This signature (0x08074b50) will be followed immediately by the
268 * local header signature for the first file in the archive.
269 * </p>
270 * <p>
271 * The first 4 bytes of the first ZIP split segment should be the ZIP split signature(0x08074B50)
272 * </p>
273 *
274 * @param channels channels to be validated
275 * @throws IOException if an I/O error occurs.
276 */
277 private void assertSplitSignature(final List<SeekableByteChannel> channels) throws IOException {
278 final SeekableByteChannel channel = channels.get(0);
279 // the ZIP split file signature is at the beginning of the first split segment
280 channel.position(0L);
281 zipSplitSignatureByteBuffer.rewind();
282 channel.read(zipSplitSignatureByteBuffer);
283 final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array());
284 if (!signature.equals(ZipLong.DD_SIG)) {
285 channel.position(0L);
286 throw new IOException("The first ZIP split segment does not begin with split ZIP file signature");
287 }
288 channel.position(0L);
289 }
290 }