001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017
018package org.apache.commons.compress.archivers.zip;
019
020import java.io.File;
021import java.io.IOException;
022import java.io.Serializable;
023import java.nio.ByteBuffer;
024import java.nio.channels.SeekableByteChannel;
025import java.nio.file.Files;
026import java.nio.file.OpenOption;
027import java.nio.file.Path;
028import java.nio.file.StandardOpenOption;
029import java.util.ArrayList;
030import java.util.Arrays;
031import java.util.Comparator;
032import java.util.List;
033import java.util.Objects;
034import java.util.regex.Pattern;
035import java.util.stream.Collectors;
036import java.util.stream.Stream;
037
038import org.apache.commons.compress.archivers.ArchiveStreamFactory;
039import org.apache.commons.compress.utils.FileNameUtils;
040import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;
041
042/**
043 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
044 * <p>
045 * If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of the archive.
046 * </p>
047 *
048 * @since 1.20
049 */
050public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {
051
052    private static final class ZipSplitSegmentComparator implements Comparator<Path>, Serializable {
053        private static final long serialVersionUID = 20200123L;
054
055        @Override
056        public int compare(final Path file1, final Path file2) {
057            final String extension1 = FileNameUtils.getExtension(file1);
058            final String extension2 = FileNameUtils.getExtension(file2);
059
060            if (!extension1.startsWith("z")) {
061                return -1;
062            }
063
064            if (!extension2.startsWith("z")) {
065                return 1;
066            }
067
068            final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1));
069            final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1));
070
071            return splitSegmentNumber1.compareTo(splitSegmentNumber2);
072        }
073    }
074
075    private static final Path[] EMPTY_PATH_ARRAY = {};
076    private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;
077
078    /**
079     * Concatenates ZIP split files from the last segment(the extension SHOULD be .zip)
080     *
081     * @param lastSegmentFile the last segment of ZIP split files, note that the extension SHOULD be .zip
082     * @return SeekableByteChannel that concatenates all ZIP split files
083     * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
084     * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
085     */
086    public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException {
087        return buildFromLastSplitSegment(lastSegmentFile.toPath());
088    }
089
090    /**
091     * Concatenates ZIP split files from the last segment (the extension MUST be .zip)
092     *
093     * @param lastSegmentPath the last segment of ZIP split files, note that the extension MUST be .zip
094     * @return SeekableByteChannel that concatenates all ZIP split files
095     * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip
096     * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
097     * @since 1.22
098     */
099    public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException {
100        final String extension = FileNameUtils.getExtension(lastSegmentPath);
101        if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
102            throw new IllegalArgumentException("The extension of last ZIP split segment should be .zip");
103        }
104
105        final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent() : lastSegmentPath.getFileSystem().getPath(".");
106        final String fileBaseName = FileNameUtils.getBaseName(lastSegmentPath);
107        final ArrayList<Path> splitZipSegments;
108
109        // ZIP split segments should be like z01,z02....z(n-1) based on the ZIP specification
110        final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+");
111        try (Stream<Path> walk = Files.walk(parent, 1)) {
112            splitZipSegments = walk.filter(Files::isRegularFile).filter(path -> pattern.matcher(path.getFileName().toString()).matches())
113                    .sorted(new ZipSplitSegmentComparator()).collect(Collectors.toCollection(ArrayList::new));
114        }
115
116        return forPaths(lastSegmentPath, splitZipSegments);
117    }
118
119    /**
120     * Concatenates the given files.
121     *
122     * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct order
123     *              (e.g. .z01, .z02... .z99, .zip)
124     * @return SeekableByteChannel that concatenates all provided files
125     * @throws NullPointerException if files is null
126     * @throws IOException          if opening a channel for one of the files fails
127     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
128     */
129    public static SeekableByteChannel forFiles(final File... files) throws IOException {
130        final List<Path> paths = new ArrayList<>();
131        for (final File f : Objects.requireNonNull(files, "files must not be null")) {
132            paths.add(f.toPath());
133        }
134
135        return forPaths(paths.toArray(EMPTY_PATH_ARRAY));
136    }
137
138    /**
139     * Concatenates the given files.
140     *
141     * @param lastSegmentFile the last segment of split ZIP segments, its extension should be .zip
142     * @param files           the files to concatenate except for the last segment, note these files should be added in correct order (e.g. .z01, .z02... .z99)
143     * @return SeekableByteChannel that concatenates all provided files
144     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
145     * @throws NullPointerException if files or lastSegmentFile is null
146     */
147    public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException {
148        Objects.requireNonNull(files, "files");
149        Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");
150
151        final List<Path> filesList = new ArrayList<>();
152        files.forEach(f -> filesList.add(f.toPath()));
153
154        return forPaths(lastSegmentFile.toPath(), filesList);
155    }
156
157    /**
158     * Concatenates the given channels.
159     *
160     * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip) and these channels should be added
161     *                 in correct order (e.g. .z01, .z02... .z99, .zip)
162     * @return SeekableByteChannel that concatenates all provided channels
163     * @throws NullPointerException if channels is null
164     * @throws IOException          if reading channels fails
165     */
166    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException {
167        if (Objects.requireNonNull(channels, "channels must not be null").length == 1) {
168            return channels[0];
169        }
170        return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
171    }
172
173    /**
174     * Concatenates the given channels.
175     *
176     * @param lastSegmentChannel channel of the last segment of split ZIP segments, its extension should be .zip
177     * @param channels           the channels to concatenate except for the last segment, note these channels should be added in correct order (e.g. .z01,
178     *                           .z02... .z99)
179     * @return SeekableByteChannel that concatenates all provided channels
180     * @throws NullPointerException if lastSegmentChannel or channels is null
181     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
182     */
183    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel, final Iterable<SeekableByteChannel> channels)
184            throws IOException {
185        Objects.requireNonNull(channels, "channels");
186        Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");
187
188        final List<SeekableByteChannel> channelsList = new ArrayList<>();
189        channels.forEach(channelsList::add);
190        channelsList.add(lastSegmentChannel);
191
192        return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0]));
193    }
194
195    /**
196     * Concatenates the given file paths.
197     *
198     * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct
199     *              order (e.g.: .z01, .z02... .z99, .zip)
200     * @param openOptions the options to open paths (shared by all paths).
201     * @return SeekableByteChannel that concatenates all provided files
202     * @throws NullPointerException if files is null
203     * @throws IOException          if opening a channel for one of the files fails
204     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
205     * @since 1.22
206     */
207    public static SeekableByteChannel forPaths(final List<Path> paths, final OpenOption[] openOptions) throws IOException {
208        final List<SeekableByteChannel> channels = new ArrayList<>();
209        for (final Path path : Objects.requireNonNull(paths, "paths must not be null")) {
210            channels.add(Files.newByteChannel(path, openOptions));
211        }
212        if (channels.size() == 1) {
213            return channels.get(0);
214        }
215        return new ZipSplitReadOnlySeekableByteChannel(channels);
216    }
217
218    /**
219     * Concatenates the given file paths.
220     *
221     * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct
222     *              order (e.g.: .z01, .z02... .z99, .zip)
223     * @return SeekableByteChannel that concatenates all provided files
224     * @throws NullPointerException if files is null
225     * @throws IOException          if opening a channel for one of the files fails
226     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
227     * @since 1.22
228     */
229    public static SeekableByteChannel forPaths(final Path... paths) throws IOException {
230        return forPaths(Arrays.asList(paths), new OpenOption[] { StandardOpenOption.READ });
231    }
232
233    /**
234     * Concatenates the given file paths.
235     *
236     * @param lastSegmentPath the last segment path of split ZIP segments, its extension must be .zip
237     * @param paths           the file paths to concatenate except for the last segment, note these files should be added in correct order (e.g.: .z01, .z02...
238     *                        .z99)
239     * @return SeekableByteChannel that concatenates all provided files
240     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
241     * @throws NullPointerException if files or lastSegmentPath is null
242     * @since 1.22
243     */
244    public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException {
245        Objects.requireNonNull(paths, "paths");
246        Objects.requireNonNull(lastSegmentPath, "lastSegmentPath");
247
248        final List<Path> filesList = new ArrayList<>();
249        paths.forEach(filesList::add);
250        filesList.add(lastSegmentPath);
251
252        return forPaths(filesList.toArray(EMPTY_PATH_ARRAY));
253    }
254
255    private final ByteBuffer zipSplitSignatureByteBuffer = ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);
256
257    /**
258     * Concatenates the given channels.
259     *
260     * <p>
261     * The channels should be add in ascending order, e.g. z01, z02, ... z99, ZIP please note that the .zip file is the last segment and should be added as the
262     * last one in the channels
263     * </p>
264     *
265     * @param channels the channels to concatenate
266     * @throws NullPointerException if channels is null
267     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
268     */
269    public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels) throws IOException {
270        super(channels);
271
272        // the first split ZIP segment should begin with ZIP split signature
273        assertSplitSignature(channels);
274    }
275
276    /**
277     * Based on the ZIP specification:
278     *
279     * <p>
280     * 8.5.3 Spanned/Split archives created using PKZIP for Windows (V2.50 or greater), PKZIP Command Line (V2.50 or greater), or PKZIP Explorer will include a
281     * special spanning signature as the first 4 bytes of the first segment of the archive. This signature (0x08074b50) will be followed immediately by the
282     * local header signature for the first file in the archive.
283     * </p>
284     * <p>
285     * The first 4 bytes of the first ZIP split segment should be the ZIP split signature(0x08074B50)
286     * </p>
287     *
288     * @param channels channels to be validated
289     * @throws IOException
290     */
291    private void assertSplitSignature(final List<SeekableByteChannel> channels) throws IOException {
292        final SeekableByteChannel channel = channels.get(0);
293        // the ZIP split file signature is at the beginning of the first split segment
294        channel.position(0L);
295
296        zipSplitSignatureByteBuffer.rewind();
297        channel.read(zipSplitSignatureByteBuffer);
298        final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array());
299        if (!signature.equals(ZipLong.DD_SIG)) {
300            channel.position(0L);
301            throw new IOException("The first ZIP split segment does not begin with split ZIP file signature");
302        }
303
304        channel.position(0L);
305    }
306}