001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.File;
022import java.io.IOException;
023import java.io.Serializable;
024import java.nio.ByteBuffer;
025import java.nio.channels.SeekableByteChannel;
026import java.nio.file.Files;
027import java.nio.file.OpenOption;
028import java.nio.file.Path;
029import java.nio.file.StandardOpenOption;
030import java.util.ArrayList;
031import java.util.Arrays;
032import java.util.Comparator;
033import java.util.List;
034import java.util.Objects;
035import java.util.regex.Pattern;
036import java.util.stream.Collectors;
037import java.util.stream.Stream;
038
039import org.apache.commons.compress.archivers.ArchiveStreamFactory;
040import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;
041import org.apache.commons.io.file.PathUtils;
042
043/**
044 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
045 * <p>
046 * If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of the archive.
047 * </p>
048 *
049 * @since 1.20
050 */
051public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {
052    private static final class ZipSplitSegmentComparator implements Comparator<Path>, Serializable {
053        private static final long serialVersionUID = 20200123L;
054
055        @Override
056        public int compare(final Path file1, final Path file2) {
057            final String extension1 = PathUtils.getExtension(file1);
058            final String extension2 = PathUtils.getExtension(file2);
059            if (!extension1.startsWith("z")) {
060                return -1;
061            }
062            if (!extension2.startsWith("z")) {
063                return 1;
064            }
065            final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1));
066            final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1));
067            return splitSegmentNumber1.compareTo(splitSegmentNumber2);
068        }
069    }
070
071    private static final Path[] EMPTY_PATH_ARRAY = {};
072    private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;
073
074    /**
075     * Concatenates ZIP split files from the last segment(the extension SHOULD be .zip)
076     *
077     * @param lastSegmentFile the last segment of ZIP split files, note that the extension SHOULD be .zip
078     * @return SeekableByteChannel that concatenates all ZIP split files
079     * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
080     * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
081     */
082    public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException {
083        return buildFromLastSplitSegment(lastSegmentFile.toPath());
084    }
085
086    /**
087     * Concatenates ZIP split files from the last segment (the extension MUST be .zip)
088     *
089     * @param lastSegmentPath the last segment of ZIP split files, note that the extension MUST be .zip
090     * @return SeekableByteChannel that concatenates all ZIP split files
091     * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip
092     * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
093     * @since 1.22
094     */
095    public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException {
096        final String extension = PathUtils.getExtension(lastSegmentPath);
097        if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
098            throw new IllegalArgumentException("The extension of last ZIP split segment should be .zip");
099        }
100        final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent() : lastSegmentPath.getFileSystem().getPath(".");
101        final String fileBaseName = PathUtils.getBaseName(lastSegmentPath);
102        final ArrayList<Path> splitZipSegments;
103        // ZIP split segments should be like z01,z02....z(n-1) based on the ZIP specification
104        final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+");
105        try (Stream<Path> walk = Files.walk(parent, 1)) {
106            splitZipSegments = walk.filter(Files::isRegularFile).filter(path -> pattern.matcher(path.getFileName().toString()).matches())
107                    .sorted(new ZipSplitSegmentComparator()).collect(Collectors.toCollection(ArrayList::new));
108        }
109        return forPaths(lastSegmentPath, splitZipSegments);
110    }
111
112    /**
113     * Concatenates the given files.
114     *
115     * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct order
116     *              (for example .z01, .z02... .z99, .zip)
117     * @return SeekableByteChannel that concatenates all provided files
118     * @throws NullPointerException if files is null
119     * @throws IOException          if opening a channel for one of the files fails
120     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
121     */
122    public static SeekableByteChannel forFiles(final File... files) throws IOException {
123        final List<Path> paths = new ArrayList<>();
124        for (final File f : Objects.requireNonNull(files, "files")) {
125            paths.add(f.toPath());
126        }
127        return forPaths(paths.toArray(EMPTY_PATH_ARRAY));
128    }
129
130    /**
131     * Concatenates the given files.
132     *
133     * @param lastSegmentFile the last segment of split ZIP segments, its extension should be .zip
134     * @param files           the files to concatenate except for the last segment, note these files should be added in correct order (for example .z01, .z02...
135     *                        .z99)
136     * @return SeekableByteChannel that concatenates all provided files
137     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
138     * @throws NullPointerException if files or lastSegmentFile is null
139     */
140    public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException {
141        Objects.requireNonNull(files, "files");
142        Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");
143        final List<Path> filesList = new ArrayList<>();
144        files.forEach(f -> filesList.add(f.toPath()));
145        return forPaths(lastSegmentFile.toPath(), filesList);
146    }
147
148    /**
149     * Concatenates the given channels.
150     *
151     * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip) and these channels should be added
152     *                 in correct order (for example .z01, .z02... .z99, .zip)
153     * @return SeekableByteChannel that concatenates all provided channels
154     * @throws NullPointerException if channels is null
155     * @throws IOException          if reading channels fails
156     */
157    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException {
158        if (Objects.requireNonNull(channels, "channels").length == 1) {
159            return channels[0];
160        }
161        return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
162    }
163
164    /**
165     * Concatenates the given channels.
166     *
167     * @param lastSegmentChannel channel of the last segment of split ZIP segments, its extension should be .zip
168     * @param channels           the channels to concatenate except for the last segment, note these channels should be added in correct order (for example
169     *                           .z01, .z02... .z99)
170     * @return SeekableByteChannel that concatenates all provided channels
171     * @throws NullPointerException if lastSegmentChannel or channels is null
172     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
173     */
174    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel, final Iterable<SeekableByteChannel> channels)
175            throws IOException {
176        Objects.requireNonNull(channels, "channels");
177        Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");
178        final List<SeekableByteChannel> channelsList = new ArrayList<>();
179        channels.forEach(channelsList::add);
180        channelsList.add(lastSegmentChannel);
181        return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0]));
182    }
183
184    /**
185     * Concatenates the given file paths.
186     *
187     * @param paths       the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in
188     *                    correct order (for example: .z01, .z02... .z99, .zip)
189     * @param openOptions the options to open paths (shared by all paths).
190     * @return SeekableByteChannel that concatenates all provided files
191     * @throws NullPointerException if files is null
192     * @throws IOException          if opening a channel for one of the files fails
193     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
194     * @since 1.22
195     */
196    public static SeekableByteChannel forPaths(final List<Path> paths, final OpenOption[] openOptions) throws IOException {
197        final List<SeekableByteChannel> channels = new ArrayList<>();
198        for (final Path path : Objects.requireNonNull(paths, "paths")) {
199            channels.add(Files.newByteChannel(path, openOptions));
200        }
201        if (channels.size() == 1) {
202            return channels.get(0);
203        }
204        return new ZipSplitReadOnlySeekableByteChannel(channels);
205    }
206
207    /**
208     * Concatenates the given file paths.
209     *
210     * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct
211     *              order (for example: .z01, .z02... .z99, .zip)
212     * @return SeekableByteChannel that concatenates all provided files
213     * @throws NullPointerException if files is null
214     * @throws IOException          if opening a channel for one of the files fails
215     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
216     * @since 1.22
217     */
218    public static SeekableByteChannel forPaths(final Path... paths) throws IOException {
219        return forPaths(Arrays.asList(paths), new OpenOption[] { StandardOpenOption.READ });
220    }
221
222    /**
223     * Concatenates the given file paths.
224     *
225     * @param lastSegmentPath the last segment path of split ZIP segments, its extension must be .zip
226     * @param paths           the file paths to concatenate except for the last segment, note these files should be added in correct order (for example: .z01,
227     *                        .z02... .z99)
228     * @return SeekableByteChannel that concatenates all provided files
229     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
230     * @throws NullPointerException if files or lastSegmentPath is null
231     * @since 1.22
232     */
233    public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException {
234        Objects.requireNonNull(paths, "paths");
235        Objects.requireNonNull(lastSegmentPath, "lastSegmentPath");
236        final List<Path> filesList = new ArrayList<>();
237        paths.forEach(filesList::add);
238        filesList.add(lastSegmentPath);
239        return forPaths(filesList.toArray(EMPTY_PATH_ARRAY));
240    }
241
242    private final ByteBuffer zipSplitSignatureByteBuffer = ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);
243
244    /**
245     * Concatenates the given channels.
246     *
247     * <p>
248     * The channels should be add in ascending order, for example z01, z02, ... z99, ZIP please note that the .zip file is the last segment and should be added
249     * as the last one in the channels
250     * </p>
251     *
252     * @param channels the channels to concatenate
253     * @throws NullPointerException if channels is null
254     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
255     */
256    public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels) throws IOException {
257        super(channels);
258        // the first split ZIP segment should begin with ZIP split signature
259        assertSplitSignature(channels);
260    }
261
262    /**
263     * Based on the ZIP specification:
264     *
265     * <p>
266     * 8.5.3 Spanned/Split archives created using PKZIP for Windows (V2.50 or greater), PKZIP Command Line (V2.50 or greater), or PKZIP Explorer will include a
267     * special spanning signature as the first 4 bytes of the first segment of the archive. This signature (0x08074b50) will be followed immediately by the
268     * local header signature for the first file in the archive.
269     * </p>
270     * <p>
271     * The first 4 bytes of the first ZIP split segment should be the ZIP split signature(0x08074B50)
272     * </p>
273     *
274     * @param channels channels to be validated
275     * @throws IOException if an I/O error occurs.
276     */
277    private void assertSplitSignature(final List<SeekableByteChannel> channels) throws IOException {
278        final SeekableByteChannel channel = channels.get(0);
279        // the ZIP split file signature is at the beginning of the first split segment
280        channel.position(0L);
281        zipSplitSignatureByteBuffer.rewind();
282        channel.read(zipSplitSignatureByteBuffer);
283        final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array());
284        if (!signature.equals(ZipLong.DD_SIG)) {
285            channel.position(0L);
286            throw new IOException("The first ZIP split segment does not begin with split ZIP file signature");
287        }
288        channel.position(0L);
289    }
290}