ZipSplitReadOnlySeekableByteChannel.java

  1. /*
  2.  *  Licensed to the Apache Software Foundation (ASF) under one or more
  3.  *  contributor license agreements.  See the NOTICE file distributed with
  4.  *  this work for additional information regarding copyright ownership.
  5.  *  The ASF licenses this file to You under the Apache License, Version 2.0
  6.  *  (the "License"); you may not use this file except in compliance with
  7.  *  the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  *  Unless required by applicable law or agreed to in writing, software
  12.  *  distributed under the License is distributed on an "AS IS" BASIS,
  13.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  *  See the License for the specific language governing permissions and
  15.  *  limitations under the License.
  16.  */

  17. package org.apache.commons.compress.archivers.zip;

  18. import java.io.File;
  19. import java.io.IOException;
  20. import java.io.Serializable;
  21. import java.nio.ByteBuffer;
  22. import java.nio.channels.SeekableByteChannel;
  23. import java.nio.file.Files;
  24. import java.nio.file.OpenOption;
  25. import java.nio.file.Path;
  26. import java.nio.file.StandardOpenOption;
  27. import java.util.ArrayList;
  28. import java.util.Arrays;
  29. import java.util.Comparator;
  30. import java.util.List;
  31. import java.util.Objects;
  32. import java.util.regex.Pattern;
  33. import java.util.stream.Collectors;
  34. import java.util.stream.Stream;

  35. import org.apache.commons.compress.archivers.ArchiveStreamFactory;
  36. import org.apache.commons.compress.utils.FileNameUtils;
  37. import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;

  38. /**
  39.  * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
  40.  * <p>
  41.  * If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of the archive.
  42.  * </p>
  43.  *
  44.  * @since 1.20
  45.  */
  46. public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {

  47.     private static final class ZipSplitSegmentComparator implements Comparator<Path>, Serializable {
  48.         private static final long serialVersionUID = 20200123L;

  49.         @Override
  50.         public int compare(final Path file1, final Path file2) {
  51.             final String extension1 = FileNameUtils.getExtension(file1);
  52.             final String extension2 = FileNameUtils.getExtension(file2);

  53.             if (!extension1.startsWith("z")) {
  54.                 return -1;
  55.             }

  56.             if (!extension2.startsWith("z")) {
  57.                 return 1;
  58.             }

  59.             final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1));
  60.             final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1));

  61.             return splitSegmentNumber1.compareTo(splitSegmentNumber2);
  62.         }
  63.     }

    /** Zero-length array handed to {@code toArray} when a list of paths is converted to a {@code Path[]}. */
    private static final Path[] EMPTY_PATH_ARRAY = {};
    /** Capacity, in bytes, of the buffer used to read the ZIP split signature from the start of the first segment. */
    private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;

  66.     /**
  67.      * Concatenates ZIP split files from the last segment(the extension SHOULD be .zip)
  68.      *
  69.      * @param lastSegmentFile the last segment of ZIP split files, note that the extension SHOULD be .zip
  70.      * @return SeekableByteChannel that concatenates all ZIP split files
  71.      * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
  72.      * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
  73.      */
  74.     public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException {
  75.         return buildFromLastSplitSegment(lastSegmentFile.toPath());
  76.     }

  77.     /**
  78.      * Concatenates ZIP split files from the last segment (the extension MUST be .zip)
  79.      *
  80.      * @param lastSegmentPath the last segment of ZIP split files, note that the extension MUST be .zip
  81.      * @return SeekableByteChannel that concatenates all ZIP split files
  82.      * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip
  83.      * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
  84.      * @since 1.22
  85.      */
  86.     public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException {
  87.         final String extension = FileNameUtils.getExtension(lastSegmentPath);
  88.         if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
  89.             throw new IllegalArgumentException("The extension of last ZIP split segment should be .zip");
  90.         }

  91.         final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent() : lastSegmentPath.getFileSystem().getPath(".");
  92.         final String fileBaseName = FileNameUtils.getBaseName(lastSegmentPath);
  93.         final ArrayList<Path> splitZipSegments;

  94.         // ZIP split segments should be like z01,z02....z(n-1) based on the ZIP specification
  95.         final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+");
  96.         try (Stream<Path> walk = Files.walk(parent, 1)) {
  97.             splitZipSegments = walk.filter(Files::isRegularFile).filter(path -> pattern.matcher(path.getFileName().toString()).matches())
  98.                     .sorted(new ZipSplitSegmentComparator()).collect(Collectors.toCollection(ArrayList::new));
  99.         }

  100.         return forPaths(lastSegmentPath, splitZipSegments);
  101.     }

  102.     /**
  103.      * Concatenates the given files.
  104.      *
  105.      * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct order
  106.      *              (e.g. .z01, .z02... .z99, .zip)
  107.      * @return SeekableByteChannel that concatenates all provided files
  108.      * @throws NullPointerException if files is null
  109.      * @throws IOException          if opening a channel for one of the files fails
  110.      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
  111.      */
  112.     public static SeekableByteChannel forFiles(final File... files) throws IOException {
  113.         final List<Path> paths = new ArrayList<>();
  114.         for (final File f : Objects.requireNonNull(files, "files")) {
  115.             paths.add(f.toPath());
  116.         }

  117.         return forPaths(paths.toArray(EMPTY_PATH_ARRAY));
  118.     }

  119.     /**
  120.      * Concatenates the given files.
  121.      *
  122.      * @param lastSegmentFile the last segment of split ZIP segments, its extension should be .zip
  123.      * @param files           the files to concatenate except for the last segment, note these files should be added in correct order (e.g. .z01, .z02... .z99)
  124.      * @return SeekableByteChannel that concatenates all provided files
  125.      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
  126.      * @throws NullPointerException if files or lastSegmentFile is null
  127.      */
  128.     public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException {
  129.         Objects.requireNonNull(files, "files");
  130.         Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");

  131.         final List<Path> filesList = new ArrayList<>();
  132.         files.forEach(f -> filesList.add(f.toPath()));

  133.         return forPaths(lastSegmentFile.toPath(), filesList);
  134.     }

  135.     /**
  136.      * Concatenates the given channels.
  137.      *
  138.      * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip) and these channels should be added
  139.      *                 in correct order (e.g. .z01, .z02... .z99, .zip)
  140.      * @return SeekableByteChannel that concatenates all provided channels
  141.      * @throws NullPointerException if channels is null
  142.      * @throws IOException          if reading channels fails
  143.      */
  144.     public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException {
  145.         if (Objects.requireNonNull(channels, "channels").length == 1) {
  146.             return channels[0];
  147.         }
  148.         return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
  149.     }

  150.     /**
  151.      * Concatenates the given channels.
  152.      *
  153.      * @param lastSegmentChannel channel of the last segment of split ZIP segments, its extension should be .zip
  154.      * @param channels           the channels to concatenate except for the last segment, note these channels should be added in correct order (e.g. .z01,
  155.      *                           .z02... .z99)
  156.      * @return SeekableByteChannel that concatenates all provided channels
  157.      * @throws NullPointerException if lastSegmentChannel or channels is null
  158.      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
  159.      */
  160.     public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel, final Iterable<SeekableByteChannel> channels)
  161.             throws IOException {
  162.         Objects.requireNonNull(channels, "channels");
  163.         Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");

  164.         final List<SeekableByteChannel> channelsList = new ArrayList<>();
  165.         channels.forEach(channelsList::add);
  166.         channelsList.add(lastSegmentChannel);

  167.         return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0]));
  168.     }

  169.     /**
  170.      * Concatenates the given file paths.
  171.      *
  172.      * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct
  173.      *              order (e.g.: .z01, .z02... .z99, .zip)
  174.      * @param openOptions the options to open paths (shared by all paths).
  175.      * @return SeekableByteChannel that concatenates all provided files
  176.      * @throws NullPointerException if files is null
  177.      * @throws IOException          if opening a channel for one of the files fails
  178.      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
  179.      * @since 1.22
  180.      */
  181.     public static SeekableByteChannel forPaths(final List<Path> paths, final OpenOption[] openOptions) throws IOException {
  182.         final List<SeekableByteChannel> channels = new ArrayList<>();
  183.         for (final Path path : Objects.requireNonNull(paths, "paths")) {
  184.             channels.add(Files.newByteChannel(path, openOptions));
  185.         }
  186.         if (channels.size() == 1) {
  187.             return channels.get(0);
  188.         }
  189.         return new ZipSplitReadOnlySeekableByteChannel(channels);
  190.     }

  191.     /**
  192.      * Concatenates the given file paths.
  193.      *
  194.      * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct
  195.      *              order (e.g.: .z01, .z02... .z99, .zip)
  196.      * @return SeekableByteChannel that concatenates all provided files
  197.      * @throws NullPointerException if files is null
  198.      * @throws IOException          if opening a channel for one of the files fails
  199.      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
  200.      * @since 1.22
  201.      */
  202.     public static SeekableByteChannel forPaths(final Path... paths) throws IOException {
  203.         return forPaths(Arrays.asList(paths), new OpenOption[] { StandardOpenOption.READ });
  204.     }

  205.     /**
  206.      * Concatenates the given file paths.
  207.      *
  208.      * @param lastSegmentPath the last segment path of split ZIP segments, its extension must be .zip
  209.      * @param paths           the file paths to concatenate except for the last segment, note these files should be added in correct order (e.g.: .z01, .z02...
  210.      *                        .z99)
  211.      * @return SeekableByteChannel that concatenates all provided files
  212.      * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
  213.      * @throws NullPointerException if files or lastSegmentPath is null
  214.      * @since 1.22
  215.      */
  216.     public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException {
  217.         Objects.requireNonNull(paths, "paths");
  218.         Objects.requireNonNull(lastSegmentPath, "lastSegmentPath");

  219.         final List<Path> filesList = new ArrayList<>();
  220.         paths.forEach(filesList::add);
  221.         filesList.add(lastSegmentPath);

  222.         return forPaths(filesList.toArray(EMPTY_PATH_ARRAY));
  223.     }

    /** Buffer of {@link #ZIP_SPLIT_SIGNATURE_LENGTH} bytes into which {@link #assertSplitSignature(List)} reads the start of the first channel. */
    private final ByteBuffer zipSplitSignatureByteBuffer = ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);

    /**
     * Concatenates the given channels.
     *
     * <p>
     * The channels should be added in ascending order, e.g. z01, z02, ... z99, zip. Please note that the .zip file is the last segment and should be added as
     * the last one in the channels.
     * </p>
     * <p>
     * After the channels have been passed to the superclass, the first channel is checked for the ZIP split signature.
     * </p>
     *
     * @param channels the channels to concatenate
     * @throws NullPointerException if channels is null
     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
     */
    public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels) throws IOException {
        super(channels);

        // the first split ZIP segment should begin with ZIP split signature
        assertSplitSignature(channels);
    }

  242.     /**
  243.      * Based on the ZIP specification:
  244.      *
  245.      * <p>
  246.      * 8.5.3 Spanned/Split archives created using PKZIP for Windows (V2.50 or greater), PKZIP Command Line (V2.50 or greater), or PKZIP Explorer will include a
  247.      * special spanning signature as the first 4 bytes of the first segment of the archive. This signature (0x08074b50) will be followed immediately by the
  248.      * local header signature for the first file in the archive.
  249.      * </p>
  250.      * <p>
  251.      * The first 4 bytes of the first ZIP split segment should be the ZIP split signature(0x08074B50)
  252.      * </p>
  253.      *
  254.      * @param channels channels to be validated
  255.      * @throws IOException
  256.      */
  257.     private void assertSplitSignature(final List<SeekableByteChannel> channels) throws IOException {
  258.         final SeekableByteChannel channel = channels.get(0);
  259.         // the ZIP split file signature is at the beginning of the first split segment
  260.         channel.position(0L);

  261.         zipSplitSignatureByteBuffer.rewind();
  262.         channel.read(zipSplitSignatureByteBuffer);
  263.         final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array());
  264.         if (!signature.equals(ZipLong.DD_SIG)) {
  265.             channel.position(0L);
  266.             throw new IOException("The first ZIP split segment does not begin with split ZIP file signature");
  267.         }

  268.         channel.position(0L);
  269.     }
  270. }