001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.File; 022import java.io.IOException; 023import java.io.Serializable; 024import java.nio.ByteBuffer; 025import java.nio.channels.SeekableByteChannel; 026import java.nio.file.Files; 027import java.nio.file.OpenOption; 028import java.nio.file.Path; 029import java.nio.file.StandardOpenOption; 030import java.util.ArrayList; 031import java.util.Arrays; 032import java.util.Comparator; 033import java.util.List; 034import java.util.Objects; 035import java.util.regex.Pattern; 036import java.util.stream.Collectors; 037import java.util.stream.Stream; 038 039import org.apache.commons.compress.archivers.ArchiveStreamFactory; 040import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel; 041import org.apache.commons.io.file.PathUtils; 042 043/** 044 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like. 045 * <p> 046 * If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of the archive. 047 * </p> 048 * 049 * @since 1.20 050 */ 051public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel { 052 private static final class ZipSplitSegmentComparator implements Comparator<Path>, Serializable { 053 private static final long serialVersionUID = 20200123L; 054 055 @Override 056 public int compare(final Path file1, final Path file2) { 057 final String extension1 = PathUtils.getExtension(file1); 058 final String extension2 = PathUtils.getExtension(file2); 059 if (!extension1.startsWith("z")) { 060 return -1; 061 } 062 if (!extension2.startsWith("z")) { 063 return 1; 064 } 065 final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1)); 066 final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1)); 067 return splitSegmentNumber1.compareTo(splitSegmentNumber2); 068 } 069 } 070 071 private static final Path[] EMPTY_PATH_ARRAY = {}; 072 private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4; 073 074 /** 075 * Concatenates ZIP split files from the last segment(the extension SHOULD be .zip) 076 * 077 * @param lastSegmentFile the last segment of ZIP split files, note that the extension SHOULD be .zip 078 * @return SeekableByteChannel that concatenates all ZIP split files 079 * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip 080 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive 081 */ 082 public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException { 083 return buildFromLastSplitSegment(lastSegmentFile.toPath()); 084 } 085 086 /** 087 * Concatenates ZIP split files from the last segment (the extension MUST be .zip) 088 * 089 * @param lastSegmentPath the last segment of ZIP split files, note that the extension MUST be .zip 090 * @return SeekableByteChannel that concatenates all ZIP split files 091 * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip 092 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive 093 * @since 1.22 094 */ 095 public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException { 096 final String extension = PathUtils.getExtension(lastSegmentPath); 097 if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) { 098 throw new IllegalArgumentException("The extension of last ZIP split segment should be .zip"); 099 } 100 final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent() : lastSegmentPath.getFileSystem().getPath("."); 101 final String fileBaseName = PathUtils.getBaseName(lastSegmentPath); 102 final ArrayList<Path> splitZipSegments; 103 // ZIP split segments should be like z01,z02....z(n-1) based on the ZIP specification 104 final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+"); 105 try (Stream<Path> walk = Files.walk(parent, 1)) { 106 splitZipSegments = walk.filter(Files::isRegularFile).filter(path -> pattern.matcher(path.getFileName().toString()).matches()) 107 .sorted(new ZipSplitSegmentComparator()).collect(Collectors.toCollection(ArrayList::new)); 108 } 109 return forPaths(lastSegmentPath, splitZipSegments); 110 } 111 112 /** 113 * Concatenates the given files. 114 * 115 * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct order 116 * (for example .z01, .z02... .z99, .zip) 117 * @return SeekableByteChannel that concatenates all provided files 118 * @throws NullPointerException if files is null 119 * @throws IOException if opening a channel for one of the files fails 120 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive 121 */ 122 public static SeekableByteChannel forFiles(final File... files) throws IOException { 123 final List<Path> paths = new ArrayList<>(); 124 for (final File f : Objects.requireNonNull(files, "files")) { 125 paths.add(f.toPath()); 126 } 127 return forPaths(paths.toArray(EMPTY_PATH_ARRAY)); 128 } 129 130 /** 131 * Concatenates the given files. 132 * 133 * @param lastSegmentFile the last segment of split ZIP segments, its extension should be .zip 134 * @param files the files to concatenate except for the last segment, note these files should be added in correct order (for example .z01, .z02... 135 * .z99) 136 * @return SeekableByteChannel that concatenates all provided files 137 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive 138 * @throws NullPointerException if files or lastSegmentFile is null 139 */ 140 public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException { 141 Objects.requireNonNull(files, "files"); 142 Objects.requireNonNull(lastSegmentFile, "lastSegmentFile"); 143 final List<Path> filesList = new ArrayList<>(); 144 files.forEach(f -> filesList.add(f.toPath())); 145 return forPaths(lastSegmentFile.toPath(), filesList); 146 } 147 148 /** 149 * Concatenates the given channels. 150 * 151 * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip) and these channels should be added 152 * in correct order (for example .z01, .z02... .z99, .zip) 153 * @return SeekableByteChannel that concatenates all provided channels 154 * @throws NullPointerException if channels is null 155 * @throws IOException if reading channels fails 156 */ 157 public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException { 158 if (Objects.requireNonNull(channels, "channels").length == 1) { 159 return channels[0]; 160 } 161 return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels)); 162 } 163 164 /** 165 * Concatenates the given channels. 166 * 167 * @param lastSegmentChannel channel of the last segment of split ZIP segments, its extension should be .zip 168 * @param channels the channels to concatenate except for the last segment, note these channels should be added in correct order (for example 169 * .z01, .z02... .z99) 170 * @return SeekableByteChannel that concatenates all provided channels 171 * @throws NullPointerException if lastSegmentChannel or channels is null 172 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive 173 */ 174 public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel, final Iterable<SeekableByteChannel> channels) 175 throws IOException { 176 Objects.requireNonNull(channels, "channels"); 177 Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel"); 178 final List<SeekableByteChannel> channelsList = new ArrayList<>(); 179 channels.forEach(channelsList::add); 180 channelsList.add(lastSegmentChannel); 181 return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0])); 182 } 183 184 /** 185 * Concatenates the given file paths. 186 * 187 * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in 188 * correct order (for example: .z01, .z02... .z99, .zip) 189 * @param openOptions the options to open paths (shared by all paths). 190 * @return SeekableByteChannel that concatenates all provided files 191 * @throws NullPointerException if files is null 192 * @throws IOException if opening a channel for one of the files fails 193 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive 194 * @since 1.22 195 */ 196 public static SeekableByteChannel forPaths(final List<Path> paths, final OpenOption[] openOptions) throws IOException { 197 final List<SeekableByteChannel> channels = new ArrayList<>(); 198 for (final Path path : Objects.requireNonNull(paths, "paths")) { 199 channels.add(Files.newByteChannel(path, openOptions)); 200 } 201 if (channels.size() == 1) { 202 return channels.get(0); 203 } 204 return new ZipSplitReadOnlySeekableByteChannel(channels); 205 } 206 207 /** 208 * Concatenates the given file paths. 209 * 210 * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct 211 * order (for example: .z01, .z02... .z99, .zip) 212 * @return SeekableByteChannel that concatenates all provided files 213 * @throws NullPointerException if files is null 214 * @throws IOException if opening a channel for one of the files fails 215 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive 216 * @since 1.22 217 */ 218 public static SeekableByteChannel forPaths(final Path... paths) throws IOException { 219 return forPaths(Arrays.asList(paths), new OpenOption[] { StandardOpenOption.READ }); 220 } 221 222 /** 223 * Concatenates the given file paths. 224 * 225 * @param lastSegmentPath the last segment path of split ZIP segments, its extension must be .zip 226 * @param paths the file paths to concatenate except for the last segment, note these files should be added in correct order (for example: .z01, 227 * .z02... .z99) 228 * @return SeekableByteChannel that concatenates all provided files 229 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive 230 * @throws NullPointerException if files or lastSegmentPath is null 231 * @since 1.22 232 */ 233 public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException { 234 Objects.requireNonNull(paths, "paths"); 235 Objects.requireNonNull(lastSegmentPath, "lastSegmentPath"); 236 final List<Path> filesList = new ArrayList<>(); 237 paths.forEach(filesList::add); 238 filesList.add(lastSegmentPath); 239 return forPaths(filesList.toArray(EMPTY_PATH_ARRAY)); 240 } 241 242 private final ByteBuffer zipSplitSignatureByteBuffer = ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH); 243 244 /** 245 * Concatenates the given channels. 246 * 247 * <p> 248 * The channels should be add in ascending order, for example z01, z02, ... z99, ZIP please note that the .zip file is the last segment and should be added 249 * as the last one in the channels 250 * </p> 251 * 252 * @param channels the channels to concatenate 253 * @throws NullPointerException if channels is null 254 * @throws IOException if the first channel doesn't seem to hold the beginning of a split archive 255 */ 256 public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels) throws IOException { 257 super(channels); 258 // the first split ZIP segment should begin with ZIP split signature 259 assertSplitSignature(channels); 260 } 261 262 /** 263 * Based on the ZIP specification: 264 * 265 * <p> 266 * 8.5.3 Spanned/Split archives created using PKZIP for Windows (V2.50 or greater), PKZIP Command Line (V2.50 or greater), or PKZIP Explorer will include a 267 * special spanning signature as the first 4 bytes of the first segment of the archive. This signature (0x08074b50) will be followed immediately by the 268 * local header signature for the first file in the archive. 269 * </p> 270 * <p> 271 * The first 4 bytes of the first ZIP split segment should be the ZIP split signature(0x08074B50) 272 * </p> 273 * 274 * @param channels channels to be validated 275 * @throws IOException if an I/O error occurs. 276 */ 277 private void assertSplitSignature(final List<SeekableByteChannel> channels) throws IOException { 278 final SeekableByteChannel channel = channels.get(0); 279 // the ZIP split file signature is at the beginning of the first split segment 280 channel.position(0L); 281 zipSplitSignatureByteBuffer.rewind(); 282 channel.read(zipSplitSignatureByteBuffer); 283 final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array()); 284 if (!signature.equals(ZipLong.DD_SIG)) { 285 channel.position(0L); 286 throw new IOException("The first ZIP split segment does not begin with split ZIP file signature"); 287 } 288 channel.position(0L); 289 } 290}