/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.compress.archivers.zip;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.utils.FileNameUtils;
import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;

/**
 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
 * <p>
 * If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of the archive.
 * </p>
 *
 * @since 1.20
 */
public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {

    /**
     * Orders split segment paths by the number that follows the leading {@code z}/{@code Z} of their extension ({@code .z01}, {@code .z02}, ...).
     * <p>
     * Paths whose extension does not start with {@code z} or {@code Z} sort before all segment paths and compare as equal to each other.
     * </p>
     */
    private static final class ZipSplitSegmentComparator implements Comparator<Path>, Serializable {
        private static final long serialVersionUID = 20200123L;

        /**
         * Tests whether the extension starts with the split segment marker, ignoring case so that it agrees with the {@code [zZ]} file name filter used when
         * the segments are collected.
         */
        private static boolean isSplitSegmentExtension(final String extension) {
            return extension.startsWith("z") || extension.startsWith("Z");
        }

        @Override
        public int compare(final Path file1, final Path file2) {
            final String extension1 = FileNameUtils.getExtension(file1);
            final String extension2 = FileNameUtils.getExtension(file2);

            final boolean segment1 = isSplitSegmentExtension(extension1);
            final boolean segment2 = isSplitSegmentExtension(extension2);
            if (!segment1 || !segment2) {
                // false < true: non-segment paths come first, two non-segment paths are equal
                return Boolean.compare(segment1, segment2);
            }

            // compare numerically so that z10 sorts after z9
            return Integer.compare(Integer.parseInt(extension1.substring(1)), Integer.parseInt(extension2.substring(1)));
        }
    }

    private static final Path[] EMPTY_PATH_ARRAY = {};
    private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;

    /**
     * Concatenates ZIP split files from the last segment (the extension MUST be .zip)
     *
     * @param lastSegmentFile the last segment of ZIP split files, note that the extension MUST be .zip
     * @return SeekableByteChannel that concatenates all ZIP split files
     * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
     * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
     */
    public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException {
        return buildFromLastSplitSegment(lastSegmentFile.toPath());
    }

    /**
     * Concatenates ZIP split files from the last segment (the extension MUST be .zip)
     * <p>
     * The other segments are looked up in the directory of the last segment: every regular file named like the last segment but with an extension of the
     * form {@code .zNN} (upper or lower case {@code z} followed by digits) is used, ordered by its number.
     * </p>
     *
     * @param lastSegmentPath the last segment of ZIP split files, note that the extension MUST be .zip
     * @return SeekableByteChannel that concatenates all ZIP split files
     * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip
     * @throws IOException              if the first channel doesn't seem to hold the beginning of a split archive
     * @since 1.22
     */
    public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException {
        final String extension = FileNameUtils.getExtension(lastSegmentPath);
        if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
            throw new IllegalArgumentException("The extension of last ZIP split segment should be .zip");
        }

        // a path without parent is resolved against the current directory of its file system
        final Path declaredParent = lastSegmentPath.getParent();
        final Path parent = declaredParent != null ? declaredParent : lastSegmentPath.getFileSystem().getPath(".");
        final String fileBaseName = FileNameUtils.getBaseName(lastSegmentPath);
        final List<Path> splitZipSegments;

        // ZIP split segments should be like z01,z02....z(n-1) based on the ZIP specification
        // the dot is escaped so only a literal extension separator is accepted
        final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + "\\.[zZ][0-9]+");
        try (Stream<Path> walk = Files.walk(parent, 1)) {
            splitZipSegments = walk.filter(Files::isRegularFile)
                    .filter(path -> pattern.matcher(path.getFileName().toString()).matches())
                    .sorted(new ZipSplitSegmentComparator())
                    .collect(Collectors.toCollection(ArrayList::new));
        }

        return forPaths(lastSegmentPath, splitZipSegments);
    }

    /**
     * Closes all given channels after a failure, recording close failures as suppressed exceptions of the original failure.
     *
     * @param channels the channels to close
     * @param failure  the failure that is about to be propagated
     */
    private static void closeAfterFailure(final List<SeekableByteChannel> channels, final Throwable failure) {
        for (final SeekableByteChannel channel : channels) {
            try {
                channel.close();
            } catch (final IOException closeFailure) {
                failure.addSuppressed(closeFailure);
            }
        }
    }

    /**
     * Concatenates the given files.
     *
     * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct order
     *              (e.g. .z01, .z02... .z99, .zip)
     * @return SeekableByteChannel that concatenates all provided files
     * @throws NullPointerException if files is null
     * @throws IOException          if opening a channel for one of the files fails
     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
     */
    public static SeekableByteChannel forFiles(final File... files) throws IOException {
        final List<Path> paths = new ArrayList<>();
        for (final File f : Objects.requireNonNull(files, "files")) {
            paths.add(f.toPath());
        }

        return forPaths(paths.toArray(EMPTY_PATH_ARRAY));
    }

    /**
     * Concatenates the given files.
     *
     * @param lastSegmentFile the last segment of split ZIP segments, its extension should be .zip
     * @param files           the files to concatenate except for the last segment, note these files should be added in correct order (e.g. .z01, .z02... .z99)
     * @return SeekableByteChannel that concatenates all provided files
     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
     * @throws NullPointerException if files or lastSegmentFile is null
     */
    public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException {
        Objects.requireNonNull(files, "files");
        Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");

        final List<Path> filesList = new ArrayList<>();
        files.forEach(f -> filesList.add(f.toPath()));

        return forPaths(lastSegmentFile.toPath(), filesList);
    }

    /**
     * Concatenates the given channels.
     * <p>
     * If exactly one channel is given it is returned as is, without checking for the split signature.
     * </p>
     *
     * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip) and these channels should be added
     *                 in correct order (e.g. .z01, .z02... .z99, .zip)
     * @return SeekableByteChannel that concatenates all provided channels
     * @throws NullPointerException if channels is null
     * @throws IOException          if reading channels fails
     */
    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException {
        if (Objects.requireNonNull(channels, "channels").length == 1) {
            return channels[0];
        }
        return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
    }

    /**
     * Concatenates the given channels.
     *
     * @param lastSegmentChannel channel of the last segment of split ZIP segments, its extension should be .zip
     * @param channels           the channels to concatenate except for the last segment, note these channels should be added in correct order (e.g. .z01,
     *                           .z02... .z99)
     * @return SeekableByteChannel that concatenates all provided channels
     * @throws NullPointerException if lastSegmentChannel or channels is null
     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
     */
    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel, final Iterable<SeekableByteChannel> channels)
            throws IOException {
        Objects.requireNonNull(channels, "channels");
        Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");

        final List<SeekableByteChannel> channelsList = new ArrayList<>();
        channels.forEach(channelsList::add);
        channelsList.add(lastSegmentChannel);

        return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0]));
    }

    /**
     * Concatenates the given file paths.
     * <p>
     * If exactly one path is given, the channel opened for it is returned as is, without checking for the split signature. If opening one of the paths or
     * validating the first segment fails, every channel opened by this method so far is closed before the failure is propagated.
     * </p>
     *
     * @param paths       the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in
     *                    correct order (e.g.: .z01, .z02... .z99, .zip)
     * @param openOptions the options to open paths (shared by all paths).
     * @return SeekableByteChannel that concatenates all provided files
     * @throws NullPointerException if paths is null
     * @throws IOException          if opening a channel for one of the files fails
     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
     * @since 1.22
     */
    public static SeekableByteChannel forPaths(final List<Path> paths, final OpenOption[] openOptions) throws IOException {
        Objects.requireNonNull(paths, "paths");
        final List<SeekableByteChannel> channels = new ArrayList<>();
        try {
            for (final Path path : paths) {
                channels.add(Files.newByteChannel(path, openOptions));
            }
            if (channels.size() == 1) {
                return channels.get(0);
            }
            return new ZipSplitReadOnlySeekableByteChannel(channels);
        } catch (final IOException | RuntimeException e) {
            // the caller never gets hold of the channels opened here, so release them now
            closeAfterFailure(channels, e);
            throw e;
        }
    }

    /**
     * Concatenates the given file paths, opening each of them with {@link StandardOpenOption#READ}.
     *
     * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) and these files should be added in correct
     *              order (e.g.: .z01, .z02... .z99, .zip)
     * @return SeekableByteChannel that concatenates all provided files
     * @throws NullPointerException if paths is null
     * @throws IOException          if opening a channel for one of the files fails
     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
     * @since 1.22
     */
    public static SeekableByteChannel forPaths(final Path... paths) throws IOException {
        return forPaths(Arrays.asList(paths), new OpenOption[] { StandardOpenOption.READ });
    }

    /**
     * Concatenates the given file paths.
     *
     * @param lastSegmentPath the last segment path of split ZIP segments, its extension must be .zip
     * @param paths           the file paths to concatenate except for the last segment, note these files should be added in correct order (e.g.: .z01, .z02...
     *                        .z99)
     * @return SeekableByteChannel that concatenates all provided files
     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
     * @throws NullPointerException if paths or lastSegmentPath is null
     * @since 1.22
     */
    public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException {
        Objects.requireNonNull(paths, "paths");
        Objects.requireNonNull(lastSegmentPath, "lastSegmentPath");

        final List<Path> filesList = new ArrayList<>();
        paths.forEach(filesList::add);
        filesList.add(lastSegmentPath);

        return forPaths(filesList.toArray(EMPTY_PATH_ARRAY));
    }

    private final ByteBuffer zipSplitSignatureByteBuffer = ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);

    /**
     * Concatenates the given channels.
     *
     * <p>
     * The channels should be added in ascending order, e.g. z01, z02, ... z99, ZIP. Please note that the .zip file is the last segment and should be added as
     * the last one in the channels.
     * </p>
     *
     * @param channels the channels to concatenate
     * @throws NullPointerException if channels is null
     * @throws IOException          if the first channel doesn't seem to hold the beginning of a split archive
     */
    public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels) throws IOException {
        super(channels);

        // the first split ZIP segment should begin with ZIP split signature
        assertSplitSignature(channels);
    }

    /**
     * Verifies that the first channel starts with the split signature and leaves that channel positioned at 0 afterwards, whether or not the check succeeds.
     *
     * <p>
     * Based on the ZIP specification:
     * </p>
     * <p>
     * 8.5.3 Spanned/Split archives created using PKZIP for Windows (V2.50 or greater), PKZIP Command Line (V2.50 or greater), or PKZIP Explorer will include a
     * special spanning signature as the first 4 bytes of the first segment of the archive. This signature (0x08074b50) will be followed immediately by the
     * local header signature for the first file in the archive.
     * </p>
     * <p>
     * The first 4 bytes of the first ZIP split segment should be the ZIP split signature(0x08074B50)
     * </p>
     *
     * @param channels channels to be validated
     * @throws IOException if the first channel cannot be read or repositioned, or does not begin with the split signature
     */
    private void assertSplitSignature(final List<SeekableByteChannel> channels) throws IOException {
        final SeekableByteChannel channel = channels.get(0);
        // the ZIP split file signature is at the beginning of the first split segment
        channel.position(0L);

        zipSplitSignatureByteBuffer.rewind();
        // a single read may deliver fewer bytes than requested, keep reading until the
        // signature is complete or the channel has nothing more to offer
        while (zipSplitSignatureByteBuffer.hasRemaining()) {
            if (channel.read(zipSplitSignatureByteBuffer) <= 0) {
                break;
            }
        }
        final boolean signatureComplete = !zipSplitSignatureByteBuffer.hasRemaining();

        // always hand the channel back at its start, even when validation fails
        channel.position(0L);

        if (!signatureComplete || !new ZipLong(zipSplitSignatureByteBuffer.array()).equals(ZipLong.DD_SIG)) {
            throw new IOException("The first ZIP split segment does not begin with split ZIP file signature");
        }
    }
}