ZipSplitOutputStream.java

  1. /*
  2.  *  Licensed to the Apache Software Foundation (ASF) under one or more
  3.  *  contributor license agreements.  See the NOTICE file distributed with
  4.  *  this work for additional information regarding copyright ownership.
  5.  *  The ASF licenses this file to You under the Apache License, Version 2.0
  6.  *  (the "License"); you may not use this file except in compliance with
  7.  *  the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  *  Unless required by applicable law or agreed to in writing, software
  12.  *  distributed under the License is distributed on an "AS IS" BASIS,
  13.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  *  See the License for the specific language governing permissions and
  15.  *  limitations under the License.
  16.  */
  17. package org.apache.commons.compress.archivers.zip;

  18. import java.io.File;
  19. import java.io.IOException;
  20. import java.nio.ByteBuffer;
  21. import java.nio.channels.FileChannel;
  22. import java.nio.file.Files;
  23. import java.nio.file.Path;
  24. import java.nio.file.StandardCopyOption;
  25. import java.nio.file.StandardOpenOption;
  26. import java.util.ArrayList;
  27. import java.util.List;
  28. import java.util.Map;
  29. import java.util.Objects;
  30. import java.util.TreeMap;

  31. import org.apache.commons.compress.utils.FileNameUtils;

  32. /**
  33.  * Used internally by {@link ZipArchiveOutputStream} when creating a split archive.
  34.  *
  35.  * @since 1.20
  36.  */
  37. final class ZipSplitOutputStream extends RandomAccessOutputStream {

  38.     /**
  39.      * 8.5.1 Capacities for split archives are as follows:
  40.      * <p>
  41.      * Maximum number of segments = 4,294,967,295 - 1 Maximum .ZIP segment size = 4,294,967,295 bytes (refer to section 8.5.6) Minimum segment size = 64K
  42.      * </p>
  43.      * <p>
  44.      * Maximum PKSFX segment size = 2,147,483,647 bytes
  45.      * </p>
  46.      */
  47.     private static final long ZIP_SEGMENT_MIN_SIZE = 64 * 1024L;
  48.     private static final long ZIP_SEGMENT_MAX_SIZE = 4294967295L;

  49.     private FileChannel currentChannel;
  50.     private FileRandomAccessOutputStream outputStream;
  51.     private Path zipFile;
  52.     private final long splitSize;
  53.     private long totalPosition;
  54.     private int currentSplitSegmentIndex;
  55.     private long currentSplitSegmentBytesWritten;
  56.     private boolean finished;
  57.     private final byte[] singleByte = new byte[1];
  58.     private final List<Long> diskToPosition = new ArrayList<>();
  59.     private final TreeMap<Long, Path> positionToFiles = new TreeMap<>();

  60.     /**
  61.      * Creates a split ZIP. If the ZIP file is smaller than the split size, then there will only be one split ZIP, and its suffix is .zip, otherwise the split
  62.      * segments should be like .z01, .z02, ... .z(N-1), .zip
  63.      *
  64.      * @param zipFile   the ZIP file to write to
  65.      * @param splitSize the split size
  66.      * @throws IllegalArgumentException if arguments are illegal: Zip split segment size should between 64K and 4,294,967,295.
  67.      * @throws IOException              if an I/O error occurs
  68.      */
  69.     ZipSplitOutputStream(final File zipFile, final long splitSize) throws IllegalArgumentException, IOException {
  70.         this(zipFile.toPath(), splitSize);
  71.     }

  72.     /**
  73.      * Creates a split ZIP. If the ZIP file is smaller than the split size, then there will only be one split ZIP, and its suffix is .zip, otherwise the split
  74.      * segments should be like .z01, .z02, ... .z(N-1), .zip
  75.      *
  76.      * @param zipFile   the path to ZIP file to write to
  77.      * @param splitSize the split size
  78.      * @throws IllegalArgumentException if arguments are illegal: Zip split segment size should between 64K and 4,294,967,295.
  79.      * @throws IOException              if an I/O error occurs
  80.      * @since 1.22
  81.      */
  82.     ZipSplitOutputStream(final Path zipFile, final long splitSize) throws IllegalArgumentException, IOException {
  83.         if (splitSize < ZIP_SEGMENT_MIN_SIZE || splitSize > ZIP_SEGMENT_MAX_SIZE) {
  84.             throw new IllegalArgumentException("Zip split segment size should between 64K and 4,294,967,295");
  85.         }
  86.         this.zipFile = zipFile;
  87.         this.splitSize = splitSize;
  88.         this.outputStream = new FileRandomAccessOutputStream(zipFile);
  89.         this.currentChannel = this.outputStream.channel();
  90.         this.positionToFiles.put(0L, this.zipFile);
  91.         this.diskToPosition.add(0L);
  92.         // write the ZIP split signature 0x08074B50 to the ZIP file
  93.         writeZipSplitSignature();
  94.     }

  95.     public long calculateDiskPosition(final long disk, final long localOffset) throws IOException {
  96.         if (disk >= Integer.MAX_VALUE) {
  97.             throw new IOException("Disk number exceeded internal limits: limit=" + Integer.MAX_VALUE + " requested=" + disk);
  98.         }
  99.         return diskToPosition.get((int) disk) + localOffset;
  100.     }

  101.     @Override
  102.     public void close() throws IOException {
  103.         if (!finished) {
  104.             finish();
  105.         }
  106.     }

  107.     /**
  108.      * Creates the new ZIP split segment, the last ZIP segment should be .zip, and the ZIP split segments' suffix should be like .z01, .z02, .z03, ... .z99,
  109.      * .z100, ..., .z(N-1), .zip
  110.      * <p>
  111.      * 8.3.3 Split ZIP files are typically written to the same location and are subject to name collisions if the spanned name format is used since each segment
  112.      * will reside on the same drive. To avoid name collisions, split archives are named as follows.
  113.      * </p>
  114.      * <p>
  115.      * Segment 1 = filename.z01 Segment n-1 = filename.z(n-1) Segment n = filename.zip
  116.      * </p>
  117.      * <p>
  118.      * NOTE: The ZIP split segment begin from 1,2,3,... , and we're creating a new segment, so the new segment suffix should be (currentSplitSegmentIndex + 2)
  119.      * </p>
  120.      *
  121.      * @param zipSplitSegmentSuffixIndex
  122.      * @return
  123.      * @throws IOException
  124.      */
  125.     private Path createNewSplitSegmentFile(final Integer zipSplitSegmentSuffixIndex) throws IOException {
  126.         final Path newFile = getSplitSegmentFileName(zipSplitSegmentSuffixIndex);

  127.         if (Files.exists(newFile)) {
  128.             throw new IOException("split ZIP segment " + newFile + " already exists");
  129.         }
  130.         return newFile;
  131.     }


  132.     /**
  133.      * The last ZIP split segment's suffix should be .zip
  134.      *
  135.      * @throws IOException
  136.      */
  137.     private void finish() throws IOException {
  138.         if (finished) {
  139.             throw new IOException("This archive has already been finished");
  140.         }

  141.         final String zipFileBaseName = FileNameUtils.getBaseName(zipFile);
  142.         outputStream.close();
  143.         Files.move(zipFile, zipFile.resolveSibling(zipFileBaseName + ".zip"), StandardCopyOption.ATOMIC_MOVE);
  144.         finished = true;
  145.     }

  146.     public long getCurrentSplitSegmentBytesWritten() {
  147.         return currentSplitSegmentBytesWritten;
  148.     }

  149.     public int getCurrentSplitSegmentIndex() {
  150.         return currentSplitSegmentIndex;
  151.     }

  152.     private Path getSplitSegmentFileName(final Integer zipSplitSegmentSuffixIndex) {
  153.         final int newZipSplitSegmentSuffixIndex = zipSplitSegmentSuffixIndex == null ? currentSplitSegmentIndex + 2 : zipSplitSegmentSuffixIndex;
  154.         final String baseName = FileNameUtils.getBaseName(zipFile);
  155.         final StringBuilder extension = new StringBuilder(".z");
  156.         if (newZipSplitSegmentSuffixIndex <= 9) {
  157.             extension.append("0").append(newZipSplitSegmentSuffixIndex);
  158.         } else {
  159.             extension.append(newZipSplitSegmentSuffixIndex);
  160.         }

  161.         final Path parent = zipFile.getParent();
  162.         final String dir = Objects.nonNull(parent) ? parent.toAbsolutePath().toString() : ".";
  163.         return zipFile.getFileSystem().getPath(dir, baseName + extension.toString());
  164.     }

  165.     /**
  166.      * Creates a new ZIP split segment and prepare to write to the new segment
  167.      *
  168.      * @throws IOException
  169.      */
  170.     private void openNewSplitSegment() throws IOException {
  171.         Path newFile;
  172.         if (currentSplitSegmentIndex == 0) {
  173.             outputStream.close();
  174.             newFile = createNewSplitSegmentFile(1);
  175.             Files.move(zipFile, newFile, StandardCopyOption.ATOMIC_MOVE);
  176.             this.positionToFiles.put(0L, newFile);
  177.         }

  178.         newFile = createNewSplitSegmentFile(null);

  179.         outputStream.close();
  180.         outputStream = new FileRandomAccessOutputStream(newFile);
  181.         currentChannel = outputStream.channel();
  182.         currentSplitSegmentBytesWritten = 0;
  183.         zipFile = newFile;
  184.         currentSplitSegmentIndex++;
  185.         this.diskToPosition.add(this.totalPosition);
  186.         this.positionToFiles.put(this.totalPosition, newFile);
  187.     }

  188.     @Override
  189.     public long position() {
  190.         return totalPosition;
  191.     }

  192.     /**
  193.      * Some data can not be written to different split segments, for example:
  194.      * <p>
  195.      * 4.4.1.5 The end of central directory record and the Zip64 end of central directory locator record MUST reside on the same disk when splitting or spanning
  196.      * an archive.
  197.      * </p>
  198.      *
  199.      * @param unsplittableContentSize
  200.      * @throws IllegalArgumentException
  201.      * @throws IOException
  202.      */
  203.     public void prepareToWriteUnsplittableContent(final long unsplittableContentSize) throws IllegalArgumentException, IOException {
  204.         if (unsplittableContentSize > this.splitSize) {
  205.             throw new IllegalArgumentException("The unsplittable content size is bigger than the split segment size");
  206.         }

  207.         final long bytesRemainingInThisSegment = this.splitSize - this.currentSplitSegmentBytesWritten;
  208.         if (bytesRemainingInThisSegment < unsplittableContentSize) {
  209.             openNewSplitSegment();
  210.         }
  211.     }

  212.     @Override
  213.     public void write(final byte[] b) throws IOException {
  214.         write(b, 0, b.length);
  215.     }

  216.     /**
  217.      * Writes the data to ZIP split segments, if the remaining space of current split segment is not enough, then a new split segment should be created
  218.      *
  219.      * @param b   data to write
  220.      * @param off offset of the start of data in param b
  221.      * @param len the length of data to write
  222.      * @throws IOException
  223.      */
  224.     @Override
  225.     public void write(final byte[] b, final int off, final int len) throws IOException {
  226.         if (len <= 0) {
  227.             return;
  228.         }

  229.         if (currentSplitSegmentBytesWritten >= splitSize) {
  230.             openNewSplitSegment();
  231.             write(b, off, len);
  232.         } else if (currentSplitSegmentBytesWritten + len > splitSize) {
  233.             final int bytesToWriteForThisSegment = (int) splitSize - (int) currentSplitSegmentBytesWritten;
  234.             write(b, off, bytesToWriteForThisSegment);
  235.             openNewSplitSegment();
  236.             write(b, off + bytesToWriteForThisSegment, len - bytesToWriteForThisSegment);
  237.         } else {
  238.             outputStream.write(b, off, len);
  239.             currentSplitSegmentBytesWritten += len;
  240.             totalPosition += len;
  241.         }
  242.     }

  243.     @Override
  244.     public void write(final int i) throws IOException {
  245.         singleByte[0] = (byte) (i & 0xff);
  246.         write(singleByte);
  247.     }

  248.     @Override
  249.     public void writeFully(final byte[] b, final int off, final int len, final long atPosition) throws IOException {
  250.         long remainingPosition = atPosition;
  251.         for (int remainingOff = off, remainingLen = len; remainingLen > 0; ) {
  252.             final Map.Entry<Long, Path> segment = positionToFiles.floorEntry(remainingPosition);
  253.             final Long segmentEnd = positionToFiles.higherKey(remainingPosition);
  254.             if (segmentEnd == null) {
  255.                 ZipIoUtil.writeFullyAt(this.currentChannel, ByteBuffer.wrap(b, remainingOff, remainingLen), remainingPosition - segment.getKey());
  256.                 remainingPosition += remainingLen;
  257.                 remainingOff += remainingLen;
  258.                 remainingLen = 0;
  259.             } else if (remainingPosition + remainingLen <= segmentEnd) {
  260.                 writeToSegment(segment.getValue(), remainingPosition - segment.getKey(), b, remainingOff, remainingLen);
  261.                 remainingPosition += remainingLen;
  262.                 remainingOff += remainingLen;
  263.                 remainingLen = 0;
  264.             } else {
  265.                 final int toWrite = Math.toIntExact(segmentEnd - remainingPosition);
  266.                 writeToSegment(segment.getValue(), remainingPosition - segment.getKey(), b, remainingOff, toWrite);
  267.                 remainingPosition += toWrite;
  268.                 remainingOff += toWrite;
  269.                 remainingLen -= toWrite;
  270.             }
  271.         }
  272.     }

  273.     private void writeToSegment(
  274.             final Path segment,
  275.             final long position,
  276.             final byte[] b,
  277.             final int off,
  278.             final int len
  279.     ) throws IOException {
  280.         try (FileChannel channel = FileChannel.open(segment, StandardOpenOption.WRITE)) {
  281.             ZipIoUtil.writeFullyAt(channel, ByteBuffer.wrap(b, off, len), position);
  282.         }
  283.     }

  284.     /**
  285.      * Writes the ZIP split signature (0x08074B50) to the head of the first ZIP split segment
  286.      *
  287.      * @throws IOException
  288.      */
  289.     private void writeZipSplitSignature() throws IOException {
  290.         outputStream.write(ZipArchiveOutputStream.DD_SIG);
  291.         currentSplitSegmentBytesWritten += ZipArchiveOutputStream.DD_SIG.length;
  292.         totalPosition += ZipArchiveOutputStream.DD_SIG.length;
  293.     }
  294. }