AbstractLZ77CompressorInputStream.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
- package org.apache.commons.compress.compressors.lz77support;
- import java.io.IOException;
- import java.io.InputStream;
- import java.util.Arrays;
- import org.apache.commons.compress.compressors.CompressorInputStream;
- import org.apache.commons.compress.utils.ByteUtils;
- import org.apache.commons.compress.utils.IOUtils;
- import org.apache.commons.compress.utils.InputStreamStatistics;
- import org.apache.commons.io.input.BoundedInputStream;
- /**
- * Encapsulates code common to LZ77 decompressors.
- *
- * <p>
- * Assumes the stream consists of blocks of literal data and back-references (called copies) in any order. Of course the first block must be a literal block for
- * the scheme to work - unless the {@link #prefill prefill} method has been used to provide initial data that is never returned by {@link #read read} but only
- * used for back-references.
- * </p>
- *
- * <p>
- * Subclasses must override the three-arg {@link #read read} method as the no-arg version delegates to it and the default implementation delegates to the no-arg
- * version, leading to infinite mutual recursion and a {@code StackOverflowError} otherwise.
- * </p>
- *
- * <p>
- * The contract for subclasses' {@code read} implementation is:
- * </p>
- * <ul>
- *
- * <li>keep track of the current state of the stream. Is it inside a literal block or a back-reference or in-between blocks?</li>
- *
- * <li>Use {@link #readOneByte} to access the underlying stream directly.</li>
- *
- * <li>If a new literal block starts, use {@link #startLiteral} to tell this class about it and read the literal data using {@link #readLiteral} until it
- * returns {@code 0}. {@link #hasMoreDataInBlock} will return {@code false} before the next call to {@link #readLiteral} would return {@code 0}.</li>
- *
- * <li>If a new back-reference starts, use {@link #startBackReference} to tell this class about it and read the literal data using {@link #readBackReference}
- * until it returns {@code 0}. {@link #hasMoreDataInBlock} will return {@code false} before the next call to {@link #readBackReference} would return
- * {@code 0}.</li>
- *
- * <li>If the end of the stream has been reached, return {@code -1} as this class' methods will never do so themselves.</li>
- *
- * </ul>
- *
- * <p>
- * {@link #readOneByte} and {@link #readLiteral} update the counter for bytes read.
- * </p>
- *
- * @since 1.14
- */
- public abstract class AbstractLZ77CompressorInputStream extends CompressorInputStream implements InputStreamStatistics {
- /** Size of the window - must be bigger than the biggest offset expected. */
- private final int windowSize;
- /**
- * Buffer to write decompressed bytes to for back-references, will be three times windowSize big.
- *
- * <p>
- * Three times so we can slide the whole buffer a windowSize to the left once we've read twice windowSize and still have enough data inside of it to satisfy
- * back-references.
- * </p>
- */
- private final byte[] buf;
- /** One behind the index of the last byte in the buffer that was written, i.e. the next position to write to */
- private int writeIndex;
- /** Index of the next byte to be read. */
- private int readIndex;
- /** The underlying stream to read compressed data from */
- private final BoundedInputStream in;
- /** Number of bytes still to be read from the current literal or back-reference. */
- private long bytesRemaining;
- /** Offset of the current back-reference. */
- private int backReferenceOffset;
- /** Uncompressed size */
- private int size;
- // used in no-arg read method
- private final byte[] oneByte = new byte[1];
- /**
- * Supplier that delegates to {@link #readOneByte}.
- */
- protected final ByteUtils.ByteSupplier supplier = this::readOneByte;
- /**
- * Creates a new LZ77 input stream.
- *
- * @param is An InputStream to read compressed data from
- * @param windowSize Size of the window kept for back-references, must be bigger than the biggest offset expected.
- *
- * @throws IllegalArgumentException if windowSize is not bigger than 0
- */
- public AbstractLZ77CompressorInputStream(final InputStream is, final int windowSize) {
- this.in = BoundedInputStream.builder().setInputStream(is).asSupplier().get();
- if (windowSize <= 0) {
- throw new IllegalArgumentException("windowSize must be bigger than 0");
- }
- this.windowSize = windowSize;
- buf = new byte[3 * windowSize];
- writeIndex = readIndex = 0;
- bytesRemaining = 0;
- }
- /** {@inheritDoc} */
- @Override
- public int available() {
- return writeIndex - readIndex;
- }
- /** {@inheritDoc} */
- @Override
- public void close() throws IOException {
- in.close();
- }
- /**
- * @since 1.17
- */
- @Override
- public long getCompressedCount() {
- return in.getCount();
- }
- /**
- * Gets the uncompressed size of the stream
- *
- * @return the uncompressed size
- */
- public int getSize() {
- return size;
- }
- /**
- * Is there still data remaining inside the current block?
- *
- * @return true if there is still data remaining inside the current block.
- */
- protected final boolean hasMoreDataInBlock() {
- return bytesRemaining > 0;
- }
- /**
- * Adds some initial data to fill the window with.
- *
- * <p>
- * This is used if the stream has been cut into blocks and back-references of one block may refer to data of the previous block(s). One such example is the
- * LZ4 frame format using block dependency.
- * </p>
- *
- * @param data the data to fill the window with.
- * @throws IllegalStateException if the stream has already started to read data
- */
- public void prefill(final byte[] data) {
- if (writeIndex != 0) {
- throw new IllegalStateException("The stream has already been read from, can't prefill anymore");
- }
- // we don't need more data than the big offset could refer to, so cap it
- final int len = Math.min(windowSize, data.length);
- // we need the last data as we are dealing with *back*-references
- System.arraycopy(data, data.length - len, buf, 0, len);
- writeIndex += len;
- readIndex += len;
- }
- /** {@inheritDoc} */
- @Override
- public int read() throws IOException {
- return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
- }
- /**
- * Reads data from the current back-reference.
- *
- * @param b buffer to write data to
- * @param off offset to start writing to
- * @param len maximum amount of data to read
- * @return number of bytes read, may be 0. Will never return -1 as EOF-detection is the responsibility of the subclass
- * @throws NullPointerException if {@code b} is null
- * @throws IndexOutOfBoundsException if {@code off} is negative, {@code len} is negative, or {@code len} is greater than {@code b.length - off}
- */
- protected final int readBackReference(final byte[] b, final int off, final int len) {
- final int avail = available();
- if (len > avail) {
- tryToCopy(len - avail);
- }
- return readFromBuffer(b, off, len);
- }
- private int readFromBuffer(final byte[] b, final int off, final int len) {
- final int readable = Math.min(len, available());
- if (readable > 0) {
- System.arraycopy(buf, readIndex, b, off, readable);
- readIndex += readable;
- if (readIndex > 2 * windowSize) {
- slideBuffer();
- }
- }
- size += readable;
- return readable;
- }
- /**
- * Reads data from the current literal block.
- *
- * @param b buffer to write data to
- * @param off offset to start writing to
- * @param len maximum amount of data to read
- * @return number of bytes read, may be 0. Will never return -1 as EOF-detection is the responsibility of the subclass
- * @throws IOException if the underlying stream throws or signals an EOF before the amount of data promised for the block have been read
- * @throws NullPointerException if {@code b} is null
- * @throws IndexOutOfBoundsException if {@code off} is negative, {@code len} is negative, or {@code len} is greater than {@code b.length - off}
- */
- protected final int readLiteral(final byte[] b, final int off, final int len) throws IOException {
- final int avail = available();
- if (len > avail) {
- tryToReadLiteral(len - avail);
- }
- return readFromBuffer(b, off, len);
- }
- /**
- * Reads a single byte from the real input stream and ensures the data is accounted for.
- *
- * @return the byte read as value between 0 and 255 or -1 if EOF has been reached.
- * @throws IOException if the underlying stream throws
- */
- protected final int readOneByte() throws IOException {
- final int b = in.read();
- if (b != -1) {
- count(1);
- return b & 0xFF;
- }
- return -1;
- }
- private void slideBuffer() {
- System.arraycopy(buf, windowSize, buf, 0, windowSize * 2);
- writeIndex -= windowSize;
- readIndex -= windowSize;
- }
- /**
- * Used by subclasses to signal the next block contains a back-reference with the given coordinates.
- *
- * @param offset the offset of the back-reference
- * @param length the length of the back-reference
- * @throws IllegalArgumentException if offset not bigger than 0 or bigger than the number of bytes available for back-references or if length is negative
- */
- protected final void startBackReference(final int offset, final long length) {
- if (offset <= 0 || offset > writeIndex) {
- throw new IllegalArgumentException("offset must be bigger than 0 but not bigger than the number" + " of bytes available for back-references");
- }
- if (length < 0) {
- throw new IllegalArgumentException("length must not be negative");
- }
- backReferenceOffset = offset;
- bytesRemaining = length;
- }
- /**
- * Used by subclasses to signal the next block contains the given amount of literal data.
- *
- * @param length the length of the block
- * @throws IllegalArgumentException if length is negative
- */
- protected final void startLiteral(final long length) {
- if (length < 0) {
- throw new IllegalArgumentException("length must not be negative");
- }
- bytesRemaining = length;
- }
- private void tryToCopy(final int bytesToCopy) {
- // this will fit into the buffer without sliding and not
- // require more than is available inside the back-reference
- final int copy = Math.min((int) Math.min(bytesToCopy, bytesRemaining), buf.length - writeIndex);
- if (copy == 0) {
- // NOP
- } else if (backReferenceOffset == 1) { // pretty common special case
- final byte last = buf[writeIndex - 1];
- Arrays.fill(buf, writeIndex, writeIndex + copy, last);
- writeIndex += copy;
- } else if (copy < backReferenceOffset) {
- System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, copy);
- writeIndex += copy;
- } else {
- // back-reference overlaps with the bytes created from it
- // like go back two bytes and then copy six (by copying
- // the last two bytes three time).
- final int fullRots = copy / backReferenceOffset;
- for (int i = 0; i < fullRots; i++) {
- System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, backReferenceOffset);
- writeIndex += backReferenceOffset;
- }
- final int pad = copy - backReferenceOffset * fullRots;
- if (pad > 0) {
- System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, pad);
- writeIndex += pad;
- }
- }
- bytesRemaining -= copy;
- }
- private void tryToReadLiteral(final int bytesToRead) throws IOException {
- // min of "what is still inside the literal", "what does the user want" and "how much can fit into the buffer"
- final int reallyTryToRead = Math.min((int) Math.min(bytesToRead, bytesRemaining), buf.length - writeIndex);
- final int bytesRead = reallyTryToRead > 0 ? IOUtils.readFully(in, buf, writeIndex, reallyTryToRead) : 0 /* happens for bytesRemaining == 0 */;
- count(bytesRead);
- if (reallyTryToRead != bytesRead) {
- throw new IOException("Premature end of stream reading literal");
- }
- writeIndex += reallyTryToRead;
- bytesRemaining -= reallyTryToRead;
- }
- }