ArchiveInputStream.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  * http://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */
  19. package org.apache.commons.compress.archivers;

  20. import java.io.FilterInputStream;
  21. import java.io.IOException;
  22. import java.io.InputStream;
  23. import java.nio.charset.Charset;
  24. import java.util.Iterator;
  25. import java.util.Objects;

  26. import org.apache.commons.io.Charsets;
  27. import org.apache.commons.io.function.IOConsumer;
  28. import org.apache.commons.io.function.IOIterator;
  29. import org.apache.commons.io.input.NullInputStream;

  30. /**
  31.  * Archive input streams <b>MUST</b> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream generates EOF
  32.  * for the end of data in each entry as well as at the end of the file proper.
  33.  * <p>
  34.  * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry.
  35.  * </p>
  36.  * <p>
  37.  * The input stream classes must also implement a method with the signature:
  38.  * </p>
  39.  * <pre>
  40.  * public static boolean matches(byte[] signature, int length)
  41.  * </pre>
  42.  * <p>
  43.  * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream.
  44.  * </p>
  45.  *
  46.  * @param <E> The type of {@link ArchiveEntry} produced.
  47.  */
  48. public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream {

  49.     class ArchiveEntryIOIterator implements IOIterator<E> {

  50.         private E next;

  51.         @Override
  52.         public boolean hasNext() throws IOException {
  53.             if (next == null) {
  54.                 next = getNextEntry();
  55.             }
  56.             return next != null;
  57.         }

  58.         @Override
  59.         public synchronized E next() throws IOException {
  60.             if (next != null) {
  61.                 final E e = next;
  62.                 next = null;
  63.                 return e;
  64.             }
  65.             return getNextEntry();
  66.         }

  67.         /**
  68.          * Always returns null, this is a "native" IOIterator.
  69.          *
  70.          * @return null.
  71.          */
  72.         @Override
  73.         public Iterator<E> unwrap() {
  74.             return null;
  75.         }

  76.     }

  77.     private static final int BYTE_MASK = 0xFF;

  78.     private final byte[] single = new byte[1];

  79.     /** The number of bytes read in this stream */
  80.     private long bytesRead;

  81.     private Charset charset;

  82.     /**
  83.      * Constructs a new instance.
  84.      */
  85.     public ArchiveInputStream() {
  86.         this(NullInputStream.INSTANCE, Charset.defaultCharset());
  87.     }

  88.     /**
  89.      * Constructs a new instance.
  90.      *
  91.      * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
  92.      * @param charset charset.
  93.      * @since 1.26.0
  94.      */
  95.     // This will be protected once subclasses use builders.
  96.     private ArchiveInputStream(final InputStream inputStream, final Charset charset) {
  97.         super(inputStream);
  98.         this.charset = Charsets.toCharset(charset);
  99.     }

  100.     /**
  101.      * Constructs a new instance.
  102.      *
  103.      * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
  104.      * @param charsetName charset name.
  105.      * @since 1.26.0
  106.      */
  107.     protected ArchiveInputStream(final InputStream inputStream, final String charsetName) {
  108.         this(inputStream, Charsets.toCharset(charsetName));
  109.     }

  110.     /**
  111.      * Whether this stream is able to read the given entry.
  112.      * <p>
  113.      * Some archive formats support variants or details that are not supported (yet).
  114.      * </p>
  115.      *
  116.      * @param archiveEntry the entry to test
  117.      * @return This implementation always returns true.
  118.      *
  119.      * @since 1.1
  120.      */
  121.     public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
  122.         return true;
  123.     }

  124.     /**
  125.      * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
  126.      *
  127.      * @param read the number of bytes read
  128.      */
  129.     protected void count(final int read) {
  130.         count((long) read);
  131.     }

  132.     /**
  133.      * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
  134.      *
  135.      * @param read the number of bytes read
  136.      * @since 1.1
  137.      */
  138.     protected void count(final long read) {
  139.         if (read != -1) {
  140.             bytesRead += read;
  141.         }
  142.     }

  143.     /**
  144.      * Performs the given action for each element of the stream until all elements have been processed or the action throws an exception. Actions are performed
  145.      * in the order of iteration. Exceptions thrown by the action are relayed to the caller.
  146.      * <p>
  147.      * The behavior of this method is unspecified if the action performs side-effects that modify the underlying source of elements, unless an overriding class
  148.      * has specified a concurrent modification policy.
  149.      * </p>
  150.      *
  151.      * @param action The action to be performed for each element
  152.      * @throws IOException          if an I/O error occurs.
  153.      * @throws NullPointerException if the specified action is null
  154.      * @since 2.17.0
  155.      */
  156.     public void forEach(final IOConsumer<? super E> action) throws IOException {
  157.         iterator().forEachRemaining(Objects.requireNonNull(action));
  158.     }

  159.     /**
  160.      * Gets the current number of bytes read from this stream.
  161.      *
  162.      * @return the number of read bytes
  163.      * @since 1.1
  164.      */
  165.     public long getBytesRead() {
  166.         return bytesRead;
  167.     }

  168.     /**
  169.      * Gets the Charest.
  170.      *
  171.      * @return the Charest.
  172.      */
  173.     public Charset getCharset() {
  174.         return charset;
  175.     }

  176.     /**
  177.      * Gets the current number of bytes read from this stream.
  178.      *
  179.      * @return the number of read bytes
  180.      * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead.
  181.      */
  182.     @Deprecated
  183.     public int getCount() {
  184.         return (int) bytesRead;
  185.     }

  186.     /**
  187.      * Gets the next Archive Entry in this Stream.
  188.      *
  189.      * @return the next entry, or {@code null} if there are no more entries.
  190.      * @throws IOException if the next entry could not be read.
  191.      */
  192.     public abstract E getNextEntry() throws IOException;

  193.     public IOIterator<E> iterator() {
  194.         return new ArchiveEntryIOIterator();
  195.     }

  196.     /**
  197.      * Does nothing.
  198.      *
  199.      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
  200.      *
  201.      * @param readlimit ignored.
  202.      */
  203.     @Override
  204.     public synchronized void mark(final int readlimit) {
  205.         // noop
  206.     }

  207.     /**
  208.      * Always returns false.
  209.      *
  210.      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
  211.      *
  212.      * @return Always returns false.
  213.      */
  214.     @Override
  215.     public boolean markSupported() {
  216.         return false;
  217.     }

  218.     /**
  219.      * Decrements the counter of already read bytes.
  220.      *
  221.      * @param pushedBack the number of bytes pushed back.
  222.      * @since 1.1
  223.      */
  224.     protected void pushedBackBytes(final long pushedBack) {
  225.         bytesRead -= pushedBack;
  226.     }

  227.     /**
  228.      * Reads a byte of data. This method will block until enough input is available.
  229.      *
  230.      * Simply calls the {@link #read(byte[], int, int)} method.
  231.      *
  232.      * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise.
  233.      *
  234.      * @return the byte read, or -1 if end of input is reached
  235.      * @throws IOException if an I/O error has occurred
  236.      */
  237.     @Override
  238.     public int read() throws IOException {
  239.         final int num = read(single, 0, 1);
  240.         return num == -1 ? -1 : single[0] & BYTE_MASK;
  241.     }

  242.     /**
  243.      * Does nothing.
  244.      *
  245.      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
  246.      *
  247.      * @throws IOException not thrown here but may be thrown from a subclass.
  248.      */
  249.     @Override
  250.     public synchronized void reset() throws IOException {
  251.         // noop
  252.     }
  253. }