View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.archivers;
20  
21  import java.io.FilterInputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.charset.Charset;
25  import java.util.Iterator;
26  import java.util.Objects;
27  
28  import org.apache.commons.io.Charsets;
29  import org.apache.commons.io.function.IOConsumer;
30  import org.apache.commons.io.function.IOIterator;
31  import org.apache.commons.io.input.NullInputStream;
32  
33  /**
34   * Archive input streams <strong>MUST</strong> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream
35   * generates EOF for the end of data in each entry as well as at the end of the file proper.
36   * <p>
37   * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry.
38   * </p>
39   * <p>
40   * The input stream classes must also implement a method with the signature:
41   * </p>
42   *
43   * <pre>
44   * public static boolean matches(byte[] signature, int length)
45   * </pre>
46   * <p>
47   * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream.
48   * </p>
49   *
50   * @param <E> The type of {@link ArchiveEntry} produced.
51   */
52  public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream {
53  
54      /**
55       * An iterator over a collection of a specific {@link ArchiveEntry} type.
56       */
57      final class ArchiveEntryIOIterator implements IOIterator<E> {
58  
59          private E next;
60  
61          @Override
62          public boolean hasNext() throws IOException {
63              if (next == null) {
64                  next = getNextEntry();
65              }
66              return next != null;
67          }
68  
69          @Override
70          public synchronized E next() throws IOException {
71              if (next != null) {
72                  final E e = next;
73                  next = null;
74                  return e;
75              }
76              return getNextEntry();
77          }
78  
79          /**
80           * Always returns null, this is a "native" IOIterator.
81           *
82           * @return Always returns null.
83           */
84          @Override
85          public Iterator<E> unwrap() {
86              return null;
87          }
88  
89      }
90  
91      private static final int BYTE_MASK = 0xFF;
92  
93      private final byte[] single = new byte[1];
94  
95      /** The number of bytes read in this stream. */
96      private long bytesRead;
97  
98      private Charset charset;
99  
100     /**
101      * Constructs a new instance.
102      */
103     @SuppressWarnings("resource")
104     public ArchiveInputStream() {
105         this(new NullInputStream(), Charset.defaultCharset());
106     }
107 
108     /**
109      * Constructs a new instance.
110      *
111      * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
112      * @param charset charset.
113      * @since 1.26.0
114      */
115     // This will be protected once subclasses use builders.
116     private ArchiveInputStream(final InputStream inputStream, final Charset charset) {
117         super(inputStream);
118         this.charset = Charsets.toCharset(charset);
119     }
120 
121     /**
122      * Constructs a new instance.
123      *
124      * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
125      * @param charsetName charset name.
126      * @since 1.26.0
127      */
128     protected ArchiveInputStream(final InputStream inputStream, final String charsetName) {
129         this(inputStream, Charsets.toCharset(charsetName));
130     }
131 
132     /**
133      * Tests whether this stream is able to read the given entry.
134      * <p>
135      * Some archive formats support variants or details that are not supported (yet).
136      * </p>
137      *
138      * @param archiveEntry the entry to test.
139      * @return This implementation always returns true.
140      * @since 1.1
141      */
142     public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
143         return true;
144     }
145 
146     /**
147      * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1).
148      *
149      * @param read the number of bytes read.
150      */
151     protected void count(final int read) {
152         count((long) read);
153     }
154 
155     /**
156      * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1).
157      *
158      * @param read the number of bytes read.
159      * @since 1.1
160      */
161     protected void count(final long read) {
162         if (read != -1) {
163             bytesRead += read;
164         }
165     }
166 
167     /**
168      * Performs the given action for each element of the stream until all elements have been processed or the action throws an exception. Actions are performed
169      * in the order of iteration. Exceptions thrown by the action are relayed to the caller.
170      * <p>
171      * The behavior of this method is unspecified if the action performs side-effects that modify the underlying source of elements, unless an overriding class
172      * has specified a concurrent modification policy.
173      * </p>
174      *
175      * @param action The action to be performed for each element.
176      * @throws IOException          if an I/O error occurs.
177      * @throws NullPointerException if the specified action is null.
178      * @since 1.27.0
179      */
180     public void forEach(final IOConsumer<? super E> action) throws IOException {
181         iterator().forEachRemaining(Objects.requireNonNull(action));
182     }
183 
184     /**
185      * Gets the current number of bytes read from this stream.
186      *
187      * @return the number of read bytes.
188      * @since 1.1
189      */
190     public long getBytesRead() {
191         return bytesRead;
192     }
193 
194     /**
195      * Gets the Charset.
196      *
197      * @return the Charset.
198      */
199     public Charset getCharset() {
200         return charset;
201     }
202 
203     /**
204      * Gets the current number of bytes read from this stream.
205      *
206      * @return the number of read.
207      * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead.
208      */
209     @Deprecated
210     public int getCount() {
211         return (int) bytesRead;
212     }
213 
214     /**
215      * Gets the next Archive Entry in this Stream.
216      *
217      * @return the next entry, or {@code null} if there are no more entries.
218      * @throws IOException if the next entry could not be read.
219      */
220     public abstract E getNextEntry() throws IOException;
221 
222     /**
223      * Returns an iterator over the SubField elements in this extra field in proper sequence.
224      *
225      * @return an iterator over the SubField elements in this extra field in proper sequence.
226      * @since 1.27.0
227      */
228     public IOIterator<E> iterator() {
229         return new ArchiveEntryIOIterator();
230     }
231 
232     /**
233      * Does nothing.
234      * <p>
235      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
236      * </p>
237      *
238      * @param readlimit ignored.
239      */
240     @Override
241     public synchronized void mark(final int readlimit) {
242         // noop
243     }
244 
245     /**
246      * Always returns false.
247      * <p>
248      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
249      * </p>
250      *
251      * @return Always returns false.
252      */
253     @Override
254     public boolean markSupported() {
255         return false;
256     }
257 
258     /**
259      * Decrements the counter of already read bytes.
260      *
261      * @param pushedBack the number of bytes pushed back.
262      * @since 1.1
263      */
264     protected void pushedBackBytes(final long pushedBack) {
265         bytesRead -= pushedBack;
266     }
267 
268     /**
269      * Reads a byte of data. This method will block until enough input is available.
270      * <p>
271      * Simply calls the {@link #read(byte[], int, int)} method.
272      * </p>
273      * <p>
274      * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise.
275      * </p>
276      *
277      * @return the byte read, or -1 if end of input is reached.
278      * @throws IOException if an I/O error has occurred.
279      */
280     @Override
281     public int read() throws IOException {
282         final int num = read(single, 0, 1);
283         return num == -1 ? -1 : single[0] & BYTE_MASK;
284     }
285 
286     /**
287      * Does nothing.
288      * <p>
289      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
290      * </p>
291      *
292      * @throws IOException not thrown here but may be thrown from a subclass.
293      */
294     @Override
295     public synchronized void reset() throws IOException {
296         // noop
297     }
298 }