1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * https://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.commons.compress.archivers;
20
21 import java.io.FilterInputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.nio.charset.Charset;
25 import java.util.Iterator;
26 import java.util.Objects;
27
28 import org.apache.commons.io.Charsets;
29 import org.apache.commons.io.function.IOConsumer;
30 import org.apache.commons.io.function.IOIterator;
31 import org.apache.commons.io.input.NullInputStream;
32
33 /**
34 * Archive input streams <strong>MUST</strong> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream
35 * generates EOF for the end of data in each entry as well as at the end of the file proper.
36 * <p>
37 * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry.
38 * </p>
39 * <p>
40 * The input stream classes must also implement a method with the signature:
41 * </p>
42 *
43 * <pre>
44 * public static boolean matches(byte[] signature, int length)
45 * </pre>
46 * <p>
47 * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream.
48 * </p>
49 *
50 * @param <E> The type of {@link ArchiveEntry} produced.
51 */
52 public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream {
53
54 /**
55 * An iterator over a collection of a specific {@link ArchiveEntry} type.
56 */
57 final class ArchiveEntryIOIterator implements IOIterator<E> {
58
59 private E next;
60
61 @Override
62 public boolean hasNext() throws IOException {
63 if (next == null) {
64 next = getNextEntry();
65 }
66 return next != null;
67 }
68
69 @Override
70 public synchronized E next() throws IOException {
71 if (next != null) {
72 final E e = next;
73 next = null;
74 return e;
75 }
76 return getNextEntry();
77 }
78
79 /**
80 * Always returns null, this is a "native" IOIterator.
81 *
82 * @return Always returns null.
83 */
84 @Override
85 public Iterator<E> unwrap() {
86 return null;
87 }
88
89 }
90
91 private static final int BYTE_MASK = 0xFF;
92
93 private final byte[] single = new byte[1];
94
95 /** The number of bytes read in this stream. */
96 private long bytesRead;
97
98 private Charset charset;
99
100 /**
101 * Constructs a new instance.
102 */
103 @SuppressWarnings("resource")
104 public ArchiveInputStream() {
105 this(new NullInputStream(), Charset.defaultCharset());
106 }
107
108 /**
109 * Constructs a new instance.
110 *
111 * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
112 * @param charset charset.
113 * @since 1.26.0
114 */
115 // This will be protected once subclasses use builders.
116 private ArchiveInputStream(final InputStream inputStream, final Charset charset) {
117 super(inputStream);
118 this.charset = Charsets.toCharset(charset);
119 }
120
121 /**
122 * Constructs a new instance.
123 *
124 * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
125 * @param charsetName charset name.
126 * @since 1.26.0
127 */
128 protected ArchiveInputStream(final InputStream inputStream, final String charsetName) {
129 this(inputStream, Charsets.toCharset(charsetName));
130 }
131
132 /**
133 * Tests whether this stream is able to read the given entry.
134 * <p>
135 * Some archive formats support variants or details that are not supported (yet).
136 * </p>
137 *
138 * @param archiveEntry the entry to test.
139 * @return This implementation always returns true.
140 * @since 1.1
141 */
142 public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
143 return true;
144 }
145
146 /**
147 * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1).
148 *
149 * @param read the number of bytes read.
150 */
151 protected void count(final int read) {
152 count((long) read);
153 }
154
155 /**
156 * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1).
157 *
158 * @param read the number of bytes read.
159 * @since 1.1
160 */
161 protected void count(final long read) {
162 if (read != -1) {
163 bytesRead += read;
164 }
165 }
166
167 /**
168 * Performs the given action for each element of the stream until all elements have been processed or the action throws an exception. Actions are performed
169 * in the order of iteration. Exceptions thrown by the action are relayed to the caller.
170 * <p>
171 * The behavior of this method is unspecified if the action performs side-effects that modify the underlying source of elements, unless an overriding class
172 * has specified a concurrent modification policy.
173 * </p>
174 *
175 * @param action The action to be performed for each element.
176 * @throws IOException if an I/O error occurs.
177 * @throws NullPointerException if the specified action is null.
178 * @since 1.27.0
179 */
180 public void forEach(final IOConsumer<? super E> action) throws IOException {
181 iterator().forEachRemaining(Objects.requireNonNull(action));
182 }
183
184 /**
185 * Gets the current number of bytes read from this stream.
186 *
187 * @return the number of read bytes.
188 * @since 1.1
189 */
190 public long getBytesRead() {
191 return bytesRead;
192 }
193
194 /**
195 * Gets the Charset.
196 *
197 * @return the Charset.
198 */
199 public Charset getCharset() {
200 return charset;
201 }
202
203 /**
204 * Gets the current number of bytes read from this stream.
205 *
206 * @return the number of read.
207 * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead.
208 */
209 @Deprecated
210 public int getCount() {
211 return (int) bytesRead;
212 }
213
214 /**
215 * Gets the next Archive Entry in this Stream.
216 *
217 * @return the next entry, or {@code null} if there are no more entries.
218 * @throws IOException if the next entry could not be read.
219 */
220 public abstract E getNextEntry() throws IOException;
221
222 /**
223 * Returns an iterator over the SubField elements in this extra field in proper sequence.
224 *
225 * @return an iterator over the SubField elements in this extra field in proper sequence.
226 * @since 1.27.0
227 */
228 public IOIterator<E> iterator() {
229 return new ArchiveEntryIOIterator();
230 }
231
232 /**
233 * Does nothing.
234 * <p>
235 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
236 * </p>
237 *
238 * @param readlimit ignored.
239 */
240 @Override
241 public synchronized void mark(final int readlimit) {
242 // noop
243 }
244
245 /**
246 * Always returns false.
247 * <p>
248 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
249 * </p>
250 *
251 * @return Always returns false.
252 */
253 @Override
254 public boolean markSupported() {
255 return false;
256 }
257
258 /**
259 * Decrements the counter of already read bytes.
260 *
261 * @param pushedBack the number of bytes pushed back.
262 * @since 1.1
263 */
264 protected void pushedBackBytes(final long pushedBack) {
265 bytesRead -= pushedBack;
266 }
267
268 /**
269 * Reads a byte of data. This method will block until enough input is available.
270 * <p>
271 * Simply calls the {@link #read(byte[], int, int)} method.
272 * </p>
273 * <p>
274 * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise.
275 * </p>
276 *
277 * @return the byte read, or -1 if end of input is reached.
278 * @throws IOException if an I/O error has occurred.
279 */
280 @Override
281 public int read() throws IOException {
282 final int num = read(single, 0, 1);
283 return num == -1 ? -1 : single[0] & BYTE_MASK;
284 }
285
286 /**
287 * Does nothing.
288 * <p>
289 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
290 * </p>
291 *
292 * @throws IOException not thrown here but may be thrown from a subclass.
293 */
294 @Override
295 public synchronized void reset() throws IOException {
296 // noop
297 }
298 }