001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.FilterInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.charset.Charset;
025import java.util.Iterator;
026import java.util.Objects;
027
028import org.apache.commons.io.Charsets;
029import org.apache.commons.io.function.IOConsumer;
030import org.apache.commons.io.function.IOIterator;
031import org.apache.commons.io.input.NullInputStream;
032
033/**
034 * Archive input streams <strong>MUST</strong> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream
035 * generates EOF for the end of data in each entry as well as at the end of the file proper.
036 * <p>
037 * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry.
038 * </p>
039 * <p>
040 * The input stream classes must also implement a method with the signature:
041 * </p>
042 *
043 * <pre>
044 * public static boolean matches(byte[] signature, int length)
045 * </pre>
046 * <p>
047 * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream.
048 * </p>
049 *
050 * @param <E> The type of {@link ArchiveEntry} produced.
051 */
052public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream {
053
054    /**
055     * An iterator over a collection of a specific {@link ArchiveEntry} type.
056     */
057    final class ArchiveEntryIOIterator implements IOIterator<E> {
058
059        private E next;
060
061        @Override
062        public boolean hasNext() throws IOException {
063            if (next == null) {
064                next = getNextEntry();
065            }
066            return next != null;
067        }
068
069        @Override
070        public synchronized E next() throws IOException {
071            if (next != null) {
072                final E e = next;
073                next = null;
074                return e;
075            }
076            return getNextEntry();
077        }
078
079        /**
080         * Always returns null, this is a "native" IOIterator.
081         *
082         * @return Always returns null.
083         */
084        @Override
085        public Iterator<E> unwrap() {
086            return null;
087        }
088
089    }
090
091    private static final int BYTE_MASK = 0xFF;
092
093    private final byte[] single = new byte[1];
094
095    /** The number of bytes read in this stream. */
096    private long bytesRead;
097
098    private Charset charset;
099
100    /**
101     * Constructs a new instance.
102     */
103    @SuppressWarnings("resource")
104    public ArchiveInputStream() {
105        this(new NullInputStream(), Charset.defaultCharset());
106    }
107
108    /**
109     * Constructs a new instance.
110     *
111     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
112     * @param charset charset.
113     * @since 1.26.0
114     */
115    // This will be protected once subclasses use builders.
116    private ArchiveInputStream(final InputStream inputStream, final Charset charset) {
117        super(inputStream);
118        this.charset = Charsets.toCharset(charset);
119    }
120
121    /**
122     * Constructs a new instance.
123     *
124     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
125     * @param charsetName charset name.
126     * @since 1.26.0
127     */
128    protected ArchiveInputStream(final InputStream inputStream, final String charsetName) {
129        this(inputStream, Charsets.toCharset(charsetName));
130    }
131
132    /**
133     * Tests whether this stream is able to read the given entry.
134     * <p>
135     * Some archive formats support variants or details that are not supported (yet).
136     * </p>
137     *
138     * @param archiveEntry the entry to test.
139     * @return This implementation always returns true.
140     * @since 1.1
141     */
142    public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
143        return true;
144    }
145
146    /**
147     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1).
148     *
149     * @param read the number of bytes read.
150     */
151    protected void count(final int read) {
152        count((long) read);
153    }
154
155    /**
156     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1).
157     *
158     * @param read the number of bytes read.
159     * @since 1.1
160     */
161    protected void count(final long read) {
162        if (read != -1) {
163            bytesRead += read;
164        }
165    }
166
167    /**
168     * Performs the given action for each element of the stream until all elements have been processed or the action throws an exception. Actions are performed
169     * in the order of iteration. Exceptions thrown by the action are relayed to the caller.
170     * <p>
171     * The behavior of this method is unspecified if the action performs side-effects that modify the underlying source of elements, unless an overriding class
172     * has specified a concurrent modification policy.
173     * </p>
174     *
175     * @param action The action to be performed for each element.
176     * @throws IOException          if an I/O error occurs.
177     * @throws NullPointerException if the specified action is null.
178     * @since 1.27.0
179     */
180    public void forEach(final IOConsumer<? super E> action) throws IOException {
181        iterator().forEachRemaining(Objects.requireNonNull(action));
182    }
183
184    /**
185     * Gets the current number of bytes read from this stream.
186     *
187     * @return the number of read bytes.
188     * @since 1.1
189     */
190    public long getBytesRead() {
191        return bytesRead;
192    }
193
194    /**
195     * Gets the Charset.
196     *
197     * @return the Charset.
198     */
199    public Charset getCharset() {
200        return charset;
201    }
202
203    /**
204     * Gets the current number of bytes read from this stream.
205     *
206     * @return the number of read.
207     * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead.
208     */
209    @Deprecated
210    public int getCount() {
211        return (int) bytesRead;
212    }
213
214    /**
215     * Gets the next Archive Entry in this Stream.
216     *
217     * @return the next entry, or {@code null} if there are no more entries.
218     * @throws IOException if the next entry could not be read.
219     */
220    public abstract E getNextEntry() throws IOException;
221
222    /**
223     * Returns an iterator over the SubField elements in this extra field in proper sequence.
224     *
225     * @return an iterator over the SubField elements in this extra field in proper sequence.
226     * @since 1.27.0
227     */
228    public IOIterator<E> iterator() {
229        return new ArchiveEntryIOIterator();
230    }
231
232    /**
233     * Does nothing.
234     * <p>
235     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
236     * </p>
237     *
238     * @param readlimit ignored.
239     */
240    @Override
241    public synchronized void mark(final int readlimit) {
242        // noop
243    }
244
245    /**
246     * Always returns false.
247     * <p>
248     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
249     * </p>
250     *
251     * @return Always returns false.
252     */
253    @Override
254    public boolean markSupported() {
255        return false;
256    }
257
258    /**
259     * Decrements the counter of already read bytes.
260     *
261     * @param pushedBack the number of bytes pushed back.
262     * @since 1.1
263     */
264    protected void pushedBackBytes(final long pushedBack) {
265        bytesRead -= pushedBack;
266    }
267
268    /**
269     * Reads a byte of data. This method will block until enough input is available.
270     * <p>
271     * Simply calls the {@link #read(byte[], int, int)} method.
272     * </p>
273     * <p>
274     * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise.
275     * </p>
276     *
277     * @return the byte read, or -1 if end of input is reached.
278     * @throws IOException if an I/O error has occurred.
279     */
280    @Override
281    public int read() throws IOException {
282        final int num = read(single, 0, 1);
283        return num == -1 ? -1 : single[0] & BYTE_MASK;
284    }
285
286    /**
287     * Does nothing.
288     * <p>
289     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
290     * </p>
291     *
292     * @throws IOException not thrown here but may be thrown from a subclass.
293     */
294    @Override
295    public synchronized void reset() throws IOException {
296        // noop
297    }
298}