001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.FilterInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.charset.Charset;
025
026import org.apache.commons.io.Charsets;
027import org.apache.commons.io.input.NullInputStream;
028
029/**
030 * Archive input streams <b>MUST</b> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream generates EOF
031 * for the end of data in each entry as well as at the end of the file proper.
032 * <p>
033 * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry.
034 * </p>
035 * <p>
036 * The input stream classes must also implement a method with the signature:
037 * </p>
038 * <pre>
039 * public static boolean matches(byte[] signature, int length)
040 * </pre>
041 * <p>
042 * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream.
043 * </p>
044 *
045 * @param <E> The type of {@link ArchiveEntry} produced.
046 */
047public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream {
048
049    private static final int BYTE_MASK = 0xFF;
050
051    private final byte[] single = new byte[1];
052
053    /** The number of bytes read in this stream */
054    private long bytesRead;
055
056    private Charset charset;
057
058    /**
059     * Constructs a new instance.
060     */
061    public ArchiveInputStream() {
062        this(NullInputStream.INSTANCE, Charset.defaultCharset());
063    }
064
065    /**
066     * Constructs a new instance.
067     *
068     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
069     * @param charset charset.
070     * @since 1.26.0
071     */
072    // This will be protected once subclasses use builders.
073    private ArchiveInputStream(final InputStream inputStream, final Charset charset) {
074        super(inputStream);
075        this.charset = Charsets.toCharset(charset);
076    }
077
078    /**
079     * Constructs a new instance.
080     *
081     * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
082     * @param charsetName charset name.
083     * @since 1.26.0
084     */
085    protected ArchiveInputStream(final InputStream inputStream, final String charsetName) {
086        this(inputStream, Charsets.toCharset(charsetName));
087    }
088
089    /**
090     * Whether this stream is able to read the given entry.
091     * <p>
092     * Some archive formats support variants or details that are not supported (yet).
093     * </p>
094     *
095     * @param archiveEntry the entry to test
096     * @return This implementation always returns true.
097     *
098     * @since 1.1
099     */
100    public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
101        return true;
102    }
103
104    /**
105     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
106     *
107     * @param read the number of bytes read
108     */
109    protected void count(final int read) {
110        count((long) read);
111    }
112
113    /**
114     * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
115     *
116     * @param read the number of bytes read
117     * @since 1.1
118     */
119    protected void count(final long read) {
120        if (read != -1) {
121            bytesRead += read;
122        }
123    }
124
125    /**
126     * Gets the current number of bytes read from this stream.
127     *
128     * @return the number of read bytes
129     * @since 1.1
130     */
131    public long getBytesRead() {
132        return bytesRead;
133    }
134
135    /**
136     * Gets the Charest.
137     *
138     * @return the Charest.
139     */
140    public Charset getCharset() {
141        return charset;
142    }
143
144    /**
145     * Gets the current number of bytes read from this stream.
146     *
147     * @return the number of read bytes
148     * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead.
149     */
150    @Deprecated
151    public int getCount() {
152        return (int) bytesRead;
153    }
154
155    /**
156     * Gets the next Archive Entry in this Stream.
157     *
158     * @return the next entry, or {@code null} if there are no more entries
159     * @throws IOException if the next entry could not be read
160     */
161    public abstract E getNextEntry() throws IOException;
162
163    /**
164     * Does nothing.
165     *
166     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
167     *
168     * @param readlimit ignored.
169     */
170    @Override
171    public synchronized void mark(final int readlimit) {
172        // noop
173    }
174
175    /**
176     * Always returns false.
177     *
178     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
179     *
180     * @return Always returns false.
181     */
182    @Override
183    public boolean markSupported() {
184        return false;
185    }
186
187    /**
188     * Decrements the counter of already read bytes.
189     *
190     * @param pushedBack the number of bytes pushed back.
191     * @since 1.1
192     */
193    protected void pushedBackBytes(final long pushedBack) {
194        bytesRead -= pushedBack;
195    }
196
197    /**
198     * Reads a byte of data. This method will block until enough input is available.
199     *
200     * Simply calls the {@link #read(byte[], int, int)} method.
201     *
202     * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise.
203     *
204     * @return the byte read, or -1 if end of input is reached
205     * @throws IOException if an I/O error has occurred
206     */
207    @Override
208    public int read() throws IOException {
209        final int num = read(single, 0, 1);
210        return num == -1 ? -1 : single[0] & BYTE_MASK;
211    }
212
213    /**
214     * Does nothing.
215     *
216     * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
217     *
218     * @throws IOException not thrown here but may be thrown from a subclass.
219     */
220    @Override
221    public synchronized void reset() throws IOException {
222        // noop
223    }
224}