View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.archivers;
20  
21  import java.io.FilterInputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.charset.Charset;
25  
26  import org.apache.commons.io.Charsets;
27  import org.apache.commons.io.input.NullInputStream;
28  
29  /**
30   * Archive input streams <b>MUST</b> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream generates EOF
31   * for the end of data in each entry as well as at the end of the file proper.
32   * <p>
33   * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry.
34   * </p>
35   * <p>
36   * The input stream classes must also implement a method with the signature:
37   * </p>
38   * <pre>
39   * public static boolean matches(byte[] signature, int length)
40   * </pre>
41   * <p>
42   * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream.
43   * </p>
44   *
45   * @param <E> The type of {@link ArchiveEntry} produced.
46   */
47  public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream {
48  
49      private static final int BYTE_MASK = 0xFF;
50  
51      private final byte[] single = new byte[1];
52  
53      /** The number of bytes read in this stream */
54      private long bytesRead;
55  
56      private Charset charset;
57  
58      /**
59       * Constructs a new instance.
60       */
61      public ArchiveInputStream() {
62          this(NullInputStream.INSTANCE, Charset.defaultCharset());
63      }
64  
65      /**
66       * Constructs a new instance.
67       *
68       * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
69       * @param charset charset.
70       * @since 1.26.0
71       */
72      // This will be protected once subclasses use builders.
73      private ArchiveInputStream(final InputStream inputStream, final Charset charset) {
74          super(inputStream);
75          this.charset = Charsets.toCharset(charset);
76      }
77  
78      /**
79       * Constructs a new instance.
80       *
81       * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream.
82       * @param charsetName charset name.
83       * @since 1.26.0
84       */
85      protected ArchiveInputStream(final InputStream inputStream, final String charsetName) {
86          this(inputStream, Charsets.toCharset(charsetName));
87      }
88  
89      /**
90       * Whether this stream is able to read the given entry.
91       * <p>
92       * Some archive formats support variants or details that are not supported (yet).
93       * </p>
94       *
95       * @param archiveEntry the entry to test
96       * @return This implementation always returns true.
97       *
98       * @since 1.1
99       */
100     public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
101         return true;
102     }
103 
104     /**
105      * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
106      *
107      * @param read the number of bytes read
108      */
109     protected void count(final int read) {
110         count((long) read);
111     }
112 
113     /**
114      * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1)
115      *
116      * @param read the number of bytes read
117      * @since 1.1
118      */
119     protected void count(final long read) {
120         if (read != -1) {
121             bytesRead += read;
122         }
123     }
124 
125     /**
126      * Gets the current number of bytes read from this stream.
127      *
128      * @return the number of read bytes
129      * @since 1.1
130      */
131     public long getBytesRead() {
132         return bytesRead;
133     }
134 
135     /**
136      * Gets the Charest.
137      *
138      * @return the Charest.
139      */
140     public Charset getCharset() {
141         return charset;
142     }
143 
144     /**
145      * Gets the current number of bytes read from this stream.
146      *
147      * @return the number of read bytes
148      * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead.
149      */
150     @Deprecated
151     public int getCount() {
152         return (int) bytesRead;
153     }
154 
155     /**
156      * Gets the next Archive Entry in this Stream.
157      *
158      * @return the next entry, or {@code null} if there are no more entries
159      * @throws IOException if the next entry could not be read
160      */
161     public abstract E getNextEntry() throws IOException;
162 
163     /**
164      * Does nothing.
165      *
166      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
167      *
168      * @param readlimit ignored.
169      */
170     @Override
171     public synchronized void mark(final int readlimit) {
172         // noop
173     }
174 
175     /**
176      * Always returns false.
177      *
178      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
179      *
180      * @return Always returns false.
181      */
182     @Override
183     public boolean markSupported() {
184         return false;
185     }
186 
187     /**
188      * Decrements the counter of already read bytes.
189      *
190      * @param pushedBack the number of bytes pushed back.
191      * @since 1.1
192      */
193     protected void pushedBackBytes(final long pushedBack) {
194         bytesRead -= pushedBack;
195     }
196 
197     /**
198      * Reads a byte of data. This method will block until enough input is available.
199      *
200      * Simply calls the {@link #read(byte[], int, int)} method.
201      *
202      * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise.
203      *
204      * @return the byte read, or -1 if end of input is reached
205      * @throws IOException if an I/O error has occurred
206      */
207     @Override
208     public int read() throws IOException {
209         final int num = read(single, 0, 1);
210         return num == -1 ? -1 : single[0] & BYTE_MASK;
211     }
212 
213     /**
214      * Does nothing.
215      *
216      * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream.
217      *
218      * @throws IOException not thrown here but may be thrown from a subclass.
219      */
220     @Override
221     public synchronized void reset() throws IOException {
222         // noop
223     }
224 }