1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 package org.apache.commons.compress.archivers; 20 21 import java.io.FilterInputStream; 22 import java.io.IOException; 23 import java.io.InputStream; 24 import java.nio.charset.Charset; 25 26 import org.apache.commons.io.Charsets; 27 import org.apache.commons.io.input.NullInputStream; 28 29 /** 30 * Archive input streams <b>MUST</b> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream generates EOF 31 * for the end of data in each entry as well as at the end of the file proper. 32 * <p> 33 * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry. 34 * </p> 35 * <p> 36 * The input stream classes must also implement a method with the signature: 37 * </p> 38 * <pre> 39 * public static boolean matches(byte[] signature, int length) 40 * </pre> 41 * <p> 42 * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream. 43 * </p> 44 * 45 * @param <E> The type of {@link ArchiveEntry} produced. 46 */ 47 public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream { 48 49 private static final int BYTE_MASK = 0xFF; 50 51 private final byte[] single = new byte[1]; 52 53 /** The number of bytes read in this stream */ 54 private long bytesRead; 55 56 private Charset charset; 57 58 /** 59 * Constructs a new instance. 60 */ 61 public ArchiveInputStream() { 62 this(NullInputStream.INSTANCE, Charset.defaultCharset()); 63 } 64 65 /** 66 * Constructs a new instance. 67 * 68 * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream. 69 * @param charset charset. 70 * @since 1.26.0 71 */ 72 // This will be protected once subclasses use builders. 73 private ArchiveInputStream(final InputStream inputStream, final Charset charset) { 74 super(inputStream); 75 this.charset = Charsets.toCharset(charset); 76 } 77 78 /** 79 * Constructs a new instance. 80 * 81 * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream. 82 * @param charsetName charset name. 83 * @since 1.26.0 84 */ 85 protected ArchiveInputStream(final InputStream inputStream, final String charsetName) { 86 this(inputStream, Charsets.toCharset(charsetName)); 87 } 88 89 /** 90 * Whether this stream is able to read the given entry. 91 * <p> 92 * Some archive formats support variants or details that are not supported (yet). 93 * </p> 94 * 95 * @param archiveEntry the entry to test 96 * @return This implementation always returns true. 97 * 98 * @since 1.1 99 */ 100 public boolean canReadEntryData(final ArchiveEntry archiveEntry) { 101 return true; 102 } 103 104 /** 105 * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1) 106 * 107 * @param read the number of bytes read 108 */ 109 protected void count(final int read) { 110 count((long) read); 111 } 112 113 /** 114 * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1) 115 * 116 * @param read the number of bytes read 117 * @since 1.1 118 */ 119 protected void count(final long read) { 120 if (read != -1) { 121 bytesRead += read; 122 } 123 } 124 125 /** 126 * Gets the current number of bytes read from this stream. 127 * 128 * @return the number of read bytes 129 * @since 1.1 130 */ 131 public long getBytesRead() { 132 return bytesRead; 133 } 134 135 /** 136 * Gets the Charest. 137 * 138 * @return the Charest. 139 */ 140 public Charset getCharset() { 141 return charset; 142 } 143 144 /** 145 * Gets the current number of bytes read from this stream. 146 * 147 * @return the number of read bytes 148 * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead. 149 */ 150 @Deprecated 151 public int getCount() { 152 return (int) bytesRead; 153 } 154 155 /** 156 * Gets the next Archive Entry in this Stream. 157 * 158 * @return the next entry, or {@code null} if there are no more entries 159 * @throws IOException if the next entry could not be read 160 */ 161 public abstract E getNextEntry() throws IOException; 162 163 /** 164 * Does nothing. 165 * 166 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 167 * 168 * @param readlimit ignored. 169 */ 170 @Override 171 public synchronized void mark(final int readlimit) { 172 // noop 173 } 174 175 /** 176 * Always returns false. 177 * 178 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 179 * 180 * @return Always returns false. 181 */ 182 @Override 183 public boolean markSupported() { 184 return false; 185 } 186 187 /** 188 * Decrements the counter of already read bytes. 189 * 190 * @param pushedBack the number of bytes pushed back. 191 * @since 1.1 192 */ 193 protected void pushedBackBytes(final long pushedBack) { 194 bytesRead -= pushedBack; 195 } 196 197 /** 198 * Reads a byte of data. This method will block until enough input is available. 199 * 200 * Simply calls the {@link #read(byte[], int, int)} method. 201 * 202 * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise. 203 * 204 * @return the byte read, or -1 if end of input is reached 205 * @throws IOException if an I/O error has occurred 206 */ 207 @Override 208 public int read() throws IOException { 209 final int num = read(single, 0, 1); 210 return num == -1 ? -1 : single[0] & BYTE_MASK; 211 } 212 213 /** 214 * Does nothing. 215 * 216 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 217 * 218 * @throws IOException not thrown here but may be thrown from a subclass. 219 */ 220 @Override 221 public synchronized void reset() throws IOException { 222 // noop 223 } 224 }