001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers; 020 021import java.io.FilterInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.nio.charset.Charset; 025import java.util.Iterator; 026import java.util.Objects; 027 028import org.apache.commons.io.Charsets; 029import org.apache.commons.io.function.IOConsumer; 030import org.apache.commons.io.function.IOIterator; 031import org.apache.commons.io.input.NullInputStream; 032 033/** 034 * Archive input streams <strong>MUST</strong> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream 035 * generates EOF for the end of data in each entry as well as at the end of the file proper. 036 * <p> 037 * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry. 038 * </p> 039 * <p> 040 * The input stream classes must also implement a method with the signature: 041 * </p> 042 * 043 * <pre> 044 * public static boolean matches(byte[] signature, int length) 045 * </pre> 046 * <p> 047 * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream. 048 * </p> 049 * 050 * @param <E> The type of {@link ArchiveEntry} produced. 051 */ 052public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream { 053 054 /** 055 * An iterator over a collection of a specific {@link ArchiveEntry} type. 056 */ 057 final class ArchiveEntryIOIterator implements IOIterator<E> { 058 059 private E next; 060 061 @Override 062 public boolean hasNext() throws IOException { 063 if (next == null) { 064 next = getNextEntry(); 065 } 066 return next != null; 067 } 068 069 @Override 070 public synchronized E next() throws IOException { 071 if (next != null) { 072 final E e = next; 073 next = null; 074 return e; 075 } 076 return getNextEntry(); 077 } 078 079 /** 080 * Always returns null, this is a "native" IOIterator. 081 * 082 * @return Always returns null. 083 */ 084 @Override 085 public Iterator<E> unwrap() { 086 return null; 087 } 088 089 } 090 091 private static final int BYTE_MASK = 0xFF; 092 093 private final byte[] single = new byte[1]; 094 095 /** The number of bytes read in this stream. */ 096 private long bytesRead; 097 098 private Charset charset; 099 100 /** 101 * Constructs a new instance. 102 */ 103 @SuppressWarnings("resource") 104 public ArchiveInputStream() { 105 this(new NullInputStream(), Charset.defaultCharset()); 106 } 107 108 /** 109 * Constructs a new instance. 110 * 111 * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream. 112 * @param charset charset. 113 * @since 1.26.0 114 */ 115 // This will be protected once subclasses use builders. 116 private ArchiveInputStream(final InputStream inputStream, final Charset charset) { 117 super(inputStream); 118 this.charset = Charsets.toCharset(charset); 119 } 120 121 /** 122 * Constructs a new instance. 123 * 124 * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream. 125 * @param charsetName charset name. 126 * @since 1.26.0 127 */ 128 protected ArchiveInputStream(final InputStream inputStream, final String charsetName) { 129 this(inputStream, Charsets.toCharset(charsetName)); 130 } 131 132 /** 133 * Tests whether this stream is able to read the given entry. 134 * <p> 135 * Some archive formats support variants or details that are not supported (yet). 136 * </p> 137 * 138 * @param archiveEntry the entry to test. 139 * @return This implementation always returns true. 140 * @since 1.1 141 */ 142 public boolean canReadEntryData(final ArchiveEntry archiveEntry) { 143 return true; 144 } 145 146 /** 147 * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1). 148 * 149 * @param read the number of bytes read. 150 */ 151 protected void count(final int read) { 152 count((long) read); 153 } 154 155 /** 156 * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1). 157 * 158 * @param read the number of bytes read. 159 * @since 1.1 160 */ 161 protected void count(final long read) { 162 if (read != -1) { 163 bytesRead += read; 164 } 165 } 166 167 /** 168 * Performs the given action for each element of the stream until all elements have been processed or the action throws an exception. Actions are performed 169 * in the order of iteration. Exceptions thrown by the action are relayed to the caller. 170 * <p> 171 * The behavior of this method is unspecified if the action performs side-effects that modify the underlying source of elements, unless an overriding class 172 * has specified a concurrent modification policy. 173 * </p> 174 * 175 * @param action The action to be performed for each element. 176 * @throws IOException if an I/O error occurs. 177 * @throws NullPointerException if the specified action is null. 178 * @since 1.27.0 179 */ 180 public void forEach(final IOConsumer<? super E> action) throws IOException { 181 iterator().forEachRemaining(Objects.requireNonNull(action)); 182 } 183 184 /** 185 * Gets the current number of bytes read from this stream. 186 * 187 * @return the number of read bytes. 188 * @since 1.1 189 */ 190 public long getBytesRead() { 191 return bytesRead; 192 } 193 194 /** 195 * Gets the Charset. 196 * 197 * @return the Charset. 198 */ 199 public Charset getCharset() { 200 return charset; 201 } 202 203 /** 204 * Gets the current number of bytes read from this stream. 205 * 206 * @return the number of read. 207 * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead. 208 */ 209 @Deprecated 210 public int getCount() { 211 return (int) bytesRead; 212 } 213 214 /** 215 * Gets the next Archive Entry in this Stream. 216 * 217 * @return the next entry, or {@code null} if there are no more entries. 218 * @throws IOException if the next entry could not be read. 219 */ 220 public abstract E getNextEntry() throws IOException; 221 222 /** 223 * Returns an iterator over the SubField elements in this extra field in proper sequence. 224 * 225 * @return an iterator over the SubField elements in this extra field in proper sequence. 226 * @since 1.27.0 227 */ 228 public IOIterator<E> iterator() { 229 return new ArchiveEntryIOIterator(); 230 } 231 232 /** 233 * Does nothing. 234 * <p> 235 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 236 * </p> 237 * 238 * @param readlimit ignored. 239 */ 240 @Override 241 public synchronized void mark(final int readlimit) { 242 // noop 243 } 244 245 /** 246 * Always returns false. 247 * <p> 248 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 249 * </p> 250 * 251 * @return Always returns false. 252 */ 253 @Override 254 public boolean markSupported() { 255 return false; 256 } 257 258 /** 259 * Decrements the counter of already read bytes. 260 * 261 * @param pushedBack the number of bytes pushed back. 262 * @since 1.1 263 */ 264 protected void pushedBackBytes(final long pushedBack) { 265 bytesRead -= pushedBack; 266 } 267 268 /** 269 * Reads a byte of data. This method will block until enough input is available. 270 * <p> 271 * Simply calls the {@link #read(byte[], int, int)} method. 272 * </p> 273 * <p> 274 * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise. 275 * </p> 276 * 277 * @return the byte read, or -1 if end of input is reached. 278 * @throws IOException if an I/O error has occurred. 279 */ 280 @Override 281 public int read() throws IOException { 282 final int num = read(single, 0, 1); 283 return num == -1 ? -1 : single[0] & BYTE_MASK; 284 } 285 286 /** 287 * Does nothing. 288 * <p> 289 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 290 * </p> 291 * 292 * @throws IOException not thrown here but may be thrown from a subclass. 293 */ 294 @Override 295 public synchronized void reset() throws IOException { 296 // noop 297 } 298}