001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers; 020 021import java.io.FilterInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.nio.charset.Charset; 025 026import org.apache.commons.io.Charsets; 027import org.apache.commons.io.input.NullInputStream; 028 029/** 030 * Archive input streams <b>MUST</b> override the {@link #read(byte[], int, int)} - or {@link #read()} - method so that reading from the stream generates EOF 031 * for the end of data in each entry as well as at the end of the file proper. 032 * <p> 033 * The {@link #getNextEntry()} method is used to reset the input stream ready for reading the data from the next entry. 034 * </p> 035 * <p> 036 * The input stream classes must also implement a method with the signature: 037 * </p> 038 * <pre> 039 * public static boolean matches(byte[] signature, int length) 040 * </pre> 041 * <p> 042 * which is used by the {@link ArchiveStreamFactory} to autodetect the archive type from the first few bytes of a stream. 043 * </p> 044 * 045 * @param <E> The type of {@link ArchiveEntry} produced. 046 */ 047public abstract class ArchiveInputStream<E extends ArchiveEntry> extends FilterInputStream { 048 049 private static final int BYTE_MASK = 0xFF; 050 051 private final byte[] single = new byte[1]; 052 053 /** The number of bytes read in this stream */ 054 private long bytesRead; 055 056 private Charset charset; 057 058 /** 059 * Constructs a new instance. 060 */ 061 public ArchiveInputStream() { 062 this(NullInputStream.INSTANCE, Charset.defaultCharset()); 063 } 064 065 /** 066 * Constructs a new instance. 067 * 068 * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream. 069 * @param charset charset. 070 * @since 1.26.0 071 */ 072 // This will be protected once subclasses use builders. 073 private ArchiveInputStream(final InputStream inputStream, final Charset charset) { 074 super(inputStream); 075 this.charset = Charsets.toCharset(charset); 076 } 077 078 /** 079 * Constructs a new instance. 080 * 081 * @param inputStream the underlying input stream, or {@code null} if this instance is to be created without an underlying stream. 082 * @param charsetName charset name. 083 * @since 1.26.0 084 */ 085 protected ArchiveInputStream(final InputStream inputStream, final String charsetName) { 086 this(inputStream, Charsets.toCharset(charsetName)); 087 } 088 089 /** 090 * Whether this stream is able to read the given entry. 091 * <p> 092 * Some archive formats support variants or details that are not supported (yet). 093 * </p> 094 * 095 * @param archiveEntry the entry to test 096 * @return This implementation always returns true. 097 * 098 * @since 1.1 099 */ 100 public boolean canReadEntryData(final ArchiveEntry archiveEntry) { 101 return true; 102 } 103 104 /** 105 * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1) 106 * 107 * @param read the number of bytes read 108 */ 109 protected void count(final int read) { 110 count((long) read); 111 } 112 113 /** 114 * Increments the counter of already read bytes. Doesn't increment if the EOF has been hit (read == -1) 115 * 116 * @param read the number of bytes read 117 * @since 1.1 118 */ 119 protected void count(final long read) { 120 if (read != -1) { 121 bytesRead += read; 122 } 123 } 124 125 /** 126 * Gets the current number of bytes read from this stream. 127 * 128 * @return the number of read bytes 129 * @since 1.1 130 */ 131 public long getBytesRead() { 132 return bytesRead; 133 } 134 135 /** 136 * Gets the Charest. 137 * 138 * @return the Charest. 139 */ 140 public Charset getCharset() { 141 return charset; 142 } 143 144 /** 145 * Gets the current number of bytes read from this stream. 146 * 147 * @return the number of read bytes 148 * @deprecated this method may yield wrong results for large archives, use {@link #getBytesRead()} instead. 149 */ 150 @Deprecated 151 public int getCount() { 152 return (int) bytesRead; 153 } 154 155 /** 156 * Gets the next Archive Entry in this Stream. 157 * 158 * @return the next entry, or {@code null} if there are no more entries 159 * @throws IOException if the next entry could not be read 160 */ 161 public abstract E getNextEntry() throws IOException; 162 163 /** 164 * Does nothing. 165 * 166 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 167 * 168 * @param readlimit ignored. 169 */ 170 @Override 171 public synchronized void mark(final int readlimit) { 172 // noop 173 } 174 175 /** 176 * Always returns false. 177 * 178 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 179 * 180 * @return Always returns false. 181 */ 182 @Override 183 public boolean markSupported() { 184 return false; 185 } 186 187 /** 188 * Decrements the counter of already read bytes. 189 * 190 * @param pushedBack the number of bytes pushed back. 191 * @since 1.1 192 */ 193 protected void pushedBackBytes(final long pushedBack) { 194 bytesRead -= pushedBack; 195 } 196 197 /** 198 * Reads a byte of data. This method will block until enough input is available. 199 * 200 * Simply calls the {@link #read(byte[], int, int)} method. 201 * 202 * MUST be overridden if the {@link #read(byte[], int, int)} method is not overridden; may be overridden otherwise. 203 * 204 * @return the byte read, or -1 if end of input is reached 205 * @throws IOException if an I/O error has occurred 206 */ 207 @Override 208 public int read() throws IOException { 209 final int num = read(single, 0, 1); 210 return num == -1 ? -1 : single[0] & BYTE_MASK; 211 } 212 213 /** 214 * Does nothing. 215 * 216 * TODO [COMPRESS-670] Support mark() and reset() in ArchiveInputStream. 217 * 218 * @throws IOException not thrown here but may be thrown from a subclass. 219 */ 220 @Override 221 public synchronized void reset() throws IOException { 222 // noop 223 } 224}