1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.io.filefilter;
18
19 import java.io.File;
20 import java.io.IOException;
21 import java.io.Serializable;
22 import java.nio.ByteBuffer;
23 import java.nio.channels.FileChannel;
24 import java.nio.charset.Charset;
25 import java.nio.file.FileVisitResult;
26 import java.nio.file.Files;
27 import java.nio.file.Path;
28 import java.nio.file.attribute.BasicFileAttributes;
29 import java.util.Arrays;
30 import java.util.Objects;
31
32 import org.apache.commons.io.RandomAccessFileMode;
33 import org.apache.commons.io.RandomAccessFiles;
34
35 /**
36 * <p>
37 * File filter for matching files containing a "magic number". A magic number
38 * is a unique series of bytes common to all files of a specific file format.
39 * For instance, all Java class files begin with the bytes
40 * {@code 0xCAFEBABE}.
41 * </p>
42 * <h2>Using Classic IO</h2>
43 * <pre>
44 * File dir = FileUtils.current();
45 * MagicNumberFileFilter javaClassFileFilter =
46 * MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
47 * (byte) 0xBA, (byte) 0xBE});
48 * String[] javaClassFiles = dir.list(javaClassFileFilter);
49 * for (String javaClassFile : javaClassFiles) {
50 * System.out.println(javaClassFile);
51 * }
52 * </pre>
53 *
54 * <p>
55 * Sometimes, such as in the case of TAR files, the
56 * magic number will be offset by a certain number of bytes in the file. In the
57 * case of TAR archive files, this offset is 257 bytes.
58 * </p>
59 *
60 * <pre>
61 * File dir = FileUtils.current();
62 * MagicNumberFileFilter tarFileFilter =
63 * MagicNumberFileFilter("ustar", 257);
64 * String[] tarFiles = dir.list(tarFileFilter);
65 * for (String tarFile : tarFiles) {
66 * System.out.println(tarFile);
67 * }
68 * </pre>
69 * <h2>Using NIO</h2>
70 * <pre>
71 * final Path dir = PathUtils.current();
72 * final AccumulatorPathVisitor visitor = AccumulatorPathVisitor.withLongCounters(MagicNumberFileFilter("ustar", 257));
73 * //
74 * // Walk one directory
75 * Files.<strong>walkFileTree</strong>(dir, Collections.emptySet(), 1, visitor);
76 * System.out.println(visitor.getPathCounters());
77 * System.out.println(visitor.getFileList());
78 * //
79 * visitor.getPathCounters().reset();
80 * //
81 * // Walk directory tree
82 * Files.<strong>walkFileTree</strong>(dir, visitor);
83 * System.out.println(visitor.getPathCounters());
84 * System.out.println(visitor.getDirList());
85 * System.out.println(visitor.getFileList());
86 * </pre>
87 * <h2>Deprecating Serialization</h2>
88 * <p>
89 * <em>Serialization is deprecated and will be removed in 3.0.</em>
90 * </p>
91 *
92 * <h2>Deprecating Serialization</h2>
93 * <p>
94 * <em>Serialization is deprecated and will be removed in 3.0.</em>
95 * </p>
96 *
97 * @since 2.0
98 * @see FileFilterUtils#magicNumberFileFilter(byte[])
99 * @see FileFilterUtils#magicNumberFileFilter(String)
100 * @see FileFilterUtils#magicNumberFileFilter(byte[], long)
101 * @see FileFilterUtils#magicNumberFileFilter(String, long)
102 */
103 public class MagicNumberFileFilter extends AbstractFileFilter implements Serializable {
104
105 /**
106 * The serialization version unique identifier.
107 */
108 private static final long serialVersionUID = -547733176983104172L;
109
110 /**
111 * The magic number to compare against the file's bytes at the provided
112 * offset.
113 */
114 private final byte[] magicNumbers;
115
116 /**
117 * The offset (in bytes) within the files that the magic number's bytes
118 * should appear.
119 */
120 private final long byteOffset;
121
122 /**
123 * <p>
124 * Constructs a new MagicNumberFileFilter and associates it with the magic
125 * number to test for in files. This constructor assumes a starting offset
126 * of {@code 0}.
127 * </p>
128 *
129 * <p>
130 * It is important to note that <em>the array is not cloned</em> and that
131 * any changes to the magic number array after construction will affect the
132 * behavior of this file filter.
133 * </p>
134 *
135 * <pre>
136 * MagicNumberFileFilter javaClassFileFilter =
137 * MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
138 * (byte) 0xBA, (byte) 0xBE});
139 * </pre>
140 *
141 * @param magicNumber the magic number to look for in the file.
142 * @throws IllegalArgumentException if {@code magicNumber} is
143 * {@code null}, or contains no bytes.
144 */
145 public MagicNumberFileFilter(final byte[] magicNumber) {
146 this(magicNumber, 0);
147 }
148
149 /**
150 * <p>
151 * Constructs a new MagicNumberFileFilter and associates it with the magic
152 * number to test for in files and the byte offset location in the file to
153 * to look for that magic number.
154 * </p>
155 *
156 * <pre>
157 * MagicNumberFileFilter tarFileFilter =
158 * MagicNumberFileFilter(new byte[] {0x75, 0x73, 0x74, 0x61, 0x72}, 257);
159 * </pre>
160 *
161 * <pre>
162 * MagicNumberFileFilter javaClassFileFilter =
163 * MagicNumberFileFilter(new byte[] {0xCA, 0xFE, 0xBA, 0xBE}, 0);
164 * </pre>
165 *
166 * @param magicNumbers the magic number to look for in the file.
167 * @param offset the byte offset in the file to start comparing bytes.
168 * @throws IllegalArgumentException if {@code magicNumber}
169 * contains no bytes, or {@code offset}
170 * is a negative number.
171 */
172 public MagicNumberFileFilter(final byte[] magicNumbers, final long offset) {
173 Objects.requireNonNull(magicNumbers, "magicNumbers");
174 if (magicNumbers.length == 0) {
175 throw new IllegalArgumentException("The magic number must contain at least one byte");
176 }
177 if (offset < 0) {
178 throw new IllegalArgumentException("The offset cannot be negative");
179 }
180
181 this.magicNumbers = magicNumbers.clone();
182 this.byteOffset = offset;
183 }
184
185 /**
186 * <p>
187 * Constructs a new MagicNumberFileFilter and associates it with the magic
188 * number to test for in files. This constructor assumes a starting offset
189 * of {@code 0}.
190 * </p>
191 *
192 * Example usage:
193 * <pre>
194 * {@code
195 * MagicNumberFileFilter xmlFileFilter =
196 * MagicNumberFileFilter("<?xml");
197 * }
198 * </pre>
199 *
200 * @param magicNumber the magic number to look for in the file.
201 * The string is converted to bytes using the platform default charset.
202 *
203 * @throws IllegalArgumentException if {@code magicNumber} is
204 * {@code null} or the empty String.
205 */
206 public MagicNumberFileFilter(final String magicNumber) {
207 this(magicNumber, 0);
208 }
209
210 /**
211 * <p>
212 * Constructs a new MagicNumberFileFilter and associates it with the magic
213 * number to test for in files and the byte offset location in the file to
214 * to look for that magic number.
215 * </p>
216 *
217 * <pre>
218 * MagicNumberFileFilter tarFileFilter = MagicNumberFileFilter("ustar", 257);
219 * </pre>
220 * <p>
221 * This method uses the virtual machine's {@linkplain Charset#defaultCharset() default charset}.
222 * </p>
223 *
224 * @param magicNumber the magic number to look for in the file.
225 * The string is converted to bytes using the platform default charset.
226 * @param offset the byte offset in the file to start comparing bytes.
227 * @throws IllegalArgumentException if {@code magicNumber} is
228 * the empty String, or {@code offset} is
229 * a negative number.
230 */
231 public MagicNumberFileFilter(final String magicNumber, final long offset) {
232 this(magicNumber.getBytes(Charset.defaultCharset()), offset);
233 }
234
235 /**
236 * <p>
237 * Accepts the provided file if the file contains the file filter's magic
238 * number at the specified offset.
239 * </p>
240 *
241 * <p>
242 * If any {@link IOException}s occur while reading the file, the file will
243 * be rejected.
244 * </p>
245 *
246 * @param file the file to accept or reject.
247 * @return {@code true} if the file contains the filter's magic number
248 * at the specified offset, {@code false} otherwise.
249 */
250 @Override
251 public boolean accept(final File file) {
252 if (isFile(file) && file.canRead()) {
253 try {
254 return RandomAccessFileMode.READ_ONLY.apply(file.toPath(),
255 raf -> Arrays.equals(magicNumbers, RandomAccessFiles.read(raf, byteOffset, magicNumbers.length)));
256 } catch (final IOException ignored) {
257 // Do nothing, fall through and do not accept file
258 }
259 }
260 return false;
261 }
262
263 /**
264 * <p>
265 * Accepts the provided file if the file contains the file filter's magic
266 * number at the specified offset.
267 * </p>
268 * <p>
269 * If any {@link IOException}s occur while reading the file, the file will
270 * be rejected.
271 *
272 * </p>
273 * @param file the file to accept or reject.
274 * @param attributes the path's basic attributes (may be null).
275 * @return {@code true} if the file contains the filter's magic number
276 * at the specified offset, {@code false} otherwise.
277 * @since 2.9.0
278 */
279 @Override
280 public FileVisitResult accept(final Path file, final BasicFileAttributes attributes) {
281 if (file != null && Files.isRegularFile(file) && Files.isReadable(file)) {
282 try {
283 try (FileChannel fileChannel = FileChannel.open(file)) {
284 final ByteBuffer byteBuffer = ByteBuffer.allocate(this.magicNumbers.length);
285 fileChannel.position(byteOffset);
286 final int read = fileChannel.read(byteBuffer);
287 if (read != magicNumbers.length) {
288 return FileVisitResult.TERMINATE;
289 }
290 return toFileVisitResult(Arrays.equals(this.magicNumbers, byteBuffer.array()));
291 }
292 } catch (final IOException ignored) {
293 // Do nothing, fall through and do not accept file
294 }
295 }
296 return FileVisitResult.TERMINATE;
297 }
298
299 /**
300 * Returns a String representation of the file filter, which includes the
301 * magic number bytes and byte offset.
302 *
303 * @return a String representation of the file filter.
304 */
305 @Override
306 public String toString() {
307 final StringBuilder builder = new StringBuilder(super.toString());
308 builder.append("(");
309 // TODO perhaps use hex if value is not printable
310 builder.append(new String(magicNumbers, Charset.defaultCharset()));
311 builder.append(",");
312 builder.append(this.byteOffset);
313 builder.append(")");
314 return builder.toString();
315 }
316 }