View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.filefilter;
18  
19  import java.io.File;
20  import java.io.IOException;
21  import java.io.Serializable;
22  import java.nio.ByteBuffer;
23  import java.nio.channels.FileChannel;
24  import java.nio.charset.Charset;
25  import java.nio.file.FileVisitResult;
26  import java.nio.file.Files;
27  import java.nio.file.Path;
28  import java.nio.file.attribute.BasicFileAttributes;
29  import java.util.Arrays;
30  import java.util.Objects;
31  
32  import org.apache.commons.io.RandomAccessFileMode;
33  import org.apache.commons.io.RandomAccessFiles;
34  
35  /**
36   * <p>
37   * File filter for matching files containing a "magic number". A magic number
38   * is a unique series of bytes common to all files of a specific file format.
39   * For instance, all Java class files begin with the bytes
40   * {@code 0xCAFEBABE}.
41   * </p>
42   * <h2>Using Classic IO</h2>
43   * <pre>
44   * File dir = FileUtils.current();
45   * MagicNumberFileFilter javaClassFileFilter =
46   *     MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
47   *       (byte) 0xBA, (byte) 0xBE});
48   * String[] javaClassFiles = dir.list(javaClassFileFilter);
49   * for (String javaClassFile : javaClassFiles) {
50   *     System.out.println(javaClassFile);
51   * }
52   * </pre>
53   *
54   * <p>
55   * Sometimes, such as in the case of TAR files, the
56   * magic number will be offset by a certain number of bytes in the file. In the
57   * case of TAR archive files, this offset is 257 bytes.
58   * </p>
59   *
60   * <pre>
61   * File dir = FileUtils.current();
62   * MagicNumberFileFilter tarFileFilter =
63   *     MagicNumberFileFilter("ustar", 257);
64   * String[] tarFiles = dir.list(tarFileFilter);
65   * for (String tarFile : tarFiles) {
66   *     System.out.println(tarFile);
67   * }
68   * </pre>
69   * <h2>Using NIO</h2>
70   * <pre>
71   * final Path dir = PathUtils.current();
72   * final AccumulatorPathVisitor visitor = AccumulatorPathVisitor.withLongCounters(MagicNumberFileFilter("ustar", 257));
73   * //
74   * // Walk one directory
75   * Files.<strong>walkFileTree</strong>(dir, Collections.emptySet(), 1, visitor);
76   * System.out.println(visitor.getPathCounters());
77   * System.out.println(visitor.getFileList());
78   * //
79   * visitor.getPathCounters().reset();
80   * //
81   * // Walk directory tree
82   * Files.<strong>walkFileTree</strong>(dir, visitor);
83   * System.out.println(visitor.getPathCounters());
84   * System.out.println(visitor.getDirList());
85   * System.out.println(visitor.getFileList());
86   * </pre>
87   * <h2>Deprecating Serialization</h2>
88   * <p>
89   * <em>Serialization is deprecated and will be removed in 3.0.</em>
90   * </p>
91   *
92   * <h2>Deprecating Serialization</h2>
93   * <p>
94   * <em>Serialization is deprecated and will be removed in 3.0.</em>
95   * </p>
96   *
97   * @since 2.0
98   * @see FileFilterUtils#magicNumberFileFilter(byte[])
99   * @see FileFilterUtils#magicNumberFileFilter(String)
100  * @see FileFilterUtils#magicNumberFileFilter(byte[], long)
101  * @see FileFilterUtils#magicNumberFileFilter(String, long)
102  */
103 public class MagicNumberFileFilter extends AbstractFileFilter implements Serializable {
104 
105     /**
106      * The serialization version unique identifier.
107      */
108     private static final long serialVersionUID = -547733176983104172L;
109 
110     /**
111      * The magic number to compare against the file's bytes at the provided
112      * offset.
113      */
114     private final byte[] magicNumbers;
115 
116     /**
117      * The offset (in bytes) within the files that the magic number's bytes
118      * should appear.
119      */
120     private final long byteOffset;
121 
122     /**
123      * <p>
124      * Constructs a new MagicNumberFileFilter and associates it with the magic
125      * number to test for in files. This constructor assumes a starting offset
126      * of {@code 0}.
127      * </p>
128      *
129      * <p>
130      * It is important to note that <em>the array is not cloned</em> and that
131      * any changes to the magic number array after construction will affect the
132      * behavior of this file filter.
133      * </p>
134      *
135      * <pre>
136      * MagicNumberFileFilter javaClassFileFilter =
137      *     MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
138      *       (byte) 0xBA, (byte) 0xBE});
139      * </pre>
140      *
141      * @param magicNumber the magic number to look for in the file.
142      * @throws IllegalArgumentException if {@code magicNumber} is
143      *         {@code null}, or contains no bytes.
144      */
145     public MagicNumberFileFilter(final byte[] magicNumber) {
146         this(magicNumber, 0);
147     }
148 
149     /**
150      * <p>
151      * Constructs a new MagicNumberFileFilter and associates it with the magic
152      * number to test for in files and the byte offset location in the file to
153      * to look for that magic number.
154      * </p>
155      *
156      * <pre>
157      * MagicNumberFileFilter tarFileFilter =
158      *     MagicNumberFileFilter(new byte[] {0x75, 0x73, 0x74, 0x61, 0x72}, 257);
159      * </pre>
160      *
161      * <pre>
162      * MagicNumberFileFilter javaClassFileFilter =
163      *     MagicNumberFileFilter(new byte[] {0xCA, 0xFE, 0xBA, 0xBE}, 0);
164      * </pre>
165      *
166      * @param magicNumbers the magic number to look for in the file.
167      * @param offset the byte offset in the file to start comparing bytes.
168      * @throws IllegalArgumentException if {@code magicNumber}
169      *         contains no bytes, or {@code offset}
170      *         is a negative number.
171      */
172     public MagicNumberFileFilter(final byte[] magicNumbers, final long offset) {
173         Objects.requireNonNull(magicNumbers, "magicNumbers");
174         if (magicNumbers.length == 0) {
175             throw new IllegalArgumentException("The magic number must contain at least one byte");
176         }
177         if (offset < 0) {
178             throw new IllegalArgumentException("The offset cannot be negative");
179         }
180 
181         this.magicNumbers = magicNumbers.clone();
182         this.byteOffset = offset;
183     }
184 
185     /**
186      * <p>
187      * Constructs a new MagicNumberFileFilter and associates it with the magic
188      * number to test for in files. This constructor assumes a starting offset
189      * of {@code 0}.
190      * </p>
191      *
192      * Example usage:
193      * <pre>
194      * {@code
195      * MagicNumberFileFilter xmlFileFilter =
196      *     MagicNumberFileFilter("<?xml");
197      * }
198      * </pre>
199      *
200      * @param magicNumber the magic number to look for in the file.
201      *        The string is converted to bytes using the platform default charset.
202      *
203      * @throws IllegalArgumentException if {@code magicNumber} is
204      *         {@code null} or the empty String.
205      */
206     public MagicNumberFileFilter(final String magicNumber) {
207         this(magicNumber, 0);
208     }
209 
210     /**
211      * <p>
212      * Constructs a new MagicNumberFileFilter and associates it with the magic
213      * number to test for in files and the byte offset location in the file to
214      * to look for that magic number.
215      * </p>
216      *
217      * <pre>
218      * MagicNumberFileFilter tarFileFilter = MagicNumberFileFilter("ustar", 257);
219      * </pre>
220      * <p>
221      * This method uses the virtual machine's {@link Charset#defaultCharset() default charset}.
222      * </p>
223      *
224      * @param magicNumber the magic number to look for in the file.
225      *        The string is converted to bytes using the platform default charset.
226      * @param offset the byte offset in the file to start comparing bytes.
227      * @throws IllegalArgumentException if {@code magicNumber} is
228      *         the empty String, or {@code offset} is
229      *         a negative number.
230      */
231     public MagicNumberFileFilter(final String magicNumber, final long offset) {
232         Objects.requireNonNull(magicNumber, "magicNumber");
233         if (magicNumber.isEmpty()) {
234             throw new IllegalArgumentException("The magic number must contain at least one byte");
235         }
236         if (offset < 0) {
237             throw new IllegalArgumentException("The offset cannot be negative");
238         }
239 
240         this.magicNumbers = magicNumber.getBytes(Charset.defaultCharset()); // explicitly uses the platform default charset
241         this.byteOffset = offset;
242     }
243 
244     /**
245      * <p>
246      * Accepts the provided file if the file contains the file filter's magic
247      * number at the specified offset.
248      * </p>
249      *
250      * <p>
251      * If any {@link IOException}s occur while reading the file, the file will
252      * be rejected.
253      * </p>
254      *
255      * @param file the file to accept or reject.
256      * @return {@code true} if the file contains the filter's magic number
257      *         at the specified offset, {@code false} otherwise.
258      */
259     @Override
260     public boolean accept(final File file) {
261         if (file != null && file.isFile() && file.canRead()) {
262             try {
263                 return RandomAccessFileMode.READ_ONLY.apply(file.toPath(),
264                         raf -> Arrays.equals(magicNumbers, RandomAccessFiles.read(raf, byteOffset, magicNumbers.length)));
265             } catch (final IOException ignored) {
266                 // Do nothing, fall through and do not accept file
267             }
268         }
269         return false;
270     }
271 
272     /**
273      * <p>
274      * Accepts the provided file if the file contains the file filter's magic
275      * number at the specified offset.
276      * </p>
277      * <p>
278      * If any {@link IOException}s occur while reading the file, the file will
279      * be rejected.
280      *
281      * </p>
282      * @param file the file to accept or reject.
283      * @param attributes the path's basic attributes (may be null).
284      * @return {@code true} if the file contains the filter's magic number
285      *         at the specified offset, {@code false} otherwise.
286      * @since 2.9.0
287      */
288     @Override
289     public FileVisitResult accept(final Path file, final BasicFileAttributes attributes) {
290         if (file != null && Files.isRegularFile(file) && Files.isReadable(file)) {
291             try {
292                 try (FileChannel fileChannel = FileChannel.open(file)) {
293                     final ByteBuffer byteBuffer = ByteBuffer.allocate(this.magicNumbers.length);
294                     fileChannel.position(byteOffset);
295                     final int read = fileChannel.read(byteBuffer);
296                     if (read != magicNumbers.length) {
297                         return FileVisitResult.TERMINATE;
298                     }
299                     return toFileVisitResult(Arrays.equals(this.magicNumbers, byteBuffer.array()));
300                 }
301             } catch (final IOException ignored) {
302                 // Do nothing, fall through and do not accept file
303             }
304         }
305         return FileVisitResult.TERMINATE;
306     }
307 
308     /**
309      * Returns a String representation of the file filter, which includes the
310      * magic number bytes and byte offset.
311      *
312      * @return a String representation of the file filter.
313      */
314     @Override
315     public String toString() {
316         final StringBuilder builder = new StringBuilder(super.toString());
317         builder.append("(");
318         // TODO perhaps use hex if value is not printable
319         builder.append(new String(magicNumbers, Charset.defaultCharset()));
320         builder.append(",");
321         builder.append(this.byteOffset);
322         builder.append(")");
323         return builder.toString();
324     }
325 }