001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.filefilter;
018
019import java.io.File;
020import java.io.IOException;
021import java.io.RandomAccessFile;
022import java.io.Serializable;
023import java.nio.ByteBuffer;
024import java.nio.channels.FileChannel;
025import java.nio.charset.Charset;
026import java.nio.file.FileVisitResult;
027import java.nio.file.Files;
028import java.nio.file.Path;
029import java.nio.file.attribute.BasicFileAttributes;
030import java.util.Arrays;
031import java.util.Objects;
032
033import org.apache.commons.io.RandomAccessFileMode;
034import org.apache.commons.io.RandomAccessFiles;
035
036/**
037 * <p>
038 * File filter for matching files containing a "magic number". A magic number
039 * is a unique series of bytes common to all files of a specific file format.
040 * For instance, all Java class files begin with the bytes
041 * {@code 0xCAFEBABE}.
042 * </p>
043 * <h2>Using Classic IO</h2>
044 * <pre>
045 * File dir = FileUtils.current();
 * MagicNumberFileFilter javaClassFileFilter =
 *     new MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
 *       (byte) 0xBA, (byte) 0xBE});
049 * String[] javaClassFiles = dir.list(javaClassFileFilter);
050 * for (String javaClassFile : javaClassFiles) {
051 *     System.out.println(javaClassFile);
052 * }
053 * </pre>
054 *
055 * <p>
056 * Sometimes, such as in the case of TAR files, the
057 * magic number will be offset by a certain number of bytes in the file. In the
058 * case of TAR archive files, this offset is 257 bytes.
059 * </p>
060 *
061 * <pre>
062 * File dir = FileUtils.current();
 * MagicNumberFileFilter tarFileFilter =
 *     new MagicNumberFileFilter("ustar", 257);
065 * String[] tarFiles = dir.list(tarFileFilter);
066 * for (String tarFile : tarFiles) {
067 *     System.out.println(tarFile);
068 * }
069 * </pre>
070 * <h2>Using NIO</h2>
071 * <pre>
072 * final Path dir = PathUtils.current();
 * final AccumulatorPathVisitor visitor = AccumulatorPathVisitor.withLongCounters(new MagicNumberFileFilter("ustar", 257));
074 * //
075 * // Walk one dir
076 * Files.<b>walkFileTree</b>(dir, Collections.emptySet(), 1, visitor);
077 * System.out.println(visitor.getPathCounters());
078 * System.out.println(visitor.getFileList());
079 * //
080 * visitor.getPathCounters().reset();
081 * //
082 * // Walk dir tree
083 * Files.<b>walkFileTree</b>(dir, visitor);
084 * System.out.println(visitor.getPathCounters());
085 * System.out.println(visitor.getDirList());
086 * System.out.println(visitor.getFileList());
087 * </pre>
 * <h2>Deprecating Serialization</h2>
 * <p>
 * <em>Serialization is deprecated and will be removed in 3.0.</em>
 * </p>
 *
098 * @since 2.0
099 * @see FileFilterUtils#magicNumberFileFilter(byte[])
100 * @see FileFilterUtils#magicNumberFileFilter(String)
101 * @see FileFilterUtils#magicNumberFileFilter(byte[], long)
102 * @see FileFilterUtils#magicNumberFileFilter(String, long)
103 */
104public class MagicNumberFileFilter extends AbstractFileFilter implements Serializable {
105
106    /**
107     * The serialization version unique identifier.
108     */
109    private static final long serialVersionUID = -547733176983104172L;
110
111    /**
112     * The magic number to compare against the file's bytes at the provided
113     * offset.
114     */
115    private final byte[] magicNumbers;
116
117    /**
118     * The offset (in bytes) within the files that the magic number's bytes
119     * should appear.
120     */
121    private final long byteOffset;
122
123    /**
124     * <p>
125     * Constructs a new MagicNumberFileFilter and associates it with the magic
126     * number to test for in files. This constructor assumes a starting offset
127     * of {@code 0}.
128     * </p>
129     *
130     * <p>
131     * It is important to note that <em>the array is not cloned</em> and that
132     * any changes to the magic number array after construction will affect the
133     * behavior of this file filter.
134     * </p>
135     *
136     * <pre>
137     * MagicNumberFileFilter javaClassFileFilter =
138     *     MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
139     *       (byte) 0xBA, (byte) 0xBE});
140     * </pre>
141     *
142     * @param magicNumber the magic number to look for in the file.
143     *
144     * @throws IllegalArgumentException if {@code magicNumber} is
145     *         {@code null}, or contains no bytes.
146     */
147    public MagicNumberFileFilter(final byte[] magicNumber) {
148        this(magicNumber, 0);
149    }
150
151    /**
152     * <p>
153     * Constructs a new MagicNumberFileFilter and associates it with the magic
154     * number to test for in files and the byte offset location in the file to
155     * to look for that magic number.
156     * </p>
157     *
158     * <pre>
159     * MagicNumberFileFilter tarFileFilter =
160     *     MagicNumberFileFilter(new byte[] {0x75, 0x73, 0x74, 0x61, 0x72}, 257);
161     * </pre>
162     *
163     * <pre>
164     * MagicNumberFileFilter javaClassFileFilter =
165     *     MagicNumberFileFilter(new byte[] {0xCA, 0xFE, 0xBA, 0xBE}, 0);
166     * </pre>
167     *
168     * @param magicNumbers the magic number to look for in the file.
169     * @param offset the byte offset in the file to start comparing bytes.
170     *
171     * @throws IllegalArgumentException if {@code magicNumber}
172     *         contains no bytes, or {@code offset}
173     *         is a negative number.
174     */
175    public MagicNumberFileFilter(final byte[] magicNumbers, final long offset) {
176        Objects.requireNonNull(magicNumbers, "magicNumbers");
177        if (magicNumbers.length == 0) {
178            throw new IllegalArgumentException("The magic number must contain at least one byte");
179        }
180        if (offset < 0) {
181            throw new IllegalArgumentException("The offset cannot be negative");
182        }
183
184        this.magicNumbers = magicNumbers.clone();
185        this.byteOffset = offset;
186    }
187
188    /**
189     * <p>
190     * Constructs a new MagicNumberFileFilter and associates it with the magic
191     * number to test for in files. This constructor assumes a starting offset
192     * of {@code 0}.
193     * </p>
194     *
195     * Example usage:
196     * <pre>
197     * {@code
198     * MagicNumberFileFilter xmlFileFilter =
199     *     MagicNumberFileFilter("<?xml");
200     * }
201     * </pre>
202     *
203     * @param magicNumber the magic number to look for in the file.
204     *        The string is converted to bytes using the platform default charset.
205     *
206     * @throws IllegalArgumentException if {@code magicNumber} is
207     *         {@code null} or the empty String.
208     */
209    public MagicNumberFileFilter(final String magicNumber) {
210        this(magicNumber, 0);
211    }
212
213    /**
214     * <p>
215     * Constructs a new MagicNumberFileFilter and associates it with the magic
216     * number to test for in files and the byte offset location in the file to
217     * to look for that magic number.
218     * </p>
219     *
220     * <pre>
221     * MagicNumberFileFilter tarFileFilter =
222     *     MagicNumberFileFilter("ustar", 257);
223     * </pre>
224     *
225     * @param magicNumber the magic number to look for in the file.
226     *        The string is converted to bytes using the platform default charset.
227     * @param offset the byte offset in the file to start comparing bytes.
228     *
229     * @throws IllegalArgumentException if {@code magicNumber} is
230     *         the empty String, or {@code offset} is
231     *         a negative number.
232     */
233    public MagicNumberFileFilter(final String magicNumber, final long offset) {
234        Objects.requireNonNull(magicNumber, "magicNumber");
235        if (magicNumber.isEmpty()) {
236            throw new IllegalArgumentException("The magic number must contain at least one byte");
237        }
238        if (offset < 0) {
239            throw new IllegalArgumentException("The offset cannot be negative");
240        }
241
242        this.magicNumbers = magicNumber.getBytes(Charset.defaultCharset()); // explicitly uses the platform default charset
243        this.byteOffset = offset;
244    }
245
246    /**
247     * <p>
248     * Accepts the provided file if the file contains the file filter's magic
249     * number at the specified offset.
250     * </p>
251     *
252     * <p>
253     * If any {@link IOException}s occur while reading the file, the file will
254     * be rejected.
255     * </p>
256     *
257     * @param file the file to accept or reject.
258     *
259     * @return {@code true} if the file contains the filter's magic number
260     *         at the specified offset, {@code false} otherwise.
261     */
262    @Override
263    public boolean accept(final File file) {
264        if (file != null && file.isFile() && file.canRead()) {
265            try (RandomAccessFile randomAccessFile = RandomAccessFileMode.READ_ONLY.create(file)) {
266                return Arrays.equals(magicNumbers, RandomAccessFiles.read(randomAccessFile, byteOffset, magicNumbers.length));
267            } catch (final IOException ignored) {
268                // Do nothing, fall through and do not accept file
269            }
270        }
271        return false;
272    }
273
274    /**
275     * <p>
276     * Accepts the provided file if the file contains the file filter's magic
277     * number at the specified offset.
278     * </p>
279     *
280     * <p>
281     * If any {@link IOException}s occur while reading the file, the file will
282     * be rejected.
283     * </p>
284     * @param file the file to accept or reject.
285     *
286     * @return {@code true} if the file contains the filter's magic number
287     *         at the specified offset, {@code false} otherwise.
288     * @since 2.9.0
289     */
290    @Override
291    public FileVisitResult accept(final Path file, final BasicFileAttributes attributes) {
292        if (file != null && Files.isRegularFile(file) && Files.isReadable(file)) {
293            try {
294                try (FileChannel fileChannel = FileChannel.open(file)) {
295                    final ByteBuffer byteBuffer = ByteBuffer.allocate(this.magicNumbers.length);
296                    fileChannel.position(byteOffset);
297                    final int read = fileChannel.read(byteBuffer);
298                    if (read != magicNumbers.length) {
299                        return FileVisitResult.TERMINATE;
300                    }
301                    return toFileVisitResult(Arrays.equals(this.magicNumbers, byteBuffer.array()));
302                }
303            }
304            catch (final IOException ignored) {
305                // Do nothing, fall through and do not accept file
306            }
307        }
308        return FileVisitResult.TERMINATE;
309    }
310
311    /**
312     * Returns a String representation of the file filter, which includes the
313     * magic number bytes and byte offset.
314     *
315     * @return a String representation of the file filter.
316     */
317    @Override
318    public String toString() {
319        final StringBuilder builder = new StringBuilder(super.toString());
320        builder.append("(");
321        // TODO perhaps use hex if value is not printable
322        builder.append(new String(magicNumbers, Charset.defaultCharset()));
323        builder.append(",");
324        builder.append(this.byteOffset);
325        builder.append(")");
326        return builder.toString();
327    }
328}