MagicNumberFileFilter.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.io.filefilter;

  18. import java.io.File;
  19. import java.io.IOException;
  20. import java.io.Serializable;
  21. import java.nio.ByteBuffer;
  22. import java.nio.channels.FileChannel;
  23. import java.nio.charset.Charset;
  24. import java.nio.file.FileVisitResult;
  25. import java.nio.file.Files;
  26. import java.nio.file.Path;
  27. import java.nio.file.attribute.BasicFileAttributes;
  28. import java.util.Arrays;
  29. import java.util.Objects;

  30. import org.apache.commons.io.RandomAccessFileMode;
  31. import org.apache.commons.io.RandomAccessFiles;

  32. /**
  33.  * <p>
  34.  * File filter for matching files containing a "magic number". A magic number
  35.  * is a unique series of bytes common to all files of a specific file format.
  36.  * For instance, all Java class files begin with the bytes
  37.  * {@code 0xCAFEBABE}.
  38.  * </p>
  39.  * <h2>Using Classic IO</h2>
  40.  * <pre>
  41.  * File dir = FileUtils.current();
  42.  * MagicNumberFileFilter javaClassFileFilter =
  43.  *     MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
  44.  *       (byte) 0xBA, (byte) 0xBE});
  45.  * String[] javaClassFiles = dir.list(javaClassFileFilter);
  46.  * for (String javaClassFile : javaClassFiles) {
  47.  *     System.out.println(javaClassFile);
  48.  * }
  49.  * </pre>
  50.  *
  51.  * <p>
  52.  * Sometimes, such as in the case of TAR files, the
  53.  * magic number will be offset by a certain number of bytes in the file. In the
  54.  * case of TAR archive files, this offset is 257 bytes.
  55.  * </p>
  56.  *
  57.  * <pre>
  58.  * File dir = FileUtils.current();
  59.  * MagicNumberFileFilter tarFileFilter =
  60.  *     MagicNumberFileFilter("ustar", 257);
  61.  * String[] tarFiles = dir.list(tarFileFilter);
  62.  * for (String tarFile : tarFiles) {
  63.  *     System.out.println(tarFile);
  64.  * }
  65.  * </pre>
  66.  * <h2>Using NIO</h2>
  67.  * <pre>
  68.  * final Path dir = PathUtils.current();
  69.  * final AccumulatorPathVisitor visitor = AccumulatorPathVisitor.withLongCounters(MagicNumberFileFilter("ustar", 257));
  70.  * //
  71.  * // Walk one directory
  72.  * Files.<strong>walkFileTree</strong>(dir, Collections.emptySet(), 1, visitor);
  73.  * System.out.println(visitor.getPathCounters());
  74.  * System.out.println(visitor.getFileList());
  75.  * //
  76.  * visitor.getPathCounters().reset();
  77.  * //
  78.  * // Walk directory tree
  79.  * Files.<strong>walkFileTree</strong>(dir, visitor);
  80.  * System.out.println(visitor.getPathCounters());
  81.  * System.out.println(visitor.getDirList());
  82.  * System.out.println(visitor.getFileList());
  83.  * </pre>
  84.  * <h2>Deprecating Serialization</h2>
  85.  * <p>
  86.  * <em>Serialization is deprecated and will be removed in 3.0.</em>
  87.  * </p>
  88.  *
  89.  * <h2>Deprecating Serialization</h2>
  90.  * <p>
  91.  * <em>Serialization is deprecated and will be removed in 3.0.</em>
  92.  * </p>
  93.  *
  94.  * @since 2.0
  95.  * @see FileFilterUtils#magicNumberFileFilter(byte[])
  96.  * @see FileFilterUtils#magicNumberFileFilter(String)
  97.  * @see FileFilterUtils#magicNumberFileFilter(byte[], long)
  98.  * @see FileFilterUtils#magicNumberFileFilter(String, long)
  99.  */
  100. public class MagicNumberFileFilter extends AbstractFileFilter implements Serializable {

  101.     /**
  102.      * The serialization version unique identifier.
  103.      */
  104.     private static final long serialVersionUID = -547733176983104172L;

  105.     /**
  106.      * The magic number to compare against the file's bytes at the provided
  107.      * offset.
  108.      */
  109.     private final byte[] magicNumbers;

  110.     /**
  111.      * The offset (in bytes) within the files that the magic number's bytes
  112.      * should appear.
  113.      */
  114.     private final long byteOffset;

  115.     /**
  116.      * <p>
  117.      * Constructs a new MagicNumberFileFilter and associates it with the magic
  118.      * number to test for in files. This constructor assumes a starting offset
  119.      * of {@code 0}.
  120.      * </p>
  121.      *
  122.      * <p>
  123.      * It is important to note that <em>the array is not cloned</em> and that
  124.      * any changes to the magic number array after construction will affect the
  125.      * behavior of this file filter.
  126.      * </p>
  127.      *
  128.      * <pre>
  129.      * MagicNumberFileFilter javaClassFileFilter =
  130.      *     MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
  131.      *       (byte) 0xBA, (byte) 0xBE});
  132.      * </pre>
  133.      *
  134.      * @param magicNumber the magic number to look for in the file.
  135.      * @throws IllegalArgumentException if {@code magicNumber} is
  136.      *         {@code null}, or contains no bytes.
  137.      */
  138.     public MagicNumberFileFilter(final byte[] magicNumber) {
  139.         this(magicNumber, 0);
  140.     }

  141.     /**
  142.      * <p>
  143.      * Constructs a new MagicNumberFileFilter and associates it with the magic
  144.      * number to test for in files and the byte offset location in the file to
  145.      * to look for that magic number.
  146.      * </p>
  147.      *
  148.      * <pre>
  149.      * MagicNumberFileFilter tarFileFilter =
  150.      *     MagicNumberFileFilter(new byte[] {0x75, 0x73, 0x74, 0x61, 0x72}, 257);
  151.      * </pre>
  152.      *
  153.      * <pre>
  154.      * MagicNumberFileFilter javaClassFileFilter =
  155.      *     MagicNumberFileFilter(new byte[] {0xCA, 0xFE, 0xBA, 0xBE}, 0);
  156.      * </pre>
  157.      *
  158.      * @param magicNumbers the magic number to look for in the file.
  159.      * @param offset the byte offset in the file to start comparing bytes.
  160.      * @throws IllegalArgumentException if {@code magicNumber}
  161.      *         contains no bytes, or {@code offset}
  162.      *         is a negative number.
  163.      */
  164.     public MagicNumberFileFilter(final byte[] magicNumbers, final long offset) {
  165.         Objects.requireNonNull(magicNumbers, "magicNumbers");
  166.         if (magicNumbers.length == 0) {
  167.             throw new IllegalArgumentException("The magic number must contain at least one byte");
  168.         }
  169.         if (offset < 0) {
  170.             throw new IllegalArgumentException("The offset cannot be negative");
  171.         }

  172.         this.magicNumbers = magicNumbers.clone();
  173.         this.byteOffset = offset;
  174.     }

  175.     /**
  176.      * <p>
  177.      * Constructs a new MagicNumberFileFilter and associates it with the magic
  178.      * number to test for in files. This constructor assumes a starting offset
  179.      * of {@code 0}.
  180.      * </p>
  181.      *
  182.      * Example usage:
  183.      * <pre>
  184.      * {@code
  185.      * MagicNumberFileFilter xmlFileFilter =
  186.      *     MagicNumberFileFilter("<?xml");
  187.      * }
  188.      * </pre>
  189.      *
  190.      * @param magicNumber the magic number to look for in the file.
  191.      *        The string is converted to bytes using the platform default charset.
  192.      *
  193.      * @throws IllegalArgumentException if {@code magicNumber} is
  194.      *         {@code null} or the empty String.
  195.      */
  196.     public MagicNumberFileFilter(final String magicNumber) {
  197.         this(magicNumber, 0);
  198.     }

  199.     /**
  200.      * <p>
  201.      * Constructs a new MagicNumberFileFilter and associates it with the magic
  202.      * number to test for in files and the byte offset location in the file to
  203.      * to look for that magic number.
  204.      * </p>
  205.      *
  206.      * <pre>
  207.      * MagicNumberFileFilter tarFileFilter = MagicNumberFileFilter("ustar", 257);
  208.      * </pre>
  209.      * <p>
  210.      * This method uses the virtual machine's {@link Charset#defaultCharset() default charset}.
  211.      * </p>
  212.      *
  213.      * @param magicNumber the magic number to look for in the file.
  214.      *        The string is converted to bytes using the platform default charset.
  215.      * @param offset the byte offset in the file to start comparing bytes.
  216.      * @throws IllegalArgumentException if {@code magicNumber} is
  217.      *         the empty String, or {@code offset} is
  218.      *         a negative number.
  219.      */
  220.     public MagicNumberFileFilter(final String magicNumber, final long offset) {
  221.         Objects.requireNonNull(magicNumber, "magicNumber");
  222.         if (magicNumber.isEmpty()) {
  223.             throw new IllegalArgumentException("The magic number must contain at least one byte");
  224.         }
  225.         if (offset < 0) {
  226.             throw new IllegalArgumentException("The offset cannot be negative");
  227.         }

  228.         this.magicNumbers = magicNumber.getBytes(Charset.defaultCharset()); // explicitly uses the platform default charset
  229.         this.byteOffset = offset;
  230.     }

  231.     /**
  232.      * <p>
  233.      * Accepts the provided file if the file contains the file filter's magic
  234.      * number at the specified offset.
  235.      * </p>
  236.      *
  237.      * <p>
  238.      * If any {@link IOException}s occur while reading the file, the file will
  239.      * be rejected.
  240.      * </p>
  241.      *
  242.      * @param file the file to accept or reject.
  243.      * @return {@code true} if the file contains the filter's magic number
  244.      *         at the specified offset, {@code false} otherwise.
  245.      */
  246.     @Override
  247.     public boolean accept(final File file) {
  248.         if (file != null && file.isFile() && file.canRead()) {
  249.             try {
  250.                 return RandomAccessFileMode.READ_ONLY.apply(file.toPath(),
  251.                         raf -> Arrays.equals(magicNumbers, RandomAccessFiles.read(raf, byteOffset, magicNumbers.length)));
  252.             } catch (final IOException ignored) {
  253.                 // Do nothing, fall through and do not accept file
  254.             }
  255.         }
  256.         return false;
  257.     }

  258.     /**
  259.      * <p>
  260.      * Accepts the provided file if the file contains the file filter's magic
  261.      * number at the specified offset.
  262.      * </p>
  263.      * <p>
  264.      * If any {@link IOException}s occur while reading the file, the file will
  265.      * be rejected.
  266.      *
  267.      * </p>
  268.      * @param file the file to accept or reject.
  269.      * @param attributes the path's basic attributes (may be null).
  270.      * @return {@code true} if the file contains the filter's magic number
  271.      *         at the specified offset, {@code false} otherwise.
  272.      * @since 2.9.0
  273.      */
  274.     @Override
  275.     public FileVisitResult accept(final Path file, final BasicFileAttributes attributes) {
  276.         if (file != null && Files.isRegularFile(file) && Files.isReadable(file)) {
  277.             try {
  278.                 try (FileChannel fileChannel = FileChannel.open(file)) {
  279.                     final ByteBuffer byteBuffer = ByteBuffer.allocate(this.magicNumbers.length);
  280.                     fileChannel.position(byteOffset);
  281.                     final int read = fileChannel.read(byteBuffer);
  282.                     if (read != magicNumbers.length) {
  283.                         return FileVisitResult.TERMINATE;
  284.                     }
  285.                     return toFileVisitResult(Arrays.equals(this.magicNumbers, byteBuffer.array()));
  286.                 }
  287.             } catch (final IOException ignored) {
  288.                 // Do nothing, fall through and do not accept file
  289.             }
  290.         }
  291.         return FileVisitResult.TERMINATE;
  292.     }

  293.     /**
  294.      * Returns a String representation of the file filter, which includes the
  295.      * magic number bytes and byte offset.
  296.      *
  297.      * @return a String representation of the file filter.
  298.      */
  299.     @Override
  300.     public String toString() {
  301.         final StringBuilder builder = new StringBuilder(super.toString());
  302.         builder.append("(");
  303.         // TODO perhaps use hex if value is not printable
  304.         builder.append(new String(magicNumbers, Charset.defaultCharset()));
  305.         builder.append(",");
  306.         builder.append(this.byteOffset);
  307.         builder.append(")");
  308.         return builder.toString();
  309.     }
  310. }