View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io;
18  
19  import java.io.File;
20  import java.io.FileFilter;
21  import java.io.IOException;
22  import java.nio.file.Files;
23  import java.util.Collection;
24  import java.util.Objects;
25  
26  import org.apache.commons.io.file.PathUtils;
27  import org.apache.commons.io.filefilter.FileFilterUtils;
28  import org.apache.commons.io.filefilter.IOFileFilter;
29  import org.apache.commons.io.filefilter.TrueFileFilter;
30  
31  /**
32   * Abstract class that walks through a directory hierarchy and provides subclasses with convenient hooks to add specific
33   * behavior.
34   * <p>
35   * This class operates with a {@link FileFilter} and maximum depth to limit the files and directories visited. Commons
36   * IO supplies many common filter implementations in the <a href="filefilter/package-summary.html"> filefilter</a>
37   * package.
38   * </p>
39   * <p>
40   * The following sections describe:
41   * </p>
42   * <ul>
43   * <li><a href="#example">1. Example Implementation</a> - example {@link FileCleaner} implementation.</li>
44   * <li><a href="#filter">2. Filter Example</a> - using {@link FileFilter}(s) with {@link DirectoryWalker}.</li>
45   * <li><a href="#cancel">3. Cancellation</a> - how to implement cancellation behavior.</li>
46   * </ul>
47   *
48   * <h2 id="example">1. Example Implementation</h2>
49   *
50   * There are many possible extensions, for example, to delete all files and '.svn' directories, and return a list of
51   * deleted files:
52   *
53   * <pre>
54   * public class FileCleaner extends DirectoryWalker {
55   *
56   *     public FileCleaner() {
57   *         super();
58   *     }
59   *
60   *     public List clean(File startDirectory) {
61   *         List results = new ArrayList();
62   *         walk(startDirectory, results);
63   *         return results;
64   *     }
65   *
66   *     protected boolean handleDirectory(File directory, int depth, Collection results) {
67   *         // delete svn directories and then skip
68   *         if (".svn".equals(directory.getName())) {
69   *             directory.delete();
70   *             return false;
71   *         } else {
72   *             return true;
73   *         }
74   *
75   *     }
76   *
77   *     protected void handleFile(File file, int depth, Collection results) {
78   *         // delete file and add to list of deleted
79   *         file.delete();
80   *         results.add(file);
81   *     }
82   * }
83   * </pre>
84   *
85   * <h2 id="filter">2. Filter Example</h2>
86   *
87   * <p>
88   * Choosing which directories and files to process can be a key aspect of using this class. This information can be
89   * setup in three ways, via three different constructors.
90   * </p>
91   * <p>
92   * The first option is to visit all directories and files. This is achieved via the no-args constructor.
93   * </p>
94   * <p>
95   * The second constructor option is to supply a single {@link FileFilter} that describes the files and directories to
96   * visit. Care must be taken with this option as the same filter is used for both directories and files.
97   * </p>
98   * <p>
99   * For example, if you wanted all directories which are not hidden and files which end in ".txt":
100  * </p>
101  *
102  * <pre>
103  * public class FooDirectoryWalker extends DirectoryWalker {
104  *     public FooDirectoryWalker(FileFilter filter) {
105  *         super(filter, -1);
106  *     }
107  * }
108  *
109  * // Build up the filters and create the walker
110  * // Create a filter for Non-hidden directories
111  * IOFileFilter fooDirFilter = FileFilterUtils.andFileFilter(FileFilterUtils.directoryFileFilter(),
112  *     HiddenFileFilter.VISIBLE);
113  *
114  * // Create a filter for Files ending in ".txt"
115  * IOFileFilter fooFileFilter = FileFilterUtils.andFileFilter(FileFilterUtils.fileFileFilter(),
116  *     FileFilterUtils.suffixFileFilter(".txt"));
117  *
118  * // Combine the directory and file filters using an OR condition
119  * java.io.FileFilter fooFilter = FileFilterUtils.orFileFilter(fooDirFilter, fooFileFilter);
120  *
121  * // Use the filter to construct a DirectoryWalker implementation
122  * FooDirectoryWalker walker = new FooDirectoryWalker(fooFilter);
123  * </pre>
124  * <p>
125  * The third constructor option is to specify separate filters, one for directories and one for files. These are
126  * combined internally to form the correct {@link FileFilter}, something which is very easy to get wrong when
127  * attempted manually, particularly when trying to express constructs like 'any file in directories named docs'.
128  * </p>
129  * <p>
130  * For example, if you wanted all directories which are not hidden and files which end in ".txt":
131  * </p>
132  *
133  * <pre>
134  *  public class FooDirectoryWalker extends DirectoryWalker {
135  *    public FooDirectoryWalker(IOFileFilter dirFilter, IOFileFilter fileFilter) {
136  *      super(dirFilter, fileFilter, -1);
137  *    }
138  *  }
139  *
140  *  // Use the filters to construct the walker
141  *  FooDirectoryWalker walker = new FooDirectoryWalker(
142  *    HiddenFileFilter.VISIBLE,
143  *    FileFilterUtils.suffixFileFilter(".txt")
144  *  );
145  * </pre>
146  * <p>
147  * This is much simpler than the previous example, and is why it is the preferred option for filtering.
148  * </p>
149  *
150  * <h2 id="cancel">3. Cancellation</h2>
151  *
152  * <p>
153  * The DirectoryWalker contains some of the logic required for cancel processing. Subclasses must complete the
154  * implementation.
155  * </p>
156  * <p>
157  * What {@link DirectoryWalker} does provide for cancellation is:
158  * </p>
159  * <ul>
160  * <li>{@link CancelException} which can be thrown in any of the <em>lifecycle</em> methods to stop processing.</li>
161  * <li>The {@code walk()} method traps thrown {@link CancelException} and calls the {@code handleCancelled()}
162  * method, providing a place for custom cancel processing.</li>
163  * </ul>
164  * <p>
165  * Implementations need to provide:
166  * </p>
167  * <ul>
168  * <li>The decision logic on whether to cancel processing or not.</li>
169  * <li>Constructing and throwing a {@link CancelException}.</li>
170  * <li>Custom cancel processing in the {@code handleCancelled()} method.</li>
171  * </ul>
172  * <p>
173  * Two possible scenarios are envisaged for cancellation:
174  * </p>
175  * <ul>
176  * <li><a href="#external">3.1 External / Multi-threaded</a> - cancellation being decided/initiated by an external
177  * process.</li>
178  * <li><a href="#internal">3.2 Internal</a> - cancellation being decided/initiated from within a DirectoryWalker
179  * implementation.</li>
180  * </ul>
181  * <p>
182  * The following sections provide example implementations for these two different scenarios.
183  * </p>
184  *
185  * <h3 id="external">3.1 External / Multi-threaded</h3>
186  *
187  * <p>
188  * This example provides a public {@code cancel()} method that can be called by another thread to stop the
189  * processing. A typical example use-case is a cancel button on a GUI. Calling this method sets a
190  * <a href='https://docs.oracle.com/javase/specs/jls/se8/html/jls-8.html#d5e12277'>{@code volatile}</a>
191  * flag to ensure it works properly in a multi-threaded environment.
192  * The flag is returned by the {@code handleIsCancelled()} method, which causes the walk to stop
193  * immediately. The {@code handleCancelled()} method will be the next, and last, callback method received once cancellation has occurred.
194  * </p>
195  *
196  * <pre>
197  * public class FooDirectoryWalker extends DirectoryWalker {
198  *
199  *     private volatile boolean canceled = false;
200  *
201  *     public void cancel() {
202  *         canceled = true;
203  *     }
204  *
205  *     protected boolean handleIsCancelled(File file, int depth, Collection results) {
206  *         return canceled;
207  *     }
208  *
209  *     protected void handleCancelled(File startDirectory, Collection results, CancelException cancel) {
210  *         // implement processing required when a cancellation occurs
211  *     }
212  * }
213  * </pre>
214  *
215  * <h3 id="internal">3.2 Internal</h3>
216  *
217  * <p>
218  * This shows an example of how internal cancellation processing could be implemented. <strong>Note</strong> the decision logic
219  * and throwing a {@link CancelException} could be implemented in any of the <em>lifecycle</em> methods.
220  * </p>
221  *
222  * <pre>
223  * public class BarDirectoryWalker extends DirectoryWalker {
224  *
225  *     protected boolean handleDirectory(File directory, int depth, Collection results) throws IOException {
226  *         // cancel if hidden directory
227  *         if (directory.isHidden()) {
228  *             throw new CancelException(directory, depth);
229  *         }
230  *         return true;
231  *     }
232  *
233  *     protected void handleFile(File file, int depth, Collection results) throws IOException {
234  *         // cancel if read-only file
235  *         if (!file.canWrite()) {
236  *             throw new CancelException(file, depth);
237  *         }
238  *         results.add(file);
239  *     }
240  *
241  *     protected void handleCancelled(File startDirectory, Collection results, CancelException cancel) {
242  *         // implement processing required when a cancellation occurs
243  *     }
244  * }
245  * </pre>
246  *
247  * @param <T> The result type, like {@link File}.
248  * @since 1.3
249  * @deprecated Apache Commons IO no longer uses this class. Instead, use
250  *             {@link PathUtils#walk(java.nio.file.Path, org.apache.commons.io.file.PathFilter, int, boolean, java.nio.file.FileVisitOption...)}
251  *             or {@link Files#walkFileTree(java.nio.file.Path, java.util.Set, int, java.nio.file.FileVisitor)}, and
252  *             friends.
253  */
254 @Deprecated
255 public abstract class DirectoryWalker<T> {
256 
257     /**
258      * CancelException is thrown in DirectoryWalker to cancel the current
259      * processing.
260      */
261     public static class CancelException extends IOException {
262 
263         /** Serialization id. */
264         private static final long serialVersionUID = 1347339620135041008L;
265 
266         /** The file being processed when the exception was thrown. */
267         private final File file;
268 
269         /** The file depth when the exception was thrown. */
270         private final int depth;
271 
272         /**
273          * Constructs a {@link CancelException} with
274          * the file and depth when cancellation occurred.
275          *
276          * @param file  the file when the operation was canceled, may be null.
277          * @param depth  the depth when the operation was canceled, may be null.
278          */
279         public CancelException(final File file, final int depth) {
280             this("Operation Cancelled", file, depth);
281         }
282 
283         /**
284          * Constructs a {@link CancelException} with
285          * an appropriate message and the file and depth when
286          * cancellation occurred.
287          *
288          * @param message  the detail message.
289          * @param file  the file when the operation was canceled.
290          * @param depth  the depth when the operation was canceled.
291          */
292         public CancelException(final String message, final File file, final int depth) {
293             super(message);
294             this.file = file;
295             this.depth = depth;
296         }
297 
298         /**
299          * Returns the depth when the operation was canceled.
300          *
301          * @return the depth when the operation was canceled.
302          */
303         public int getDepth() {
304             return depth;
305         }
306 
307         /**
308          * Returns the file when the operation was canceled.
309          *
310          * @return the file when the operation was canceled.
311          */
312         public File getFile() {
313             return file;
314         }
315     }
316 
317     /**
318      * The file filter to use to filter files and directories.
319      */
320     private final FileFilter filter;
321 
322     /**
323      * The limit on the directory depth to walk.
324      */
325     private final int depthLimit;
326 
327     /**
328      * Constructs an instance with no filtering and unlimited <em>depth</em>.
329      */
330     protected DirectoryWalker() {
331         this(null, -1);
332     }
333 
334     /**
335      * Constructs an instance with a filter and limit the <em>depth</em> navigated to.
336      * <p>
337      * The filter controls which files and directories will be navigated to as
338      * part of the walk. The {@link FileFilterUtils} class is useful for combining
339      * various filters together. A {@code null} filter means that no
340      * filtering should occur and all files and directories will be visited.
341      * </p>
342      *
343      * @param filter  the filter to apply, null means visit all files.
344      * @param depthLimit  controls how <em>deep</em> the hierarchy is
345      *  navigated to (less than 0 means unlimited).
346      */
347     protected DirectoryWalker(final FileFilter filter, final int depthLimit) {
348         this.filter = filter;
349         this.depthLimit = depthLimit;
350     }
351 
352     /**
353      * Constructs an instance with a directory and a file filter and an optional
354      * limit on the <em>depth</em> navigated to.
355      * <p>
356      * The filters control which files and directories will be navigated to as part
357      * of the walk. This constructor uses {@link FileFilterUtils#makeDirectoryOnly(IOFileFilter)}
358      * and {@link FileFilterUtils#makeFileOnly(IOFileFilter)} internally to combine the filters.
359      * A {@code null} filter means that no filtering should occur.
360      * </p>
361      *
362      * @param directoryFilter  the filter to apply to directories, null means visit all directories.
363      * @param fileFilter  the filter to apply to files, null means visit all files.
364      * @param depthLimit  controls how <em>deep</em> the hierarchy is
365      *  navigated to (less than 0 means unlimited).
366      */
367     protected DirectoryWalker(IOFileFilter directoryFilter, IOFileFilter fileFilter, final int depthLimit) {
368         if (directoryFilter == null && fileFilter == null) {
369             this.filter = null;
370         } else {
371             directoryFilter = directoryFilter != null ? directoryFilter : TrueFileFilter.TRUE;
372             fileFilter = fileFilter != null ? fileFilter : TrueFileFilter.TRUE;
373             directoryFilter = FileFilterUtils.makeDirectoryOnly(directoryFilter);
374             fileFilter = FileFilterUtils.makeFileOnly(fileFilter);
375             this.filter = directoryFilter.or(fileFilter);
376         }
377         this.depthLimit = depthLimit;
378     }
379 
380     /**
381      * Checks whether the walk has been canceled by calling {@link #handleIsCancelled},
382      * throwing a {@link CancelException} if it has.
383      * <p>
384      * Writers of subclasses should not normally call this method as it is called
385      * automatically by the walk of the tree. However, sometimes a single method,
386      * typically {@link #handleFile}, may take a long time to run. In that case,
387      * you may wish to check for cancellation by calling this method.
388      * </p>
389      *
390      * @param file  the current file being processed.
391      * @param depth  the current file level (starting directory = 0).
392      * @param results  the collection of result objects, may be updated.
393      * @throws IOException if an I/O Error occurs.
394      */
395     protected final void checkIfCancelled(final File file, final int depth, final Collection<T> results) throws
396             IOException {
397         if (handleIsCancelled(file, depth, results)) {
398             throw new CancelException(file, depth);
399         }
400     }
401 
402     /**
403      * Overridable callback method invoked with the contents of each directory.
404      * <p>
405      * This implementation returns the files unchanged
406      * </p>
407      *
408      * @param directory  the current directory being processed.
409      * @param depth  the current directory level (starting directory = 0).
410      * @param files the files (possibly filtered) in the directory, may be {@code null}
411      * @return the filtered list of files.
412      * @throws IOException if an I/O Error occurs.
413      * @since 2.0
414      */
415     @SuppressWarnings("unused") // Possibly thrown from subclasses.
416     protected File[] filterDirectoryContents(final File directory, final int depth, final File... files) throws
417             IOException {
418         return files;
419     }
420 
421     /**
422      * Overridable callback method invoked when the operation is canceled.
423      * The file being processed when the cancellation occurred can be
424      * obtained from the exception.
425      * <p>
426      * This implementation just re-throws the {@link CancelException}.
427      * </p>
428      *
429      * @param startDirectory  the directory that the walk started from.
430      * @param results  the collection of result objects, may be updated.
431      * @param cancel  the exception throw to cancel further processing
432      * containing details at the point of cancellation.
433      * @throws IOException if an I/O Error occurs.
434      */
435     protected void handleCancelled(final File startDirectory, final Collection<T> results,
436                        final CancelException cancel) throws IOException {
437         // re-throw exception - overridable by subclass
438         throw cancel;
439     }
440 
441     /**
442      * Overridable callback method invoked to determine if a directory should be processed.
443      * <p>
444      * This method returns a boolean to indicate if the directory should be examined or not.
445      * If you return false, the entire directory and any subdirectories will be skipped.
446      * Note that this functionality is in addition to the filtering by file filter.
447      * </p>
448      * <p>
449      * This implementation does nothing and returns true.
450      * </p>
451      *
452      * @param directory  the current directory being processed.
453      * @param depth  the current directory level (starting directory = 0).
454      * @param results  the collection of result objects, may be updated.
455      * @return true to process this directory, false to skip this directory.
456      * @throws IOException if an I/O Error occurs.
457      */
458     @SuppressWarnings("unused") // Possibly thrown from subclasses.
459     protected boolean handleDirectory(final File directory, final int depth, final Collection<T> results) throws
460             IOException {
461         // do nothing - overridable by subclass
462         return true;  // process directory
463     }
464 
465     /**
466      * Overridable callback method invoked at the end of processing each directory.
467      * <p>
468      * This implementation does nothing.
469      * </p>
470      *
471      * @param directory  the directory being processed.
472      * @param depth  the current directory level (starting directory = 0).
473      * @param results  the collection of result objects, may be updated.
474      * @throws IOException if an I/O Error occurs.
475      */
476     @SuppressWarnings("unused") // Possibly thrown from subclasses.
477     protected void handleDirectoryEnd(final File directory, final int depth, final Collection<T> results) throws
478             IOException {
479         // do nothing - overridable by subclass
480     }
481 
482     /**
483      * Overridable callback method invoked at the start of processing each directory.
484      * <p>
485      * This implementation does nothing.
486      * </p>
487      *
488      * @param directory  the current directory being processed.
489      * @param depth  the current directory level (starting directory = 0).
490      * @param results  the collection of result objects, may be updated.
491      * @throws IOException if an I/O Error occurs.
492      */
493     @SuppressWarnings("unused") // Possibly thrown from subclasses.
494     protected void handleDirectoryStart(final File directory, final int depth, final Collection<T> results) throws
495             IOException {
496         // do nothing - overridable by subclass
497     }
498 
499     /**
500      * Overridable callback method invoked at the end of processing.
501      * <p>
502      * This implementation does nothing.
503      * </p>
504      *
505      * @param results  the collection of result objects, may be updated.
506      * @throws IOException if an I/O Error occurs.
507      */
508     @SuppressWarnings("unused") // Possibly thrown from subclasses.
509     protected void handleEnd(final Collection<T> results) throws IOException {
510         // do nothing - overridable by subclass
511     }
512 
513     /**
514      * Overridable callback method invoked for each (non-directory) file.
515      * <p>
516      * This implementation does nothing.
517      * </p>
518      *
519      * @param file  the current file being processed.
520      * @param depth  the current directory level (starting directory = 0).
521      * @param results  the collection of result objects, may be updated.
522      * @throws IOException if an I/O Error occurs.
523      */
524     @SuppressWarnings("unused") // Possibly thrown from subclasses.
525     protected void handleFile(final File file, final int depth, final Collection<T> results) throws IOException {
526         // do nothing - overridable by subclass
527     }
528 
529     /**
530      * Overridable callback method invoked to determine if the entire walk
531      * operation should be immediately canceled.
532      * <p>
533      * This method should be implemented by those subclasses that want to
534      * provide a public {@code cancel()} method available from another
535      * thread. The design pattern for the subclass should be as follows:
536      * </p>
537      * <pre>
538      *  public class FooDirectoryWalker extends DirectoryWalker {
539      *    private volatile boolean canceled = false;
540      *
541      *    public void cancel() {
542      *        canceled = true;
543      *    }
544      *    private void handleIsCancelled(File file, int depth, Collection results) {
545      *        return canceled;
546      *    }
547      *    protected void handleCancelled(File startDirectory,
548      *              Collection results, CancelException cancel) {
549      *        // implement processing required when a cancellation occurs
550      *    }
551      *  }
552      * </pre>
553      * <p>
554      * If this method returns true, then the directory walk is immediately
555      * canceled. The next callback method will be {@link #handleCancelled}.
556      * </p>
557      * <p>
558      * This implementation returns false.
559      * </p>
560      *
561      * @param file  the file or directory being processed.
562      * @param depth  the current directory level (starting directory = 0).
563      * @param results  the collection of result objects, may be updated.
564      * @return true if the walk has been canceled.
565      * @throws IOException if an I/O Error occurs.
566      */
567     @SuppressWarnings("unused") // Possibly thrown from subclasses.
568     protected boolean handleIsCancelled(
569             final File file, final int depth, final Collection<T> results) throws IOException {
570         // do nothing - overridable by subclass
571         return false;  // not canceled
572     }
573 
574     /**
575      * Overridable callback method invoked for each restricted directory.
576      * <p>
577      * This implementation does nothing.
578      * </p>
579      *
580      * @param directory  the restricted directory.
581      * @param depth  the current directory level (starting directory = 0).
582      * @param results  the collection of result objects, may be updated.
583      * @throws IOException if an I/O Error occurs.
584      */
585     @SuppressWarnings("unused") // Possibly thrown from subclasses.
586     protected void handleRestricted(final File directory, final int depth, final Collection<T> results) throws
587             IOException {
588         // do nothing - overridable by subclass
589     }
590 
591     /**
592      * Overridable callback method invoked at the start of processing.
593      * <p>
594      * This implementation does nothing.
595      * </p>
596      *
597      * @param startDirectory  the directory to start from.
598      * @param results  the collection of result objects, may be updated.
599      * @throws IOException if an I/O Error occurs.
600      */
601     @SuppressWarnings("unused") // Possibly thrown from subclasses.
602     protected void handleStart(final File startDirectory, final Collection<T> results) throws IOException {
603         // do nothing - overridable by subclass
604     }
605 
606     /**
607      * Internal method that walks the directory hierarchy in a depth-first manner.
608      * <p>
609      * Users of this class do not need to call this method. This method will
610      * be called automatically by another (public) method on the specific subclass.
611      * </p>
612      * <p>
613      * Writers of subclasses should call this method to start the directory walk.
614      * Once called, this method will emit events as it walks the hierarchy.
615      * The event methods have the prefix {@code handle}.
616      * </p>
617      *
618      * @param startDirectory  the directory to start from, not null.
619      * @param results  the collection of result objects, may be updated.
620      * @throws NullPointerException if the start directory is null.
621      * @throws IOException if an I/O Error occurs.
622      */
623     protected final void walk(final File startDirectory, final Collection<T> results) throws IOException {
624         Objects.requireNonNull(startDirectory, "startDirectory");
625         try {
626             handleStart(startDirectory, results);
627             walk(startDirectory, 0, results);
628             handleEnd(results);
629         } catch (final CancelException cancel) {
630             handleCancelled(startDirectory, results, cancel);
631         }
632     }
633 
634     /**
635      * Main recursive method to examine the directory hierarchy.
636      *
637      * @param directory  the directory to examine, not null.
638      * @param depth  the directory level (starting directory = 0).
639      * @param results  the collection of result objects, may be updated.
640      * @throws IOException if an I/O Error occurs.
641      */
642     private void walk(final File directory, final int depth, final Collection<T> results) throws IOException {
643         checkIfCancelled(directory, depth, results);
644         if (handleDirectory(directory, depth, results)) {
645             handleDirectoryStart(directory, depth, results);
646             final int childDepth = depth + 1;
647             if (depthLimit < 0 || childDepth <= depthLimit) {
648                 checkIfCancelled(directory, depth, results);
649                 File[] childFiles = directory.listFiles(filter);
650                 childFiles = filterDirectoryContents(directory, depth, childFiles);
651                 if (childFiles == null) {
652                     handleRestricted(directory, childDepth, results);
653                 } else {
654                     for (final File childFile : childFiles) {
655                         if (childFile.isDirectory()) {
656                             walk(childFile, childDepth, results);
657                         } else {
658                             checkIfCancelled(childFile, childDepth, results);
659                             handleFile(childFile, childDepth, results);
660                             checkIfCancelled(childFile, childDepth, results);
661                         }
662                     }
663                 }
664             }
665             handleDirectoryEnd(directory, depth, results);
666         }
667         checkIfCancelled(directory, depth, results);
668     }
669 }