001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.File;
020import java.io.FileFilter;
021import java.io.IOException;
022import java.util.Collection;
023import java.util.Objects;
024
025import org.apache.commons.io.filefilter.FileFilterUtils;
026import org.apache.commons.io.filefilter.IOFileFilter;
027import org.apache.commons.io.filefilter.TrueFileFilter;
028
029/**
030 * Abstract class that walks through a directory hierarchy and provides
031 * subclasses with convenient hooks to add specific behavior.
032 * <p>
033 * This class operates with a {@link FileFilter} and maximum depth to
034 * limit the files and directories visited.
035 * Commons IO supplies many common filter implementations in the
036 * <a href="filefilter/package-summary.html"> filefilter</a> package.
037 * </p>
038 * <p>
039 * The following sections describe:
040 * </p>
041 *   <ul>
042 *      <li><a href="#example">1. Example Implementation</a> - example
043 *          <code>FileCleaner</code> implementation.</li>
044 *      <li><a href="#filter">2. Filter Example</a> - using
045 *          {@link FileFilter}(s) with <code>DirectoryWalker</code>.</li>
046 *      <li><a href="#cancel">3. Cancellation</a> - how to implement cancellation
047 *          behavior.</li>
048 *   </ul>
049 *
050 * <h2 id="example">1. Example Implementation</h2>
051 *
052 * There are many possible extensions, for example, to delete all
053 * files and '.svn' directories, and return a list of deleted files:
054 * <pre>
055 *  public class FileCleaner extends DirectoryWalker {
056 *
057 *    public FileCleaner() {
058 *      super();
059 *    }
060 *
061 *    public List clean(File startDirectory) {
062 *      List results = new ArrayList();
063 *      walk(startDirectory, results);
064 *      return results;
065 *    }
066 *
067 *    protected boolean handleDirectory(File directory, int depth, Collection results) {
068 *      // delete svn directories and then skip
069 *      if (".svn".equals(directory.getName())) {
070 *        directory.delete();
071 *        return false;
072 *      } else {
073 *        return true;
074 *      }
075 *
076 *    }
077 *
078 *    protected void handleFile(File file, int depth, Collection results) {
079 *      // delete file and add to list of deleted
080 *      file.delete();
081 *      results.add(file);
082 *    }
083 *  }
084 * </pre>
085 *
086 * <h2 id="filter">2. Filter Example</h2>
087 *
088 * <p>
089 * Choosing which directories and files to process can be a key aspect
 * of using this class. This information can be set up in three ways,
091 * via three different constructors.
092 * </p>
093 * <p>
094 * The first option is to visit all directories and files.
095 * This is achieved via the no-args constructor.
096 * </p>
097 * <p>
098 * The second constructor option is to supply a single {@link FileFilter}
099 * that describes the files and directories to visit. Care must be taken
100 * with this option as the same filter is used for both directories
101 * and files.
102 * </p>
103 * <p>
104 * For example, if you wanted all directories which are not hidden
105 * and files which end in ".txt":
106 * </p>
107 * <pre>
108 *  public class FooDirectoryWalker extends DirectoryWalker {
109 *    public FooDirectoryWalker(FileFilter filter) {
110 *      super(filter, -1);
111 *    }
112 *  }
113 *
114 *  // Build up the filters and create the walker
115 *    // Create a filter for Non-hidden directories
116 *    IOFileFilter fooDirFilter =
 *        FileFilterUtils.andFileFilter(FileFilterUtils.directoryFileFilter(),
118 *                                      HiddenFileFilter.VISIBLE);
119 *
120 *    // Create a filter for Files ending in ".txt"
121 *    IOFileFilter fooFileFilter =
 *        FileFilterUtils.andFileFilter(FileFilterUtils.fileFileFilter(),
123 *                                      FileFilterUtils.suffixFileFilter(".txt"));
124 *
125 *    // Combine the directory and file filters using an OR condition
126 *    java.io.FileFilter fooFilter =
127 *        FileFilterUtils.orFileFilter(fooDirFilter, fooFileFilter);
128 *
129 *    // Use the filter to construct a DirectoryWalker implementation
130 *    FooDirectoryWalker walker = new FooDirectoryWalker(fooFilter);
131 * </pre>
132 * <p>
133 * The third constructor option is to specify separate filters, one for
134 * directories and one for files. These are combined internally to form
135 * the correct <code>FileFilter</code>, something which is very easy to
136 * get wrong when attempted manually, particularly when trying to
137 * express constructs like 'any file in directories named docs'.
138 * </p>
139 * <p>
140 * For example, if you wanted all directories which are not hidden
141 * and files which end in ".txt":
142 * </p>
143 * <pre>
144 *  public class FooDirectoryWalker extends DirectoryWalker {
145 *    public FooDirectoryWalker(IOFileFilter dirFilter, IOFileFilter fileFilter) {
146 *      super(dirFilter, fileFilter, -1);
147 *    }
148 *  }
149 *
150 *  // Use the filters to construct the walker
151 *  FooDirectoryWalker walker = new FooDirectoryWalker(
152 *    HiddenFileFilter.VISIBLE,
 *    FileFilterUtils.suffixFileFilter(".txt")
154 *  );
155 * </pre>
156 * <p>
157 * This is much simpler than the previous example, and is why it is the preferred
158 * option for filtering.
159 * </p>
160 *
161 * <h2 id="cancel">3. Cancellation</h2>
162 *
163 * <p>
164 * The DirectoryWalker contains some of the logic required for cancel processing.
165 * Subclasses must complete the implementation.
166 * </p>
167 * <p>
168 * What <code>DirectoryWalker</code> does provide for cancellation is:
169 * </p>
170 * <ul>
171 *    <li>{@link CancelException} which can be thrown in any of the
172 *        <i>lifecycle</i> methods to stop processing.</li>
173 *    <li>The <code>walk()</code> method traps thrown {@link CancelException}
174 *        and calls the <code>handleCancelled()</code> method, providing
175 *        a place for custom cancel processing.</li>
176 * </ul>
177 * <p>
178 * Implementations need to provide:
179 * </p>
180 * <ul>
181 *    <li>The decision logic on whether to cancel processing or not.</li>
182 *    <li>Constructing and throwing a {@link CancelException}.</li>
 *    <li>Custom cancel processing in the <code>handleCancelled()</code> method.</li>
184 * </ul>
185 * <p>
186 * Two possible scenarios are envisaged for cancellation:
187 * </p>
188 * <ul>
189 *    <li><a href="#external">3.1 External / Multi-threaded</a> - cancellation being
190 *        decided/initiated by an external process.</li>
191 *    <li><a href="#internal">3.2 Internal</a> - cancellation being decided/initiated
192 *        from within a DirectoryWalker implementation.</li>
193 * </ul>
194 * <p>
195 * The following sections provide example implementations for these two different
196 * scenarios.
197 * </p>
198 *
199 * <h3 id="external">3.1 External / Multi-threaded</h3>
200 *
201 * <p>
202 * This example provides a public <code>cancel()</code> method that can be
203 * called by another thread to stop the processing. A typical example use-case
204 * would be a cancel button on a GUI. Calling this method sets a
205 * <a href="http://java.sun.com/docs/books/jls/second_edition/html/classes.doc.html#36930">
206 * volatile</a> flag to ensure it will work properly in a multi-threaded environment.
207 * The flag is returned by the <code>handleIsCancelled()</code> method, which
208 * will cause the walk to stop immediately. The <code>handleCancelled()</code>
209 * method will be the next, and last, callback method received once cancellation
210 * has occurred.
211 * </p>
212 *
213 * <pre>
214 *  public class FooDirectoryWalker extends DirectoryWalker {
215 *
216 *    private volatile boolean cancelled = false;
217 *
218 *    public void cancel() {
219 *        cancelled = true;
220 *    }
221 *
222 *    protected boolean handleIsCancelled(File file, int depth, Collection results) {
223 *        return cancelled;
224 *    }
225 *
226 *    protected void handleCancelled(File startDirectory, Collection results, CancelException cancel) {
227 *        // implement processing required when a cancellation occurs
228 *    }
229 *  }
230 * </pre>
231 *
232 * <h3 id="internal">3.2 Internal</h3>
233 *
234 * <p>
235 * This shows an example of how internal cancellation processing could be implemented.
236 * <b>Note</b> the decision logic and throwing a {@link CancelException} could be implemented
237 * in any of the <i>lifecycle</i> methods.
238 * </p>
239 *
240 * <pre>
241 *  public class BarDirectoryWalker extends DirectoryWalker {
242 *
243 *    protected boolean handleDirectory(File directory, int depth, Collection results) throws IOException {
244 *        // cancel if hidden directory
245 *        if (directory.isHidden()) {
 *            throw new CancelException(directory, depth);
247 *        }
248 *        return true;
249 *    }
250 *
251 *    protected void handleFile(File file, int depth, Collection results) throws IOException {
252 *        // cancel if read-only file
253 *        if (!file.canWrite()) {
254 *            throw new CancelException(file, depth);
255 *        }
256 *        results.add(file);
257 *    }
258 *
259 *    protected void handleCancelled(File startDirectory, Collection results, CancelException cancel) {
260 *        // implement processing required when a cancellation occurs
261 *    }
262 *  }
263 * </pre>
264 *
265 * @param <T> The result type, like {@link File}.
266 * @since 1.3
267 *
268 */
269public abstract class DirectoryWalker<T> {
270
271    /**
272     * The file filter to use to filter files and directories.
273     */
274    private final FileFilter filter;
275    /**
276     * The limit on the directory depth to walk.
277     */
278    private final int depthLimit;
279
280    /**
281     * Construct an instance with no filtering and unlimited <i>depth</i>.
282     */
283    protected DirectoryWalker() {
284        this(null, -1);
285    }
286
287    /**
288     * Constructs an instance with a filter and limit the <i>depth</i> navigated to.
289     * <p>
290     * The filter controls which files and directories will be navigated to as
291     * part of the walk. The {@link FileFilterUtils} class is useful for combining
292     * various filters together. A {@code null} filter means that no
293     * filtering should occur and all files and directories will be visited.
294     * </p>
295     *
296     * @param filter  the filter to apply, null means visit all files
297     * @param depthLimit  controls how <i>deep</i> the hierarchy is
298     *  navigated to (less than 0 means unlimited)
299     */
300    protected DirectoryWalker(final FileFilter filter, final int depthLimit) {
301        this.filter = filter;
302        this.depthLimit = depthLimit;
303    }
304
305    /**
306     * Constructs an instance with a directory and a file filter and an optional
307     * limit on the <i>depth</i> navigated to.
308     * <p>
309     * The filters control which files and directories will be navigated to as part
310     * of the walk. This constructor uses {@link FileFilterUtils#makeDirectoryOnly(IOFileFilter)}
311     * and {@link FileFilterUtils#makeFileOnly(IOFileFilter)} internally to combine the filters.
312     * A {@code null} filter means that no filtering should occur.
313     * </p>
314     *
315     * @param directoryFilter  the filter to apply to directories, null means visit all directories
316     * @param fileFilter  the filter to apply to files, null means visit all files
317     * @param depthLimit  controls how <i>deep</i> the hierarchy is
318     *  navigated to (less than 0 means unlimited)
319     */
320    protected DirectoryWalker(IOFileFilter directoryFilter, IOFileFilter fileFilter, final int depthLimit) {
321        if (directoryFilter == null && fileFilter == null) {
322            this.filter = null;
323        } else {
324            directoryFilter = directoryFilter != null ? directoryFilter : TrueFileFilter.TRUE;
325            fileFilter = fileFilter != null ? fileFilter : TrueFileFilter.TRUE;
326            directoryFilter = FileFilterUtils.makeDirectoryOnly(directoryFilter);
327            fileFilter = FileFilterUtils.makeFileOnly(fileFilter);
328            this.filter = FileFilterUtils.or(directoryFilter, fileFilter);
329        }
330        this.depthLimit = depthLimit;
331    }
332
333    //-----------------------------------------------------------------------
334    /**
335     * Internal method that walks the directory hierarchy in a depth-first manner.
336     * <p>
337     * Users of this class do not need to call this method. This method will
338     * be called automatically by another (public) method on the specific subclass.
339     * </p>
340     * <p>
341     * Writers of subclasses should call this method to start the directory walk.
342     * Once called, this method will emit events as it walks the hierarchy.
343     * The event methods have the prefix <code>handle</code>.
344     * </p>
345     *
346     * @param startDirectory  the directory to start from, not null
347     * @param results  the collection of result objects, may be updated
348     * @throws NullPointerException if the start directory is null
349     * @throws IOException if an I/O Error occurs
350     */
351    protected final void walk(final File startDirectory, final Collection<T> results) throws IOException {
352        Objects.requireNonNull(startDirectory, "startDirectory");
353        try {
354            handleStart(startDirectory, results);
355            walk(startDirectory, 0, results);
356            handleEnd(results);
357        } catch(final CancelException cancel) {
358            handleCancelled(startDirectory, results, cancel);
359        }
360    }
361
362    /**
363     * Main recursive method to examine the directory hierarchy.
364     *
365     * @param directory  the directory to examine, not null
366     * @param depth  the directory level (starting directory = 0)
367     * @param results  the collection of result objects, may be updated
368     * @throws IOException if an I/O Error occurs
369     */
370    private void walk(final File directory, final int depth, final Collection<T> results) throws IOException {
371        checkIfCancelled(directory, depth, results);
372        if (handleDirectory(directory, depth, results)) {
373            handleDirectoryStart(directory, depth, results);
374            final int childDepth = depth + 1;
375            if (depthLimit < 0 || childDepth <= depthLimit) {
376                checkIfCancelled(directory, depth, results);
377                File[] childFiles = filter == null ? directory.listFiles() : directory.listFiles(filter);
378                childFiles = filterDirectoryContents(directory, depth, childFiles);
379                if (childFiles == null) {
380                    handleRestricted(directory, childDepth, results);
381                } else {
382                    for (final File childFile : childFiles) {
383                        if (childFile.isDirectory()) {
384                            walk(childFile, childDepth, results);
385                        } else {
386                            checkIfCancelled(childFile, childDepth, results);
387                            handleFile(childFile, childDepth, results);
388                            checkIfCancelled(childFile, childDepth, results);
389                        }
390                    }
391                }
392            }
393            handleDirectoryEnd(directory, depth, results);
394        }
395        checkIfCancelled(directory, depth, results);
396    }
397
398    //-----------------------------------------------------------------------
399    /**
400     * Checks whether the walk has been cancelled by calling {@link #handleIsCancelled},
401     * throwing a <code>CancelException</code> if it has.
402     * <p>
403     * Writers of subclasses should not normally call this method as it is called
404     * automatically by the walk of the tree. However, sometimes a single method,
405     * typically {@link #handleFile}, may take a long time to run. In that case,
406     * you may wish to check for cancellation by calling this method.
407     * </p>
408     *
409     * @param file  the current file being processed
410     * @param depth  the current file level (starting directory = 0)
411     * @param results  the collection of result objects, may be updated
412     * @throws IOException if an I/O Error occurs
413     */
414    protected final void checkIfCancelled(final File file, final int depth, final Collection<T> results) throws
415            IOException {
416        if (handleIsCancelled(file, depth, results)) {
417            throw new CancelException(file, depth);
418        }
419    }
420
421    /**
422     * Overridable callback method invoked to determine if the entire walk
423     * operation should be immediately cancelled.
424     * <p>
425     * This method should be implemented by those subclasses that want to
426     * provide a public <code>cancel()</code> method available from another
427     * thread. The design pattern for the subclass should be as follows:
428     * </p>
429     * <pre>
430     *  public class FooDirectoryWalker extends DirectoryWalker {
431     *    private volatile boolean cancelled = false;
432     *
433     *    public void cancel() {
434     *        cancelled = true;
435     *    }
436     *    private void handleIsCancelled(File file, int depth, Collection results) {
437     *        return cancelled;
438     *    }
439     *    protected void handleCancelled(File startDirectory,
440     *              Collection results, CancelException cancel) {
441     *        // implement processing required when a cancellation occurs
442     *    }
443     *  }
444     * </pre>
445     * <p>
446     * If this method returns true, then the directory walk is immediately
447     * cancelled. The next callback method will be {@link #handleCancelled}.
448     * </p>
449     * <p>
450     * This implementation returns false.
451     * </p>
452     *
453     * @param file  the file or directory being processed
454     * @param depth  the current directory level (starting directory = 0)
455     * @param results  the collection of result objects, may be updated
456     * @return true if the walk has been cancelled
457     * @throws IOException if an I/O Error occurs
458     */
459    protected boolean handleIsCancelled(
460            final File file, final int depth, final Collection<T> results) throws IOException {
461        // do nothing - overridable by subclass
462        return false;  // not cancelled
463    }
464
465    /**
466     * Overridable callback method invoked when the operation is cancelled.
467     * The file being processed when the cancellation occurred can be
468     * obtained from the exception.
469     * <p>
470     * This implementation just re-throws the {@link CancelException}.
471     * </p>
472     *
473     * @param startDirectory  the directory that the walk started from
474     * @param results  the collection of result objects, may be updated
475     * @param cancel  the exception throw to cancel further processing
476     * containing details at the point of cancellation.
477     * @throws IOException if an I/O Error occurs
478     */
479    protected void handleCancelled(final File startDirectory, final Collection<T> results,
480                       final CancelException cancel) throws IOException {
481        // re-throw exception - overridable by subclass
482        throw cancel;
483    }
484
485    //-----------------------------------------------------------------------
486    /**
487     * Overridable callback method invoked at the start of processing.
488     * <p>
489     * This implementation does nothing.
490     * </p>
491     *
492     * @param startDirectory  the directory to start from
493     * @param results  the collection of result objects, may be updated
494     * @throws IOException if an I/O Error occurs
495     */
496    protected void handleStart(final File startDirectory, final Collection<T> results) throws IOException {
497        // do nothing - overridable by subclass
498    }
499
500    /**
501     * Overridable callback method invoked to determine if a directory should be processed.
502     * <p>
503     * This method returns a boolean to indicate if the directory should be examined or not.
504     * If you return false, the entire directory and any subdirectories will be skipped.
505     * Note that this functionality is in addition to the filtering by file filter.
506     * </p>
507     * <p>
508     * This implementation does nothing and returns true.
509     * </p>
510     *
511     * @param directory  the current directory being processed
512     * @param depth  the current directory level (starting directory = 0)
513     * @param results  the collection of result objects, may be updated
514     * @return true to process this directory, false to skip this directory
515     * @throws IOException if an I/O Error occurs
516     */
517    protected boolean handleDirectory(final File directory, final int depth, final Collection<T> results) throws
518            IOException {
519        // do nothing - overridable by subclass
520        return true;  // process directory
521    }
522
523    /**
524     * Overridable callback method invoked at the start of processing each directory.
525     * <p>
526     * This implementation does nothing.
527     * </p>
528     *
529     * @param directory  the current directory being processed
530     * @param depth  the current directory level (starting directory = 0)
531     * @param results  the collection of result objects, may be updated
532     * @throws IOException if an I/O Error occurs
533     */
534    protected void handleDirectoryStart(final File directory, final int depth, final Collection<T> results) throws
535            IOException {
536        // do nothing - overridable by subclass
537    }
538
539    /**
540     * Overridable callback method invoked with the contents of each directory.
541     * <p>
542     * This implementation returns the files unchanged
543     * </p>
544     *
545     * @param directory  the current directory being processed
546     * @param depth  the current directory level (starting directory = 0)
547     * @param files the files (possibly filtered) in the directory, may be {@code null}
548     * @return the filtered list of files
549     * @throws IOException if an I/O Error occurs
550     * @since 2.0
551     */
552    protected File[] filterDirectoryContents(final File directory, final int depth, final File... files) throws
553            IOException {
554        return files;
555    }
556
557    /**
558     * Overridable callback method invoked for each (non-directory) file.
559     * <p>
560     * This implementation does nothing.
561     * </p>
562     *
563     * @param file  the current file being processed
564     * @param depth  the current directory level (starting directory = 0)
565     * @param results  the collection of result objects, may be updated
566     * @throws IOException if an I/O Error occurs
567     */
568    protected void handleFile(final File file, final int depth, final Collection<T> results) throws IOException {
569        // do nothing - overridable by subclass
570    }
571
572    /**
573     * Overridable callback method invoked for each restricted directory.
574     * <p>
575     * This implementation does nothing.
576     * </p>
577     *
578     * @param directory  the restricted directory
579     * @param depth  the current directory level (starting directory = 0)
580     * @param results  the collection of result objects, may be updated
581     * @throws IOException if an I/O Error occurs
582     */
583    protected void handleRestricted(final File directory, final int depth, final Collection<T> results) throws
584            IOException {
585        // do nothing - overridable by subclass
586    }
587
588    /**
589     * Overridable callback method invoked at the end of processing each directory.
590     * <p>
591     * This implementation does nothing.
592     * </p>
593     *
594     * @param directory  the directory being processed
595     * @param depth  the current directory level (starting directory = 0)
596     * @param results  the collection of result objects, may be updated
597     * @throws IOException if an I/O Error occurs
598     */
599    protected void handleDirectoryEnd(final File directory, final int depth, final Collection<T> results) throws
600            IOException {
601        // do nothing - overridable by subclass
602    }
603
604    /**
605     * Overridable callback method invoked at the end of processing.
606     * <p>
607     * This implementation does nothing.
608     * </p>
609     *
610     * @param results  the collection of result objects, may be updated
611     * @throws IOException if an I/O Error occurs
612     */
613    protected void handleEnd(final Collection<T> results) throws IOException {
614        // do nothing - overridable by subclass
615    }
616
617    //-----------------------------------------------------------------------
618    /**
619     * CancelException is thrown in DirectoryWalker to cancel the current
620     * processing.
621     */
622    public static class CancelException extends IOException {
623
624        /** Serialization id. */
625        private static final long serialVersionUID = 1347339620135041008L;
626
627        /** The file being processed when the exception was thrown. */
628        private final File file;
629        /** The file depth when the exception was thrown. */
630        private final int depth;
631
632        /**
633         * Constructs a <code>CancelException</code> with
634         * the file and depth when cancellation occurred.
635         *
636         * @param file  the file when the operation was cancelled, may be null
637         * @param depth  the depth when the operation was cancelled, may be null
638         */
639        public CancelException(final File file, final int depth) {
640            this("Operation Cancelled", file, depth);
641        }
642
643        /**
644         * Constructs a <code>CancelException</code> with
645         * an appropriate message and the file and depth when
646         * cancellation occurred.
647         *
648         * @param message  the detail message
649         * @param file  the file when the operation was cancelled
650         * @param depth  the depth when the operation was cancelled
651         */
652        public CancelException(final String message, final File file, final int depth) {
653            super(message);
654            this.file = file;
655            this.depth = depth;
656        }
657
658        /**
659         * Returns the file when the operation was cancelled.
660         *
661         * @return the file when the operation was cancelled
662         */
663        public File getFile() {
664            return file;
665        }
666
667        /**
668         * Returns the depth when the operation was cancelled.
669         *
670         * @return the depth when the operation was cancelled
671         */
672        public int getDepth() {
673            return depth;
674        }
675    }
676}