001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.File;
020import java.io.FileFilter;
021import java.io.IOException;
022import java.util.Collection;
023
024import org.apache.commons.io.filefilter.FileFilterUtils;
025import org.apache.commons.io.filefilter.IOFileFilter;
026import org.apache.commons.io.filefilter.TrueFileFilter;
027
028/**
029 * Abstract class that walks through a directory hierarchy and provides
030 * subclasses with convenient hooks to add specific behaviour.
031 * <p>
032 * This class operates with a {@link FileFilter} and maximum depth to
 * limit the files and directories visited.
034 * Commons IO supplies many common filter implementations in the
035 * <a href="filefilter/package-summary.html"> filefilter</a> package.
036 * <p>
037 * The following sections describe:
038 *   <ul>
039 *      <li><a href="#example">1. Example Implementation</a> - example
040 *          <code>FileCleaner</code> implementation.</li>
041 *      <li><a href="#filter">2. Filter Example</a> - using
042 *          {@link FileFilter}(s) with <code>DirectoryWalker</code>.</li>
043 *      <li><a href="#cancel">3. Cancellation</a> - how to implement cancellation
044 *          behaviour.</li>
045 *   </ul>
046 *
047 * <a name="example"></a>
048 * <h3>1. Example Implementation</h3>
049 *
050 * There are many possible extensions, for example, to delete all
051 * files and '.svn' directories, and return a list of deleted files:
052 * <pre>
053 *  public class FileCleaner extends DirectoryWalker {
054 *
055 *    public FileCleaner() {
056 *      super();
057 *    }
058 *
059 *    public List clean(File startDirectory) {
060 *      List results = new ArrayList();
061 *      walk(startDirectory, results);
062 *      return results;
063 *    }
064 *
065 *    protected boolean handleDirectory(File directory, int depth, Collection results) {
066 *      // delete svn directories and then skip
067 *      if (".svn".equals(directory.getName())) {
068 *        directory.delete();
069 *        return false;
070 *      } else {
071 *        return true;
072 *      }
073 *
074 *    }
075 *
076 *    protected void handleFile(File file, int depth, Collection results) {
077 *      // delete file and add to list of deleted
078 *      file.delete();
079 *      results.add(file);
080 *    }
081 *  }
082 * </pre>
083 *
084 * <a name="filter"></a>
085 * <h3>2. Filter Example</h3>
086 *
 * Choosing which directories and files to process can be a key aspect
 * of using this class. This information can be set up in three ways,
 * via three different constructors.
090 * <p>
091 * The first option is to visit all directories and files.
092 * This is achieved via the no-args constructor.
093 * <p>
094 * The second constructor option is to supply a single {@link FileFilter}
095 * that describes the files and directories to visit. Care must be taken
096 * with this option as the same filter is used for both directories
097 * and files.
098 * <p>
099 * For example, if you wanted all directories which are not hidden
100 * and files which end in ".txt":
101 * <pre>
102 *  public class FooDirectoryWalker extends DirectoryWalker {
103 *    public FooDirectoryWalker(FileFilter filter) {
104 *      super(filter, -1);
105 *    }
106 *  }
107 *
108 *  // Build up the filters and create the walker
109 *    // Create a filter for Non-hidden directories
 *    IOFileFilter fooDirFilter =
 *        FileFilterUtils.andFileFilter(FileFilterUtils.directoryFileFilter(),
 *                                      HiddenFileFilter.VISIBLE);
 *
 *    // Create a filter for Files ending in ".txt"
 *    IOFileFilter fooFileFilter =
 *        FileFilterUtils.andFileFilter(FileFilterUtils.fileFileFilter(),
 *                                      FileFilterUtils.suffixFileFilter(".txt"));
118 *
119 *    // Combine the directory and file filters using an OR condition
120 *    java.io.FileFilter fooFilter =
121 *        FileFilterUtils.orFileFilter(fooDirFilter, fooFileFilter);
122 *
123 *    // Use the filter to construct a DirectoryWalker implementation
124 *    FooDirectoryWalker walker = new FooDirectoryWalker(fooFilter);
125 * </pre>
126 * <p>
127 * The third constructor option is to specify separate filters, one for
128 * directories and one for files. These are combined internally to form
129 * the correct <code>FileFilter</code>, something which is very easy to
130 * get wrong when attempted manually, particularly when trying to
131 * express constructs like 'any file in directories named docs'.
132 * <p>
133 * For example, if you wanted all directories which are not hidden
134 * and files which end in ".txt":
135 * <pre>
136 *  public class FooDirectoryWalker extends DirectoryWalker {
137 *    public FooDirectoryWalker(IOFileFilter dirFilter, IOFileFilter fileFilter) {
138 *      super(dirFilter, fileFilter, -1);
139 *    }
140 *  }
141 *
142 *  // Use the filters to construct the walker
 *  FooDirectoryWalker walker = new FooDirectoryWalker(
 *    HiddenFileFilter.VISIBLE,
 *    FileFilterUtils.suffixFileFilter(".txt")
 *  );
147 * </pre>
148 * This is much simpler than the previous example, and is why it is the preferred
149 * option for filtering.
150 *
151 * <a name="cancel"></a>
152 * <h3>3. Cancellation</h3>
153 *
154 * The DirectoryWalker contains some of the logic required for cancel processing.
155 * Subclasses must complete the implementation.
156 * <p>
157 * What <code>DirectoryWalker</code> does provide for cancellation is:
158 * <ul>
159 *    <li>{@link CancelException} which can be thrown in any of the
160 *        <i>lifecycle</i> methods to stop processing.</li>
161 *    <li>The <code>walk()</code> method traps thrown {@link CancelException}
162 *        and calls the <code>handleCancelled()</code> method, providing
163 *        a place for custom cancel processing.</li>
164 * </ul>
165 * <p>
166 * Implementations need to provide:
167 * <ul>
168 *    <li>The decision logic on whether to cancel processing or not.</li>
169 *    <li>Constructing and throwing a {@link CancelException}.</li>
 *    <li>Custom cancel processing in the <code>handleCancelled()</code> method.</li>
171 * </ul>
172 * <p>
173 * Two possible scenarios are envisaged for cancellation:
174 * <ul>
 *    <li><a href="#external">3.1 External / Multi-threaded</a> - cancellation being
176 *        decided/initiated by an external process.</li>
177 *    <li><a href="#internal">3.2 Internal</a> - cancellation being decided/initiated
178 *        from within a DirectoryWalker implementation.</li>
179 * </ul>
180 * <p>
181 * The following sections provide example implementations for these two different
182 * scenarios.
183 *
184 * <a name="external"></a>
185 * <h4>3.1 External / Multi-threaded</h4>
186 *
187 * This example provides a public <code>cancel()</code> method that can be
188 * called by another thread to stop the processing. A typical example use-case
189 * would be a cancel button on a GUI. Calling this method sets a
190 * <a href="http://java.sun.com/docs/books/jls/second_edition/html/classes.doc.html#36930">
191 * volatile</a> flag to ensure it will work properly in a multi-threaded environment.
192 * The flag is returned by the <code>handleIsCancelled()</code> method, which
193 * will cause the walk to stop immediately. The <code>handleCancelled()</code>
194 * method will be the next, and last, callback method received once cancellation
195 * has occurred.
196 *
197 * <pre>
198 *  public class FooDirectoryWalker extends DirectoryWalker {
199 *
200 *    private volatile boolean cancelled = false;
201 *
202 *    public void cancel() {
203 *        cancelled = true;
204 *    }
205 *
206 *    protected boolean handleIsCancelled(File file, int depth, Collection results) {
207 *        return cancelled;
208 *    }
209 *
210 *    protected void handleCancelled(File startDirectory, Collection results, CancelException cancel) {
211 *        // implement processing required when a cancellation occurs
212 *    }
213 *  }
214 * </pre>
215 *
216 * <a name="internal"></a>
217 * <h4>3.2 Internal</h4>
218 *
219 * This shows an example of how internal cancellation processing could be implemented.
220 * <b>Note</b> the decision logic and throwing a {@link CancelException} could be implemented
221 * in any of the <i>lifecycle</i> methods.
222 *
223 * <pre>
224 *  public class BarDirectoryWalker extends DirectoryWalker {
225 *
226 *    protected boolean handleDirectory(File directory, int depth, Collection results) throws IOException {
227 *        // cancel if hidden directory
228 *        if (directory.isHidden()) {
 *            throw new CancelException(directory, depth);
230 *        }
231 *        return true;
232 *    }
233 *
234 *    protected void handleFile(File file, int depth, Collection results) throws IOException {
235 *        // cancel if read-only file
236 *        if (!file.canWrite()) {
237 *            throw new CancelException(file, depth);
238 *        }
239 *        results.add(file);
240 *    }
241 *
242 *    protected void handleCancelled(File startDirectory, Collection results, CancelException cancel) {
243 *        // implement processing required when a cancellation occurs
244 *    }
245 *  }
246 * </pre>
247 *
248 * @since 1.3
249 * @version $Id: DirectoryWalker.java 1723627 2016-01-07 21:15:47Z niallp $
250 */
251public abstract class DirectoryWalker<T> {
252
253    /**
254     * The file filter to use to filter files and directories.
255     */
256    private final FileFilter filter;
257    /**
258     * The limit on the directory depth to walk.
259     */
260    private final int depthLimit;
261
262    /**
263     * Construct an instance with no filtering and unlimited <i>depth</i>.
264     */
265    protected DirectoryWalker() {
266        this(null, -1);
267    }
268
269    /**
270     * Construct an instance with a filter and limit the <i>depth</i> navigated to.
271     * <p>
272     * The filter controls which files and directories will be navigated to as
273     * part of the walk. The {@link FileFilterUtils} class is useful for combining
274     * various filters together. A {@code null} filter means that no
275     * filtering should occur and all files and directories will be visited.
276     *
277     * @param filter  the filter to apply, null means visit all files
278     * @param depthLimit  controls how <i>deep</i> the hierarchy is
279     *  navigated to (less than 0 means unlimited)
280     */
281    protected DirectoryWalker(final FileFilter filter, final int depthLimit) {
282        this.filter = filter;
283        this.depthLimit = depthLimit;
284    }
285
286    /**
287     * Construct an instance with a directory and a file filter and an optional
288     * limit on the <i>depth</i> navigated to.
289     * <p>
290     * The filters control which files and directories will be navigated to as part
291     * of the walk. This constructor uses {@link FileFilterUtils#makeDirectoryOnly(IOFileFilter)}
292     * and {@link FileFilterUtils#makeFileOnly(IOFileFilter)} internally to combine the filters.
293     * A {@code null} filter means that no filtering should occur.
294     *
295     * @param directoryFilter  the filter to apply to directories, null means visit all directories
296     * @param fileFilter  the filter to apply to files, null means visit all files
297     * @param depthLimit  controls how <i>deep</i> the hierarchy is
298     *  navigated to (less than 0 means unlimited)
299     */
300    protected DirectoryWalker(IOFileFilter directoryFilter, IOFileFilter fileFilter, final int depthLimit) {
301        if (directoryFilter == null && fileFilter == null) {
302            this.filter = null;
303        } else {
304            directoryFilter = directoryFilter != null ? directoryFilter : TrueFileFilter.TRUE;
305            fileFilter = fileFilter != null ? fileFilter : TrueFileFilter.TRUE;
306            directoryFilter = FileFilterUtils.makeDirectoryOnly(directoryFilter);
307            fileFilter = FileFilterUtils.makeFileOnly(fileFilter);
308            this.filter = FileFilterUtils.or(directoryFilter, fileFilter);
309        }
310        this.depthLimit = depthLimit;
311    }
312
313    //-----------------------------------------------------------------------
314    /**
315     * Internal method that walks the directory hierarchy in a depth-first manner.
316     * <p>
317     * Users of this class do not need to call this method. This method will
318     * be called automatically by another (public) method on the specific subclass.
319     * <p>
320     * Writers of subclasses should call this method to start the directory walk.
321     * Once called, this method will emit events as it walks the hierarchy.
322     * The event methods have the prefix <code>handle</code>.
323     *
324     * @param startDirectory  the directory to start from, not null
325     * @param results  the collection of result objects, may be updated
326     * @throws NullPointerException if the start directory is null
327     * @throws IOException if an I/O Error occurs
328     */
329    protected final void walk(final File startDirectory, final Collection<T> results) throws IOException {
330        if (startDirectory == null) {
331            throw new NullPointerException("Start Directory is null");
332        }
333        try {
334            handleStart(startDirectory, results);
335            walk(startDirectory, 0, results);
336            handleEnd(results);
337        } catch(final CancelException cancel) {
338            handleCancelled(startDirectory, results, cancel);
339        }
340    }
341
342    /**
343     * Main recursive method to examine the directory hierarchy.
344     *
345     * @param directory  the directory to examine, not null
346     * @param depth  the directory level (starting directory = 0)
347     * @param results  the collection of result objects, may be updated
348     * @throws IOException if an I/O Error occurs
349     */
350    private void walk(final File directory, final int depth, final Collection<T> results) throws IOException {
351        checkIfCancelled(directory, depth, results);
352        if (handleDirectory(directory, depth, results)) {
353            handleDirectoryStart(directory, depth, results);
354            final int childDepth = depth + 1;
355            if (depthLimit < 0 || childDepth <= depthLimit) {
356                checkIfCancelled(directory, depth, results);
357                File[] childFiles = filter == null ? directory.listFiles() : directory.listFiles(filter);
358                childFiles = filterDirectoryContents(directory, depth, childFiles);
359                if (childFiles == null) {
360                    handleRestricted(directory, childDepth, results);
361                } else {
362                    for (final File childFile : childFiles) {
363                        if (childFile.isDirectory()) {
364                            walk(childFile, childDepth, results);
365                        } else {
366                            checkIfCancelled(childFile, childDepth, results);
367                            handleFile(childFile, childDepth, results);
368                            checkIfCancelled(childFile, childDepth, results);
369                        }
370                    }
371                }
372            }
373            handleDirectoryEnd(directory, depth, results);
374        }
375        checkIfCancelled(directory, depth, results);
376    }
377
378    //-----------------------------------------------------------------------
379    /**
380     * Checks whether the walk has been cancelled by calling {@link #handleIsCancelled},
381     * throwing a <code>CancelException</code> if it has.
382     * <p>
383     * Writers of subclasses should not normally call this method as it is called
384     * automatically by the walk of the tree. However, sometimes a single method,
385     * typically {@link #handleFile}, may take a long time to run. In that case,
386     * you may wish to check for cancellation by calling this method.
387     *
388     * @param file  the current file being processed
389     * @param depth  the current file level (starting directory = 0)
390     * @param results  the collection of result objects, may be updated
391     * @throws IOException if an I/O Error occurs
392     */
393    protected final void checkIfCancelled(final File file, final int depth, final Collection<T> results) throws
394            IOException {
395        if (handleIsCancelled(file, depth, results)) {
396            throw new CancelException(file, depth);
397        }
398    }
399
400    /**
401     * Overridable callback method invoked to determine if the entire walk
402     * operation should be immediately cancelled.
403     * <p>
404     * This method should be implemented by those subclasses that want to
405     * provide a public <code>cancel()</code> method available from another
406     * thread. The design pattern for the subclass should be as follows:
407     * <pre>
408     *  public class FooDirectoryWalker extends DirectoryWalker {
409     *    private volatile boolean cancelled = false;
410     *
411     *    public void cancel() {
412     *        cancelled = true;
413     *    }
414     *    private void handleIsCancelled(File file, int depth, Collection results) {
415     *        return cancelled;
416     *    }
417     *    protected void handleCancelled(File startDirectory,
418     *              Collection results, CancelException cancel) {
419     *        // implement processing required when a cancellation occurs
420     *    }
421     *  }
422     * </pre>
423     * <p>
424     * If this method returns true, then the directory walk is immediately
425     * cancelled. The next callback method will be {@link #handleCancelled}.
426     * <p>
427     * This implementation returns false.
428     *
429     * @param file  the file or directory being processed
430     * @param depth  the current directory level (starting directory = 0)
431     * @param results  the collection of result objects, may be updated
432     * @return true if the walk has been cancelled
433     * @throws IOException if an I/O Error occurs
434     */
435    protected boolean handleIsCancelled(
436            final File file, final int depth, final Collection<T> results) throws IOException {
437        // do nothing - overridable by subclass
438        return false;  // not cancelled
439    }
440
441    /**
442     * Overridable callback method invoked when the operation is cancelled.
443     * The file being processed when the cancellation occurred can be
444     * obtained from the exception.
445     * <p>
446     * This implementation just re-throws the {@link CancelException}.
447     *
448     * @param startDirectory  the directory that the walk started from
449     * @param results  the collection of result objects, may be updated
450     * @param cancel  the exception throw to cancel further processing
451     * containing details at the point of cancellation.
452     * @throws IOException if an I/O Error occurs
453     */
454    protected void handleCancelled(final File startDirectory, final Collection<T> results,
455                       final CancelException cancel) throws IOException {
456        // re-throw exception - overridable by subclass
457        throw cancel;
458    }
459
460    //-----------------------------------------------------------------------
461    /**
462     * Overridable callback method invoked at the start of processing.
463     * <p>
464     * This implementation does nothing.
465     *
466     * @param startDirectory  the directory to start from
467     * @param results  the collection of result objects, may be updated
468     * @throws IOException if an I/O Error occurs
469     */
470    protected void handleStart(final File startDirectory, final Collection<T> results) throws IOException {
471        // do nothing - overridable by subclass
472    }
473
474    /**
475     * Overridable callback method invoked to determine if a directory should be processed.
476     * <p>
477     * This method returns a boolean to indicate if the directory should be examined or not.
478     * If you return false, the entire directory and any subdirectories will be skipped.
479     * Note that this functionality is in addition to the filtering by file filter.
480     * <p>
481     * This implementation does nothing and returns true.
482     *
483     * @param directory  the current directory being processed
484     * @param depth  the current directory level (starting directory = 0)
485     * @param results  the collection of result objects, may be updated
486     * @return true to process this directory, false to skip this directory
487     * @throws IOException if an I/O Error occurs
488     */
489    protected boolean handleDirectory(final File directory, final int depth, final Collection<T> results) throws
490            IOException {
491        // do nothing - overridable by subclass
492        return true;  // process directory
493    }
494
495    /**
496     * Overridable callback method invoked at the start of processing each directory.
497     * <p>
498     * This implementation does nothing.
499     *
500     * @param directory  the current directory being processed
501     * @param depth  the current directory level (starting directory = 0)
502     * @param results  the collection of result objects, may be updated
503     * @throws IOException if an I/O Error occurs
504     */
505    protected void handleDirectoryStart(final File directory, final int depth, final Collection<T> results) throws
506            IOException {
507        // do nothing - overridable by subclass
508    }
509
510    /**
511     * Overridable callback method invoked with the contents of each directory.
512     * <p>
513     * This implementation returns the files unchanged
514     *
515     * @param directory  the current directory being processed
516     * @param depth  the current directory level (starting directory = 0)
517     * @param files the files (possibly filtered) in the directory, may be {@code null}
518     * @return the filtered list of files
519     * @throws IOException if an I/O Error occurs
520     * @since 2.0
521     */
522    protected File[] filterDirectoryContents(final File directory, final int depth, final File[] files) throws
523            IOException {
524        return files;
525    }
526
527    /**
528     * Overridable callback method invoked for each (non-directory) file.
529     * <p>
530     * This implementation does nothing.
531     *
532     * @param file  the current file being processed
533     * @param depth  the current directory level (starting directory = 0)
534     * @param results  the collection of result objects, may be updated
535     * @throws IOException if an I/O Error occurs
536     */
537    protected void handleFile(final File file, final int depth, final Collection<T> results) throws IOException {
538        // do nothing - overridable by subclass
539    }
540
541    /**
542     * Overridable callback method invoked for each restricted directory.
543     * <p>
544     * This implementation does nothing.
545     *
546     * @param directory  the restricted directory
547     * @param depth  the current directory level (starting directory = 0)
548     * @param results  the collection of result objects, may be updated
549     * @throws IOException if an I/O Error occurs
550     */
551    protected void handleRestricted(final File directory, final int depth, final Collection<T> results) throws
552            IOException {
553        // do nothing - overridable by subclass
554    }
555
556    /**
557     * Overridable callback method invoked at the end of processing each directory.
558     * <p>
559     * This implementation does nothing.
560     *
561     * @param directory  the directory being processed
562     * @param depth  the current directory level (starting directory = 0)
563     * @param results  the collection of result objects, may be updated
564     * @throws IOException if an I/O Error occurs
565     */
566    protected void handleDirectoryEnd(final File directory, final int depth, final Collection<T> results) throws
567            IOException {
568        // do nothing - overridable by subclass
569    }
570
571    /**
572     * Overridable callback method invoked at the end of processing.
573     * <p>
574     * This implementation does nothing.
575     *
576     * @param results  the collection of result objects, may be updated
577     * @throws IOException if an I/O Error occurs
578     */
579    protected void handleEnd(final Collection<T> results) throws IOException {
580        // do nothing - overridable by subclass
581    }
582
583    //-----------------------------------------------------------------------
584    /**
585     * CancelException is thrown in DirectoryWalker to cancel the current
586     * processing.
587     */
588    public static class CancelException extends IOException {
589
590        /** Serialization id. */
591        private static final long serialVersionUID = 1347339620135041008L;
592
593        /** The file being processed when the exception was thrown. */
594        private final File file;
595        /** The file depth when the exception was thrown. */
596        private final int depth;
597
598        /**
599         * Constructs a <code>CancelException</code> with
600         * the file and depth when cancellation occurred.
601         *
602         * @param file  the file when the operation was cancelled, may be null
603         * @param depth  the depth when the operation was cancelled, may be null
604         */
605        public CancelException(final File file, final int depth) {
606            this("Operation Cancelled", file, depth);
607        }
608
609        /**
610         * Constructs a <code>CancelException</code> with
611         * an appropriate message and the file and depth when
612         * cancellation occurred.
613         *
614         * @param message  the detail message
615         * @param file  the file when the operation was cancelled
616         * @param depth  the depth when the operation was cancelled
617         */
618        public CancelException(final String message, final File file, final int depth) {
619            super(message);
620            this.file = file;
621            this.depth = depth;
622        }
623
624        /**
625         * Return the file when the operation was cancelled.
626         *
627         * @return the file when the operation was cancelled
628         */
629        public File getFile() {
630            return file;
631        }
632
633        /**
634         * Return the depth when the operation was cancelled.
635         *
636         * @return the depth when the operation was cancelled
637         */
638        public int getDepth() {
639            return depth;
640        }
641    }
642}