001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.io;
018    
019    import java.io.File;
020    import java.io.FileFilter;
021    import java.io.IOException;
022    import java.util.Collection;
023    
024    import org.apache.commons.io.filefilter.FileFilterUtils;
025    import org.apache.commons.io.filefilter.IOFileFilter;
026    import org.apache.commons.io.filefilter.TrueFileFilter;
027    
028    /**
029     * Abstract class that walks through a directory hierarchy and provides
030     * subclasses with convenient hooks to add specific behaviour.
031     * <p>
032     * This class operates with a {@link FileFilter} and maximum depth to
033     * limit the files and directories visited.
034     * Commons IO supplies many common filter implementations in the 
035     * <a href="filefilter/package-summary.html"> filefilter</a> package.
036     * <p>
037     * The following sections describe:
038     *   <ul>
039     *      <li><a href="#example">1. Example Implementation</a> - example
040     *          <code>FileCleaner</code> implementation.</li>
041     *      <li><a href="#filter">2. Filter Example</a> - using 
042     *          {@link FileFilter}(s) with <code>DirectoryWalker</code>.</li>
043     *      <li><a href="#cancel">3. Cancellation</a> - how to implement cancellation
044     *          behaviour.</li>
045     *   </ul>
046     *
047     * <a name="example"></a>
048     * <h3>1. Example Implementation</h3>
049     *
050     * There are many possible extensions, for example, to delete all
051     * files and '.svn' directories, and return a list of deleted files:
052     * <pre>
053     *  public class FileCleaner extends DirectoryWalker {
054     *
055     *    public FileCleaner() {
056     *      super();
057     *    }
058     *
059     *    public List clean(File startDirectory) {
060     *      List results = new ArrayList();
061     *      walk(startDirectory, results);
062     *      return results;
063     *    }
064     *
065     *    protected boolean handleDirectory(File directory, int depth, Collection results) {
066     *      // delete svn directories and then skip
067     *      if (".svn".equals(directory.getName())) {
068     *        directory.delete();
069     *        return false;
070     *      } else {
071     *        return true;
072     *      }
073     *
074     *    }
075     *
076     *    protected void handleFile(File file, int depth, Collection results) {
077     *      // delete file and add to list of deleted
078     *      file.delete();
079     *      results.add(file);
080     *    }
081     *  }
082     * </pre>
083     *
084     * <a name="filter"></a>
085     * <h3>2. Filter Example</h3>
086     *
087     * Choosing which directories and files to process can be a key aspect
088     * of using this class. This information can be setup in three ways,
089     * via three different constructors.
090     * <p>
091     * The first option is to visit all directories and files.
092     * This is achieved via the no-args constructor.
093     * <p>
094     * The second constructor option is to supply a single {@link FileFilter}
095     * that describes the files and directories to visit. Care must be taken
096     * with this option as the same filter is used for both directories
097     * and files.
098     * <p>
099     * For example, if you wanted all directories which are not hidden
100     * and files which end in ".txt":
101     * <pre>
102     *  public class FooDirectoryWalker extends DirectoryWalker {
103     *    public FooDirectoryWalker(FileFilter filter) {
104     *      super(filter, -1);
105     *    }
106     *  }
107     *  
108     *  // Build up the filters and create the walker
109     *    // Create a filter for Non-hidden directories
110     *    IOFileFilter fooDirFilter = 
111     *        FileFilterUtils.andFileFilter(FileFilterUtils.directoryFileFilter(),
112     *                                      HiddenFileFilter.VISIBLE);
113     *
114     *    // Create a filter for Files ending in ".txt"
115     *    IOFileFilter fooFileFilter = 
116     *        FileFilterUtils.andFileFilter(FileFilterUtils.fileFileFilter(),
117     *                                      FileFilterUtils.suffixFileFilter(".txt"));
118     *
119     *    // Combine the directory and file filters using an OR condition
120     *    java.io.FileFilter fooFilter = 
121     *        FileFilterUtils.orFileFilter(fooDirFilter, fooFileFilter);
122     *
123     *    // Use the filter to construct a DirectoryWalker implementation
124     *    FooDirectoryWalker walker = new FooDirectoryWalker(fooFilter);
125     * </pre>
126     * <p>
127     * The third constructor option is to specify separate filters, one for
128     * directories and one for files. These are combined internally to form
129     * the correct <code>FileFilter</code>, something which is very easy to
130     * get wrong when attempted manually, particularly when trying to
131     * express constructs like 'any file in directories named docs'.
132     * <p>
133     * For example, if you wanted all directories which are not hidden
134     * and files which end in ".txt":
135     * <pre>
136     *  public class FooDirectoryWalker extends DirectoryWalker {
137     *    public FooDirectoryWalker(IOFileFilter dirFilter, IOFileFilter fileFilter) {
138     *      super(dirFilter, fileFilter, -1);
139     *    }
140     *  }
141     *  
142     *  // Use the filters to construct the walker
143     *  FooDirectoryWalker walker = new FooDirectoryWalker(
144     *    HiddenFileFilter.VISIBLE,
145     *    FileFilterUtils.suffixFileFilter(".txt")
146     *  );
147     * </pre>
148     * This is much simpler than the previous example, and is why it is the preferred
149     * option for filtering.
150     *
151     * <a name="cancel"></a>
152     * <h3>3. Cancellation</h3>
153     *
154     * The DirectoryWalker contains some of the logic required for cancel processing.
155     * Subclasses must complete the implementation.
156     * <p>
157     * What <code>DirectoryWalker</code> does provide for cancellation is:
158     * <ul>
159     *    <li>{@link CancelException} which can be thrown in any of the
160     *        <i>lifecycle</i> methods to stop processing.</li>
161     *    <li>The <code>walk()</code> method traps thrown {@link CancelException}
162     *        and calls the <code>handleCancelled()</code> method, providing
163     *        a place for custom cancel processing.</li>
164     * </ul>
165     * <p>
166     * Implementations need to provide:
167     * <ul>
168     *    <li>The decision logic on whether to cancel processing or not.</li>
169     *    <li>Constructing and throwing a {@link CancelException}.</li>
170     *    <li>Custom cancel processing in the <code>handleCancelled()</code> method.</li>
171     * </ul>
172     * <p>
173     * Two possible scenarios are envisaged for cancellation:
174     * <ul>
175     *    <li><a href="#external">3.1 External / Multi-threaded</a> - cancellation being
176     *        decided/initiated by an external process.</li>
177     *    <li><a href="#internal">3.2 Internal</a> - cancellation being decided/initiated 
178     *        from within a DirectoryWalker implementation.</li>
179     * </ul>
180     * <p>
181     * The following sections provide example implementations for these two different
182     * scenarios.
183     *
184     * <a name="external"></a>
185     * <h4>3.1 External / Multi-threaded</h4>
186     *
187     * This example provides a public <code>cancel()</code> method that can be
188     * called by another thread to stop the processing. A typical example use-case
189     * would be a cancel button on a GUI. Calling this method sets a
190     * <a href="http://java.sun.com/docs/books/jls/second_edition/html/classes.doc.html#36930">
191     * volatile</a> flag to ensure it will work properly in a multi-threaded environment.
192     * The flag is returned by the <code>handleIsCancelled()</code> method, which
193     * will cause the walk to stop immediately. The <code>handleCancelled()</code>
194     * method will be the next, and last, callback method received once cancellation
195     * has occurred.
196     *
197     * <pre>
198     *  public class FooDirectoryWalker extends DirectoryWalker {
199     *
200     *    private volatile boolean cancelled = false;
201     *
202     *    public void cancel() {
203     *        cancelled = true;
204     *    }
205     *
206     *    protected boolean handleIsCancelled(File file, int depth, Collection results) {
207     *        return cancelled;
208     *    }
209     *
210     *    protected void handleCancelled(File startDirectory, Collection results, CancelException cancel) {
211     *        // implement processing required when a cancellation occurs
212     *    }
213     *  }
214     * </pre>
215     *
216     * <a name="internal"></a>
217     * <h4>3.2 Internal</h4>
218     *
219     * This shows an example of how internal cancellation processing could be implemented.
220     * <b>Note</b> the decision logic and throwing a {@link CancelException} could be implemented
221     * in any of the <i>lifecycle</i> methods. 
222     *
223     * <pre>
224     *  public class BarDirectoryWalker extends DirectoryWalker {
225     *
226     *    protected boolean handleDirectory(File directory, int depth, Collection results) throws IOException {
227     *        // cancel if hidden directory
228     *        if (directory.isHidden()) {
229     *            throw new CancelException(directory, depth);
230     *        }
231     *        return true;
232     *    }
233     *
234     *    protected void handleFile(File file, int depth, Collection results) throws IOException {
235     *        // cancel if read-only file
236     *        if (!file.canWrite()) {
237     *            throw new CancelException(file, depth);
238     *        }
239     *        results.add(file);
240     *    }
241     *
242     *    protected void handleCancelled(File startDirectory, Collection results, CancelException cancel) {
243     *        // implement processing required when a cancellation occurs
244     *    }
245     *  }
246     * </pre>
247     *
248     * @since 1.3
249     * @version $Id: DirectoryWalker.java 1304052 2012-03-22 20:55:29Z ggregory $
250     */
251    public abstract class DirectoryWalker<T> {
252    
253        /**
254         * The file filter to use to filter files and directories.
255         */
256        private final FileFilter filter;
257        /**
258         * The limit on the directory depth to walk.
259         */
260        private final int depthLimit;
261    
262        /**
263         * Construct an instance with no filtering and unlimited <i>depth</i>.
264         */
265        protected DirectoryWalker() {
266            this(null, -1);
267        }
268    
269        /**
270         * Construct an instance with a filter and limit the <i>depth</i> navigated to.
271         * <p>
272         * The filter controls which files and directories will be navigated to as
273         * part of the walk. The {@link FileFilterUtils} class is useful for combining
274         * various filters together. A <code>null</code> filter means that no
275         * filtering should occur and all files and directories will be visited.
276         *
277         * @param filter  the filter to apply, null means visit all files
278         * @param depthLimit  controls how <i>deep</i> the hierarchy is
279         *  navigated to (less than 0 means unlimited)
280         */
281        protected DirectoryWalker(FileFilter filter, int depthLimit) {
282            this.filter = filter;
283            this.depthLimit = depthLimit;
284        }
285    
286        /**
287         * Construct an instance with a directory and a file filter and an optional
288         * limit on the <i>depth</i> navigated to.
289         * <p>
290         * The filters control which files and directories will be navigated to as part
291         * of the walk. This constructor uses {@link FileFilterUtils#makeDirectoryOnly(IOFileFilter)}
292         * and {@link FileFilterUtils#makeFileOnly(IOFileFilter)} internally to combine the filters.
293         * A <code>null</code> filter means that no filtering should occur.
294         *
295         * @param directoryFilter  the filter to apply to directories, null means visit all directories
296         * @param fileFilter  the filter to apply to files, null means visit all files
297         * @param depthLimit  controls how <i>deep</i> the hierarchy is
298         *  navigated to (less than 0 means unlimited)
299         */
300        protected DirectoryWalker(IOFileFilter directoryFilter, IOFileFilter fileFilter, int depthLimit) {
301            if (directoryFilter == null && fileFilter == null) {
302                this.filter = null;
303            } else {
304                directoryFilter = directoryFilter != null ? directoryFilter : TrueFileFilter.TRUE;
305                fileFilter = fileFilter != null ? fileFilter : TrueFileFilter.TRUE;
306                directoryFilter = FileFilterUtils.makeDirectoryOnly(directoryFilter);
307                fileFilter = FileFilterUtils.makeFileOnly(fileFilter);
308                this.filter = FileFilterUtils.or(directoryFilter, fileFilter);
309            }
310            this.depthLimit = depthLimit;
311        }
312    
313        //-----------------------------------------------------------------------
314        /**
315         * Internal method that walks the directory hierarchy in a depth-first manner.
316         * <p>
317         * Users of this class do not need to call this method. This method will
318         * be called automatically by another (public) method on the specific subclass.
319         * <p>
320         * Writers of subclasses should call this method to start the directory walk.
321         * Once called, this method will emit events as it walks the hierarchy.
322         * The event methods have the prefix <code>handle</code>.
323         *
324         * @param startDirectory  the directory to start from, not null
325         * @param results  the collection of result objects, may be updated
326         * @throws NullPointerException if the start directory is null
327         * @throws IOException if an I/O Error occurs
328         */
329        protected final void walk(File startDirectory, Collection<T> results) throws IOException {
330            if (startDirectory == null) {
331                throw new NullPointerException("Start Directory is null");
332            }
333            try {
334                handleStart(startDirectory, results);
335                walk(startDirectory, 0, results);
336                handleEnd(results);
337            } catch(CancelException cancel) {
338                handleCancelled(startDirectory, results, cancel);
339            }
340        }
341    
342        /**
343         * Main recursive method to examine the directory hierarchy.
344         *
345         * @param directory  the directory to examine, not null
346         * @param depth  the directory level (starting directory = 0)
347         * @param results  the collection of result objects, may be updated
348         * @throws IOException if an I/O Error occurs
349         */
350        private void walk(File directory, int depth, Collection<T> results) throws IOException {
351            checkIfCancelled(directory, depth, results);
352            if (handleDirectory(directory, depth, results)) {
353                handleDirectoryStart(directory, depth, results);
354                int childDepth = depth + 1;
355                if (depthLimit < 0 || childDepth <= depthLimit) {
356                    checkIfCancelled(directory, depth, results);
357                    File[] childFiles = filter == null ? directory.listFiles() : directory.listFiles(filter);
358                    childFiles = filterDirectoryContents(directory, depth, childFiles);
359                    if (childFiles == null) {
360                        handleRestricted(directory, childDepth, results);
361                    } else {
362                        for (File childFile : childFiles) {
363                            if (childFile.isDirectory()) {
364                                walk(childFile, childDepth, results);
365                            } else {
366                                checkIfCancelled(childFile, childDepth, results);
367                                handleFile(childFile, childDepth, results);
368                                checkIfCancelled(childFile, childDepth, results);
369                            }
370                        }
371                    }
372                }
373                handleDirectoryEnd(directory, depth, results);
374            }
375            checkIfCancelled(directory, depth, results);
376        }
377    
378        //-----------------------------------------------------------------------
379        /**
380         * Checks whether the walk has been cancelled by calling {@link #handleIsCancelled},
381         * throwing a <code>CancelException</code> if it has.
382         * <p>
383         * Writers of subclasses should not normally call this method as it is called
384         * automatically by the walk of the tree. However, sometimes a single method,
385         * typically {@link #handleFile}, may take a long time to run. In that case,
386         * you may wish to check for cancellation by calling this method.
387         * 
388         * @param file  the current file being processed
389         * @param depth  the current file level (starting directory = 0)
390         * @param results  the collection of result objects, may be updated
391         * @throws IOException if an I/O Error occurs
392         */
393        protected final void checkIfCancelled(File file, int depth, Collection<T> results) throws IOException {
394            if (handleIsCancelled(file, depth, results)) {
395                throw new CancelException(file, depth);
396            }
397        }
398    
399        /**
400         * Overridable callback method invoked to determine if the entire walk
401         * operation should be immediately cancelled.
402         * <p>
403         * This method should be implemented by those subclasses that want to
404         * provide a public <code>cancel()</code> method available from another
405         * thread. The design pattern for the subclass should be as follows:
406         * <pre>
407         *  public class FooDirectoryWalker extends DirectoryWalker {
408         *    private volatile boolean cancelled = false;
409         *
410         *    public void cancel() {
411         *        cancelled = true;
412         *    }
413         *    private void handleIsCancelled(File file, int depth, Collection results) {
414         *        return cancelled;
415         *    }
416         *    protected void handleCancelled(File startDirectory,
417         *              Collection results, CancelException cancel) {
418         *        // implement processing required when a cancellation occurs
419         *    }
420         *  }
421         * </pre>
422         * <p>
423         * If this method returns true, then the directory walk is immediately
424         * cancelled. The next callback method will be {@link #handleCancelled}.
425         * <p>
426         * This implementation returns false.
427         *
428         * @param file  the file or directory being processed
429         * @param depth  the current directory level (starting directory = 0)
430         * @param results  the collection of result objects, may be updated
431         * @return true if the walk has been cancelled
432         * @throws IOException if an I/O Error occurs
433         */
434        protected boolean handleIsCancelled(
435                File file, int depth, Collection<T> results) throws IOException {
436            // do nothing - overridable by subclass
437            return false;  // not cancelled
438        }
439    
440        /**
441         * Overridable callback method invoked when the operation is cancelled.
442         * The file being processed when the cancellation occurred can be
443         * obtained from the exception.
444         * <p>
445         * This implementation just re-throws the {@link CancelException}.
446         *
447         * @param startDirectory  the directory that the walk started from
448         * @param results  the collection of result objects, may be updated
449         * @param cancel  the exception throw to cancel further processing
450         * containing details at the point of cancellation. 
451         * @throws IOException if an I/O Error occurs
452         */
453        protected void handleCancelled(File startDirectory, Collection<T> results,
454                           CancelException cancel) throws IOException {
455            // re-throw exception - overridable by subclass
456            throw cancel;
457        }
458    
459        //-----------------------------------------------------------------------
460        /**
461         * Overridable callback method invoked at the start of processing.
462         * <p>
463         * This implementation does nothing.
464         *
465         * @param startDirectory  the directory to start from
466         * @param results  the collection of result objects, may be updated
467         * @throws IOException if an I/O Error occurs
468         */
469        protected void handleStart(File startDirectory, Collection<T> results) throws IOException {
470            // do nothing - overridable by subclass
471        }
472    
473        /**
474         * Overridable callback method invoked to determine if a directory should be processed.
475         * <p>
476         * This method returns a boolean to indicate if the directory should be examined or not.
477         * If you return false, the entire directory and any subdirectories will be skipped.
478         * Note that this functionality is in addition to the filtering by file filter.
479         * <p>
480         * This implementation does nothing and returns true.
481         *
482         * @param directory  the current directory being processed
483         * @param depth  the current directory level (starting directory = 0)
484         * @param results  the collection of result objects, may be updated
485         * @return true to process this directory, false to skip this directory
486         * @throws IOException if an I/O Error occurs
487         */
488        protected boolean handleDirectory(File directory, int depth, Collection<T> results) throws IOException {
489            // do nothing - overridable by subclass
490            return true;  // process directory
491        }
492    
493        /**
494         * Overridable callback method invoked at the start of processing each directory.
495         * <p>
496         * This implementation does nothing.
497         *
498         * @param directory  the current directory being processed
499         * @param depth  the current directory level (starting directory = 0)
500         * @param results  the collection of result objects, may be updated
501         * @throws IOException if an I/O Error occurs
502         */
503        protected void handleDirectoryStart(File directory, int depth, Collection<T> results) throws IOException {
504            // do nothing - overridable by subclass
505        }
506    
507        /**
508         * Overridable callback method invoked with the contents of each directory.
509         * <p>
510         * This implementation returns the files unchanged
511         *
512         * @param directory  the current directory being processed
513         * @param depth  the current directory level (starting directory = 0)
514         * @param files the files (possibly filtered) in the directory
515         * @return the filtered list of files
516         * @throws IOException if an I/O Error occurs
517         * @since 2.0
518         */
519        protected File[] filterDirectoryContents(File directory, int depth, File[] files) throws IOException {
520            return files;
521        }
522    
523        /**
524         * Overridable callback method invoked for each (non-directory) file.
525         * <p>
526         * This implementation does nothing.
527         *
528         * @param file  the current file being processed
529         * @param depth  the current directory level (starting directory = 0)
530         * @param results  the collection of result objects, may be updated
531         * @throws IOException if an I/O Error occurs
532         */
533        protected void handleFile(File file, int depth, Collection<T> results) throws IOException {
534            // do nothing - overridable by subclass
535        }
536    
537        /**
538         * Overridable callback method invoked for each restricted directory.
539         * <p>
540         * This implementation does nothing.
541         *
542         * @param directory  the restricted directory
543         * @param depth  the current directory level (starting directory = 0)
544         * @param results  the collection of result objects, may be updated
545         * @throws IOException if an I/O Error occurs
546         */
547        protected void handleRestricted(File directory, int depth, Collection<T> results) throws IOException  {
548            // do nothing - overridable by subclass
549        }
550    
551        /**
552         * Overridable callback method invoked at the end of processing each directory.
553         * <p>
554         * This implementation does nothing.
555         *
556         * @param directory  the directory being processed
557         * @param depth  the current directory level (starting directory = 0)
558         * @param results  the collection of result objects, may be updated
559         * @throws IOException if an I/O Error occurs
560         */
561        protected void handleDirectoryEnd(File directory, int depth, Collection<T> results) throws IOException {
562            // do nothing - overridable by subclass
563        }
564    
565        /**
566         * Overridable callback method invoked at the end of processing.
567         * <p>
568         * This implementation does nothing.
569         *
570         * @param results  the collection of result objects, may be updated
571         * @throws IOException if an I/O Error occurs
572         */
573        protected void handleEnd(Collection<T> results) throws IOException {
574            // do nothing - overridable by subclass
575        }
576    
577        //-----------------------------------------------------------------------
578        /**
579         * CancelException is thrown in DirectoryWalker to cancel the current
580         * processing.
581         */
582        public static class CancelException extends IOException {
583    
584            /** Serialization id. */
585            private static final long serialVersionUID = 1347339620135041008L;
586            
587            /** The file being processed when the exception was thrown. */
588            private final File file;
589            /** The file depth when the exception was thrown. */
590            private final int depth;
591    
592            /**
593             * Constructs a <code>CancelException</code> with
594             * the file and depth when cancellation occurred.
595             *
596             * @param file  the file when the operation was cancelled, may be null
597             * @param depth  the depth when the operation was cancelled, may be null
598             */
599            public CancelException(File file, int depth) {
600                this("Operation Cancelled", file, depth);
601            }
602    
603            /**
604             * Constructs a <code>CancelException</code> with
605             * an appropriate message and the file and depth when
606             * cancellation occurred.
607             *
608             * @param message  the detail message
609             * @param file  the file when the operation was cancelled
610             * @param depth  the depth when the operation was cancelled
611             */
612            public CancelException(String message, File file, int depth) {
613                super(message);
614                this.file = file;
615                this.depth = depth;
616            }
617    
618            /**
619             * Return the file when the operation was cancelled.
620             *
621             * @return the file when the operation was cancelled
622             */
623            public File getFile() {
624                return file;
625            }
626    
627            /**
628             * Return the depth when the operation was cancelled.
629             *
630             * @return the depth when the operation was cancelled
631             */
632            public int getDepth() {
633                return depth;
634            }
635        }
636    }