001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.File;
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Collection;
023import java.util.Stack;
024
025/**
026 * General filename and filepath manipulation utilities.
027 * <p>
028 * When dealing with filenames you can hit problems when moving from a Windows
029 * based development machine to a Unix based production machine.
030 * This class aims to help avoid those problems.
031 * <p>
032 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
033 * using JDK {@link java.io.File File} objects and the two argument constructor
034 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
035 * <p>
036 * Most methods on this class are designed to work the same on both Unix and Windows.
037 * Those that don't include 'System', 'Unix' or 'Windows' in their name.
038 * <p>
039 * Most methods recognise both separators (forward and back), and both
040 * sets of prefixes. See the javadoc of each method for details.
041 * <p>
042 * This class defines six components within a filename
043 * (example C:\dev\project\file.txt):
044 * <ul>
045 * <li>the prefix - C:\</li>
046 * <li>the path - dev\project\</li>
047 * <li>the full path - C:\dev\project\</li>
048 * <li>the name - file.txt</li>
049 * <li>the base name - file</li>
050 * <li>the extension - txt</li>
051 * </ul>
052 * Note that this class works best if directory filenames end with a separator.
053 * If you omit the last separator, it is impossible to determine if the filename
054 * corresponds to a file or a directory. As a result, we have chosen to say
055 * it corresponds to a file.
056 * <p>
057 * This class only supports Unix and Windows style names.
058 * Prefixes are matched as follows:
059 * <pre>
060 * Windows:
061 * a\b\c.txt           --&gt; ""          --&gt; relative
062 * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
063 * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
064 * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
065 * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
066 *
067 * Unix:
068 * a/b/c.txt           --&gt; ""          --&gt; relative
069 * /a/b/c.txt          --&gt; "/"         --&gt; absolute
070 * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
071 * ~                   --&gt; "~/"        --&gt; current user (slash added)
072 * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
073 * ~user               --&gt; "~user/"    --&gt; named user (slash added)
074 * </pre>
075 * Both prefix styles are matched always, irrespective of the machine that you are
076 * currently running on.
077 * <p>
078 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
079 *
080 * @version $Id: FilenameUtils.java 1702170 2015-09-10 06:35:02Z krosenvold $
081 * @since 1.1
082 */
083public class FilenameUtils {
084
085    private static final int NOT_FOUND = -1;
086
087    /**
088     * The extension separator character.
089     * @since 1.4
090     */
091    public static final char EXTENSION_SEPARATOR = '.';
092
093    /**
094     * The extension separator String.
095     * @since 1.4
096     */
097    public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);
098
099    /**
100     * The Unix separator character.
101     */
102    private static final char UNIX_SEPARATOR = '/';
103
104    /**
105     * The Windows separator character.
106     */
107    private static final char WINDOWS_SEPARATOR = '\\';
108
109    /**
110     * The system separator character.
111     */
112    private static final char SYSTEM_SEPARATOR = File.separatorChar;
113
114    /**
115     * The separator character that is the opposite of the system separator.
116     */
117    private static final char OTHER_SEPARATOR;
118    static {
119        if (isSystemWindows()) {
120            OTHER_SEPARATOR = UNIX_SEPARATOR;
121        } else {
122            OTHER_SEPARATOR = WINDOWS_SEPARATOR;
123        }
124    }
125
126    /**
127     * Instances should NOT be constructed in standard programming.
128     */
129    public FilenameUtils() {
130        super();
131    }
132
133    //-----------------------------------------------------------------------
134    /**
135     * Determines if Windows file system is in use.
136     *
137     * @return true if the system is Windows
138     */
139    static boolean isSystemWindows() {
140        return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
141    }
142
143    //-----------------------------------------------------------------------
144    /**
145     * Checks if the character is a separator.
146     *
147     * @param ch  the character to check
148     * @return true if it is a separator character
149     */
150    private static boolean isSeparator(final char ch) {
151        return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR;
152    }
153
154    //-----------------------------------------------------------------------
155    /**
156     * Normalizes a path, removing double and single dot path steps.
157     * <p>
158     * This method normalizes a path to a standard format.
159     * The input may contain separators in either Unix or Windows format.
160     * The output will contain separators in the format of the system.
161     * <p>
162     * A trailing slash will be retained.
163     * A double slash will be merged to a single slash (but UNC names are handled).
164     * A single dot path segment will be removed.
165     * A double dot will cause that path segment and the one before to be removed.
166     * If the double dot has no parent path segment to work with, {@code null}
167     * is returned.
168     * <p>
169     * The output will be the same on both Unix and Windows except
170     * for the separator character.
171     * <pre>
172     * /foo//               --&gt;   /foo/
173     * /foo/./              --&gt;   /foo/
174     * /foo/../bar          --&gt;   /bar
175     * /foo/../bar/         --&gt;   /bar/
176     * /foo/../bar/../baz   --&gt;   /baz
177     * //foo//./bar         --&gt;   /foo/bar
178     * /../                 --&gt;   null
179     * ../foo               --&gt;   null
180     * foo/bar/..           --&gt;   foo/
181     * foo/../../bar        --&gt;   null
182     * foo/../bar           --&gt;   bar
183     * //server/foo/../bar  --&gt;   //server/bar
184     * //server/../bar      --&gt;   null
185     * C:\foo\..\bar        --&gt;   C:\bar
186     * C:\..\bar            --&gt;   null
187     * ~/foo/../bar/        --&gt;   ~/bar/
188     * ~/../bar             --&gt;   null
189     * </pre>
190     * (Note the file separator returned will be correct for Windows/Unix)
191     *
192     * @param filename  the filename to normalize, null returns null
193     * @return the normalized filename, or null if invalid. Null bytes inside string will be removed
194     */
195    public static String normalize(final String filename) {
196        return doNormalize(filename, SYSTEM_SEPARATOR, true);
197    }
198    /**
199     * Normalizes a path, removing double and single dot path steps.
200     * <p>
201     * This method normalizes a path to a standard format.
202     * The input may contain separators in either Unix or Windows format.
203     * The output will contain separators in the format specified.
204     * <p>
205     * A trailing slash will be retained.
206     * A double slash will be merged to a single slash (but UNC names are handled).
207     * A single dot path segment will be removed.
208     * A double dot will cause that path segment and the one before to be removed.
209     * If the double dot has no parent path segment to work with, {@code null}
210     * is returned.
211     * <p>
212     * The output will be the same on both Unix and Windows except
213     * for the separator character.
214     * <pre>
215     * /foo//               --&gt;   /foo/
216     * /foo/./              --&gt;   /foo/
217     * /foo/../bar          --&gt;   /bar
218     * /foo/../bar/         --&gt;   /bar/
219     * /foo/../bar/../baz   --&gt;   /baz
220     * //foo//./bar         --&gt;   /foo/bar
221     * /../                 --&gt;   null
222     * ../foo               --&gt;   null
223     * foo/bar/..           --&gt;   foo/
224     * foo/../../bar        --&gt;   null
225     * foo/../bar           --&gt;   bar
226     * //server/foo/../bar  --&gt;   //server/bar
227     * //server/../bar      --&gt;   null
228     * C:\foo\..\bar        --&gt;   C:\bar
229     * C:\..\bar            --&gt;   null
230     * ~/foo/../bar/        --&gt;   ~/bar/
231     * ~/../bar             --&gt;   null
232     * </pre>
233     * The output will be the same on both Unix and Windows including
234     * the separator character.
235     *
236     * @param filename  the filename to normalize, null returns null
237     * @param unixSeparator {@code true} if a unix separator should
238     * be used or {@code false} if a windows separator should be used.
239     * @return the normalized filename, or null if invalid. Null bytes inside string will be removed
240     * @since 2.0
241     */
242    public static String normalize(final String filename, final boolean unixSeparator) {
243        final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
244        return doNormalize(filename, separator, true);
245    }
246
247    //-----------------------------------------------------------------------
248    /**
249     * Normalizes a path, removing double and single dot path steps,
250     * and removing any final directory separator.
251     * <p>
252     * This method normalizes a path to a standard format.
253     * The input may contain separators in either Unix or Windows format.
254     * The output will contain separators in the format of the system.
255     * <p>
256     * A trailing slash will be removed.
257     * A double slash will be merged to a single slash (but UNC names are handled).
258     * A single dot path segment will be removed.
259     * A double dot will cause that path segment and the one before to be removed.
260     * If the double dot has no parent path segment to work with, {@code null}
261     * is returned.
262     * <p>
263     * The output will be the same on both Unix and Windows except
264     * for the separator character.
265     * <pre>
266     * /foo//               --&gt;   /foo
267     * /foo/./              --&gt;   /foo
268     * /foo/../bar          --&gt;   /bar
269     * /foo/../bar/         --&gt;   /bar
270     * /foo/../bar/../baz   --&gt;   /baz
271     * //foo//./bar         --&gt;   /foo/bar
272     * /../                 --&gt;   null
273     * ../foo               --&gt;   null
274     * foo/bar/..           --&gt;   foo
275     * foo/../../bar        --&gt;   null
276     * foo/../bar           --&gt;   bar
277     * //server/foo/../bar  --&gt;   //server/bar
278     * //server/../bar      --&gt;   null
279     * C:\foo\..\bar        --&gt;   C:\bar
280     * C:\..\bar            --&gt;   null
281     * ~/foo/../bar/        --&gt;   ~/bar
282     * ~/../bar             --&gt;   null
283     * </pre>
284     * (Note the file separator returned will be correct for Windows/Unix)
285     *
286     * @param filename  the filename to normalize, null returns null
287     * @return the normalized filename, or null if invalid. Null bytes inside string will be removed
288     */
289    public static String normalizeNoEndSeparator(final String filename) {
290        return doNormalize(filename, SYSTEM_SEPARATOR, false);
291    }
292
293    /**
294     * Normalizes a path, removing double and single dot path steps,
295     * and removing any final directory separator.
296     * <p>
297     * This method normalizes a path to a standard format.
298     * The input may contain separators in either Unix or Windows format.
299     * The output will contain separators in the format specified.
300     * <p>
301     * A trailing slash will be removed.
302     * A double slash will be merged to a single slash (but UNC names are handled).
303     * A single dot path segment will be removed.
304     * A double dot will cause that path segment and the one before to be removed.
305     * If the double dot has no parent path segment to work with, {@code null}
306     * is returned.
307     * <p>
308     * The output will be the same on both Unix and Windows including
309     * the separator character.
310     * <pre>
311     * /foo//               --&gt;   /foo
312     * /foo/./              --&gt;   /foo
313     * /foo/../bar          --&gt;   /bar
314     * /foo/../bar/         --&gt;   /bar
315     * /foo/../bar/../baz   --&gt;   /baz
316     * //foo//./bar         --&gt;   /foo/bar
317     * /../                 --&gt;   null
318     * ../foo               --&gt;   null
319     * foo/bar/..           --&gt;   foo
320     * foo/../../bar        --&gt;   null
321     * foo/../bar           --&gt;   bar
322     * //server/foo/../bar  --&gt;   //server/bar
323     * //server/../bar      --&gt;   null
324     * C:\foo\..\bar        --&gt;   C:\bar
325     * C:\..\bar            --&gt;   null
326     * ~/foo/../bar/        --&gt;   ~/bar
327     * ~/../bar             --&gt;   null
328     * </pre>
329     *
330     * @param filename  the filename to normalize, null returns null
331     * @param unixSeparator {@code true} if a unix separator should
332     * be used or {@code false} if a windows separtor should be used.
333     * @return the normalized filename, or null if invalid. Null bytes inside string will be removed
334     * @since 2.0
335     */
336    public static String normalizeNoEndSeparator(final String filename, final boolean unixSeparator) {
337         final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
338        return doNormalize(filename, separator, false);
339    }
340
341    /**
342     * Internal method to perform the normalization.
343     *
344     * @param filename  the filename
345     * @param separator The separator character to use
346     * @param keepSeparator  true to keep the final separator
347     * @return the normalized filename. Null bytes inside string will be removed.
348     */
349    private static String doNormalize(final String filename, final char separator, final boolean keepSeparator) {
350        if (filename == null) {
351            return null;
352        }
353
354        failIfNullBytePresent(filename);
355
356        int size = filename.length();
357        if (size == 0) {
358            return filename;
359        }
360        final int prefix = getPrefixLength(filename);
361        if (prefix < 0) {
362            return null;
363        }
364
365        final char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
366        filename.getChars(0, filename.length(), array, 0);
367
368        // fix separators throughout
369        final char otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR;
370        for (int i = 0; i < array.length; i++) {
371            if (array[i] == otherSeparator) {
372                array[i] = separator;
373            }
374        }
375
376        // add extra separator on the end to simplify code below
377        boolean lastIsDirectory = true;
378        if (array[size - 1] != separator) {
379            array[size++] = separator;
380            lastIsDirectory = false;
381        }
382
383        // adjoining slashes
384        for (int i = prefix + 1; i < size; i++) {
385            if (array[i] == separator && array[i - 1] == separator) {
386                System.arraycopy(array, i, array, i - 1, size - i);
387                size--;
388                i--;
389            }
390        }
391
392        // dot slash
393        for (int i = prefix + 1; i < size; i++) {
394            if (array[i] == separator && array[i - 1] == '.' &&
395                    (i == prefix + 1 || array[i - 2] == separator)) {
396                if (i == size - 1) {
397                    lastIsDirectory = true;
398                }
399                System.arraycopy(array, i + 1, array, i - 1, size - i);
400                size -=2;
401                i--;
402            }
403        }
404
405        // double dot slash
406        outer:
407        for (int i = prefix + 2; i < size; i++) {
408            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
409                    (i == prefix + 2 || array[i - 3] == separator)) {
410                if (i == prefix + 2) {
411                    return null;
412                }
413                if (i == size - 1) {
414                    lastIsDirectory = true;
415                }
416                int j;
417                for (j = i - 4 ; j >= prefix; j--) {
418                    if (array[j] == separator) {
419                        // remove b/../ from a/b/../c
420                        System.arraycopy(array, i + 1, array, j + 1, size - i);
421                        size -= i - j;
422                        i = j + 1;
423                        continue outer;
424                    }
425                }
426                // remove a/../ from a/../c
427                System.arraycopy(array, i + 1, array, prefix, size - i);
428                size -= i + 1 - prefix;
429                i = prefix + 1;
430            }
431        }
432
433        if (size <= 0) {  // should never be less than 0
434            return "";
435        }
436        if (size <= prefix) {  // should never be less than prefix
437            return new String(array, 0, size);
438        }
439        if (lastIsDirectory && keepSeparator) {
440            return new String(array, 0, size);  // keep trailing separator
441        }
442        return new String(array, 0, size - 1);  // lose trailing separator
443    }
444
445    //-----------------------------------------------------------------------
446    /**
447     * Concatenates a filename to a base path using normal command line style rules.
448     * <p>
449     * The effect is equivalent to resultant directory after changing
450     * directory to the first argument, followed by changing directory to
451     * the second argument.
452     * <p>
453     * The first argument is the base path, the second is the path to concatenate.
454     * The returned path is always normalized via {@link #normalize(String)},
455     * thus <code>..</code> is handled.
456     * <p>
457     * If <code>pathToAdd</code> is absolute (has an absolute prefix), then
458     * it will be normalized and returned.
459     * Otherwise, the paths will be joined, normalized and returned.
460     * <p>
461     * The output will be the same on both Unix and Windows except
462     * for the separator character.
463     * <pre>
464     * /foo/ + bar          --&gt;   /foo/bar
465     * /foo + bar           --&gt;   /foo/bar
466     * /foo + /bar          --&gt;   /bar
467     * /foo + C:/bar        --&gt;   C:/bar
468     * /foo + C:bar         --&gt;   C:bar (*)
469     * /foo/a/ + ../bar     --&gt;   foo/bar
470     * /foo/ + ../../bar    --&gt;   null
471     * /foo/ + /bar         --&gt;   /bar
472     * /foo/.. + /bar       --&gt;   /bar
473     * /foo + bar/c.txt     --&gt;   /foo/bar/c.txt
474     * /foo/c.txt + bar     --&gt;   /foo/c.txt/bar (!)
475     * </pre>
476     * (*) Note that the Windows relative drive prefix is unreliable when
477     * used with this method.
478     * (!) Note that the first parameter must be a path. If it ends with a name, then
479     * the name will be built into the concatenated path. If this might be a problem,
480     * use {@link #getFullPath(String)} on the base path argument.
481     *
482     * @param basePath  the base path to attach to, always treated as a path
483     * @param fullFilenameToAdd  the filename (or path) to attach to the base
484     * @return the concatenated path, or null if invalid.  Null bytes inside string will be removed
485     */
486    public static String concat(final String basePath, final String fullFilenameToAdd) {
487        final int prefix = getPrefixLength(fullFilenameToAdd);
488        if (prefix < 0) {
489            return null;
490        }
491        if (prefix > 0) {
492            return normalize(fullFilenameToAdd);
493        }
494        if (basePath == null) {
495            return null;
496        }
497        final int len = basePath.length();
498        if (len == 0) {
499            return normalize(fullFilenameToAdd);
500        }
501        final char ch = basePath.charAt(len - 1);
502        if (isSeparator(ch)) {
503            return normalize(basePath + fullFilenameToAdd);
504        } else {
505            return normalize(basePath + '/' + fullFilenameToAdd);
506        }
507    }
508
509    /**
510     * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory).
511     * <p>
512     * The files names are expected to be normalized.
513     * </p>
514     *
515     * Edge cases:
516     * <ul>
517     * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li>
518     * <li>A directory does not contain itself: return false</li>
519     * <li>A null child file is not contained in any parent: return false</li>
520     * </ul>
521     *
522     * @param canonicalParent
523     *            the file to consider as the parent.
524     * @param canonicalChild
525     *            the file to consider as the child.
526     * @return true is the candidate leaf is under by the specified composite. False otherwise.
527     * @throws IOException
528     *             if an IO error occurs while checking the files.
529     * @since 2.2
530     * @see FileUtils#directoryContains(File, File)
531     */
532    public static boolean directoryContains(final String canonicalParent, final String canonicalChild)
533            throws IOException {
534
535        // Fail fast against NullPointerException
536        if (canonicalParent == null) {
537            throw new IllegalArgumentException("Directory must not be null");
538        }
539
540        if (canonicalChild == null) {
541            return false;
542        }
543
544        if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
545            return false;
546        }
547
548        return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent);
549    }
550
551    //-----------------------------------------------------------------------
552    /**
553     * Converts all separators to the Unix separator of forward slash.
554     *
555     * @param path  the path to be changed, null ignored
556     * @return the updated path
557     */
558    public static String separatorsToUnix(final String path) {
559        if (path == null || path.indexOf(WINDOWS_SEPARATOR) == NOT_FOUND) {
560            return path;
561        }
562        return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
563    }
564
565    /**
566     * Converts all separators to the Windows separator of backslash.
567     *
568     * @param path  the path to be changed, null ignored
569     * @return the updated path
570     */
571    public static String separatorsToWindows(final String path) {
572        if (path == null || path.indexOf(UNIX_SEPARATOR) == NOT_FOUND) {
573            return path;
574        }
575        return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
576    }
577
578    /**
579     * Converts all separators to the system separator.
580     *
581     * @param path  the path to be changed, null ignored
582     * @return the updated path
583     */
584    public static String separatorsToSystem(final String path) {
585        if (path == null) {
586            return null;
587        }
588        if (isSystemWindows()) {
589            return separatorsToWindows(path);
590        } else {
591            return separatorsToUnix(path);
592        }
593    }
594
595    //-----------------------------------------------------------------------
596    /**
597     * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>.
598     * <p>
599     * This method will handle a file in either Unix or Windows format.
600     * <p>
601     * The prefix length includes the first slash in the full filename
602     * if applicable. Thus, it is possible that the length returned is greater
603     * than the length of the input string.
604     * <pre>
605     * Windows:
606     * a\b\c.txt           --&gt; ""          --&gt; relative
607     * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
608     * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
609     * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
610     * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
611     * \\\a\b\c.txt        --&gt;  error, length = -1
612     *
613     * Unix:
614     * a/b/c.txt           --&gt; ""          --&gt; relative
615     * /a/b/c.txt          --&gt; "/"         --&gt; absolute
616     * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
617     * ~                   --&gt; "~/"        --&gt; current user (slash added)
618     * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
619     * ~user               --&gt; "~user/"    --&gt; named user (slash added)
620     * //server/a/b/c.txt  --&gt; "//server/"
621     * ///a/b/c.txt        --&gt; error, length = -1
622     * </pre>
623     * <p>
624     * The output will be the same irrespective of the machine that the code is running on.
625     * ie. both Unix and Windows prefixes are matched regardless.
626     *
627     * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
628     * These must be followed by a server name, so double-slashes are not collapsed
629     * to a single slash at the start of the filename.
630     *
631     * @param filename  the filename to find the prefix in, null returns -1
632     * @return the length of the prefix, -1 if invalid or null
633     */
634    public static int getPrefixLength(final String filename) {
635        if (filename == null) {
636            return NOT_FOUND;
637        }
638        final int len = filename.length();
639        if (len == 0) {
640            return 0;
641        }
642        char ch0 = filename.charAt(0);
643        if (ch0 == ':') {
644            return NOT_FOUND;
645        }
646        if (len == 1) {
647            if (ch0 == '~') {
648                return 2;  // return a length greater than the input
649            }
650            return isSeparator(ch0) ? 1 : 0;
651        } else {
652            if (ch0 == '~') {
653                int posUnix = filename.indexOf(UNIX_SEPARATOR, 1);
654                int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1);
655                if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
656                    return len + 1;  // return a length greater than the input
657                }
658                posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
659                posWin = posWin == NOT_FOUND ? posUnix : posWin;
660                return Math.min(posUnix, posWin) + 1;
661            }
662            final char ch1 = filename.charAt(1);
663            if (ch1 == ':') {
664                ch0 = Character.toUpperCase(ch0);
665                if (ch0 >= 'A' && ch0 <= 'Z') {
666                    if (len == 2 || isSeparator(filename.charAt(2)) == false) {
667                        return 2;
668                    }
669                    return 3;
670                }
671                return NOT_FOUND;
672
673            } else if (isSeparator(ch0) && isSeparator(ch1)) {
674                int posUnix = filename.indexOf(UNIX_SEPARATOR, 2);
675                int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2);
676                if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
677                    return NOT_FOUND;
678                }
679                posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
680                posWin = posWin == NOT_FOUND ? posUnix : posWin;
681                return Math.min(posUnix, posWin) + 1;
682            } else {
683                return isSeparator(ch0) ? 1 : 0;
684            }
685        }
686    }
687
688    /**
689     * Returns the index of the last directory separator character.
690     * <p>
691     * This method will handle a file in either Unix or Windows format.
692     * The position of the last forward or backslash is returned.
693     * <p>
694     * The output will be the same irrespective of the machine that the code is running on.
695     *
696     * @param filename  the filename to find the last path separator in, null returns -1
697     * @return the index of the last separator character, or -1 if there
698     * is no such character
699     */
700    public static int indexOfLastSeparator(final String filename) {
701        if (filename == null) {
702            return NOT_FOUND;
703        }
704        final int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR);
705        final int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR);
706        return Math.max(lastUnixPos, lastWindowsPos);
707    }
708
709    /**
710     * Returns the index of the last extension separator character, which is a dot.
711     * <p>
712     * This method also checks that there is no directory separator after the last dot. To do this it uses
713     * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
714     * </p>
715     * <p>
716     * The output will be the same irrespective of the machine that the code is running on.
717     * </p>
718     * 
719     * @param filename
720     *            the filename to find the last extension separator in, null returns -1
721     * @return the index of the last extension separator character, or -1 if there is no such character
722     */
723    public static int indexOfExtension(final String filename) {
724        if (filename == null) {
725            return NOT_FOUND;
726        }
727        final int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR);
728        final int lastSeparator = indexOfLastSeparator(filename);
729        return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
730    }
731
732    //-----------------------------------------------------------------------
733    /**
734     * Gets the prefix from a full filename, such as <code>C:/</code>
735     * or <code>~/</code>.
736     * <p>
737     * This method will handle a file in either Unix or Windows format.
738     * The prefix includes the first slash in the full filename where applicable.
739     * <pre>
740     * Windows:
741     * a\b\c.txt           --&gt; ""          --&gt; relative
742     * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
743     * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
744     * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
745     * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
746     *
747     * Unix:
748     * a/b/c.txt           --&gt; ""          --&gt; relative
749     * /a/b/c.txt          --&gt; "/"         --&gt; absolute
750     * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
751     * ~                   --&gt; "~/"        --&gt; current user (slash added)
752     * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
753     * ~user               --&gt; "~user/"    --&gt; named user (slash added)
754     * </pre>
755     * <p>
756     * The output will be the same irrespective of the machine that the code is running on.
757     * ie. both Unix and Windows prefixes are matched regardless.
758     *
759     * @param filename  the filename to query, null returns null
760     * @return the prefix of the file, null if invalid. Null bytes inside string will be removed
761     */
762    public static String getPrefix(final String filename) {
763        if (filename == null) {
764            return null;
765        }
766        final int len = getPrefixLength(filename);
767        if (len < 0) {
768            return null;
769        }
770        if (len > filename.length()) {
771            failIfNullBytePresent(filename + UNIX_SEPARATOR);
772            return filename + UNIX_SEPARATOR;
773        }
774        String path = filename.substring(0, len);
775        failIfNullBytePresent(path);
776        return path;
777    }
778
779    /**
780     * Gets the path from a full filename, which excludes the prefix.
781     * <p>
782     * This method will handle a file in either Unix or Windows format.
783     * The method is entirely text based, and returns the text before and
784     * including the last forward or backslash.
785     * <pre>
786     * C:\a\b\c.txt --&gt; a\b\
787     * ~/a/b/c.txt  --&gt; a/b/
788     * a.txt        --&gt; ""
789     * a/b/c        --&gt; a/b/
790     * a/b/c/       --&gt; a/b/c/
791     * </pre>
792     * <p>
793     * The output will be the same irrespective of the machine that the code is running on.
794     * <p>
795     * This method drops the prefix from the result.
796     * See {@link #getFullPath(String)} for the method that retains the prefix.
797     *
798     * @param filename  the filename to query, null returns null
799     * @return the path of the file, an empty string if none exists, null if invalid.
800     * Null bytes inside string will be removed
801     */
802    public static String getPath(final String filename) {
803        return doGetPath(filename, 1);
804    }
805
806    /**
807     * Gets the path from a full filename, which excludes the prefix, and
808     * also excluding the final directory separator.
809     * <p>
810     * This method will handle a file in either Unix or Windows format.
811     * The method is entirely text based, and returns the text before the
812     * last forward or backslash.
813     * <pre>
814     * C:\a\b\c.txt --&gt; a\b
815     * ~/a/b/c.txt  --&gt; a/b
816     * a.txt        --&gt; ""
817     * a/b/c        --&gt; a/b
818     * a/b/c/       --&gt; a/b/c
819     * </pre>
820     * <p>
821     * The output will be the same irrespective of the machine that the code is running on.
822     * <p>
823     * This method drops the prefix from the result.
824     * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
825     *
826     * @param filename  the filename to query, null returns null
827     * @return the path of the file, an empty string if none exists, null if invalid.
828     * Null bytes inside string will be removed
829     */
830    public static String getPathNoEndSeparator(final String filename) {
831        return doGetPath(filename, 0);
832    }
833
834    /**
835     * Does the work of getting the path.
836     *
837     * @param filename  the filename
838     * @param separatorAdd  0 to omit the end separator, 1 to return it
839     * @return the path. Null bytes inside string will be removed
840     */
841    private static String doGetPath(final String filename, final int separatorAdd) {
842        if (filename == null) {
843            return null;
844        }
845        final int prefix = getPrefixLength(filename);
846        if (prefix < 0) {
847            return null;
848        }
849        final int index = indexOfLastSeparator(filename);
850        final int endIndex = index+separatorAdd;
851        if (prefix >= filename.length() || index < 0 || prefix >= endIndex) {
852            return "";
853        }
854        String path = filename.substring(prefix, endIndex);
855        failIfNullBytePresent(path);
856        return path;
857    }
858
859    /**
860     * Gets the full path from a full filename, which is the prefix + path.
861     * <p>
862     * This method will handle a file in either Unix or Windows format.
863     * The method is entirely text based, and returns the text before and
864     * including the last forward or backslash.
865     * <pre>
866     * C:\a\b\c.txt --&gt; C:\a\b\
867     * ~/a/b/c.txt  --&gt; ~/a/b/
868     * a.txt        --&gt; ""
869     * a/b/c        --&gt; a/b/
870     * a/b/c/       --&gt; a/b/c/
871     * C:           --&gt; C:
872     * C:\          --&gt; C:\
873     * ~            --&gt; ~/
874     * ~/           --&gt; ~/
875     * ~user        --&gt; ~user/
876     * ~user/       --&gt; ~user/
877     * </pre>
878     * <p>
879     * The output will be the same irrespective of the machine that the code is running on.
880     *
881     * @param filename  the filename to query, null returns null
882     * @return the path of the file, an empty string if none exists, null if invalid
883     */
884    public static String getFullPath(final String filename) {
885        return doGetFullPath(filename, true);
886    }
887
888    /**
889     * Gets the full path from a full filename, which is the prefix + path,
890     * and also excluding the final directory separator.
891     * <p>
892     * This method will handle a file in either Unix or Windows format.
893     * The method is entirely text based, and returns the text before the
894     * last forward or backslash.
895     * <pre>
896     * C:\a\b\c.txt --&gt; C:\a\b
897     * ~/a/b/c.txt  --&gt; ~/a/b
898     * a.txt        --&gt; ""
899     * a/b/c        --&gt; a/b
900     * a/b/c/       --&gt; a/b/c
901     * C:           --&gt; C:
902     * C:\          --&gt; C:\
903     * ~            --&gt; ~
904     * ~/           --&gt; ~
905     * ~user        --&gt; ~user
906     * ~user/       --&gt; ~user
907     * </pre>
908     * <p>
909     * The output will be the same irrespective of the machine that the code is running on.
910     *
911     * @param filename  the filename to query, null returns null
912     * @return the path of the file, an empty string if none exists, null if invalid
913     */
914    public static String getFullPathNoEndSeparator(final String filename) {
915        return doGetFullPath(filename, false);
916    }
917
918    /**
919     * Does the work of getting the path.
920     *
921     * @param filename  the filename
922     * @param includeSeparator  true to include the end separator
923     * @return the path
924     */
925    private static String doGetFullPath(final String filename, final boolean includeSeparator) {
926        if (filename == null) {
927            return null;
928        }
929        final int prefix = getPrefixLength(filename);
930        if (prefix < 0) {
931            return null;
932        }
933        if (prefix >= filename.length()) {
934            if (includeSeparator) {
935                return getPrefix(filename);  // add end slash if necessary
936            } else {
937                return filename;
938            }
939        }
940        final int index = indexOfLastSeparator(filename);
941        if (index < 0) {
942            return filename.substring(0, prefix);
943        }
944        int end = index + (includeSeparator ?  1 : 0);
945        if (end == 0) {
946            end++;
947        }
948        return filename.substring(0, end);
949    }
950
951    /**
952     * Gets the name minus the path from a full filename.
953     * <p>
954     * This method will handle a file in either Unix or Windows format.
955     * The text after the last forward or backslash is returned.
956     * <pre>
957     * a/b/c.txt --&gt; c.txt
958     * a.txt     --&gt; a.txt
959     * a/b/c     --&gt; c
960     * a/b/c/    --&gt; ""
961     * </pre>
962     * <p>
963     * The output will be the same irrespective of the machine that the code is running on.
964     *
965     * @param filename  the filename to query, null returns null
966     * @return the name of the file without the path, or an empty string if none exists.
967     * Null bytes inside string will be removed
968     */
969    public static String getName(final String filename) {
970        if (filename == null) {
971            return null;
972        }
973        failIfNullBytePresent(filename);
974        final int index = indexOfLastSeparator(filename);
975        return filename.substring(index + 1);
976    }
977
978    /**
979     * Check the input for null bytes, a sign of unsanitized data being passed to to file level functions.
980     *
981     * This may be used for poison byte attacks.
982     * @param path the path to check
983     */
984    private static void failIfNullBytePresent(String path) {
985        int len = path.length();
986        for (int i = 0; i < len; i++) {
987            if (path.charAt(i) == 0) {
988                throw new IllegalArgumentException("Null byte present in file/path name. There are no " +
989                        "known legitimate use cases for such data, but several injection attacks may use it");
990            }
991        }
992    }
993
994    /**
995     * Gets the base name, minus the full path and extension, from a full filename.
996     * <p>
997     * This method will handle a file in either Unix or Windows format.
998     * The text after the last forward or backslash and before the last dot is returned.
999     * <pre>
1000     * a/b/c.txt --&gt; c
1001     * a.txt     --&gt; a
1002     * a/b/c     --&gt; c
1003     * a/b/c/    --&gt; ""
1004     * </pre>
1005     * <p>
1006     * The output will be the same irrespective of the machine that the code is running on.
1007     *
1008     * @param filename  the filename to query, null returns null
1009     * @return the name of the file without the path, or an empty string if none exists. Null bytes inside string
1010     * will be removed
1011     */
1012    public static String getBaseName(final String filename) {
1013        return removeExtension(getName(filename));
1014    }
1015
1016    /**
1017     * Gets the extension of a filename.
1018     * <p>
1019     * This method returns the textual part of the filename after the last dot.
1020     * There must be no directory separator after the dot.
1021     * <pre>
1022     * foo.txt      --&gt; "txt"
1023     * a/b/c.jpg    --&gt; "jpg"
1024     * a/b.txt/c    --&gt; ""
1025     * a/b/c        --&gt; ""
1026     * </pre>
1027     * <p>
1028     * The output will be the same irrespective of the machine that the code is running on.
1029     *
1030     * @param filename the filename to retrieve the extension of.
1031     * @return the extension of the file or an empty string if none exists or {@code null}
1032     * if the filename is {@code null}.
1033     */
1034    public static String getExtension(final String filename) {
1035        if (filename == null) {
1036            return null;
1037        }
1038        final int index = indexOfExtension(filename);
1039        if (index == NOT_FOUND) {
1040            return "";
1041        } else {
1042            return filename.substring(index + 1);
1043        }
1044    }
1045
1046    //-----------------------------------------------------------------------
1047    /**
1048     * Removes the extension from a filename.
1049     * <p>
1050     * This method returns the textual part of the filename before the last dot.
1051     * There must be no directory separator after the dot.
1052     * <pre>
1053     * foo.txt    --&gt; foo
1054     * a\b\c.jpg  --&gt; a\b\c
1055     * a\b\c      --&gt; a\b\c
1056     * a.b\c      --&gt; a.b\c
1057     * </pre>
1058     * <p>
1059     * The output will be the same irrespective of the machine that the code is running on.
1060     *
1061     * @param filename  the filename to query, null returns null
1062     * @return the filename minus the extension
1063     */
1064    public static String removeExtension(final String filename) {
1065        if (filename == null) {
1066            return null;
1067        }
1068        failIfNullBytePresent(filename);
1069
1070        final int index = indexOfExtension(filename);
1071        if (index == NOT_FOUND) {
1072            return filename;
1073        } else {
1074            return filename.substring(0, index);
1075        }
1076    }
1077
1078    //-----------------------------------------------------------------------
1079    /**
1080     * Checks whether two filenames are equal exactly.
1081     * <p>
1082     * No processing is performed on the filenames other than comparison,
1083     * thus this is merely a null-safe case-sensitive equals.
1084     *
1085     * @param filename1  the first filename to query, may be null
1086     * @param filename2  the second filename to query, may be null
1087     * @return true if the filenames are equal, null equals null
1088     * @see IOCase#SENSITIVE
1089     */
1090    public static boolean equals(final String filename1, final String filename2) {
1091        return equals(filename1, filename2, false, IOCase.SENSITIVE);
1092    }
1093
1094    /**
1095     * Checks whether two filenames are equal using the case rules of the system.
1096     * <p>
1097     * No processing is performed on the filenames other than comparison.
1098     * The check is case-sensitive on Unix and case-insensitive on Windows.
1099     *
1100     * @param filename1  the first filename to query, may be null
1101     * @param filename2  the second filename to query, may be null
1102     * @return true if the filenames are equal, null equals null
1103     * @see IOCase#SYSTEM
1104     */
1105    public static boolean equalsOnSystem(final String filename1, final String filename2) {
1106        return equals(filename1, filename2, false, IOCase.SYSTEM);
1107    }
1108
1109    //-----------------------------------------------------------------------
1110    /**
1111     * Checks whether two filenames are equal after both have been normalized.
1112     * <p>
1113     * Both filenames are first passed to {@link #normalize(String)}.
1114     * The check is then performed in a case-sensitive manner.
1115     *
1116     * @param filename1  the first filename to query, may be null
1117     * @param filename2  the second filename to query, may be null
1118     * @return true if the filenames are equal, null equals null
1119     * @see IOCase#SENSITIVE
1120     */
1121    public static boolean equalsNormalized(final String filename1, final String filename2) {
1122        return equals(filename1, filename2, true, IOCase.SENSITIVE);
1123    }
1124
1125    /**
1126     * Checks whether two filenames are equal after both have been normalized
1127     * and using the case rules of the system.
1128     * <p>
1129     * Both filenames are first passed to {@link #normalize(String)}.
1130     * The check is then performed case-sensitive on Unix and
1131     * case-insensitive on Windows.
1132     *
1133     * @param filename1  the first filename to query, may be null
1134     * @param filename2  the second filename to query, may be null
1135     * @return true if the filenames are equal, null equals null
1136     * @see IOCase#SYSTEM
1137     */
1138    public static boolean equalsNormalizedOnSystem(final String filename1, final String filename2) {
1139        return equals(filename1, filename2, true, IOCase.SYSTEM);
1140    }
1141
1142    /**
1143     * Checks whether two filenames are equal, optionally normalizing and providing
1144     * control over the case-sensitivity.
1145     *
1146     * @param filename1  the first filename to query, may be null
1147     * @param filename2  the second filename to query, may be null
1148     * @param normalized  whether to normalize the filenames
1149     * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1150     * @return true if the filenames are equal, null equals null
1151     * @since 1.3
1152     */
1153    public static boolean equals(
1154            String filename1, String filename2,
1155            final boolean normalized, IOCase caseSensitivity) {
1156
1157        if (filename1 == null || filename2 == null) {
1158            return filename1 == null && filename2 == null;
1159        }
1160        if (normalized) {
1161            filename1 = normalize(filename1);
1162            filename2 = normalize(filename2);
1163            if (filename1 == null || filename2 == null) {
1164                throw new NullPointerException(
1165                    "Error normalizing one or both of the file names");
1166            }
1167        }
1168        if (caseSensitivity == null) {
1169            caseSensitivity = IOCase.SENSITIVE;
1170        }
1171        return caseSensitivity.checkEquals(filename1, filename2);
1172    }
1173
1174    //-----------------------------------------------------------------------
1175    /**
1176     * Checks whether the extension of the filename is that specified.
1177     * <p>
1178     * This method obtains the extension as the textual part of the filename
1179     * after the last dot. There must be no directory separator after the dot.
1180     * The extension check is case-sensitive on all platforms.
1181     *
1182     * @param filename  the filename to query, null returns false
1183     * @param extension  the extension to check for, null or empty checks for no extension
1184     * @return true if the filename has the specified extension
1185     * @throws java.lang.IllegalArgumentException if the supplied filename contains null bytes
1186     */
1187    public static boolean isExtension(final String filename, final String extension) {
1188        if (filename == null) {
1189            return false;
1190        }
1191        failIfNullBytePresent(filename);
1192
1193        if (extension == null || extension.isEmpty()) {
1194            return indexOfExtension(filename) == NOT_FOUND;
1195        }
1196        final String fileExt = getExtension(filename);
1197        return fileExt.equals(extension);
1198    }
1199
1200    /**
1201     * Checks whether the extension of the filename is one of those specified.
1202     * <p>
1203     * This method obtains the extension as the textual part of the filename
1204     * after the last dot. There must be no directory separator after the dot.
1205     * The extension check is case-sensitive on all platforms.
1206     *
1207     * @param filename  the filename to query, null returns false
1208     * @param extensions  the extensions to check for, null checks for no extension
1209     * @return true if the filename is one of the extensions
1210     * @throws java.lang.IllegalArgumentException if the supplied filename contains null bytes
1211     */
1212    public static boolean isExtension(final String filename, final String[] extensions) {
1213        if (filename == null) {
1214            return false;
1215        }
1216        failIfNullBytePresent(filename);
1217
1218        if (extensions == null || extensions.length == 0) {
1219            return indexOfExtension(filename) == NOT_FOUND;
1220        }
1221        final String fileExt = getExtension(filename);
1222        for (final String extension : extensions) {
1223            if (fileExt.equals(extension)) {
1224                return true;
1225            }
1226        }
1227        return false;
1228    }
1229
1230    /**
1231     * Checks whether the extension of the filename is one of those specified.
1232     * <p>
1233     * This method obtains the extension as the textual part of the filename
1234     * after the last dot. There must be no directory separator after the dot.
1235     * The extension check is case-sensitive on all platforms.
1236     *
1237     * @param filename  the filename to query, null returns false
1238     * @param extensions  the extensions to check for, null checks for no extension
1239     * @return true if the filename is one of the extensions
1240     * @throws java.lang.IllegalArgumentException if the supplied filename contains null bytes
1241     */
1242    public static boolean isExtension(final String filename, final Collection<String> extensions) {
1243        if (filename == null) {
1244            return false;
1245        }
1246        failIfNullBytePresent(filename);
1247
1248        if (extensions == null || extensions.isEmpty()) {
1249            return indexOfExtension(filename) == NOT_FOUND;
1250        }
1251        final String fileExt = getExtension(filename);
1252        for (final String extension : extensions) {
1253            if (fileExt.equals(extension)) {
1254                return true;
1255            }
1256        }
1257        return false;
1258    }
1259
1260    //-----------------------------------------------------------------------
1261    /**
1262     * Checks a filename to see if it matches the specified wildcard matcher,
1263     * always testing case-sensitive.
1264     * <p>
1265     * The wildcard matcher uses the characters '?' and '*' to represent a
1266     * single or multiple (zero or more) wildcard characters.
1267     * This is the same as often found on Dos/Unix command lines.
1268     * The check is case-sensitive always.
1269     * <pre>
1270     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1271     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1272     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1273     * wildcardMatch("c.txt", "*.???")      --&gt; true
1274     * wildcardMatch("c.txt", "*.????")     --&gt; false
1275     * </pre>
1276     * N.B. the sequence "*?" does not work properly at present in match strings.
1277     *
1278     * @param filename  the filename to match on
1279     * @param wildcardMatcher  the wildcard string to match against
1280     * @return true if the filename matches the wilcard string
1281     * @see IOCase#SENSITIVE
1282     */
1283    public static boolean wildcardMatch(final String filename, final String wildcardMatcher) {
1284        return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE);
1285    }
1286
1287    /**
1288     * Checks a filename to see if it matches the specified wildcard matcher
1289     * using the case rules of the system.
1290     * <p>
1291     * The wildcard matcher uses the characters '?' and '*' to represent a
1292     * single or multiple (zero or more) wildcard characters.
1293     * This is the same as often found on Dos/Unix command lines.
1294     * The check is case-sensitive on Unix and case-insensitive on Windows.
1295     * <pre>
1296     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1297     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1298     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1299     * wildcardMatch("c.txt", "*.???")      --&gt; true
1300     * wildcardMatch("c.txt", "*.????")     --&gt; false
1301     * </pre>
1302     * N.B. the sequence "*?" does not work properly at present in match strings.
1303     *
1304     * @param filename  the filename to match on
1305     * @param wildcardMatcher  the wildcard string to match against
1306     * @return true if the filename matches the wilcard string
1307     * @see IOCase#SYSTEM
1308     */
1309    public static boolean wildcardMatchOnSystem(final String filename, final String wildcardMatcher) {
1310        return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM);
1311    }
1312
    /**
     * Checks a filename to see if it matches the specified wildcard matcher
     * allowing control over case-sensitivity.
     * <p>
     * The wildcard matcher uses the characters '?' and '*' to represent a
     * single or multiple (zero or more) wildcard characters.
     * N.B. the sequence "*?" does not work properly at present in match strings.
     * <p>
     * The matcher is split into tokens with {@link #splitOnTokens(String)} and the
     * tokens are matched left to right against the filename. Whenever a literal token
     * that follows a '*' occurs more than once in the remaining text, the later
     * occurrence is remembered on a backtrack stack and retried if the first attempt
     * does not lead to a full match.
     *
     * @param filename  the filename to match on, may be null
     * @param wildcardMatcher  the wildcard string to match against, may be null
     * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
     * @return true if the filename matches the wildcard string; two nulls match,
     *         a null on only one side never matches
     * @since 1.3
     */
    public static boolean wildcardMatch(final String filename, final String wildcardMatcher, IOCase caseSensitivity) {
        if (filename == null && wildcardMatcher == null) {
            return true;
        }
        if (filename == null || wildcardMatcher == null) {
            return false;
        }
        if (caseSensitivity == null) {
            caseSensitivity = IOCase.SENSITIVE;
        }
        // tokens are "?", "*" or a run of literal text
        final String[] wcs = splitOnTokens(wildcardMatcher);
        // true while the previous token was '*' (or we resumed from a backtrack point):
        // the next literal token may then start anywhere at or after textIdx
        boolean anyChars = false;
        // current position in filename
        int textIdx = 0;
        // current position in the token array
        int wcsIdx = 0;
        // entries are {token index, text index} pairs to resume matching from
        final Stack<int[]> backtrack = new Stack<int[]>();

        // loop around a backtrack stack, to handle complex * matching
        do {
            if (backtrack.size() > 0) {
                // previous attempt failed: retry the literal token at its next occurrence
                final int[] array = backtrack.pop();
                wcsIdx = array[0];
                textIdx = array[1];
                anyChars = true;
            }

            // loop whilst tokens and text left to process
            while (wcsIdx < wcs.length) {

                if (wcs[wcsIdx].equals("?")) {
                    // ? so move to next text char
                    textIdx++;
                    if (textIdx > filename.length()) {
                        // ran past the end of the text: this attempt cannot match
                        break;
                    }
                    anyChars = false;

                } else if (wcs[wcsIdx].equals("*")) {
                    // set any chars status
                    anyChars = true;
                    if (wcsIdx == wcs.length - 1) {
                        // a trailing '*' consumes all of the remaining text
                        textIdx = filename.length();
                    }

                } else {
                    // matching text token
                    if (anyChars) {
                        // any chars then try to locate text token
                        // NOTE(review): checkIndexOf is presumably "index of the token at or after
                        // the given start, or NOT_FOUND" under the chosen case rule - confirm in IOCase
                        textIdx = caseSensitivity.checkIndexOf(filename, textIdx, wcs[wcsIdx]);
                        if (textIdx == NOT_FOUND) {
                            // token not found
                            break;
                        }
                        // remember a later occurrence of the same token as an alternative start
                        final int repeat = caseSensitivity.checkIndexOf(filename, textIdx + 1, wcs[wcsIdx]);
                        if (repeat >= 0) {
                            backtrack.push(new int[] {wcsIdx, repeat});
                        }
                    } else {
                        // matching from current position
                        if (!caseSensitivity.checkRegionMatches(filename, textIdx, wcs[wcsIdx])) {
                            // couldn't match token
                            break;
                        }
                    }

                    // matched text token, move text index to end of matched token
                    textIdx += wcs[wcsIdx].length();
                    anyChars = false;
                }

                wcsIdx++;
            }

            // full match: every token was consumed and so was every character of the text
            if (wcsIdx == wcs.length && textIdx == filename.length()) {
                return true;
            }

        } while (backtrack.size() > 0);

        // no attempt, including all backtrack alternatives, produced a full match
        return false;
    }
1408
1409    /**
1410     * Splits a string into a number of tokens.
1411     * The text is split by '?' and '*'.
1412     * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1413     *
1414     * @param text  the text to split
1415     * @return the array of tokens, never null
1416     */
1417    static String[] splitOnTokens(final String text) {
1418        // used by wildcardMatch
1419        // package level so a unit test may run on this
1420
1421        if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1422            return new String[] { text };
1423        }
1424
1425        final char[] array = text.toCharArray();
1426        final ArrayList<String> list = new ArrayList<String>();
1427        final StringBuilder buffer = new StringBuilder();
1428        char prevChar = 0;
1429        for (final char ch : array) {
1430            if (ch == '?' || ch == '*') {
1431                if (buffer.length() != 0) {
1432                    list.add(buffer.toString());
1433                    buffer.setLength(0);
1434                }
1435                if (ch == '?') {
1436                    list.add("?");
1437                } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*'
1438                    list.add("*");
1439                }
1440            } else {
1441                buffer.append(ch);
1442            }
1443            prevChar = ch;
1444        }
1445        if (buffer.length() != 0) {
1446            list.add(buffer.toString());
1447        }
1448
1449        return list.toArray( new String[ list.size() ] );
1450    }
1451
1452}