001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.io;
018    
019    import java.io.File;
020    import java.util.ArrayList;
021    import java.util.Collection;
022    import java.util.Stack;
023    
024    /**
025     * General filename and filepath manipulation utilities.
026     * <p>
027     * When dealing with filenames you can hit problems when moving from a Windows
028     * based development machine to a Unix based production machine.
029     * This class aims to help avoid those problems.
030     * <p>
031     * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
032     * using JDK {@link java.io.File File} objects and the two argument constructor
033     * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
034     * <p>
035     * Most methods on this class are designed to work the same on both Unix and Windows.
036     * Those that don't include 'System', 'Unix' or 'Windows' in their name.
037     * <p>
038     * Most methods recognise both separators (forward and back), and both
039     * sets of prefixes. See the javadoc of each method for details.
040     * <p>
041     * This class defines six components within a filename
042     * (example C:\dev\project\file.txt):
043     * <ul>
044     * <li>the prefix - C:\</li>
045     * <li>the path - dev\project\</li>
046     * <li>the full path - C:\dev\project\</li>
047     * <li>the name - file.txt</li>
048     * <li>the base name - file</li>
049     * <li>the extension - txt</li>
050     * </ul>
051     * Note that this class works best if directory filenames end with a separator.
052     * If you omit the last separator, it is impossible to determine if the filename
053     * corresponds to a file or a directory. As a result, we have chosen to say
054     * it corresponds to a file.
055     * <p>
056     * This class only supports Unix and Windows style names.
057     * Prefixes are matched as follows:
058     * <pre>
059     * Windows:
060     * a\b\c.txt           --> ""          --> relative
061     * \a\b\c.txt          --> "\"         --> current drive absolute
062     * C:a\b\c.txt         --> "C:"        --> drive relative
063     * C:\a\b\c.txt        --> "C:\"       --> absolute
064     * \\server\a\b\c.txt  --> "\\server\" --> UNC
065     *
066     * Unix:
067     * a/b/c.txt           --> ""          --> relative
068     * /a/b/c.txt          --> "/"         --> absolute
069     * ~/a/b/c.txt         --> "~/"        --> current user
070     * ~                   --> "~/"        --> current user (slash added)
071     * ~user/a/b/c.txt     --> "~user/"    --> named user
072     * ~user               --> "~user/"    --> named user (slash added)
073     * </pre>
074     * Both prefix styles are matched always, irrespective of the machine that you are
075     * currently running on.
076     * <p>
077     * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
078     *
079     * @author <a href="mailto:burton@relativity.yi.org">Kevin A. Burton</A>
080     * @author <a href="mailto:sanders@apache.org">Scott Sanders</a>
081     * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
082     * @author <a href="mailto:Christoph.Reck@dlr.de">Christoph.Reck</a>
083     * @author <a href="mailto:peter@apache.org">Peter Donald</a>
084     * @author <a href="mailto:jefft@apache.org">Jeff Turner</a>
085     * @author Matthew Hawthorne
086     * @author Martin Cooper
087     * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a>
088     * @author Stephen Colebourne
089     * @version $Id: FilenameUtils.java 1004077 2010-10-04 00:58:42Z niallp $
090     * @since Commons IO 1.1
091     */
092    public class FilenameUtils {
093    
094        /**
095         * The extension separator character.
096         * @since Commons IO 1.4
097         */
098        public static final char EXTENSION_SEPARATOR = '.';
099    
100        /**
101         * The extension separator String.
102         * @since Commons IO 1.4
103         */
104        public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);
105    
106        /**
107         * The Unix separator character.
108         */
109        private static final char UNIX_SEPARATOR = '/';
110    
111        /**
112         * The Windows separator character.
113         */
114        private static final char WINDOWS_SEPARATOR = '\\';
115    
116        /**
117         * The system separator character.
118         */
119        private static final char SYSTEM_SEPARATOR = File.separatorChar;
120    
121        /**
122         * The separator character that is the opposite of the system separator.
123         */
124        private static final char OTHER_SEPARATOR;
125        static {
126            if (isSystemWindows()) {
127                OTHER_SEPARATOR = UNIX_SEPARATOR;
128            } else {
129                OTHER_SEPARATOR = WINDOWS_SEPARATOR;
130            }
131        }
132    
133        /**
134         * Instances should NOT be constructed in standard programming.
135         */
136        public FilenameUtils() {
137            super();
138        }
139    
140        //-----------------------------------------------------------------------
141        /**
142         * Determines if Windows file system is in use.
143         * 
144         * @return true if the system is Windows
145         */
146        static boolean isSystemWindows() {
147            return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
148        }
149    
150        //-----------------------------------------------------------------------
151        /**
152         * Checks if the character is a separator.
153         * 
154         * @param ch  the character to check
155         * @return true if it is a separator character
156         */
157        private static boolean isSeparator(char ch) {
158            return (ch == UNIX_SEPARATOR) || (ch == WINDOWS_SEPARATOR);
159        }
160    
161        //-----------------------------------------------------------------------
162        /**
163         * Normalizes a path, removing double and single dot path steps.
164         * <p>
165         * This method normalizes a path to a standard format.
166         * The input may contain separators in either Unix or Windows format.
167         * The output will contain separators in the format of the system.
168         * <p>
169         * A trailing slash will be retained.
170         * A double slash will be merged to a single slash (but UNC names are handled).
171         * A single dot path segment will be removed.
172         * A double dot will cause that path segment and the one before to be removed.
173         * If the double dot has no parent path segment to work with, <code>null</code>
174         * is returned.
175         * <p>
176         * The output will be the same on both Unix and Windows except
177         * for the separator character.
178         * <pre>
179         * /foo//               -->   /foo/
180         * /foo/./              -->   /foo/
181         * /foo/../bar          -->   /bar
182         * /foo/../bar/         -->   /bar/
183         * /foo/../bar/../baz   -->   /baz
184         * //foo//./bar         -->   /foo/bar
185         * /../                 -->   null
186         * ../foo               -->   null
187         * foo/bar/..           -->   foo/
188         * foo/../../bar        -->   null
189         * foo/../bar           -->   bar
190         * //server/foo/../bar  -->   //server/bar
191         * //server/../bar      -->   null
192         * C:\foo\..\bar        -->   C:\bar
193         * C:\..\bar            -->   null
194         * ~/foo/../bar/        -->   ~/bar/
195         * ~/../bar             -->   null
196         * </pre>
197         * (Note the file separator returned will be correct for Windows/Unix)
198         *
199         * @param filename  the filename to normalize, null returns null
200         * @return the normalized filename, or null if invalid
201         */
202        public static String normalize(String filename) {
203            return doNormalize(filename, SYSTEM_SEPARATOR, true);
204        }
205        /**
206         * Normalizes a path, removing double and single dot path steps.
207         * <p>
208         * This method normalizes a path to a standard format.
209         * The input may contain separators in either Unix or Windows format.
210         * The output will contain separators in the format specified.
211         * <p>
212         * A trailing slash will be retained.
213         * A double slash will be merged to a single slash (but UNC names are handled).
214         * A single dot path segment will be removed.
215         * A double dot will cause that path segment and the one before to be removed.
216         * If the double dot has no parent path segment to work with, <code>null</code>
217         * is returned.
218         * <p>
219         * The output will be the same on both Unix and Windows except
220         * for the separator character.
221         * <pre>
222         * /foo//               -->   /foo/
223         * /foo/./              -->   /foo/
224         * /foo/../bar          -->   /bar
225         * /foo/../bar/         -->   /bar/
226         * /foo/../bar/../baz   -->   /baz
227         * //foo//./bar         -->   /foo/bar
228         * /../                 -->   null
229         * ../foo               -->   null
230         * foo/bar/..           -->   foo/
231         * foo/../../bar        -->   null
232         * foo/../bar           -->   bar
233         * //server/foo/../bar  -->   //server/bar
234         * //server/../bar      -->   null
235         * C:\foo\..\bar        -->   C:\bar
236         * C:\..\bar            -->   null
237         * ~/foo/../bar/        -->   ~/bar/
238         * ~/../bar             -->   null
239         * </pre>
240         * The output will be the same on both Unix and Windows including
241         * the separator character.
242         *
243         * @param filename  the filename to normalize, null returns null
244         * @param unixSeparator <code>true</code> if a unix separator should
245         * be used or <code>false</code> if a windows separator should be used.
246         * @return the normalized filename, or null if invalid
247         * @since Commons IO 2.0
248         */
249        public static String normalize(String filename, boolean unixSeparator) {
250            char separator = (unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR);
251            return doNormalize(filename, separator, true);
252        }
253    
254        //-----------------------------------------------------------------------
255        /**
256         * Normalizes a path, removing double and single dot path steps,
257         * and removing any final directory separator.
258         * <p>
259         * This method normalizes a path to a standard format.
260         * The input may contain separators in either Unix or Windows format.
261         * The output will contain separators in the format of the system.
262         * <p>
263         * A trailing slash will be removed.
264         * A double slash will be merged to a single slash (but UNC names are handled).
265         * A single dot path segment will be removed.
266         * A double dot will cause that path segment and the one before to be removed.
267         * If the double dot has no parent path segment to work with, <code>null</code>
268         * is returned.
269         * <p>
270         * The output will be the same on both Unix and Windows except
271         * for the separator character.
272         * <pre>
273         * /foo//               -->   /foo
274         * /foo/./              -->   /foo
275         * /foo/../bar          -->   /bar
276         * /foo/../bar/         -->   /bar
277         * /foo/../bar/../baz   -->   /baz
278         * //foo//./bar         -->   /foo/bar
279         * /../                 -->   null
280         * ../foo               -->   null
281         * foo/bar/..           -->   foo
282         * foo/../../bar        -->   null
283         * foo/../bar           -->   bar
284         * //server/foo/../bar  -->   //server/bar
285         * //server/../bar      -->   null
286         * C:\foo\..\bar        -->   C:\bar
287         * C:\..\bar            -->   null
288         * ~/foo/../bar/        -->   ~/bar
289         * ~/../bar             -->   null
290         * </pre>
291         * (Note the file separator returned will be correct for Windows/Unix)
292         *
293         * @param filename  the filename to normalize, null returns null
294         * @return the normalized filename, or null if invalid
295         */
296        public static String normalizeNoEndSeparator(String filename) {
297            return doNormalize(filename, SYSTEM_SEPARATOR, false);
298        }
299    
300        /**
301         * Normalizes a path, removing double and single dot path steps,
302         * and removing any final directory separator.
303         * <p>
304         * This method normalizes a path to a standard format.
305         * The input may contain separators in either Unix or Windows format.
306         * The output will contain separators in the format specified.
307         * <p>
308         * A trailing slash will be removed.
309         * A double slash will be merged to a single slash (but UNC names are handled).
310         * A single dot path segment will be removed.
311         * A double dot will cause that path segment and the one before to be removed.
312         * If the double dot has no parent path segment to work with, <code>null</code>
313         * is returned.
314         * <p>
315         * The output will be the same on both Unix and Windows including
316         * the separator character.
317         * <pre>
318         * /foo//               -->   /foo
319         * /foo/./              -->   /foo
320         * /foo/../bar          -->   /bar
321         * /foo/../bar/         -->   /bar
322         * /foo/../bar/../baz   -->   /baz
323         * //foo//./bar         -->   /foo/bar
324         * /../                 -->   null
325         * ../foo               -->   null
326         * foo/bar/..           -->   foo
327         * foo/../../bar        -->   null
328         * foo/../bar           -->   bar
329         * //server/foo/../bar  -->   //server/bar
330         * //server/../bar      -->   null
331         * C:\foo\..\bar        -->   C:\bar
332         * C:\..\bar            -->   null
333         * ~/foo/../bar/        -->   ~/bar
334         * ~/../bar             -->   null
335         * </pre>
336         *
337         * @param filename  the filename to normalize, null returns null
338         * @param unixSeparator <code>true</code> if a unix separator should
339         * be used or <code>false</code> if a windows separtor should be used.
340         * @return the normalized filename, or null if invalid
341         * @since Commons IO 2.0
342         */
343        public static String normalizeNoEndSeparator(String filename, boolean unixSeparator) {
344             char separator = (unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR);
345            return doNormalize(filename, separator, false);
346        }
347    
348        /**
349         * Internal method to perform the normalization.
350         *
351         * @param filename  the filename
352         * @param separator The separator character to use
353         * @param keepSeparator  true to keep the final separator
354         * @return the normalized filename
355         */
356        private static String doNormalize(String filename, char separator, boolean keepSeparator) {
357            if (filename == null) {
358                return null;
359            }
360            int size = filename.length();
361            if (size == 0) {
362                return filename;
363            }
364            int prefix = getPrefixLength(filename);
365            if (prefix < 0) {
366                return null;
367            }
368            
369            char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
370            filename.getChars(0, filename.length(), array, 0);
371            
372            // fix separators throughout
373            char otherSeparator = (separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR);
374            for (int i = 0; i < array.length; i++) {
375                if (array[i] == otherSeparator) {
376                    array[i] = separator;
377                }
378            }
379            
380            // add extra separator on the end to simplify code below
381            boolean lastIsDirectory = true;
382            if (array[size - 1] != separator) {
383                array[size++] = separator;
384                lastIsDirectory = false;
385            }
386            
387            // adjoining slashes
388            for (int i = prefix + 1; i < size; i++) {
389                if (array[i] == separator && array[i - 1] == separator) {
390                    System.arraycopy(array, i, array, i - 1, size - i);
391                    size--;
392                    i--;
393                }
394            }
395            
396            // dot slash
397            for (int i = prefix + 1; i < size; i++) {
398                if (array[i] == separator && array[i - 1] == '.' &&
399                        (i == prefix + 1 || array[i - 2] == separator)) {
400                    if (i == size - 1) {
401                        lastIsDirectory = true;
402                    }
403                    System.arraycopy(array, i + 1, array, i - 1, size - i);
404                    size -=2;
405                    i--;
406                }
407            }
408            
409            // double dot slash
410            outer:
411            for (int i = prefix + 2; i < size; i++) {
412                if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
413                        (i == prefix + 2 || array[i - 3] == separator)) {
414                    if (i == prefix + 2) {
415                        return null;
416                    }
417                    if (i == size - 1) {
418                        lastIsDirectory = true;
419                    }
420                    int j;
421                    for (j = i - 4 ; j >= prefix; j--) {
422                        if (array[j] == separator) {
423                            // remove b/../ from a/b/../c
424                            System.arraycopy(array, i + 1, array, j + 1, size - i);
425                            size -= (i - j);
426                            i = j + 1;
427                            continue outer;
428                        }
429                    }
430                    // remove a/../ from a/../c
431                    System.arraycopy(array, i + 1, array, prefix, size - i);
432                    size -= (i + 1 - prefix);
433                    i = prefix + 1;
434                }
435            }
436            
437            if (size <= 0) {  // should never be less than 0
438                return "";
439            }
440            if (size <= prefix) {  // should never be less than prefix
441                return new String(array, 0, size);
442            }
443            if (lastIsDirectory && keepSeparator) {
444                return new String(array, 0, size);  // keep trailing separator
445            }
446            return new String(array, 0, size - 1);  // lose trailing separator
447        }
448    
449        //-----------------------------------------------------------------------
450        /**
451         * Concatenates a filename to a base path using normal command line style rules.
452         * <p>
453         * The effect is equivalent to resultant directory after changing
454         * directory to the first argument, followed by changing directory to
455         * the second argument.
456         * <p>
457         * The first argument is the base path, the second is the path to concatenate.
458         * The returned path is always normalized via {@link #normalize(String)},
459         * thus <code>..</code> is handled.
460         * <p>
461         * If <code>pathToAdd</code> is absolute (has an absolute prefix), then
462         * it will be normalized and returned.
463         * Otherwise, the paths will be joined, normalized and returned.
464         * <p>
465         * The output will be the same on both Unix and Windows except
466         * for the separator character.
467         * <pre>
468         * /foo/ + bar          -->   /foo/bar
469         * /foo + bar           -->   /foo/bar
470         * /foo + /bar          -->   /bar
471         * /foo + C:/bar        -->   C:/bar
472         * /foo + C:bar         -->   C:bar (*)
473         * /foo/a/ + ../bar     -->   foo/bar
474         * /foo/ + ../../bar    -->   null
475         * /foo/ + /bar         -->   /bar
476         * /foo/.. + /bar       -->   /bar
477         * /foo + bar/c.txt     -->   /foo/bar/c.txt
478         * /foo/c.txt + bar     -->   /foo/c.txt/bar (!)
479         * </pre>
480         * (*) Note that the Windows relative drive prefix is unreliable when
481         * used with this method.
482         * (!) Note that the first parameter must be a path. If it ends with a name, then
483         * the name will be built into the concatenated path. If this might be a problem,
484         * use {@link #getFullPath(String)} on the base path argument.
485         *
486         * @param basePath  the base path to attach to, always treated as a path
487         * @param fullFilenameToAdd  the filename (or path) to attach to the base
488         * @return the concatenated path, or null if invalid
489         */
490        public static String concat(String basePath, String fullFilenameToAdd) {
491            int prefix = getPrefixLength(fullFilenameToAdd);
492            if (prefix < 0) {
493                return null;
494            }
495            if (prefix > 0) {
496                return normalize(fullFilenameToAdd);
497            }
498            if (basePath == null) {
499                return null;
500            }
501            int len = basePath.length();
502            if (len == 0) {
503                return normalize(fullFilenameToAdd);
504            }
505            char ch = basePath.charAt(len - 1);
506            if (isSeparator(ch)) {
507                return normalize(basePath + fullFilenameToAdd);
508            } else {
509                return normalize(basePath + '/' + fullFilenameToAdd);
510            }
511        }
512    
513        //-----------------------------------------------------------------------
514        /**
515         * Converts all separators to the Unix separator of forward slash.
516         * 
517         * @param path  the path to be changed, null ignored
518         * @return the updated path
519         */
520        public static String separatorsToUnix(String path) {
521            if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) {
522                return path;
523            }
524            return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
525        }
526    
527        /**
528         * Converts all separators to the Windows separator of backslash.
529         * 
530         * @param path  the path to be changed, null ignored
531         * @return the updated path
532         */
533        public static String separatorsToWindows(String path) {
534            if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) {
535                return path;
536            }
537            return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
538        }
539    
540        /**
541         * Converts all separators to the system separator.
542         * 
543         * @param path  the path to be changed, null ignored
544         * @return the updated path
545         */
546        public static String separatorsToSystem(String path) {
547            if (path == null) {
548                return null;
549            }
550            if (isSystemWindows()) {
551                return separatorsToWindows(path);
552            } else {
553                return separatorsToUnix(path);
554            }
555        }
556    
557        //-----------------------------------------------------------------------
558        /**
559         * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>.
560         * <p>
561         * This method will handle a file in either Unix or Windows format.
562         * <p>
563         * The prefix length includes the first slash in the full filename
564         * if applicable. Thus, it is possible that the length returned is greater
565         * than the length of the input string.
566         * <pre>
567         * Windows:
568         * a\b\c.txt           --> ""          --> relative
569         * \a\b\c.txt          --> "\"         --> current drive absolute
570         * C:a\b\c.txt         --> "C:"        --> drive relative
571         * C:\a\b\c.txt        --> "C:\"       --> absolute
572         * \\server\a\b\c.txt  --> "\\server\" --> UNC
573         *
574         * Unix:
575         * a/b/c.txt           --> ""          --> relative
576         * /a/b/c.txt          --> "/"         --> absolute
577         * ~/a/b/c.txt         --> "~/"        --> current user
578         * ~                   --> "~/"        --> current user (slash added)
579         * ~user/a/b/c.txt     --> "~user/"    --> named user
580         * ~user               --> "~user/"    --> named user (slash added)
581         * </pre>
582         * <p>
583         * The output will be the same irrespective of the machine that the code is running on.
584         * ie. both Unix and Windows prefixes are matched regardless.
585         *
586         * @param filename  the filename to find the prefix in, null returns -1
587         * @return the length of the prefix, -1 if invalid or null
588         */
589        public static int getPrefixLength(String filename) {
590            if (filename == null) {
591                return -1;
592            }
593            int len = filename.length();
594            if (len == 0) {
595                return 0;
596            }
597            char ch0 = filename.charAt(0);
598            if (ch0 == ':') {
599                return -1;
600            }
601            if (len == 1) {
602                if (ch0 == '~') {
603                    return 2;  // return a length greater than the input
604                }
605                return (isSeparator(ch0) ? 1 : 0);
606            } else {
607                if (ch0 == '~') {
608                    int posUnix = filename.indexOf(UNIX_SEPARATOR, 1);
609                    int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1);
610                    if (posUnix == -1 && posWin == -1) {
611                        return len + 1;  // return a length greater than the input
612                    }
613                    posUnix = (posUnix == -1 ? posWin : posUnix);
614                    posWin = (posWin == -1 ? posUnix : posWin);
615                    return Math.min(posUnix, posWin) + 1;
616                }
617                char ch1 = filename.charAt(1);
618                if (ch1 == ':') {
619                    ch0 = Character.toUpperCase(ch0);
620                    if (ch0 >= 'A' && ch0 <= 'Z') {
621                        if (len == 2 || isSeparator(filename.charAt(2)) == false) {
622                            return 2;
623                        }
624                        return 3;
625                    }
626                    return -1;
627                    
628                } else if (isSeparator(ch0) && isSeparator(ch1)) {
629                    int posUnix = filename.indexOf(UNIX_SEPARATOR, 2);
630                    int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2);
631                    if ((posUnix == -1 && posWin == -1) || posUnix == 2 || posWin == 2) {
632                        return -1;
633                    }
634                    posUnix = (posUnix == -1 ? posWin : posUnix);
635                    posWin = (posWin == -1 ? posUnix : posWin);
636                    return Math.min(posUnix, posWin) + 1;
637                } else {
638                    return (isSeparator(ch0) ? 1 : 0);
639                }
640            }
641        }
642    
643        /**
644         * Returns the index of the last directory separator character.
645         * <p>
646         * This method will handle a file in either Unix or Windows format.
647         * The position of the last forward or backslash is returned.
648         * <p>
649         * The output will be the same irrespective of the machine that the code is running on.
650         * 
651         * @param filename  the filename to find the last path separator in, null returns -1
652         * @return the index of the last separator character, or -1 if there
653         * is no such character
654         */
655        public static int indexOfLastSeparator(String filename) {
656            if (filename == null) {
657                return -1;
658            }
659            int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR);
660            int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR);
661            return Math.max(lastUnixPos, lastWindowsPos);
662        }
663    
664        /**
665         * Returns the index of the last extension separator character, which is a dot.
666         * <p>
667         * This method also checks that there is no directory separator after the last dot.
668         * To do this it uses {@link #indexOfLastSeparator(String)} which will
669         * handle a file in either Unix or Windows format.
670         * <p>
671         * The output will be the same irrespective of the machine that the code is running on.
672         * 
673         * @param filename  the filename to find the last path separator in, null returns -1
674         * @return the index of the last separator character, or -1 if there
675         * is no such character
676         */
677        public static int indexOfExtension(String filename) {
678            if (filename == null) {
679                return -1;
680            }
681            int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR);
682            int lastSeparator = indexOfLastSeparator(filename);
683            return (lastSeparator > extensionPos ? -1 : extensionPos);
684        }
685    
686        //-----------------------------------------------------------------------
687        /**
688         * Gets the prefix from a full filename, such as <code>C:/</code>
689         * or <code>~/</code>.
690         * <p>
691         * This method will handle a file in either Unix or Windows format.
692         * The prefix includes the first slash in the full filename where applicable.
693         * <pre>
694         * Windows:
695         * a\b\c.txt           --> ""          --> relative
696         * \a\b\c.txt          --> "\"         --> current drive absolute
697         * C:a\b\c.txt         --> "C:"        --> drive relative
698         * C:\a\b\c.txt        --> "C:\"       --> absolute
699         * \\server\a\b\c.txt  --> "\\server\" --> UNC
700         *
701         * Unix:
702         * a/b/c.txt           --> ""          --> relative
703         * /a/b/c.txt          --> "/"         --> absolute
704         * ~/a/b/c.txt         --> "~/"        --> current user
705         * ~                   --> "~/"        --> current user (slash added)
706         * ~user/a/b/c.txt     --> "~user/"    --> named user
707         * ~user               --> "~user/"    --> named user (slash added)
708         * </pre>
709         * <p>
710         * The output will be the same irrespective of the machine that the code is running on.
711         * ie. both Unix and Windows prefixes are matched regardless.
712         *
713         * @param filename  the filename to query, null returns null
714         * @return the prefix of the file, null if invalid
715         */
716        public static String getPrefix(String filename) {
717            if (filename == null) {
718                return null;
719            }
720            int len = getPrefixLength(filename);
721            if (len < 0) {
722                return null;
723            }
724            if (len > filename.length()) {
725                return filename + UNIX_SEPARATOR;  // we know this only happens for unix
726            }
727            return filename.substring(0, len);
728        }
729    
730        /**
731         * Gets the path from a full filename, which excludes the prefix.
732         * <p>
733         * This method will handle a file in either Unix or Windows format.
734         * The method is entirely text based, and returns the text before and
735         * including the last forward or backslash.
736         * <pre>
737         * C:\a\b\c.txt --> a\b\
738         * ~/a/b/c.txt  --> a/b/
739         * a.txt        --> ""
740         * a/b/c        --> a/b/
741         * a/b/c/       --> a/b/c/
742         * </pre>
743         * <p>
744         * The output will be the same irrespective of the machine that the code is running on.
745         * <p>
746         * This method drops the prefix from the result.
747         * See {@link #getFullPath(String)} for the method that retains the prefix.
748         *
749         * @param filename  the filename to query, null returns null
750         * @return the path of the file, an empty string if none exists, null if invalid
751         */
752        public static String getPath(String filename) {
753            return doGetPath(filename, 1);
754        }
755    
756        /**
757         * Gets the path from a full filename, which excludes the prefix, and
758         * also excluding the final directory separator.
759         * <p>
760         * This method will handle a file in either Unix or Windows format.
761         * The method is entirely text based, and returns the text before the
762         * last forward or backslash.
763         * <pre>
764         * C:\a\b\c.txt --> a\b
765         * ~/a/b/c.txt  --> a/b
766         * a.txt        --> ""
767         * a/b/c        --> a/b
768         * a/b/c/       --> a/b/c
769         * </pre>
770         * <p>
771         * The output will be the same irrespective of the machine that the code is running on.
772         * <p>
773         * This method drops the prefix from the result.
774         * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
775         *
776         * @param filename  the filename to query, null returns null
777         * @return the path of the file, an empty string if none exists, null if invalid
778         */
779        public static String getPathNoEndSeparator(String filename) {
780            return doGetPath(filename, 0);
781        }
782    
783        /**
784         * Does the work of getting the path.
785         * 
786         * @param filename  the filename
787         * @param separatorAdd  0 to omit the end separator, 1 to return it
788         * @return the path
789         */
790        private static String doGetPath(String filename, int separatorAdd) {
791            if (filename == null) {
792                return null;
793            }
794            int prefix = getPrefixLength(filename);
795            if (prefix < 0) {
796                return null;
797            }
798            int index = indexOfLastSeparator(filename);
799            int endIndex = index+separatorAdd;
800            if (prefix >= filename.length() || index < 0 || prefix >= endIndex) {
801                return "";
802            }
803            return filename.substring(prefix, endIndex);
804        }
805    
806        /**
807         * Gets the full path from a full filename, which is the prefix + path.
808         * <p>
809         * This method will handle a file in either Unix or Windows format.
810         * The method is entirely text based, and returns the text before and
811         * including the last forward or backslash.
812         * <pre>
813         * C:\a\b\c.txt --> C:\a\b\
814         * ~/a/b/c.txt  --> ~/a/b/
815         * a.txt        --> ""
816         * a/b/c        --> a/b/
817         * a/b/c/       --> a/b/c/
818         * C:           --> C:
819         * C:\          --> C:\
820         * ~            --> ~/
821         * ~/           --> ~/
822         * ~user        --> ~user/
823         * ~user/       --> ~user/
824         * </pre>
825         * <p>
826         * The output will be the same irrespective of the machine that the code is running on.
827         *
828         * @param filename  the filename to query, null returns null
829         * @return the path of the file, an empty string if none exists, null if invalid
830         */
831        public static String getFullPath(String filename) {
832            return doGetFullPath(filename, true);
833        }
834    
835        /**
836         * Gets the full path from a full filename, which is the prefix + path,
837         * and also excluding the final directory separator.
838         * <p>
839         * This method will handle a file in either Unix or Windows format.
840         * The method is entirely text based, and returns the text before the
841         * last forward or backslash.
842         * <pre>
843         * C:\a\b\c.txt --> C:\a\b
844         * ~/a/b/c.txt  --> ~/a/b
845         * a.txt        --> ""
846         * a/b/c        --> a/b
847         * a/b/c/       --> a/b/c
848         * C:           --> C:
849         * C:\          --> C:\
850         * ~            --> ~
851         * ~/           --> ~
852         * ~user        --> ~user
853         * ~user/       --> ~user
854         * </pre>
855         * <p>
856         * The output will be the same irrespective of the machine that the code is running on.
857         *
858         * @param filename  the filename to query, null returns null
859         * @return the path of the file, an empty string if none exists, null if invalid
860         */
861        public static String getFullPathNoEndSeparator(String filename) {
862            return doGetFullPath(filename, false);
863        }
864    
865        /**
866         * Does the work of getting the path.
867         * 
868         * @param filename  the filename
869         * @param includeSeparator  true to include the end separator
870         * @return the path
871         */
872        private static String doGetFullPath(String filename, boolean includeSeparator) {
873            if (filename == null) {
874                return null;
875            }
876            int prefix = getPrefixLength(filename);
877            if (prefix < 0) {
878                return null;
879            }
880            if (prefix >= filename.length()) {
881                if (includeSeparator) {
882                    return getPrefix(filename);  // add end slash if necessary
883                } else {
884                    return filename;
885                }
886            }
887            int index = indexOfLastSeparator(filename);
888            if (index < 0) {
889                return filename.substring(0, prefix);
890            }
891            int end = index + (includeSeparator ?  1 : 0);
892            if (end == 0) {
893                end++;
894            }
895            return filename.substring(0, end);
896        }
897    
898        /**
899         * Gets the name minus the path from a full filename.
900         * <p>
901         * This method will handle a file in either Unix or Windows format.
902         * The text after the last forward or backslash is returned.
903         * <pre>
904         * a/b/c.txt --> c.txt
905         * a.txt     --> a.txt
906         * a/b/c     --> c
907         * a/b/c/    --> ""
908         * </pre>
909         * <p>
910         * The output will be the same irrespective of the machine that the code is running on.
911         *
912         * @param filename  the filename to query, null returns null
913         * @return the name of the file without the path, or an empty string if none exists
914         */
915        public static String getName(String filename) {
916            if (filename == null) {
917                return null;
918            }
919            int index = indexOfLastSeparator(filename);
920            return filename.substring(index + 1);
921        }
922    
923        /**
924         * Gets the base name, minus the full path and extension, from a full filename.
925         * <p>
926         * This method will handle a file in either Unix or Windows format.
927         * The text after the last forward or backslash and before the last dot is returned.
928         * <pre>
929         * a/b/c.txt --> c
930         * a.txt     --> a
931         * a/b/c     --> c
932         * a/b/c/    --> ""
933         * </pre>
934         * <p>
935         * The output will be the same irrespective of the machine that the code is running on.
936         *
937         * @param filename  the filename to query, null returns null
938         * @return the name of the file without the path, or an empty string if none exists
939         */
940        public static String getBaseName(String filename) {
941            return removeExtension(getName(filename));
942        }
943    
944        /**
945         * Gets the extension of a filename.
946         * <p>
947         * This method returns the textual part of the filename after the last dot.
948         * There must be no directory separator after the dot.
949         * <pre>
950         * foo.txt      --> "txt"
951         * a/b/c.jpg    --> "jpg"
952         * a/b.txt/c    --> ""
953         * a/b/c        --> ""
954         * </pre>
955         * <p>
956         * The output will be the same irrespective of the machine that the code is running on.
957         *
958         * @param filename the filename to retrieve the extension of.
959         * @return the extension of the file or an empty string if none exists or <code>null</code>
960         * if the filename is <code>null</code>.
961         */
962        public static String getExtension(String filename) {
963            if (filename == null) {
964                return null;
965            }
966            int index = indexOfExtension(filename);
967            if (index == -1) {
968                return "";
969            } else {
970                return filename.substring(index + 1);
971            }
972        }
973    
974        //-----------------------------------------------------------------------
975        /**
976         * Removes the extension from a filename.
977         * <p>
978         * This method returns the textual part of the filename before the last dot.
979         * There must be no directory separator after the dot.
980         * <pre>
981         * foo.txt    --> foo
982         * a\b\c.jpg  --> a\b\c
983         * a\b\c      --> a\b\c
984         * a.b\c      --> a.b\c
985         * </pre>
986         * <p>
987         * The output will be the same irrespective of the machine that the code is running on.
988         *
989         * @param filename  the filename to query, null returns null
990         * @return the filename minus the extension
991         */
992        public static String removeExtension(String filename) {
993            if (filename == null) {
994                return null;
995            }
996            int index = indexOfExtension(filename);
997            if (index == -1) {
998                return filename;
999            } else {
1000                return filename.substring(0, index);
1001            }
1002        }
1003    
1004        //-----------------------------------------------------------------------
1005        /**
1006         * Checks whether two filenames are equal exactly.
1007         * <p>
1008         * No processing is performed on the filenames other than comparison,
1009         * thus this is merely a null-safe case-sensitive equals.
1010         *
1011         * @param filename1  the first filename to query, may be null
1012         * @param filename2  the second filename to query, may be null
1013         * @return true if the filenames are equal, null equals null
1014         * @see IOCase#SENSITIVE
1015         */
1016        public static boolean equals(String filename1, String filename2) {
1017            return equals(filename1, filename2, false, IOCase.SENSITIVE);
1018        }
1019    
1020        /**
1021         * Checks whether two filenames are equal using the case rules of the system.
1022         * <p>
1023         * No processing is performed on the filenames other than comparison.
1024         * The check is case-sensitive on Unix and case-insensitive on Windows.
1025         *
1026         * @param filename1  the first filename to query, may be null
1027         * @param filename2  the second filename to query, may be null
1028         * @return true if the filenames are equal, null equals null
1029         * @see IOCase#SYSTEM
1030         */
1031        public static boolean equalsOnSystem(String filename1, String filename2) {
1032            return equals(filename1, filename2, false, IOCase.SYSTEM);
1033        }
1034    
1035        //-----------------------------------------------------------------------
1036        /**
1037         * Checks whether two filenames are equal after both have been normalized.
1038         * <p>
1039         * Both filenames are first passed to {@link #normalize(String)}.
1040         * The check is then performed in a case-sensitive manner.
1041         *
1042         * @param filename1  the first filename to query, may be null
1043         * @param filename2  the second filename to query, may be null
1044         * @return true if the filenames are equal, null equals null
1045         * @see IOCase#SENSITIVE
1046         */
1047        public static boolean equalsNormalized(String filename1, String filename2) {
1048            return equals(filename1, filename2, true, IOCase.SENSITIVE);
1049        }
1050    
1051        /**
1052         * Checks whether two filenames are equal after both have been normalized
1053         * and using the case rules of the system.
1054         * <p>
1055         * Both filenames are first passed to {@link #normalize(String)}.
1056         * The check is then performed case-sensitive on Unix and
1057         * case-insensitive on Windows.
1058         *
1059         * @param filename1  the first filename to query, may be null
1060         * @param filename2  the second filename to query, may be null
1061         * @return true if the filenames are equal, null equals null
1062         * @see IOCase#SYSTEM
1063         */
1064        public static boolean equalsNormalizedOnSystem(String filename1, String filename2) {
1065            return equals(filename1, filename2, true, IOCase.SYSTEM);
1066        }
1067    
1068        /**
1069         * Checks whether two filenames are equal, optionally normalizing and providing
1070         * control over the case-sensitivity.
1071         *
1072         * @param filename1  the first filename to query, may be null
1073         * @param filename2  the second filename to query, may be null
1074         * @param normalized  whether to normalize the filenames
1075         * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1076         * @return true if the filenames are equal, null equals null
1077         * @since Commons IO 1.3
1078         */
1079        public static boolean equals(
1080                String filename1, String filename2,
1081                boolean normalized, IOCase caseSensitivity) {
1082            
1083            if (filename1 == null || filename2 == null) {
1084                return (filename1 == null && filename2 == null);
1085            }
1086            if (normalized) {
1087                filename1 = normalize(filename1);
1088                filename2 = normalize(filename2);
1089                if (filename1 == null || filename2 == null) {
1090                    throw new NullPointerException(
1091                        "Error normalizing one or both of the file names");
1092                }
1093            }
1094            if (caseSensitivity == null) {
1095                caseSensitivity = IOCase.SENSITIVE;
1096            }
1097            return caseSensitivity.checkEquals(filename1, filename2);
1098        }
1099    
1100        //-----------------------------------------------------------------------
1101        /**
1102         * Checks whether the extension of the filename is that specified.
1103         * <p>
1104         * This method obtains the extension as the textual part of the filename
1105         * after the last dot. There must be no directory separator after the dot.
1106         * The extension check is case-sensitive on all platforms.
1107         *
1108         * @param filename  the filename to query, null returns false
1109         * @param extension  the extension to check for, null or empty checks for no extension
1110         * @return true if the filename has the specified extension
1111         */
1112        public static boolean isExtension(String filename, String extension) {
1113            if (filename == null) {
1114                return false;
1115            }
1116            if (extension == null || extension.length() == 0) {
1117                return (indexOfExtension(filename) == -1);
1118            }
1119            String fileExt = getExtension(filename);
1120            return fileExt.equals(extension);
1121        }
1122    
1123        /**
1124         * Checks whether the extension of the filename is one of those specified.
1125         * <p>
1126         * This method obtains the extension as the textual part of the filename
1127         * after the last dot. There must be no directory separator after the dot.
1128         * The extension check is case-sensitive on all platforms.
1129         *
1130         * @param filename  the filename to query, null returns false
1131         * @param extensions  the extensions to check for, null checks for no extension
1132         * @return true if the filename is one of the extensions
1133         */
1134        public static boolean isExtension(String filename, String[] extensions) {
1135            if (filename == null) {
1136                return false;
1137            }
1138            if (extensions == null || extensions.length == 0) {
1139                return (indexOfExtension(filename) == -1);
1140            }
1141            String fileExt = getExtension(filename);
1142            for (String extension : extensions) {
1143                if (fileExt.equals(extension)) {
1144                    return true;
1145                }
1146            }
1147            return false;
1148        }
1149    
1150        /**
1151         * Checks whether the extension of the filename is one of those specified.
1152         * <p>
1153         * This method obtains the extension as the textual part of the filename
1154         * after the last dot. There must be no directory separator after the dot.
1155         * The extension check is case-sensitive on all platforms.
1156         *
1157         * @param filename  the filename to query, null returns false
1158         * @param extensions  the extensions to check for, null checks for no extension
1159         * @return true if the filename is one of the extensions
1160         */
1161        public static boolean isExtension(String filename, Collection<String> extensions) {
1162            if (filename == null) {
1163                return false;
1164            }
1165            if (extensions == null || extensions.isEmpty()) {
1166                return (indexOfExtension(filename) == -1);
1167            }
1168            String fileExt = getExtension(filename);
1169            for (String extension : extensions) {
1170                if (fileExt.equals(extension)) {
1171                    return true;
1172                }
1173            }
1174            return false;
1175        }
1176    
1177        //-----------------------------------------------------------------------
1178        /**
1179         * Checks a filename to see if it matches the specified wildcard matcher,
1180         * always testing case-sensitive.
1181         * <p>
1182         * The wildcard matcher uses the characters '?' and '*' to represent a
1183         * single or multiple (zero or more) wildcard characters.
1184         * This is the same as often found on Dos/Unix command lines.
1185         * The check is case-sensitive always.
1186         * <pre>
1187         * wildcardMatch("c.txt", "*.txt")      --> true
1188         * wildcardMatch("c.txt", "*.jpg")      --> false
1189         * wildcardMatch("a/b/c.txt", "a/b/*")  --> true
1190         * wildcardMatch("c.txt", "*.???")      --> true
1191         * wildcardMatch("c.txt", "*.????")     --> false
1192         * </pre>
1193         * N.B. the sequence "*?" does not work properly at present in match strings.
1194         * 
1195         * @param filename  the filename to match on
1196         * @param wildcardMatcher  the wildcard string to match against
1197         * @return true if the filename matches the wilcard string
1198         * @see IOCase#SENSITIVE
1199         */
1200        public static boolean wildcardMatch(String filename, String wildcardMatcher) {
1201            return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE);
1202        }
1203    
1204        /**
1205         * Checks a filename to see if it matches the specified wildcard matcher
1206         * using the case rules of the system.
1207         * <p>
1208         * The wildcard matcher uses the characters '?' and '*' to represent a
1209         * single or multiple (zero or more) wildcard characters.
1210         * This is the same as often found on Dos/Unix command lines.
1211         * The check is case-sensitive on Unix and case-insensitive on Windows.
1212         * <pre>
1213         * wildcardMatch("c.txt", "*.txt")      --> true
1214         * wildcardMatch("c.txt", "*.jpg")      --> false
1215         * wildcardMatch("a/b/c.txt", "a/b/*")  --> true
1216         * wildcardMatch("c.txt", "*.???")      --> true
1217         * wildcardMatch("c.txt", "*.????")     --> false
1218         * </pre>
1219         * N.B. the sequence "*?" does not work properly at present in match strings.
1220         * 
1221         * @param filename  the filename to match on
1222         * @param wildcardMatcher  the wildcard string to match against
1223         * @return true if the filename matches the wilcard string
1224         * @see IOCase#SYSTEM
1225         */
1226        public static boolean wildcardMatchOnSystem(String filename, String wildcardMatcher) {
1227            return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM);
1228        }
1229    
1230        /**
1231         * Checks a filename to see if it matches the specified wildcard matcher
1232         * allowing control over case-sensitivity.
1233         * <p>
1234         * The wildcard matcher uses the characters '?' and '*' to represent a
1235         * single or multiple (zero or more) wildcard characters.
1236         * N.B. the sequence "*?" does not work properly at present in match strings.
1237         * 
1238         * @param filename  the filename to match on
1239         * @param wildcardMatcher  the wildcard string to match against
1240         * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1241         * @return true if the filename matches the wilcard string
1242         * @since Commons IO 1.3
1243         */
1244        public static boolean wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity) {
1245            if (filename == null && wildcardMatcher == null) {
1246                return true;
1247            }
1248            if (filename == null || wildcardMatcher == null) {
1249                return false;
1250            }
1251            if (caseSensitivity == null) {
1252                caseSensitivity = IOCase.SENSITIVE;
1253            }
1254            String[] wcs = splitOnTokens(wildcardMatcher);
1255            boolean anyChars = false;
1256            int textIdx = 0;
1257            int wcsIdx = 0;
1258            Stack<int[]> backtrack = new Stack<int[]>();
1259            
1260            // loop around a backtrack stack, to handle complex * matching
1261            do {
1262                if (backtrack.size() > 0) {
1263                    int[] array = backtrack.pop();
1264                    wcsIdx = array[0];
1265                    textIdx = array[1];
1266                    anyChars = true;
1267                }
1268                
1269                // loop whilst tokens and text left to process
1270                while (wcsIdx < wcs.length) {
1271          
1272                    if (wcs[wcsIdx].equals("?")) {
1273                        // ? so move to next text char
1274                        textIdx++;
1275                        if (textIdx > filename.length()) {
1276                            break;
1277                        }
1278                        anyChars = false;
1279                        
1280                    } else if (wcs[wcsIdx].equals("*")) {
1281                        // set any chars status
1282                        anyChars = true;
1283                        if (wcsIdx == wcs.length - 1) {
1284                            textIdx = filename.length();
1285                        }
1286                        
1287                    } else {
1288                        // matching text token
1289                        if (anyChars) {
1290                            // any chars then try to locate text token
1291                            textIdx = caseSensitivity.checkIndexOf(filename, textIdx, wcs[wcsIdx]);
1292                            if (textIdx == -1) {
1293                                // token not found
1294                                break;
1295                            }
1296                            int repeat = caseSensitivity.checkIndexOf(filename, textIdx + 1, wcs[wcsIdx]);
1297                            if (repeat >= 0) {
1298                                backtrack.push(new int[] {wcsIdx, repeat});
1299                            }
1300                        } else {
1301                            // matching from current position
1302                            if (!caseSensitivity.checkRegionMatches(filename, textIdx, wcs[wcsIdx])) {
1303                                // couldnt match token
1304                                break;
1305                            }
1306                        }
1307          
1308                        // matched text token, move text index to end of matched token
1309                        textIdx += wcs[wcsIdx].length();
1310                        anyChars = false;
1311                    }
1312          
1313                    wcsIdx++;
1314                }
1315                
1316                // full match
1317                if (wcsIdx == wcs.length && textIdx == filename.length()) {
1318                    return true;
1319                }
1320                
1321            } while (backtrack.size() > 0);
1322      
1323            return false;
1324        }
1325    
1326        /**
1327         * Splits a string into a number of tokens.
1328         * The text is split by '?' and '*'.
1329         * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1330         * 
1331         * @param text  the text to split
1332         * @return the array of tokens, never null
1333         */
1334        static String[] splitOnTokens(String text) {
1335            // used by wildcardMatch
1336            // package level so a unit test may run on this
1337            
1338            if (text.indexOf('?') == -1 && text.indexOf('*') == -1) {
1339                return new String[] { text };
1340            }
1341    
1342            char[] array = text.toCharArray();
1343            ArrayList<String> list = new ArrayList<String>();
1344            StringBuilder buffer = new StringBuilder();
1345            for (int i = 0; i < array.length; i++) {
1346                if (array[i] == '?' || array[i] == '*') {
1347                    if (buffer.length() != 0) {
1348                        list.add(buffer.toString());
1349                        buffer.setLength(0);
1350                    }
1351                    if (array[i] == '?') {
1352                        list.add("?");
1353                    } else if (list.size() == 0 ||
1354                            (i > 0 && list.get(list.size() - 1).equals("*") == false)) {
1355                        list.add("*");
1356                    }
1357                } else {
1358                    buffer.append(array[i]);
1359                }
1360            }
1361            if (buffer.length() != 0) {
1362                list.add(buffer.toString());
1363            }
1364    
1365            return list.toArray( new String[ list.size() ] );
1366        }
1367    
1368    }