001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.File;
020import java.util.ArrayDeque;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.Deque;
025import java.util.List;
026import java.util.regex.Matcher;
027import java.util.regex.Pattern;
028import java.util.stream.Stream;
029
030/**
031 * General file name and file path manipulation utilities.
032 * <p>
033 * When dealing with file names you can hit problems when moving from a Windows
034 * based development machine to a Unix based production machine.
035 * This class aims to help avoid those problems.
036 * </p>
037 * <p>
038 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
039 * using JDK {@link java.io.File File} objects and the two argument constructor
040 * {@link java.io.File#File(java.io.File, String) File(File,String)}.
041 * </p>
042 * <p>
043 * Most methods on this class are designed to work the same on both Unix and Windows.
 * The methods that behave differently per platform have 'System', 'Unix' or 'Windows' in their name.
045 * </p>
046 * <p>
047 * Most methods recognize both separators (forward and back), and both
048 * sets of prefixes. See the Javadoc of each method for details.
049 * </p>
050 * <p>
051 * This class defines six components within a file name
052 * (example C:\dev\project\file.txt):
053 * </p>
054 * <ul>
055 * <li>the prefix - C:\</li>
056 * <li>the path - dev\project\</li>
057 * <li>the full path - C:\dev\project\</li>
058 * <li>the name - file.txt</li>
059 * <li>the base name - file</li>
060 * <li>the extension - txt</li>
061 * </ul>
062 * <p>
063 * Note that this class works best if directory file names end with a separator.
064 * If you omit the last separator, it is impossible to determine if the file name
065 * corresponds to a file or a directory. As a result, we have chosen to say
066 * it corresponds to a file.
067 * </p>
068 * <p>
069 * This class only supports Unix and Windows style names.
070 * Prefixes are matched as follows:
071 * </p>
072 * <pre>
073 * Windows:
074 * a\b\c.txt           --&gt; ""          --&gt; relative
075 * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
076 * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
077 * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
078 * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
079 *
080 * Unix:
081 * a/b/c.txt           --&gt; ""          --&gt; relative
082 * /a/b/c.txt          --&gt; "/"         --&gt; absolute
083 * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
084 * ~                   --&gt; "~/"        --&gt; current user (slash added)
085 * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
086 * ~user               --&gt; "~user/"    --&gt; named user (slash added)
087 * </pre>
088 * <p>
089 * Both prefix styles are matched always, irrespective of the machine that you are
090 * currently running on.
091 * </p>
092 * <p>
093 * Provenance: Excalibur, Alexandria, Tomcat, Commons-Utils.
094 * </p>
095 *
096 * @since 1.1
097 */
098public class FilenameUtils {
099
    /**
     * A shared zero-length {@code String} array.
     */
    private static final String[] EMPTY_STRING_ARRAY = {};

    /**
     * The empty string, returned by several methods when the requested
     * component (path, extension, ...) does not exist.
     */
    private static final String EMPTY_STRING = "";

    /**
     * Index value meaning "not found"; for example it is compared against the
     * result of the extension lookup in {@link #getExtension(String)}.
     */
    private static final int NOT_FOUND = -1;

    /**
     * The extension separator character.
     * @since 1.4
     */
    public static final char EXTENSION_SEPARATOR = '.';

    /**
     * The extension separator String.
     * @since 1.4
     */
    public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);

    /**
     * The Unix separator character.
     */
    private static final char UNIX_NAME_SEPARATOR = '/';

    /**
     * The Windows separator character.
     */
    private static final char WINDOWS_NAME_SEPARATOR = '\\';

    /**
     * The system separator character.
     */
    private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar;

    /**
     * The separator character that is the opposite of the system separator.
     */
    private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR);

    /**
     * Matches a whole string made of four dot-separated groups of one to three
     * decimal digits, capturing each group. The pattern itself does not limit
     * the numeric value of a group.
     */
    private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");

    // NOTE(review): the constants below are not used in this part of the file;
    // judging by their names they bound IPv4 octets and IPv6 hex groups during
    // address validation - confirm against the code that uses them.
    private static final int IPV4_MAX_OCTET_VALUE = 255;

    private static final int IPV6_MAX_HEX_GROUPS = 8;

    private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;

    private static final int MAX_UNSIGNED_SHORT = 0xffff;

    private static final int BASE_16 = 16;

    /**
     * Matches a whole string that starts with an ASCII letter or digit and
     * continues with ASCII letters, digits or hyphens.
     */
    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");
151
152    /**
153     * Concatenates a fileName to a base path using normal command line style rules.
154     * <p>
155     * The effect is equivalent to resultant directory after changing
156     * directory to the first argument, followed by changing directory to
157     * the second argument.
158     * </p>
159     * <p>
160     * The first argument is the base path, the second is the path to concatenate.
161     * The returned path is always normalized via {@link #normalize(String)},
162     * thus {@code ..} is handled.
163     * </p>
164     * <p>
     * If {@code fullFileNameToAdd} is absolute (has an absolute prefix), then
     * it will be normalized and returned.
167     * Otherwise, the paths will be joined, normalized and returned.
168     * </p>
169     * <p>
170     * The output will be the same on both Unix and Windows except
171     * for the separator character.
172     * </p>
173     * <pre>
174     * /foo/      + bar        --&gt;  /foo/bar
175     * /foo       + bar        --&gt;  /foo/bar
176     * /foo       + /bar       --&gt;  /bar
177     * /foo       + C:/bar     --&gt;  C:/bar
178     * /foo       + C:bar      --&gt;  C:bar [1]
179     * /foo/a/    + ../bar     --&gt;  /foo/bar
180     * /foo/      + ../../bar  --&gt;  null
181     * /foo/      + /bar       --&gt;  /bar
182     * /foo/..    + /bar       --&gt;  /bar
183     * /foo       + bar/c.txt  --&gt;  /foo/bar/c.txt
184     * /foo/c.txt + bar        --&gt;  /foo/c.txt/bar [2]
185     * </pre>
186     * <p>
187     * [1] Note that the Windows relative drive prefix is unreliable when
188     * used with this method.
189     * </p>
190     * <p>
191     * [2] Note that the first parameter must be a path. If it ends with a name, then
192     * the name will be built into the concatenated path. If this might be a problem,
193     * use {@link #getFullPath(String)} on the base path argument.
194     * </p>
195     *
196     * @param basePath  the base path to attach to, always treated as a path
197     * @param fullFileNameToAdd  the fileName (or path) to attach to the base
198     * @return the concatenated path, or null if invalid
199     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
200     */
201    public static String concat(final String basePath, final String fullFileNameToAdd) {
202        final int prefix = getPrefixLength(fullFileNameToAdd);
203        if (prefix < 0) {
204            return null;
205        }
206        if (prefix > 0) {
207            return normalize(fullFileNameToAdd);
208        }
209        if (basePath == null) {
210            return null;
211        }
212        final int len = basePath.length();
213        if (len == 0) {
214            return normalize(fullFileNameToAdd);
215        }
216        final char ch = basePath.charAt(len - 1);
217        if (isSeparator(ch)) {
218            return normalize(basePath + fullFileNameToAdd);
219        }
220        return normalize(basePath + '/' + fullFileNameToAdd);
221    }
222
223    /**
224     * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory).
225     * <p>
     * The file names are expected to be normalized.
227     * </p>
228     *
229     * Edge cases:
230     * <ul>
     * <li>A null or empty parent directory contains nothing: return false</li>
232     * <li>A directory does not contain itself: return false</li>
233     * <li>A null child file is not contained in any parent: return false</li>
234     * </ul>
235     *
236     * @param canonicalParent
237     *            the file to consider as the parent.
238     * @param canonicalChild
239     *            the file to consider as the child.
     * @return {@code true} if the child path starts with the parent path followed by a separator; {@code false} otherwise.
241     * @since 2.2
242     * @see FileUtils#directoryContains(File, File)
243     */
244    public static boolean directoryContains(final String canonicalParent, final String canonicalChild) {
245        if (isEmpty(canonicalParent) || isEmpty(canonicalChild)) {
246            return false;
247        }
248
249        if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
250            return false;
251        }
252
253        final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR);
254        final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator;
255
256        return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator);
257    }
258
259    /**
260     * Does the work of getting the path.
261     *
262     * @param fileName  the fileName
263     * @param includeSeparator  true to include the end separator
264     * @return the path
265     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
266     */
267    private static String doGetFullPath(final String fileName, final boolean includeSeparator) {
268        if (fileName == null) {
269            return null;
270        }
271        final int prefix = getPrefixLength(fileName);
272        if (prefix < 0) {
273            return null;
274        }
275        if (prefix >= fileName.length()) {
276            if (includeSeparator) {
277                return getPrefix(fileName);  // add end slash if necessary
278            }
279            return fileName;
280        }
281        final int index = indexOfLastSeparator(fileName);
282        if (index < 0) {
283            return fileName.substring(0, prefix);
284        }
285        int end = index + (includeSeparator ?  1 : 0);
286        if (end == 0) {
287            end++;
288        }
289        return fileName.substring(0, end);
290    }
291
292    /**
293     * Does the work of getting the path.
294     *
295     * @param fileName  the fileName
296     * @param separatorAdd  0 to omit the end separator, 1 to return it
297     * @return the path
298     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
299     */
300    private static String doGetPath(final String fileName, final int separatorAdd) {
301        if (fileName == null) {
302            return null;
303        }
304        final int prefix = getPrefixLength(fileName);
305        if (prefix < 0) {
306            return null;
307        }
308        final int index = indexOfLastSeparator(fileName);
309        final int endIndex = index + separatorAdd;
310        if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
311            return EMPTY_STRING;
312        }
313        return requireNonNullChars(fileName.substring(prefix, endIndex));
314    }
315
316    /**
317     * Internal method to perform the normalization.
318     *
319     * @param fileName  the fileName
320     * @param separator The separator character to use
321     * @param keepSeparator  true to keep the final separator
322     * @return the normalized fileName
323     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
324     */
    private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
        // A null name normalizes to null.
        if (fileName == null) {
            return null;
        }

        // Validation call; the return value is deliberately ignored here.
        requireNonNullChars(fileName);

        // 'size' tracks the number of valid characters in 'array' and shrinks
        // as segments are removed below.
        int size = fileName.length();
        if (size == 0) {
            return fileName;
        }
        // A negative prefix length marks an invalid name.
        final int prefix = getPrefixLength(fileName);
        if (prefix < 0) {
            return null;
        }

        final char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
        fileName.getChars(0, fileName.length(), array, 0);

        // fix separators throughout
        // (flipSeparator throws IllegalArgumentException if 'separator' is neither
        // '/' nor '\'; the loop also visits the two spare slots, which still hold
        // the default char value and therefore never match)
        final char otherSeparator = flipSeparator(separator);
        for (int i = 0; i < array.length; i++) {
            if (array[i] == otherSeparator) {
                array[i] = separator;
            }
        }

        // add extra separator on the end to simplify code below
        // lastIsDirectory remembers whether the input itself ended with a
        // separator (or, further down, with a "." or ".." segment).
        boolean lastIsDirectory = true;
        if (array[size - 1] != separator) {
            array[size++] = separator;
            lastIsDirectory = false;
        }

        // adjoining slashes
        // If we get here, prefix can only be 0 or greater, size 1 or greater
        // If prefix is 0, set loop start to 1 to prevent index errors
        for (int i = prefix != 0 ? prefix : 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == separator) {
                // shift the tail left by one to drop the duplicate, then
                // re-examine the same index
                System.arraycopy(array, i, array, i - 1, size - i);
                size--;
                i--;
            }
        }

        // dot slash
        // A "." counts as a segment only when it directly follows the prefix
        // or another separator.
        for (int i = prefix + 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' &&
                    (i == prefix + 1 || array[i - 2] == separator)) {
                if (i == size - 1) {
                    // the name ended in "." - treat the result as a directory
                    lastIsDirectory = true;
                }
                // drop the two characters "./" by shifting the tail left
                System.arraycopy(array, i + 1, array, i - 1, size - i);
                size -=2;
                i--;
            }
        }

        // double dot slash
        // A ".." counts as a segment only when it directly follows the prefix
        // or another separator.
        outer:
        for (int i = prefix + 2; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
                    (i == prefix + 2 || array[i - 3] == separator)) {
                if (i == prefix + 2) {
                    // ".." directly after the prefix: no parent segment left to remove
                    return null;
                }
                if (i == size - 1) {
                    // the name ended in ".." - treat the result as a directory
                    lastIsDirectory = true;
                }
                int j;
                // search backwards for the separator that precedes the parent
                // segment; i - 4 is the character just before the separator
                // that ends the parent segment
                for (j = i - 4 ; j >= prefix; j--) {
                    if (array[j] == separator) {
                        // remove b/../ from a/b/../c
                        System.arraycopy(array, i + 1, array, j + 1, size - i);
                        size -= i - j;
                        i = j + 1;
                        continue outer;
                    }
                }
                // remove a/../ from a/../c
                // (no earlier separator found: the parent segment starts right
                // after the prefix)
                System.arraycopy(array, i + 1, array, prefix, size - i);
                size -= i + 1 - prefix;
                i = prefix + 1;
            }
        }

        if (size <= 0) {  // should never be less than 0
            return EMPTY_STRING;
        }
        if (size <= prefix) {  // should never be less than prefix
            return new String(array, 0, size);
        }
        if (lastIsDirectory && keepSeparator) {
            return new String(array, 0, size);  // keep trailing separator
        }
        return new String(array, 0, size - 1);  // lose trailing separator
    }
422
423    /**
424     * Checks whether two fileNames are equal exactly.
425     * <p>
426     * No processing is performed on the fileNames other than comparison,
427     * thus this is merely a null-safe case-sensitive equals.
428     * </p>
429     *
430     * @param fileName1  the first fileName to query, may be null
431     * @param fileName2  the second fileName to query, may be null
432     * @return true if the fileNames are equal, null equals null
433     * @see IOCase#SENSITIVE
434     */
435    public static boolean equals(final String fileName1, final String fileName2) {
436        return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
437    }
438
439    /**
440     * Checks whether two fileNames are equal, optionally normalizing and providing
441     * control over the case-sensitivity.
442     *
443     * @param fileName1  the first fileName to query, may be null
444     * @param fileName2  the second fileName to query, may be null
445     * @param normalize  whether to normalize the fileNames
446     * @param ioCase  what case sensitivity rule to use, null means case-sensitive
447     * @return true if the fileNames are equal, null equals null
448     * @since 1.3
449     */
450    public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) {
451
452        if (fileName1 == null || fileName2 == null) {
453            return fileName1 == null && fileName2 == null;
454        }
455        if (normalize) {
456            fileName1 = normalize(fileName1);
457            if (fileName1 == null) {
458                return false;
459            }
460            fileName2 = normalize(fileName2);
461            if (fileName2 == null) {
462                return false;
463            }
464        }
465        return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2);
466    }
467
468    /**
469     * Checks whether two fileNames are equal after both have been normalized.
470     * <p>
471     * Both fileNames are first passed to {@link #normalize(String)}.
472     * The check is then performed in a case-sensitive manner.
473     * </p>
474     *
475     * @param fileName1  the first fileName to query, may be null
476     * @param fileName2  the second fileName to query, may be null
477     * @return true if the fileNames are equal, null equals null
478     * @see IOCase#SENSITIVE
479     */
480    public static boolean equalsNormalized(final String fileName1, final String fileName2) {
481        return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
482    }
483
484    /**
485     * Checks whether two fileNames are equal after both have been normalized
486     * and using the case rules of the system.
487     * <p>
488     * Both fileNames are first passed to {@link #normalize(String)}.
489     * The check is then performed case-sensitive on Unix and
490     * case-insensitive on Windows.
491     * </p>
492     *
493     * @param fileName1  the first fileName to query, may be null
494     * @param fileName2  the second fileName to query, may be null
495     * @return true if the fileNames are equal, null equals null
496     * @see IOCase#SYSTEM
497     */
498    public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
499        return equals(fileName1, fileName2, true, IOCase.SYSTEM);
500    }
501
502    /**
503     * Checks whether two fileNames are equal using the case rules of the system.
504     * <p>
505     * No processing is performed on the fileNames other than comparison.
506     * The check is case-sensitive on Unix and case-insensitive on Windows.
507     * </p>
508     *
509     * @param fileName1  the first fileName to query, may be null
510     * @param fileName2  the second fileName to query, may be null
511     * @return true if the fileNames are equal, null equals null
512     * @see IOCase#SYSTEM
513     */
514    public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
515        return equals(fileName1, fileName2, false, IOCase.SYSTEM);
516    }
517
518    /**
519     * Flips the Windows name separator to Linux and vice-versa.
520     *
     * @param ch The Windows or Linux name separator.
     * @return The Windows or Linux name separator.
     * @throws IllegalArgumentException if {@code ch} is neither of the two name separators.
523     */
524    static char flipSeparator(final char ch) {
525        if (ch == UNIX_NAME_SEPARATOR) {
526            return WINDOWS_NAME_SEPARATOR;
527        }
528        if (ch == WINDOWS_NAME_SEPARATOR) {
529            return UNIX_NAME_SEPARATOR;
530        }
531        throw new IllegalArgumentException(String.valueOf(ch));
532    }
533
534    /**
535     * Special handling for NTFS ADS: Don't accept colon in the fileName.
536     *
537     * @param fileName a file name
     * @return the index just after the last name separator (of either style), or 0 if the name contains no separator.
539     */
540    private static int getAdsCriticalOffset(final String fileName) {
541        // Step 1: Remove leading path segments.
542        final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR);
543        final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR);
544        if (offset1 == -1) {
545            if (offset2 == -1) {
546                return 0;
547            }
548            return offset2 + 1;
549        }
550        if (offset2 == -1) {
551            return offset1 + 1;
552        }
553        return Math.max(offset1, offset2) + 1;
554    }
555
556    /**
557     * Gets the base name, minus the full path and extension, from a full fileName.
558     * <p>
559     * This method will handle a file in either Unix or Windows format.
560     * The text after the last forward or backslash and before the last dot is returned.
561     * </p>
562     * <pre>
563     * a/b/c.txt --&gt; c
564     * a.txt     --&gt; a
565     * a/b/c     --&gt; c
566     * a/b/c/    --&gt; ""
567     * </pre>
568     * <p>
569     * The output will be the same irrespective of the machine that the code is running on.
570     * </p>
571     *
572     * @param fileName  the fileName to query, null returns null
     * @return the name of the file without the path and without the extension, or an empty string if none exists
574     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
575     */
576    public static String getBaseName(final String fileName) {
577        return removeExtension(getName(fileName));
578    }
579
580    /**
581     * Gets the extension of a fileName.
582     * <p>
583     * This method returns the textual part of the fileName after the last dot.
584     * There must be no directory separator after the dot.
585     * </p>
586     * <pre>
587     * foo.txt      --&gt; "txt"
588     * a/b/c.jpg    --&gt; "jpg"
589     * a/b.txt/c    --&gt; ""
590     * a/b/c        --&gt; ""
591     * </pre>
592     * <p>
593     * The output will be the same irrespective of the machine that the code is running on, with the
594     * exception of a possible {@link IllegalArgumentException} on Windows (see below).
595     * </p>
596     * <p>
597     * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt".
598     * In this case, the name wouldn't be the name of a file, but the identifier of an
599     * alternate data stream (bar.txt) on the file foo.exe. The method used to return
600     * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
601     * an {@link IllegalArgumentException} for names like this.
602     * </p>
603     *
604     * @param fileName the fileName to retrieve the extension of.
605     * @return the extension of the file or an empty string if none exists or {@code null}
606     * if the fileName is {@code null}.
607     * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact,
608     * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
609     */
610    public static String getExtension(final String fileName) throws IllegalArgumentException {
611        if (fileName == null) {
612            return null;
613        }
614        final int index = indexOfExtension(fileName);
615        if (index == NOT_FOUND) {
616            return EMPTY_STRING;
617        }
618        return fileName.substring(index + 1);
619    }
620
621    /**
622     * Gets the full path from a full fileName, which is the prefix + path.
623     * <p>
624     * This method will handle a file in either Unix or Windows format.
625     * The method is entirely text based, and returns the text before and
626     * including the last forward or backslash.
627     * </p>
628     * <pre>
629     * C:\a\b\c.txt --&gt; C:\a\b\
630     * ~/a/b/c.txt  --&gt; ~/a/b/
631     * a.txt        --&gt; ""
632     * a/b/c        --&gt; a/b/
633     * a/b/c/       --&gt; a/b/c/
634     * C:           --&gt; C:
635     * C:\          --&gt; C:\
636     * ~            --&gt; ~/
637     * ~/           --&gt; ~/
638     * ~user        --&gt; ~user/
639     * ~user/       --&gt; ~user/
640     * </pre>
641     * <p>
642     * The output will be the same irrespective of the machine that the code is running on.
643     * </p>
644     *
645     * @param fileName  the fileName to query, null returns null
646     * @return the path of the file, an empty string if none exists, null if invalid
647     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
648     */
649    public static String getFullPath(final String fileName) {
650        return doGetFullPath(fileName, true);
651    }
652
653    /**
654     * Gets the full path from a full fileName, which is the prefix + path,
655     * and also excluding the final directory separator.
656     * <p>
657     * This method will handle a file in either Unix or Windows format.
658     * The method is entirely text based, and returns the text before the
659     * last forward or backslash.
660     * </p>
661     * <pre>
662     * C:\a\b\c.txt --&gt; C:\a\b
663     * ~/a/b/c.txt  --&gt; ~/a/b
664     * a.txt        --&gt; ""
665     * a/b/c        --&gt; a/b
666     * a/b/c/       --&gt; a/b/c
667     * C:           --&gt; C:
668     * C:\          --&gt; C:\
669     * ~            --&gt; ~
     * ~/           --&gt; ~/
     * ~user        --&gt; ~user
     * ~user/       --&gt; ~user/
673     * </pre>
674     * <p>
675     * The output will be the same irrespective of the machine that the code is running on.
676     * </p>
677     *
678     * @param fileName  the fileName to query, null returns null
679     * @return the path of the file, an empty string if none exists, null if invalid
680     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
681     */
682    public static String getFullPathNoEndSeparator(final String fileName) {
683        return doGetFullPath(fileName, false);
684    }
685
686    /**
687     * Gets the name minus the path from a full fileName.
688     * <p>
689     * This method will handle a file in either Unix or Windows format.
690     * The text after the last forward or backslash is returned.
691     * </p>
692     * <pre>
693     * a/b/c.txt --&gt; c.txt
694     * a.txt     --&gt; a.txt
695     * a/b/c     --&gt; c
696     * a/b/c/    --&gt; ""
697     * </pre>
698     * <p>
699     * The output will be the same irrespective of the machine that the code is running on.
700     * </p>
701     *
702     * @param fileName  the fileName to query, null returns null
703     * @return the name of the file without the path, or an empty string if none exists
704     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
705     */
706    public static String getName(final String fileName) {
707        if (fileName == null) {
708            return null;
709        }
710        return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1);
711    }
712
713    /**
714     * Gets the path from a full fileName, which excludes the prefix.
715     * <p>
716     * This method will handle a file in either Unix or Windows format.
717     * The method is entirely text based, and returns the text before and
718     * including the last forward or backslash.
719     * </p>
720     * <pre>
721     * C:\a\b\c.txt --&gt; a\b\
722     * ~/a/b/c.txt  --&gt; a/b/
723     * a.txt        --&gt; ""
724     * a/b/c        --&gt; a/b/
725     * a/b/c/       --&gt; a/b/c/
726     * </pre>
727     * <p>
728     * The output will be the same irrespective of the machine that the code is running on.
729     * </p>
730     * <p>
731     * This method drops the prefix from the result.
732     * See {@link #getFullPath(String)} for the method that retains the prefix.
733     * </p>
734     *
735     * @param fileName  the fileName to query, null returns null
736     * @return the path of the file, an empty string if none exists, null if invalid
737     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
738     */
739    public static String getPath(final String fileName) {
740        return doGetPath(fileName, 1);
741    }
742
743    /**
744     * Gets the path from a full fileName, which excludes the prefix, and
745     * also excluding the final directory separator.
746     * <p>
747     * This method will handle a file in either Unix or Windows format.
748     * The method is entirely text based, and returns the text before the
749     * last forward or backslash.
750     * </p>
751     * <pre>
752     * C:\a\b\c.txt --&gt; a\b
753     * ~/a/b/c.txt  --&gt; a/b
754     * a.txt        --&gt; ""
755     * a/b/c        --&gt; a/b
756     * a/b/c/       --&gt; a/b/c
757     * </pre>
758     * <p>
759     * The output will be the same irrespective of the machine that the code is running on.
760     * </p>
761     * <p>
762     * This method drops the prefix from the result.
763     * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
764     * </p>
765     *
766     * @param fileName  the fileName to query, null returns null
767     * @return the path of the file, an empty string if none exists, null if invalid
768     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
769     */
770    public static String getPathNoEndSeparator(final String fileName) {
771        return doGetPath(fileName, 0);
772    }
773
774    /**
775     * Gets the prefix from a full fileName, such as {@code C:/}
776     * or {@code ~/}.
777     * <p>
778     * This method will handle a file in either Unix or Windows format.
779     * The prefix includes the first slash in the full fileName where applicable.
780     * </p>
781     * <pre>
782     * Windows:
783     * a\b\c.txt           --&gt; ""          --&gt; relative
784     * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
785     * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
786     * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
787     * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
788     *
789     * Unix:
790     * a/b/c.txt           --&gt; ""          --&gt; relative
791     * /a/b/c.txt          --&gt; "/"         --&gt; absolute
792     * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
793     * ~                   --&gt; "~/"        --&gt; current user (slash added)
794     * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
795     * ~user               --&gt; "~user/"    --&gt; named user (slash added)
796     * </pre>
797     * <p>
798     * The output will be the same irrespective of the machine that the code is running on.
799     * ie. both Unix and Windows prefixes are matched regardless.
800     * </p>
801     *
802     * @param fileName  the fileName to query, null returns null
803     * @return the prefix of the file, null if invalid
804     * @throws IllegalArgumentException if the result contains the null character ({@code U+0000})
805     */
806    public static String getPrefix(final String fileName) {
807        if (fileName == null) {
808            return null;
809        }
810        final int len = getPrefixLength(fileName);
811        if (len < 0) {
812            return null;
813        }
814        if (len > fileName.length()) {
815            requireNonNullChars(fileName);
816            return fileName + UNIX_NAME_SEPARATOR;
817        }
818        return requireNonNullChars(fileName.substring(0, len));
819    }
820
821    /**
822     * Returns the length of the fileName prefix, such as {@code C:/} or {@code ~/}.
823     * <p>
824     * This method will handle a file in either Unix or Windows format.
825     * </p>
826     * <p>
827     * The prefix length includes the first slash in the full fileName
828     * if applicable. Thus, it is possible that the length returned is greater
829     * than the length of the input string.
830     * </p>
831     * <pre>
832     * Windows:
833     * a\b\c.txt           --&gt; 0           --&gt; relative
834     * \a\b\c.txt          --&gt; 1           --&gt; current drive absolute
835     * C:a\b\c.txt         --&gt; 2           --&gt; drive relative
836     * C:\a\b\c.txt        --&gt; 3           --&gt; absolute
837     * \\server\a\b\c.txt  --&gt; 9           --&gt; UNC
838     * \\\a\b\c.txt        --&gt; -1          --&gt; error
839     *
840     * Unix:
841     * a/b/c.txt           --&gt; 0           --&gt; relative
842     * /a/b/c.txt          --&gt; 1           --&gt; absolute
843     * ~/a/b/c.txt         --&gt; 2           --&gt; current user
844     * ~                   --&gt; 2           --&gt; current user (slash added)
845     * ~user/a/b/c.txt     --&gt; 6           --&gt; named user
846     * ~user               --&gt; 6           --&gt; named user (slash added)
847     * //server/a/b/c.txt  --&gt; 9
848     * ///a/b/c.txt        --&gt; -1          --&gt; error
849     * C:                  --&gt; 0           --&gt; valid file name as only null character and / are reserved characters
850     * </pre>
851     * <p>
852     * The output will be the same irrespective of the machine that the code is running on.
853     * ie. both Unix and Windows prefixes are matched regardless.
854     * </p>
855     * <p>
856     * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
857     * These must be followed by a server name, so double-slashes are not collapsed
858     * to a single slash at the start of the fileName.
859     * </p>
860     *
861     * @param fileName  the fileName to find the prefix in, null returns -1
862     * @return the length of the prefix, -1 if invalid or null
863     */
864    public static int getPrefixLength(final String fileName) {
865        if (fileName == null) {
866            return NOT_FOUND;
867        }
868        final int len = fileName.length();
869        if (len == 0) {
870            return 0;
871        }
872        char ch0 = fileName.charAt(0);
873        if (ch0 == ':') {
874            return NOT_FOUND;
875        }
876        if (len == 1) {
877            if (ch0 == '~') {
878                return 2;  // return a length greater than the input
879            }
880            return isSeparator(ch0) ? 1 : 0;
881        }
882        if (ch0 == '~') {
883            int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1);
884            int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1);
885            if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
886                return len + 1;  // return a length greater than the input
887            }
888            posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
889            posWin = posWin == NOT_FOUND ? posUnix : posWin;
890            return Math.min(posUnix, posWin) + 1;
891        }
892        final char ch1 = fileName.charAt(1);
893        if (ch1 == ':') {
894            ch0 = Character.toUpperCase(ch0);
895            if (ch0 >= 'A' && ch0 <= 'Z') {
896                if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
897                    return 0;
898                }
899                if (len == 2 || !isSeparator(fileName.charAt(2))) {
900                    return 2;
901                }
902                return 3;
903            }
904            if (ch0 == UNIX_NAME_SEPARATOR) {
905                return 1;
906            }
907            return NOT_FOUND;
908
909        }
910        if (!isSeparator(ch0) || !isSeparator(ch1)) {
911            return isSeparator(ch0) ? 1 : 0;
912        }
913        int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2);
914        int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2);
915        if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
916            return NOT_FOUND;
917        }
918        posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
919        posWin = posWin == NOT_FOUND ? posUnix : posWin;
920        final int pos = Math.min(posUnix, posWin) + 1;
921        final String hostnamePart = fileName.substring(2, pos - 1);
922        return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
923    }
924
925    /**
926     * Returns the index of the last extension separator character, which is a dot.
927     * <p>
928     * This method also checks that there is no directory separator after the last dot. To do this it uses
929     * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
930     * </p>
931     * <p>
932     * The output will be the same irrespective of the machine that the code is running on, with the
933     * exception of a possible {@link IllegalArgumentException} on Windows (see below).
934     * </p>
     * <p>
     * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt".
     * Such a name is not the name of a file, but the identifier of an
     * alternate data stream (bar.txt) on the file foo.exe. The method used to return
     * the position of the last dot (the one in ".txt") here, which would be misleading.
     * On Windows, Commons IO 2.7 and later throw an {@link IllegalArgumentException} for names like this.
     * </p>
940     *
941     * @param fileName
942     *            the fileName to find the last extension separator in, null returns -1
943     * @return the index of the last extension separator character, or -1 if there is no such character
944     * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact,
945     * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
946     */
947    public static int indexOfExtension(final String fileName) throws IllegalArgumentException {
948        if (fileName == null) {
949            return NOT_FOUND;
950        }
951        if (isSystemWindows()) {
952            // Special handling for NTFS ADS: Don't accept colon in the fileName.
953            final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
954            if (offset != -1) {
955                throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
956            }
957        }
958        final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
959        final int lastSeparator = indexOfLastSeparator(fileName);
960        return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
961    }
962
963    /**
964     * Returns the index of the last directory separator character.
965     * <p>
966     * This method will handle a file in either Unix or Windows format.
967     * The position of the last forward or backslash is returned.
968     * <p>
969     * The output will be the same irrespective of the machine that the code is running on.
970     *
971     * @param fileName  the fileName to find the last path separator in, null returns -1
972     * @return the index of the last separator character, or -1 if there
973     * is no such character
974     */
975    public static int indexOfLastSeparator(final String fileName) {
976        if (fileName == null) {
977            return NOT_FOUND;
978        }
979        final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR);
980        final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR);
981        return Math.max(lastUnixPos, lastWindowsPos);
982    }
983
984    private static boolean isEmpty(final String string) {
985        return string == null || string.isEmpty();
986    }
987
988    /**
989     * Checks whether the extension of the fileName is one of those specified.
990     * <p>
991     * This method obtains the extension as the textual part of the fileName
992     * after the last dot. There must be no directory separator after the dot.
993     * The extension check is case-sensitive on all platforms.
994     *
995     * @param fileName  the fileName to query, null returns false
996     * @param extensions  the extensions to check for, null checks for no extension
997     * @return true if the fileName is one of the extensions
998     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
999     */
1000    public static boolean isExtension(final String fileName, final Collection<String> extensions) {
1001        if (fileName == null) {
1002            return false;
1003        }
1004        requireNonNullChars(fileName);
1005
1006        if (extensions == null || extensions.isEmpty()) {
1007            return indexOfExtension(fileName) == NOT_FOUND;
1008        }
1009        return extensions.contains(getExtension(fileName));
1010    }
1011
1012    /**
1013     * Checks whether the extension of the fileName is that specified.
1014     * <p>
1015     * This method obtains the extension as the textual part of the fileName
1016     * after the last dot. There must be no directory separator after the dot.
1017     * The extension check is case-sensitive on all platforms.
1018     *
1019     * @param fileName  the fileName to query, null returns false
1020     * @param extension  the extension to check for, null or empty checks for no extension
1021     * @return true if the fileName has the specified extension
1022     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1023     */
1024    public static boolean isExtension(final String fileName, final String extension) {
1025        if (fileName == null) {
1026            return false;
1027        }
1028        requireNonNullChars(fileName);
1029
1030        if (isEmpty(extension)) {
1031            return indexOfExtension(fileName) == NOT_FOUND;
1032        }
1033        return getExtension(fileName).equals(extension);
1034    }
1035
1036    /**
1037     * Checks whether the extension of the fileName is one of those specified.
1038     * <p>
1039     * This method obtains the extension as the textual part of the fileName
1040     * after the last dot. There must be no directory separator after the dot.
1041     * The extension check is case-sensitive on all platforms.
1042     *
1043     * @param fileName  the fileName to query, null returns false
1044     * @param extensions  the extensions to check for, null checks for no extension
1045     * @return true if the fileName is one of the extensions
1046     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1047     */
1048    public static boolean isExtension(final String fileName, final String... extensions) {
1049        if (fileName == null) {
1050            return false;
1051        }
1052        requireNonNullChars(fileName);
1053
1054        if (extensions == null || extensions.length == 0) {
1055            return indexOfExtension(fileName) == NOT_FOUND;
1056        }
1057        final String fileExt = getExtension(fileName);
1058        return Stream.of(extensions).anyMatch(fileExt::equals);
1059    }
1060
1061    /**
1062     * Checks whether a given string represents a valid IPv4 address.
1063     *
1064     * @param name the name to validate
1065     * @return true if the given name is a valid IPv4 address
1066     */
1067    // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
1068    private static boolean isIPv4Address(final String name) {
1069        final Matcher m = IPV4_PATTERN.matcher(name);
1070        if (!m.matches() || m.groupCount() != 4) {
1071            return false;
1072        }
1073
1074        // verify that address subgroups are legal
1075        for (int i = 1; i <= 4; i++) {
1076            final String ipSegment = m.group(i);
1077            final int iIpSegment = Integer.parseInt(ipSegment);
1078            if (iIpSegment > IPV4_MAX_OCTET_VALUE) {
1079                return false;
1080            }
1081
1082            if (ipSegment.length() > 1 && ipSegment.startsWith("0")) {
1083                return false;
1084            }
1085
1086        }
1087
1088        return true;
1089    }
1090
1091    // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
1092    /**
1093     * Checks whether a given string represents a valid IPv6 address.
1094     *
1095     * @param inet6Address the name to validate
1096     * @return true if the given name is a valid IPv6 address
1097     */
1098    private static boolean isIPv6Address(final String inet6Address) {
1099        final boolean containsCompressedZeroes = inet6Address.contains("::");
1100        if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) {
1101            return false;
1102        }
1103        if (inet6Address.startsWith(":") && !inet6Address.startsWith("::")
1104                || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) {
1105            return false;
1106        }
1107        String[] octets = inet6Address.split(":");
1108        if (containsCompressedZeroes) {
1109            final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
1110            if (inet6Address.endsWith("::")) {
1111                // String.split() drops ending empty segments
1112                octetList.add("");
1113            } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
1114                octetList.remove(0);
1115            }
1116            octets = octetList.toArray(EMPTY_STRING_ARRAY);
1117        }
1118        if (octets.length > IPV6_MAX_HEX_GROUPS) {
1119            return false;
1120        }
1121        int validOctets = 0;
1122        int emptyOctets = 0; // consecutive empty chunks
1123        for (int index = 0; index < octets.length; index++) {
1124            final String octet = octets[index];
1125            if (octet.isEmpty()) {
1126                emptyOctets++;
1127                if (emptyOctets > 1) {
1128                    return false;
1129                }
1130            } else {
1131                emptyOctets = 0;
1132                // Is last chunk an IPv4 address?
1133                if (index == octets.length - 1 && octet.contains(".")) {
1134                    if (!isIPv4Address(octet)) {
1135                        return false;
1136                    }
1137                    validOctets += 2;
1138                    continue;
1139                }
1140                if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
1141                    return false;
1142                }
1143                final int octetInt;
1144                try {
1145                    octetInt = Integer.parseInt(octet, BASE_16);
1146                } catch (final NumberFormatException e) {
1147                    return false;
1148                }
1149                if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
1150                    return false;
1151                }
1152            }
1153            validOctets++;
1154        }
1155        return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
1156    }
1157
1158    /**
1159     * Checks whether a given string is a valid host name according to
1160     * RFC 3986 - not accepting IP addresses.
1161     *
1162     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1163     * @param name the hostname to validate
1164     * @return true if the given name is a valid host name
1165     */
1166    private static boolean isRFC3986HostName(final String name) {
1167        final String[] parts = name.split("\\.", -1);
1168        for (int i = 0; i < parts.length; i++) {
1169            if (parts[i].isEmpty()) {
1170                // trailing dot is legal, otherwise we've hit a .. sequence
1171                return i == parts.length - 1;
1172            }
1173            if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
1174                return false;
1175            }
1176        }
1177        return true;
1178    }
1179
1180    /**
1181     * Checks if the character is a separator.
1182     *
1183     * @param ch  the character to check
1184     * @return true if it is a separator character
1185     */
1186    private static boolean isSeparator(final char ch) {
1187        return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR;
1188    }
1189
1190    /**
1191     * Determines if Windows file system is in use.
1192     *
1193     * @return true if the system is Windows
1194     */
1195    static boolean isSystemWindows() {
1196        return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR;
1197    }
1198
1199    /**
1200     * Checks whether a given string is a valid host name according to
1201     * RFC 3986.
1202     *
1203     * <p>Accepted are IP addresses (v4 and v6) as well as what the
1204     * RFC calls a "reg-name". Percent encoded names don't seem to be
1205     * valid names in UNC paths.</p>
1206     *
1207     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1208     * @param name the hostname to validate
1209     * @return true if the given name is a valid host name
1210     */
1211    private static boolean isValidHostName(final String name) {
1212        return isIPv6Address(name) || isRFC3986HostName(name);
1213    }
1214
1215    /**
1216     * Normalizes a path, removing double and single dot path steps.
1217     * <p>
1218     * This method normalizes a path to a standard format.
1219     * The input may contain separators in either Unix or Windows format.
1220     * The output will contain separators in the format of the system.
1221     * <p>
1222     * A trailing slash will be retained.
1223     * A double slash will be merged to a single slash (but UNC names are handled).
1224     * A single dot path segment will be removed.
1225     * A double dot will cause that path segment and the one before to be removed.
1226     * If the double dot has no parent path segment to work with, {@code null}
1227     * is returned.
1228     * <p>
1229     * The output will be the same on both Unix and Windows except
1230     * for the separator character.
1231     * <pre>
1232     * /foo//               --&gt;   /foo/
1233     * /foo/./              --&gt;   /foo/
1234     * /foo/../bar          --&gt;   /bar
1235     * /foo/../bar/         --&gt;   /bar/
1236     * /foo/../bar/../baz   --&gt;   /baz
1237     * //foo//./bar         --&gt;   //foo/bar
1238     * /../                 --&gt;   null
1239     * ../foo               --&gt;   null
1240     * foo/bar/..           --&gt;   foo/
1241     * foo/../../bar        --&gt;   null
1242     * foo/../bar           --&gt;   bar
1243     * //server/foo/../bar  --&gt;   //server/bar
1244     * //server/../bar      --&gt;   null
1245     * C:\foo\..\bar        --&gt;   C:\bar
1246     * C:\..\bar            --&gt;   null
1247     * ~/foo/../bar/        --&gt;   ~/bar/
1248     * ~/../bar             --&gt;   null
1249     * </pre>
1250     * (Note the file separator returned will be correct for Windows/Unix)
1251     *
1252     * @param fileName  the fileName to normalize, null returns null
1253     * @return the normalized fileName, or null if invalid
1254     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1255     */
1256    public static String normalize(final String fileName) {
1257        return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true);
1258    }
1259
1260    /**
1261     * Normalizes a path, removing double and single dot path steps.
1262     * <p>
1263     * This method normalizes a path to a standard format.
1264     * The input may contain separators in either Unix or Windows format.
1265     * The output will contain separators in the format specified.
1266     * <p>
1267     * A trailing slash will be retained.
1268     * A double slash will be merged to a single slash (but UNC names are handled).
1269     * A single dot path segment will be removed.
1270     * A double dot will cause that path segment and the one before to be removed.
1271     * If the double dot has no parent path segment to work with, {@code null}
1272     * is returned.
1273     * <p>
     * The separator used in the output is selected by {@code unixSeparator},
     * not by the platform the code is running on.
1276     * <pre>
1277     * /foo//               --&gt;   /foo/
1278     * /foo/./              --&gt;   /foo/
1279     * /foo/../bar          --&gt;   /bar
1280     * /foo/../bar/         --&gt;   /bar/
1281     * /foo/../bar/../baz   --&gt;   /baz
     * //foo//./bar         --&gt;   //foo/bar
1283     * /../                 --&gt;   null
1284     * ../foo               --&gt;   null
1285     * foo/bar/..           --&gt;   foo/
1286     * foo/../../bar        --&gt;   null
1287     * foo/../bar           --&gt;   bar
1288     * //server/foo/../bar  --&gt;   //server/bar
1289     * //server/../bar      --&gt;   null
1290     * C:\foo\..\bar        --&gt;   C:\bar
1291     * C:\..\bar            --&gt;   null
1292     * ~/foo/../bar/        --&gt;   ~/bar/
1293     * ~/../bar             --&gt;   null
1294     * </pre>
1295     * The output will be the same on both Unix and Windows including
1296     * the separator character.
1297     *
1298     * @param fileName  the fileName to normalize, null returns null
1299     * @param unixSeparator {@code true} if a Unix separator should
1300     * be used or {@code false} if a Windows separator should be used.
1301     * @return the normalized fileName, or null if invalid
1302     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1303     * @since 2.0
1304     */
1305    public static String normalize(final String fileName, final boolean unixSeparator) {
1306        return doNormalize(fileName, toSeparator(unixSeparator), true);
1307    }
1308
1309    /**
1310     * Normalizes a path, removing double and single dot path steps,
1311     * and removing any final directory separator.
1312     * <p>
1313     * This method normalizes a path to a standard format.
1314     * The input may contain separators in either Unix or Windows format.
1315     * The output will contain separators in the format of the system.
1316     * <p>
1317     * A trailing slash will be removed.
1318     * A double slash will be merged to a single slash (but UNC names are handled).
1319     * A single dot path segment will be removed.
1320     * A double dot will cause that path segment and the one before to be removed.
1321     * If the double dot has no parent path segment to work with, {@code null}
1322     * is returned.
1323     * <p>
1324     * The output will be the same on both Unix and Windows except
1325     * for the separator character.
1326     * <pre>
1327     * /foo//               --&gt;   /foo
1328     * /foo/./              --&gt;   /foo
1329     * /foo/../bar          --&gt;   /bar
1330     * /foo/../bar/         --&gt;   /bar
1331     * /foo/../bar/../baz   --&gt;   /baz
     * //foo//./bar         --&gt;   //foo/bar
1333     * /../                 --&gt;   null
1334     * ../foo               --&gt;   null
1335     * foo/bar/..           --&gt;   foo
1336     * foo/../../bar        --&gt;   null
1337     * foo/../bar           --&gt;   bar
1338     * //server/foo/../bar  --&gt;   //server/bar
1339     * //server/../bar      --&gt;   null
1340     * C:\foo\..\bar        --&gt;   C:\bar
1341     * C:\..\bar            --&gt;   null
1342     * ~/foo/../bar/        --&gt;   ~/bar
1343     * ~/../bar             --&gt;   null
1344     * </pre>
1345     * (Note the file separator returned will be correct for Windows/Unix)
1346     *
1347     * @param fileName  the fileName to normalize, null returns null
1348     * @return the normalized fileName, or null if invalid
1349     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1350     */
1351    public static String normalizeNoEndSeparator(final String fileName) {
1352        return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false);
1353    }
1354
1355    /**
1356     * Normalizes a path, removing double and single dot path steps,
1357     * and removing any final directory separator.
1358     * <p>
1359     * This method normalizes a path to a standard format.
1360     * The input may contain separators in either Unix or Windows format.
1361     * The output will contain separators in the format specified.
1362     * <p>
1363     * A trailing slash will be removed.
1364     * A double slash will be merged to a single slash (but UNC names are handled).
1365     * A single dot path segment will be removed.
1366     * A double dot will cause that path segment and the one before to be removed.
1367     * If the double dot has no parent path segment to work with, {@code null}
1368     * is returned.
1369     * <p>
1370     * The output will be the same on both Unix and Windows including
1371     * the separator character.
1372     * <pre>
1373     * /foo//               --&gt;   /foo
1374     * /foo/./              --&gt;   /foo
1375     * /foo/../bar          --&gt;   /bar
1376     * /foo/../bar/         --&gt;   /bar
1377     * /foo/../bar/../baz   --&gt;   /baz
     * //foo//./bar         --&gt;   //foo/bar
1379     * /../                 --&gt;   null
1380     * ../foo               --&gt;   null
1381     * foo/bar/..           --&gt;   foo
1382     * foo/../../bar        --&gt;   null
1383     * foo/../bar           --&gt;   bar
1384     * //server/foo/../bar  --&gt;   //server/bar
1385     * //server/../bar      --&gt;   null
1386     * C:\foo\..\bar        --&gt;   C:\bar
1387     * C:\..\bar            --&gt;   null
1388     * ~/foo/../bar/        --&gt;   ~/bar
1389     * ~/../bar             --&gt;   null
1390     * </pre>
1391     *
1392     * @param fileName  the fileName to normalize, null returns null
1393     * @param unixSeparator {@code true} if a Unix separator should
1394     * be used or {@code false} if a Windows separator should be used.
1395     * @return the normalized fileName, or null if invalid
1396     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1397     * @since 2.0
1398     */
1399    public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
1400         return doNormalize(fileName, toSeparator(unixSeparator), false);
1401    }
1402
1403    /**
1404     * Removes the extension from a fileName.
1405     * <p>
1406     * This method returns the textual part of the fileName before the last dot.
1407     * There must be no directory separator after the dot.
1408     * <pre>
1409     * foo.txt    --&gt; foo
1410     * a\b\c.jpg  --&gt; a\b\c
1411     * a\b\c      --&gt; a\b\c
1412     * a.b\c      --&gt; a.b\c
1413     * </pre>
1414     * <p>
1415     * The output will be the same irrespective of the machine that the code is running on.
1416     *
1417     * @param fileName  the fileName to query, null returns null
1418     * @return the fileName minus the extension
1419     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
1420     */
1421    public static String removeExtension(final String fileName) {
1422        if (fileName == null) {
1423            return null;
1424        }
1425        requireNonNullChars(fileName);
1426
1427        final int index = indexOfExtension(fileName);
1428        if (index == NOT_FOUND) {
1429            return fileName;
1430        }
1431        return fileName.substring(0, index);
1432    }
1433
1434    /**
1435     * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions.
1436     *
1437     * This may be used for poison byte attacks.
1438     *
1439     * @param path the path to check
1440     * @return The input
1441     * @throws IllegalArgumentException if path contains the null character ({@code U+0000})
1442     */
1443    private static String requireNonNullChars(final String path) {
1444        if (path.indexOf(0) >= 0) {
1445            throw new IllegalArgumentException(
1446                "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it");
1447        }
1448        return path;
1449    }
1450
1451    /**
1452     * Converts all separators to the system separator.
1453     *
1454     * @param path the path to be changed, null ignored.
1455     * @return the updated path.
1456     */
1457    public static String separatorsToSystem(final String path) {
1458        return FileSystem.getCurrent().normalizeSeparators(path);
1459    }
1460
1461    /**
1462     * Converts all separators to the Unix separator of forward slash.
1463     *
1464     * @param path the path to be changed, null ignored.
1465     * @return the new path.
1466     */
1467    public static String separatorsToUnix(final String path) {
1468        return FileSystem.LINUX.normalizeSeparators(path);
1469    }
1470
1471    /**
1472     * Converts all separators to the Windows separator of backslash.
1473     *
1474     * @param path the path to be changed, null ignored.
1475     * @return the updated path.
1476     */
1477    public static String separatorsToWindows(final String path) {
1478        return FileSystem.WINDOWS.normalizeSeparators(path);
1479    }
1480
1481    /**
1482     * Splits a string into a number of tokens.
1483     * The text is split by '?' and '*'.
1484     * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1485     *
1486     * @param text  the text to split
1487     * @return the array of tokens, never null
1488     */
1489    static String[] splitOnTokens(final String text) {
1490        // used by wildcardMatch
1491        // package level so a unit test may run on this
1492
1493        if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1494            return new String[] { text };
1495        }
1496
1497        final char[] array = text.toCharArray();
1498        final ArrayList<String> list = new ArrayList<>();
1499        final StringBuilder buffer = new StringBuilder();
1500        char prevChar = 0;
1501        for (final char ch : array) {
1502            if (ch == '?' || ch == '*') {
1503                if (buffer.length() != 0) {
1504                    list.add(buffer.toString());
1505                    buffer.setLength(0);
1506                }
1507                if (ch == '?') {
1508                    list.add("?");
1509                } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*'
1510                    list.add("*");
1511                }
1512            } else {
1513                buffer.append(ch);
1514            }
1515            prevChar = ch;
1516        }
1517        if (buffer.length() != 0) {
1518            list.add(buffer.toString());
1519        }
1520
1521        return list.toArray(EMPTY_STRING_ARRAY);
1522    }
1523
1524    /**
1525     * Returns '/' if given true, '\\' otherwise.
1526     *
1527     * @param unixSeparator which separator to return.
1528     * @return '/' if given true, '\\' otherwise.
1529     */
1530    private static char toSeparator(final boolean unixSeparator) {
1531        return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR;
1532    }
1533
1534    /**
1535     * Checks a fileName to see if it matches the specified wildcard matcher,
1536     * always testing case-sensitive.
1537     * <p>
1538     * The wildcard matcher uses the characters '?' and '*' to represent a
1539     * single or multiple (zero or more) wildcard characters.
1540     * This is the same as often found on DOS/Unix command lines.
1541     * The check is case-sensitive always.
1542     * <pre>
1543     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1544     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1545     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1546     * wildcardMatch("c.txt", "*.???")      --&gt; true
1547     * wildcardMatch("c.txt", "*.????")     --&gt; false
1548     * </pre>
1549     * N.B. the sequence "*?" does not work properly at present in match strings.
1550     *
1551     * @param fileName  the fileName to match on
1552     * @param wildcardMatcher  the wildcard string to match against
1553     * @return true if the fileName matches the wildcard string
1554     * @see IOCase#SENSITIVE
1555     */
1556    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
1557        return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
1558    }
1559
1560    /**
1561     * Checks a fileName to see if it matches the specified wildcard matcher
1562     * allowing control over case-sensitivity.
1563     * <p>
1564     * The wildcard matcher uses the characters '?' and '*' to represent a
1565     * single or multiple (zero or more) wildcard characters.
1566     * N.B. the sequence "*?" does not work properly at present in match strings.
1567     *
1568     * @param fileName  the fileName to match on
1569     * @param wildcardMatcher  the wildcard string to match against
1570     * @param ioCase  what case sensitivity rule to use, null means case-sensitive
1571     * @return true if the fileName matches the wildcard string
1572     * @since 1.3
1573     */
1574    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) {
1575        if (fileName == null && wildcardMatcher == null) {
1576            return true;
1577        }
1578        if (fileName == null || wildcardMatcher == null) {
1579            return false;
1580        }
1581        ioCase = IOCase.value(ioCase, IOCase.SENSITIVE);
1582        final String[] wcs = splitOnTokens(wildcardMatcher);
1583        boolean anyChars = false;
1584        int textIdx = 0;
1585        int wcsIdx = 0;
1586        final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);
1587
1588        // loop around a backtrack stack, to handle complex * matching
1589        do {
1590            if (!backtrack.isEmpty()) {
1591                final int[] array = backtrack.pop();
1592                wcsIdx = array[0];
1593                textIdx = array[1];
1594                anyChars = true;
1595            }
1596
1597            // loop whilst tokens and text left to process
1598            while (wcsIdx < wcs.length) {
1599
1600                if (wcs[wcsIdx].equals("?")) {
1601                    // ? so move to next text char
1602                    textIdx++;
1603                    if (textIdx > fileName.length()) {
1604                        break;
1605                    }
1606                    anyChars = false;
1607
1608                } else if (wcs[wcsIdx].equals("*")) {
1609                    // set any chars status
1610                    anyChars = true;
1611                    if (wcsIdx == wcs.length - 1) {
1612                        textIdx = fileName.length();
1613                    }
1614
1615                } else {
1616                    // matching text token
1617                    if (anyChars) {
1618                        // any chars then try to locate text token
1619                        textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
1620                        if (textIdx == NOT_FOUND) {
1621                            // token not found
1622                            break;
1623                        }
1624                        final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
1625                        if (repeat >= 0) {
1626                            backtrack.push(new int[] {wcsIdx, repeat});
1627                        }
1628                    } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
1629                        // matching from current position
1630                        // couldn't match token
1631                        break;
1632                    }
1633
1634                    // matched text token, move text index to end of matched token
1635                    textIdx += wcs[wcsIdx].length();
1636                    anyChars = false;
1637                }
1638
1639                wcsIdx++;
1640            }
1641
1642            // full match
1643            if (wcsIdx == wcs.length && textIdx == fileName.length()) {
1644                return true;
1645            }
1646
1647        } while (!backtrack.isEmpty());
1648
1649        return false;
1650    }
1651
1652    /**
1653     * Checks a fileName to see if it matches the specified wildcard matcher
1654     * using the case rules of the system.
1655     * <p>
1656     * The wildcard matcher uses the characters '?' and '*' to represent a
1657     * single or multiple (zero or more) wildcard characters.
1658     * This is the same as often found on DOS/Unix command lines.
1659     * The check is case-sensitive on Unix and case-insensitive on Windows.
1660     * <pre>
1661     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1662     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1663     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1664     * wildcardMatch("c.txt", "*.???")      --&gt; true
1665     * wildcardMatch("c.txt", "*.????")     --&gt; false
1666     * </pre>
1667     * N.B. the sequence "*?" does not work properly at present in match strings.
1668     *
1669     * @param fileName  the fileName to match on
1670     * @param wildcardMatcher  the wildcard string to match against
1671     * @return true if the fileName matches the wildcard string
1672     * @see IOCase#SYSTEM
1673     */
1674    public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
1675        return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
1676    }
1677
1678    /**
1679     * Instances should NOT be constructed in standard programming.
1680     */
1681    public FilenameUtils() {
1682    }
1683}