View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io;
18  
19  import java.io.File;
20  import java.util.ArrayDeque;
21  import java.util.ArrayList;
22  import java.util.Arrays;
23  import java.util.Collection;
24  import java.util.Deque;
25  import java.util.List;
26  import java.util.regex.Matcher;
27  import java.util.regex.Pattern;
28  import java.util.stream.Stream;
29  
30  /**
31   * General file name and file path manipulation utilities. The methods in this class
32   * operate on strings that represent relative or absolute paths. Nothing in this class
33   * ever accesses the file system, or depends on whether a path points to a file that exists.
34   * <p>
35   * When dealing with file names, you can hit problems when moving from a Windows
36   * based development machine to a UNIX based production machine.
37   * This class aims to help avoid those problems.
38   * </p>
39   * <p>
40   * <strong>NOTE</strong>: You may be able to avoid using this class entirely simply by
41   * using JDK {@link File File} objects and the two argument constructor
42   * {@link File#File(java.io.File, String) File(File,String)}.
43   * </p>
44   * <p>
45   * Most methods in this class are designed to work the same on both UNIX and Windows.
46   * Those that don't include 'System', 'Unix', or 'Windows' in their name.
47   * </p>
48   * <p>
49   * Most methods recognize both separators (forward and backslashes), and both
50   * sets of prefixes. See the Javadoc of each method for details.
51   * </p>
52   * <p>
53   * This class defines six components within a path (sometimes called a file name or a full file name).
54   * Given an absolute Windows path such as C:\dev\project\file.txt they are:
55   * </p>
56   * <ul>
57   * <li>the full file name, or just file name - C:\dev\project\file.txt</li>
58   * <li>the prefix - C:\</li>
59   * <li>the path - dev\project\</li>
60   * <li>the full path - C:\dev\project\</li>
61   * <li>the name - file.txt</li>
62   * <li>the base name - file</li>
63   * <li>the extension - txt</li>
64   * </ul>
65   * <p>
66   * Given an absolute UNIX path such as /dev/project/file.txt they are:
67   * </p>
68   * <ul>
69   * <li>the full file name, or just file name - /dev/project/file.txt</li>
70   * <li>the prefix - /</li>
71   * <li>the path - dev/project/</li>
72   * <li>the full path - /dev/project/</li>
73   * <li>the name - file.txt</li>
74   * <li>the base name - file</li>
75   * <li>the extension - txt</li>
76   * </ul>
77   * <p>
78   * Given a relative Windows path such as dev\project\file.txt they are:
79   * </p>
80   * <ul>
81   * <li>the full file name, or just file name - dev\project\file.txt</li>
82   * <li>the prefix - "" (empty, because the path is relative)</li>
83   * <li>the path - dev\project\</li>
84   * <li>the full path - dev\project\</li>
85   * <li>the name - file.txt</li>
86   * <li>the base name - file</li>
87   * <li>the extension - txt</li>
88   * </ul>
89   * <p>
90   * Given a relative UNIX path such as dev/project/file.txt they are:
91   * </p>
92   * <ul>
93   * <li>the full file name, or just file name - dev/project/file.txt</li>
94   * <li>the prefix - "" (empty, because the path is relative)</li>
95   * <li>the path - dev/project/</li>
96   * <li>the full path - dev/project/</li>
97   * <li>the name - file.txt</li>
98   * <li>the base name - file</li>
99   * <li>the extension - txt</li>
100  * </ul>
101  *
102  *
103  * <p>
104  * This class works best if directory names end with a separator.
105  * If you omit the last separator, it is impossible to determine if the last component
106  * corresponds to a file or a directory. This class treats final components
107  * that do not end with a separator as files, not directories.
108  * </p>
109  * <p>
110  * This class only supports UNIX and Windows style names.
111  * Prefixes are matched as follows:
112  * </p>
113  * <pre>
114  * Windows:
115  * a\b\c.txt           --&gt; ""          --&gt; relative
116  * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
117  * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
118  * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
119  * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
120  *
121  * Unix:
122  * a/b/c.txt           --&gt; ""          --&gt; relative
123  * /a/b/c.txt          --&gt; "/"         --&gt; absolute
124  * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
125  * ~                   --&gt; "~/"        --&gt; current user (slash added)
126  * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
127  * ~user               --&gt; "~user/"    --&gt; named user (slash added)
128  * </pre>
129  * <p>
130  * Both prefix styles are matched, irrespective of the machine that you are
131  * currently running on.
132  * </p>
133  *
134  * @since 1.1
135  */
136 public class FilenameUtils {
137 
    /**
     * A shared zero-length {@code String} array.
     */
    private static final String[] EMPTY_STRING_ARRAY = {};

    /**
     * The empty string, returned by methods in this class when a requested path component is absent.
     */
    private static final String EMPTY_STRING = "";

    /**
     * Sentinel index meaning "no match"; compared against index lookups such as the one in
     * {@link #getExtension(String)}.
     */
    private static final int NOT_FOUND = -1;

    /**
     * The extension separator character.
     * @since 1.4
     */
    public static final char EXTENSION_SEPARATOR = '.';

    /**
     * The extension separator String (the single-character String form of {@link #EXTENSION_SEPARATOR}).
     * @since 1.4
     */
    public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);

    /**
     * The UNIX separator character.
     */
    private static final char UNIX_NAME_SEPARATOR = '/';

    /**
     * The Windows separator character.
     */
    private static final char WINDOWS_NAME_SEPARATOR = '\\';

    /**
     * The system separator character, taken from {@link File#separatorChar}.
     */
    private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar;

    /**
     * The separator character that is the opposite of the system separator.
     * <p>
     * Computed by {@link #flipSeparator(char)}, which throws {@link IllegalArgumentException} for any
     * character other than the UNIX or Windows separator, so class initialization relies on
     * {@link File#separatorChar} being one of those two. This field must stay declared after
     * {@code SYSTEM_NAME_SEPARATOR} because its initializer reads it.
     * </p>
     */
    private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR);

    /**
     * Matches a complete string made of four dot-separated groups of one to three decimal digits,
     * capturing each group. The pattern itself does not limit the numeric range of a group.
     */
    private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");

    /**
     * The largest value a single IPv4 octet can hold.
     */
    private static final int IPV4_MAX_OCTET_VALUE = 255;

    /**
     * The number of hexadecimal groups in a full IPv6 address.
     * NOTE(review): the code using this constant is outside the visible part of the file.
     */
    private static final int IPV6_MAX_HEX_GROUPS = 8;

    /**
     * The maximum number of hexadecimal digits in one IPv6 group.
     * NOTE(review): the code using this constant is outside the visible part of the file.
     */
    private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;

    /**
     * The largest value representable in an unsigned 16-bit quantity ({@code 0xffff}).
     */
    private static final int MAX_UNSIGNED_SHORT = 0xffff;

    /**
     * The radix for hexadecimal numbers.
     */
    private static final int BASE_16 = 16;

    /**
     * Matches a complete string that starts with an ASCII letter or digit and continues with zero or
     * more ASCII letters, digits or hyphens.
     */
    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");
189 
190     /**
191      * Concatenates a fileName to a base path using normal command line style rules.
192      * <p>
193      * The effect is equivalent to resultant directory after changing
194      * directory to the first argument, followed by changing directory to
195      * the second argument.
196      * </p>
197      * <p>
198      * The first argument is the base path, the second is the path to concatenate.
199      * The returned path is always normalized via {@link #normalize(String)},
200      * thus {@code ..} is handled.
201      * </p>
202      * <p>
203      * If {@code pathToAdd} is absolute (has an absolute prefix), then
204      * it will be normalized and returned.
205      * Otherwise, the paths will be joined, normalized and returned.
206      * </p>
207      * <p>
208      * The output will be the same on both UNIX and Windows except
209      * for the separator character.
210      * </p>
211      * <pre>
212      * /foo/      + bar        --&gt;  /foo/bar
213      * /foo       + bar        --&gt;  /foo/bar
214      * /foo       + /bar       --&gt;  /bar
215      * /foo       + C:/bar     --&gt;  C:/bar
216      * /foo       + C:bar      --&gt;  C:bar [1]
217      * /foo/a/    + ../bar     --&gt;  /foo/bar
218      * /foo/      + ../../bar  --&gt;  null
219      * /foo/      + /bar       --&gt;  /bar
220      * /foo/..    + /bar       --&gt;  /bar
221      * /foo       + bar/c.txt  --&gt;  /foo/bar/c.txt
222      * /foo/c.txt + bar        --&gt;  /foo/c.txt/bar [2]
223      * </pre>
224      * <p>
225      * [1] Note that the Windows relative drive prefix is unreliable when
226      * used with this method.
227      * </p>
228      * <p>
229      * [2] Note that the first parameter must be a path. If it ends with a name, then
230      * the name will be built into the concatenated path. If this might be a problem,
231      * use {@link #getFullPath(String)} on the base path argument.
232      * </p>
233      *
234      * @param basePath  the base path to attach to, always treated as a path
235      * @param fullFileNameToAdd  the file name (or path) to attach to the base
236      * @return the concatenated path, or null if invalid
237      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
238      */
239     public static String concat(final String basePath, final String fullFileNameToAdd) {
240         final int prefix = getPrefixLength(fullFileNameToAdd);
241         if (prefix < 0) {
242             return null;
243         }
244         if (prefix > 0) {
245             return normalize(fullFileNameToAdd);
246         }
247         if (basePath == null) {
248             return null;
249         }
250         final int len = basePath.length();
251         if (len == 0) {
252             return normalize(fullFileNameToAdd);
253         }
254         final char ch = basePath.charAt(len - 1);
255         if (isSeparator(ch)) {
256             return normalize(basePath + fullFileNameToAdd);
257         }
258         return normalize(basePath + '/' + fullFileNameToAdd);
259     }
260 
261     /**
262      * Determines whether the {@code parent} directory contains the {@code child} (a file or directory).
263      * This does not read from the file system, and there is no guarantee or expectation that
264      * these paths actually exist.
265      * <p>
266      * The files names are expected to be normalized.
267      * </p>
268      *
269      * Edge cases:
270      * <ul>
271      * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li>
272      * <li>A directory does not contain itself: return false</li>
273      * <li>A null child file is not contained in any parent: return false</li>
274      * </ul>
275      *
276      * @param canonicalParent the path string to consider as the parent.
277      * @param canonicalChild the path string to consider as the child.
278      * @return true if the candidate leaf is under the specified composite. False otherwise.
279      * @since 2.2
280      * @see FileUtils#directoryContains(File, File)
281      */
282     public static boolean directoryContains(final String canonicalParent, final String canonicalChild) {
283         if (isEmpty(canonicalParent) || isEmpty(canonicalChild)) {
284             return false;
285         }
286 
287         if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
288             return false;
289         }
290 
291         final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR);
292         final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator;
293 
294         return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator);
295     }
296 
297     /**
298      * Does the work of getting the path.
299      *
300      * @param fileName  the file name
301      * @param includeSeparator  true to include the end separator
302      * @return the path
303      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
304      */
305     private static String doGetFullPath(final String fileName, final boolean includeSeparator) {
306         if (fileName == null) {
307             return null;
308         }
309         final int prefix = getPrefixLength(fileName);
310         if (prefix < 0) {
311             return null;
312         }
313         if (prefix >= fileName.length()) {
314             if (includeSeparator) {
315                 return getPrefix(fileName);  // add end slash if necessary
316             }
317             return fileName;
318         }
319         final int index = indexOfLastSeparator(fileName);
320         if (index < 0) {
321             return fileName.substring(0, prefix);
322         }
323         int end = index + (includeSeparator ?  1 : 0);
324         if (end == 0) {
325             end++;
326         }
327         return fileName.substring(0, end);
328     }
329 
330     /**
331      * Does the work of getting the path.
332      *
333      * @param fileName  the file name
334      * @param separatorAdd  0 to omit the end separator, 1 to return it
335      * @return the path
336      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
337      */
338     private static String doGetPath(final String fileName, final int separatorAdd) {
339         if (fileName == null) {
340             return null;
341         }
342         final int prefix = getPrefixLength(fileName);
343         if (prefix < 0) {
344             return null;
345         }
346         final int index = indexOfLastSeparator(fileName);
347         final int endIndex = index + separatorAdd;
348         if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
349             return EMPTY_STRING;
350         }
351         return requireNonNullChars(fileName.substring(prefix, endIndex));
352     }
353 
    /**
     * Internal method to perform the normalization.
     * <p>
     * Works on a private {@code char[]} copy of the file name and compacts it in place, in this order:
     * every occurrence of the opposite separator is converted to {@code separator}; runs of adjoining
     * separators after the prefix are collapsed; {@code ./} segments are removed; each {@code ../}
     * segment is removed together with the segment before it. The characters covered by the prefix
     * are never removed.
     * </p>
     *
     * @param fileName  the file name
     * @param separator The separator character to use; {@link #flipSeparator(char)} rejects anything
     *                  other than the UNIX or Windows separator with an {@link IllegalArgumentException}
     * @param keepSeparator  true to keep the final separator
     * @return the normalized fileName; {@code null} if {@code fileName} is null, if its prefix is
     *         invalid, or if a {@code ..} segment would climb above the prefix; the same (empty)
     *         string if {@code fileName} is empty
     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
     */
    private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
        if (fileName == null) {
            return null;
        }

        requireNonNullChars(fileName);

        // size tracks the number of live characters in the working array; it shrinks as segments are removed
        int size = fileName.length();
        if (size == 0) {
            return fileName;
        }
        final int prefix = getPrefixLength(fileName);
        if (prefix < 0) {
            return null;
        }

        final char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
        fileName.getChars(0, fileName.length(), array, 0);

        // fix separators throughout
        // (the loop also visits the two spare trailing slots, which hold no file name characters yet)
        final char otherSeparator = flipSeparator(separator);
        for (int i = 0; i < array.length; i++) {
            if (array[i] == otherSeparator) {
                array[i] = separator;
            }
        }

        // add extra separator on the end to simplify code below
        // lastIsDirectory records whether the input itself ended with a separator
        boolean lastIsDirectory = true;
        if (array[size - 1] != separator) {
            array[size++] = separator;
            lastIsDirectory = false;
        }

        // adjoining slashes
        // If we get here, prefix can only be 0 or greater, size 1 or greater
        // If prefix is 0, set loop start to 1 to prevent index errors
        for (int i = prefix != 0 ? prefix : 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == separator) {
                // shift the tail left by one to drop the duplicate, then re-examine the same index
                System.arraycopy(array, i, array, i - 1, size - i);
                size--;
                i--;
            }
        }

        // dot slash
        // a "." counts as a segment only directly after the prefix or directly after a separator
        for (int i = prefix + 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' &&
                    (i == prefix + 1 || array[i - 2] == separator)) {
                if (i == size - 1) {
                    // a trailing "." names a directory, so the result is treated as ending in a separator
                    lastIsDirectory = true;
                }
                // drop the two characters "./" and re-examine from the character before
                System.arraycopy(array, i + 1, array, i - 1, size - i);
                size -=2;
                i--;
            }
        }

        // double dot slash
        // a ".." counts as a segment only directly after the prefix or directly after a separator
        outer:
        for (int i = prefix + 2; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
                    (i == prefix + 2 || array[i - 3] == separator)) {
                if (i == prefix + 2) {
                    // ".." directly after the prefix has no parent segment to remove: invalid
                    return null;
                }
                if (i == size - 1) {
                    // a trailing ".." names a directory, so the result is treated as ending in a separator
                    lastIsDirectory = true;
                }
                int j;
                // search backwards for the separator that starts the segment preceding the ".."
                for (j = i - 4 ; j >= prefix; j--) {
                    if (array[j] == separator) {
                        // remove b/../ from a/b/../c
                        System.arraycopy(array, i + 1, array, j + 1, size - i);
                        size -= i - j;
                        i = j + 1;
                        continue outer;
                    }
                }
                // remove a/../ from a/../c
                // (no earlier separator found: the preceding segment starts right at the prefix)
                System.arraycopy(array, i + 1, array, prefix, size - i);
                size -= i + 1 - prefix;
                i = prefix + 1;
            }
        }

        if (size <= 0) {  // should never be less than 0
            return EMPTY_STRING;
        }
        if (size <= prefix) {  // should never be less than prefix
            return new String(array, 0, size);
        }
        if (lastIsDirectory && keepSeparator) {
            return new String(array, 0, size);  // keep trailing separator
        }
        return new String(array, 0, size - 1);  // lose trailing separator
    }
460 
461     /**
462      * Checks whether two file names are exactly equal.
463      * <p>
464      * No processing is performed on the file names other than comparison.
465      * This is merely a null-safe case-sensitive string equality.
466      * </p>
467      *
468      * @param fileName1  the first file name, may be null
469      * @param fileName2  the second file name, may be null
470      * @return true if the file names are equal, null equals null
471      * @see IOCase#SENSITIVE
472      */
473     public static boolean equals(final String fileName1, final String fileName2) {
474         return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
475     }
476 
477     /**
478      * Checks whether two file names are equal, optionally normalizing and providing
479      * control over the case-sensitivity.
480      *
481      * @param fileName1  the first file name, may be null
482      * @param fileName2  the second file name, may be null
483      * @param normalize  whether to normalize the file names
484      * @param ioCase  what case sensitivity rule to use, null means case-sensitive
485      * @return true if the file names are equal, null equals null
486      * @since 1.3
487      */
488     public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) {
489 
490         if (fileName1 == null || fileName2 == null) {
491             return fileName1 == null && fileName2 == null;
492         }
493         if (normalize) {
494             fileName1 = normalize(fileName1);
495             if (fileName1 == null) {
496                 return false;
497             }
498             fileName2 = normalize(fileName2);
499             if (fileName2 == null) {
500                 return false;
501             }
502         }
503         return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2);
504     }
505 
506     /**
507      * Checks whether two file names are equal after both have been normalized.
508      * <p>
509      * Both file names are first passed to {@link #normalize(String)}.
510      * The check is then performed in a case-sensitive manner.
511      * </p>
512      *
513      * @param fileName1  the first file name, may be null
514      * @param fileName2  the second file name, may be null
515      * @return true if the file names are equal, null equals null
516      * @see IOCase#SENSITIVE
517      */
518     public static boolean equalsNormalized(final String fileName1, final String fileName2) {
519         return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
520     }
521 
522     /**
523      * Checks whether two file names are equal using the case rules of the system
524      * after both have been normalized.
525      * <p>
526      * Both file names are first passed to {@link #normalize(String)}.
527      * The check is then performed case-sensitively on UNIX and
528      * case-insensitively on Windows.
529      * </p>
530      *
531      * @param fileName1  the first file name, may be null
532      * @param fileName2  the second file name, may be null
533      * @return true if the file names are equal, null equals null
534      * @see IOCase#SYSTEM
535      */
536     public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
537         return equals(fileName1, fileName2, true, IOCase.SYSTEM);
538     }
539 
540     /**
541      * Checks whether two file names are equal using the case rules of the system.
542      * <p>
543      * No processing is performed on the file names other than comparison.
544      * The check is case-sensitive on UNIX and case-insensitive on Windows.
545      * </p>
546      *
547      * @param fileName1  the first file name, may be null
548      * @param fileName2  the second file name, may be null
549      * @return true if the file names are equal, null equals null
550      * @see IOCase#SYSTEM
551      */
552     public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
553         return equals(fileName1, fileName2, false, IOCase.SYSTEM);
554     }
555 
556     /**
557      * Flips the Windows name separator to Linux and vice-versa.
558      *
559      * @param ch The Windows or Linux name separator.
560      * @return The Windows or Linux name separator.
561      */
562     static char flipSeparator(final char ch) {
563         if (ch == UNIX_NAME_SEPARATOR) {
564             return WINDOWS_NAME_SEPARATOR;
565         }
566         if (ch == WINDOWS_NAME_SEPARATOR) {
567             return UNIX_NAME_SEPARATOR;
568         }
569         throw new IllegalArgumentException(String.valueOf(ch));
570     }
571 
572     /**
573      * Special handling for NTFS ADS: Don't accept colon in the file name.
574      *
575      * @param fileName a file name
576      * @return ADS offsets.
577      */
578     private static int getAdsCriticalOffset(final String fileName) {
579         // Step 1: Remove leading path segments.
580         final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR);
581         final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR);
582         if (offset1 == -1) {
583             if (offset2 == -1) {
584                 return 0;
585             }
586             return offset2 + 1;
587         }
588         if (offset2 == -1) {
589             return offset1 + 1;
590         }
591         return Math.max(offset1, offset2) + 1;
592     }
593 
594     /**
595      * Gets the base name, minus the full path and extension, from a full file name.
596      * <p>
597      * This method will handle a path in either UNIX or Windows format.
598      * The text after the last forward or backslash and before the last dot is returned.
599      * </p>
600      * <pre>
601      * a/b/c.txt --&gt; c
602      * a\b\c.txt --&gt; c
603      * a/b/c.foo.txt --&gt; c.foo
604      * a.txt     --&gt; a
605      * a/b/c     --&gt; c
606      * a/b/c/    --&gt; ""
607      * </pre>
608      * <p>
609      * The output will be the same irrespective of the machine that the code is running on.
610      * </p>
611      *
612      * @param fileName  the file name, null returns null
613      * @return the name of the file without the path, or an empty string if none exists
614      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
615      */
616     public static String getBaseName(final String fileName) {
617         return removeExtension(getName(fileName));
618     }
619 
620     /**
621      * Gets the extension of a fileName.
622      * <p>
623      * This method returns the textual part of the file name after the last dot.
624      * There must be no directory separator after the dot.
625      * </p>
626      * <pre>
627      * foo.txt      --&gt; "txt"
628      * a/b/c.jpg    --&gt; "jpg"
629      * a/b.txt/c    --&gt; ""
630      * a/b/c        --&gt; ""
631      * </pre>
632      * <p>
633      * The output will be the same irrespective of the machine that the code is running on, with the
634      * exception of a possible {@link IllegalArgumentException} on Windows (see below).
635      * </p>
636      * <p>
637      * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
638      * In this case, the name wouldn't be the name of a file, but the identifier of an
639      * alternate data stream (bar.txt) on the file foo.exe. The method used to return
640      * ".txt" here, which would be misleading. Commons IO 2.7 and later throw
641      * an {@link IllegalArgumentException} for names like this.
642      * </p>
643      *
644      * @param fileName the file name to retrieve the extension of.
645      * @return the extension of the file or an empty string if none exists or {@code null}
646      * if the file name is {@code null}.
647      * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
648      * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
649      */
650     public static String getExtension(final String fileName) throws IllegalArgumentException {
651         if (fileName == null) {
652             return null;
653         }
654         final int index = indexOfExtension(fileName);
655         if (index == NOT_FOUND) {
656             return EMPTY_STRING;
657         }
658         return fileName.substring(index + 1);
659     }
660 
661     /**
662      * Gets the full path (prefix + path) from a full file name.
663      * <p>
664      * This method will handle a file in either UNIX or Windows format.
665      * The method is entirely text based, and returns the text before and
666      * including the last forward or backslash.
667      * </p>
668      * <pre>
669      * C:\a\b\c.txt --&gt; C:\a\b\
670      * ~/a/b/c.txt  --&gt; ~/a/b/
671      * a.txt        --&gt; ""
672      * a/b/c        --&gt; a/b/
673      * a/b/c/       --&gt; a/b/c/
674      * C:           --&gt; C:
675      * C:\          --&gt; C:\
676      * ~            --&gt; ~/
677      * ~/           --&gt; ~/
678      * ~user        --&gt; ~user/
679      * ~user/       --&gt; ~user/
680      * </pre>
681      * <p>
682      * The output will be the same irrespective of the machine that the code is running on.
683      * </p>
684      *
685      * @param fileName  the file name, null returns null
686      * @return the path of the file, an empty string if none exists, null if invalid
687      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
688      */
689     public static String getFullPath(final String fileName) {
690         return doGetFullPath(fileName, true);
691     }
692 
693     /**
694      * Gets the full path (prefix + path) from a full file name,
695      * excluding the final directory separator.
696      * <p>
697      * This method will handle a file in either UNIX or Windows format.
698      * The method is entirely text based, and returns the text before the
699      * last forward or backslash.
700      * </p>
701      * <pre>
702      * C:\a\b\c.txt --&gt; C:\a\b
703      * ~/a/b/c.txt  --&gt; ~/a/b
704      * a.txt        --&gt; ""
705      * a/b/c        --&gt; a/b
706      * a/b/c/       --&gt; a/b/c
707      * C:           --&gt; C:
708      * C:\          --&gt; C:\
709      * ~            --&gt; ~
710      * ~/           --&gt; ~
711      * ~user        --&gt; ~user
712      * ~user/       --&gt; ~user
713      * </pre>
714      * <p>
715      * The output will be the same irrespective of the machine that the code is running on.
716      * </p>
717      *
718      * @param fileName  the file name, null returns null
719      * @return the path of the file, an empty string if none exists, null if invalid
720      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
721      */
722     public static String getFullPathNoEndSeparator(final String fileName) {
723         return doGetFullPath(fileName, false);
724     }
725 
726     /**
727      * Gets the name minus the path from a full file name.
728      * <p>
729      * This method will handle a file in either UNIX or Windows format.
730      * The text after the last forward or backslash is returned.
731      * </p>
732      * <pre>
733      * a/b/c.txt --&gt; c.txt
734      * a\b\c.txt --&gt; c.txt
735      * a.txt     --&gt; a.txt
736      * a/b/c     --&gt; c
737      * a/b/c/    --&gt; ""
738      * </pre>
739      * <p>
740      * The output will be the same irrespective of the machine that the code is running on.
741      * </p>
742      *
743      * @param fileName  the file name, null returns null
744      * @return the name of the file without the path, or an empty string if none exists
745      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
746      */
747     public static String getName(final String fileName) {
748         if (fileName == null) {
749             return null;
750         }
751         return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1);
752     }
753 
754     /**
755      * Gets the path from a full file name, which excludes the prefix and the name.
756      * <p>
757      * This method will handle a file in either UNIX or Windows format.
758      * The method is entirely text based, and returns the text before and
759      * including the last forward or backslash.
760      * </p>
761      * <pre>
762      * C:\a\b\c.txt --&gt; a\b\
763      * ~/a/b/c.txt  --&gt; a/b/
764      * a.txt        --&gt; ""
765      * a/b/c        --&gt; a/b/
766      * a/b/c/       --&gt; a/b/c/
767      * </pre>
768      * <p>
769      * The output will be the same irrespective of the machine that the code is running on.
770      * </p>
771      * <p>
772      * This method drops the prefix from the result.
773      * See {@link #getFullPath(String)} for the method that retains the prefix.
774      * </p>
775      *
776      * @param fileName  the file name, null returns null
777      * @return the path of the file, an empty string if none exists, null if invalid
778      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
779      */
780     public static String getPath(final String fileName) {
781         return doGetPath(fileName, 1);
782     }
783 
784     /**
785      * Gets the path (which excludes the prefix) from a full file name, and
786      * also excluding the final directory separator.
787      * <p>
788      * This method will handle a file in either UNIX or Windows format.
789      * The method is entirely text based, and returns the text before the
790      * last forward or backslash.
791      * </p>
792      * <pre>
793      * C:\a\b\c.txt --&gt; a\b
794      * ~/a/b/c.txt  --&gt; a/b
795      * a.txt        --&gt; ""
796      * a/b/c        --&gt; a/b
797      * a/b/c/       --&gt; a/b/c
798      * </pre>
799      * <p>
800      * The output will be the same irrespective of the machine that the code is running on.
801      * </p>
802      * <p>
803      * This method drops the prefix from the result.
804      * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
805      * </p>
806      *
807      * @param fileName  the file name, null returns null
808      * @return the path of the file, an empty string if none exists, null if invalid
809      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
810      */
811     public static String getPathNoEndSeparator(final String fileName) {
812         return doGetPath(fileName, 0);
813     }
814 
815     /**
816      * Gets the prefix such as {@code C:/} or {@code ~/} from a full file name.
817      * <p>
818      * This method will handle a file in either UNIX or Windows format.
819      * The prefix includes the first slash in the full file name where applicable.
820      * </p>
821      * <pre>
822      * Windows:
823      * a\b\c.txt           --&gt; ""          --&gt; relative
824      * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
825      * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
826      * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
827      * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
828      *
829      * Unix:
830      * a/b/c.txt           --&gt; ""          --&gt; relative
831      * /a/b/c.txt          --&gt; "/"         --&gt; absolute
832      * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
833      * ~                   --&gt; "~/"        --&gt; current user (slash added)
834      * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
835      * ~user               --&gt; "~user/"    --&gt; named user (slash added)
836      * </pre>
837      * <p>
838      * The output will be the same irrespective of the machine that the code is running on.
839      * i.e. both UNIX and Windows prefixes are matched regardless.
840      * </p>
841      *
842      * @param fileName  the file name, null returns null
843      * @return the prefix of the file, null if invalid
844      * @throws IllegalArgumentException if the result contains the null character ({@code U+0000})
845      */
846     public static String getPrefix(final String fileName) {
847         if (fileName == null) {
848             return null;
849         }
850         final int len = getPrefixLength(fileName);
851         if (len < 0) {
852             return null;
853         }
854         if (len > fileName.length()) {
855             requireNonNullChars(fileName);
856             return fileName + UNIX_NAME_SEPARATOR;
857         }
858         return requireNonNullChars(fileName.substring(0, len));
859     }
860 
861     /**
862      * Returns the length of the file name prefix, such as {@code C:/} or {@code ~/}.
863      * <p>
864      * This method will handle a file in either UNIX or Windows format.
865      * </p>
866      * <p>
867      * The prefix length includes the first slash in the full file name
868      * if applicable. Thus, it is possible that the length returned is greater
869      * than the length of the input string.
870      * </p>
871      * <pre>
872      * Windows:
873      * a\b\c.txt           --&gt; 0           --&gt; relative
874      * \a\b\c.txt          --&gt; 1           --&gt; current drive absolute
875      * C:a\b\c.txt         --&gt; 2           --&gt; drive relative
876      * C:\a\b\c.txt        --&gt; 3           --&gt; absolute
877      * \\server\a\b\c.txt  --&gt; 9           --&gt; UNC
878      * \\\a\b\c.txt        --&gt; -1          --&gt; error
879      *
880      * Unix:
881      * a/b/c.txt           --&gt; 0           --&gt; relative
882      * /a/b/c.txt          --&gt; 1           --&gt; absolute
883      * ~/a/b/c.txt         --&gt; 2           --&gt; current user
884      * ~                   --&gt; 2           --&gt; current user (slash added)
885      * ~user/a/b/c.txt     --&gt; 6           --&gt; named user
886      * ~user               --&gt; 6           --&gt; named user (slash added)
887      * //server/a/b/c.txt  --&gt; 9
888      * ///a/b/c.txt        --&gt; -1          --&gt; error
889      * C:                  --&gt; 0           --&gt; valid file name as only null character and / are reserved characters
890      * </pre>
891      * <p>
892      * The output will be the same irrespective of the machine that the code is running on.
893      * i.e. both UNIX and Windows prefixes are matched regardless.
894      * </p>
895      * <p>
896      * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
897      * These must be followed by a server name, so double-slashes are not collapsed
898      * to a single slash at the start of the file name.
899      * </p>
900      *
901      * @param fileName  the file name to find the prefix in, null returns -1
902      * @return the length of the prefix, -1 if invalid or null
903      */
904     public static int getPrefixLength(final String fileName) {
905         if (fileName == null) {
906             return NOT_FOUND;
907         }
908         final int len = fileName.length();
909         if (len == 0) {
910             return 0;
911         }
912         char ch0 = fileName.charAt(0);
913         if (ch0 == ':') {
914             return NOT_FOUND;
915         }
916         if (len == 1) {
917             if (ch0 == '~') {
918                 return 2;  // return a length greater than the input
919             }
920             return isSeparator(ch0) ? 1 : 0;
921         }
922         if (ch0 == '~') {
923             int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1);
924             int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1);
925             if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
926                 return len + 1;  // return a length greater than the input
927             }
928             posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
929             posWin = posWin == NOT_FOUND ? posUnix : posWin;
930             return Math.min(posUnix, posWin) + 1;
931         }
932         final char ch1 = fileName.charAt(1);
933         if (ch1 == ':') {
934             ch0 = Character.toUpperCase(ch0);
935             if (ch0 >= 'A' && ch0 <= 'Z') {
936                 if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
937                     return 0;
938                 }
939                 if (len == 2 || !isSeparator(fileName.charAt(2))) {
940                     return 2;
941                 }
942                 return 3;
943             }
944             if (ch0 == UNIX_NAME_SEPARATOR) {
945                 return 1;
946             }
947             return NOT_FOUND;
948 
949         }
950         if (!isSeparator(ch0) || !isSeparator(ch1)) {
951             return isSeparator(ch0) ? 1 : 0;
952         }
953         int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2);
954         int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2);
955         if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
956             return NOT_FOUND;
957         }
958         posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
959         posWin = posWin == NOT_FOUND ? posUnix : posWin;
960         final int pos = Math.min(posUnix, posWin) + 1;
961         final String hostnamePart = fileName.substring(2, pos - 1);
962         return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
963     }
964 
965     /**
966      * Returns the index of the last extension separator character, which is a dot.
967      * <p>
968      * This method also checks that there is no directory separator after the last dot. To do this it uses
969      * {@link #indexOfLastSeparator(String)} which will handle a file in either UNIX or Windows format.
970      * </p>
971      * <p>
972      * The output will be the same irrespective of the machine that the code is running on, with the
973      * exception of a possible {@link IllegalArgumentException} on Windows (see below).
974      * </p>
975      * <p>
975      * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
976      * In this case, the name wouldn't be the name of a file, but the identifier of an
977      * alternate data stream (bar.txt) on the file foo.exe. The method used to return
978      * ".txt" here, which would be misleading. Commons IO 2.7 and later versions throw
979      * an {@link IllegalArgumentException} for names like this.
979      * </p>
980      *
981      * @param fileName
982      *            the file name to find the last extension separator in, null returns -1
983      * @return the index of the last extension separator character, or -1 if there is no such character
984      * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
985      * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
986      */
987     public static int indexOfExtension(final String fileName) throws IllegalArgumentException {
988         if (fileName == null) {
989             return NOT_FOUND;
990         }
991         if (isSystemWindows()) {
992             // Special handling for NTFS ADS: Don't accept colon in the file name.
993             final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
994             if (offset != -1) {
995                 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
996             }
997         }
998         final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
999         final int lastSeparator = indexOfLastSeparator(fileName);
1000         return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
1001     }
1002 
1003     /**
1004      * Returns the index of the last directory separator character.
1005      * <p>
1006      * This method will handle a file in either UNIX or Windows format.
1007      * The position of the last forward or backslash is returned.
1008      * <p>
1009      * The output will be the same irrespective of the machine that the code is running on.
1010      *
1011      * @param fileName  the file name to find the last path separator in, null returns -1
1012      * @return the index of the last separator character, or -1 if there
1013      * is no such character
1014      */
1015     public static int indexOfLastSeparator(final String fileName) {
1016         if (fileName == null) {
1017             return NOT_FOUND;
1018         }
1019         final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR);
1020         final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR);
1021         return Math.max(lastUnixPos, lastWindowsPos);
1022     }
1023 
1024     private static boolean isEmpty(final String string) {
1025         return string == null || string.isEmpty();
1026     }
1027 
1028     /**
1029      * Checks whether the extension of the file name is one of those specified.
1030      * <p>
1031      * This method obtains the extension as the textual part of the file name
1032      * after the last dot. There must be no directory separator after the dot.
1033      * The extension check is case-sensitive on all platforms.
1034      *
1035      * @param fileName  the file name, null returns false
1036      * @param extensions  the extensions to check for, null checks for no extension
1037      * @return true if the extension of the file name is one of the given extensions
1038      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1039      */
1040     public static boolean isExtension(final String fileName, final Collection<String> extensions) {
1041         if (fileName == null) {
1042             return false;
1043         }
1044         requireNonNullChars(fileName);
1045 
1046         if (extensions == null || extensions.isEmpty()) {
1047             return indexOfExtension(fileName) == NOT_FOUND;
1048         }
1049         return extensions.contains(getExtension(fileName));
1050     }
1051 
1052     /**
1053      * Checks whether the extension of the file name is that specified.
1054      * <p>
1055      * This method obtains the extension as the textual part of the file name
1056      * after the last dot. There must be no directory separator after the dot.
1057      * The extension check is case-sensitive on all platforms.
1058      *
1059      * @param fileName  the file name, null returns false
1060      * @param extension  the extension to check for, null or empty checks for no extension
1061      * @return true if the file name has the specified extension
1062      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1063      */
1064     public static boolean isExtension(final String fileName, final String extension) {
1065         if (fileName == null) {
1066             return false;
1067         }
1068         requireNonNullChars(fileName);
1069 
1070         if (isEmpty(extension)) {
1071             return indexOfExtension(fileName) == NOT_FOUND;
1072         }
1073         return getExtension(fileName).equals(extension);
1074     }
1075 
1076     /**
1077      * Checks whether the extension of the file name is one of those specified.
1078      * <p>
1079      * This method obtains the extension as the textual part of the file name
1080      * after the last dot. There must be no directory separator after the dot.
1081      * The extension check is case-sensitive on all platforms.
1082      *
1083      * @param fileName  the file name, null returns false
1084      * @param extensions  the extensions to check for, null checks for no extension
1085      * @return true if the extension of the file name is one of the given extensions
1086      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1087      */
1088     public static boolean isExtension(final String fileName, final String... extensions) {
1089         if (fileName == null) {
1090             return false;
1091         }
1092         requireNonNullChars(fileName);
1093 
1094         if (extensions == null || extensions.length == 0) {
1095             return indexOfExtension(fileName) == NOT_FOUND;
1096         }
1097         final String fileExt = getExtension(fileName);
1098         return Stream.of(extensions).anyMatch(fileExt::equals);
1099     }
1100 
1101     /**
1102      * Checks whether a given string represents a valid IPv4 address.
1103      *
1104      * @param name the name to validate
1105      * @return true if the given name is a valid IPv4 address
1106      */
1107     // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
1108     private static boolean isIPv4Address(final String name) {
1109         final Matcher m = IPV4_PATTERN.matcher(name);
1110         if (!m.matches() || m.groupCount() != 4) {
1111             return false;
1112         }
1113 
1114         // verify that address subgroups are legal
1115         for (int i = 1; i <= 4; i++) {
1116             final String ipSegment = m.group(i);
1117             final int iIpSegment = Integer.parseInt(ipSegment);
1118             if (iIpSegment > IPV4_MAX_OCTET_VALUE) {
1119                 return false;
1120             }
1121 
1122             if (ipSegment.length() > 1 && ipSegment.startsWith("0")) {
1123                 return false;
1124             }
1125 
1126         }
1127 
1128         return true;
1129     }
1130 
1131     // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
1132     /**
1133      * Checks whether a given string represents a valid IPv6 address.
1134      *
1135      * @param inet6Address the name to validate
1136      * @return true if the given name is a valid IPv6 address
1137      */
1138     private static boolean isIPv6Address(final String inet6Address) {
1139         final boolean containsCompressedZeroes = inet6Address.contains("::");
1140         if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) {
1141             return false;
1142         }
1143         if (inet6Address.startsWith(":") && !inet6Address.startsWith("::")
1144                 || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) {
1145             return false;
1146         }
1147         String[] octets = inet6Address.split(":");
1148         if (containsCompressedZeroes) {
1149             final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
1150             if (inet6Address.endsWith("::")) {
1151                 // String.split() drops ending empty segments
1152                 octetList.add("");
1153             } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
1154                 octetList.remove(0);
1155             }
1156             octets = octetList.toArray(EMPTY_STRING_ARRAY);
1157         }
1158         if (octets.length > IPV6_MAX_HEX_GROUPS) {
1159             return false;
1160         }
1161         int validOctets = 0;
1162         int emptyOctets = 0; // consecutive empty chunks
1163         for (int index = 0; index < octets.length; index++) {
1164             final String octet = octets[index];
1165             if (octet.isEmpty()) {
1166                 emptyOctets++;
1167                 if (emptyOctets > 1) {
1168                     return false;
1169                 }
1170             } else {
1171                 emptyOctets = 0;
1172                 // Is last chunk an IPv4 address?
1173                 if (index == octets.length - 1 && octet.contains(".")) {
1174                     if (!isIPv4Address(octet)) {
1175                         return false;
1176                     }
1177                     validOctets += 2;
1178                     continue;
1179                 }
1180                 if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
1181                     return false;
1182                 }
1183                 final int octetInt;
1184                 try {
1185                     octetInt = Integer.parseInt(octet, BASE_16);
1186                 } catch (final NumberFormatException e) {
1187                     return false;
1188                 }
1189                 if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
1190                     return false;
1191                 }
1192             }
1193             validOctets++;
1194         }
1195         return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
1196     }
1197 
1198     /**
1199      * Checks whether a given string is a valid host name according to
1200      * RFC 3986 - not accepting IP addresses.
1201      *
1202      * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1203      * @param name the hostname to validate
1204      * @return true if the given name is a valid host name
1205      */
1206     private static boolean isRFC3986HostName(final String name) {
1207         final String[] parts = name.split("\\.", -1);
1208         for (int i = 0; i < parts.length; i++) {
1209             if (parts[i].isEmpty()) {
1210                 // trailing dot is legal, otherwise we've hit a .. sequence
1211                 return i == parts.length - 1;
1212             }
1213             if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
1214                 return false;
1215             }
1216         }
1217         return true;
1218     }
1219 
1220     /**
1221      * Checks if the character is a separator.
1222      *
1223      * @param ch  the character to check
1224      * @return true if it is a separator character
1225      */
1226     private static boolean isSeparator(final char ch) {
1227         return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR;
1228     }
1229 
1230     /**
1231      * Determines if Windows file system is in use.
1232      *
1233      * @return true if the system is Windows
1234      */
1235     static boolean isSystemWindows() {
1236         return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR;
1237     }
1238 
1239     /**
1240      * Checks whether a given string is a valid host name according to
1241      * RFC 3986.
1242      *
1243      * <p>Accepted are IP addresses (v4 and v6) as well as what the
1244      * RFC calls a "reg-name". Percent encoded names don't seem to be
1245      * valid names in UNC paths.</p>
1246      *
1247      * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1248      * @param name the hostname to validate
1249      * @return true if the given name is a valid host name
1250      */
1251     private static boolean isValidHostName(final String name) {
1252         return isIPv6Address(name) || isRFC3986HostName(name);
1253     }
1254 
1255     /**
1256      * Normalizes a path, removing double and single dot path steps.
1257      * <p>
1258      * This method normalizes a path to a standard format.
1259      * The input may contain separators in either UNIX or Windows format.
1260      * The output will contain separators in the format of the system.
1261      * <p>
1262      * A trailing slash will be retained.
1263      * A double slash will be merged to a single slash (but UNC names are handled).
1264      * A single dot path segment will be removed.
1265      * A double dot will cause that path segment and the one before to be removed.
1266      * If the double dot has no parent path segment, {@code null} is returned.
1267      * <p>
1268      * The output will be the same on both UNIX and Windows except
1269      * for the separator character.
1270      * <pre>
1271      * /foo//               --&gt;   /foo/
1272      * /foo/./              --&gt;   /foo/
1273      * /foo/../bar          --&gt;   /bar
1274      * /foo/../bar/         --&gt;   /bar/
1275      * /foo/../bar/../baz   --&gt;   /baz
1276      * //foo//./bar         --&gt;   //foo/bar
1277      * /../                 --&gt;   null
1278      * ../foo               --&gt;   null
1279      * foo/bar/..           --&gt;   foo/
1280      * foo/../../bar        --&gt;   null
1281      * foo/../bar           --&gt;   bar
1282      * //server/foo/../bar  --&gt;   //server/bar
1283      * //server/../bar      --&gt;   null
1284      * C:\foo\..\bar        --&gt;   C:\bar
1285      * C:\..\bar            --&gt;   null
1286      * ~/foo/../bar/        --&gt;   ~/bar/
1287      * ~/../bar             --&gt;   null
1288      * </pre>
1289      * (Note the file separator will be correct for Windows/Unix.)
1290      *
1291      * @param fileName  the file name to normalize, null returns null
1292      * @return the normalized fileName, or null if invalid
1293      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1294      */
1295     public static String normalize(final String fileName) {
1296         return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true);
1297     }
1298 
1299     /**
1300      * Normalizes a path, removing double and single dot path steps.
1301      * <p>
1302      * This method normalizes a path to a standard format.
1303      * The input may contain separators in either UNIX or Windows format.
1304      * The output will contain separators in the format specified.
1305      * <p>
1306      * A trailing slash will be retained.
1307      * A double slash will be merged to a single slash (but UNC names are handled).
1308      * A single dot path segment will be removed.
1309      * A double dot will cause that path segment and the one before to be removed.
1310      * If the double dot has no parent path segment to work with, {@code null}
1311      * is returned.
1312      * <p>
1313      * The separator used in the output is selected by {@code unixSeparator},
1314      * so the output does not depend on the platform the code is running on.
1315      * <pre>
1316      * /foo//               --&gt;   /foo/
1317      * /foo/./              --&gt;   /foo/
1318      * /foo/../bar          --&gt;   /bar
1319      * /foo/../bar/         --&gt;   /bar/
1320      * /foo/../bar/../baz   --&gt;   /baz
1321      * //foo//./bar         --&gt;   //foo/bar
1322      * /../                 --&gt;   null
1323      * ../foo               --&gt;   null
1324      * foo/bar/..           --&gt;   foo/
1325      * foo/../../bar        --&gt;   null
1326      * foo/../bar           --&gt;   bar
1327      * //server/foo/../bar  --&gt;   //server/bar
1328      * //server/../bar      --&gt;   null
1329      * C:\foo\..\bar        --&gt;   C:\bar
1330      * C:\..\bar            --&gt;   null
1331      * ~/foo/../bar/        --&gt;   ~/bar/
1332      * ~/../bar             --&gt;   null
1333      * </pre>
1334      * The output will be the same on both UNIX and Windows including
1335      * the separator character.
1336      *
1337      * @param fileName  the file name to normalize, null returns null
1338      * @param unixSeparator {@code true} if a UNIX separator should
1339      * be used or {@code false} if a Windows separator should be used.
1340      * @return the normalized fileName, or null if invalid
1341      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1342      * @since 2.0
1343      */
1344     public static String normalize(final String fileName, final boolean unixSeparator) {
1345         return doNormalize(fileName, toSeparator(unixSeparator), true);
1346     }
1347 
1348     /**
1349      * Normalizes a path, removing double and single dot path steps,
1350      * and removing any final directory separator.
1351      * <p>
1352      * This method normalizes a path to a standard format.
1353      * The input may contain separators in either UNIX or Windows format.
1354      * The output will contain separators in the format of the system.
1355      * <p>
1356      * A trailing slash will be removed.
1357      * A double slash will be merged to a single slash (but UNC names are handled).
1358      * A single dot path segment will be removed.
1359      * A double dot will cause that path segment and the one before to be removed.
1360      * If the double dot has no parent path segment to work with, {@code null}
1361      * is returned.
1362      * <p>
1363      * The output will be the same on both UNIX and Windows except
1364      * for the separator character.
1365      * <pre>
1366      * /foo//               --&gt;   /foo
1367      * /foo/./              --&gt;   /foo
1368      * /foo/../bar          --&gt;   /bar
1369      * /foo/../bar/         --&gt;   /bar
1370      * /foo/../bar/../baz   --&gt;   /baz
1371      * //foo//./bar         --&gt;   //foo/bar
1372      * /../                 --&gt;   null
1373      * ../foo               --&gt;   null
1374      * foo/bar/..           --&gt;   foo
1375      * foo/../../bar        --&gt;   null
1376      * foo/../bar           --&gt;   bar
1377      * //server/foo/../bar  --&gt;   //server/bar
1378      * //server/../bar      --&gt;   null
1379      * C:\foo\..\bar        --&gt;   C:\bar
1380      * C:\..\bar            --&gt;   null
1381      * ~/foo/../bar/        --&gt;   ~/bar
1382      * ~/../bar             --&gt;   null
1383      * </pre>
1384      * (Note the file separator returned will be correct for Windows/Unix)
1385      *
1386      * @param fileName  the file name to normalize, null returns null
1387      * @return the normalized fileName, or null if invalid
1388      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1389      */
1390     public static String normalizeNoEndSeparator(final String fileName) {
1391         return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false);
1392     }
1393 
1394     /**
1395      * Normalizes a path, removing double and single dot path steps,
1396      * and removing any final directory separator.
1397      * <p>
1398      * This method normalizes a path to a standard format.
1399      * The input may contain separators in either UNIX or Windows format.
1400      * The output will contain separators in the format specified.
1401      * <p>
1402      * A trailing slash will be removed.
1403      * A double slash will be merged to a single slash (but UNC names are handled).
1404      * A single dot path segment will be removed.
1405      * A double dot will cause that path segment and the one before to be removed.
1406      * If the double dot has no parent path segment to work with, {@code null}
1407      * is returned.
1408      * <p>
1409      * The output will be the same on both UNIX and Windows including
1410      * the separator character.
1411      * <pre>
1412      * /foo//               --&gt;   /foo
1413      * /foo/./              --&gt;   /foo
1414      * /foo/../bar          --&gt;   /bar
1415      * /foo/../bar/         --&gt;   /bar
1416      * /foo/../bar/../baz   --&gt;   /baz
1417      * //foo//./bar         --&gt;   //foo/bar
1418      * /../                 --&gt;   null
1419      * ../foo               --&gt;   null
1420      * foo/bar/..           --&gt;   foo
1421      * foo/../../bar        --&gt;   null
1422      * foo/../bar           --&gt;   bar
1423      * //server/foo/../bar  --&gt;   //server/bar
1424      * //server/../bar      --&gt;   null
1425      * C:\foo\..\bar        --&gt;   C:\bar
1426      * C:\..\bar            --&gt;   null
1427      * ~/foo/../bar/        --&gt;   ~/bar
1428      * ~/../bar             --&gt;   null
1429      * </pre>
1430      *
1431      * @param fileName  the file name to normalize, null returns null
1432      * @param unixSeparator {@code true} if a UNIX separator should
1433      * be used or {@code false} if a Windows separator should be used.
1434      * @return the normalized fileName, or null if invalid
1435      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1436      * @since 2.0
1437      */
1438     public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
1439          return doNormalize(fileName, toSeparator(unixSeparator), false);
1440     }
1441 
1442     /**
1443      * Removes the extension from a fileName.
1444      * <p>
1445      * This method returns the textual part of the file name before the last dot.
1446      * There must be no directory separator after the dot.
1447      * <pre>
1448      * foo.txt    --&gt; foo
1449      * .txt       --&gt; "" (empty string)
1450      * a\b\c.jpg  --&gt; a\b\c
1451      * /a/b/c.jpg --&gt; /a/b/c
1452      * a\b\c      --&gt; a\b\c
1453      * a.b\c      --&gt; a.b\c
1454      * </pre>
1455      * <p>
1456      * The output will be the same irrespective of the machine that the code is running on.
1457      *
1458      * @param fileName  the file name, null returns null
1459      * @return the file name minus the extension
1460      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1461      */
1462     public static String removeExtension(final String fileName) {
1463         if (fileName == null) {
1464             return null;
1465         }
1466         requireNonNullChars(fileName);
1467 
1468         final int index = indexOfExtension(fileName);
1469         if (index == NOT_FOUND) {
1470             return fileName;
1471         }
1472         return fileName.substring(0, index);
1473     }
1474 
1475     /**
1476      * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions.
1477      *
1478      * This may be used to defend against poison byte attacks.
1479      *
1480      * @param path the path to check
1481      * @return The input
1482      * @throws IllegalArgumentException if path contains the null character ({@code U+0000})
1483      */
1484     private static String requireNonNullChars(final String path) {
1485         if (path.indexOf(0) >= 0) {
1486             throw new IllegalArgumentException(
1487                 "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it");
1488         }
1489         return path;
1490     }
1491 
1492     /**
1493      * Converts all separators to the system separator.
1494      *
1495      * @param path the path to be changed, null ignored.
1496      * @return the updated path.
1497      */
1498     public static String separatorsToSystem(final String path) {
1499         return FileSystem.getCurrent().normalizeSeparators(path);
1500     }
1501 
1502     /**
1503      * Converts all separators to the UNIX separator of forward slash.
1504      *
1505      * @param path the path to be changed, null ignored.
1506      * @return the new path.
1507      */
1508     public static String separatorsToUnix(final String path) {
1509         return FileSystem.LINUX.normalizeSeparators(path);
1510     }
1511 
1512     /**
1513      * Converts all separators to the Windows separator of backslash.
1514      *
1515      * @param path the path to be changed, null ignored.
1516      * @return the updated path.
1517      */
1518     public static String separatorsToWindows(final String path) {
1519         return FileSystem.WINDOWS.normalizeSeparators(path);
1520     }
1521 
1522     /**
1523      * Splits a string into a number of tokens.
1524      * The text is split by '?' and '*'.
1525      * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1526      *
1527      * @param text  the text to split
1528      * @return the array of tokens, never null
1529      */
1530     static String[] splitOnTokens(final String text) {
1531         // used by wildcardMatch
1532         // package level so a unit test may run on this
1533 
1534         if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1535             return new String[] { text };
1536         }
1537 
1538         final char[] array = text.toCharArray();
1539         final ArrayList<String> list = new ArrayList<>();
1540         final StringBuilder buffer = new StringBuilder();
1541         char prevChar = 0;
1542         for (final char ch : array) {
1543             if (ch == '?' || ch == '*') {
1544                 if (buffer.length() != 0) {
1545                     list.add(buffer.toString());
1546                     buffer.setLength(0);
1547                 }
1548                 if (ch == '?') {
1549                     list.add("?");
1550                 } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*'
1551                     list.add("*");
1552                 }
1553             } else {
1554                 buffer.append(ch);
1555             }
1556             prevChar = ch;
1557         }
1558         if (buffer.length() != 0) {
1559             list.add(buffer.toString());
1560         }
1561 
1562         return list.toArray(EMPTY_STRING_ARRAY);
1563     }
1564 
1565     /**
1566      * Returns '/' if given true, '\\' otherwise.
1567      *
1568      * @param unixSeparator which separator to return.
1569      * @return '/' if given true, '\\' otherwise.
1570      */
1571     private static char toSeparator(final boolean unixSeparator) {
1572         return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR;
1573     }
1574 
1575     /**
1576      * Checks a fileName to see if it matches the specified wildcard matcher,
1577      * always testing case-sensitive.
1578      * <p>
1579      * The wildcard matcher uses the characters '?' and '*' to represent a
1580      * single or multiple (zero or more) wildcard characters.
1581      * This is the same as often found on DOS/Unix command lines.
1582      * The check is case-sensitive always.
1583      * <pre>
1584      * wildcardMatch("c.txt", "*.txt")      --&gt; true
1585      * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1586      * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1587      * wildcardMatch("c.txt", "*.???")      --&gt; true
1588      * wildcardMatch("c.txt", "*.????")     --&gt; false
1589      * </pre>
1590      * N.B. the sequence "*?" does not work properly at present in match strings.
1591      *
1592      * @param fileName  the file name to match on
1593      * @param wildcardMatcher  the wildcard string to match against
1594      * @return true if the file name matches the wildcard string
1595      * @see IOCase#SENSITIVE
1596      */
1597     public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
1598         return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
1599     }
1600 
1601     /**
1602      * Checks a fileName to see if it matches the specified wildcard matcher
1603      * allowing control over case-sensitivity.
1604      * <p>
1605      * The wildcard matcher uses the characters '?' and '*' to represent a
1606      * single or multiple (zero or more) wildcard characters.
1607      * N.B. the sequence "*?" does not work properly at present in match strings.
1608      *
1609      * @param fileName  the file name to match on
1610      * @param wildcardMatcher  the wildcard string to match against
1611      * @param ioCase  what case sensitivity rule to use, null means case-sensitive
1612      * @return true if the file name matches the wildcard string
1613      * @since 1.3
1614      */
1615     public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) {
1616         if (fileName == null && wildcardMatcher == null) {
1617             return true;
1618         }
1619         if (fileName == null || wildcardMatcher == null) {
1620             return false;
1621         }
1622         ioCase = IOCase.value(ioCase, IOCase.SENSITIVE);
1623         final String[] wcs = splitOnTokens(wildcardMatcher);
1624         boolean anyChars = false;
1625         int textIdx = 0;
1626         int wcsIdx = 0;
1627         final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);
1628 
1629         // loop around a backtrack stack, to handle complex * matching
1630         do {
1631             if (!backtrack.isEmpty()) {
1632                 final int[] array = backtrack.pop();
1633                 wcsIdx = array[0];
1634                 textIdx = array[1];
1635                 anyChars = true;
1636             }
1637 
1638             // loop whilst tokens and text left to process
1639             while (wcsIdx < wcs.length) {
1640 
1641                 if (wcs[wcsIdx].equals("?")) {
1642                     // ? so move to next text char
1643                     textIdx++;
1644                     if (textIdx > fileName.length()) {
1645                         break;
1646                     }
1647                     anyChars = false;
1648 
1649                 } else if (wcs[wcsIdx].equals("*")) {
1650                     // set any chars status
1651                     anyChars = true;
1652                     if (wcsIdx == wcs.length - 1) {
1653                         textIdx = fileName.length();
1654                     }
1655 
1656                 } else {
1657                     // matching text token
1658                     if (anyChars) {
1659                         // any chars then try to locate text token
1660                         textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
1661                         if (textIdx == NOT_FOUND) {
1662                             // token not found
1663                             break;
1664                         }
1665                         final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
1666                         if (repeat >= 0) {
1667                             backtrack.push(new int[] {wcsIdx, repeat});
1668                         }
1669                     } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
1670                         // matching from current position
1671                         // couldn't match token
1672                         break;
1673                     }
1674 
1675                     // matched text token, move text index to end of matched token
1676                     textIdx += wcs[wcsIdx].length();
1677                     anyChars = false;
1678                 }
1679 
1680                 wcsIdx++;
1681             }
1682 
1683             // full match
1684             if (wcsIdx == wcs.length && textIdx == fileName.length()) {
1685                 return true;
1686             }
1687 
1688         } while (!backtrack.isEmpty());
1689 
1690         return false;
1691     }
1692 
1693     /**
1694      * Checks a fileName to see if it matches the specified wildcard matcher
1695      * using the case rules of the system.
1696      * <p>
1697      * The wildcard matcher uses the characters '?' and '*' to represent a
1698      * single or multiple (zero or more) wildcard characters.
1699      * This is the same as often found on DOS/Unix command lines.
1700      * The check is case-sensitive on UNIX and case-insensitive on Windows.
1701      * <pre>
1702      * wildcardMatch("c.txt", "*.txt")      --&gt; true
1703      * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1704      * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1705      * wildcardMatch("c.txt", "*.???")      --&gt; true
1706      * wildcardMatch("c.txt", "*.????")     --&gt; false
1707      * </pre>
1708      * N.B. the sequence "*?" does not work properly at present in match strings.
1709      *
1710      * @param fileName  the file name to match on
1711      * @param wildcardMatcher  the wildcard string to match against
1712      * @return true if the file name matches the wildcard string
1713      * @see IOCase#SYSTEM
1714      */
1715     public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
1716         return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
1717     }
1718 
1719     /**
1720      * Instances should NOT be constructed in standard programming.
1721      *
1722      * @deprecated TODO Make private in 3.0.
1723      */
1724     @Deprecated
1725     public FilenameUtils() {
1726         // Intentionally empty: nothing is initialized here. The deprecation note marks this constructor to become private in 3.0.
1727     }
1728 }