View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io;
18  
19  import java.io.File;
20  import java.util.ArrayDeque;
21  import java.util.ArrayList;
22  import java.util.Arrays;
23  import java.util.Collection;
24  import java.util.Deque;
25  import java.util.List;
26  import java.util.regex.Matcher;
27  import java.util.regex.Pattern;
28  import java.util.stream.Stream;
29  
30  /**
31   * General file name and file path manipulation utilities. The methods in this class
32   * operate on strings that represent relative or absolute paths. Nothing in this class
33   * ever accesses the file system, or depends on whether a path points to a file that exists.
34   * <p>
35   * When dealing with file names, you can hit problems when moving from a Windows
36   * based development machine to a Unix based production machine.
37   * This class aims to help avoid those problems.
38   * </p>
39   * <p>
40   * <strong>NOTE</strong>: You may be able to avoid using this class entirely simply by
41   * using JDK {@link File File} objects and the two argument constructor
42   * {@link File#File(java.io.File, String) File(File,String)}.
43   * </p>
44   * <p>
45   * Most methods in this class are designed to work the same on both Unix and Windows.
46   * Those that don't include 'System', 'Unix', or 'Windows' in their name.
47   * </p>
48   * <p>
49   * Most methods recognize both separators (forward and backslashes), and both
50   * sets of prefixes. See the Javadoc of each method for details.
51   * </p>
52   * <p>
53   * This class defines six components within a path (sometimes called a file name or a full file name).
54   * Given an absolute Windows path such as C:\dev\project\file.txt they are:
55   * </p>
56   * <ul>
57   * <li>the full file name, or just file name - C:\dev\project\file.txt</li>
58   * <li>the prefix - C:\</li>
59   * <li>the path - dev\project\</li>
60   * <li>the full path - C:\dev\project\</li>
61   * <li>the name - file.txt</li>
62   * <li>the base name - file</li>
63   * <li>the extension - txt</li>
64   * </ul>
65   * <p>
66   * Given an absolute Unix path such as /dev/project/file.txt they are:
67   * </p>
68   * <ul>
69   * <li>the full file name, or just file name - /dev/project/file.txt</li>
70   * <li>the prefix - /</li>
 * <li>the path - dev/project/</li>
 * <li>the full path - /dev/project/</li>
73   * <li>the name - file.txt</li>
74   * <li>the base name - file</li>
75   * <li>the extension - txt</li>
76   * </ul>
77   * <p>
78   * Given a relative Windows path such as dev\project\file.txt they are:
79   * </p>
80   * <ul>
81   * <li>the full file name, or just file name - dev\project\file.txt</li>
 * <li>the prefix - "" (empty)</li>
83   * <li>the path - dev\project\</li>
84   * <li>the full path - dev\project\</li>
85   * <li>the name - file.txt</li>
86   * <li>the base name - file</li>
87   * <li>the extension - txt</li>
88   * </ul>
89   * <p>
 * Given a relative Unix path such as dev/project/file.txt they are:
 * </p>
 * <ul>
 * <li>the full file name, or just file name - dev/project/file.txt</li>
 * <li>the prefix - "" (empty)</li>
 * <li>the path - dev/project/</li>
 * <li>the full path - dev/project/</li>
97   * <li>the name - file.txt</li>
98   * <li>the base name - file</li>
99   * <li>the extension - txt</li>
100  * </ul>
101  *
102  *
103  * <p>
104  * This class works best if directory names end with a separator.
105  * If you omit the last separator, it is impossible to determine if the last component
106  * corresponds to a file or a directory. This class treats final components
107  * that do not end with a separator as files, not directories.
108  * </p>
109  * <p>
110  * This class only supports Unix and Windows style names.
111  * Prefixes are matched as follows:
112  * </p>
113  * <pre>
114  * Windows:
115  * a\b\c.txt           --&gt; ""          --&gt; relative
116  * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
117  * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
118  * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
119  * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
120  *
121  * Unix:
122  * a/b/c.txt           --&gt; ""          --&gt; relative
123  * /a/b/c.txt          --&gt; "/"         --&gt; absolute
124  * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
125  * ~                   --&gt; "~/"        --&gt; current user (slash added)
126  * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
127  * ~user               --&gt; "~user/"    --&gt; named user (slash added)
128  * </pre>
129  * <p>
130  * Both prefix styles are matched, irrespective of the machine that you are
131  * currently running on.
132  * </p>
133  *
134  * @since 1.1
135  */
136 public class FilenameUtils {
137 
    /**
     * A zero-length {@code String} array.
     */
    private static final String[] EMPTY_STRING_ARRAY = {};

    /**
     * The empty string; returned by several methods when a path has no such component.
     */
    private static final String EMPTY_STRING = "";

    /**
     * Sentinel index meaning "no match" (for example, no extension separator was found).
     */
    private static final int NOT_FOUND = -1;

    /**
     * The extension separator character.
     *
     * @since 1.4
     */
    public static final char EXTENSION_SEPARATOR = '.';

    /**
     * The extension separator String.
     *
     * @since 1.4
     */
    public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);

    /**
     * The Unix separator character.
     */
    private static final char UNIX_NAME_SEPARATOR = '/';

    /**
     * The Windows separator character.
     */
    private static final char WINDOWS_NAME_SEPARATOR = '\\';

    /**
     * The system separator character.
     */
    private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar;

    /**
     * The separator character that is the opposite of the system separator.
     * <p>
     * Computed with {@link #flipSeparator(char)}, which throws {@link IllegalArgumentException}
     * when given anything other than the Unix or Windows separator.
     * </p>
     */
    private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR);

    /**
     * Matches four dot-separated groups of one to three decimal digits and captures each group.
     * The pattern checks the shape only, not the numeric range of a group.
     */
    private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");

    /**
     * Presumably the largest value accepted for one group of an IPv4 address
     * (the code using it is not in this part of the file).
     */
    private static final int IPV4_MAX_OCTET_VALUE = 255;

    /**
     * Presumably the maximum number of hexadecimal groups in an IPv6 address
     * (the code using it is not in this part of the file).
     */
    private static final int IPV6_MAX_HEX_GROUPS = 8;

    /**
     * Presumably the maximum number of hexadecimal digits in one IPv6 group
     * (the code using it is not in this part of the file).
     */
    private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;

    /**
     * The largest value representable in 16 unsigned bits ({@code 0xffff}, 65535).
     */
    private static final int MAX_UNSIGNED_SHORT = 0xffff;

    /**
     * The radix for hexadecimal numbers.
     */
    private static final int BASE_16 = 16;

    /**
     * Matches one ASCII letter or digit followed by any number of ASCII letters, digits or hyphens.
     */
    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");
191 
192     /**
193      * Concatenates a fileName to a base path using normal command line style rules.
194      * <p>
195      * The effect is equivalent to resultant directory after changing
196      * directory to the first argument, followed by changing directory to
197      * the second argument.
198      * </p>
199      * <p>
200      * The first argument is the base path, the second is the path to concatenate.
201      * The returned path is always normalized via {@link #normalize(String)},
202      * thus {@code ..} is handled.
203      * </p>
204      * <p>
205      * If {@code pathToAdd} is absolute (has an absolute prefix), then
206      * it will be normalized and returned.
207      * Otherwise, the paths will be joined, normalized and returned.
208      * </p>
209      * <p>
210      * The output will be the same on both Unix and Windows except
211      * for the separator character.
212      * </p>
213      * <pre>
214      * /foo/      + bar        --&gt;  /foo/bar
215      * /foo       + bar        --&gt;  /foo/bar
216      * /foo       + /bar       --&gt;  /bar
217      * /foo       + C:/bar     --&gt;  C:/bar
218      * /foo       + C:bar      --&gt;  C:bar [1]
219      * /foo/a/    + ../bar     --&gt;  /foo/bar
220      * /foo/      + ../../bar  --&gt;  null
221      * /foo/      + /bar       --&gt;  /bar
222      * /foo/..    + /bar       --&gt;  /bar
223      * /foo       + bar/c.txt  --&gt;  /foo/bar/c.txt
224      * /foo/c.txt + bar        --&gt;  /foo/c.txt/bar [2]
225      * </pre>
226      * <p>
227      * [1] Note that the Windows relative drive prefix is unreliable when
228      * used with this method.
229      * </p>
230      * <p>
231      * [2] Note that the first parameter must be a path. If it ends with a name, then
232      * the name will be built into the concatenated path. If this might be a problem,
233      * use {@link #getFullPath(String)} on the base path argument.
234      * </p>
235      *
236      * @param basePath  the base path to attach to, always treated as a path.
237      * @param fullFileNameToAdd  the file name (or path) to attach to the base.
238      * @return the concatenated path, or null if invalid.
239      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
240      */
241     public static String concat(final String basePath, final String fullFileNameToAdd) {
242         final int prefix = getPrefixLength(fullFileNameToAdd);
243         if (prefix < 0) {
244             return null;
245         }
246         if (prefix > 0) {
247             return normalize(fullFileNameToAdd);
248         }
249         if (basePath == null) {
250             return null;
251         }
252         final int len = basePath.length();
253         if (len == 0) {
254             return normalize(fullFileNameToAdd);
255         }
256         final char ch = basePath.charAt(len - 1);
257         if (isSeparator(ch)) {
258             return normalize(basePath + fullFileNameToAdd);
259         }
260         return normalize(basePath + '/' + fullFileNameToAdd);
261     }
262 
263     /**
264      * Determines whether the {@code parent} directory contains the {@code child} (a file or directory).
265      * This does not read from the file system, and there is no guarantee or expectation that
266      * these paths actually exist.
267      * <p>
268      * The files names are expected to be normalized.
269      * </p>
270      *
271      * Edge cases:
272      * <ul>
273      * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li>
274      * <li>A directory does not contain itself: return false</li>
275      * <li>A null child file is not contained in any parent: return false</li>
276      * </ul>
277      *
278      * @param canonicalParent the path string to consider as the parent.
279      * @param canonicalChild the path string to consider as the child.
280      * @return true if the candidate leaf is under the specified composite. False otherwise.
281      * @since 2.2
282      * @see FileUtils#directoryContains(File, File)
283      */
284     public static boolean directoryContains(final String canonicalParent, final String canonicalChild) {
285         if (isEmpty(canonicalParent) || isEmpty(canonicalChild) || IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
286             return false;
287         }
288         final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR);
289         final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator;
290         return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator);
291     }
292 
293     /**
294      * Does the work of getting the path.
295      *
296      * @param fileName  the file name.
297      * @param includeEndSeparator  true to include the end separator.
298      * @return the path.
299      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
300      */
301     private static String doGetFullPath(final String fileName, final boolean includeEndSeparator) {
302         if (fileName == null) {
303             return null;
304         }
305         final int prefix = getPrefixLength(fileName);
306         if (prefix < 0) {
307             return null;
308         }
309         if (prefix >= fileName.length()) {
310             if (includeEndSeparator) {
311                 return getPrefix(fileName);  // add end slash if necessary
312             }
313             return fileName;
314         }
315         final int index = indexOfLastSeparator(fileName);
316         if (index < 0) {
317             return fileName.substring(0, prefix);
318         }
319         int end = index + (includeEndSeparator ?  1 : 0);
320         if (end == 0) {
321             end++;
322         }
323         return fileName.substring(0, end);
324     }
325 
326     /**
327      * Does the work of getting the path.
328      *
329      * @param fileName  the file name.
330      * @param separatorAdd  0 to omit the end separator, 1 to return it.
331      * @return the path.
332      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
333      */
334     private static String doGetPath(final String fileName, final int separatorAdd) {
335         if (fileName == null) {
336             return null;
337         }
338         final int prefix = getPrefixLength(fileName);
339         if (prefix < 0) {
340             return null;
341         }
342         final int index = indexOfLastSeparator(fileName);
343         final int endIndex = index + separatorAdd;
344         if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
345             return EMPTY_STRING;
346         }
347         return requireNonNullChars(fileName.substring(prefix, endIndex));
348     }
349 
    /**
     * Internal method to perform the normalization.
     * <p>
     * Works on a {@code char} copy of the name and, in this order: converts every separator to
     * {@code separator}; appends a temporary trailing separator; collapses runs of separators
     * after the prefix; removes {@code .} segments; resolves {@code ..} segments against the
     * segment before them. The temporary trailing separator is then kept or dropped.
     * </p>
     *
     * @param fileName  the file name.
     * @param separator The separator character to use.
     * @param keepSeparator  true to keep the final separator.
     * @return the normalized fileName; the input itself if it is empty; null if the input is null,
     *         its prefix is invalid, or a {@code ..} segment would climb above the prefix.
     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}),
     *         or if {@code separator} is not one of the two separators accepted by {@link #flipSeparator(char)}.
     */
    private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
        if (fileName == null) {
            return null;
        }
        requireNonNullChars(fileName);
        // size tracks the logical length of the text held in the working array
        int size = fileName.length();
        if (size == 0) {
            return fileName;
        }
        final int prefix = getPrefixLength(fileName);
        if (prefix < 0) {
            return null;
        }
        // Two spare slots: one for the separator that may be appended below, and one because the
        // arraycopy calls further down read one element past the logical end.
        final char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy
        fileName.getChars(0, fileName.length(), array, 0);
        // fix separators throughout
        // (the loop also visits the two spare slots, which still hold the default '\0' and never match)
        final char otherSeparator = flipSeparator(separator);
        for (int i = 0; i < array.length; i++) {
            if (array[i] == otherSeparator) {
                array[i] = separator;
            }
        }
        // add extra separator on the end to simplify code below
        // lastIsDirectory records whether the input itself ended with a separator
        boolean lastIsDirectory = true;
        if (array[size - 1] != separator) {
            array[size++] = separator;
            lastIsDirectory = false;
        }
        // adjoining slashes
        // If we get here, prefix can only be 0 or greater, size 1 or greater
        // If prefix is 0, set loop start to 1 to prevent index errors
        // Starting at the prefix leaves separators inside the prefix itself untouched.
        for (int i = prefix != 0 ? prefix : 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == separator) {
                // shift the tail left by one, dropping the duplicate, then re-examine this index
                System.arraycopy(array, i, array, i - 1, size - i);
                size--;
                i--;
            }
        }
        // period slash
        // removes a "." segment: a '.' followed by a separator that sits directly after the
        // prefix or directly after another separator
        for (int i = prefix + 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' && (i == prefix + 1 || array[i - 2] == separator)) {
                if (i == size - 1) {
                    // the name ended in ".", which denotes a directory
                    lastIsDirectory = true;
                }
                System.arraycopy(array, i + 1, array, i - 1, size - i);
                size -= 2;
                i--;
            }
        }
        // double period slash
        // resolves a ".." segment by deleting it together with the segment before it
        outer: for (int i = prefix + 2; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' && (i == prefix + 2 || array[i - 3] == separator)) {
                if (i == prefix + 2) {
                    // ".." directly after the prefix: there is no segment left to remove
                    return null;
                }
                if (i == size - 1) {
                    // the name ended in "..", which denotes a directory
                    lastIsDirectory = true;
                }
                int j;
                // search backwards for the separator that starts the preceding segment
                for (j = i - 4; j >= prefix; j--) {
                    if (array[j] == separator) {
                        // remove b/../ from a/b/../c
                        System.arraycopy(array, i + 1, array, j + 1, size - i);
                        size -= i - j;
                        i = j + 1;
                        continue outer;
                    }
                }
                // remove a/../ from a/../c
                // (no earlier separator: the preceding segment starts right at the prefix)
                System.arraycopy(array, i + 1, array, prefix, size - i);
                size -= i + 1 - prefix;
                i = prefix + 1;
            }
        }
        if (size <= 0) { // should never be less than 0
            return EMPTY_STRING;
        }
        // Keep the final separator when only the prefix remains, or when the name denotes a
        // directory and the caller asked to keep it.
        if (size <= prefix || lastIsDirectory && keepSeparator) {
            return new String(array, 0, size); // keep trailing separator
        }
        return new String(array, 0, size - 1); // lose trailing separator
    }
441 
442     /**
443      * Checks whether two file names are exactly equal.
444      * <p>
445      * No processing is performed on the file names other than comparison.
446      * This is merely a null-safe case-sensitive string equality.
447      * </p>
448      *
449      * @param fileName1  the first file name, may be null.
450      * @param fileName2  the second file name, may be null.
451      * @return true if the file names are equal, null equals null.
452      * @see IOCase#SENSITIVE
453      */
454     public static boolean equals(final String fileName1, final String fileName2) {
455         return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
456     }
457 
458     /**
459      * Checks whether two file names are equal, optionally normalizing and providing
460      * control over the case-sensitivity.
461      *
462      * @param fileName1  the first file name, may be null.
463      * @param fileName2  the second file name, may be null.
464      * @param normalize  whether to normalize the file names.
465      * @param ioCase  what case sensitivity rule to use, null means case-sensitive.
466      * @return true if the file names are equal, null equals null.
467      * @since 1.3
468      */
469     public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) {
470         if (fileName1 == null || fileName2 == null) {
471             return fileName1 == null && fileName2 == null;
472         }
473         if (normalize) {
474             fileName1 = normalize(fileName1);
475             if (fileName1 == null) {
476                 return false;
477             }
478             fileName2 = normalize(fileName2);
479             if (fileName2 == null) {
480                 return false;
481             }
482         }
483         return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2);
484     }
485 
486     /**
487      * Checks whether two file names are equal after both have been normalized.
488      * <p>
489      * Both file names are first passed to {@link #normalize(String)}.
490      * The check is then performed in a case-sensitive manner.
491      * </p>
492      *
493      * @param fileName1  the first file name, may be null.
494      * @param fileName2  the second file name, may be null.
495      * @return true if the file names are equal, null equals null.
496      * @see IOCase#SENSITIVE
497      */
498     public static boolean equalsNormalized(final String fileName1, final String fileName2) {
499         return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
500     }
501 
502     /**
503      * Checks whether two file names are equal using the case rules of the system
504      * after both have been normalized.
505      * <p>
506      * Both file names are first passed to {@link #normalize(String)}.
507      * The check is then performed case-sensitively on Unix and
508      * case-insensitively on Windows.
509      * </p>
510      *
511      * @param fileName1  the first file name, may be null.
512      * @param fileName2  the second file name, may be null.
513      * @return true if the file names are equal, null equals null.
514      * @see IOCase#SYSTEM
515      */
516     public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
517         return equals(fileName1, fileName2, true, IOCase.SYSTEM);
518     }
519 
520     /**
521      * Checks whether two file names are equal using the case rules of the system.
522      * <p>
523      * No processing is performed on the file names other than comparison.
524      * The check is case-sensitive on Unix and case-insensitive on Windows.
525      * </p>
526      *
527      * @param fileName1  the first file name, may be null.
528      * @param fileName2  the second file name, may be null.
529      * @return true if the file names are equal, null equals null.
530      * @see IOCase#SYSTEM
531      */
532     public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
533         return equals(fileName1, fileName2, false, IOCase.SYSTEM);
534     }
535 
536     /**
537      * Flips the Windows name separator to Linux and vice-versa.
538      *
539      * @param ch The Windows or Linux name separator.
540      * @return The Windows or Linux name separator.
541      */
542     static char flipSeparator(final char ch) {
543         if (ch == UNIX_NAME_SEPARATOR) {
544             return WINDOWS_NAME_SEPARATOR;
545         }
546         if (ch == WINDOWS_NAME_SEPARATOR) {
547             return UNIX_NAME_SEPARATOR;
548         }
549         throw new IllegalArgumentException(String.valueOf(ch));
550     }
551 
552     /**
553      * Special handling for NTFS ADS: Don't accept colon in the file name.
554      *
555      * @param fileName a file name.
556      * @return ADS offsets.
557      */
558     private static int getAdsCriticalOffset(final String fileName) {
559         // Step 1: Remove leading path segments.
560         final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR);
561         final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR);
562         if (offset1 == -1) {
563             if (offset2 == -1) {
564                 return 0;
565             }
566             return offset2 + 1;
567         }
568         if (offset2 == -1) {
569             return offset1 + 1;
570         }
571         return Math.max(offset1, offset2) + 1;
572     }
573 
574     /**
575      * Gets the base name, minus the full path and extension, from a full file name.
576      * <p>
577      * This method will handle a path in either Unix or Windows format.
578      * The text after the last forward or backslash and before the last period is returned.
579      * </p>
580      * <pre>
581      * a/b/c.txt --&gt; c
582      * a\b\c.txt --&gt; c
583      * a/b/c.foo.txt --&gt; c.foo
584      * a.txt     --&gt; a
585      * a/b/c     --&gt; c
586      * a/b/c/    --&gt; ""
587      * </pre>
588      * <p>
589      * The output will be the same irrespective of the machine that the code is running on.
590      * </p>
591      *
592      * @param fileName  the file name, null returns null.
593      * @return the name of the file without the path, or an empty string if none exists.
594      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
595      */
596     public static String getBaseName(final String fileName) {
597         return removeExtension(getName(fileName));
598     }
599 
600     /**
601      * Gets the extension of a file name.
602      * <p>
603      * This method returns the textual part of the file name after the last period.
604      * There must be no directory separator after the period.
605      * </p>
606      * <pre>
607      * foo.txt      --&gt; "txt"
608      * a/b/c.jpg    --&gt; "jpg"
609      * a/b.txt/c    --&gt; ""
610      * a/b/c        --&gt; ""
611      * </pre>
612      * <p>
613      * The output will be the same irrespective of the machine that the code is running on, with the
614      * exception of a possible {@link IllegalArgumentException} on Windows (see below).
615      * </p>
616      * <p>
617      * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
618      * In this case, the name wouldn't be the name of a file, but the identifier of an
619      * alternate data stream (bar.txt) on the file foo.exe. The method used to return
620      * ".txt" here, which would be misleading. Commons IO 2.7 and later throw
621      * an {@link IllegalArgumentException} for names like this.
622      * </p>
623      *
624      * @param fileName the file name to retrieve the extension of.
625      * @return the extension of the file or an empty string if none exists or {@code null}
626      * if the file name is {@code null}.
627      * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
628      * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
629      */
630     public static String getExtension(final String fileName) throws IllegalArgumentException {
631         if (fileName == null) {
632             return null;
633         }
634         final int index = indexOfExtension(fileName);
635         if (index == NOT_FOUND) {
636             return EMPTY_STRING;
637         }
638         return fileName.substring(index + 1);
639     }
640 
641     /**
642      * Gets the full path (prefix + path) from a full file name.
643      * <p>
644      * This method will handle a file in either Unix or Windows format.
645      * The method is entirely text based, and returns the text before and
646      * including the last forward or backslash.
647      * </p>
648      * <pre>
649      * C:\a\b\c.txt --&gt; C:\a\b\
650      * ~/a/b/c.txt  --&gt; ~/a/b/
651      * a.txt        --&gt; ""
652      * a/b/c        --&gt; a/b/
653      * a/b/c/       --&gt; a/b/c/
654      * C:           --&gt; C:
655      * C:\          --&gt; C:\
656      * ~            --&gt; ~/
657      * ~/           --&gt; ~/
658      * ~user        --&gt; ~user/
659      * ~user/       --&gt; ~user/
660      * </pre>
661      * <p>
662      * The output will be the same irrespective of the machine that the code is running on.
663      * </p>
664      *
665      * @param fileName  the file name, null returns null.
666      * @return the path of the file, an empty string if none exists, null if invalid.
667      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
668      */
669     public static String getFullPath(final String fileName) {
670         return doGetFullPath(fileName, true);
671     }
672 
673     /**
674      * Gets the full path (prefix + path) from a full file name,
675      * excluding the final directory separator.
676      * <p>
677      * This method will handle a file in either Unix or Windows format.
678      * The method is entirely text based, and returns the text before the
679      * last forward or backslash.
680      * </p>
681      * <pre>
682      * C:\a\b\c.txt --&gt; C:\a\b
683      * ~/a/b/c.txt  --&gt; ~/a/b
684      * a.txt        --&gt; ""
685      * a/b/c        --&gt; a/b
686      * a/b/c/       --&gt; a/b/c
687      * C:           --&gt; C:
688      * C:\          --&gt; C:\
689      * ~            --&gt; ~
690      * ~/           --&gt; ~
691      * ~user        --&gt; ~user
692      * ~user/       --&gt; ~user
693      * </pre>
694      * <p>
695      * The output will be the same irrespective of the machine that the code is running on.
696      * </p>
697      *
698      * @param fileName  the file name, null returns null.
699      * @return the path of the file, an empty string if none exists, null if invalid.
700      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
701      */
702     public static String getFullPathNoEndSeparator(final String fileName) {
703         return doGetFullPath(fileName, false);
704     }
705 
706     /**
707      * Gets the name minus the path from a full file name.
708      * <p>
709      * This method will handle a file in either Unix or Windows format.
710      * The text after the last forward or backslash is returned.
711      * </p>
712      * <pre>
713      * a/b/c.txt --&gt; c.txt
714      * a\b\c.txt --&gt; c.txt
715      * a.txt     --&gt; a.txt
716      * a/b/c     --&gt; c
717      * a/b/c/    --&gt; ""
718      * </pre>
719      * <p>
720      * The output will be the same irrespective of the machine that the code is running on.
721      * </p>
722      *
723      * @param fileName  the file name, null returns null.
724      * @return the name of the file without the path, or an empty string if none exists.
725      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
726      */
727     public static String getName(final String fileName) {
728         if (fileName == null) {
729             return null;
730         }
731         return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1);
732     }
733 
734     /**
735      * Gets the path from a full file name, which excludes the prefix and the name.
736      * <p>
737      * This method will handle a file in either Unix or Windows format.
738      * The method is entirely text based, and returns the text before and
739      * including the last forward or backslash.
740      * </p>
741      * <pre>
742      * C:\a\b\c.txt --&gt; a\b\
743      * ~/a/b/c.txt  --&gt; a/b/
744      * a.txt        --&gt; ""
745      * a/b/c        --&gt; a/b/
746      * a/b/c/       --&gt; a/b/c/
747      * </pre>
748      * <p>
749      * The output will be the same irrespective of the machine that the code is running on.
750      * </p>
751      * <p>
752      * This method drops the prefix from the result.
753      * See {@link #getFullPath(String)} for the method that retains the prefix.
754      * </p>
755      *
756      * @param fileName  the file name, null returns null.
757      * @return the path of the file, an empty string if none exists, null if invalid.
758      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
759      */
760     public static String getPath(final String fileName) {
761         return doGetPath(fileName, 1);
762     }
763 
764     /**
765      * Gets the path (which excludes the prefix) from a full file name, and
766      * also excluding the final directory separator.
767      * <p>
768      * This method will handle a file in either Unix or Windows format.
769      * The method is entirely text based, and returns the text before the
770      * last forward or backslash.
771      * </p>
772      * <pre>
773      * C:\a\b\c.txt --&gt; a\b
774      * ~/a/b/c.txt  --&gt; a/b
775      * a.txt        --&gt; ""
776      * a/b/c        --&gt; a/b
777      * a/b/c/       --&gt; a/b/c
778      * </pre>
779      * <p>
780      * The output will be the same irrespective of the machine that the code is running on.
781      * </p>
782      * <p>
783      * This method drops the prefix from the result.
784      * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
785      * </p>
786      *
787      * @param fileName  the file name, null returns null.
788      * @return the path of the file, an empty string if none exists, null if invalid.
789      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
790      */
791     public static String getPathNoEndSeparator(final String fileName) {
792         return doGetPath(fileName, 0);
793     }
794 
795     /**
796      * Gets the prefix such as {@code C:/} or {@code ~/} from a full file name.
797      * <p>
798      * This method will handle a file in either Unix or Windows format.
799      * The prefix includes the first slash in the full file name where applicable.
800      * </p>
801      * <pre>
802      * Windows:
803      * a\b\c.txt           --&gt; ""          --&gt; relative
804      * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
805      * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
806      * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
807      * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
808      *
809      * Unix:
810      * a/b/c.txt           --&gt; ""          --&gt; relative
811      * /a/b/c.txt          --&gt; "/"         --&gt; absolute
812      * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
813      * ~                   --&gt; "~/"        --&gt; current user (slash added)
814      * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
815      * ~user               --&gt; "~user/"    --&gt; named user (slash added)
816      * </pre>
817      * <p>
818      * The output will be the same irrespective of the machine that the code is running on.
819      * That is, both Unix and Windows prefixes are matched regardless of the current platform.
820      * </p>
821      *
822      * @param fileName  the file name, null returns null.
823      * @return the prefix of the file, null if invalid.
824      * @throws IllegalArgumentException if the result contains the null character ({@code U+0000}).
825      */
826     public static String getPrefix(final String fileName) {
827         if (fileName == null) {
828             return null;
829         }
830         final int len = getPrefixLength(fileName);
831         if (len < 0) {
832             return null;
833         }
834         if (len > fileName.length()) {
835             requireNonNullChars(fileName);
836             return fileName + UNIX_NAME_SEPARATOR;
837         }
838         return requireNonNullChars(fileName.substring(0, len));
839     }
840 
841     /**
842      * Returns the length of the file name prefix, such as {@code C:/} or {@code ~/}.
843      * <p>
844      * This method will handle a file in either Unix or Windows format.
845      * </p>
846      * <p>
847      * The prefix length includes the first slash in the full file name
848      * if applicable. Thus, it is possible that the length returned is greater
849      * than the length of the input string.
850      * </p>
851      * <pre>
852      * Windows:
853      * a\b\c.txt           --&gt; 0           --&gt; relative
854      * \a\b\c.txt          --&gt; 1           --&gt; current drive absolute
855      * C:a\b\c.txt         --&gt; 2           --&gt; drive relative
856      * C:\a\b\c.txt        --&gt; 3           --&gt; absolute
857      * \\server\a\b\c.txt  --&gt; 9           --&gt; UNC
858      * \\\a\b\c.txt        --&gt; -1          --&gt; error
859      *
860      * Unix:
861      * a/b/c.txt           --&gt; 0           --&gt; relative
862      * /a/b/c.txt          --&gt; 1           --&gt; absolute
863      * ~/a/b/c.txt         --&gt; 2           --&gt; current user
864      * ~                   --&gt; 2           --&gt; current user (slash added)
865      * ~user/a/b/c.txt     --&gt; 6           --&gt; named user
866      * ~user               --&gt; 6           --&gt; named user (slash added)
867      * //server/a/b/c.txt  --&gt; 9
868      * ///a/b/c.txt        --&gt; -1          --&gt; error
869      * C:                  --&gt; 0           --&gt; valid file name as only null character and / are reserved characters
870      * </pre>
871      * <p>
872      * The output will be the same irrespective of the machine that the code is running on.
873      * That is, both Unix and Windows prefixes are matched regardless of the current platform.
874      * </p>
875      * <p>
876      * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
877      * These must be followed by a server name, so double-slashes are not collapsed
878      * to a single slash at the start of the file name.
879      * </p>
880      *
881      * @param fileName  the file name to find the prefix in, null returns -1.
882      * @return the length of the prefix, -1 if invalid or null.
883      */
    public static int getPrefixLength(final String fileName) {
        if (fileName == null) {
            return NOT_FOUND;
        }
        final int len = fileName.length();
        if (len == 0) {
            // An empty name is treated as relative: no prefix.
            return 0;
        }
        char ch0 = fileName.charAt(0);
        if (ch0 == ':') {
            // A name may not start with a colon.
            return NOT_FOUND;
        }
        if (len == 1) {
            // Single-character names: "~", a lone separator, or a plain relative name.
            if (ch0 == '~') {
                return 2;  // return a length greater than the input
            }
            return isSeparator(ch0) ? 1 : 0;
        }
        if (ch0 == '~') {
            // "~..." home-directory prefix: it ends at the first separator of either kind.
            int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1);
            int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1);
            if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
                return len + 1;  // return a length greater than the input
            }
            // Replace a missing position with the one that was found so that
            // Math.min picks the earliest real separator.
            posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
            posWin = posWin == NOT_FOUND ? posUnix : posWin;
            return Math.min(posUnix, posWin) + 1;
        }
        final char ch1 = fileName.charAt(1);
        if (ch1 == ':') {
            // "X:" form: a drive specifier if X is a letter.
            // NOTE(review): Character.toUpperCase also folds a few non-ASCII letters
            // (e.g. U+0131, U+017F) into 'A'..'Z', so those would pass the range check
            // below - confirm whether that is intended.
            ch0 = Character.toUpperCase(ch0);
            if (ch0 >= 'A' && ch0 <= 'Z') {
                if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
                    // A bare "C:" on a file system without drive letters is an ordinary name.
                    return 0;
                }
                if (len == 2 || !isSeparator(fileName.charAt(2))) {
                    // "C:" or "C:a...": drive-relative, the prefix is the two characters "C:".
                    return 2;
                }
                // "C:\..." or "C:/...": the prefix includes the separator.
                return 3;
            }
            if (ch0 == UNIX_NAME_SEPARATOR) {
                // "/:...": only the leading forward slash is the prefix.
                return 1;
            }
            // Any other character before the colon is rejected.
            return NOT_FOUND;

        }
        if (!isSeparator(ch0) || !isSeparator(ch1)) {
            // Not a double-separator start: either a single leading separator or no prefix.
            return isSeparator(ch0) ? 1 : 0;
        }
        // Two leading separators: "//host/..." or "\\host\..." form.
        int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2);
        int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2);
        if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
            // No separator ends the host part, or a third separator follows immediately.
            return NOT_FOUND;
        }
        // Same substitution as above: make Math.min pick the earliest separator found.
        posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
        posWin = posWin == NOT_FOUND ? posUnix : posWin;
        // pos is the prefix length: up to and including the separator after the host part.
        final int pos = Math.min(posUnix, posWin) + 1;
        final String hostnamePart = fileName.substring(2, pos - 1);
        return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
    }
944 
945     /**
946      * Returns the index of the last extension separator character, which is a period.
947      * <p>
948      * This method also checks that there is no directory separator after the last period. To do this it uses
949      * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
950      * </p>
951      * <p>
952      * The output will be the same irrespective of the machine that the code is running on, with the
953      * exception of a possible {@link IllegalArgumentException} on Windows (see below).
954      * </p>
955      * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
956      * In this case, the name wouldn't be the name of a file, but the identifier of an
957      * alternate data stream (bar.txt) on the file foo.exe. The method used to return
958      * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
959      * an {@link IllegalArgumentException} for names like this.
960      *
961      * @param fileName
962      *            the file name to find the last extension separator in, null returns -1.
963      * @return the index of the last extension separator character, or -1 if there is no such character.
964      * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
965      * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
966      */
967     public static int indexOfExtension(final String fileName) throws IllegalArgumentException {
968         if (fileName == null) {
969             return NOT_FOUND;
970         }
971         if (isSystemWindows()) {
972             // Special handling for NTFS ADS: Don't accept colon in the file name.
973             final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
974             if (offset != -1) {
975                 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
976             }
977         }
978         final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
979         final int lastSeparator = indexOfLastSeparator(fileName);
980         return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
981     }
982 
983     /**
984      * Returns the index of the last directory separator character.
985      * <p>
986      * This method will handle a file in either Unix or Windows format.
987      * The position of the last forward or backslash is returned.
988      * <p>
989      * The output will be the same irrespective of the machine that the code is running on.
990      *
991      * @param fileName  the file name to find the last path separator in, null returns -1.
992      * @return the index of the last separator character, or -1 if there
993      * is no such character.
994      */
995     public static int indexOfLastSeparator(final String fileName) {
996         if (fileName == null) {
997             return NOT_FOUND;
998         }
999         final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR);
1000         final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR);
1001         return Math.max(lastUnixPos, lastWindowsPos);
1002     }
1003 
1004     private static boolean isEmpty(final String string) {
1005         return string == null || string.isEmpty();
1006     }
1007 
1008     /**
1009      * Checks whether the extension of the file name is one of those specified.
1010      * <p>
1011      * This method obtains the extension as the textual part of the file name
1012      * after the last period. There must be no directory separator after the period.
1013      * The extension check is case-sensitive on all platforms.
1014      *
1015      * @param fileName  the file name, null returns false.
1016      * @param extensions  the extensions to check for, null or empty checks for no extension.
1017      * @return true if the extension of the file name is one of those specified.
1018      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1019      */
1020     public static boolean isExtension(final String fileName, final Collection<String> extensions) {
1021         if (fileName == null) {
1022             return false;
1023         }
1024         requireNonNullChars(fileName);
1025         if (extensions == null || extensions.isEmpty()) {
1026             return indexOfExtension(fileName) == NOT_FOUND;
1027         }
1028         return extensions.contains(getExtension(fileName));
1029     }
1030 
1031     /**
1032      * Checks whether the extension of the file name is that specified.
1033      * <p>
1034      * This method obtains the extension as the textual part of the file name
1035      * after the last period. There must be no directory separator after the period.
1036      * The extension check is case-sensitive on all platforms.
1037      *
1038      * @param fileName  the file name, null returns false.
1039      * @param extension  the extension to check for, null or empty checks for no extension.
1040      * @return true if the file name has the specified extension.
1041      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1042      */
1043     public static boolean isExtension(final String fileName, final String extension) {
1044         if (fileName == null) {
1045             return false;
1046         }
1047         requireNonNullChars(fileName);
1048         if (isEmpty(extension)) {
1049             return indexOfExtension(fileName) == NOT_FOUND;
1050         }
1051         return getExtension(fileName).equals(extension);
1052     }
1053 
1054     /**
1055      * Checks whether the extension of the file name is one of those specified.
1056      * <p>
1057      * This method obtains the extension as the textual part of the file name
1058      * after the last period. There must be no directory separator after the period.
1059      * The extension check is case-sensitive on all platforms.
1060      *
1061      * @param fileName  the file name, null returns false.
1062      * @param extensions  the extensions to check for, null or empty checks for no extension.
1063      * @return true if the extension of the file name is one of those specified.
1064      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1065      */
1066     public static boolean isExtension(final String fileName, final String... extensions) {
1067         if (fileName == null) {
1068             return false;
1069         }
1070         requireNonNullChars(fileName);
1071 
1072         if (extensions == null || extensions.length == 0) {
1073             return indexOfExtension(fileName) == NOT_FOUND;
1074         }
1075         final String fileExt = getExtension(fileName);
1076         return Stream.of(extensions).anyMatch(fileExt::equals);
1077     }
1078 
1079     /**
1080      * Checks whether a given string represents a valid IPv4 address.
1081      *
1082      * @param name the name to validate.
1083      * @return true if the given name is a valid IPv4 address.
1084      */
1085     // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
1086     private static boolean isIPv4Address(final String name) {
1087         final Matcher m = IPV4_PATTERN.matcher(name);
1088         if (!m.matches() || m.groupCount() != 4) {
1089             return false;
1090         }
1091         // verify that address subgroups are legal
1092         for (int i = 1; i <= 4; i++) {
1093             final String ipSegment = m.group(i);
1094             final int iIpSegment = Integer.parseInt(ipSegment);
1095             if (iIpSegment > IPV4_MAX_OCTET_VALUE || ipSegment.length() > 1 && ipSegment.startsWith("0")) {
1096                 return false;
1097             }
1098         }
1099         return true;
1100     }
1101 
1102     // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
1103     /**
1104      * Checks whether a given string represents a valid IPv6 address.
1105      *
1106      * @param inet6Address the name to validate.
1107      * @return true if the given name is a valid IPv6 address.
1108      */
    private static boolean isIPv6Address(final String inet6Address) {
        // "::" (a run of compressed zero groups) may appear at most once.
        final boolean containsCompressedZeroes = inet6Address.contains("::");
        if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) {
            return false;
        }
        // A single leading or trailing ':' that is not part of "::" is malformed.
        if (inet6Address.startsWith(":") && !inet6Address.startsWith("::")
                || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) {
            return false;
        }
        String[] octets = inet6Address.split(":");
        if (containsCompressedZeroes) {
            // Adjust what split() produced so the loop below sees one empty chunk for the "::".
            final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
            if (inet6Address.endsWith("::")) {
                // String.split() drops ending empty segments
                octetList.add("");
            } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
                // Drop the first chunk split() produced for the leading "::".
                octetList.remove(0);
            }
            octets = octetList.toArray(EMPTY_STRING_ARRAY);
        }
        if (octets.length > IPV6_MAX_HEX_GROUPS) {
            return false;
        }
        int validOctets = 0;
        int emptyOctets = 0; // consecutive empty chunks
        for (int index = 0; index < octets.length; index++) {
            final String octet = octets[index];
            if (octet.isEmpty()) {
                // More than one empty chunk in a row is rejected.
                emptyOctets++;
                if (emptyOctets > 1) {
                    return false;
                }
            } else {
                emptyOctets = 0;
                // Is last chunk an IPv4 address?
                if (index == octets.length - 1 && octet.contains(".")) {
                    if (!isIPv4Address(octet)) {
                        return false;
                    }
                    // An embedded IPv4 tail counts as two groups; 'continue' skips the
                    // single increment at the bottom of the loop.
                    validOctets += 2;
                    continue;
                }
                if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
                    return false;
                }
                final int octetInt;
                try {
                    octetInt = Integer.parseInt(octet, BASE_16);
                } catch (final NumberFormatException e) {
                    // Not a hexadecimal number: not an IPv6 group.
                    return false;
                }
                if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
                    return false;
                }
            }
            // Reached for empty chunks as well as for valid hex groups.
            validOctets++;
        }
        // Never more than the maximum number of groups; fewer only when "::" was used.
        return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
    }
1168 
1169     /**
1170      * Checks whether a given string is a valid host name according to
1171      * RFC 3986 - not accepting IP addresses.
1172      *
1173      * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1174      * @param name the hostname to validate.
1175      * @return true if the given name is a valid host name.
1176      */
1177     private static boolean isRFC3986HostName(final String name) {
1178         final String[] parts = name.split("\\.", -1);
1179         for (int i = 0; i < parts.length; i++) {
1180             if (parts[i].isEmpty()) {
1181                 // trailing period is legal, otherwise we've hit a .. sequence
1182                 return i == parts.length - 1;
1183             }
1184             if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
1185                 return false;
1186             }
1187         }
1188         return true;
1189     }
1190 
1191     /**
1192      * Checks if the character is a separator.
1193      *
1194      * @param ch  the character to check.
1195      * @return true if it is a separator character.
1196      */
1197     private static boolean isSeparator(final char ch) {
1198         return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR;
1199     }
1200 
1201     /**
1202      * Determines if Windows file system is in use.
1203      *
1204      * @return true if the system is Windows.
1205      */
1206     static boolean isSystemWindows() {
1207         return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR;
1208     }
1209 
1210     /**
1211      * Checks whether a given string is a valid host name according to
1212      * RFC 3986.
1213      *
1214      * <p>Accepted are IP addresses (v4 and v6) as well as what the
1215      * RFC calls a "reg-name". Percent encoded names don't seem to be
1216      * valid names in UNC paths.</p>
1217      *
1218      * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1219      * @param name the hostname to validate.
1220      * @return true if the given name is a valid host name.
1221      */
1222     private static boolean isValidHostName(final String name) {
1223         return isIPv6Address(name) || isRFC3986HostName(name);
1224     }
1225 
1226     /**
1227      * Normalizes a path, removing double and single period path steps.
1228      * <p>
1229      * This method normalizes a path to a standard format.
1230      * The input may contain separators in either Unix or Windows format.
1231      * The output will contain separators in the format of the system.
1232      * <p>
1233      * A trailing slash will be retained.
1234      * A double slash will be merged to a single slash (but UNC names are handled).
1235      * A single period path segment will be removed.
1236      * A double period will cause that path segment and the one before to be removed.
1237      * If the double period has no parent path segment, {@code null} is returned.
1238      * <p>
1239      * The output will be the same on both Unix and Windows except
1240      * for the separator character.
1241      * <pre>
1242      * /foo//               --&gt;   /foo/
1243      * /foo/./              --&gt;   /foo/
1244      * /foo/../bar          --&gt;   /bar
1245      * /foo/../bar/         --&gt;   /bar/
1246      * /foo/../bar/../baz   --&gt;   /baz
1247      * //foo//./bar         --&gt;   //foo/bar
1248      * /../                 --&gt;   null
1249      * ../foo               --&gt;   null
1250      * foo/bar/..           --&gt;   foo/
1251      * foo/../../bar        --&gt;   null
1252      * foo/../bar           --&gt;   bar
1253      * //server/foo/../bar  --&gt;   //server/bar
1254      * //server/../bar      --&gt;   null
1255      * C:\foo\..\bar        --&gt;   C:\bar
1256      * C:\..\bar            --&gt;   null
1257      * ~/foo/../bar/        --&gt;   ~/bar/
1258      * ~/../bar             --&gt;   null
1259      * </pre>
1260      * (Note the file separator will be correct for Windows/Unix.)
1261      *
1262      * @param fileName  the file name to normalize, null returns null.
1263      * @return the normalized fileName, or null if invalid.
1264      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1265      */
1266     public static String normalize(final String fileName) {
1267         return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true);
1268     }
1269 
1270     /**
1271      * Normalizes a path, removing double and single period path steps.
1272      * <p>
1273      * This method normalizes a path to a standard format.
1274      * The input may contain separators in either Unix or Windows format.
1275      * The output will contain separators in the format specified.
1276      * <p>
1277      * A trailing slash will be retained.
1278      * A double slash will be merged to a single slash (but UNC names are handled).
1279      * A single period path segment will be removed.
1280      * A double period will cause that path segment and the one before to be removed.
1281      * If the double period has no parent path segment to work with, {@code null}
1282      * is returned.
1283      * <p>
1284      * The output will be the same on both Unix and Windows including
1285      * the separator character, which is selected by {@code unixSeparator}.
1286      * <pre>
1287      * /foo//               --&gt;   /foo/
1288      * /foo/./              --&gt;   /foo/
1289      * /foo/../bar          --&gt;   /bar
1290      * /foo/../bar/         --&gt;   /bar/
1291      * /foo/../bar/../baz   --&gt;   /baz
1292      * //foo//./bar         --&gt;   //foo/bar
1293      * /../                 --&gt;   null
1294      * ../foo               --&gt;   null
1295      * foo/bar/..           --&gt;   foo/
1296      * foo/../../bar        --&gt;   null
1297      * foo/../bar           --&gt;   bar
1298      * //server/foo/../bar  --&gt;   //server/bar
1299      * //server/../bar      --&gt;   null
1300      * C:\foo\..\bar        --&gt;   C:\bar
1301      * C:\..\bar            --&gt;   null
1302      * ~/foo/../bar/        --&gt;   ~/bar/
1303      * ~/../bar             --&gt;   null
1304      * </pre>
1305      * The output will be the same on both Unix and Windows including
1306      * the separator character.
1307      *
1308      * @param fileName  the file name to normalize, null returns null.
1309      * @param unixSeparator {@code true} if a Unix separator should
1310      * be used or {@code false} if a Windows separator should be used.
1311      * @return the normalized fileName, or null if invalid.
1312      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1313      * @since 2.0
1314      */
1315     public static String normalize(final String fileName, final boolean unixSeparator) {
1316         return doNormalize(fileName, toSeparator(unixSeparator), true);
1317     }
1318 
1319     /**
1320      * Normalizes a path, removing double and single period path steps,
1321      * and removing any final directory separator.
1322      * <p>
1323      * This method normalizes a path to a standard format.
1324      * The input may contain separators in either Unix or Windows format.
1325      * The output will contain separators in the format of the system.
1326      * <p>
1327      * A trailing slash will be removed.
1328      * A double slash will be merged to a single slash (but UNC names are handled).
1329      * A single period path segment will be removed.
1330      * A double period will cause that path segment and the one before to be removed.
1331      * If the double period has no parent path segment to work with, {@code null}
1332      * is returned.
1333      * <p>
1334      * The output will be the same on both Unix and Windows except
1335      * for the separator character.
1336      * <pre>
1337      * /foo//               --&gt;   /foo
1338      * /foo/./              --&gt;   /foo
1339      * /foo/../bar          --&gt;   /bar
1340      * /foo/../bar/         --&gt;   /bar
1341      * /foo/../bar/../baz   --&gt;   /baz
1342      * //foo//./bar         --&gt;   //foo/bar
1343      * /../                 --&gt;   null
1344      * ../foo               --&gt;   null
1345      * foo/bar/..           --&gt;   foo
1346      * foo/../../bar        --&gt;   null
1347      * foo/../bar           --&gt;   bar
1348      * //server/foo/../bar  --&gt;   //server/bar
1349      * //server/../bar      --&gt;   null
1350      * C:\foo\..\bar        --&gt;   C:\bar
1351      * C:\..\bar            --&gt;   null
1352      * ~/foo/../bar/        --&gt;   ~/bar
1353      * ~/../bar             --&gt;   null
1354      * </pre>
1355      * (Note the file separator returned will be correct for Windows/Unix)
1356      *
1357      * @param fileName  the file name to normalize, null returns null.
1358      * @return the normalized fileName, or null if invalid.
1359      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1360      */
1361     public static String normalizeNoEndSeparator(final String fileName) {
1362         return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false);
1363     }
1364 
1365     /**
1366      * Normalizes a path, removing double and single period path steps,
1367      * and removing any final directory separator.
1368      * <p>
1369      * This method normalizes a path to a standard format.
1370      * The input may contain separators in either Unix or Windows format.
1371      * The output will contain separators in the format specified.
1372      * <p>
1373      * A trailing slash will be removed.
1374      * A double slash will be merged to a single slash (but UNC names are handled).
1375      * A single period path segment will be removed.
1376      * A double period will cause that path segment and the one before to be removed.
1377      * If the double period has no parent path segment to work with, {@code null}
1378      * is returned.
1379      * <p>
1380      * The output will be the same on both Unix and Windows including
1381      * the separator character.
1382      * <pre>
1383      * /foo//               --&gt;   /foo
1384      * /foo/./              --&gt;   /foo
1385      * /foo/../bar          --&gt;   /bar
1386      * /foo/../bar/         --&gt;   /bar
1387      * /foo/../bar/../baz   --&gt;   /baz
1388      * //foo//./bar         --&gt;   //foo/bar
1389      * /../                 --&gt;   null
1390      * ../foo               --&gt;   null
1391      * foo/bar/..           --&gt;   foo
1392      * foo/../../bar        --&gt;   null
1393      * foo/../bar           --&gt;   bar
1394      * //server/foo/../bar  --&gt;   //server/bar
1395      * //server/../bar      --&gt;   null
1396      * C:\foo\..\bar        --&gt;   C:\bar
1397      * C:\..\bar            --&gt;   null
1398      * ~/foo/../bar/        --&gt;   ~/bar
1399      * ~/../bar             --&gt;   null
1400      * </pre>
1401      *
1402      * @param fileName  the file name to normalize, null returns null.
1403      * @param unixSeparator {@code true} if a Unix separator should
1404      * be used or {@code false} if a Windows separator should be used.
1405      * @return the normalized fileName, or null if invalid.
1406      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1407      * @since 2.0
1408      */
1409     public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
1410          return doNormalize(fileName, toSeparator(unixSeparator), false);
1411     }
1412 
1413     /**
1414      * Removes the extension from a fileName.
1415      * <p>
1416      * This method returns the textual part of the file name before the last period.
1417      * There must be no directory separator after the period.
1418      * <pre>
1419      * foo.txt    --&gt; foo
1420      * .txt       --&gt; "" (empty string)
1421      * a\b\c.jpg  --&gt; a\b\c
1422      * /a/b/c.jpg --&gt; /a/b/c
1423      * a\b\c      --&gt; a\b\c
1424      * a.b\c      --&gt; a.b\c
1425      * </pre>
1426      * <p>
1427      * The output will be the same irrespective of the machine that the code is running on.
1428      *
1429      * @param fileName  the file name, null returns null.
1430      * @return the file name minus the extension.
1431      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1432      */
1433     public static String removeExtension(final String fileName) {
1434         if (fileName == null) {
1435             return null;
1436         }
1437         requireNonNullChars(fileName);
1438         final int index = indexOfExtension(fileName);
1439         if (index == NOT_FOUND) {
1440             return fileName;
1441         }
1442         return fileName.substring(0, index);
1443     }
1444 
1445     /**
1446      * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions.
1447      *
1448      * This may be used to defend against poison byte attacks.
1449      *
1450      * @param path the path to check.
1451      * @return The input.
1452      * @throws IllegalArgumentException if path contains the null character ({@code U+0000}).
1453      */
1454     private static String requireNonNullChars(final String path) {
1455         if (path.indexOf(0) >= 0) {
1456             throw new IllegalArgumentException(
1457                 "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it");
1458         }
1459         return path;
1460     }
1461 
1462     /**
1463      * Converts all separators to the system separator.
1464      *
1465      * @param path the path to be changed, null ignored.
1466      * @return the updated path.
1467      */
1468     public static String separatorsToSystem(final String path) {
1469         return FileSystem.getCurrent().normalizeSeparators(path);
1470     }
1471 
1472     /**
1473      * Converts all separators to the Unix separator of forward slash.
1474      *
1475      * @param path the path to be changed, null ignored.
1476      * @return the new path.
1477      */
1478     public static String separatorsToUnix(final String path) {
1479         return FileSystem.LINUX.normalizeSeparators(path);
1480     }
1481 
1482     /**
1483      * Converts all separators to the Windows separator of backslash.
1484      *
1485      * @param path the path to be changed, null ignored.
1486      * @return the updated path.
1487      */
1488     public static String separatorsToWindows(final String path) {
1489         return FileSystem.WINDOWS.normalizeSeparators(path);
1490     }
1491 
1492     /**
1493      * Splits a string into a number of tokens.
1494      * The text is split by '?' and '*'.
1495      * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1496      *
1497      * @param text  the text to split.
1498      * @return the array of tokens, never null.
1499      */
1500     static String[] splitOnTokens(final String text) {
1501         // used by wildcardMatch
1502         // package level so a unit test may run on this
1503         if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1504             return new String[] { text };
1505         }
1506         final char[] array = text.toCharArray();
1507         final ArrayList<String> list = new ArrayList<>();
1508         final StringBuilder buffer = new StringBuilder();
1509         char prevChar = 0;
1510         for (final char ch : array) {
1511             if (ch == '?' || ch == '*') {
1512                 if (buffer.length() != 0) {
1513                     list.add(buffer.toString());
1514                     buffer.setLength(0);
1515                 }
1516                 if (ch == '?') {
1517                     list.add("?");
1518                 } else if (prevChar != '*') { // ch == '*' here; check if previous char was '*'
1519                     list.add("*");
1520                 }
1521             } else {
1522                 buffer.append(ch);
1523             }
1524             prevChar = ch;
1525         }
1526         if (buffer.length() != 0) {
1527             list.add(buffer.toString());
1528         }
1529         return list.toArray(EMPTY_STRING_ARRAY);
1530     }
1531 
1532     /**
1533      * Returns '/' if given true, '\\' otherwise.
1534      *
1535      * @param unixSeparator which separator to return.
1536      * @return '/' if given true, '\\' otherwise.
1537      */
1538     private static char toSeparator(final boolean unixSeparator) {
1539         return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR;
1540     }
1541 
1542     /**
1543      * Checks a fileName to see if it matches the specified wildcard matcher,
1544      * always testing case-sensitive.
1545      * <p>
1546      * The wildcard matcher uses the characters '?' and '*' to represent a
1547      * single or multiple (zero or more) wildcard characters.
1548      * This is the same as often found on DOS/Unix command lines.
1549      * The check is case-sensitive always.
1550      * <pre>
1551      * wildcardMatch("c.txt", "*.txt")      --&gt; true
1552      * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1553      * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1554      * wildcardMatch("c.txt", "*.???")      --&gt; true
1555      * wildcardMatch("c.txt", "*.????")     --&gt; false
1556      * </pre>
1557      * The sequence "*?" does not work properly at present in match strings.
1558      *
1559      * @param fileName  the file name to match on, may be null.
1560      * @param wildcardMatcher  the wildcard string to match against, may be null.
1561      * @return true if the file name matches the wildcard string.
1562      * @see IOCase#SENSITIVE
1563      */
1564     public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
1565         return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
1566     }
1567 
1568     /**
1569      * Checks a fileName to see if it matches the specified wildcard matcher
1570      * allowing control over case-sensitivity.
1571      * <p>
1572      * The wildcard matcher uses the characters '?' and '*' to represent a
1573      * single or multiple (zero or more) wildcard characters.
1574      * The sequence "*?" does not work properly at present in match strings.
1575      *
1576      * @param fileName  the file name to match on, may be null.
1577      * @param wildcardMatcher  the wildcard string to match against, may be null.
1578      * @param ioCase  what case sensitivity rule to use, null means case-sensitive.
1579      * @return true if the file name matches the wildcard string.
1580      * @since 1.3
1581      */
1582     public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) {
1583         if (fileName == null && wildcardMatcher == null) {
1584             return true;
1585         }
1586         if (fileName == null || wildcardMatcher == null) {
1587             return false;
1588         }
1589         ioCase = IOCase.value(ioCase, IOCase.SENSITIVE);
1590         final String[] wcs = splitOnTokens(wildcardMatcher);
1591         boolean anyChars = false;
1592         int textIdx = 0;
1593         int wcsIdx = 0;
1594         final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);
1595         // loop around a backtrack stack, to handle complex * matching
1596         do {
1597             if (!backtrack.isEmpty()) {
1598                 final int[] array = backtrack.pop();
1599                 wcsIdx = array[0];
1600                 textIdx = array[1];
1601                 anyChars = true;
1602             }
1603             // loop whilst tokens and text left to process
1604             while (wcsIdx < wcs.length) {
1605                 if (wcs[wcsIdx].equals("?")) {
1606                     // ? so move to next text char
1607                     textIdx++;
1608                     if (textIdx > fileName.length()) {
1609                         break;
1610                     }
1611                     anyChars = false;
1612                 } else if (wcs[wcsIdx].equals("*")) {
1613                     // set any chars status
1614                     anyChars = true;
1615                     if (wcsIdx == wcs.length - 1) {
1616                         textIdx = fileName.length();
1617                     }
1618                 } else {
1619                     // matching text token
1620                     if (anyChars) {
1621                         // any chars then try to locate text token
1622                         textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
1623                         if (textIdx == NOT_FOUND) {
1624                             // token not found
1625                             break;
1626                         }
1627                         final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
1628                         if (repeat >= 0) {
1629                             backtrack.push(new int[] { wcsIdx, repeat });
1630                         }
1631                     } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
1632                         // matching from current position
1633                         // couldn't match token
1634                         break;
1635                     }
1636                     // matched text token, move text index to end of matched token
1637                     textIdx += wcs[wcsIdx].length();
1638                     anyChars = false;
1639                 }
1640                 wcsIdx++;
1641             }
1642             // full match
1643             if (wcsIdx == wcs.length && textIdx == fileName.length()) {
1644                 return true;
1645             }
1646         } while (!backtrack.isEmpty());
1647         return false;
1648     }
1649 
1650     /**
1651      * Checks a fileName to see if it matches the specified wildcard matcher
1652      * using the case rules of the system.
1653      * <p>
1654      * The wildcard matcher uses the characters '?' and '*' to represent a
1655      * single or multiple (zero or more) wildcard characters.
1656      * This is the same as often found on DOS/Unix command lines.
1657      * The check is case-sensitive on Unix and case-insensitive on Windows.
1658      * <pre>
1659      * wildcardMatch("c.txt", "*.txt")      --&gt; true
1660      * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1661      * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1662      * wildcardMatch("c.txt", "*.???")      --&gt; true
1663      * wildcardMatch("c.txt", "*.????")     --&gt; false
1664      * </pre>
1665      * The sequence "*?" does not work properly at present in match strings.
1666      *
1667      * @param fileName  the file name to match on.
1668      * @param wildcardMatcher  the wildcard string to match against.
1669      * @return true if the file name matches the wildcard string.
1670      * @see IOCase#SYSTEM
1671      */
1672     public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
1673         return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
1674     }
1675 
    /**
     * Constructs a new instance.
     * <p>
     * Instances should NOT be constructed in standard programming.
     * The constructor performs no work.
     * </p>
     *
     * @deprecated Do not instantiate this class. TODO Make private in 3.0.
     */
    @Deprecated
    public FilenameUtils() {
        // intentionally empty: there is nothing to initialize
    }
1685 }