1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.io;
18
19 import java.io.File;
20 import java.util.ArrayDeque;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.Deque;
25 import java.util.List;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28 import java.util.stream.Stream;
29
30 /**
31 * General file name and file path manipulation utilities. The methods in this class
32 * operate on strings that represent relative or absolute paths. Nothing in this class
33 * ever accesses the file system, or depends on whether a path points to a file that exists.
34 * <p>
35 * When dealing with file names, you can hit problems when moving from a Windows
36 * based development machine to a Unix based production machine.
37 * This class aims to help avoid those problems.
38 * </p>
39 * <p>
40 * <strong>NOTE</strong>: You may be able to avoid using this class entirely simply by
41 * using JDK {@link File File} objects and the two argument constructor
42 * {@link File#File(java.io.File, String) File(File,String)}.
43 * </p>
44 * <p>
45 * Most methods in this class are designed to work the same on both Unix and Windows.
46 * Those that don't include 'System', 'Unix', or 'Windows' in their name.
47 * </p>
48 * <p>
49 * Most methods recognize both separators (forward and backslashes), and both
50 * sets of prefixes. See the Javadoc of each method for details.
51 * </p>
52 * <p>
53 * This class defines six components within a path (sometimes called a file name or a full file name).
54 * Given an absolute Windows path such as C:\dev\project\file.txt they are:
55 * </p>
56 * <ul>
57 * <li>the full file name, or just file name - C:\dev\project\file.txt</li>
58 * <li>the prefix - C:\</li>
59 * <li>the path - dev\project\</li>
60 * <li>the full path - C:\dev\project\</li>
61 * <li>the name - file.txt</li>
62 * <li>the base name - file</li>
63 * <li>the extension - txt</li>
64 * </ul>
65 * <p>
66 * Given an absolute Unix path such as /dev/project/file.txt they are:
67 * </p>
68 * <ul>
69 * <li>the full file name, or just file name - /dev/project/file.txt</li>
70 * <li>the prefix - /</li>
 * <li>the path - dev/project/</li>
 * <li>the full path - /dev/project/</li>
73 * <li>the name - file.txt</li>
74 * <li>the base name - file</li>
75 * <li>the extension - txt</li>
76 * </ul>
77 * <p>
78 * Given a relative Windows path such as dev\project\file.txt they are:
79 * </p>
80 * <ul>
81 * <li>the full file name, or just file name - dev\project\file.txt</li>
 * <li>the prefix - "" (empty string)</li>
83 * <li>the path - dev\project\</li>
84 * <li>the full path - dev\project\</li>
85 * <li>the name - file.txt</li>
86 * <li>the base name - file</li>
87 * <li>the extension - txt</li>
88 * </ul>
89 * <p>
 * Given a relative Unix path such as dev/project/file.txt they are:
 * </p>
 * <ul>
 * <li>the full file name, or just file name - dev/project/file.txt</li>
 * <li>the prefix - "" (empty string)</li>
 * <li>the path - dev/project/</li>
 * <li>the full path - dev/project/</li>
97 * <li>the name - file.txt</li>
98 * <li>the base name - file</li>
99 * <li>the extension - txt</li>
100 * </ul>
101 *
102 *
103 * <p>
104 * This class works best if directory names end with a separator.
105 * If you omit the last separator, it is impossible to determine if the last component
106 * corresponds to a file or a directory. This class treats final components
107 * that do not end with a separator as files, not directories.
108 * </p>
109 * <p>
110 * This class only supports Unix and Windows style names.
111 * Prefixes are matched as follows:
112 * </p>
113 * <pre>
114 * Windows:
115 * a\b\c.txt --> "" --> relative
116 * \a\b\c.txt --> "\" --> current drive absolute
117 * C:a\b\c.txt --> "C:" --> drive relative
118 * C:\a\b\c.txt --> "C:\" --> absolute
119 * \\server\a\b\c.txt --> "\\server\" --> UNC
120 *
121 * Unix:
122 * a/b/c.txt --> "" --> relative
123 * /a/b/c.txt --> "/" --> absolute
124 * ~/a/b/c.txt --> "~/" --> current user
125 * ~ --> "~/" --> current user (slash added)
126 * ~user/a/b/c.txt --> "~user/" --> named user
127 * ~user --> "~user/" --> named user (slash added)
128 * </pre>
129 * <p>
130 * Both prefix styles are matched, irrespective of the machine that you are
131 * currently running on.
132 * </p>
133 *
134 * @since 1.1
135 */
136 public class FilenameUtils {
137
/**
 * A zero-length {@code String} array.
 */
private static final String[] EMPTY_STRING_ARRAY = {};

/**
 * The empty string, returned by methods in this class when a path component does not exist.
 */
private static final String EMPTY_STRING = "";

/**
 * Sentinel value ({@code -1}) meaning "not found" or "invalid"; it is the same value
 * {@link String#indexOf(int)} returns on a miss, so it can be compared with such results directly.
 */
private static final int NOT_FOUND = -1;

/**
 * The extension separator character.
 * @since 1.4
 */
public static final char EXTENSION_SEPARATOR = '.';

/**
 * The extension separator String.
 * @since 1.4
 */
public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);

/**
 * The Unix separator character.
 */
private static final char UNIX_NAME_SEPARATOR = '/';

/**
 * The Windows separator character.
 */
private static final char WINDOWS_NAME_SEPARATOR = '\\';

/**
 * The system separator character.
 */
private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar;

/**
 * The separator character that is the opposite of the system separator.
 * Must be declared after {@link #SYSTEM_NAME_SEPARATOR} because it is computed from it
 * during static initialization.
 */
private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR);

/**
 * Matches four dot-separated groups of one to three decimal digits, capturing each group.
 * The pattern itself does not check the numeric range of a group.
 */
// NOTE(review): the code using this pattern is outside the reviewed section; presumably IPv4 host validation.
private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");

/**
 * The value 255.
 */
// NOTE(review): presumably the largest value allowed for one IPv4 octet; confirm against the validation code.
private static final int IPV4_MAX_OCTET_VALUE = 255;

/**
 * The value 8.
 */
// NOTE(review): presumably the maximum number of hexadecimal groups in an IPv6 address; confirm against the validation code.
private static final int IPV6_MAX_HEX_GROUPS = 8;

/**
 * The value 4.
 */
// NOTE(review): presumably the maximum number of hexadecimal digits in one IPv6 group; confirm against the validation code.
private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;

/**
 * The largest value of an unsigned 16-bit integer ({@code 0xffff}).
 */
private static final int MAX_UNSIGNED_SHORT = 0xffff;

/**
 * The radix 16.
 */
// NOTE(review): presumably the radix used to parse hexadecimal groups; confirm against the validation code.
private static final int BASE_16 = 16;

/**
 * Matches a string that starts with an ASCII letter or digit and continues with
 * ASCII letters, digits or hyphens only.
 */
// NOTE(review): presumably applied to each dot-separated label of a host name; the caller is outside the reviewed section.
private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");
189
190 /**
191 * Concatenates a fileName to a base path using normal command line style rules.
192 * <p>
193 * The effect is equivalent to resultant directory after changing
194 * directory to the first argument, followed by changing directory to
195 * the second argument.
196 * </p>
197 * <p>
198 * The first argument is the base path, the second is the path to concatenate.
199 * The returned path is always normalized via {@link #normalize(String)},
200 * thus {@code ..} is handled.
201 * </p>
202 * <p>
 * If {@code fullFileNameToAdd} is absolute (has an absolute prefix), then
 * it will be normalized and returned.
205 * Otherwise, the paths will be joined, normalized and returned.
206 * </p>
207 * <p>
208 * The output will be the same on both Unix and Windows except
209 * for the separator character.
210 * </p>
211 * <pre>
212 * /foo/ + bar --> /foo/bar
213 * /foo + bar --> /foo/bar
214 * /foo + /bar --> /bar
215 * /foo + C:/bar --> C:/bar
216 * /foo + C:bar --> C:bar [1]
217 * /foo/a/ + ../bar --> /foo/bar
218 * /foo/ + ../../bar --> null
219 * /foo/ + /bar --> /bar
220 * /foo/.. + /bar --> /bar
221 * /foo + bar/c.txt --> /foo/bar/c.txt
222 * /foo/c.txt + bar --> /foo/c.txt/bar [2]
223 * </pre>
224 * <p>
225 * [1] Note that the Windows relative drive prefix is unreliable when
226 * used with this method.
227 * </p>
228 * <p>
229 * [2] Note that the first parameter must be a path. If it ends with a name, then
230 * the name will be built into the concatenated path. If this might be a problem,
231 * use {@link #getFullPath(String)} on the base path argument.
232 * </p>
233 *
234 * @param basePath the base path to attach to, always treated as a path
235 * @param fullFileNameToAdd the file name (or path) to attach to the base
236 * @return the concatenated path, or null if invalid
237 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
238 */
239 public static String concat(final String basePath, final String fullFileNameToAdd) {
240 final int prefix = getPrefixLength(fullFileNameToAdd);
241 if (prefix < 0) {
242 return null;
243 }
244 if (prefix > 0) {
245 return normalize(fullFileNameToAdd);
246 }
247 if (basePath == null) {
248 return null;
249 }
250 final int len = basePath.length();
251 if (len == 0) {
252 return normalize(fullFileNameToAdd);
253 }
254 final char ch = basePath.charAt(len - 1);
255 if (isSeparator(ch)) {
256 return normalize(basePath + fullFileNameToAdd);
257 }
258 return normalize(basePath + '/' + fullFileNameToAdd);
259 }
260
261 /**
262 * Determines whether the {@code parent} directory contains the {@code child} (a file or directory).
263 * This does not read from the file system, and there is no guarantee or expectation that
264 * these paths actually exist.
265 * <p>
 * The file names are expected to be normalized.
267 * </p>
268 *
269 * Edge cases:
270 * <ul>
 * <li>A null or empty {@code canonicalParent} contains nothing: return false</li>
 * <li>A directory does not contain itself: return false</li>
 * <li>A null or empty {@code canonicalChild} is not contained in any parent: return false</li>
274 * </ul>
275 *
276 * @param canonicalParent the path string to consider as the parent.
277 * @param canonicalChild the path string to consider as the child.
 * @return true if {@code canonicalChild} is located under {@code canonicalParent}, false otherwise.
279 * @since 2.2
280 * @see FileUtils#directoryContains(File, File)
281 */
282 public static boolean directoryContains(final String canonicalParent, final String canonicalChild) {
283 if (isEmpty(canonicalParent) || isEmpty(canonicalChild)) {
284 return false;
285 }
286
287 if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
288 return false;
289 }
290
291 final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR);
292 final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator;
293
294 return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator);
295 }
296
297 /**
298 * Does the work of getting the path.
299 *
300 * @param fileName the file name
301 * @param includeSeparator true to include the end separator
302 * @return the path
303 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
304 */
305 private static String doGetFullPath(final String fileName, final boolean includeSeparator) {
306 if (fileName == null) {
307 return null;
308 }
309 final int prefix = getPrefixLength(fileName);
310 if (prefix < 0) {
311 return null;
312 }
313 if (prefix >= fileName.length()) {
314 if (includeSeparator) {
315 return getPrefix(fileName); // add end slash if necessary
316 }
317 return fileName;
318 }
319 final int index = indexOfLastSeparator(fileName);
320 if (index < 0) {
321 return fileName.substring(0, prefix);
322 }
323 int end = index + (includeSeparator ? 1 : 0);
324 if (end == 0) {
325 end++;
326 }
327 return fileName.substring(0, end);
328 }
329
330 /**
331 * Does the work of getting the path.
332 *
333 * @param fileName the file name
334 * @param separatorAdd 0 to omit the end separator, 1 to return it
335 * @return the path
336 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
337 */
338 private static String doGetPath(final String fileName, final int separatorAdd) {
339 if (fileName == null) {
340 return null;
341 }
342 final int prefix = getPrefixLength(fileName);
343 if (prefix < 0) {
344 return null;
345 }
346 final int index = indexOfLastSeparator(fileName);
347 final int endIndex = index + separatorAdd;
348 if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
349 return EMPTY_STRING;
350 }
351 return requireNonNullChars(fileName.substring(prefix, endIndex));
352 }
353
354 /**
355 * Internal method to perform the normalization.
356 *
357 * @param fileName the file name
358 * @param separator The separator character to use
359 * @param keepSeparator true to keep the final separator
360 * @return the normalized fileName
361 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
362 */
private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
    // Normalizes in place on a char array: unify separators, collapse repeated separators,
    // drop "." segments, then resolve ".." segments. Returns null when the name is invalid
    // (bad prefix, or ".." climbing above the prefix).
    if (fileName == null) {
        return null;
    }

    // Rejects names containing U+0000 before any work is done.
    requireNonNullChars(fileName);

    // size is the logical length of the content in 'array'; it shrinks as segments are removed.
    int size = fileName.length();
    if (size == 0) {
        return fileName;
    }
    final int prefix = getPrefixLength(fileName);
    if (prefix < 0) {
        return null;
    }

    // Two spare slots: one for the separator that may be appended below, and one because the
    // arraycopy calls in the "." and ".." passes read the element at index 'size' (one past the end).
    final char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy
    fileName.getChars(0, fileName.length(), array, 0);

    // fix separators throughout
    // flipSeparator throws IllegalArgumentException if 'separator' is neither '/' nor '\'.
    // The loop also visits the two spare slots; they hold '\0' and therefore never match.
    final char otherSeparator = flipSeparator(separator);
    for (int i = 0; i < array.length; i++) {
        if (array[i] == otherSeparator) {
            array[i] = separator;
        }
    }

    // add extra separator on the end to simplify code below
    // lastIsDirectory records whether the final component should keep a trailing separator.
    boolean lastIsDirectory = true;
    if (array[size - 1] != separator) {
        array[size++] = separator;
        lastIsDirectory = false;
    }

    // adjoining slashes
    // If we get here, prefix can only be 0 or greater, size 1 or greater
    // If prefix is 0, set loop start to 1 to prevent index errors
    // Starting at the prefix leaves the leading separators of the prefix itself untouched.
    for (int i = prefix != 0 ? prefix : 1; i < size; i++) {
        if (array[i] == separator && array[i - 1] == separator) {
            // Shift the tail left by one to drop the duplicate, then re-examine the same index.
            System.arraycopy(array, i, array, i - 1, size - i);
            size--;
            i--;
        }
    }

    // period slash
    // Removes "./" where the '.' is a whole segment (directly after the prefix or after a separator).
    for (int i = prefix + 1; i < size; i++) {
        if (array[i] == separator && array[i - 1] == '.' &&
                (i == prefix + 1 || array[i - 2] == separator)) {
            if (i == size - 1) {
                // A trailing "." names a directory.
                lastIsDirectory = true;
            }
            System.arraycopy(array, i + 1, array, i - 1, size - i);
            size -= 2;
            i--;
        }
    }

    // double period slash
    // Resolves "../" by removing it together with the segment that precedes it.
    outer:
    for (int i = prefix + 2; i < size; i++) {
        if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
                (i == prefix + 2 || array[i - 3] == separator)) {
            if (i == prefix + 2) {
                // ".." directly after the prefix would climb above it: invalid.
                return null;
            }
            if (i == size - 1) {
                // A trailing ".." names a directory.
                lastIsDirectory = true;
            }
            int j;
            // Search backwards for the separator that starts the preceding segment.
            for (j = i - 4 ; j >= prefix; j--) {
                if (array[j] == separator) {
                    // remove b/../ from a/b/../c
                    System.arraycopy(array, i + 1, array, j + 1, size - i);
                    size -= i - j;
                    i = j + 1;
                    continue outer;
                }
            }
            // remove a/../ from a/../c
            // No earlier separator: the preceding segment starts right at the prefix.
            System.arraycopy(array, i + 1, array, prefix, size - i);
            size -= i + 1 - prefix;
            i = prefix + 1;
        }
    }

    if (size <= 0) { // should never be less than 0
        return EMPTY_STRING;
    }
    // Nothing but the prefix is left (for example the input "~"): return it whole,
    // including the separator appended above.
    if (size <= prefix) { // should never be less than prefix
        return new String(array, 0, size);
    }
    if (lastIsDirectory && keepSeparator) {
        return new String(array, 0, size); // keep trailing separator
    }
    return new String(array, 0, size - 1); // lose trailing separator
}
460
461 /**
462 * Checks whether two file names are exactly equal.
463 * <p>
464 * No processing is performed on the file names other than comparison.
465 * This is merely a null-safe case-sensitive string equality.
466 * </p>
467 *
468 * @param fileName1 the first file name, may be null
469 * @param fileName2 the second file name, may be null
470 * @return true if the file names are equal, null equals null
471 * @see IOCase#SENSITIVE
472 */
473 public static boolean equals(final String fileName1, final String fileName2) {
474 return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
475 }
476
477 /**
478 * Checks whether two file names are equal, optionally normalizing and providing
479 * control over the case-sensitivity.
480 *
481 * @param fileName1 the first file name, may be null
482 * @param fileName2 the second file name, may be null
483 * @param normalize whether to normalize the file names
484 * @param ioCase what case sensitivity rule to use, null means case-sensitive
485 * @return true if the file names are equal, null equals null
486 * @since 1.3
487 */
488 public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) {
489
490 if (fileName1 == null || fileName2 == null) {
491 return fileName1 == null && fileName2 == null;
492 }
493 if (normalize) {
494 fileName1 = normalize(fileName1);
495 if (fileName1 == null) {
496 return false;
497 }
498 fileName2 = normalize(fileName2);
499 if (fileName2 == null) {
500 return false;
501 }
502 }
503 return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2);
504 }
505
506 /**
507 * Checks whether two file names are equal after both have been normalized.
508 * <p>
509 * Both file names are first passed to {@link #normalize(String)}.
510 * The check is then performed in a case-sensitive manner.
511 * </p>
512 *
513 * @param fileName1 the first file name, may be null
514 * @param fileName2 the second file name, may be null
515 * @return true if the file names are equal, null equals null
516 * @see IOCase#SENSITIVE
517 */
518 public static boolean equalsNormalized(final String fileName1, final String fileName2) {
519 return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
520 }
521
522 /**
523 * Checks whether two file names are equal using the case rules of the system
524 * after both have been normalized.
525 * <p>
526 * Both file names are first passed to {@link #normalize(String)}.
527 * The check is then performed case-sensitively on Unix and
528 * case-insensitively on Windows.
529 * </p>
530 *
531 * @param fileName1 the first file name, may be null
532 * @param fileName2 the second file name, may be null
533 * @return true if the file names are equal, null equals null
534 * @see IOCase#SYSTEM
535 */
536 public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
537 return equals(fileName1, fileName2, true, IOCase.SYSTEM);
538 }
539
540 /**
541 * Checks whether two file names are equal using the case rules of the system.
542 * <p>
543 * No processing is performed on the file names other than comparison.
544 * The check is case-sensitive on Unix and case-insensitive on Windows.
545 * </p>
546 *
547 * @param fileName1 the first file name, may be null
548 * @param fileName2 the second file name, may be null
549 * @return true if the file names are equal, null equals null
550 * @see IOCase#SYSTEM
551 */
552 public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
553 return equals(fileName1, fileName2, false, IOCase.SYSTEM);
554 }
555
556 /**
557 * Flips the Windows name separator to Linux and vice-versa.
558 *
559 * @param ch The Windows or Linux name separator.
560 * @return The Windows or Linux name separator.
561 */
562 static char flipSeparator(final char ch) {
563 if (ch == UNIX_NAME_SEPARATOR) {
564 return WINDOWS_NAME_SEPARATOR;
565 }
566 if (ch == WINDOWS_NAME_SEPARATOR) {
567 return UNIX_NAME_SEPARATOR;
568 }
569 throw new IllegalArgumentException(String.valueOf(ch));
570 }
571
572 /**
573 * Special handling for NTFS ADS: Don't accept colon in the file name.
574 *
575 * @param fileName a file name
 * @return the offset just past the last name separator (of either style), or 0 if the file name contains no separator.
577 */
578 private static int getAdsCriticalOffset(final String fileName) {
579 // Step 1: Remove leading path segments.
580 final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR);
581 final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR);
582 if (offset1 == -1) {
583 if (offset2 == -1) {
584 return 0;
585 }
586 return offset2 + 1;
587 }
588 if (offset2 == -1) {
589 return offset1 + 1;
590 }
591 return Math.max(offset1, offset2) + 1;
592 }
593
594 /**
595 * Gets the base name, minus the full path and extension, from a full file name.
596 * <p>
597 * This method will handle a path in either Unix or Windows format.
598 * The text after the last forward or backslash and before the last period is returned.
599 * </p>
600 * <pre>
601 * a/b/c.txt --> c
602 * a\b\c.txt --> c
603 * a/b/c.foo.txt --> c.foo
604 * a.txt --> a
605 * a/b/c --> c
606 * a/b/c/ --> ""
607 * </pre>
608 * <p>
609 * The output will be the same irrespective of the machine that the code is running on.
610 * </p>
611 *
612 * @param fileName the file name, null returns null
 * @return the name of the file without the path and without the extension, or an empty string if none exists
614 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
615 */
616 public static String getBaseName(final String fileName) {
617 return removeExtension(getName(fileName));
618 }
619
620 /**
621 * Gets the extension of a file name.
622 * <p>
623 * This method returns the textual part of the file name after the last period.
624 * There must be no directory separator after the period.
625 * </p>
626 * <pre>
627 * foo.txt --> "txt"
628 * a/b/c.jpg --> "jpg"
629 * a/b.txt/c --> ""
630 * a/b/c --> ""
631 * </pre>
632 * <p>
633 * The output will be the same irrespective of the machine that the code is running on, with the
634 * exception of a possible {@link IllegalArgumentException} on Windows (see below).
635 * </p>
636 * <p>
637 * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
638 * In this case, the name wouldn't be the name of a file, but the identifier of an
639 * alternate data stream (bar.txt) on the file foo.exe. The method used to return
640 * ".txt" here, which would be misleading. Commons IO 2.7 and later throw
641 * an {@link IllegalArgumentException} for names like this.
642 * </p>
643 *
644 * @param fileName the file name to retrieve the extension of.
645 * @return the extension of the file or an empty string if none exists or {@code null}
646 * if the file name is {@code null}.
647 * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
648 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
649 */
650 public static String getExtension(final String fileName) throws IllegalArgumentException {
651 if (fileName == null) {
652 return null;
653 }
654 final int index = indexOfExtension(fileName);
655 if (index == NOT_FOUND) {
656 return EMPTY_STRING;
657 }
658 return fileName.substring(index + 1);
659 }
660
661 /**
662 * Gets the full path (prefix + path) from a full file name.
663 * <p>
664 * This method will handle a file in either Unix or Windows format.
665 * The method is entirely text based, and returns the text before and
666 * including the last forward or backslash.
667 * </p>
668 * <pre>
669 * C:\a\b\c.txt --> C:\a\b\
670 * ~/a/b/c.txt --> ~/a/b/
671 * a.txt --> ""
672 * a/b/c --> a/b/
673 * a/b/c/ --> a/b/c/
674 * C: --> C:
675 * C:\ --> C:\
676 * ~ --> ~/
677 * ~/ --> ~/
678 * ~user --> ~user/
679 * ~user/ --> ~user/
680 * </pre>
681 * <p>
682 * The output will be the same irrespective of the machine that the code is running on.
683 * </p>
684 *
685 * @param fileName the file name, null returns null
686 * @return the path of the file, an empty string if none exists, null if invalid
687 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
688 */
689 public static String getFullPath(final String fileName) {
690 return doGetFullPath(fileName, true);
691 }
692
693 /**
694 * Gets the full path (prefix + path) from a full file name,
695 * excluding the final directory separator.
696 * <p>
697 * This method will handle a file in either Unix or Windows format.
698 * The method is entirely text based, and returns the text before the
699 * last forward or backslash.
700 * </p>
701 * <pre>
702 * C:\a\b\c.txt --> C:\a\b
703 * ~/a/b/c.txt --> ~/a/b
704 * a.txt --> ""
705 * a/b/c --> a/b
706 * a/b/c/ --> a/b/c
707 * C: --> C:
708 * C:\ --> C:\
 * ~ --> ~
 * ~/ --> ~/
 * ~user --> ~user
 * ~user/ --> ~user/
713 * </pre>
714 * <p>
715 * The output will be the same irrespective of the machine that the code is running on.
716 * </p>
717 *
718 * @param fileName the file name, null returns null
719 * @return the path of the file, an empty string if none exists, null if invalid
720 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
721 */
722 public static String getFullPathNoEndSeparator(final String fileName) {
723 return doGetFullPath(fileName, false);
724 }
725
726 /**
727 * Gets the name minus the path from a full file name.
728 * <p>
729 * This method will handle a file in either Unix or Windows format.
730 * The text after the last forward or backslash is returned.
731 * </p>
732 * <pre>
733 * a/b/c.txt --> c.txt
734 * a\b\c.txt --> c.txt
735 * a.txt --> a.txt
736 * a/b/c --> c
737 * a/b/c/ --> ""
738 * </pre>
739 * <p>
740 * The output will be the same irrespective of the machine that the code is running on.
741 * </p>
742 *
743 * @param fileName the file name, null returns null
744 * @return the name of the file without the path, or an empty string if none exists
745 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
746 */
747 public static String getName(final String fileName) {
748 if (fileName == null) {
749 return null;
750 }
751 return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1);
752 }
753
754 /**
755 * Gets the path from a full file name, which excludes the prefix and the name.
756 * <p>
757 * This method will handle a file in either Unix or Windows format.
758 * The method is entirely text based, and returns the text before and
759 * including the last forward or backslash.
760 * </p>
761 * <pre>
762 * C:\a\b\c.txt --> a\b\
763 * ~/a/b/c.txt --> a/b/
764 * a.txt --> ""
765 * a/b/c --> a/b/
766 * a/b/c/ --> a/b/c/
767 * </pre>
768 * <p>
769 * The output will be the same irrespective of the machine that the code is running on.
770 * </p>
771 * <p>
772 * This method drops the prefix from the result.
773 * See {@link #getFullPath(String)} for the method that retains the prefix.
774 * </p>
775 *
776 * @param fileName the file name, null returns null
777 * @return the path of the file, an empty string if none exists, null if invalid
778 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
779 */
780 public static String getPath(final String fileName) {
781 return doGetPath(fileName, 1);
782 }
783
784 /**
785 * Gets the path (which excludes the prefix) from a full file name, and
786 * also excluding the final directory separator.
787 * <p>
788 * This method will handle a file in either Unix or Windows format.
789 * The method is entirely text based, and returns the text before the
790 * last forward or backslash.
791 * </p>
792 * <pre>
793 * C:\a\b\c.txt --> a\b
794 * ~/a/b/c.txt --> a/b
795 * a.txt --> ""
796 * a/b/c --> a/b
797 * a/b/c/ --> a/b/c
798 * </pre>
799 * <p>
800 * The output will be the same irrespective of the machine that the code is running on.
801 * </p>
802 * <p>
803 * This method drops the prefix from the result.
804 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
805 * </p>
806 *
807 * @param fileName the file name, null returns null
808 * @return the path of the file, an empty string if none exists, null if invalid
809 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
810 */
811 public static String getPathNoEndSeparator(final String fileName) {
812 return doGetPath(fileName, 0);
813 }
814
815 /**
 * Gets the prefix such as {@code C:/} or {@code ~/} from a full file name.
817 * <p>
818 * This method will handle a file in either Unix or Windows format.
819 * The prefix includes the first slash in the full file name where applicable.
820 * </p>
821 * <pre>
822 * Windows:
823 * a\b\c.txt --> "" --> relative
824 * \a\b\c.txt --> "\" --> current drive absolute
825 * C:a\b\c.txt --> "C:" --> drive relative
826 * C:\a\b\c.txt --> "C:\" --> absolute
827 * \\server\a\b\c.txt --> "\\server\" --> UNC
828 *
829 * Unix:
830 * a/b/c.txt --> "" --> relative
831 * /a/b/c.txt --> "/" --> absolute
832 * ~/a/b/c.txt --> "~/" --> current user
833 * ~ --> "~/" --> current user (slash added)
834 * ~user/a/b/c.txt --> "~user/" --> named user
835 * ~user --> "~user/" --> named user (slash added)
836 * </pre>
837 * <p>
838 * The output will be the same irrespective of the machine that the code is running on.
839 * ie. both Unix and Windows prefixes are matched regardless.
840 * </p>
841 *
842 * @param fileName the file name, null returns null
843 * @return the prefix of the file, null if invalid
844 * @throws IllegalArgumentException if the result contains the null character ({@code U+0000})
845 */
846 public static String getPrefix(final String fileName) {
847 if (fileName == null) {
848 return null;
849 }
850 final int len = getPrefixLength(fileName);
851 if (len < 0) {
852 return null;
853 }
854 if (len > fileName.length()) {
855 requireNonNullChars(fileName);
856 return fileName + UNIX_NAME_SEPARATOR;
857 }
858 return requireNonNullChars(fileName.substring(0, len));
859 }
860
861 /**
862 * Returns the length of the file name prefix, such as {@code C:/} or {@code ~/}.
863 * <p>
864 * This method will handle a file in either Unix or Windows format.
865 * </p>
866 * <p>
867 * The prefix length includes the first slash in the full file name
868 * if applicable. Thus, it is possible that the length returned is greater
869 * than the length of the input string.
870 * </p>
871 * <pre>
872 * Windows:
873 * a\b\c.txt --> 0 --> relative
874 * \a\b\c.txt --> 1 --> current drive absolute
875 * C:a\b\c.txt --> 2 --> drive relative
876 * C:\a\b\c.txt --> 3 --> absolute
877 * \\server\a\b\c.txt --> 9 --> UNC
878 * \\\a\b\c.txt --> -1 --> error
879 *
880 * Unix:
881 * a/b/c.txt --> 0 --> relative
882 * /a/b/c.txt --> 1 --> absolute
883 * ~/a/b/c.txt --> 2 --> current user
884 * ~ --> 2 --> current user (slash added)
885 * ~user/a/b/c.txt --> 6 --> named user
886 * ~user --> 6 --> named user (slash added)
887 * //server/a/b/c.txt --> 9
888 * ///a/b/c.txt --> -1 --> error
889 * C: --> 0 --> valid file name as only null character and / are reserved characters
890 * </pre>
891 * <p>
892 * The output will be the same irrespective of the machine that the code is running on.
893 * ie. both Unix and Windows prefixes are matched regardless.
894 * </p>
895 * <p>
896 * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
897 * These must be followed by a server name, so double-slashes are not collapsed
898 * to a single slash at the start of the file name.
899 * </p>
900 *
901 * @param fileName the file name to find the prefix in, null returns -1
902 * @return the length of the prefix, -1 if invalid or null
903 */
904 public static int getPrefixLength(final String fileName) {
905 if (fileName == null) {
906 return NOT_FOUND;
907 }
908 final int len = fileName.length();
909 if (len == 0) {
910 return 0;
911 }
912 char ch0 = fileName.charAt(0);
913 if (ch0 == ':') {
914 return NOT_FOUND;
915 }
916 if (len == 1) {
917 if (ch0 == '~') {
918 return 2; // return a length greater than the input
919 }
920 return isSeparator(ch0) ? 1 : 0;
921 }
922 if (ch0 == '~') {
923 int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1);
924 int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1);
925 if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
926 return len + 1; // return a length greater than the input
927 }
928 posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
929 posWin = posWin == NOT_FOUND ? posUnix : posWin;
930 return Math.min(posUnix, posWin) + 1;
931 }
932 final char ch1 = fileName.charAt(1);
933 if (ch1 == ':') {
934 ch0 = Character.toUpperCase(ch0);
935 if (ch0 >= 'A' && ch0 <= 'Z') {
936 if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
937 return 0;
938 }
939 if (len == 2 || !isSeparator(fileName.charAt(2))) {
940 return 2;
941 }
942 return 3;
943 }
944 if (ch0 == UNIX_NAME_SEPARATOR) {
945 return 1;
946 }
947 return NOT_FOUND;
948
949 }
950 if (!isSeparator(ch0) || !isSeparator(ch1)) {
951 return isSeparator(ch0) ? 1 : 0;
952 }
953 int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2);
954 int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2);
955 if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
956 return NOT_FOUND;
957 }
958 posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
959 posWin = posWin == NOT_FOUND ? posUnix : posWin;
960 final int pos = Math.min(posUnix, posWin) + 1;
961 final String hostnamePart = fileName.substring(2, pos - 1);
962 return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
963 }
964
965 /**
966 * Returns the index of the last extension separator character, which is a period.
967 * <p>
968 * This method also checks that there is no directory separator after the last period. To do this it uses
969 * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
970 * </p>
971 * <p>
972 * The output will be the same irrespective of the machine that the code is running on, with the
973 * exception of a possible {@link IllegalArgumentException} on Windows (see below).
974 * </p>
975 * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
976 * In this case, the name wouldn't be the name of a file, but the identifier of an
977 * alternate data stream (bar.txt) on the file foo.exe. The method used to return
978 * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
979 * an {@link IllegalArgumentException} for names like this.
980 *
981 * @param fileName
982 * the file name to find the last extension separator in, null returns -1
983 * @return the index of the last extension separator character, or -1 if there is no such character
984 * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
985 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
986 */
987 public static int indexOfExtension(final String fileName) throws IllegalArgumentException {
988 if (fileName == null) {
989 return NOT_FOUND;
990 }
991 if (isSystemWindows()) {
992 // Special handling for NTFS ADS: Don't accept colon in the file name.
993 final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
994 if (offset != -1) {
995 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
996 }
997 }
998 final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
999 final int lastSeparator = indexOfLastSeparator(fileName);
1000 return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
1001 }
1002
1003 /**
1004 * Returns the index of the last directory separator character.
1005 * <p>
1006 * This method will handle a file in either Unix or Windows format.
1007 * The position of the last forward or backslash is returned.
1008 * <p>
1009 * The output will be the same irrespective of the machine that the code is running on.
1010 *
1011 * @param fileName the file name to find the last path separator in, null returns -1
1012 * @return the index of the last separator character, or -1 if there
1013 * is no such character
1014 */
1015 public static int indexOfLastSeparator(final String fileName) {
1016 if (fileName == null) {
1017 return NOT_FOUND;
1018 }
1019 final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR);
1020 final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR);
1021 return Math.max(lastUnixPos, lastWindowsPos);
1022 }
1023
1024 private static boolean isEmpty(final String string) {
1025 return string == null || string.isEmpty();
1026 }
1027
1028 /**
1029 * Checks whether the extension of the file name is one of those specified.
1030 * <p>
1031 * This method obtains the extension as the textual part of the file name
1032 * after the last period. There must be no directory separator after the period.
1033 * The extension check is case-sensitive on all platforms.
1034 *
1035 * @param fileName the file name, null returns false
1036 * @param extensions the extensions to check for, null checks for no extension
1037 * @return true if the file name is one of the extensions
1038 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1039 */
1040 public static boolean isExtension(final String fileName, final Collection<String> extensions) {
1041 if (fileName == null) {
1042 return false;
1043 }
1044 requireNonNullChars(fileName);
1045
1046 if (extensions == null || extensions.isEmpty()) {
1047 return indexOfExtension(fileName) == NOT_FOUND;
1048 }
1049 return extensions.contains(getExtension(fileName));
1050 }
1051
1052 /**
1053 * Checks whether the extension of the file name is that specified.
1054 * <p>
1055 * This method obtains the extension as the textual part of the file name
1056 * after the last period. There must be no directory separator after the period.
1057 * The extension check is case-sensitive on all platforms.
1058 *
1059 * @param fileName the file name, null returns false
1060 * @param extension the extension to check for, null or empty checks for no extension
1061 * @return true if the file name has the specified extension
1062 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1063 */
1064 public static boolean isExtension(final String fileName, final String extension) {
1065 if (fileName == null) {
1066 return false;
1067 }
1068 requireNonNullChars(fileName);
1069
1070 if (isEmpty(extension)) {
1071 return indexOfExtension(fileName) == NOT_FOUND;
1072 }
1073 return getExtension(fileName).equals(extension);
1074 }
1075
1076 /**
1077 * Checks whether the extension of the file name is one of those specified.
1078 * <p>
1079 * This method obtains the extension as the textual part of the file name
1080 * after the last period. There must be no directory separator after the period.
1081 * The extension check is case-sensitive on all platforms.
1082 *
1083 * @param fileName the file name, null returns false
1084 * @param extensions the extensions to check for, null checks for no extension
1085 * @return true if the file name is one of the extensions
1086 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1087 */
1088 public static boolean isExtension(final String fileName, final String... extensions) {
1089 if (fileName == null) {
1090 return false;
1091 }
1092 requireNonNullChars(fileName);
1093
1094 if (extensions == null || extensions.length == 0) {
1095 return indexOfExtension(fileName) == NOT_FOUND;
1096 }
1097 final String fileExt = getExtension(fileName);
1098 return Stream.of(extensions).anyMatch(fileExt::equals);
1099 }
1100
1101 /**
1102 * Checks whether a given string represents a valid IPv4 address.
1103 *
1104 * @param name the name to validate
1105 * @return true if the given name is a valid IPv4 address
1106 */
1107 // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
1108 private static boolean isIPv4Address(final String name) {
1109 final Matcher m = IPV4_PATTERN.matcher(name);
1110 if (!m.matches() || m.groupCount() != 4) {
1111 return false;
1112 }
1113
1114 // verify that address subgroups are legal
1115 for (int i = 1; i <= 4; i++) {
1116 final String ipSegment = m.group(i);
1117 final int iIpSegment = Integer.parseInt(ipSegment);
1118 if (iIpSegment > IPV4_MAX_OCTET_VALUE) {
1119 return false;
1120 }
1121
1122 if (ipSegment.length() > 1 && ipSegment.startsWith("0")) {
1123 return false;
1124 }
1125
1126 }
1127
1128 return true;
1129 }
1130
1131 // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
1132 /**
1133 * Checks whether a given string represents a valid IPv6 address.
1134 *
1135 * @param inet6Address the name to validate
1136 * @return true if the given name is a valid IPv6 address
1137 */
1138 private static boolean isIPv6Address(final String inet6Address) {
1139 final boolean containsCompressedZeroes = inet6Address.contains("::");
1140 if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) {
1141 return false;
1142 }
1143 if (inet6Address.startsWith(":") && !inet6Address.startsWith("::")
1144 || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) {
1145 return false;
1146 }
1147 String[] octets = inet6Address.split(":");
1148 if (containsCompressedZeroes) {
1149 final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
1150 if (inet6Address.endsWith("::")) {
1151 // String.split() drops ending empty segments
1152 octetList.add("");
1153 } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
1154 octetList.remove(0);
1155 }
1156 octets = octetList.toArray(EMPTY_STRING_ARRAY);
1157 }
1158 if (octets.length > IPV6_MAX_HEX_GROUPS) {
1159 return false;
1160 }
1161 int validOctets = 0;
1162 int emptyOctets = 0; // consecutive empty chunks
1163 for (int index = 0; index < octets.length; index++) {
1164 final String octet = octets[index];
1165 if (octet.isEmpty()) {
1166 emptyOctets++;
1167 if (emptyOctets > 1) {
1168 return false;
1169 }
1170 } else {
1171 emptyOctets = 0;
1172 // Is last chunk an IPv4 address?
1173 if (index == octets.length - 1 && octet.contains(".")) {
1174 if (!isIPv4Address(octet)) {
1175 return false;
1176 }
1177 validOctets += 2;
1178 continue;
1179 }
1180 if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
1181 return false;
1182 }
1183 final int octetInt;
1184 try {
1185 octetInt = Integer.parseInt(octet, BASE_16);
1186 } catch (final NumberFormatException e) {
1187 return false;
1188 }
1189 if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
1190 return false;
1191 }
1192 }
1193 validOctets++;
1194 }
1195 return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
1196 }
1197
1198 /**
1199 * Checks whether a given string is a valid host name according to
1200 * RFC 3986 - not accepting IP addresses.
1201 *
1202 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1203 * @param name the hostname to validate
1204 * @return true if the given name is a valid host name
1205 */
1206 private static boolean isRFC3986HostName(final String name) {
1207 final String[] parts = name.split("\\.", -1);
1208 for (int i = 0; i < parts.length; i++) {
1209 if (parts[i].isEmpty()) {
1210 // trailing period is legal, otherwise we've hit a .. sequence
1211 return i == parts.length - 1;
1212 }
1213 if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
1214 return false;
1215 }
1216 }
1217 return true;
1218 }
1219
1220 /**
1221 * Checks if the character is a separator.
1222 *
1223 * @param ch the character to check
1224 * @return true if it is a separator character
1225 */
1226 private static boolean isSeparator(final char ch) {
1227 return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR;
1228 }
1229
1230 /**
1231 * Determines if Windows file system is in use.
1232 *
1233 * @return true if the system is Windows
1234 */
1235 static boolean isSystemWindows() {
1236 return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR;
1237 }
1238
1239 /**
1240 * Checks whether a given string is a valid host name according to
1241 * RFC 3986.
1242 *
1243 * <p>Accepted are IP addresses (v4 and v6) as well as what the
1244 * RFC calls a "reg-name". Percent encoded names don't seem to be
1245 * valid names in UNC paths.</p>
1246 *
1247 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1248 * @param name the hostname to validate
1249 * @return true if the given name is a valid host name
1250 */
1251 private static boolean isValidHostName(final String name) {
1252 return isIPv6Address(name) || isRFC3986HostName(name);
1253 }
1254
1255 /**
1256 * Normalizes a path, removing double and single period path steps.
1257 * <p>
1258 * This method normalizes a path to a standard format.
1259 * The input may contain separators in either Unix or Windows format.
1260 * The output will contain separators in the format of the system.
1261 * <p>
1262 * A trailing slash will be retained.
1263 * A double slash will be merged to a single slash (but UNC names are handled).
1264 * A single period path segment will be removed.
1265 * A double period will cause that path segment and the one before to be removed.
1266 * If the double period has no parent path segment, {@code null} is returned.
1267 * <p>
1268 * The output will be the same on both Unix and Windows except
1269 * for the separator character.
1270 * <pre>
1271 * /foo// --> /foo/
1272 * /foo/./ --> /foo/
1273 * /foo/../bar --> /bar
1274 * /foo/../bar/ --> /bar/
1275 * /foo/../bar/../baz --> /baz
1276 * //foo//./bar --> //foo/bar
1277 * /../ --> null
1278 * ../foo --> null
1279 * foo/bar/.. --> foo/
1280 * foo/../../bar --> null
1281 * foo/../bar --> bar
1282 * //server/foo/../bar --> //server/bar
1283 * //server/../bar --> null
1284 * C:\foo\..\bar --> C:\bar
1285 * C:\..\bar --> null
1286 * ~/foo/../bar/ --> ~/bar/
1287 * ~/../bar --> null
1288 * </pre>
1289 * (Note the file separator will be correct for Windows/Unix.)
1290 *
1291 * @param fileName the file name to normalize, null returns null
1292 * @return the normalized fileName, or null if invalid
1293 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1294 */
1295 public static String normalize(final String fileName) {
1296 return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true);
1297 }
1298
1299 /**
1300 * Normalizes a path, removing double and single period path steps.
1301 * <p>
1302 * This method normalizes a path to a standard format.
1303 * The input may contain separators in either Unix or Windows format.
1304 * The output will contain separators in the format specified.
1305 * <p>
1306 * A trailing slash will be retained.
1307 * A double slash will be merged to a single slash (but UNC names are handled).
1308 * A single period path segment will be removed.
1309 * A double period will cause that path segment and the one before to be removed.
1310 * If the double period has no parent path segment to work with, {@code null}
1311 * is returned.
1312 * <p>
1313 * The output will be the same on both Unix and Windows except
1314 * for the separator character.
1315 * <pre>
1316 * /foo// --> /foo/
1317 * /foo/./ --> /foo/
1318 * /foo/../bar --> /bar
1319 * /foo/../bar/ --> /bar/
1320 * /foo/../bar/../baz --> /baz
1321 * //foo//./bar --> /foo/bar
1322 * /../ --> null
1323 * ../foo --> null
1324 * foo/bar/.. --> foo/
1325 * foo/../../bar --> null
1326 * foo/../bar --> bar
1327 * //server/foo/../bar --> //server/bar
1328 * //server/../bar --> null
1329 * C:\foo\..\bar --> C:\bar
1330 * C:\..\bar --> null
1331 * ~/foo/../bar/ --> ~/bar/
1332 * ~/../bar --> null
1333 * </pre>
1334 * The output will be the same on both Unix and Windows including
1335 * the separator character.
1336 *
1337 * @param fileName the file name to normalize, null returns null
1338 * @param unixSeparator {@code true} if a Unix separator should
1339 * be used or {@code false} if a Windows separator should be used.
1340 * @return the normalized fileName, or null if invalid
1341 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1342 * @since 2.0
1343 */
1344 public static String normalize(final String fileName, final boolean unixSeparator) {
1345 return doNormalize(fileName, toSeparator(unixSeparator), true);
1346 }
1347
1348 /**
1349 * Normalizes a path, removing double and single period path steps,
1350 * and removing any final directory separator.
1351 * <p>
1352 * This method normalizes a path to a standard format.
1353 * The input may contain separators in either Unix or Windows format.
1354 * The output will contain separators in the format of the system.
1355 * <p>
1356 * A trailing slash will be removed.
1357 * A double slash will be merged to a single slash (but UNC names are handled).
1358 * A single period path segment will be removed.
1359 * A double period will cause that path segment and the one before to be removed.
1360 * If the double period has no parent path segment to work with, {@code null}
1361 * is returned.
1362 * <p>
1363 * The output will be the same on both Unix and Windows except
1364 * for the separator character.
1365 * <pre>
1366 * /foo// --> /foo
1367 * /foo/./ --> /foo
1368 * /foo/../bar --> /bar
1369 * /foo/../bar/ --> /bar
1370 * /foo/../bar/../baz --> /baz
1371 * //foo//./bar --> /foo/bar
1372 * /../ --> null
1373 * ../foo --> null
1374 * foo/bar/.. --> foo
1375 * foo/../../bar --> null
1376 * foo/../bar --> bar
1377 * //server/foo/../bar --> //server/bar
1378 * //server/../bar --> null
1379 * C:\foo\..\bar --> C:\bar
1380 * C:\..\bar --> null
1381 * ~/foo/../bar/ --> ~/bar
1382 * ~/../bar --> null
1383 * </pre>
1384 * (Note the file separator returned will be correct for Windows/Unix)
1385 *
1386 * @param fileName the file name to normalize, null returns null
1387 * @return the normalized fileName, or null if invalid
1388 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1389 */
1390 public static String normalizeNoEndSeparator(final String fileName) {
1391 return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false);
1392 }
1393
1394 /**
1395 * Normalizes a path, removing double and single period path steps,
1396 * and removing any final directory separator.
1397 * <p>
1398 * This method normalizes a path to a standard format.
1399 * The input may contain separators in either Unix or Windows format.
1400 * The output will contain separators in the format specified.
1401 * <p>
1402 * A trailing slash will be removed.
1403 * A double slash will be merged to a single slash (but UNC names are handled).
1404 * A single period path segment will be removed.
1405 * A double period will cause that path segment and the one before to be removed.
1406 * If the double period has no parent path segment to work with, {@code null}
1407 * is returned.
1408 * <p>
1409 * The output will be the same on both Unix and Windows including
1410 * the separator character.
1411 * <pre>
1412 * /foo// --> /foo
1413 * /foo/./ --> /foo
1414 * /foo/../bar --> /bar
1415 * /foo/../bar/ --> /bar
1416 * /foo/../bar/../baz --> /baz
1417 * //foo//./bar --> /foo/bar
1418 * /../ --> null
1419 * ../foo --> null
1420 * foo/bar/.. --> foo
1421 * foo/../../bar --> null
1422 * foo/../bar --> bar
1423 * //server/foo/../bar --> //server/bar
1424 * //server/../bar --> null
1425 * C:\foo\..\bar --> C:\bar
1426 * C:\..\bar --> null
1427 * ~/foo/../bar/ --> ~/bar
1428 * ~/../bar --> null
1429 * </pre>
1430 *
1431 * @param fileName the file name to normalize, null returns null
1432 * @param unixSeparator {@code true} if a Unix separator should
1433 * be used or {@code false} if a Windows separator should be used.
1434 * @return the normalized fileName, or null if invalid
1435 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1436 * @since 2.0
1437 */
1438 public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
1439 return doNormalize(fileName, toSeparator(unixSeparator), false);
1440 }
1441
1442 /**
1443 * Removes the extension from a fileName.
1444 * <p>
1445 * This method returns the textual part of the file name before the last period.
1446 * There must be no directory separator after the period.
1447 * <pre>
1448 * foo.txt --> foo
1449 * .txt --> "" (empty string)
1450 * a\b\c.jpg --> a\b\c
1451 * /a/b/c.jpg --> /a/b/c
1452 * a\b\c --> a\b\c
1453 * a.b\c --> a.b\c
1454 * </pre>
1455 * <p>
1456 * The output will be the same irrespective of the machine that the code is running on.
1457 *
1458 * @param fileName the file name, null returns null
1459 * @return the file name minus the extension
1460 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1461 */
1462 public static String removeExtension(final String fileName) {
1463 if (fileName == null) {
1464 return null;
1465 }
1466 requireNonNullChars(fileName);
1467
1468 final int index = indexOfExtension(fileName);
1469 if (index == NOT_FOUND) {
1470 return fileName;
1471 }
1472 return fileName.substring(0, index);
1473 }
1474
1475 /**
1476 * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions.
1477 *
1478 * This may be used to defend against poison byte attacks.
1479 *
1480 * @param path the path to check
1481 * @return The input
1482 * @throws IllegalArgumentException if path contains the null character ({@code U+0000})
1483 */
1484 private static String requireNonNullChars(final String path) {
1485 if (path.indexOf(0) >= 0) {
1486 throw new IllegalArgumentException(
1487 "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it");
1488 }
1489 return path;
1490 }
1491
1492 /**
1493 * Converts all separators to the system separator.
1494 *
1495 * @param path the path to be changed, null ignored.
1496 * @return the updated path.
1497 */
1498 public static String separatorsToSystem(final String path) {
1499 return FileSystem.getCurrent().normalizeSeparators(path);
1500 }
1501
1502 /**
1503 * Converts all separators to the Unix separator of forward slash.
1504 *
1505 * @param path the path to be changed, null ignored.
1506 * @return the new path.
1507 */
1508 public static String separatorsToUnix(final String path) {
1509 return FileSystem.LINUX.normalizeSeparators(path);
1510 }
1511
1512 /**
1513 * Converts all separators to the Windows separator of backslash.
1514 *
1515 * @param path the path to be changed, null ignored.
1516 * @return the updated path.
1517 */
1518 public static String separatorsToWindows(final String path) {
1519 return FileSystem.WINDOWS.normalizeSeparators(path);
1520 }
1521
1522 /**
1523 * Splits a string into a number of tokens.
1524 * The text is split by '?' and '*'.
1525 * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1526 *
1527 * @param text the text to split
1528 * @return the array of tokens, never null
1529 */
1530 static String[] splitOnTokens(final String text) {
1531 // used by wildcardMatch
1532 // package level so a unit test may run on this
1533
1534 if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1535 return new String[] { text };
1536 }
1537
1538 final char[] array = text.toCharArray();
1539 final ArrayList<String> list = new ArrayList<>();
1540 final StringBuilder buffer = new StringBuilder();
1541 char prevChar = 0;
1542 for (final char ch : array) {
1543 if (ch == '?' || ch == '*') {
1544 if (buffer.length() != 0) {
1545 list.add(buffer.toString());
1546 buffer.setLength(0);
1547 }
1548 if (ch == '?') {
1549 list.add("?");
1550 } else if (prevChar != '*') { // ch == '*' here; check if previous char was '*'
1551 list.add("*");
1552 }
1553 } else {
1554 buffer.append(ch);
1555 }
1556 prevChar = ch;
1557 }
1558 if (buffer.length() != 0) {
1559 list.add(buffer.toString());
1560 }
1561
1562 return list.toArray(EMPTY_STRING_ARRAY);
1563 }
1564
1565 /**
1566 * Returns '/' if given true, '\\' otherwise.
1567 *
1568 * @param unixSeparator which separator to return.
1569 * @return '/' if given true, '\\' otherwise.
1570 */
1571 private static char toSeparator(final boolean unixSeparator) {
1572 return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR;
1573 }
1574
1575 /**
1576 * Checks a fileName to see if it matches the specified wildcard matcher,
1577 * always testing case-sensitive.
1578 * <p>
1579 * The wildcard matcher uses the characters '?' and '*' to represent a
1580 * single or multiple (zero or more) wildcard characters.
1581 * This is the same as often found on DOS/Unix command lines.
1582 * The check is case-sensitive always.
1583 * <pre>
1584 * wildcardMatch("c.txt", "*.txt") --> true
1585 * wildcardMatch("c.txt", "*.jpg") --> false
1586 * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1587 * wildcardMatch("c.txt", "*.???") --> true
1588 * wildcardMatch("c.txt", "*.????") --> false
1589 * </pre>
1590 * The sequence "*?" does not work properly at present in match strings.
1591 *
1592 * @param fileName the file name to match on
1593 * @param wildcardMatcher the wildcard string to match against
1594 * @return true if the file name matches the wildcard string
1595 * @see IOCase#SENSITIVE
1596 */
1597 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
1598 return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
1599 }
1600
1601 /**
1602 * Checks a fileName to see if it matches the specified wildcard matcher
1603 * allowing control over case-sensitivity.
1604 * <p>
1605 * The wildcard matcher uses the characters '?' and '*' to represent a
1606 * single or multiple (zero or more) wildcard characters.
1607 * The sequence "*?" does not work properly at present in match strings.
1608 *
1609 * @param fileName the file name to match on
1610 * @param wildcardMatcher the wildcard string to match against
1611 * @param ioCase what case sensitivity rule to use, null means case-sensitive
1612 * @return true if the file name matches the wildcard string
1613 * @since 1.3
1614 */
1615 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) {
1616 if (fileName == null && wildcardMatcher == null) {
1617 return true;
1618 }
1619 if (fileName == null || wildcardMatcher == null) {
1620 return false;
1621 }
1622 ioCase = IOCase.value(ioCase, IOCase.SENSITIVE);
1623 final String[] wcs = splitOnTokens(wildcardMatcher);
1624 boolean anyChars = false;
1625 int textIdx = 0;
1626 int wcsIdx = 0;
1627 final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);
1628
1629 // loop around a backtrack stack, to handle complex * matching
1630 do {
1631 if (!backtrack.isEmpty()) {
1632 final int[] array = backtrack.pop();
1633 wcsIdx = array[0];
1634 textIdx = array[1];
1635 anyChars = true;
1636 }
1637
1638 // loop whilst tokens and text left to process
1639 while (wcsIdx < wcs.length) {
1640
1641 if (wcs[wcsIdx].equals("?")) {
1642 // ? so move to next text char
1643 textIdx++;
1644 if (textIdx > fileName.length()) {
1645 break;
1646 }
1647 anyChars = false;
1648
1649 } else if (wcs[wcsIdx].equals("*")) {
1650 // set any chars status
1651 anyChars = true;
1652 if (wcsIdx == wcs.length - 1) {
1653 textIdx = fileName.length();
1654 }
1655
1656 } else {
1657 // matching text token
1658 if (anyChars) {
1659 // any chars then try to locate text token
1660 textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
1661 if (textIdx == NOT_FOUND) {
1662 // token not found
1663 break;
1664 }
1665 final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
1666 if (repeat >= 0) {
1667 backtrack.push(new int[] {wcsIdx, repeat});
1668 }
1669 } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
1670 // matching from current position
1671 // couldn't match token
1672 break;
1673 }
1674
1675 // matched text token, move text index to end of matched token
1676 textIdx += wcs[wcsIdx].length();
1677 anyChars = false;
1678 }
1679
1680 wcsIdx++;
1681 }
1682
1683 // full match
1684 if (wcsIdx == wcs.length && textIdx == fileName.length()) {
1685 return true;
1686 }
1687
1688 } while (!backtrack.isEmpty());
1689
1690 return false;
1691 }
1692
1693 /**
1694 * Checks a fileName to see if it matches the specified wildcard matcher
1695 * using the case rules of the system.
1696 * <p>
1697 * The wildcard matcher uses the characters '?' and '*' to represent a
1698 * single or multiple (zero or more) wildcard characters.
1699 * This is the same as often found on DOS/Unix command lines.
1700 * The check is case-sensitive on Unix and case-insensitive on Windows.
1701 * <pre>
1702 * wildcardMatch("c.txt", "*.txt") --> true
1703 * wildcardMatch("c.txt", "*.jpg") --> false
1704 * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1705 * wildcardMatch("c.txt", "*.???") --> true
1706 * wildcardMatch("c.txt", "*.????") --> false
1707 * </pre>
1708 * The sequence "*?" does not work properly at present in match strings.
1709 *
1710 * @param fileName the file name to match on
1711 * @param wildcardMatcher the wildcard string to match against
1712 * @return true if the file name matches the wildcard string
1713 * @see IOCase#SYSTEM
1714 */
1715 public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
1716 return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
1717 }
1718
1719 /**
1720 * Instances should NOT be constructed in standard programming.
1721 *
1722 * @deprecated TODO Make private in 3.0.
1723 */
1724 @Deprecated
1725 public FilenameUtils() {
1726 // empty
1727 }
1728 }