1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.io;
18
19 import java.io.File;
20 import java.util.ArrayDeque;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.Deque;
25 import java.util.List;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28 import java.util.stream.Stream;
29
30 /**
31 * General file name and file path manipulation utilities. The methods in this class
32 * operate on strings that represent relative or absolute paths. Nothing in this class
33 * ever accesses the file system, or depends on whether a path points to a file that exists.
34 * <p>
35 * When dealing with file names, you can hit problems when moving from a Windows
36 * based development machine to a Unix based production machine.
37 * This class aims to help avoid those problems.
38 * </p>
39 * <p>
40 * <strong>NOTE</strong>: You may be able to avoid using this class entirely simply by
41 * using JDK {@link File File} objects and the two argument constructor
42 * {@link File#File(java.io.File, String) File(File,String)}.
43 * </p>
44 * <p>
45 * Most methods in this class are designed to work the same on both Unix and Windows.
 * Methods that behave differently depending on the platform have 'System', 'Unix', or 'Windows' in their name.
47 * </p>
48 * <p>
49 * Most methods recognize both separators (forward and backslashes), and both
50 * sets of prefixes. See the Javadoc of each method for details.
51 * </p>
52 * <p>
53 * This class defines six components within a path (sometimes called a file name or a full file name).
54 * Given an absolute Windows path such as C:\dev\project\file.txt they are:
55 * </p>
56 * <ul>
57 * <li>the full file name, or just file name - C:\dev\project\file.txt</li>
58 * <li>the prefix - C:\</li>
59 * <li>the path - dev\project\</li>
60 * <li>the full path - C:\dev\project\</li>
61 * <li>the name - file.txt</li>
62 * <li>the base name - file</li>
63 * <li>the extension - txt</li>
64 * </ul>
65 * <p>
66 * Given an absolute Unix path such as /dev/project/file.txt they are:
67 * </p>
68 * <ul>
69 * <li>the full file name, or just file name - /dev/project/file.txt</li>
70 * <li>the prefix - /</li>
 * <li>the path - dev/project/</li>
 * <li>the full path - /dev/project/</li>
73 * <li>the name - file.txt</li>
74 * <li>the base name - file</li>
75 * <li>the extension - txt</li>
76 * </ul>
77 * <p>
78 * Given a relative Windows path such as dev\project\file.txt they are:
79 * </p>
80 * <ul>
81 * <li>the full file name, or just file name - dev\project\file.txt</li>
 * <li>the prefix - "" (the empty string)</li>
83 * <li>the path - dev\project\</li>
84 * <li>the full path - dev\project\</li>
85 * <li>the name - file.txt</li>
86 * <li>the base name - file</li>
87 * <li>the extension - txt</li>
88 * </ul>
89 * <p>
 * Given a relative Unix path such as dev/project/file.txt they are:
 * </p>
 * <ul>
 * <li>the full file name, or just file name - dev/project/file.txt</li>
 * <li>the prefix - "" (the empty string)</li>
 * <li>the path - dev/project/</li>
 * <li>the full path - dev/project/</li>
 * <li>the name - file.txt</li>
 * <li>the base name - file</li>
 * <li>the extension - txt</li>
 * </ul>
101 *
102 *
103 * <p>
104 * This class works best if directory names end with a separator.
105 * If you omit the last separator, it is impossible to determine if the last component
106 * corresponds to a file or a directory. This class treats final components
107 * that do not end with a separator as files, not directories.
108 * </p>
109 * <p>
110 * This class only supports Unix and Windows style names.
111 * Prefixes are matched as follows:
112 * </p>
113 * <pre>
114 * Windows:
115 * a\b\c.txt --> "" --> relative
116 * \a\b\c.txt --> "\" --> current drive absolute
117 * C:a\b\c.txt --> "C:" --> drive relative
118 * C:\a\b\c.txt --> "C:\" --> absolute
119 * \\server\a\b\c.txt --> "\\server\" --> UNC
120 *
121 * Unix:
122 * a/b/c.txt --> "" --> relative
123 * /a/b/c.txt --> "/" --> absolute
124 * ~/a/b/c.txt --> "~/" --> current user
125 * ~ --> "~/" --> current user (slash added)
126 * ~user/a/b/c.txt --> "~user/" --> named user
127 * ~user --> "~user/" --> named user (slash added)
128 * </pre>
129 * <p>
130 * Both prefix styles are matched, irrespective of the machine that you are
131 * currently running on.
132 * </p>
133 *
134 * @since 1.1
135 */
136 public class FilenameUtils {
137
138 private static final String[] EMPTY_STRING_ARRAY = {};
139
140 private static final String EMPTY_STRING = "";
141
142 private static final int NOT_FOUND = -1;
143
144 /**
145 * The extension separator character.
146 *
147 * @since 1.4
148 */
149 public static final char EXTENSION_SEPARATOR = '.';
150
151 /**
152 * The extension separator String.
153 *
154 * @since 1.4
155 */
156 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);
157
158 /**
159 * The Unix separator character.
160 */
161 private static final char UNIX_NAME_SEPARATOR = '/';
162
163 /**
164 * The Windows separator character.
165 */
166 private static final char WINDOWS_NAME_SEPARATOR = '\\';
167
168 /**
169 * The system separator character.
170 */
171 private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar;
172
173 /**
174 * The separator character that is the opposite of the system separator.
175 */
176 private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR);
177
178 private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");
179
180 private static final int IPV4_MAX_OCTET_VALUE = 255;
181
182 private static final int IPV6_MAX_HEX_GROUPS = 8;
183
184 private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;
185
186 private static final int MAX_UNSIGNED_SHORT = 0xffff;
187
188 private static final int BASE_16 = 16;
189
190 private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");
191
192 /**
193 * Concatenates a fileName to a base path using normal command line style rules.
194 * <p>
195 * The effect is equivalent to resultant directory after changing
196 * directory to the first argument, followed by changing directory to
197 * the second argument.
198 * </p>
199 * <p>
200 * The first argument is the base path, the second is the path to concatenate.
201 * The returned path is always normalized via {@link #normalize(String)},
202 * thus {@code ..} is handled.
203 * </p>
204 * <p>
205 * If {@code pathToAdd} is absolute (has an absolute prefix), then
206 * it will be normalized and returned.
207 * Otherwise, the paths will be joined, normalized and returned.
208 * </p>
209 * <p>
210 * The output will be the same on both Unix and Windows except
211 * for the separator character.
212 * </p>
213 * <pre>
214 * /foo/ + bar --> /foo/bar
215 * /foo + bar --> /foo/bar
216 * /foo + /bar --> /bar
217 * /foo + C:/bar --> C:/bar
218 * /foo + C:bar --> C:bar [1]
219 * /foo/a/ + ../bar --> /foo/bar
220 * /foo/ + ../../bar --> null
221 * /foo/ + /bar --> /bar
222 * /foo/.. + /bar --> /bar
223 * /foo + bar/c.txt --> /foo/bar/c.txt
224 * /foo/c.txt + bar --> /foo/c.txt/bar [2]
225 * </pre>
226 * <p>
227 * [1] Note that the Windows relative drive prefix is unreliable when
228 * used with this method.
229 * </p>
230 * <p>
231 * [2] Note that the first parameter must be a path. If it ends with a name, then
232 * the name will be built into the concatenated path. If this might be a problem,
233 * use {@link #getFullPath(String)} on the base path argument.
234 * </p>
235 *
236 * @param basePath the base path to attach to, always treated as a path.
237 * @param fullFileNameToAdd the file name (or path) to attach to the base.
238 * @return the concatenated path, or null if invalid.
239 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
240 */
241 public static String concat(final String basePath, final String fullFileNameToAdd) {
242 final int prefix = getPrefixLength(fullFileNameToAdd);
243 if (prefix < 0) {
244 return null;
245 }
246 if (prefix > 0) {
247 return normalize(fullFileNameToAdd);
248 }
249 if (basePath == null) {
250 return null;
251 }
252 final int len = basePath.length();
253 if (len == 0) {
254 return normalize(fullFileNameToAdd);
255 }
256 final char ch = basePath.charAt(len - 1);
257 if (isSeparator(ch)) {
258 return normalize(basePath + fullFileNameToAdd);
259 }
260 return normalize(basePath + '/' + fullFileNameToAdd);
261 }
262
263 /**
264 * Determines whether the {@code parent} directory contains the {@code child} (a file or directory).
265 * This does not read from the file system, and there is no guarantee or expectation that
266 * these paths actually exist.
267 * <p>
268 * The files names are expected to be normalized.
269 * </p>
270 *
271 * Edge cases:
272 * <ul>
273 * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li>
274 * <li>A directory does not contain itself: return false</li>
275 * <li>A null child file is not contained in any parent: return false</li>
276 * </ul>
277 *
278 * @param canonicalParent the path string to consider as the parent.
279 * @param canonicalChild the path string to consider as the child.
280 * @return true if the candidate leaf is under the specified composite. False otherwise.
281 * @since 2.2
282 * @see FileUtils#directoryContains(File, File)
283 */
284 public static boolean directoryContains(final String canonicalParent, final String canonicalChild) {
285 if (isEmpty(canonicalParent) || isEmpty(canonicalChild) || IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
286 return false;
287 }
288 final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR);
289 final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator;
290 return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator);
291 }
292
293 /**
294 * Does the work of getting the path.
295 *
296 * @param fileName the file name.
297 * @param includeEndSeparator true to include the end separator.
298 * @return the path.
299 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
300 */
301 private static String doGetFullPath(final String fileName, final boolean includeEndSeparator) {
302 if (fileName == null) {
303 return null;
304 }
305 final int prefix = getPrefixLength(fileName);
306 if (prefix < 0) {
307 return null;
308 }
309 if (prefix >= fileName.length()) {
310 if (includeEndSeparator) {
311 return getPrefix(fileName); // add end slash if necessary
312 }
313 return fileName;
314 }
315 final int index = indexOfLastSeparator(fileName);
316 if (index < 0) {
317 return fileName.substring(0, prefix);
318 }
319 int end = index + (includeEndSeparator ? 1 : 0);
320 if (end == 0) {
321 end++;
322 }
323 return fileName.substring(0, end);
324 }
325
326 /**
327 * Does the work of getting the path.
328 *
329 * @param fileName the file name.
330 * @param separatorAdd 0 to omit the end separator, 1 to return it.
331 * @return the path.
332 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
333 */
334 private static String doGetPath(final String fileName, final int separatorAdd) {
335 if (fileName == null) {
336 return null;
337 }
338 final int prefix = getPrefixLength(fileName);
339 if (prefix < 0) {
340 return null;
341 }
342 final int index = indexOfLastSeparator(fileName);
343 final int endIndex = index + separatorAdd;
344 if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
345 return EMPTY_STRING;
346 }
347 return requireNonNullChars(fileName.substring(prefix, endIndex));
348 }
349
350 /**
351 * Internal method to perform the normalization.
352 *
353 * @param fileName the file name.
354 * @param separator The separator character to use.
355 * @param keepSeparator true to keep the final separator.
356 * @return the normalized fileName.
357 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
358 */
359 private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
360 if (fileName == null) {
361 return null;
362 }
363 requireNonNullChars(fileName);
364 int size = fileName.length();
365 if (size == 0) {
366 return fileName;
367 }
368 final int prefix = getPrefixLength(fileName);
369 if (prefix < 0) {
370 return null;
371 }
372 final char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy
373 fileName.getChars(0, fileName.length(), array, 0);
374 // fix separators throughout
375 final char otherSeparator = flipSeparator(separator);
376 for (int i = 0; i < array.length; i++) {
377 if (array[i] == otherSeparator) {
378 array[i] = separator;
379 }
380 }
381 // add extra separator on the end to simplify code below
382 boolean lastIsDirectory = true;
383 if (array[size - 1] != separator) {
384 array[size++] = separator;
385 lastIsDirectory = false;
386 }
387 // adjoining slashes
388 // If we get here, prefix can only be 0 or greater, size 1 or greater
389 // If prefix is 0, set loop start to 1 to prevent index errors
390 for (int i = prefix != 0 ? prefix : 1; i < size; i++) {
391 if (array[i] == separator && array[i - 1] == separator) {
392 System.arraycopy(array, i, array, i - 1, size - i);
393 size--;
394 i--;
395 }
396 }
397 // period slash
398 for (int i = prefix + 1; i < size; i++) {
399 if (array[i] == separator && array[i - 1] == '.' && (i == prefix + 1 || array[i - 2] == separator)) {
400 if (i == size - 1) {
401 lastIsDirectory = true;
402 }
403 System.arraycopy(array, i + 1, array, i - 1, size - i);
404 size -= 2;
405 i--;
406 }
407 }
408 // double period slash
409 outer: for (int i = prefix + 2; i < size; i++) {
410 if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' && (i == prefix + 2 || array[i - 3] == separator)) {
411 if (i == prefix + 2) {
412 return null;
413 }
414 if (i == size - 1) {
415 lastIsDirectory = true;
416 }
417 int j;
418 for (j = i - 4; j >= prefix; j--) {
419 if (array[j] == separator) {
420 // remove b/../ from a/b/../c
421 System.arraycopy(array, i + 1, array, j + 1, size - i);
422 size -= i - j;
423 i = j + 1;
424 continue outer;
425 }
426 }
427 // remove a/../ from a/../c
428 System.arraycopy(array, i + 1, array, prefix, size - i);
429 size -= i + 1 - prefix;
430 i = prefix + 1;
431 }
432 }
433 if (size <= 0) { // should never be less than 0
434 return EMPTY_STRING;
435 }
436 if (size <= prefix || lastIsDirectory && keepSeparator) {
437 return new String(array, 0, size); // keep trailing separator
438 }
439 return new String(array, 0, size - 1); // lose trailing separator
440 }
441
442 /**
443 * Checks whether two file names are exactly equal.
444 * <p>
445 * No processing is performed on the file names other than comparison.
446 * This is merely a null-safe case-sensitive string equality.
447 * </p>
448 *
449 * @param fileName1 the first file name, may be null.
450 * @param fileName2 the second file name, may be null.
451 * @return true if the file names are equal, null equals null.
452 * @see IOCase#SENSITIVE
453 */
454 public static boolean equals(final String fileName1, final String fileName2) {
455 return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
456 }
457
458 /**
459 * Checks whether two file names are equal, optionally normalizing and providing
460 * control over the case-sensitivity.
461 *
462 * @param fileName1 the first file name, may be null.
463 * @param fileName2 the second file name, may be null.
464 * @param normalize whether to normalize the file names.
465 * @param ioCase what case sensitivity rule to use, null means case-sensitive.
466 * @return true if the file names are equal, null equals null.
467 * @since 1.3
468 */
469 public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) {
470 if (fileName1 == null || fileName2 == null) {
471 return fileName1 == null && fileName2 == null;
472 }
473 if (normalize) {
474 fileName1 = normalize(fileName1);
475 if (fileName1 == null) {
476 return false;
477 }
478 fileName2 = normalize(fileName2);
479 if (fileName2 == null) {
480 return false;
481 }
482 }
483 return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2);
484 }
485
486 /**
487 * Checks whether two file names are equal after both have been normalized.
488 * <p>
489 * Both file names are first passed to {@link #normalize(String)}.
490 * The check is then performed in a case-sensitive manner.
491 * </p>
492 *
493 * @param fileName1 the first file name, may be null.
494 * @param fileName2 the second file name, may be null.
495 * @return true if the file names are equal, null equals null.
496 * @see IOCase#SENSITIVE
497 */
498 public static boolean equalsNormalized(final String fileName1, final String fileName2) {
499 return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
500 }
501
502 /**
503 * Checks whether two file names are equal using the case rules of the system
504 * after both have been normalized.
505 * <p>
506 * Both file names are first passed to {@link #normalize(String)}.
507 * The check is then performed case-sensitively on Unix and
508 * case-insensitively on Windows.
509 * </p>
510 *
511 * @param fileName1 the first file name, may be null.
512 * @param fileName2 the second file name, may be null.
513 * @return true if the file names are equal, null equals null.
514 * @see IOCase#SYSTEM
515 */
516 public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
517 return equals(fileName1, fileName2, true, IOCase.SYSTEM);
518 }
519
520 /**
521 * Checks whether two file names are equal using the case rules of the system.
522 * <p>
523 * No processing is performed on the file names other than comparison.
524 * The check is case-sensitive on Unix and case-insensitive on Windows.
525 * </p>
526 *
527 * @param fileName1 the first file name, may be null.
528 * @param fileName2 the second file name, may be null.
529 * @return true if the file names are equal, null equals null.
530 * @see IOCase#SYSTEM
531 */
532 public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
533 return equals(fileName1, fileName2, false, IOCase.SYSTEM);
534 }
535
536 /**
537 * Flips the Windows name separator to Linux and vice-versa.
538 *
539 * @param ch The Windows or Linux name separator.
540 * @return The Windows or Linux name separator.
541 */
542 static char flipSeparator(final char ch) {
543 if (ch == UNIX_NAME_SEPARATOR) {
544 return WINDOWS_NAME_SEPARATOR;
545 }
546 if (ch == WINDOWS_NAME_SEPARATOR) {
547 return UNIX_NAME_SEPARATOR;
548 }
549 throw new IllegalArgumentException(String.valueOf(ch));
550 }
551
552 /**
553 * Special handling for NTFS ADS: Don't accept colon in the file name.
554 *
555 * @param fileName a file name.
556 * @return ADS offsets.
557 */
558 private static int getAdsCriticalOffset(final String fileName) {
559 // Step 1: Remove leading path segments.
560 final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR);
561 final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR);
562 if (offset1 == -1) {
563 if (offset2 == -1) {
564 return 0;
565 }
566 return offset2 + 1;
567 }
568 if (offset2 == -1) {
569 return offset1 + 1;
570 }
571 return Math.max(offset1, offset2) + 1;
572 }
573
574 /**
575 * Gets the base name, minus the full path and extension, from a full file name.
576 * <p>
577 * This method will handle a path in either Unix or Windows format.
578 * The text after the last forward or backslash and before the last period is returned.
579 * </p>
580 * <pre>
581 * a/b/c.txt --> c
582 * a\b\c.txt --> c
583 * a/b/c.foo.txt --> c.foo
584 * a.txt --> a
585 * a/b/c --> c
586 * a/b/c/ --> ""
587 * </pre>
588 * <p>
589 * The output will be the same irrespective of the machine that the code is running on.
590 * </p>
591 *
592 * @param fileName the file name, null returns null.
593 * @return the name of the file without the path, or an empty string if none exists.
594 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
595 */
596 public static String getBaseName(final String fileName) {
597 return removeExtension(getName(fileName));
598 }
599
600 /**
601 * Gets the extension of a file name.
602 * <p>
603 * This method returns the textual part of the file name after the last period.
604 * There must be no directory separator after the period.
605 * </p>
606 * <pre>
607 * foo.txt --> "txt"
608 * a/b/c.jpg --> "jpg"
609 * a/b.txt/c --> ""
610 * a/b/c --> ""
611 * </pre>
612 * <p>
613 * The output will be the same irrespective of the machine that the code is running on, with the
614 * exception of a possible {@link IllegalArgumentException} on Windows (see below).
615 * </p>
616 * <p>
617 * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
618 * In this case, the name wouldn't be the name of a file, but the identifier of an
619 * alternate data stream (bar.txt) on the file foo.exe. The method used to return
620 * ".txt" here, which would be misleading. Commons IO 2.7 and later throw
621 * an {@link IllegalArgumentException} for names like this.
622 * </p>
623 *
624 * @param fileName the file name to retrieve the extension of.
625 * @return the extension of the file or an empty string if none exists or {@code null}
626 * if the file name is {@code null}.
627 * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
628 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
629 */
630 public static String getExtension(final String fileName) throws IllegalArgumentException {
631 if (fileName == null) {
632 return null;
633 }
634 final int index = indexOfExtension(fileName);
635 if (index == NOT_FOUND) {
636 return EMPTY_STRING;
637 }
638 return fileName.substring(index + 1);
639 }
640
641 /**
642 * Gets the full path (prefix + path) from a full file name.
643 * <p>
644 * This method will handle a file in either Unix or Windows format.
645 * The method is entirely text based, and returns the text before and
646 * including the last forward or backslash.
647 * </p>
648 * <pre>
649 * C:\a\b\c.txt --> C:\a\b\
650 * ~/a/b/c.txt --> ~/a/b/
651 * a.txt --> ""
652 * a/b/c --> a/b/
653 * a/b/c/ --> a/b/c/
654 * C: --> C:
655 * C:\ --> C:\
656 * ~ --> ~/
657 * ~/ --> ~/
658 * ~user --> ~user/
659 * ~user/ --> ~user/
660 * </pre>
661 * <p>
662 * The output will be the same irrespective of the machine that the code is running on.
663 * </p>
664 *
665 * @param fileName the file name, null returns null.
666 * @return the path of the file, an empty string if none exists, null if invalid.
667 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
668 */
669 public static String getFullPath(final String fileName) {
670 return doGetFullPath(fileName, true);
671 }
672
673 /**
674 * Gets the full path (prefix + path) from a full file name,
675 * excluding the final directory separator.
676 * <p>
677 * This method will handle a file in either Unix or Windows format.
678 * The method is entirely text based, and returns the text before the
679 * last forward or backslash.
680 * </p>
681 * <pre>
682 * C:\a\b\c.txt --> C:\a\b
683 * ~/a/b/c.txt --> ~/a/b
684 * a.txt --> ""
685 * a/b/c --> a/b
686 * a/b/c/ --> a/b/c
687 * C: --> C:
688 * C:\ --> C:\
689 * ~ --> ~
690 * ~/ --> ~
691 * ~user --> ~user
692 * ~user/ --> ~user
693 * </pre>
694 * <p>
695 * The output will be the same irrespective of the machine that the code is running on.
696 * </p>
697 *
698 * @param fileName the file name, null returns null.
699 * @return the path of the file, an empty string if none exists, null if invalid.
700 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
701 */
702 public static String getFullPathNoEndSeparator(final String fileName) {
703 return doGetFullPath(fileName, false);
704 }
705
706 /**
707 * Gets the name minus the path from a full file name.
708 * <p>
709 * This method will handle a file in either Unix or Windows format.
710 * The text after the last forward or backslash is returned.
711 * </p>
712 * <pre>
713 * a/b/c.txt --> c.txt
714 * a\b\c.txt --> c.txt
715 * a.txt --> a.txt
716 * a/b/c --> c
717 * a/b/c/ --> ""
718 * </pre>
719 * <p>
720 * The output will be the same irrespective of the machine that the code is running on.
721 * </p>
722 *
723 * @param fileName the file name, null returns null.
724 * @return the name of the file without the path, or an empty string if none exists.
725 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
726 */
727 public static String getName(final String fileName) {
728 if (fileName == null) {
729 return null;
730 }
731 return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1);
732 }
733
734 /**
735 * Gets the path from a full file name, which excludes the prefix and the name.
736 * <p>
737 * This method will handle a file in either Unix or Windows format.
738 * The method is entirely text based, and returns the text before and
739 * including the last forward or backslash.
740 * </p>
741 * <pre>
742 * C:\a\b\c.txt --> a\b\
743 * ~/a/b/c.txt --> a/b/
744 * a.txt --> ""
745 * a/b/c --> a/b/
746 * a/b/c/ --> a/b/c/
747 * </pre>
748 * <p>
749 * The output will be the same irrespective of the machine that the code is running on.
750 * </p>
751 * <p>
752 * This method drops the prefix from the result.
753 * See {@link #getFullPath(String)} for the method that retains the prefix.
754 * </p>
755 *
756 * @param fileName the file name, null returns null.
757 * @return the path of the file, an empty string if none exists, null if invalid.
758 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
759 */
760 public static String getPath(final String fileName) {
761 return doGetPath(fileName, 1);
762 }
763
764 /**
765 * Gets the path (which excludes the prefix) from a full file name, and
766 * also excluding the final directory separator.
767 * <p>
768 * This method will handle a file in either Unix or Windows format.
769 * The method is entirely text based, and returns the text before the
770 * last forward or backslash.
771 * </p>
772 * <pre>
773 * C:\a\b\c.txt --> a\b
774 * ~/a/b/c.txt --> a/b
775 * a.txt --> ""
776 * a/b/c --> a/b
777 * a/b/c/ --> a/b/c
778 * </pre>
779 * <p>
780 * The output will be the same irrespective of the machine that the code is running on.
781 * </p>
782 * <p>
783 * This method drops the prefix from the result.
784 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
785 * </p>
786 *
787 * @param fileName the file name, null returns null.
788 * @return the path of the file, an empty string if none exists, null if invalid.
789 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
790 */
791 public static String getPathNoEndSeparator(final String fileName) {
792 return doGetPath(fileName, 0);
793 }
794
795 /**
796 * Gets the prefix such as {@code C:/} or {@code ~/} from a full file name,
797 * <p>
798 * This method will handle a file in either Unix or Windows format.
799 * The prefix includes the first slash in the full file name where applicable.
800 * </p>
801 * <pre>
802 * Windows:
803 * a\b\c.txt --> "" --> relative
804 * \a\b\c.txt --> "\" --> current drive absolute
805 * C:a\b\c.txt --> "C:" --> drive relative
806 * C:\a\b\c.txt --> "C:\" --> absolute
807 * \\server\a\b\c.txt --> "\\server\" --> UNC
808 *
809 * Unix:
810 * a/b/c.txt --> "" --> relative
811 * /a/b/c.txt --> "/" --> absolute
812 * ~/a/b/c.txt --> "~/" --> current user
813 * ~ --> "~/" --> current user (slash added)
814 * ~user/a/b/c.txt --> "~user/" --> named user
815 * ~user --> "~user/" --> named user (slash added)
816 * </pre>
817 * <p>
818 * The output will be the same irrespective of the machine that the code is running on.
819 * ie. both Unix and Windows prefixes are matched regardless.
820 * </p>
821 *
822 * @param fileName the file name, null returns null.
823 * @return the prefix of the file, null if invalid.
824 * @throws IllegalArgumentException if the result contains the null character ({@code U+0000}).
825 */
826 public static String getPrefix(final String fileName) {
827 if (fileName == null) {
828 return null;
829 }
830 final int len = getPrefixLength(fileName);
831 if (len < 0) {
832 return null;
833 }
834 if (len > fileName.length()) {
835 requireNonNullChars(fileName);
836 return fileName + UNIX_NAME_SEPARATOR;
837 }
838 return requireNonNullChars(fileName.substring(0, len));
839 }
840
841 /**
842 * Returns the length of the file name prefix, such as {@code C:/} or {@code ~/}.
843 * <p>
844 * This method will handle a file in either Unix or Windows format.
845 * </p>
846 * <p>
847 * The prefix length includes the first slash in the full file name
848 * if applicable. Thus, it is possible that the length returned is greater
849 * than the length of the input string.
850 * </p>
851 * <pre>
852 * Windows:
853 * a\b\c.txt --> 0 --> relative
854 * \a\b\c.txt --> 1 --> current drive absolute
855 * C:a\b\c.txt --> 2 --> drive relative
856 * C:\a\b\c.txt --> 3 --> absolute
857 * \\server\a\b\c.txt --> 9 --> UNC
858 * \\\a\b\c.txt --> -1 --> error
859 *
860 * Unix:
861 * a/b/c.txt --> 0 --> relative
862 * /a/b/c.txt --> 1 --> absolute
863 * ~/a/b/c.txt --> 2 --> current user
864 * ~ --> 2 --> current user (slash added)
865 * ~user/a/b/c.txt --> 6 --> named user
866 * ~user --> 6 --> named user (slash added)
867 * //server/a/b/c.txt --> 9
868 * ///a/b/c.txt --> -1 --> error
869 * C: --> 0 --> valid file name as only null character and / are reserved characters
870 * </pre>
871 * <p>
872 * The output will be the same irrespective of the machine that the code is running on.
873 * ie. both Unix and Windows prefixes are matched regardless.
874 * </p>
875 * <p>
876 * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
877 * These must be followed by a server name, so double-slashes are not collapsed
878 * to a single slash at the start of the file name.
879 * </p>
880 *
881 * @param fileName the file name to find the prefix in, null returns -1.
882 * @return the length of the prefix, -1 if invalid or null.
883 */
884 public static int getPrefixLength(final String fileName) {
885 if (fileName == null) {
886 return NOT_FOUND;
887 }
888 final int len = fileName.length();
889 if (len == 0) {
890 return 0;
891 }
892 char ch0 = fileName.charAt(0);
893 if (ch0 == ':') {
894 return NOT_FOUND;
895 }
896 if (len == 1) {
897 if (ch0 == '~') {
898 return 2; // return a length greater than the input
899 }
900 return isSeparator(ch0) ? 1 : 0;
901 }
902 if (ch0 == '~') {
903 int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1);
904 int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1);
905 if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
906 return len + 1; // return a length greater than the input
907 }
908 posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
909 posWin = posWin == NOT_FOUND ? posUnix : posWin;
910 return Math.min(posUnix, posWin) + 1;
911 }
912 final char ch1 = fileName.charAt(1);
913 if (ch1 == ':') {
914 ch0 = Character.toUpperCase(ch0);
915 if (ch0 >= 'A' && ch0 <= 'Z') {
916 if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
917 return 0;
918 }
919 if (len == 2 || !isSeparator(fileName.charAt(2))) {
920 return 2;
921 }
922 return 3;
923 }
924 if (ch0 == UNIX_NAME_SEPARATOR) {
925 return 1;
926 }
927 return NOT_FOUND;
928
929 }
930 if (!isSeparator(ch0) || !isSeparator(ch1)) {
931 return isSeparator(ch0) ? 1 : 0;
932 }
933 int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2);
934 int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2);
935 if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
936 return NOT_FOUND;
937 }
938 posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
939 posWin = posWin == NOT_FOUND ? posUnix : posWin;
940 final int pos = Math.min(posUnix, posWin) + 1;
941 final String hostnamePart = fileName.substring(2, pos - 1);
942 return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
943 }
944
945 /**
946 * Returns the index of the last extension separator character, which is a period.
947 * <p>
948 * This method also checks that there is no directory separator after the last period. To do this it uses
949 * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
950 * </p>
951 * <p>
952 * The output will be the same irrespective of the machine that the code is running on, with the
953 * exception of a possible {@link IllegalArgumentException} on Windows (see below).
954 * </p>
955 * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
956 * In this case, the name wouldn't be the name of a file, but the identifier of an
957 * alternate data stream (bar.txt) on the file foo.exe. The method used to return
958 * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
959 * an {@link IllegalArgumentException} for names like this.
960 *
961 * @param fileName
962 * the file name to find the last extension separator in, null returns -1.
963 * @return the index of the last extension separator character, or -1 if there is no such character.
964 * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
965 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
966 */
967 public static int indexOfExtension(final String fileName) throws IllegalArgumentException {
968 if (fileName == null) {
969 return NOT_FOUND;
970 }
971 if (isSystemWindows()) {
972 // Special handling for NTFS ADS: Don't accept colon in the file name.
973 final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
974 if (offset != -1) {
975 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
976 }
977 }
978 final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
979 final int lastSeparator = indexOfLastSeparator(fileName);
980 return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
981 }
982
983 /**
984 * Returns the index of the last directory separator character.
985 * <p>
986 * This method will handle a file in either Unix or Windows format.
987 * The position of the last forward or backslash is returned.
988 * <p>
989 * The output will be the same irrespective of the machine that the code is running on.
990 *
991 * @param fileName the file name to find the last path separator in, null returns -1.
992 * @return the index of the last separator character, or -1 if there
993 * is no such character.
994 */
995 public static int indexOfLastSeparator(final String fileName) {
996 if (fileName == null) {
997 return NOT_FOUND;
998 }
999 final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR);
1000 final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR);
1001 return Math.max(lastUnixPos, lastWindowsPos);
1002 }
1003
1004 private static boolean isEmpty(final String string) {
1005 return string == null || string.isEmpty();
1006 }
1007
1008 /**
1009 * Checks whether the extension of the file name is one of those specified.
1010 * <p>
1011 * This method obtains the extension as the textual part of the file name
1012 * after the last period. There must be no directory separator after the period.
1013 * The extension check is case-sensitive on all platforms.
1014 *
1015 * @param fileName the file name, null returns false.
1016 * @param extensions the extensions to check for, null checks for no extension.
1017 * @return true if the extension of the file name is one of those specified.
1018 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1019 */
1020 public static boolean isExtension(final String fileName, final Collection<String> extensions) {
1021 if (fileName == null) {
1022 return false;
1023 }
1024 requireNonNullChars(fileName);
1025 if (extensions == null || extensions.isEmpty()) {
1026 return indexOfExtension(fileName) == NOT_FOUND;
1027 }
1028 return extensions.contains(getExtension(fileName));
1029 }
1030
1031 /**
1032 * Checks whether the extension of the file name is that specified.
1033 * <p>
1034 * This method obtains the extension as the textual part of the file name
1035 * after the last period. There must be no directory separator after the period.
1036 * The extension check is case-sensitive on all platforms.
1037 *
1038 * @param fileName the file name, null returns false.
1039 * @param extension the extension to check for, null or empty checks for no extension.
1040 * @return true if the file name has the specified extension.
1041 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1042 */
1043 public static boolean isExtension(final String fileName, final String extension) {
1044 if (fileName == null) {
1045 return false;
1046 }
1047 requireNonNullChars(fileName);
1048 if (isEmpty(extension)) {
1049 return indexOfExtension(fileName) == NOT_FOUND;
1050 }
1051 return getExtension(fileName).equals(extension);
1052 }
1053
1054 /**
1055 * Checks whether the extension of the file name is one of those specified.
1056 * <p>
1057 * This method obtains the extension as the textual part of the file name
1058 * after the last period. There must be no directory separator after the period.
1059 * The extension check is case-sensitive on all platforms.
1060 *
1061 * @param fileName the file name, null returns false.
1062 * @param extensions the extensions to check for, null checks for no extension.
1063 * @return true if the extension of the file name is one of those specified.
1064 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1065 */
1066 public static boolean isExtension(final String fileName, final String... extensions) {
1067 if (fileName == null) {
1068 return false;
1069 }
1070 requireNonNullChars(fileName);
1071
1072 if (extensions == null || extensions.length == 0) {
1073 return indexOfExtension(fileName) == NOT_FOUND;
1074 }
1075 final String fileExt = getExtension(fileName);
1076 return Stream.of(extensions).anyMatch(fileExt::equals);
1077 }
1078
1079 /**
1080 * Checks whether a given string represents a valid IPv4 address.
1081 *
1082 * @param name the name to validate.
1083 * @return true if the given name is a valid IPv4 address.
1084 */
1085 // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
1086 private static boolean isIPv4Address(final String name) {
1087 final Matcher m = IPV4_PATTERN.matcher(name);
1088 if (!m.matches() || m.groupCount() != 4) {
1089 return false;
1090 }
1091 // verify that address subgroups are legal
1092 for (int i = 1; i <= 4; i++) {
1093 final String ipSegment = m.group(i);
1094 final int iIpSegment = Integer.parseInt(ipSegment);
1095 if (iIpSegment > IPV4_MAX_OCTET_VALUE || ipSegment.length() > 1 && ipSegment.startsWith("0")) {
1096 return false;
1097 }
1098 }
1099 return true;
1100 }
1101
1102 // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
1103 /**
1104 * Checks whether a given string represents a valid IPv6 address.
1105 *
1106 * @param inet6Address the name to validate.
1107 * @return true if the given name is a valid IPv6 address.
1108 */
1109 private static boolean isIPv6Address(final String inet6Address) {
1110 final boolean containsCompressedZeroes = inet6Address.contains("::");
1111 if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) {
1112 return false;
1113 }
1114 if (inet6Address.startsWith(":") && !inet6Address.startsWith("::")
1115 || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) {
1116 return false;
1117 }
1118 String[] octets = inet6Address.split(":");
1119 if (containsCompressedZeroes) {
1120 final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
1121 if (inet6Address.endsWith("::")) {
1122 // String.split() drops ending empty segments
1123 octetList.add("");
1124 } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
1125 octetList.remove(0);
1126 }
1127 octets = octetList.toArray(EMPTY_STRING_ARRAY);
1128 }
1129 if (octets.length > IPV6_MAX_HEX_GROUPS) {
1130 return false;
1131 }
1132 int validOctets = 0;
1133 int emptyOctets = 0; // consecutive empty chunks
1134 for (int index = 0; index < octets.length; index++) {
1135 final String octet = octets[index];
1136 if (octet.isEmpty()) {
1137 emptyOctets++;
1138 if (emptyOctets > 1) {
1139 return false;
1140 }
1141 } else {
1142 emptyOctets = 0;
1143 // Is last chunk an IPv4 address?
1144 if (index == octets.length - 1 && octet.contains(".")) {
1145 if (!isIPv4Address(octet)) {
1146 return false;
1147 }
1148 validOctets += 2;
1149 continue;
1150 }
1151 if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
1152 return false;
1153 }
1154 final int octetInt;
1155 try {
1156 octetInt = Integer.parseInt(octet, BASE_16);
1157 } catch (final NumberFormatException e) {
1158 return false;
1159 }
1160 if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
1161 return false;
1162 }
1163 }
1164 validOctets++;
1165 }
1166 return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
1167 }
1168
1169 /**
1170 * Checks whether a given string is a valid host name according to
1171 * RFC 3986 - not accepting IP addresses.
1172 *
1173 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1174 * @param name the hostname to validate.
1175 * @return true if the given name is a valid host name.
1176 */
1177 private static boolean isRFC3986HostName(final String name) {
1178 final String[] parts = name.split("\\.", -1);
1179 for (int i = 0; i < parts.length; i++) {
1180 if (parts[i].isEmpty()) {
1181 // trailing period is legal, otherwise we've hit a .. sequence
1182 return i == parts.length - 1;
1183 }
1184 if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
1185 return false;
1186 }
1187 }
1188 return true;
1189 }
1190
1191 /**
1192 * Checks if the character is a separator.
1193 *
1194 * @param ch the character to check.
1195 * @return true if it is a separator character.
1196 */
1197 private static boolean isSeparator(final char ch) {
1198 return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR;
1199 }
1200
1201 /**
1202 * Determines if Windows file system is in use.
1203 *
1204 * @return true if the system is Windows.
1205 */
1206 static boolean isSystemWindows() {
1207 return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR;
1208 }
1209
1210 /**
1211 * Checks whether a given string is a valid host name according to
1212 * RFC 3986.
1213 *
1214 * <p>Accepted are IP addresses (v4 and v6) as well as what the
1215 * RFC calls a "reg-name". Percent encoded names don't seem to be
1216 * valid names in UNC paths.</p>
1217 *
1218 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1219 * @param name the hostname to validate.
1220 * @return true if the given name is a valid host name.
1221 */
1222 private static boolean isValidHostName(final String name) {
1223 return isIPv6Address(name) || isRFC3986HostName(name);
1224 }
1225
1226 /**
1227 * Normalizes a path, removing double and single period path steps.
1228 * <p>
1229 * This method normalizes a path to a standard format.
1230 * The input may contain separators in either Unix or Windows format.
1231 * The output will contain separators in the format of the system.
1232 * <p>
1233 * A trailing slash will be retained.
1234 * A double slash will be merged to a single slash (but UNC names are handled).
1235 * A single period path segment will be removed.
1236 * A double period will cause that path segment and the one before to be removed.
1237 * If the double period has no parent path segment, {@code null} is returned.
1238 * <p>
1239 * The output will be the same on both Unix and Windows except
1240 * for the separator character.
1241 * <pre>
1242 * /foo// --> /foo/
1243 * /foo/./ --> /foo/
1244 * /foo/../bar --> /bar
1245 * /foo/../bar/ --> /bar/
1246 * /foo/../bar/../baz --> /baz
1247 * //foo//./bar --> //foo/bar
1248 * /../ --> null
1249 * ../foo --> null
1250 * foo/bar/.. --> foo/
1251 * foo/../../bar --> null
1252 * foo/../bar --> bar
1253 * //server/foo/../bar --> //server/bar
1254 * //server/../bar --> null
1255 * C:\foo\..\bar --> C:\bar
1256 * C:\..\bar --> null
1257 * ~/foo/../bar/ --> ~/bar/
1258 * ~/../bar --> null
1259 * </pre>
1260 * (Note the file separator will be correct for Windows/Unix.)
1261 *
1262 * @param fileName the file name to normalize, null returns null.
1263 * @return the normalized fileName, or null if invalid.
1264 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1265 */
1266 public static String normalize(final String fileName) {
1267 return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true);
1268 }
1269
1270 /**
1271 * Normalizes a path, removing double and single period path steps.
1272 * <p>
1273 * This method normalizes a path to a standard format.
1274 * The input may contain separators in either Unix or Windows format.
1275 * The output will contain separators in the format specified.
1276 * <p>
1277 * A trailing slash will be retained.
1278 * A double slash will be merged to a single slash (but UNC names are handled).
1279 * A single period path segment will be removed.
1280 * A double period will cause that path segment and the one before to be removed.
1281 * If the double period has no parent path segment to work with, {@code null}
1282 * is returned.
1283 * <p>
1284 * The output will be the same on both Unix and Windows, including
1285 * the separator character, which is selected by {@code unixSeparator}.
1286 * <pre>
1287 * /foo// --> /foo/
1288 * /foo/./ --> /foo/
1289 * /foo/../bar --> /bar
1290 * /foo/../bar/ --> /bar/
1291 * /foo/../bar/../baz --> /baz
1292 * //foo//./bar --> //foo/bar
1293 * /../ --> null
1294 * ../foo --> null
1295 * foo/bar/.. --> foo/
1296 * foo/../../bar --> null
1297 * foo/../bar --> bar
1298 * //server/foo/../bar --> //server/bar
1299 * //server/../bar --> null
1300 * C:\foo\..\bar --> C:\bar
1301 * C:\..\bar --> null
1302 * ~/foo/../bar/ --> ~/bar/
1303 * ~/../bar --> null
1304 * </pre>
1305 * The output will be the same on both Unix and Windows including
1306 * the separator character.
1307 *
1308 * @param fileName the file name to normalize, null returns null.
1309 * @param unixSeparator {@code true} if a Unix separator should
1310 * be used or {@code false} if a Windows separator should be used.
1311 * @return the normalized fileName, or null if invalid.
1312 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1313 * @since 2.0
1314 */
1315 public static String normalize(final String fileName, final boolean unixSeparator) {
1316 return doNormalize(fileName, toSeparator(unixSeparator), true);
1317 }
1318
1319 /**
1320 * Normalizes a path, removing double and single period path steps,
1321 * and removing any final directory separator.
1322 * <p>
1323 * This method normalizes a path to a standard format.
1324 * The input may contain separators in either Unix or Windows format.
1325 * The output will contain separators in the format of the system.
1326 * <p>
1327 * A trailing slash will be removed.
1328 * A double slash will be merged to a single slash (but UNC names are handled).
1329 * A single period path segment will be removed.
1330 * A double period will cause that path segment and the one before to be removed.
1331 * If the double period has no parent path segment to work with, {@code null}
1332 * is returned.
1333 * <p>
1334 * The output will be the same on both Unix and Windows except
1335 * for the separator character.
1336 * <pre>
1337 * /foo// --> /foo
1338 * /foo/./ --> /foo
1339 * /foo/../bar --> /bar
1340 * /foo/../bar/ --> /bar
1341 * /foo/../bar/../baz --> /baz
1342 * //foo//./bar --> //foo/bar
1343 * /../ --> null
1344 * ../foo --> null
1345 * foo/bar/.. --> foo
1346 * foo/../../bar --> null
1347 * foo/../bar --> bar
1348 * //server/foo/../bar --> //server/bar
1349 * //server/../bar --> null
1350 * C:\foo\..\bar --> C:\bar
1351 * C:\..\bar --> null
1352 * ~/foo/../bar/ --> ~/bar
1353 * ~/../bar --> null
1354 * </pre>
1355 * (Note the file separator returned will be correct for Windows/Unix)
1356 *
1357 * @param fileName the file name to normalize, null returns null.
1358 * @return the normalized fileName, or null if invalid.
1359 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1360 */
1361 public static String normalizeNoEndSeparator(final String fileName) {
1362 return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false);
1363 }
1364
1365 /**
1366 * Normalizes a path, removing double and single period path steps,
1367 * and removing any final directory separator.
1368 * <p>
1369 * This method normalizes a path to a standard format.
1370 * The input may contain separators in either Unix or Windows format.
1371 * The output will contain separators in the format specified.
1372 * <p>
1373 * A trailing slash will be removed.
1374 * A double slash will be merged to a single slash (but UNC names are handled).
1375 * A single period path segment will be removed.
1376 * A double period will cause that path segment and the one before to be removed.
1377 * If the double period has no parent path segment to work with, {@code null}
1378 * is returned.
1379 * <p>
1380 * The output will be the same on both Unix and Windows including
1381 * the separator character.
1382 * <pre>
1383 * /foo// --> /foo
1384 * /foo/./ --> /foo
1385 * /foo/../bar --> /bar
1386 * /foo/../bar/ --> /bar
1387 * /foo/../bar/../baz --> /baz
1388 * //foo//./bar --> //foo/bar
1389 * /../ --> null
1390 * ../foo --> null
1391 * foo/bar/.. --> foo
1392 * foo/../../bar --> null
1393 * foo/../bar --> bar
1394 * //server/foo/../bar --> //server/bar
1395 * //server/../bar --> null
1396 * C:\foo\..\bar --> C:\bar
1397 * C:\..\bar --> null
1398 * ~/foo/../bar/ --> ~/bar
1399 * ~/../bar --> null
1400 * </pre>
1401 *
1402 * @param fileName the file name to normalize, null returns null.
1403 * @param unixSeparator {@code true} if a Unix separator should
1404 * be used or {@code false} if a Windows separator should be used.
1405 * @return the normalized fileName, or null if invalid.
1406 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1407 * @since 2.0
1408 */
1409 public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
1410 return doNormalize(fileName, toSeparator(unixSeparator), false);
1411 }
1412
1413 /**
1414 * Removes the extension from a fileName.
1415 * <p>
1416 * This method returns the textual part of the file name before the last period.
1417 * There must be no directory separator after the period.
1418 * <pre>
1419 * foo.txt --> foo
1420 * .txt --> "" (empty string)
1421 * a\b\c.jpg --> a\b\c
1422 * /a/b/c.jpg --> /a/b/c
1423 * a\b\c --> a\b\c
1424 * a.b\c --> a.b\c
1425 * </pre>
1426 * <p>
1427 * The output will be the same irrespective of the machine that the code is running on.
1428 *
1429 * @param fileName the file name, null returns null.
1430 * @return the file name minus the extension.
1431 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1432 */
1433 public static String removeExtension(final String fileName) {
1434 if (fileName == null) {
1435 return null;
1436 }
1437 requireNonNullChars(fileName);
1438 final int index = indexOfExtension(fileName);
1439 if (index == NOT_FOUND) {
1440 return fileName;
1441 }
1442 return fileName.substring(0, index);
1443 }
1444
1445 /**
1446 * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions.
1447 *
1448 * This may be used to defend against poison byte attacks.
1449 *
1450 * @param path the path to check.
1451 * @return The input.
1452 * @throws IllegalArgumentException if path contains the null character ({@code U+0000}).
1453 */
1454 private static String requireNonNullChars(final String path) {
1455 if (path.indexOf(0) >= 0) {
1456 throw new IllegalArgumentException(
1457 "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it");
1458 }
1459 return path;
1460 }
1461
1462 /**
1463 * Converts all separators to the system separator.
1464 *
1465 * @param path the path to be changed, null ignored.
1466 * @return the updated path.
1467 */
1468 public static String separatorsToSystem(final String path) {
1469 return FileSystem.getCurrent().normalizeSeparators(path);
1470 }
1471
1472 /**
1473 * Converts all separators to the Unix separator of forward slash.
1474 *
1475 * @param path the path to be changed, null ignored.
1476 * @return the new path.
1477 */
1478 public static String separatorsToUnix(final String path) {
1479 return FileSystem.LINUX.normalizeSeparators(path);
1480 }
1481
1482 /**
1483 * Converts all separators to the Windows separator of backslash.
1484 *
1485 * @param path the path to be changed, null ignored.
1486 * @return the updated path.
1487 */
1488 public static String separatorsToWindows(final String path) {
1489 return FileSystem.WINDOWS.normalizeSeparators(path);
1490 }
1491
1492 /**
1493 * Splits a string into a number of tokens.
1494 * The text is split by '?' and '*'.
1495 * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1496 *
1497 * @param text the text to split.
1498 * @return the array of tokens, never null.
1499 */
1500 static String[] splitOnTokens(final String text) {
1501 // used by wildcardMatch
1502 // package level so a unit test may run on this
1503 if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1504 return new String[] { text };
1505 }
1506 final char[] array = text.toCharArray();
1507 final ArrayList<String> list = new ArrayList<>();
1508 final StringBuilder buffer = new StringBuilder();
1509 char prevChar = 0;
1510 for (final char ch : array) {
1511 if (ch == '?' || ch == '*') {
1512 if (buffer.length() != 0) {
1513 list.add(buffer.toString());
1514 buffer.setLength(0);
1515 }
1516 if (ch == '?') {
1517 list.add("?");
1518 } else if (prevChar != '*') { // ch == '*' here; check if previous char was '*'
1519 list.add("*");
1520 }
1521 } else {
1522 buffer.append(ch);
1523 }
1524 prevChar = ch;
1525 }
1526 if (buffer.length() != 0) {
1527 list.add(buffer.toString());
1528 }
1529 return list.toArray(EMPTY_STRING_ARRAY);
1530 }
1531
1532 /**
1533 * Returns '/' if given true, '\\' otherwise.
1534 *
1535 * @param unixSeparator which separator to return.
1536 * @return '/' if given true, '\\' otherwise.
1537 */
1538 private static char toSeparator(final boolean unixSeparator) {
1539 return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR;
1540 }
1541
1542 /**
1543 * Checks a fileName to see if it matches the specified wildcard matcher,
1544 * always testing case-sensitive.
1545 * <p>
1546 * The wildcard matcher uses the characters '?' and '*' to represent a
1547 * single or multiple (zero or more) wildcard characters.
1548 * This is the same as often found on DOS/Unix command lines.
1549 * The check is case-sensitive always.
1550 * <pre>
1551 * wildcardMatch("c.txt", "*.txt") --> true
1552 * wildcardMatch("c.txt", "*.jpg") --> false
1553 * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1554 * wildcardMatch("c.txt", "*.???") --> true
1555 * wildcardMatch("c.txt", "*.????") --> false
1556 * </pre>
1557 * The sequence "*?" does not work properly at present in match strings.
1558 *
1559 * @param fileName the file name to match on, may be null.
1560 * @param wildcardMatcher the wildcard string to match against, may be null.
1561 * @return true if the file name matches the wildcard string.
1562 * @see IOCase#SENSITIVE
1563 */
1564 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
1565 return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
1566 }
1567
1568 /**
1569 * Checks a fileName to see if it matches the specified wildcard matcher
1570 * allowing control over case-sensitivity.
1571 * <p>
1572 * The wildcard matcher uses the characters '?' and '*' to represent a
1573 * single or multiple (zero or more) wildcard characters.
1574 * The sequence "*?" does not work properly at present in match strings.
1575 *
1576 * @param fileName the file name to match on, may be null.
1577 * @param wildcardMatcher the wildcard string to match against, may be null.
1578 * @param ioCase what case sensitivity rule to use, null means case-sensitive.
1579 * @return true if the file name matches the wildcard string.
1580 * @since 1.3
1581 */
1582 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) {
1583 if (fileName == null && wildcardMatcher == null) {
1584 return true;
1585 }
1586 if (fileName == null || wildcardMatcher == null) {
1587 return false;
1588 }
1589 ioCase = IOCase.value(ioCase, IOCase.SENSITIVE);
1590 final String[] wcs = splitOnTokens(wildcardMatcher);
1591 boolean anyChars = false;
1592 int textIdx = 0;
1593 int wcsIdx = 0;
1594 final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);
1595 // loop around a backtrack stack, to handle complex * matching
1596 do {
1597 if (!backtrack.isEmpty()) {
1598 final int[] array = backtrack.pop();
1599 wcsIdx = array[0];
1600 textIdx = array[1];
1601 anyChars = true;
1602 }
1603 // loop whilst tokens and text left to process
1604 while (wcsIdx < wcs.length) {
1605 if (wcs[wcsIdx].equals("?")) {
1606 // ? so move to next text char
1607 textIdx++;
1608 if (textIdx > fileName.length()) {
1609 break;
1610 }
1611 anyChars = false;
1612 } else if (wcs[wcsIdx].equals("*")) {
1613 // set any chars status
1614 anyChars = true;
1615 if (wcsIdx == wcs.length - 1) {
1616 textIdx = fileName.length();
1617 }
1618 } else {
1619 // matching text token
1620 if (anyChars) {
1621 // any chars then try to locate text token
1622 textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
1623 if (textIdx == NOT_FOUND) {
1624 // token not found
1625 break;
1626 }
1627 final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
1628 if (repeat >= 0) {
1629 backtrack.push(new int[] { wcsIdx, repeat });
1630 }
1631 } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
1632 // matching from current position
1633 // couldn't match token
1634 break;
1635 }
1636 // matched text token, move text index to end of matched token
1637 textIdx += wcs[wcsIdx].length();
1638 anyChars = false;
1639 }
1640 wcsIdx++;
1641 }
1642 // full match
1643 if (wcsIdx == wcs.length && textIdx == fileName.length()) {
1644 return true;
1645 }
1646 } while (!backtrack.isEmpty());
1647 return false;
1648 }
1649
1650 /**
1651 * Checks a fileName to see if it matches the specified wildcard matcher
1652 * using the case rules of the system.
1653 * <p>
1654 * The wildcard matcher uses the characters '?' and '*' to represent a
1655 * single or multiple (zero or more) wildcard characters.
1656 * This is the same as often found on DOS/Unix command lines.
1657 * The check is case-sensitive on Unix and case-insensitive on Windows.
1658 * <pre>
1659 * wildcardMatch("c.txt", "*.txt") --> true
1660 * wildcardMatch("c.txt", "*.jpg") --> false
1661 * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1662 * wildcardMatch("c.txt", "*.???") --> true
1663 * wildcardMatch("c.txt", "*.????") --> false
1664 * </pre>
1665 * The sequence "*?" does not work properly at present in match strings.
1666 *
1667 * @param fileName the file name to match on.
1668 * @param wildcardMatcher the wildcard string to match against.
1669 * @return true if the file name matches the wildcard string.
1670 * @see IOCase#SYSTEM
1671 */
1672 public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
1673 return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
1674 }
1675
1676 /**
1677 * Instances should NOT be constructed in standard programming.
1678 *
1679 * @deprecated TODO Make private in 3.0.
1680 */
@Deprecated
public FilenameUtils() {
    // Intentionally empty: nothing is initialized here. The constructor is deprecated and,
    // per its Javadoc, is meant to become private in 3.0.
}
1685 }