001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.io;
018
019 import java.io.File;
020 import java.io.IOException;
021 import java.util.ArrayList;
022 import java.util.Collection;
023 import java.util.Stack;
024
025 /**
026 * General filename and filepath manipulation utilities.
027 * <p>
028 * When dealing with filenames you can hit problems when moving from a Windows
029 * based development machine to a Unix based production machine.
030 * This class aims to help avoid those problems.
031 * <p>
032 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
033 * using JDK {@link java.io.File File} objects and the two argument constructor
034 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
035 * <p>
036 * Most methods on this class are designed to work the same on both Unix and Windows.
037 * Those that don't include 'System', 'Unix' or 'Windows' in their name.
038 * <p>
039 * Most methods recognise both separators (forward and back), and both
040 * sets of prefixes. See the javadoc of each method for details.
041 * <p>
042 * This class defines six components within a filename
043 * (example C:\dev\project\file.txt):
044 * <ul>
045 * <li>the prefix - C:\</li>
046 * <li>the path - dev\project\</li>
047 * <li>the full path - C:\dev\project\</li>
048 * <li>the name - file.txt</li>
049 * <li>the base name - file</li>
050 * <li>the extension - txt</li>
051 * </ul>
052 * Note that this class works best if directory filenames end with a separator.
053 * If you omit the last separator, it is impossible to determine if the filename
054 * corresponds to a file or a directory. As a result, we have chosen to say
055 * it corresponds to a file.
056 * <p>
057 * This class only supports Unix and Windows style names.
058 * Prefixes are matched as follows:
059 * <pre>
060 * Windows:
061 * a\b\c.txt --> "" --> relative
062 * \a\b\c.txt --> "\" --> current drive absolute
063 * C:a\b\c.txt --> "C:" --> drive relative
064 * C:\a\b\c.txt --> "C:\" --> absolute
065 * \\server\a\b\c.txt --> "\\server\" --> UNC
066 *
067 * Unix:
068 * a/b/c.txt --> "" --> relative
069 * /a/b/c.txt --> "/" --> absolute
070 * ~/a/b/c.txt --> "~/" --> current user
071 * ~ --> "~/" --> current user (slash added)
072 * ~user/a/b/c.txt --> "~user/" --> named user
073 * ~user --> "~user/" --> named user (slash added)
074 * </pre>
075 * Both prefix styles are matched always, irrespective of the machine that you are
076 * currently running on.
077 * <p>
078 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
079 *
080 * @version $Id: FilenameUtils.java 1304052 2012-03-22 20:55:29Z ggregory $
081 * @since 1.1
082 */
083 public class FilenameUtils {
084
085 /**
086 * The extension separator character.
087 * @since 1.4
088 */
089 public static final char EXTENSION_SEPARATOR = '.';
090
091 /**
092 * The extension separator String.
093 * @since 1.4
094 */
095 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);
096
097 /**
098 * The Unix separator character.
099 */
100 private static final char UNIX_SEPARATOR = '/';
101
102 /**
103 * The Windows separator character.
104 */
105 private static final char WINDOWS_SEPARATOR = '\\';
106
107 /**
108 * The system separator character.
109 */
110 private static final char SYSTEM_SEPARATOR = File.separatorChar;
111
112 /**
113 * The separator character that is the opposite of the system separator.
114 */
115 private static final char OTHER_SEPARATOR;
116 static {
117 if (isSystemWindows()) {
118 OTHER_SEPARATOR = UNIX_SEPARATOR;
119 } else {
120 OTHER_SEPARATOR = WINDOWS_SEPARATOR;
121 }
122 }
123
124 /**
125 * Instances should NOT be constructed in standard programming.
126 */
127 public FilenameUtils() {
128 super();
129 }
130
131 //-----------------------------------------------------------------------
132 /**
133 * Determines if Windows file system is in use.
134 *
135 * @return true if the system is Windows
136 */
137 static boolean isSystemWindows() {
138 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
139 }
140
141 //-----------------------------------------------------------------------
142 /**
143 * Checks if the character is a separator.
144 *
145 * @param ch the character to check
146 * @return true if it is a separator character
147 */
148 private static boolean isSeparator(char ch) {
149 return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR;
150 }
151
152 //-----------------------------------------------------------------------
153 /**
154 * Normalizes a path, removing double and single dot path steps.
155 * <p>
156 * This method normalizes a path to a standard format.
157 * The input may contain separators in either Unix or Windows format.
158 * The output will contain separators in the format of the system.
159 * <p>
160 * A trailing slash will be retained.
161 * A double slash will be merged to a single slash (but UNC names are handled).
162 * A single dot path segment will be removed.
163 * A double dot will cause that path segment and the one before to be removed.
164 * If the double dot has no parent path segment to work with, <code>null</code>
165 * is returned.
166 * <p>
167 * The output will be the same on both Unix and Windows except
168 * for the separator character.
169 * <pre>
170 * /foo// --> /foo/
171 * /foo/./ --> /foo/
172 * /foo/../bar --> /bar
173 * /foo/../bar/ --> /bar/
174 * /foo/../bar/../baz --> /baz
175 * //foo//./bar --> /foo/bar
176 * /../ --> null
177 * ../foo --> null
178 * foo/bar/.. --> foo/
179 * foo/../../bar --> null
180 * foo/../bar --> bar
181 * //server/foo/../bar --> //server/bar
182 * //server/../bar --> null
183 * C:\foo\..\bar --> C:\bar
184 * C:\..\bar --> null
185 * ~/foo/../bar/ --> ~/bar/
186 * ~/../bar --> null
187 * </pre>
188 * (Note the file separator returned will be correct for Windows/Unix)
189 *
190 * @param filename the filename to normalize, null returns null
191 * @return the normalized filename, or null if invalid
192 */
193 public static String normalize(String filename) {
194 return doNormalize(filename, SYSTEM_SEPARATOR, true);
195 }
196 /**
197 * Normalizes a path, removing double and single dot path steps.
198 * <p>
199 * This method normalizes a path to a standard format.
200 * The input may contain separators in either Unix or Windows format.
201 * The output will contain separators in the format specified.
202 * <p>
203 * A trailing slash will be retained.
204 * A double slash will be merged to a single slash (but UNC names are handled).
205 * A single dot path segment will be removed.
206 * A double dot will cause that path segment and the one before to be removed.
207 * If the double dot has no parent path segment to work with, <code>null</code>
208 * is returned.
209 * <p>
210 * The output will be the same on both Unix and Windows except
211 * for the separator character.
212 * <pre>
213 * /foo// --> /foo/
214 * /foo/./ --> /foo/
215 * /foo/../bar --> /bar
216 * /foo/../bar/ --> /bar/
217 * /foo/../bar/../baz --> /baz
218 * //foo//./bar --> /foo/bar
219 * /../ --> null
220 * ../foo --> null
221 * foo/bar/.. --> foo/
222 * foo/../../bar --> null
223 * foo/../bar --> bar
224 * //server/foo/../bar --> //server/bar
225 * //server/../bar --> null
226 * C:\foo\..\bar --> C:\bar
227 * C:\..\bar --> null
228 * ~/foo/../bar/ --> ~/bar/
229 * ~/../bar --> null
230 * </pre>
231 * The output will be the same on both Unix and Windows including
232 * the separator character.
233 *
234 * @param filename the filename to normalize, null returns null
235 * @param unixSeparator <code>true</code> if a unix separator should
236 * be used or <code>false</code> if a windows separator should be used.
237 * @return the normalized filename, or null if invalid
238 * @since 2.0
239 */
240 public static String normalize(String filename, boolean unixSeparator) {
241 char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
242 return doNormalize(filename, separator, true);
243 }
244
245 //-----------------------------------------------------------------------
246 /**
247 * Normalizes a path, removing double and single dot path steps,
248 * and removing any final directory separator.
249 * <p>
250 * This method normalizes a path to a standard format.
251 * The input may contain separators in either Unix or Windows format.
252 * The output will contain separators in the format of the system.
253 * <p>
254 * A trailing slash will be removed.
255 * A double slash will be merged to a single slash (but UNC names are handled).
256 * A single dot path segment will be removed.
257 * A double dot will cause that path segment and the one before to be removed.
258 * If the double dot has no parent path segment to work with, <code>null</code>
259 * is returned.
260 * <p>
261 * The output will be the same on both Unix and Windows except
262 * for the separator character.
263 * <pre>
264 * /foo// --> /foo
265 * /foo/./ --> /foo
266 * /foo/../bar --> /bar
267 * /foo/../bar/ --> /bar
268 * /foo/../bar/../baz --> /baz
269 * //foo//./bar --> /foo/bar
270 * /../ --> null
271 * ../foo --> null
272 * foo/bar/.. --> foo
273 * foo/../../bar --> null
274 * foo/../bar --> bar
275 * //server/foo/../bar --> //server/bar
276 * //server/../bar --> null
277 * C:\foo\..\bar --> C:\bar
278 * C:\..\bar --> null
279 * ~/foo/../bar/ --> ~/bar
280 * ~/../bar --> null
281 * </pre>
282 * (Note the file separator returned will be correct for Windows/Unix)
283 *
284 * @param filename the filename to normalize, null returns null
285 * @return the normalized filename, or null if invalid
286 */
287 public static String normalizeNoEndSeparator(String filename) {
288 return doNormalize(filename, SYSTEM_SEPARATOR, false);
289 }
290
291 /**
292 * Normalizes a path, removing double and single dot path steps,
293 * and removing any final directory separator.
294 * <p>
295 * This method normalizes a path to a standard format.
296 * The input may contain separators in either Unix or Windows format.
297 * The output will contain separators in the format specified.
298 * <p>
299 * A trailing slash will be removed.
300 * A double slash will be merged to a single slash (but UNC names are handled).
301 * A single dot path segment will be removed.
302 * A double dot will cause that path segment and the one before to be removed.
303 * If the double dot has no parent path segment to work with, <code>null</code>
304 * is returned.
305 * <p>
306 * The output will be the same on both Unix and Windows including
307 * the separator character.
308 * <pre>
309 * /foo// --> /foo
310 * /foo/./ --> /foo
311 * /foo/../bar --> /bar
312 * /foo/../bar/ --> /bar
313 * /foo/../bar/../baz --> /baz
314 * //foo//./bar --> /foo/bar
315 * /../ --> null
316 * ../foo --> null
317 * foo/bar/.. --> foo
318 * foo/../../bar --> null
319 * foo/../bar --> bar
320 * //server/foo/../bar --> //server/bar
321 * //server/../bar --> null
322 * C:\foo\..\bar --> C:\bar
323 * C:\..\bar --> null
324 * ~/foo/../bar/ --> ~/bar
325 * ~/../bar --> null
326 * </pre>
327 *
328 * @param filename the filename to normalize, null returns null
329 * @param unixSeparator <code>true</code> if a unix separator should
330 * be used or <code>false</code> if a windows separtor should be used.
331 * @return the normalized filename, or null if invalid
332 * @since 2.0
333 */
334 public static String normalizeNoEndSeparator(String filename, boolean unixSeparator) {
335 char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
336 return doNormalize(filename, separator, false);
337 }
338
339 /**
340 * Internal method to perform the normalization.
341 *
342 * @param filename the filename
343 * @param separator The separator character to use
344 * @param keepSeparator true to keep the final separator
345 * @return the normalized filename
346 */
347 private static String doNormalize(String filename, char separator, boolean keepSeparator) {
348 if (filename == null) {
349 return null;
350 }
351 int size = filename.length();
352 if (size == 0) {
353 return filename;
354 }
355 int prefix = getPrefixLength(filename);
356 if (prefix < 0) {
357 return null;
358 }
359
360 char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy
361 filename.getChars(0, filename.length(), array, 0);
362
363 // fix separators throughout
364 char otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR;
365 for (int i = 0; i < array.length; i++) {
366 if (array[i] == otherSeparator) {
367 array[i] = separator;
368 }
369 }
370
371 // add extra separator on the end to simplify code below
372 boolean lastIsDirectory = true;
373 if (array[size - 1] != separator) {
374 array[size++] = separator;
375 lastIsDirectory = false;
376 }
377
378 // adjoining slashes
379 for (int i = prefix + 1; i < size; i++) {
380 if (array[i] == separator && array[i - 1] == separator) {
381 System.arraycopy(array, i, array, i - 1, size - i);
382 size--;
383 i--;
384 }
385 }
386
387 // dot slash
388 for (int i = prefix + 1; i < size; i++) {
389 if (array[i] == separator && array[i - 1] == '.' &&
390 (i == prefix + 1 || array[i - 2] == separator)) {
391 if (i == size - 1) {
392 lastIsDirectory = true;
393 }
394 System.arraycopy(array, i + 1, array, i - 1, size - i);
395 size -=2;
396 i--;
397 }
398 }
399
400 // double dot slash
401 outer:
402 for (int i = prefix + 2; i < size; i++) {
403 if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
404 (i == prefix + 2 || array[i - 3] == separator)) {
405 if (i == prefix + 2) {
406 return null;
407 }
408 if (i == size - 1) {
409 lastIsDirectory = true;
410 }
411 int j;
412 for (j = i - 4 ; j >= prefix; j--) {
413 if (array[j] == separator) {
414 // remove b/../ from a/b/../c
415 System.arraycopy(array, i + 1, array, j + 1, size - i);
416 size -= i - j;
417 i = j + 1;
418 continue outer;
419 }
420 }
421 // remove a/../ from a/../c
422 System.arraycopy(array, i + 1, array, prefix, size - i);
423 size -= i + 1 - prefix;
424 i = prefix + 1;
425 }
426 }
427
428 if (size <= 0) { // should never be less than 0
429 return "";
430 }
431 if (size <= prefix) { // should never be less than prefix
432 return new String(array, 0, size);
433 }
434 if (lastIsDirectory && keepSeparator) {
435 return new String(array, 0, size); // keep trailing separator
436 }
437 return new String(array, 0, size - 1); // lose trailing separator
438 }
439
440 //-----------------------------------------------------------------------
441 /**
442 * Concatenates a filename to a base path using normal command line style rules.
443 * <p>
444 * The effect is equivalent to resultant directory after changing
445 * directory to the first argument, followed by changing directory to
446 * the second argument.
447 * <p>
448 * The first argument is the base path, the second is the path to concatenate.
449 * The returned path is always normalized via {@link #normalize(String)},
450 * thus <code>..</code> is handled.
451 * <p>
452 * If <code>pathToAdd</code> is absolute (has an absolute prefix), then
453 * it will be normalized and returned.
454 * Otherwise, the paths will be joined, normalized and returned.
455 * <p>
456 * The output will be the same on both Unix and Windows except
457 * for the separator character.
458 * <pre>
459 * /foo/ + bar --> /foo/bar
460 * /foo + bar --> /foo/bar
461 * /foo + /bar --> /bar
462 * /foo + C:/bar --> C:/bar
463 * /foo + C:bar --> C:bar (*)
464 * /foo/a/ + ../bar --> foo/bar
465 * /foo/ + ../../bar --> null
466 * /foo/ + /bar --> /bar
467 * /foo/.. + /bar --> /bar
468 * /foo + bar/c.txt --> /foo/bar/c.txt
469 * /foo/c.txt + bar --> /foo/c.txt/bar (!)
470 * </pre>
471 * (*) Note that the Windows relative drive prefix is unreliable when
472 * used with this method.
473 * (!) Note that the first parameter must be a path. If it ends with a name, then
474 * the name will be built into the concatenated path. If this might be a problem,
475 * use {@link #getFullPath(String)} on the base path argument.
476 *
477 * @param basePath the base path to attach to, always treated as a path
478 * @param fullFilenameToAdd the filename (or path) to attach to the base
479 * @return the concatenated path, or null if invalid
480 */
481 public static String concat(String basePath, String fullFilenameToAdd) {
482 int prefix = getPrefixLength(fullFilenameToAdd);
483 if (prefix < 0) {
484 return null;
485 }
486 if (prefix > 0) {
487 return normalize(fullFilenameToAdd);
488 }
489 if (basePath == null) {
490 return null;
491 }
492 int len = basePath.length();
493 if (len == 0) {
494 return normalize(fullFilenameToAdd);
495 }
496 char ch = basePath.charAt(len - 1);
497 if (isSeparator(ch)) {
498 return normalize(basePath + fullFilenameToAdd);
499 } else {
500 return normalize(basePath + '/' + fullFilenameToAdd);
501 }
502 }
503
504 /**
505 * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory).
506 * <p>
507 * The files names are expected to be normalized.
508 * </p>
509 *
510 * Edge cases:
511 * <ul>
512 * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li>
513 * <li>A directory does not contain itself: return false</li>
514 * <li>A null child file is not contained in any parent: return false</li>
515 * </ul>
516 *
517 * @param canonicalParent
518 * the file to consider as the parent.
519 * @param canonicalChild
520 * the file to consider as the child.
521 * @return true is the candidate leaf is under by the specified composite. False otherwise.
522 * @throws IOException
523 * if an IO error occurs while checking the files.
524 * @since 2.2
525 * @see FileUtils#directoryContains(File, File)
526 */
527 public static boolean directoryContains(final String canonicalParent, final String canonicalChild)
528 throws IOException {
529
530 // Fail fast against NullPointerException
531 if (canonicalParent == null) {
532 throw new IllegalArgumentException("Directory must not be null");
533 }
534
535 if (canonicalChild == null) {
536 return false;
537 }
538
539 if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
540 return false;
541 }
542
543 return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent);
544 }
545
546 //-----------------------------------------------------------------------
547 /**
548 * Converts all separators to the Unix separator of forward slash.
549 *
550 * @param path the path to be changed, null ignored
551 * @return the updated path
552 */
553 public static String separatorsToUnix(String path) {
554 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) {
555 return path;
556 }
557 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
558 }
559
560 /**
561 * Converts all separators to the Windows separator of backslash.
562 *
563 * @param path the path to be changed, null ignored
564 * @return the updated path
565 */
566 public static String separatorsToWindows(String path) {
567 if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) {
568 return path;
569 }
570 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
571 }
572
573 /**
574 * Converts all separators to the system separator.
575 *
576 * @param path the path to be changed, null ignored
577 * @return the updated path
578 */
579 public static String separatorsToSystem(String path) {
580 if (path == null) {
581 return null;
582 }
583 if (isSystemWindows()) {
584 return separatorsToWindows(path);
585 } else {
586 return separatorsToUnix(path);
587 }
588 }
589
590 //-----------------------------------------------------------------------
591 /**
592 * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>.
593 * <p>
594 * This method will handle a file in either Unix or Windows format.
595 * <p>
596 * The prefix length includes the first slash in the full filename
597 * if applicable. Thus, it is possible that the length returned is greater
598 * than the length of the input string.
599 * <pre>
600 * Windows:
601 * a\b\c.txt --> "" --> relative
602 * \a\b\c.txt --> "\" --> current drive absolute
603 * C:a\b\c.txt --> "C:" --> drive relative
604 * C:\a\b\c.txt --> "C:\" --> absolute
605 * \\server\a\b\c.txt --> "\\server\" --> UNC
606 *
607 * Unix:
608 * a/b/c.txt --> "" --> relative
609 * /a/b/c.txt --> "/" --> absolute
610 * ~/a/b/c.txt --> "~/" --> current user
611 * ~ --> "~/" --> current user (slash added)
612 * ~user/a/b/c.txt --> "~user/" --> named user
613 * ~user --> "~user/" --> named user (slash added)
614 * </pre>
615 * <p>
616 * The output will be the same irrespective of the machine that the code is running on.
617 * ie. both Unix and Windows prefixes are matched regardless.
618 *
619 * @param filename the filename to find the prefix in, null returns -1
620 * @return the length of the prefix, -1 if invalid or null
621 */
622 public static int getPrefixLength(String filename) {
623 if (filename == null) {
624 return -1;
625 }
626 int len = filename.length();
627 if (len == 0) {
628 return 0;
629 }
630 char ch0 = filename.charAt(0);
631 if (ch0 == ':') {
632 return -1;
633 }
634 if (len == 1) {
635 if (ch0 == '~') {
636 return 2; // return a length greater than the input
637 }
638 return isSeparator(ch0) ? 1 : 0;
639 } else {
640 if (ch0 == '~') {
641 int posUnix = filename.indexOf(UNIX_SEPARATOR, 1);
642 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1);
643 if (posUnix == -1 && posWin == -1) {
644 return len + 1; // return a length greater than the input
645 }
646 posUnix = posUnix == -1 ? posWin : posUnix;
647 posWin = posWin == -1 ? posUnix : posWin;
648 return Math.min(posUnix, posWin) + 1;
649 }
650 char ch1 = filename.charAt(1);
651 if (ch1 == ':') {
652 ch0 = Character.toUpperCase(ch0);
653 if (ch0 >= 'A' && ch0 <= 'Z') {
654 if (len == 2 || isSeparator(filename.charAt(2)) == false) {
655 return 2;
656 }
657 return 3;
658 }
659 return -1;
660
661 } else if (isSeparator(ch0) && isSeparator(ch1)) {
662 int posUnix = filename.indexOf(UNIX_SEPARATOR, 2);
663 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2);
664 if (posUnix == -1 && posWin == -1 || posUnix == 2 || posWin == 2) {
665 return -1;
666 }
667 posUnix = posUnix == -1 ? posWin : posUnix;
668 posWin = posWin == -1 ? posUnix : posWin;
669 return Math.min(posUnix, posWin) + 1;
670 } else {
671 return isSeparator(ch0) ? 1 : 0;
672 }
673 }
674 }
675
676 /**
677 * Returns the index of the last directory separator character.
678 * <p>
679 * This method will handle a file in either Unix or Windows format.
680 * The position of the last forward or backslash is returned.
681 * <p>
682 * The output will be the same irrespective of the machine that the code is running on.
683 *
684 * @param filename the filename to find the last path separator in, null returns -1
685 * @return the index of the last separator character, or -1 if there
686 * is no such character
687 */
688 public static int indexOfLastSeparator(String filename) {
689 if (filename == null) {
690 return -1;
691 }
692 int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR);
693 int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR);
694 return Math.max(lastUnixPos, lastWindowsPos);
695 }
696
697 /**
698 * Returns the index of the last extension separator character, which is a dot.
699 * <p>
700 * This method also checks that there is no directory separator after the last dot.
701 * To do this it uses {@link #indexOfLastSeparator(String)} which will
702 * handle a file in either Unix or Windows format.
703 * <p>
704 * The output will be the same irrespective of the machine that the code is running on.
705 *
706 * @param filename the filename to find the last path separator in, null returns -1
707 * @return the index of the last separator character, or -1 if there
708 * is no such character
709 */
710 public static int indexOfExtension(String filename) {
711 if (filename == null) {
712 return -1;
713 }
714 int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR);
715 int lastSeparator = indexOfLastSeparator(filename);
716 return lastSeparator > extensionPos ? -1 : extensionPos;
717 }
718
719 //-----------------------------------------------------------------------
720 /**
721 * Gets the prefix from a full filename, such as <code>C:/</code>
722 * or <code>~/</code>.
723 * <p>
724 * This method will handle a file in either Unix or Windows format.
725 * The prefix includes the first slash in the full filename where applicable.
726 * <pre>
727 * Windows:
728 * a\b\c.txt --> "" --> relative
729 * \a\b\c.txt --> "\" --> current drive absolute
730 * C:a\b\c.txt --> "C:" --> drive relative
731 * C:\a\b\c.txt --> "C:\" --> absolute
732 * \\server\a\b\c.txt --> "\\server\" --> UNC
733 *
734 * Unix:
735 * a/b/c.txt --> "" --> relative
736 * /a/b/c.txt --> "/" --> absolute
737 * ~/a/b/c.txt --> "~/" --> current user
738 * ~ --> "~/" --> current user (slash added)
739 * ~user/a/b/c.txt --> "~user/" --> named user
740 * ~user --> "~user/" --> named user (slash added)
741 * </pre>
742 * <p>
743 * The output will be the same irrespective of the machine that the code is running on.
744 * ie. both Unix and Windows prefixes are matched regardless.
745 *
746 * @param filename the filename to query, null returns null
747 * @return the prefix of the file, null if invalid
748 */
749 public static String getPrefix(String filename) {
750 if (filename == null) {
751 return null;
752 }
753 int len = getPrefixLength(filename);
754 if (len < 0) {
755 return null;
756 }
757 if (len > filename.length()) {
758 return filename + UNIX_SEPARATOR; // we know this only happens for unix
759 }
760 return filename.substring(0, len);
761 }
762
763 /**
764 * Gets the path from a full filename, which excludes the prefix.
765 * <p>
766 * This method will handle a file in either Unix or Windows format.
767 * The method is entirely text based, and returns the text before and
768 * including the last forward or backslash.
769 * <pre>
770 * C:\a\b\c.txt --> a\b\
771 * ~/a/b/c.txt --> a/b/
772 * a.txt --> ""
773 * a/b/c --> a/b/
774 * a/b/c/ --> a/b/c/
775 * </pre>
776 * <p>
777 * The output will be the same irrespective of the machine that the code is running on.
778 * <p>
779 * This method drops the prefix from the result.
780 * See {@link #getFullPath(String)} for the method that retains the prefix.
781 *
782 * @param filename the filename to query, null returns null
783 * @return the path of the file, an empty string if none exists, null if invalid
784 */
785 public static String getPath(String filename) {
786 return doGetPath(filename, 1);
787 }
788
789 /**
790 * Gets the path from a full filename, which excludes the prefix, and
791 * also excluding the final directory separator.
792 * <p>
793 * This method will handle a file in either Unix or Windows format.
794 * The method is entirely text based, and returns the text before the
795 * last forward or backslash.
796 * <pre>
797 * C:\a\b\c.txt --> a\b
798 * ~/a/b/c.txt --> a/b
799 * a.txt --> ""
800 * a/b/c --> a/b
801 * a/b/c/ --> a/b/c
802 * </pre>
803 * <p>
804 * The output will be the same irrespective of the machine that the code is running on.
805 * <p>
806 * This method drops the prefix from the result.
807 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
808 *
809 * @param filename the filename to query, null returns null
810 * @return the path of the file, an empty string if none exists, null if invalid
811 */
812 public static String getPathNoEndSeparator(String filename) {
813 return doGetPath(filename, 0);
814 }
815
816 /**
817 * Does the work of getting the path.
818 *
819 * @param filename the filename
820 * @param separatorAdd 0 to omit the end separator, 1 to return it
821 * @return the path
822 */
823 private static String doGetPath(String filename, int separatorAdd) {
824 if (filename == null) {
825 return null;
826 }
827 int prefix = getPrefixLength(filename);
828 if (prefix < 0) {
829 return null;
830 }
831 int index = indexOfLastSeparator(filename);
832 int endIndex = index+separatorAdd;
833 if (prefix >= filename.length() || index < 0 || prefix >= endIndex) {
834 return "";
835 }
836 return filename.substring(prefix, endIndex);
837 }
838
839 /**
840 * Gets the full path from a full filename, which is the prefix + path.
841 * <p>
842 * This method will handle a file in either Unix or Windows format.
843 * The method is entirely text based, and returns the text before and
844 * including the last forward or backslash.
845 * <pre>
846 * C:\a\b\c.txt --> C:\a\b\
847 * ~/a/b/c.txt --> ~/a/b/
848 * a.txt --> ""
849 * a/b/c --> a/b/
850 * a/b/c/ --> a/b/c/
851 * C: --> C:
852 * C:\ --> C:\
853 * ~ --> ~/
854 * ~/ --> ~/
855 * ~user --> ~user/
856 * ~user/ --> ~user/
857 * </pre>
858 * <p>
859 * The output will be the same irrespective of the machine that the code is running on.
860 *
861 * @param filename the filename to query, null returns null
862 * @return the path of the file, an empty string if none exists, null if invalid
863 */
864 public static String getFullPath(String filename) {
865 return doGetFullPath(filename, true);
866 }
867
868 /**
869 * Gets the full path from a full filename, which is the prefix + path,
870 * and also excluding the final directory separator.
871 * <p>
872 * This method will handle a file in either Unix or Windows format.
873 * The method is entirely text based, and returns the text before the
874 * last forward or backslash.
875 * <pre>
876 * C:\a\b\c.txt --> C:\a\b
877 * ~/a/b/c.txt --> ~/a/b
878 * a.txt --> ""
879 * a/b/c --> a/b
880 * a/b/c/ --> a/b/c
881 * C: --> C:
882 * C:\ --> C:\
883 * ~ --> ~
884 * ~/ --> ~
885 * ~user --> ~user
886 * ~user/ --> ~user
887 * </pre>
888 * <p>
889 * The output will be the same irrespective of the machine that the code is running on.
890 *
891 * @param filename the filename to query, null returns null
892 * @return the path of the file, an empty string if none exists, null if invalid
893 */
894 public static String getFullPathNoEndSeparator(String filename) {
895 return doGetFullPath(filename, false);
896 }
897
898 /**
899 * Does the work of getting the path.
900 *
901 * @param filename the filename
902 * @param includeSeparator true to include the end separator
903 * @return the path
904 */
905 private static String doGetFullPath(String filename, boolean includeSeparator) {
906 if (filename == null) {
907 return null;
908 }
909 int prefix = getPrefixLength(filename);
910 if (prefix < 0) {
911 return null;
912 }
913 if (prefix >= filename.length()) {
914 if (includeSeparator) {
915 return getPrefix(filename); // add end slash if necessary
916 } else {
917 return filename;
918 }
919 }
920 int index = indexOfLastSeparator(filename);
921 if (index < 0) {
922 return filename.substring(0, prefix);
923 }
924 int end = index + (includeSeparator ? 1 : 0);
925 if (end == 0) {
926 end++;
927 }
928 return filename.substring(0, end);
929 }
930
931 /**
932 * Gets the name minus the path from a full filename.
933 * <p>
934 * This method will handle a file in either Unix or Windows format.
935 * The text after the last forward or backslash is returned.
936 * <pre>
937 * a/b/c.txt --> c.txt
938 * a.txt --> a.txt
939 * a/b/c --> c
940 * a/b/c/ --> ""
941 * </pre>
942 * <p>
943 * The output will be the same irrespective of the machine that the code is running on.
944 *
945 * @param filename the filename to query, null returns null
946 * @return the name of the file without the path, or an empty string if none exists
947 */
948 public static String getName(String filename) {
949 if (filename == null) {
950 return null;
951 }
952 int index = indexOfLastSeparator(filename);
953 return filename.substring(index + 1);
954 }
955
956 /**
957 * Gets the base name, minus the full path and extension, from a full filename.
958 * <p>
959 * This method will handle a file in either Unix or Windows format.
960 * The text after the last forward or backslash and before the last dot is returned.
961 * <pre>
962 * a/b/c.txt --> c
963 * a.txt --> a
964 * a/b/c --> c
965 * a/b/c/ --> ""
966 * </pre>
967 * <p>
968 * The output will be the same irrespective of the machine that the code is running on.
969 *
970 * @param filename the filename to query, null returns null
971 * @return the name of the file without the path, or an empty string if none exists
972 */
973 public static String getBaseName(String filename) {
974 return removeExtension(getName(filename));
975 }
976
977 /**
978 * Gets the extension of a filename.
979 * <p>
980 * This method returns the textual part of the filename after the last dot.
981 * There must be no directory separator after the dot.
982 * <pre>
983 * foo.txt --> "txt"
984 * a/b/c.jpg --> "jpg"
985 * a/b.txt/c --> ""
986 * a/b/c --> ""
987 * </pre>
988 * <p>
989 * The output will be the same irrespective of the machine that the code is running on.
990 *
991 * @param filename the filename to retrieve the extension of.
992 * @return the extension of the file or an empty string if none exists or <code>null</code>
993 * if the filename is <code>null</code>.
994 */
995 public static String getExtension(String filename) {
996 if (filename == null) {
997 return null;
998 }
999 int index = indexOfExtension(filename);
1000 if (index == -1) {
1001 return "";
1002 } else {
1003 return filename.substring(index + 1);
1004 }
1005 }
1006
1007 //-----------------------------------------------------------------------
1008 /**
1009 * Removes the extension from a filename.
1010 * <p>
1011 * This method returns the textual part of the filename before the last dot.
1012 * There must be no directory separator after the dot.
1013 * <pre>
1014 * foo.txt --> foo
1015 * a\b\c.jpg --> a\b\c
1016 * a\b\c --> a\b\c
1017 * a.b\c --> a.b\c
1018 * </pre>
1019 * <p>
1020 * The output will be the same irrespective of the machine that the code is running on.
1021 *
1022 * @param filename the filename to query, null returns null
1023 * @return the filename minus the extension
1024 */
1025 public static String removeExtension(String filename) {
1026 if (filename == null) {
1027 return null;
1028 }
1029 int index = indexOfExtension(filename);
1030 if (index == -1) {
1031 return filename;
1032 } else {
1033 return filename.substring(0, index);
1034 }
1035 }
1036
1037 //-----------------------------------------------------------------------
1038 /**
1039 * Checks whether two filenames are equal exactly.
1040 * <p>
1041 * No processing is performed on the filenames other than comparison,
1042 * thus this is merely a null-safe case-sensitive equals.
1043 *
1044 * @param filename1 the first filename to query, may be null
1045 * @param filename2 the second filename to query, may be null
1046 * @return true if the filenames are equal, null equals null
1047 * @see IOCase#SENSITIVE
1048 */
1049 public static boolean equals(String filename1, String filename2) {
1050 return equals(filename1, filename2, false, IOCase.SENSITIVE);
1051 }
1052
1053 /**
1054 * Checks whether two filenames are equal using the case rules of the system.
1055 * <p>
1056 * No processing is performed on the filenames other than comparison.
1057 * The check is case-sensitive on Unix and case-insensitive on Windows.
1058 *
1059 * @param filename1 the first filename to query, may be null
1060 * @param filename2 the second filename to query, may be null
1061 * @return true if the filenames are equal, null equals null
1062 * @see IOCase#SYSTEM
1063 */
1064 public static boolean equalsOnSystem(String filename1, String filename2) {
1065 return equals(filename1, filename2, false, IOCase.SYSTEM);
1066 }
1067
1068 //-----------------------------------------------------------------------
1069 /**
1070 * Checks whether two filenames are equal after both have been normalized.
1071 * <p>
1072 * Both filenames are first passed to {@link #normalize(String)}.
1073 * The check is then performed in a case-sensitive manner.
1074 *
1075 * @param filename1 the first filename to query, may be null
1076 * @param filename2 the second filename to query, may be null
1077 * @return true if the filenames are equal, null equals null
1078 * @see IOCase#SENSITIVE
1079 */
1080 public static boolean equalsNormalized(String filename1, String filename2) {
1081 return equals(filename1, filename2, true, IOCase.SENSITIVE);
1082 }
1083
1084 /**
1085 * Checks whether two filenames are equal after both have been normalized
1086 * and using the case rules of the system.
1087 * <p>
1088 * Both filenames are first passed to {@link #normalize(String)}.
1089 * The check is then performed case-sensitive on Unix and
1090 * case-insensitive on Windows.
1091 *
1092 * @param filename1 the first filename to query, may be null
1093 * @param filename2 the second filename to query, may be null
1094 * @return true if the filenames are equal, null equals null
1095 * @see IOCase#SYSTEM
1096 */
1097 public static boolean equalsNormalizedOnSystem(String filename1, String filename2) {
1098 return equals(filename1, filename2, true, IOCase.SYSTEM);
1099 }
1100
1101 /**
1102 * Checks whether two filenames are equal, optionally normalizing and providing
1103 * control over the case-sensitivity.
1104 *
1105 * @param filename1 the first filename to query, may be null
1106 * @param filename2 the second filename to query, may be null
1107 * @param normalized whether to normalize the filenames
1108 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive
1109 * @return true if the filenames are equal, null equals null
1110 * @since 1.3
1111 */
1112 public static boolean equals(
1113 String filename1, String filename2,
1114 boolean normalized, IOCase caseSensitivity) {
1115
1116 if (filename1 == null || filename2 == null) {
1117 return filename1 == null && filename2 == null;
1118 }
1119 if (normalized) {
1120 filename1 = normalize(filename1);
1121 filename2 = normalize(filename2);
1122 if (filename1 == null || filename2 == null) {
1123 throw new NullPointerException(
1124 "Error normalizing one or both of the file names");
1125 }
1126 }
1127 if (caseSensitivity == null) {
1128 caseSensitivity = IOCase.SENSITIVE;
1129 }
1130 return caseSensitivity.checkEquals(filename1, filename2);
1131 }
1132
1133 //-----------------------------------------------------------------------
1134 /**
1135 * Checks whether the extension of the filename is that specified.
1136 * <p>
1137 * This method obtains the extension as the textual part of the filename
1138 * after the last dot. There must be no directory separator after the dot.
1139 * The extension check is case-sensitive on all platforms.
1140 *
1141 * @param filename the filename to query, null returns false
1142 * @param extension the extension to check for, null or empty checks for no extension
1143 * @return true if the filename has the specified extension
1144 */
1145 public static boolean isExtension(String filename, String extension) {
1146 if (filename == null) {
1147 return false;
1148 }
1149 if (extension == null || extension.length() == 0) {
1150 return indexOfExtension(filename) == -1;
1151 }
1152 String fileExt = getExtension(filename);
1153 return fileExt.equals(extension);
1154 }
1155
1156 /**
1157 * Checks whether the extension of the filename is one of those specified.
1158 * <p>
1159 * This method obtains the extension as the textual part of the filename
1160 * after the last dot. There must be no directory separator after the dot.
1161 * The extension check is case-sensitive on all platforms.
1162 *
1163 * @param filename the filename to query, null returns false
1164 * @param extensions the extensions to check for, null checks for no extension
1165 * @return true if the filename is one of the extensions
1166 */
1167 public static boolean isExtension(String filename, String[] extensions) {
1168 if (filename == null) {
1169 return false;
1170 }
1171 if (extensions == null || extensions.length == 0) {
1172 return indexOfExtension(filename) == -1;
1173 }
1174 String fileExt = getExtension(filename);
1175 for (String extension : extensions) {
1176 if (fileExt.equals(extension)) {
1177 return true;
1178 }
1179 }
1180 return false;
1181 }
1182
1183 /**
1184 * Checks whether the extension of the filename is one of those specified.
1185 * <p>
1186 * This method obtains the extension as the textual part of the filename
1187 * after the last dot. There must be no directory separator after the dot.
1188 * The extension check is case-sensitive on all platforms.
1189 *
1190 * @param filename the filename to query, null returns false
1191 * @param extensions the extensions to check for, null checks for no extension
1192 * @return true if the filename is one of the extensions
1193 */
1194 public static boolean isExtension(String filename, Collection<String> extensions) {
1195 if (filename == null) {
1196 return false;
1197 }
1198 if (extensions == null || extensions.isEmpty()) {
1199 return indexOfExtension(filename) == -1;
1200 }
1201 String fileExt = getExtension(filename);
1202 for (String extension : extensions) {
1203 if (fileExt.equals(extension)) {
1204 return true;
1205 }
1206 }
1207 return false;
1208 }
1209
1210 //-----------------------------------------------------------------------
1211 /**
1212 * Checks a filename to see if it matches the specified wildcard matcher,
1213 * always testing case-sensitive.
1214 * <p>
1215 * The wildcard matcher uses the characters '?' and '*' to represent a
1216 * single or multiple (zero or more) wildcard characters.
1217 * This is the same as often found on Dos/Unix command lines.
1218 * The check is case-sensitive always.
1219 * <pre>
1220 * wildcardMatch("c.txt", "*.txt") --> true
1221 * wildcardMatch("c.txt", "*.jpg") --> false
1222 * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1223 * wildcardMatch("c.txt", "*.???") --> true
1224 * wildcardMatch("c.txt", "*.????") --> false
1225 * </pre>
1226 * N.B. the sequence "*?" does not work properly at present in match strings.
1227 *
1228 * @param filename the filename to match on
1229 * @param wildcardMatcher the wildcard string to match against
1230 * @return true if the filename matches the wilcard string
1231 * @see IOCase#SENSITIVE
1232 */
1233 public static boolean wildcardMatch(String filename, String wildcardMatcher) {
1234 return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE);
1235 }
1236
1237 /**
1238 * Checks a filename to see if it matches the specified wildcard matcher
1239 * using the case rules of the system.
1240 * <p>
1241 * The wildcard matcher uses the characters '?' and '*' to represent a
1242 * single or multiple (zero or more) wildcard characters.
1243 * This is the same as often found on Dos/Unix command lines.
1244 * The check is case-sensitive on Unix and case-insensitive on Windows.
1245 * <pre>
1246 * wildcardMatch("c.txt", "*.txt") --> true
1247 * wildcardMatch("c.txt", "*.jpg") --> false
1248 * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1249 * wildcardMatch("c.txt", "*.???") --> true
1250 * wildcardMatch("c.txt", "*.????") --> false
1251 * </pre>
1252 * N.B. the sequence "*?" does not work properly at present in match strings.
1253 *
1254 * @param filename the filename to match on
1255 * @param wildcardMatcher the wildcard string to match against
1256 * @return true if the filename matches the wilcard string
1257 * @see IOCase#SYSTEM
1258 */
1259 public static boolean wildcardMatchOnSystem(String filename, String wildcardMatcher) {
1260 return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM);
1261 }
1262
1263 /**
1264 * Checks a filename to see if it matches the specified wildcard matcher
1265 * allowing control over case-sensitivity.
1266 * <p>
1267 * The wildcard matcher uses the characters '?' and '*' to represent a
1268 * single or multiple (zero or more) wildcard characters.
1269 * N.B. the sequence "*?" does not work properly at present in match strings.
1270 *
1271 * @param filename the filename to match on
1272 * @param wildcardMatcher the wildcard string to match against
1273 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive
1274 * @return true if the filename matches the wilcard string
1275 * @since 1.3
1276 */
1277 public static boolean wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity) {
1278 if (filename == null && wildcardMatcher == null) {
1279 return true;
1280 }
1281 if (filename == null || wildcardMatcher == null) {
1282 return false;
1283 }
1284 if (caseSensitivity == null) {
1285 caseSensitivity = IOCase.SENSITIVE;
1286 }
1287 String[] wcs = splitOnTokens(wildcardMatcher);
1288 boolean anyChars = false;
1289 int textIdx = 0;
1290 int wcsIdx = 0;
1291 Stack<int[]> backtrack = new Stack<int[]>();
1292
1293 // loop around a backtrack stack, to handle complex * matching
1294 do {
1295 if (backtrack.size() > 0) {
1296 int[] array = backtrack.pop();
1297 wcsIdx = array[0];
1298 textIdx = array[1];
1299 anyChars = true;
1300 }
1301
1302 // loop whilst tokens and text left to process
1303 while (wcsIdx < wcs.length) {
1304
1305 if (wcs[wcsIdx].equals("?")) {
1306 // ? so move to next text char
1307 textIdx++;
1308 if (textIdx > filename.length()) {
1309 break;
1310 }
1311 anyChars = false;
1312
1313 } else if (wcs[wcsIdx].equals("*")) {
1314 // set any chars status
1315 anyChars = true;
1316 if (wcsIdx == wcs.length - 1) {
1317 textIdx = filename.length();
1318 }
1319
1320 } else {
1321 // matching text token
1322 if (anyChars) {
1323 // any chars then try to locate text token
1324 textIdx = caseSensitivity.checkIndexOf(filename, textIdx, wcs[wcsIdx]);
1325 if (textIdx == -1) {
1326 // token not found
1327 break;
1328 }
1329 int repeat = caseSensitivity.checkIndexOf(filename, textIdx + 1, wcs[wcsIdx]);
1330 if (repeat >= 0) {
1331 backtrack.push(new int[] {wcsIdx, repeat});
1332 }
1333 } else {
1334 // matching from current position
1335 if (!caseSensitivity.checkRegionMatches(filename, textIdx, wcs[wcsIdx])) {
1336 // couldnt match token
1337 break;
1338 }
1339 }
1340
1341 // matched text token, move text index to end of matched token
1342 textIdx += wcs[wcsIdx].length();
1343 anyChars = false;
1344 }
1345
1346 wcsIdx++;
1347 }
1348
1349 // full match
1350 if (wcsIdx == wcs.length && textIdx == filename.length()) {
1351 return true;
1352 }
1353
1354 } while (backtrack.size() > 0);
1355
1356 return false;
1357 }
1358
1359 /**
1360 * Splits a string into a number of tokens.
1361 * The text is split by '?' and '*'.
1362 * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1363 *
1364 * @param text the text to split
1365 * @return the array of tokens, never null
1366 */
1367 static String[] splitOnTokens(String text) {
1368 // used by wildcardMatch
1369 // package level so a unit test may run on this
1370
1371 if (text.indexOf('?') == -1 && text.indexOf('*') == -1) {
1372 return new String[] { text };
1373 }
1374
1375 char[] array = text.toCharArray();
1376 ArrayList<String> list = new ArrayList<String>();
1377 StringBuilder buffer = new StringBuilder();
1378 for (int i = 0; i < array.length; i++) {
1379 if (array[i] == '?' || array[i] == '*') {
1380 if (buffer.length() != 0) {
1381 list.add(buffer.toString());
1382 buffer.setLength(0);
1383 }
1384 if (array[i] == '?') {
1385 list.add("?");
1386 } else if (list.isEmpty() ||
1387 i > 0 && list.get(list.size() - 1).equals("*") == false) {
1388 list.add("*");
1389 }
1390 } else {
1391 buffer.append(array[i]);
1392 }
1393 }
1394 if (buffer.length() != 0) {
1395 list.add(buffer.toString());
1396 }
1397
1398 return list.toArray( new String[ list.size() ] );
1399 }
1400
1401 }