001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.io;
018
019 import java.io.File;
020 import java.util.ArrayList;
021 import java.util.Collection;
022 import java.util.Stack;
023
024 /**
025 * General filename and filepath manipulation utilities.
026 * <p>
027 * When dealing with filenames you can hit problems when moving from a Windows
028 * based development machine to a Unix based production machine.
029 * This class aims to help avoid those problems.
030 * <p>
031 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
032 * using JDK {@link java.io.File File} objects and the two argument constructor
033 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
034 * <p>
035 * Most methods on this class are designed to work the same on both Unix and Windows.
036 * Those that don't include 'System', 'Unix' or 'Windows' in their name.
037 * <p>
038 * Most methods recognise both separators (forward and back), and both
039 * sets of prefixes. See the javadoc of each method for details.
040 * <p>
041 * This class defines six components within a filename
042 * (example C:\dev\project\file.txt):
043 * <ul>
044 * <li>the prefix - C:\</li>
045 * <li>the path - dev\project\</li>
046 * <li>the full path - C:\dev\project\</li>
047 * <li>the name - file.txt</li>
048 * <li>the base name - file</li>
049 * <li>the extension - txt</li>
050 * </ul>
051 * Note that this class works best if directory filenames end with a separator.
052 * If you omit the last separator, it is impossible to determine if the filename
053 * corresponds to a file or a directory. As a result, we have chosen to say
054 * it corresponds to a file.
055 * <p>
056 * This class only supports Unix and Windows style names.
057 * Prefixes are matched as follows:
058 * <pre>
059 * Windows:
060 * a\b\c.txt --> "" --> relative
061 * \a\b\c.txt --> "\" --> current drive absolute
062 * C:a\b\c.txt --> "C:" --> drive relative
063 * C:\a\b\c.txt --> "C:\" --> absolute
064 * \\server\a\b\c.txt --> "\\server\" --> UNC
065 *
066 * Unix:
067 * a/b/c.txt --> "" --> relative
068 * /a/b/c.txt --> "/" --> absolute
069 * ~/a/b/c.txt --> "~/" --> current user
070 * ~ --> "~/" --> current user (slash added)
071 * ~user/a/b/c.txt --> "~user/" --> named user
072 * ~user --> "~user/" --> named user (slash added)
073 * </pre>
074 * Both prefix styles are matched always, irrespective of the machine that you are
075 * currently running on.
076 * <p>
077 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
078 *
079 * @author <a href="mailto:burton@relativity.yi.org">Kevin A. Burton</A>
080 * @author <a href="mailto:sanders@apache.org">Scott Sanders</a>
081 * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
082 * @author <a href="mailto:Christoph.Reck@dlr.de">Christoph.Reck</a>
083 * @author <a href="mailto:peter@apache.org">Peter Donald</a>
084 * @author <a href="mailto:jefft@apache.org">Jeff Turner</a>
085 * @author Matthew Hawthorne
086 * @author Martin Cooper
087 * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a>
088 * @author Stephen Colebourne
089 * @version $Id: FilenameUtils.java 1004077 2010-10-04 00:58:42Z niallp $
090 * @since Commons IO 1.1
091 */
092 public class FilenameUtils {
093
094 /**
095 * The extension separator character.
096 * @since Commons IO 1.4
097 */
098 public static final char EXTENSION_SEPARATOR = '.';
099
100 /**
101 * The extension separator String.
102 * @since Commons IO 1.4
103 */
104 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);
105
106 /**
107 * The Unix separator character.
108 */
109 private static final char UNIX_SEPARATOR = '/';
110
111 /**
112 * The Windows separator character.
113 */
114 private static final char WINDOWS_SEPARATOR = '\\';
115
116 /**
117 * The system separator character.
118 */
119 private static final char SYSTEM_SEPARATOR = File.separatorChar;
120
121 /**
122 * The separator character that is the opposite of the system separator.
123 */
124 private static final char OTHER_SEPARATOR;
125 static {
126 if (isSystemWindows()) {
127 OTHER_SEPARATOR = UNIX_SEPARATOR;
128 } else {
129 OTHER_SEPARATOR = WINDOWS_SEPARATOR;
130 }
131 }
132
133 /**
134 * Instances should NOT be constructed in standard programming.
135 */
136 public FilenameUtils() {
137 super();
138 }
139
140 //-----------------------------------------------------------------------
141 /**
142 * Determines if Windows file system is in use.
143 *
144 * @return true if the system is Windows
145 */
146 static boolean isSystemWindows() {
147 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
148 }
149
150 //-----------------------------------------------------------------------
151 /**
152 * Checks if the character is a separator.
153 *
154 * @param ch the character to check
155 * @return true if it is a separator character
156 */
157 private static boolean isSeparator(char ch) {
158 return (ch == UNIX_SEPARATOR) || (ch == WINDOWS_SEPARATOR);
159 }
160
161 //-----------------------------------------------------------------------
162 /**
163 * Normalizes a path, removing double and single dot path steps.
164 * <p>
165 * This method normalizes a path to a standard format.
166 * The input may contain separators in either Unix or Windows format.
167 * The output will contain separators in the format of the system.
168 * <p>
169 * A trailing slash will be retained.
170 * A double slash will be merged to a single slash (but UNC names are handled).
171 * A single dot path segment will be removed.
172 * A double dot will cause that path segment and the one before to be removed.
173 * If the double dot has no parent path segment to work with, <code>null</code>
174 * is returned.
175 * <p>
176 * The output will be the same on both Unix and Windows except
177 * for the separator character.
178 * <pre>
179 * /foo// --> /foo/
180 * /foo/./ --> /foo/
181 * /foo/../bar --> /bar
182 * /foo/../bar/ --> /bar/
183 * /foo/../bar/../baz --> /baz
184 * //foo//./bar --> /foo/bar
185 * /../ --> null
186 * ../foo --> null
187 * foo/bar/.. --> foo/
188 * foo/../../bar --> null
189 * foo/../bar --> bar
190 * //server/foo/../bar --> //server/bar
191 * //server/../bar --> null
192 * C:\foo\..\bar --> C:\bar
193 * C:\..\bar --> null
194 * ~/foo/../bar/ --> ~/bar/
195 * ~/../bar --> null
196 * </pre>
197 * (Note the file separator returned will be correct for Windows/Unix)
198 *
199 * @param filename the filename to normalize, null returns null
200 * @return the normalized filename, or null if invalid
201 */
202 public static String normalize(String filename) {
203 return doNormalize(filename, SYSTEM_SEPARATOR, true);
204 }
205 /**
206 * Normalizes a path, removing double and single dot path steps.
207 * <p>
208 * This method normalizes a path to a standard format.
209 * The input may contain separators in either Unix or Windows format.
210 * The output will contain separators in the format specified.
211 * <p>
212 * A trailing slash will be retained.
213 * A double slash will be merged to a single slash (but UNC names are handled).
214 * A single dot path segment will be removed.
215 * A double dot will cause that path segment and the one before to be removed.
216 * If the double dot has no parent path segment to work with, <code>null</code>
217 * is returned.
218 * <p>
 * The output will be the same on both Unix and Windows, including
 * the separator character, because the caller chooses the separator.
221 * <pre>
222 * /foo// --> /foo/
223 * /foo/./ --> /foo/
224 * /foo/../bar --> /bar
225 * /foo/../bar/ --> /bar/
226 * /foo/../bar/../baz --> /baz
227 * //foo//./bar --> /foo/bar
228 * /../ --> null
229 * ../foo --> null
230 * foo/bar/.. --> foo/
231 * foo/../../bar --> null
232 * foo/../bar --> bar
233 * //server/foo/../bar --> //server/bar
234 * //server/../bar --> null
235 * C:\foo\..\bar --> C:\bar
236 * C:\..\bar --> null
237 * ~/foo/../bar/ --> ~/bar/
238 * ~/../bar --> null
239 * </pre>
240 * The output will be the same on both Unix and Windows including
241 * the separator character.
242 *
243 * @param filename the filename to normalize, null returns null
244 * @param unixSeparator <code>true</code> if a unix separator should
245 * be used or <code>false</code> if a windows separator should be used.
246 * @return the normalized filename, or null if invalid
247 * @since Commons IO 2.0
248 */
249 public static String normalize(String filename, boolean unixSeparator) {
250 char separator = (unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR);
251 return doNormalize(filename, separator, true);
252 }
253
254 //-----------------------------------------------------------------------
255 /**
256 * Normalizes a path, removing double and single dot path steps,
257 * and removing any final directory separator.
258 * <p>
259 * This method normalizes a path to a standard format.
260 * The input may contain separators in either Unix or Windows format.
261 * The output will contain separators in the format of the system.
262 * <p>
263 * A trailing slash will be removed.
264 * A double slash will be merged to a single slash (but UNC names are handled).
265 * A single dot path segment will be removed.
266 * A double dot will cause that path segment and the one before to be removed.
267 * If the double dot has no parent path segment to work with, <code>null</code>
268 * is returned.
269 * <p>
270 * The output will be the same on both Unix and Windows except
271 * for the separator character.
272 * <pre>
273 * /foo// --> /foo
274 * /foo/./ --> /foo
275 * /foo/../bar --> /bar
276 * /foo/../bar/ --> /bar
277 * /foo/../bar/../baz --> /baz
278 * //foo//./bar --> /foo/bar
279 * /../ --> null
280 * ../foo --> null
281 * foo/bar/.. --> foo
282 * foo/../../bar --> null
283 * foo/../bar --> bar
284 * //server/foo/../bar --> //server/bar
285 * //server/../bar --> null
286 * C:\foo\..\bar --> C:\bar
287 * C:\..\bar --> null
288 * ~/foo/../bar/ --> ~/bar
289 * ~/../bar --> null
290 * </pre>
291 * (Note the file separator returned will be correct for Windows/Unix)
292 *
293 * @param filename the filename to normalize, null returns null
294 * @return the normalized filename, or null if invalid
295 */
296 public static String normalizeNoEndSeparator(String filename) {
297 return doNormalize(filename, SYSTEM_SEPARATOR, false);
298 }
299
300 /**
301 * Normalizes a path, removing double and single dot path steps,
302 * and removing any final directory separator.
303 * <p>
304 * This method normalizes a path to a standard format.
305 * The input may contain separators in either Unix or Windows format.
306 * The output will contain separators in the format specified.
307 * <p>
308 * A trailing slash will be removed.
309 * A double slash will be merged to a single slash (but UNC names are handled).
310 * A single dot path segment will be removed.
311 * A double dot will cause that path segment and the one before to be removed.
312 * If the double dot has no parent path segment to work with, <code>null</code>
313 * is returned.
314 * <p>
315 * The output will be the same on both Unix and Windows including
316 * the separator character.
317 * <pre>
318 * /foo// --> /foo
319 * /foo/./ --> /foo
320 * /foo/../bar --> /bar
321 * /foo/../bar/ --> /bar
322 * /foo/../bar/../baz --> /baz
323 * //foo//./bar --> /foo/bar
324 * /../ --> null
325 * ../foo --> null
326 * foo/bar/.. --> foo
327 * foo/../../bar --> null
328 * foo/../bar --> bar
329 * //server/foo/../bar --> //server/bar
330 * //server/../bar --> null
331 * C:\foo\..\bar --> C:\bar
332 * C:\..\bar --> null
333 * ~/foo/../bar/ --> ~/bar
334 * ~/../bar --> null
335 * </pre>
336 *
337 * @param filename the filename to normalize, null returns null
338 * @param unixSeparator <code>true</code> if a unix separator should
 * be used or <code>false</code> if a windows separator should be used.
340 * @return the normalized filename, or null if invalid
341 * @since Commons IO 2.0
342 */
343 public static String normalizeNoEndSeparator(String filename, boolean unixSeparator) {
344 char separator = (unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR);
345 return doNormalize(filename, separator, false);
346 }
347
348 /**
349 * Internal method to perform the normalization.
350 *
351 * @param filename the filename
352 * @param separator The separator character to use
353 * @param keepSeparator true to keep the final separator
354 * @return the normalized filename
355 */
356 private static String doNormalize(String filename, char separator, boolean keepSeparator) {
357 if (filename == null) {
358 return null;
359 }
360 int size = filename.length();
361 if (size == 0) {
362 return filename;
363 }
364 int prefix = getPrefixLength(filename);
365 if (prefix < 0) {
366 return null;
367 }
368
369 char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy
370 filename.getChars(0, filename.length(), array, 0);
371
372 // fix separators throughout
373 char otherSeparator = (separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR);
374 for (int i = 0; i < array.length; i++) {
375 if (array[i] == otherSeparator) {
376 array[i] = separator;
377 }
378 }
379
380 // add extra separator on the end to simplify code below
381 boolean lastIsDirectory = true;
382 if (array[size - 1] != separator) {
383 array[size++] = separator;
384 lastIsDirectory = false;
385 }
386
387 // adjoining slashes
388 for (int i = prefix + 1; i < size; i++) {
389 if (array[i] == separator && array[i - 1] == separator) {
390 System.arraycopy(array, i, array, i - 1, size - i);
391 size--;
392 i--;
393 }
394 }
395
396 // dot slash
397 for (int i = prefix + 1; i < size; i++) {
398 if (array[i] == separator && array[i - 1] == '.' &&
399 (i == prefix + 1 || array[i - 2] == separator)) {
400 if (i == size - 1) {
401 lastIsDirectory = true;
402 }
403 System.arraycopy(array, i + 1, array, i - 1, size - i);
404 size -=2;
405 i--;
406 }
407 }
408
409 // double dot slash
410 outer:
411 for (int i = prefix + 2; i < size; i++) {
412 if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
413 (i == prefix + 2 || array[i - 3] == separator)) {
414 if (i == prefix + 2) {
415 return null;
416 }
417 if (i == size - 1) {
418 lastIsDirectory = true;
419 }
420 int j;
421 for (j = i - 4 ; j >= prefix; j--) {
422 if (array[j] == separator) {
423 // remove b/../ from a/b/../c
424 System.arraycopy(array, i + 1, array, j + 1, size - i);
425 size -= (i - j);
426 i = j + 1;
427 continue outer;
428 }
429 }
430 // remove a/../ from a/../c
431 System.arraycopy(array, i + 1, array, prefix, size - i);
432 size -= (i + 1 - prefix);
433 i = prefix + 1;
434 }
435 }
436
437 if (size <= 0) { // should never be less than 0
438 return "";
439 }
440 if (size <= prefix) { // should never be less than prefix
441 return new String(array, 0, size);
442 }
443 if (lastIsDirectory && keepSeparator) {
444 return new String(array, 0, size); // keep trailing separator
445 }
446 return new String(array, 0, size - 1); // lose trailing separator
447 }
448
449 //-----------------------------------------------------------------------
450 /**
451 * Concatenates a filename to a base path using normal command line style rules.
452 * <p>
453 * The effect is equivalent to resultant directory after changing
454 * directory to the first argument, followed by changing directory to
455 * the second argument.
456 * <p>
457 * The first argument is the base path, the second is the path to concatenate.
458 * The returned path is always normalized via {@link #normalize(String)},
459 * thus <code>..</code> is handled.
460 * <p>
461 * If <code>pathToAdd</code> is absolute (has an absolute prefix), then
462 * it will be normalized and returned.
463 * Otherwise, the paths will be joined, normalized and returned.
464 * <p>
465 * The output will be the same on both Unix and Windows except
466 * for the separator character.
467 * <pre>
468 * /foo/ + bar --> /foo/bar
469 * /foo + bar --> /foo/bar
470 * /foo + /bar --> /bar
471 * /foo + C:/bar --> C:/bar
472 * /foo + C:bar --> C:bar (*)
 * /foo/a/ + ../bar --> /foo/bar
474 * /foo/ + ../../bar --> null
475 * /foo/ + /bar --> /bar
476 * /foo/.. + /bar --> /bar
477 * /foo + bar/c.txt --> /foo/bar/c.txt
478 * /foo/c.txt + bar --> /foo/c.txt/bar (!)
479 * </pre>
480 * (*) Note that the Windows relative drive prefix is unreliable when
481 * used with this method.
482 * (!) Note that the first parameter must be a path. If it ends with a name, then
483 * the name will be built into the concatenated path. If this might be a problem,
484 * use {@link #getFullPath(String)} on the base path argument.
485 *
486 * @param basePath the base path to attach to, always treated as a path
487 * @param fullFilenameToAdd the filename (or path) to attach to the base
488 * @return the concatenated path, or null if invalid
489 */
490 public static String concat(String basePath, String fullFilenameToAdd) {
491 int prefix = getPrefixLength(fullFilenameToAdd);
492 if (prefix < 0) {
493 return null;
494 }
495 if (prefix > 0) {
496 return normalize(fullFilenameToAdd);
497 }
498 if (basePath == null) {
499 return null;
500 }
501 int len = basePath.length();
502 if (len == 0) {
503 return normalize(fullFilenameToAdd);
504 }
505 char ch = basePath.charAt(len - 1);
506 if (isSeparator(ch)) {
507 return normalize(basePath + fullFilenameToAdd);
508 } else {
509 return normalize(basePath + '/' + fullFilenameToAdd);
510 }
511 }
512
513 //-----------------------------------------------------------------------
514 /**
515 * Converts all separators to the Unix separator of forward slash.
516 *
517 * @param path the path to be changed, null ignored
518 * @return the updated path
519 */
520 public static String separatorsToUnix(String path) {
521 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) {
522 return path;
523 }
524 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
525 }
526
527 /**
528 * Converts all separators to the Windows separator of backslash.
529 *
530 * @param path the path to be changed, null ignored
531 * @return the updated path
532 */
533 public static String separatorsToWindows(String path) {
534 if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) {
535 return path;
536 }
537 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
538 }
539
540 /**
541 * Converts all separators to the system separator.
542 *
543 * @param path the path to be changed, null ignored
544 * @return the updated path
545 */
546 public static String separatorsToSystem(String path) {
547 if (path == null) {
548 return null;
549 }
550 if (isSystemWindows()) {
551 return separatorsToWindows(path);
552 } else {
553 return separatorsToUnix(path);
554 }
555 }
556
557 //-----------------------------------------------------------------------
558 /**
559 * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>.
560 * <p>
561 * This method will handle a file in either Unix or Windows format.
562 * <p>
563 * The prefix length includes the first slash in the full filename
564 * if applicable. Thus, it is possible that the length returned is greater
565 * than the length of the input string.
566 * <pre>
567 * Windows:
568 * a\b\c.txt --> "" --> relative
569 * \a\b\c.txt --> "\" --> current drive absolute
570 * C:a\b\c.txt --> "C:" --> drive relative
571 * C:\a\b\c.txt --> "C:\" --> absolute
572 * \\server\a\b\c.txt --> "\\server\" --> UNC
573 *
574 * Unix:
575 * a/b/c.txt --> "" --> relative
576 * /a/b/c.txt --> "/" --> absolute
577 * ~/a/b/c.txt --> "~/" --> current user
578 * ~ --> "~/" --> current user (slash added)
579 * ~user/a/b/c.txt --> "~user/" --> named user
580 * ~user --> "~user/" --> named user (slash added)
581 * </pre>
582 * <p>
583 * The output will be the same irrespective of the machine that the code is running on.
584 * ie. both Unix and Windows prefixes are matched regardless.
585 *
586 * @param filename the filename to find the prefix in, null returns -1
587 * @return the length of the prefix, -1 if invalid or null
588 */
589 public static int getPrefixLength(String filename) {
590 if (filename == null) {
591 return -1;
592 }
593 int len = filename.length();
594 if (len == 0) {
595 return 0;
596 }
597 char ch0 = filename.charAt(0);
598 if (ch0 == ':') {
599 return -1;
600 }
601 if (len == 1) {
602 if (ch0 == '~') {
603 return 2; // return a length greater than the input
604 }
605 return (isSeparator(ch0) ? 1 : 0);
606 } else {
607 if (ch0 == '~') {
608 int posUnix = filename.indexOf(UNIX_SEPARATOR, 1);
609 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1);
610 if (posUnix == -1 && posWin == -1) {
611 return len + 1; // return a length greater than the input
612 }
613 posUnix = (posUnix == -1 ? posWin : posUnix);
614 posWin = (posWin == -1 ? posUnix : posWin);
615 return Math.min(posUnix, posWin) + 1;
616 }
617 char ch1 = filename.charAt(1);
618 if (ch1 == ':') {
619 ch0 = Character.toUpperCase(ch0);
620 if (ch0 >= 'A' && ch0 <= 'Z') {
621 if (len == 2 || isSeparator(filename.charAt(2)) == false) {
622 return 2;
623 }
624 return 3;
625 }
626 return -1;
627
628 } else if (isSeparator(ch0) && isSeparator(ch1)) {
629 int posUnix = filename.indexOf(UNIX_SEPARATOR, 2);
630 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2);
631 if ((posUnix == -1 && posWin == -1) || posUnix == 2 || posWin == 2) {
632 return -1;
633 }
634 posUnix = (posUnix == -1 ? posWin : posUnix);
635 posWin = (posWin == -1 ? posUnix : posWin);
636 return Math.min(posUnix, posWin) + 1;
637 } else {
638 return (isSeparator(ch0) ? 1 : 0);
639 }
640 }
641 }
642
643 /**
644 * Returns the index of the last directory separator character.
645 * <p>
646 * This method will handle a file in either Unix or Windows format.
647 * The position of the last forward or backslash is returned.
648 * <p>
649 * The output will be the same irrespective of the machine that the code is running on.
650 *
651 * @param filename the filename to find the last path separator in, null returns -1
652 * @return the index of the last separator character, or -1 if there
653 * is no such character
654 */
655 public static int indexOfLastSeparator(String filename) {
656 if (filename == null) {
657 return -1;
658 }
659 int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR);
660 int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR);
661 return Math.max(lastUnixPos, lastWindowsPos);
662 }
663
664 /**
665 * Returns the index of the last extension separator character, which is a dot.
666 * <p>
667 * This method also checks that there is no directory separator after the last dot.
668 * To do this it uses {@link #indexOfLastSeparator(String)} which will
669 * handle a file in either Unix or Windows format.
670 * <p>
671 * The output will be the same irrespective of the machine that the code is running on.
672 *
 * @param filename the filename to find the last extension separator in, null returns -1
 * @return the index of the last extension separator character, or -1 if there
 * is no such character
676 */
677 public static int indexOfExtension(String filename) {
678 if (filename == null) {
679 return -1;
680 }
681 int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR);
682 int lastSeparator = indexOfLastSeparator(filename);
683 return (lastSeparator > extensionPos ? -1 : extensionPos);
684 }
685
686 //-----------------------------------------------------------------------
687 /**
688 * Gets the prefix from a full filename, such as <code>C:/</code>
689 * or <code>~/</code>.
690 * <p>
691 * This method will handle a file in either Unix or Windows format.
692 * The prefix includes the first slash in the full filename where applicable.
693 * <pre>
694 * Windows:
695 * a\b\c.txt --> "" --> relative
696 * \a\b\c.txt --> "\" --> current drive absolute
697 * C:a\b\c.txt --> "C:" --> drive relative
698 * C:\a\b\c.txt --> "C:\" --> absolute
699 * \\server\a\b\c.txt --> "\\server\" --> UNC
700 *
701 * Unix:
702 * a/b/c.txt --> "" --> relative
703 * /a/b/c.txt --> "/" --> absolute
704 * ~/a/b/c.txt --> "~/" --> current user
705 * ~ --> "~/" --> current user (slash added)
706 * ~user/a/b/c.txt --> "~user/" --> named user
707 * ~user --> "~user/" --> named user (slash added)
708 * </pre>
709 * <p>
710 * The output will be the same irrespective of the machine that the code is running on.
711 * ie. both Unix and Windows prefixes are matched regardless.
712 *
713 * @param filename the filename to query, null returns null
714 * @return the prefix of the file, null if invalid
715 */
716 public static String getPrefix(String filename) {
717 if (filename == null) {
718 return null;
719 }
720 int len = getPrefixLength(filename);
721 if (len < 0) {
722 return null;
723 }
724 if (len > filename.length()) {
725 return filename + UNIX_SEPARATOR; // we know this only happens for unix
726 }
727 return filename.substring(0, len);
728 }
729
730 /**
731 * Gets the path from a full filename, which excludes the prefix.
732 * <p>
733 * This method will handle a file in either Unix or Windows format.
734 * The method is entirely text based, and returns the text before and
735 * including the last forward or backslash.
736 * <pre>
737 * C:\a\b\c.txt --> a\b\
738 * ~/a/b/c.txt --> a/b/
739 * a.txt --> ""
740 * a/b/c --> a/b/
741 * a/b/c/ --> a/b/c/
742 * </pre>
743 * <p>
744 * The output will be the same irrespective of the machine that the code is running on.
745 * <p>
746 * This method drops the prefix from the result.
747 * See {@link #getFullPath(String)} for the method that retains the prefix.
748 *
749 * @param filename the filename to query, null returns null
750 * @return the path of the file, an empty string if none exists, null if invalid
751 */
752 public static String getPath(String filename) {
753 return doGetPath(filename, 1);
754 }
755
756 /**
757 * Gets the path from a full filename, which excludes the prefix, and
758 * also excluding the final directory separator.
759 * <p>
760 * This method will handle a file in either Unix or Windows format.
761 * The method is entirely text based, and returns the text before the
762 * last forward or backslash.
763 * <pre>
764 * C:\a\b\c.txt --> a\b
765 * ~/a/b/c.txt --> a/b
766 * a.txt --> ""
767 * a/b/c --> a/b
768 * a/b/c/ --> a/b/c
769 * </pre>
770 * <p>
771 * The output will be the same irrespective of the machine that the code is running on.
772 * <p>
773 * This method drops the prefix from the result.
774 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
775 *
776 * @param filename the filename to query, null returns null
777 * @return the path of the file, an empty string if none exists, null if invalid
778 */
779 public static String getPathNoEndSeparator(String filename) {
780 return doGetPath(filename, 0);
781 }
782
783 /**
784 * Does the work of getting the path.
785 *
786 * @param filename the filename
787 * @param separatorAdd 0 to omit the end separator, 1 to return it
788 * @return the path
789 */
790 private static String doGetPath(String filename, int separatorAdd) {
791 if (filename == null) {
792 return null;
793 }
794 int prefix = getPrefixLength(filename);
795 if (prefix < 0) {
796 return null;
797 }
798 int index = indexOfLastSeparator(filename);
799 int endIndex = index+separatorAdd;
800 if (prefix >= filename.length() || index < 0 || prefix >= endIndex) {
801 return "";
802 }
803 return filename.substring(prefix, endIndex);
804 }
805
806 /**
807 * Gets the full path from a full filename, which is the prefix + path.
808 * <p>
809 * This method will handle a file in either Unix or Windows format.
810 * The method is entirely text based, and returns the text before and
811 * including the last forward or backslash.
812 * <pre>
813 * C:\a\b\c.txt --> C:\a\b\
814 * ~/a/b/c.txt --> ~/a/b/
815 * a.txt --> ""
816 * a/b/c --> a/b/
817 * a/b/c/ --> a/b/c/
818 * C: --> C:
819 * C:\ --> C:\
820 * ~ --> ~/
821 * ~/ --> ~/
822 * ~user --> ~user/
823 * ~user/ --> ~user/
824 * </pre>
825 * <p>
826 * The output will be the same irrespective of the machine that the code is running on.
827 *
828 * @param filename the filename to query, null returns null
829 * @return the path of the file, an empty string if none exists, null if invalid
830 */
831 public static String getFullPath(String filename) {
832 return doGetFullPath(filename, true);
833 }
834
835 /**
836 * Gets the full path from a full filename, which is the prefix + path,
837 * and also excluding the final directory separator.
838 * <p>
839 * This method will handle a file in either Unix or Windows format.
840 * The method is entirely text based, and returns the text before the
841 * last forward or backslash.
842 * <pre>
843 * C:\a\b\c.txt --> C:\a\b
844 * ~/a/b/c.txt --> ~/a/b
845 * a.txt --> ""
846 * a/b/c --> a/b
847 * a/b/c/ --> a/b/c
848 * C: --> C:
849 * C:\ --> C:\
850 * ~ --> ~
 * ~/ --> ~/
 * ~user --> ~user
 * ~user/ --> ~user/
854 * </pre>
855 * <p>
856 * The output will be the same irrespective of the machine that the code is running on.
857 *
858 * @param filename the filename to query, null returns null
859 * @return the path of the file, an empty string if none exists, null if invalid
860 */
861 public static String getFullPathNoEndSeparator(String filename) {
862 return doGetFullPath(filename, false);
863 }
864
865 /**
866 * Does the work of getting the path.
867 *
868 * @param filename the filename
869 * @param includeSeparator true to include the end separator
870 * @return the path
871 */
872 private static String doGetFullPath(String filename, boolean includeSeparator) {
873 if (filename == null) {
874 return null;
875 }
876 int prefix = getPrefixLength(filename);
877 if (prefix < 0) {
878 return null;
879 }
880 if (prefix >= filename.length()) {
881 if (includeSeparator) {
882 return getPrefix(filename); // add end slash if necessary
883 } else {
884 return filename;
885 }
886 }
887 int index = indexOfLastSeparator(filename);
888 if (index < 0) {
889 return filename.substring(0, prefix);
890 }
891 int end = index + (includeSeparator ? 1 : 0);
892 if (end == 0) {
893 end++;
894 }
895 return filename.substring(0, end);
896 }
897
898 /**
899 * Gets the name minus the path from a full filename.
900 * <p>
901 * This method will handle a file in either Unix or Windows format.
902 * The text after the last forward or backslash is returned.
903 * <pre>
904 * a/b/c.txt --> c.txt
905 * a.txt --> a.txt
906 * a/b/c --> c
907 * a/b/c/ --> ""
908 * </pre>
909 * <p>
910 * The output will be the same irrespective of the machine that the code is running on.
911 *
912 * @param filename the filename to query, null returns null
913 * @return the name of the file without the path, or an empty string if none exists
914 */
915 public static String getName(String filename) {
916 if (filename == null) {
917 return null;
918 }
919 int index = indexOfLastSeparator(filename);
920 return filename.substring(index + 1);
921 }
922
923 /**
924 * Gets the base name, minus the full path and extension, from a full filename.
925 * <p>
926 * This method will handle a file in either Unix or Windows format.
927 * The text after the last forward or backslash and before the last dot is returned.
928 * <pre>
929 * a/b/c.txt --> c
930 * a.txt --> a
931 * a/b/c --> c
932 * a/b/c/ --> ""
933 * </pre>
934 * <p>
935 * The output will be the same irrespective of the machine that the code is running on.
936 *
937 * @param filename the filename to query, null returns null
938 * @return the name of the file without the path, or an empty string if none exists
939 */
940 public static String getBaseName(String filename) {
941 return removeExtension(getName(filename));
942 }
943
944 /**
945 * Gets the extension of a filename.
946 * <p>
947 * This method returns the textual part of the filename after the last dot.
948 * There must be no directory separator after the dot.
949 * <pre>
950 * foo.txt --> "txt"
951 * a/b/c.jpg --> "jpg"
952 * a/b.txt/c --> ""
953 * a/b/c --> ""
954 * </pre>
955 * <p>
956 * The output will be the same irrespective of the machine that the code is running on.
957 *
958 * @param filename the filename to retrieve the extension of.
959 * @return the extension of the file or an empty string if none exists or <code>null</code>
960 * if the filename is <code>null</code>.
961 */
962 public static String getExtension(String filename) {
963 if (filename == null) {
964 return null;
965 }
966 int index = indexOfExtension(filename);
967 if (index == -1) {
968 return "";
969 } else {
970 return filename.substring(index + 1);
971 }
972 }
973
974 //-----------------------------------------------------------------------
975 /**
976 * Removes the extension from a filename.
977 * <p>
978 * This method returns the textual part of the filename before the last dot.
979 * There must be no directory separator after the dot.
980 * <pre>
981 * foo.txt --> foo
982 * a\b\c.jpg --> a\b\c
983 * a\b\c --> a\b\c
984 * a.b\c --> a.b\c
985 * </pre>
986 * <p>
987 * The output will be the same irrespective of the machine that the code is running on.
988 *
989 * @param filename the filename to query, null returns null
990 * @return the filename minus the extension
991 */
992 public static String removeExtension(String filename) {
993 if (filename == null) {
994 return null;
995 }
996 int index = indexOfExtension(filename);
997 if (index == -1) {
998 return filename;
999 } else {
1000 return filename.substring(0, index);
1001 }
1002 }
1003
1004 //-----------------------------------------------------------------------
1005 /**
1006 * Checks whether two filenames are equal exactly.
1007 * <p>
1008 * No processing is performed on the filenames other than comparison,
1009 * thus this is merely a null-safe case-sensitive equals.
1010 *
1011 * @param filename1 the first filename to query, may be null
1012 * @param filename2 the second filename to query, may be null
1013 * @return true if the filenames are equal, null equals null
1014 * @see IOCase#SENSITIVE
1015 */
1016 public static boolean equals(String filename1, String filename2) {
1017 return equals(filename1, filename2, false, IOCase.SENSITIVE);
1018 }
1019
1020 /**
1021 * Checks whether two filenames are equal using the case rules of the system.
1022 * <p>
1023 * No processing is performed on the filenames other than comparison.
1024 * The check is case-sensitive on Unix and case-insensitive on Windows.
1025 *
1026 * @param filename1 the first filename to query, may be null
1027 * @param filename2 the second filename to query, may be null
1028 * @return true if the filenames are equal, null equals null
1029 * @see IOCase#SYSTEM
1030 */
1031 public static boolean equalsOnSystem(String filename1, String filename2) {
1032 return equals(filename1, filename2, false, IOCase.SYSTEM);
1033 }
1034
1035 //-----------------------------------------------------------------------
1036 /**
1037 * Checks whether two filenames are equal after both have been normalized.
1038 * <p>
1039 * Both filenames are first passed to {@link #normalize(String)}.
1040 * The check is then performed in a case-sensitive manner.
1041 *
1042 * @param filename1 the first filename to query, may be null
1043 * @param filename2 the second filename to query, may be null
1044 * @return true if the filenames are equal, null equals null
1045 * @see IOCase#SENSITIVE
1046 */
1047 public static boolean equalsNormalized(String filename1, String filename2) {
1048 return equals(filename1, filename2, true, IOCase.SENSITIVE);
1049 }
1050
1051 /**
1052 * Checks whether two filenames are equal after both have been normalized
1053 * and using the case rules of the system.
1054 * <p>
1055 * Both filenames are first passed to {@link #normalize(String)}.
1056 * The check is then performed case-sensitive on Unix and
1057 * case-insensitive on Windows.
1058 *
1059 * @param filename1 the first filename to query, may be null
1060 * @param filename2 the second filename to query, may be null
1061 * @return true if the filenames are equal, null equals null
1062 * @see IOCase#SYSTEM
1063 */
1064 public static boolean equalsNormalizedOnSystem(String filename1, String filename2) {
1065 return equals(filename1, filename2, true, IOCase.SYSTEM);
1066 }
1067
1068 /**
1069 * Checks whether two filenames are equal, optionally normalizing and providing
1070 * control over the case-sensitivity.
1071 *
1072 * @param filename1 the first filename to query, may be null
1073 * @param filename2 the second filename to query, may be null
1074 * @param normalized whether to normalize the filenames
1075 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive
1076 * @return true if the filenames are equal, null equals null
1077 * @since Commons IO 1.3
1078 */
1079 public static boolean equals(
1080 String filename1, String filename2,
1081 boolean normalized, IOCase caseSensitivity) {
1082
1083 if (filename1 == null || filename2 == null) {
1084 return (filename1 == null && filename2 == null);
1085 }
1086 if (normalized) {
1087 filename1 = normalize(filename1);
1088 filename2 = normalize(filename2);
1089 if (filename1 == null || filename2 == null) {
1090 throw new NullPointerException(
1091 "Error normalizing one or both of the file names");
1092 }
1093 }
1094 if (caseSensitivity == null) {
1095 caseSensitivity = IOCase.SENSITIVE;
1096 }
1097 return caseSensitivity.checkEquals(filename1, filename2);
1098 }
1099
1100 //-----------------------------------------------------------------------
1101 /**
1102 * Checks whether the extension of the filename is that specified.
1103 * <p>
1104 * This method obtains the extension as the textual part of the filename
1105 * after the last dot. There must be no directory separator after the dot.
1106 * The extension check is case-sensitive on all platforms.
1107 *
1108 * @param filename the filename to query, null returns false
1109 * @param extension the extension to check for, null or empty checks for no extension
1110 * @return true if the filename has the specified extension
1111 */
1112 public static boolean isExtension(String filename, String extension) {
1113 if (filename == null) {
1114 return false;
1115 }
1116 if (extension == null || extension.length() == 0) {
1117 return (indexOfExtension(filename) == -1);
1118 }
1119 String fileExt = getExtension(filename);
1120 return fileExt.equals(extension);
1121 }
1122
1123 /**
1124 * Checks whether the extension of the filename is one of those specified.
1125 * <p>
1126 * This method obtains the extension as the textual part of the filename
1127 * after the last dot. There must be no directory separator after the dot.
1128 * The extension check is case-sensitive on all platforms.
1129 *
1130 * @param filename the filename to query, null returns false
1131 * @param extensions the extensions to check for, null checks for no extension
1132 * @return true if the filename is one of the extensions
1133 */
1134 public static boolean isExtension(String filename, String[] extensions) {
1135 if (filename == null) {
1136 return false;
1137 }
1138 if (extensions == null || extensions.length == 0) {
1139 return (indexOfExtension(filename) == -1);
1140 }
1141 String fileExt = getExtension(filename);
1142 for (String extension : extensions) {
1143 if (fileExt.equals(extension)) {
1144 return true;
1145 }
1146 }
1147 return false;
1148 }
1149
1150 /**
1151 * Checks whether the extension of the filename is one of those specified.
1152 * <p>
1153 * This method obtains the extension as the textual part of the filename
1154 * after the last dot. There must be no directory separator after the dot.
1155 * The extension check is case-sensitive on all platforms.
1156 *
1157 * @param filename the filename to query, null returns false
1158 * @param extensions the extensions to check for, null checks for no extension
1159 * @return true if the filename is one of the extensions
1160 */
1161 public static boolean isExtension(String filename, Collection<String> extensions) {
1162 if (filename == null) {
1163 return false;
1164 }
1165 if (extensions == null || extensions.isEmpty()) {
1166 return (indexOfExtension(filename) == -1);
1167 }
1168 String fileExt = getExtension(filename);
1169 for (String extension : extensions) {
1170 if (fileExt.equals(extension)) {
1171 return true;
1172 }
1173 }
1174 return false;
1175 }
1176
1177 //-----------------------------------------------------------------------
1178 /**
1179 * Checks a filename to see if it matches the specified wildcard matcher,
1180 * always testing case-sensitive.
1181 * <p>
1182 * The wildcard matcher uses the characters '?' and '*' to represent a
1183 * single or multiple (zero or more) wildcard characters.
1184 * This is the same as often found on Dos/Unix command lines.
1185 * The check is case-sensitive always.
1186 * <pre>
1187 * wildcardMatch("c.txt", "*.txt") --> true
1188 * wildcardMatch("c.txt", "*.jpg") --> false
1189 * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1190 * wildcardMatch("c.txt", "*.???") --> true
1191 * wildcardMatch("c.txt", "*.????") --> false
1192 * </pre>
1193 * N.B. the sequence "*?" does not work properly at present in match strings.
1194 *
1195 * @param filename the filename to match on
1196 * @param wildcardMatcher the wildcard string to match against
1197 * @return true if the filename matches the wilcard string
1198 * @see IOCase#SENSITIVE
1199 */
1200 public static boolean wildcardMatch(String filename, String wildcardMatcher) {
1201 return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE);
1202 }
1203
1204 /**
1205 * Checks a filename to see if it matches the specified wildcard matcher
1206 * using the case rules of the system.
1207 * <p>
1208 * The wildcard matcher uses the characters '?' and '*' to represent a
1209 * single or multiple (zero or more) wildcard characters.
1210 * This is the same as often found on Dos/Unix command lines.
1211 * The check is case-sensitive on Unix and case-insensitive on Windows.
1212 * <pre>
1213 * wildcardMatch("c.txt", "*.txt") --> true
1214 * wildcardMatch("c.txt", "*.jpg") --> false
1215 * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1216 * wildcardMatch("c.txt", "*.???") --> true
1217 * wildcardMatch("c.txt", "*.????") --> false
1218 * </pre>
1219 * N.B. the sequence "*?" does not work properly at present in match strings.
1220 *
1221 * @param filename the filename to match on
1222 * @param wildcardMatcher the wildcard string to match against
1223 * @return true if the filename matches the wilcard string
1224 * @see IOCase#SYSTEM
1225 */
1226 public static boolean wildcardMatchOnSystem(String filename, String wildcardMatcher) {
1227 return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM);
1228 }
1229
1230 /**
1231 * Checks a filename to see if it matches the specified wildcard matcher
1232 * allowing control over case-sensitivity.
1233 * <p>
1234 * The wildcard matcher uses the characters '?' and '*' to represent a
1235 * single or multiple (zero or more) wildcard characters.
1236 * N.B. the sequence "*?" does not work properly at present in match strings.
1237 *
1238 * @param filename the filename to match on
1239 * @param wildcardMatcher the wildcard string to match against
1240 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive
1241 * @return true if the filename matches the wilcard string
1242 * @since Commons IO 1.3
1243 */
1244 public static boolean wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity) {
1245 if (filename == null && wildcardMatcher == null) {
1246 return true;
1247 }
1248 if (filename == null || wildcardMatcher == null) {
1249 return false;
1250 }
1251 if (caseSensitivity == null) {
1252 caseSensitivity = IOCase.SENSITIVE;
1253 }
1254 String[] wcs = splitOnTokens(wildcardMatcher);
1255 boolean anyChars = false;
1256 int textIdx = 0;
1257 int wcsIdx = 0;
1258 Stack<int[]> backtrack = new Stack<int[]>();
1259
1260 // loop around a backtrack stack, to handle complex * matching
1261 do {
1262 if (backtrack.size() > 0) {
1263 int[] array = backtrack.pop();
1264 wcsIdx = array[0];
1265 textIdx = array[1];
1266 anyChars = true;
1267 }
1268
1269 // loop whilst tokens and text left to process
1270 while (wcsIdx < wcs.length) {
1271
1272 if (wcs[wcsIdx].equals("?")) {
1273 // ? so move to next text char
1274 textIdx++;
1275 if (textIdx > filename.length()) {
1276 break;
1277 }
1278 anyChars = false;
1279
1280 } else if (wcs[wcsIdx].equals("*")) {
1281 // set any chars status
1282 anyChars = true;
1283 if (wcsIdx == wcs.length - 1) {
1284 textIdx = filename.length();
1285 }
1286
1287 } else {
1288 // matching text token
1289 if (anyChars) {
1290 // any chars then try to locate text token
1291 textIdx = caseSensitivity.checkIndexOf(filename, textIdx, wcs[wcsIdx]);
1292 if (textIdx == -1) {
1293 // token not found
1294 break;
1295 }
1296 int repeat = caseSensitivity.checkIndexOf(filename, textIdx + 1, wcs[wcsIdx]);
1297 if (repeat >= 0) {
1298 backtrack.push(new int[] {wcsIdx, repeat});
1299 }
1300 } else {
1301 // matching from current position
1302 if (!caseSensitivity.checkRegionMatches(filename, textIdx, wcs[wcsIdx])) {
1303 // couldnt match token
1304 break;
1305 }
1306 }
1307
1308 // matched text token, move text index to end of matched token
1309 textIdx += wcs[wcsIdx].length();
1310 anyChars = false;
1311 }
1312
1313 wcsIdx++;
1314 }
1315
1316 // full match
1317 if (wcsIdx == wcs.length && textIdx == filename.length()) {
1318 return true;
1319 }
1320
1321 } while (backtrack.size() > 0);
1322
1323 return false;
1324 }
1325
1326 /**
1327 * Splits a string into a number of tokens.
1328 * The text is split by '?' and '*'.
1329 * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1330 *
1331 * @param text the text to split
1332 * @return the array of tokens, never null
1333 */
1334 static String[] splitOnTokens(String text) {
1335 // used by wildcardMatch
1336 // package level so a unit test may run on this
1337
1338 if (text.indexOf('?') == -1 && text.indexOf('*') == -1) {
1339 return new String[] { text };
1340 }
1341
1342 char[] array = text.toCharArray();
1343 ArrayList<String> list = new ArrayList<String>();
1344 StringBuilder buffer = new StringBuilder();
1345 for (int i = 0; i < array.length; i++) {
1346 if (array[i] == '?' || array[i] == '*') {
1347 if (buffer.length() != 0) {
1348 list.add(buffer.toString());
1349 buffer.setLength(0);
1350 }
1351 if (array[i] == '?') {
1352 list.add("?");
1353 } else if (list.size() == 0 ||
1354 (i > 0 && list.get(list.size() - 1).equals("*") == false)) {
1355 list.add("*");
1356 }
1357 } else {
1358 buffer.append(array[i]);
1359 }
1360 }
1361 if (buffer.length() != 0) {
1362 list.add(buffer.toString());
1363 }
1364
1365 return list.toArray( new String[ list.size() ] );
1366 }
1367
1368 }