001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.io; 018 019 import java.io.File; 020 import java.io.IOException; 021 import java.util.ArrayList; 022 import java.util.Collection; 023 import java.util.Stack; 024 025 /** 026 * General filename and filepath manipulation utilities. 027 * <p> 028 * When dealing with filenames you can hit problems when moving from a Windows 029 * based development machine to a Unix based production machine. 030 * This class aims to help avoid those problems. 031 * <p> 032 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by 033 * using JDK {@link java.io.File File} objects and the two argument constructor 034 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}. 035 * <p> 036 * Most methods on this class are designed to work the same on both Unix and Windows. 037 * Those that don't include 'System', 'Unix' or 'Windows' in their name. 038 * <p> 039 * Most methods recognise both separators (forward and back), and both 040 * sets of prefixes. See the javadoc of each method for details. 
041 * <p> 042 * This class defines six components within a filename 043 * (example C:\dev\project\file.txt): 044 * <ul> 045 * <li>the prefix - C:\</li> 046 * <li>the path - dev\project\</li> 047 * <li>the full path - C:\dev\project\</li> 048 * <li>the name - file.txt</li> 049 * <li>the base name - file</li> 050 * <li>the extension - txt</li> 051 * </ul> 052 * Note that this class works best if directory filenames end with a separator. 053 * If you omit the last separator, it is impossible to determine if the filename 054 * corresponds to a file or a directory. As a result, we have chosen to say 055 * it corresponds to a file. 056 * <p> 057 * This class only supports Unix and Windows style names. 058 * Prefixes are matched as follows: 059 * <pre> 060 * Windows: 061 * a\b\c.txt --> "" --> relative 062 * \a\b\c.txt --> "\" --> current drive absolute 063 * C:a\b\c.txt --> "C:" --> drive relative 064 * C:\a\b\c.txt --> "C:\" --> absolute 065 * \\server\a\b\c.txt --> "\\server\" --> UNC 066 * 067 * Unix: 068 * a/b/c.txt --> "" --> relative 069 * /a/b/c.txt --> "/" --> absolute 070 * ~/a/b/c.txt --> "~/" --> current user 071 * ~ --> "~/" --> current user (slash added) 072 * ~user/a/b/c.txt --> "~user/" --> named user 073 * ~user --> "~user/" --> named user (slash added) 074 * </pre> 075 * Both prefix styles are matched always, irrespective of the machine that you are 076 * currently running on. 077 * <p> 078 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils. 079 * 080 * @version $Id: FilenameUtils.java 1304052 2012-03-22 20:55:29Z ggregory $ 081 * @since 1.1 082 */ 083 public class FilenameUtils { 084 085 /** 086 * The extension separator character. 087 * @since 1.4 088 */ 089 public static final char EXTENSION_SEPARATOR = '.'; 090 091 /** 092 * The extension separator String. 093 * @since 1.4 094 */ 095 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR); 096 097 /** 098 * The Unix separator character. 
099 */ 100 private static final char UNIX_SEPARATOR = '/'; 101 102 /** 103 * The Windows separator character. 104 */ 105 private static final char WINDOWS_SEPARATOR = '\\'; 106 107 /** 108 * The system separator character. 109 */ 110 private static final char SYSTEM_SEPARATOR = File.separatorChar; 111 112 /** 113 * The separator character that is the opposite of the system separator. 114 */ 115 private static final char OTHER_SEPARATOR; 116 static { 117 if (isSystemWindows()) { 118 OTHER_SEPARATOR = UNIX_SEPARATOR; 119 } else { 120 OTHER_SEPARATOR = WINDOWS_SEPARATOR; 121 } 122 } 123 124 /** 125 * Instances should NOT be constructed in standard programming. 126 */ 127 public FilenameUtils() { 128 super(); 129 } 130 131 //----------------------------------------------------------------------- 132 /** 133 * Determines if Windows file system is in use. 134 * 135 * @return true if the system is Windows 136 */ 137 static boolean isSystemWindows() { 138 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR; 139 } 140 141 //----------------------------------------------------------------------- 142 /** 143 * Checks if the character is a separator. 144 * 145 * @param ch the character to check 146 * @return true if it is a separator character 147 */ 148 private static boolean isSeparator(char ch) { 149 return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR; 150 } 151 152 //----------------------------------------------------------------------- 153 /** 154 * Normalizes a path, removing double and single dot path steps. 155 * <p> 156 * This method normalizes a path to a standard format. 157 * The input may contain separators in either Unix or Windows format. 158 * The output will contain separators in the format of the system. 159 * <p> 160 * A trailing slash will be retained. 161 * A double slash will be merged to a single slash (but UNC names are handled). 162 * A single dot path segment will be removed. 
163 * A double dot will cause that path segment and the one before to be removed. 164 * If the double dot has no parent path segment to work with, <code>null</code> 165 * is returned. 166 * <p> 167 * The output will be the same on both Unix and Windows except 168 * for the separator character. 169 * <pre> 170 * /foo// --> /foo/ 171 * /foo/./ --> /foo/ 172 * /foo/../bar --> /bar 173 * /foo/../bar/ --> /bar/ 174 * /foo/../bar/../baz --> /baz 175 * //foo//./bar --> /foo/bar 176 * /../ --> null 177 * ../foo --> null 178 * foo/bar/.. --> foo/ 179 * foo/../../bar --> null 180 * foo/../bar --> bar 181 * //server/foo/../bar --> //server/bar 182 * //server/../bar --> null 183 * C:\foo\..\bar --> C:\bar 184 * C:\..\bar --> null 185 * ~/foo/../bar/ --> ~/bar/ 186 * ~/../bar --> null 187 * </pre> 188 * (Note the file separator returned will be correct for Windows/Unix) 189 * 190 * @param filename the filename to normalize, null returns null 191 * @return the normalized filename, or null if invalid 192 */ 193 public static String normalize(String filename) { 194 return doNormalize(filename, SYSTEM_SEPARATOR, true); 195 } 196 /** 197 * Normalizes a path, removing double and single dot path steps. 198 * <p> 199 * This method normalizes a path to a standard format. 200 * The input may contain separators in either Unix or Windows format. 201 * The output will contain separators in the format specified. 202 * <p> 203 * A trailing slash will be retained. 204 * A double slash will be merged to a single slash (but UNC names are handled). 205 * A single dot path segment will be removed. 206 * A double dot will cause that path segment and the one before to be removed. 207 * If the double dot has no parent path segment to work with, <code>null</code> 208 * is returned. 209 * <p> 210 * The output will be the same on both Unix and Windows except 211 * for the separator character. 
212 * <pre> 213 * /foo// --> /foo/ 214 * /foo/./ --> /foo/ 215 * /foo/../bar --> /bar 216 * /foo/../bar/ --> /bar/ 217 * /foo/../bar/../baz --> /baz 218 * //foo//./bar --> /foo/bar 219 * /../ --> null 220 * ../foo --> null 221 * foo/bar/.. --> foo/ 222 * foo/../../bar --> null 223 * foo/../bar --> bar 224 * //server/foo/../bar --> //server/bar 225 * //server/../bar --> null 226 * C:\foo\..\bar --> C:\bar 227 * C:\..\bar --> null 228 * ~/foo/../bar/ --> ~/bar/ 229 * ~/../bar --> null 230 * </pre> 231 * The output will be the same on both Unix and Windows including 232 * the separator character. 233 * 234 * @param filename the filename to normalize, null returns null 235 * @param unixSeparator <code>true</code> if a unix separator should 236 * be used or <code>false</code> if a windows separator should be used. 237 * @return the normalized filename, or null if invalid 238 * @since 2.0 239 */ 240 public static String normalize(String filename, boolean unixSeparator) { 241 char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; 242 return doNormalize(filename, separator, true); 243 } 244 245 //----------------------------------------------------------------------- 246 /** 247 * Normalizes a path, removing double and single dot path steps, 248 * and removing any final directory separator. 249 * <p> 250 * This method normalizes a path to a standard format. 251 * The input may contain separators in either Unix or Windows format. 252 * The output will contain separators in the format of the system. 253 * <p> 254 * A trailing slash will be removed. 255 * A double slash will be merged to a single slash (but UNC names are handled). 256 * A single dot path segment will be removed. 257 * A double dot will cause that path segment and the one before to be removed. 258 * If the double dot has no parent path segment to work with, <code>null</code> 259 * is returned. 
260 * <p> 261 * The output will be the same on both Unix and Windows except 262 * for the separator character. 263 * <pre> 264 * /foo// --> /foo 265 * /foo/./ --> /foo 266 * /foo/../bar --> /bar 267 * /foo/../bar/ --> /bar 268 * /foo/../bar/../baz --> /baz 269 * //foo//./bar --> /foo/bar 270 * /../ --> null 271 * ../foo --> null 272 * foo/bar/.. --> foo 273 * foo/../../bar --> null 274 * foo/../bar --> bar 275 * //server/foo/../bar --> //server/bar 276 * //server/../bar --> null 277 * C:\foo\..\bar --> C:\bar 278 * C:\..\bar --> null 279 * ~/foo/../bar/ --> ~/bar 280 * ~/../bar --> null 281 * </pre> 282 * (Note the file separator returned will be correct for Windows/Unix) 283 * 284 * @param filename the filename to normalize, null returns null 285 * @return the normalized filename, or null if invalid 286 */ 287 public static String normalizeNoEndSeparator(String filename) { 288 return doNormalize(filename, SYSTEM_SEPARATOR, false); 289 } 290 291 /** 292 * Normalizes a path, removing double and single dot path steps, 293 * and removing any final directory separator. 294 * <p> 295 * This method normalizes a path to a standard format. 296 * The input may contain separators in either Unix or Windows format. 297 * The output will contain separators in the format specified. 298 * <p> 299 * A trailing slash will be removed. 300 * A double slash will be merged to a single slash (but UNC names are handled). 301 * A single dot path segment will be removed. 302 * A double dot will cause that path segment and the one before to be removed. 303 * If the double dot has no parent path segment to work with, <code>null</code> 304 * is returned. 305 * <p> 306 * The output will be the same on both Unix and Windows including 307 * the separator character. 
* <pre>
     * /foo//               --&gt;   /foo
     * /foo/./              --&gt;   /foo
     * /foo/../bar          --&gt;   /bar
     * /foo/../bar/         --&gt;   /bar
     * /foo/../bar/../baz   --&gt;   /baz
     * //foo//./bar         --&gt;   /foo/bar
     * /../                 --&gt;   null
     * ../foo               --&gt;   null
     * foo/bar/..           --&gt;   foo
     * foo/../../bar        --&gt;   null
     * foo/../bar           --&gt;   bar
     * //server/foo/../bar  --&gt;   //server/bar
     * //server/../bar      --&gt;   null
     * C:\foo\..\bar        --&gt;   C:\bar
     * C:\..\bar            --&gt;   null
     * ~/foo/../bar/        --&gt;   ~/bar
     * ~/../bar             --&gt;   null
     * </pre>
     *
     * @param filename  the filename to normalize, null returns null
     * @param unixSeparator <code>true</code> if a unix separator should
     * be used or <code>false</code> if a windows separator should be used.
     * @return the normalized filename, or null if invalid
     * @since 2.0
     */
    public static String normalizeNoEndSeparator(String filename, boolean unixSeparator) {
        char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
        return doNormalize(filename, separator, false);
    }

    /**
     * Internal method to perform the normalization.
     * <p>
     * The filename is copied into a character buffer, every separator is
     * rewritten to <code>separator</code>, and the buffer is then compacted
     * in place in three passes: adjoining separators, single dot segments,
     * and double dot segments together with the segment before them.
     * <p>
     * Returns <code>null</code> when the filename is <code>null</code>, when
     * {@link #getPrefixLength(String)} reports a negative (invalid) prefix, or
     * when a double dot segment is found directly after the prefix, where it
     * has no parent segment to remove. An empty filename is returned unchanged.
     *
     * @param filename  the filename
     * @param separator  the separator character to use
     * @param keepSeparator  true to keep the final separator
     * @return the normalized filename, or null if it cannot be normalized
     */
    private static String doNormalize(String filename, char separator, boolean keepSeparator) {
        if (filename == null) {
            return null;
        }
        int size = filename.length();
        if (size == 0) {
            return filename;
        }
        int prefix = getPrefixLength(filename);
        if (prefix < 0) {
            return null;
        }

        char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
        filename.getChars(0, filename.length(), array, 0);

        // fix separators throughout
        // (the separator to replace is whichever of the system/other pair was not requested)
        char otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR;
        for (int i = 0; i < array.length; i++) {
            if (array[i] == otherSeparator) {
                array[i] = separator;
            }
        }

        // add extra separator on the end to simplify code below
        // (lastIsDirectory remembers whether the input really ended with one)
        boolean lastIsDirectory = true;
        if (array[size - 1] != separator) {
            array[size++] = separator;
            lastIsDirectory = false;
        }

        // adjoining slashes
        // (scanning starts after the prefix, so separators inside the prefix,
        // such as the leading pair of a UNC name, are left alone)
        for (int i = prefix + 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == separator) {
                System.arraycopy(array, i, array, i - 1, size - i);
                size--;
                i--;
            }
        }

        // dot slash
        // (a "." counts as a segment only directly after the prefix or after a separator)
        for (int i = prefix + 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' &&
                    (i == prefix + 1 || array[i - 2] == separator)) {
                if (i == size - 1) {
                    // a trailing "." segment means the result names a directory
                    lastIsDirectory = true;
                }
                System.arraycopy(array, i + 1, array, i - 1, size - i);
                size -=2;
                i--;
            }
        }

        // double dot slash
        outer:
        for (int i = prefix + 2; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
                    (i == prefix + 2 || array[i - 3] == separator)) {
                if (i == prefix + 2) {
                    // ".." directly after the prefix has no parent segment to remove
                    return null;
                }
                if (i == size - 1) {
                    // a trailing ".." segment means the result names a directory
                    lastIsDirectory = true;
                }
                int j;
                // search backwards for the separator that starts the parent segment
                for (j = i - 4 ; j >= prefix; j--) {
                    if (array[j] == separator) {
                        // remove b/../ from a/b/../c
                        System.arraycopy(array, i + 1, array, j + 1, size - i);
                        size -= i - j;
                        i = j + 1;
                        continue outer;
                    }
                }
                // remove a/../ from a/../c
                System.arraycopy(array, i + 1, array, prefix, size - i);
                size -= i + 1 - prefix;
                i = prefix + 1;
            }
        }

        if (size <= 0) {  // should never be less than 0
            return "";
        }
        if (size <= prefix) {  // should never be less than prefix
            return new String(array, 0, size);
        }
        if (lastIsDirectory && keepSeparator) {
            return new String(array, 0, size);  // keep trailing separator
        }
        return new String(array, 0, size - 1);  // lose trailing separator
    }

    //-----------------------------------------------------------------------
    /**
     * Concatenates a filename to a base path using normal command line style rules.
     * <p>
     * The effect is equivalent to resultant directory after changing
     * directory to the first argument, followed by changing directory to
     * the second argument.
     * <p>
     * The first argument is the base path, the second is the path to concatenate.
     * The returned path is always normalized via {@link #normalize(String)},
     * thus <code>..</code> is handled.
     * <p>
     * If <code>fullFilenameToAdd</code> is absolute (has an absolute prefix), then
     * it will be normalized and returned.
     * Otherwise, the paths will be joined, normalized and returned.
     * <p>
     * The output will be the same on both Unix and Windows except
     * for the separator character.
458 * <pre> 459 * /foo/ + bar --> /foo/bar 460 * /foo + bar --> /foo/bar 461 * /foo + /bar --> /bar 462 * /foo + C:/bar --> C:/bar 463 * /foo + C:bar --> C:bar (*) 464 * /foo/a/ + ../bar --> foo/bar 465 * /foo/ + ../../bar --> null 466 * /foo/ + /bar --> /bar 467 * /foo/.. + /bar --> /bar 468 * /foo + bar/c.txt --> /foo/bar/c.txt 469 * /foo/c.txt + bar --> /foo/c.txt/bar (!) 470 * </pre> 471 * (*) Note that the Windows relative drive prefix is unreliable when 472 * used with this method. 473 * (!) Note that the first parameter must be a path. If it ends with a name, then 474 * the name will be built into the concatenated path. If this might be a problem, 475 * use {@link #getFullPath(String)} on the base path argument. 476 * 477 * @param basePath the base path to attach to, always treated as a path 478 * @param fullFilenameToAdd the filename (or path) to attach to the base 479 * @return the concatenated path, or null if invalid 480 */ 481 public static String concat(String basePath, String fullFilenameToAdd) { 482 int prefix = getPrefixLength(fullFilenameToAdd); 483 if (prefix < 0) { 484 return null; 485 } 486 if (prefix > 0) { 487 return normalize(fullFilenameToAdd); 488 } 489 if (basePath == null) { 490 return null; 491 } 492 int len = basePath.length(); 493 if (len == 0) { 494 return normalize(fullFilenameToAdd); 495 } 496 char ch = basePath.charAt(len - 1); 497 if (isSeparator(ch)) { 498 return normalize(basePath + fullFilenameToAdd); 499 } else { 500 return normalize(basePath + '/' + fullFilenameToAdd); 501 } 502 } 503 504 /** 505 * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory). 506 * <p> 507 * The files names are expected to be normalized. 
508 * </p> 509 * 510 * Edge cases: 511 * <ul> 512 * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li> 513 * <li>A directory does not contain itself: return false</li> 514 * <li>A null child file is not contained in any parent: return false</li> 515 * </ul> 516 * 517 * @param canonicalParent 518 * the file to consider as the parent. 519 * @param canonicalChild 520 * the file to consider as the child. 521 * @return true is the candidate leaf is under by the specified composite. False otherwise. 522 * @throws IOException 523 * if an IO error occurs while checking the files. 524 * @since 2.2 525 * @see FileUtils#directoryContains(File, File) 526 */ 527 public static boolean directoryContains(final String canonicalParent, final String canonicalChild) 528 throws IOException { 529 530 // Fail fast against NullPointerException 531 if (canonicalParent == null) { 532 throw new IllegalArgumentException("Directory must not be null"); 533 } 534 535 if (canonicalChild == null) { 536 return false; 537 } 538 539 if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { 540 return false; 541 } 542 543 return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent); 544 } 545 546 //----------------------------------------------------------------------- 547 /** 548 * Converts all separators to the Unix separator of forward slash. 549 * 550 * @param path the path to be changed, null ignored 551 * @return the updated path 552 */ 553 public static String separatorsToUnix(String path) { 554 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) { 555 return path; 556 } 557 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR); 558 } 559 560 /** 561 * Converts all separators to the Windows separator of backslash. 
562 * 563 * @param path the path to be changed, null ignored 564 * @return the updated path 565 */ 566 public static String separatorsToWindows(String path) { 567 if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) { 568 return path; 569 } 570 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR); 571 } 572 573 /** 574 * Converts all separators to the system separator. 575 * 576 * @param path the path to be changed, null ignored 577 * @return the updated path 578 */ 579 public static String separatorsToSystem(String path) { 580 if (path == null) { 581 return null; 582 } 583 if (isSystemWindows()) { 584 return separatorsToWindows(path); 585 } else { 586 return separatorsToUnix(path); 587 } 588 } 589 590 //----------------------------------------------------------------------- 591 /** 592 * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>. 593 * <p> 594 * This method will handle a file in either Unix or Windows format. 595 * <p> 596 * The prefix length includes the first slash in the full filename 597 * if applicable. Thus, it is possible that the length returned is greater 598 * than the length of the input string. 599 * <pre> 600 * Windows: 601 * a\b\c.txt --> "" --> relative 602 * \a\b\c.txt --> "\" --> current drive absolute 603 * C:a\b\c.txt --> "C:" --> drive relative 604 * C:\a\b\c.txt --> "C:\" --> absolute 605 * \\server\a\b\c.txt --> "\\server\" --> UNC 606 * 607 * Unix: 608 * a/b/c.txt --> "" --> relative 609 * /a/b/c.txt --> "/" --> absolute 610 * ~/a/b/c.txt --> "~/" --> current user 611 * ~ --> "~/" --> current user (slash added) 612 * ~user/a/b/c.txt --> "~user/" --> named user 613 * ~user --> "~user/" --> named user (slash added) 614 * </pre> 615 * <p> 616 * The output will be the same irrespective of the machine that the code is running on. 617 * ie. both Unix and Windows prefixes are matched regardless. 
618 * 619 * @param filename the filename to find the prefix in, null returns -1 620 * @return the length of the prefix, -1 if invalid or null 621 */ 622 public static int getPrefixLength(String filename) { 623 if (filename == null) { 624 return -1; 625 } 626 int len = filename.length(); 627 if (len == 0) { 628 return 0; 629 } 630 char ch0 = filename.charAt(0); 631 if (ch0 == ':') { 632 return -1; 633 } 634 if (len == 1) { 635 if (ch0 == '~') { 636 return 2; // return a length greater than the input 637 } 638 return isSeparator(ch0) ? 1 : 0; 639 } else { 640 if (ch0 == '~') { 641 int posUnix = filename.indexOf(UNIX_SEPARATOR, 1); 642 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1); 643 if (posUnix == -1 && posWin == -1) { 644 return len + 1; // return a length greater than the input 645 } 646 posUnix = posUnix == -1 ? posWin : posUnix; 647 posWin = posWin == -1 ? posUnix : posWin; 648 return Math.min(posUnix, posWin) + 1; 649 } 650 char ch1 = filename.charAt(1); 651 if (ch1 == ':') { 652 ch0 = Character.toUpperCase(ch0); 653 if (ch0 >= 'A' && ch0 <= 'Z') { 654 if (len == 2 || isSeparator(filename.charAt(2)) == false) { 655 return 2; 656 } 657 return 3; 658 } 659 return -1; 660 661 } else if (isSeparator(ch0) && isSeparator(ch1)) { 662 int posUnix = filename.indexOf(UNIX_SEPARATOR, 2); 663 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2); 664 if (posUnix == -1 && posWin == -1 || posUnix == 2 || posWin == 2) { 665 return -1; 666 } 667 posUnix = posUnix == -1 ? posWin : posUnix; 668 posWin = posWin == -1 ? posUnix : posWin; 669 return Math.min(posUnix, posWin) + 1; 670 } else { 671 return isSeparator(ch0) ? 1 : 0; 672 } 673 } 674 } 675 676 /** 677 * Returns the index of the last directory separator character. 678 * <p> 679 * This method will handle a file in either Unix or Windows format. 680 * The position of the last forward or backslash is returned. 681 * <p> 682 * The output will be the same irrespective of the machine that the code is running on. 
683 * 684 * @param filename the filename to find the last path separator in, null returns -1 685 * @return the index of the last separator character, or -1 if there 686 * is no such character 687 */ 688 public static int indexOfLastSeparator(String filename) { 689 if (filename == null) { 690 return -1; 691 } 692 int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR); 693 int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR); 694 return Math.max(lastUnixPos, lastWindowsPos); 695 } 696 697 /** 698 * Returns the index of the last extension separator character, which is a dot. 699 * <p> 700 * This method also checks that there is no directory separator after the last dot. 701 * To do this it uses {@link #indexOfLastSeparator(String)} which will 702 * handle a file in either Unix or Windows format. 703 * <p> 704 * The output will be the same irrespective of the machine that the code is running on. 705 * 706 * @param filename the filename to find the last path separator in, null returns -1 707 * @return the index of the last separator character, or -1 if there 708 * is no such character 709 */ 710 public static int indexOfExtension(String filename) { 711 if (filename == null) { 712 return -1; 713 } 714 int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR); 715 int lastSeparator = indexOfLastSeparator(filename); 716 return lastSeparator > extensionPos ? -1 : extensionPos; 717 } 718 719 //----------------------------------------------------------------------- 720 /** 721 * Gets the prefix from a full filename, such as <code>C:/</code> 722 * or <code>~/</code>. 723 * <p> 724 * This method will handle a file in either Unix or Windows format. 725 * The prefix includes the first slash in the full filename where applicable. 
726 * <pre> 727 * Windows: 728 * a\b\c.txt --> "" --> relative 729 * \a\b\c.txt --> "\" --> current drive absolute 730 * C:a\b\c.txt --> "C:" --> drive relative 731 * C:\a\b\c.txt --> "C:\" --> absolute 732 * \\server\a\b\c.txt --> "\\server\" --> UNC 733 * 734 * Unix: 735 * a/b/c.txt --> "" --> relative 736 * /a/b/c.txt --> "/" --> absolute 737 * ~/a/b/c.txt --> "~/" --> current user 738 * ~ --> "~/" --> current user (slash added) 739 * ~user/a/b/c.txt --> "~user/" --> named user 740 * ~user --> "~user/" --> named user (slash added) 741 * </pre> 742 * <p> 743 * The output will be the same irrespective of the machine that the code is running on. 744 * ie. both Unix and Windows prefixes are matched regardless. 745 * 746 * @param filename the filename to query, null returns null 747 * @return the prefix of the file, null if invalid 748 */ 749 public static String getPrefix(String filename) { 750 if (filename == null) { 751 return null; 752 } 753 int len = getPrefixLength(filename); 754 if (len < 0) { 755 return null; 756 } 757 if (len > filename.length()) { 758 return filename + UNIX_SEPARATOR; // we know this only happens for unix 759 } 760 return filename.substring(0, len); 761 } 762 763 /** 764 * Gets the path from a full filename, which excludes the prefix. 765 * <p> 766 * This method will handle a file in either Unix or Windows format. 767 * The method is entirely text based, and returns the text before and 768 * including the last forward or backslash. 769 * <pre> 770 * C:\a\b\c.txt --> a\b\ 771 * ~/a/b/c.txt --> a/b/ 772 * a.txt --> "" 773 * a/b/c --> a/b/ 774 * a/b/c/ --> a/b/c/ 775 * </pre> 776 * <p> 777 * The output will be the same irrespective of the machine that the code is running on. 778 * <p> 779 * This method drops the prefix from the result. 780 * See {@link #getFullPath(String)} for the method that retains the prefix. 
781 * 782 * @param filename the filename to query, null returns null 783 * @return the path of the file, an empty string if none exists, null if invalid 784 */ 785 public static String getPath(String filename) { 786 return doGetPath(filename, 1); 787 } 788 789 /** 790 * Gets the path from a full filename, which excludes the prefix, and 791 * also excluding the final directory separator. 792 * <p> 793 * This method will handle a file in either Unix or Windows format. 794 * The method is entirely text based, and returns the text before the 795 * last forward or backslash. 796 * <pre> 797 * C:\a\b\c.txt --> a\b 798 * ~/a/b/c.txt --> a/b 799 * a.txt --> "" 800 * a/b/c --> a/b 801 * a/b/c/ --> a/b/c 802 * </pre> 803 * <p> 804 * The output will be the same irrespective of the machine that the code is running on. 805 * <p> 806 * This method drops the prefix from the result. 807 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. 808 * 809 * @param filename the filename to query, null returns null 810 * @return the path of the file, an empty string if none exists, null if invalid 811 */ 812 public static String getPathNoEndSeparator(String filename) { 813 return doGetPath(filename, 0); 814 } 815 816 /** 817 * Does the work of getting the path. 818 * 819 * @param filename the filename 820 * @param separatorAdd 0 to omit the end separator, 1 to return it 821 * @return the path 822 */ 823 private static String doGetPath(String filename, int separatorAdd) { 824 if (filename == null) { 825 return null; 826 } 827 int prefix = getPrefixLength(filename); 828 if (prefix < 0) { 829 return null; 830 } 831 int index = indexOfLastSeparator(filename); 832 int endIndex = index+separatorAdd; 833 if (prefix >= filename.length() || index < 0 || prefix >= endIndex) { 834 return ""; 835 } 836 return filename.substring(prefix, endIndex); 837 } 838 839 /** 840 * Gets the full path from a full filename, which is the prefix + path. 
841 * <p> 842 * This method will handle a file in either Unix or Windows format. 843 * The method is entirely text based, and returns the text before and 844 * including the last forward or backslash. 845 * <pre> 846 * C:\a\b\c.txt --> C:\a\b\ 847 * ~/a/b/c.txt --> ~/a/b/ 848 * a.txt --> "" 849 * a/b/c --> a/b/ 850 * a/b/c/ --> a/b/c/ 851 * C: --> C: 852 * C:\ --> C:\ 853 * ~ --> ~/ 854 * ~/ --> ~/ 855 * ~user --> ~user/ 856 * ~user/ --> ~user/ 857 * </pre> 858 * <p> 859 * The output will be the same irrespective of the machine that the code is running on. 860 * 861 * @param filename the filename to query, null returns null 862 * @return the path of the file, an empty string if none exists, null if invalid 863 */ 864 public static String getFullPath(String filename) { 865 return doGetFullPath(filename, true); 866 } 867 868 /** 869 * Gets the full path from a full filename, which is the prefix + path, 870 * and also excluding the final directory separator. 871 * <p> 872 * This method will handle a file in either Unix or Windows format. 873 * The method is entirely text based, and returns the text before the 874 * last forward or backslash. 875 * <pre> 876 * C:\a\b\c.txt --> C:\a\b 877 * ~/a/b/c.txt --> ~/a/b 878 * a.txt --> "" 879 * a/b/c --> a/b 880 * a/b/c/ --> a/b/c 881 * C: --> C: 882 * C:\ --> C:\ 883 * ~ --> ~ 884 * ~/ --> ~ 885 * ~user --> ~user 886 * ~user/ --> ~user 887 * </pre> 888 * <p> 889 * The output will be the same irrespective of the machine that the code is running on. 890 * 891 * @param filename the filename to query, null returns null 892 * @return the path of the file, an empty string if none exists, null if invalid 893 */ 894 public static String getFullPathNoEndSeparator(String filename) { 895 return doGetFullPath(filename, false); 896 } 897 898 /** 899 * Does the work of getting the path. 
900 * 901 * @param filename the filename 902 * @param includeSeparator true to include the end separator 903 * @return the path 904 */ 905 private static String doGetFullPath(String filename, boolean includeSeparator) { 906 if (filename == null) { 907 return null; 908 } 909 int prefix = getPrefixLength(filename); 910 if (prefix < 0) { 911 return null; 912 } 913 if (prefix >= filename.length()) { 914 if (includeSeparator) { 915 return getPrefix(filename); // add end slash if necessary 916 } else { 917 return filename; 918 } 919 } 920 int index = indexOfLastSeparator(filename); 921 if (index < 0) { 922 return filename.substring(0, prefix); 923 } 924 int end = index + (includeSeparator ? 1 : 0); 925 if (end == 0) { 926 end++; 927 } 928 return filename.substring(0, end); 929 } 930 931 /** 932 * Gets the name minus the path from a full filename. 933 * <p> 934 * This method will handle a file in either Unix or Windows format. 935 * The text after the last forward or backslash is returned. 936 * <pre> 937 * a/b/c.txt --> c.txt 938 * a.txt --> a.txt 939 * a/b/c --> c 940 * a/b/c/ --> "" 941 * </pre> 942 * <p> 943 * The output will be the same irrespective of the machine that the code is running on. 944 * 945 * @param filename the filename to query, null returns null 946 * @return the name of the file without the path, or an empty string if none exists 947 */ 948 public static String getName(String filename) { 949 if (filename == null) { 950 return null; 951 } 952 int index = indexOfLastSeparator(filename); 953 return filename.substring(index + 1); 954 } 955 956 /** 957 * Gets the base name, minus the full path and extension, from a full filename. 958 * <p> 959 * This method will handle a file in either Unix or Windows format. 960 * The text after the last forward or backslash and before the last dot is returned. 
961 * <pre> 962 * a/b/c.txt --> c 963 * a.txt --> a 964 * a/b/c --> c 965 * a/b/c/ --> "" 966 * </pre> 967 * <p> 968 * The output will be the same irrespective of the machine that the code is running on. 969 * 970 * @param filename the filename to query, null returns null 971 * @return the name of the file without the path, or an empty string if none exists 972 */ 973 public static String getBaseName(String filename) { 974 return removeExtension(getName(filename)); 975 } 976 977 /** 978 * Gets the extension of a filename. 979 * <p> 980 * This method returns the textual part of the filename after the last dot. 981 * There must be no directory separator after the dot. 982 * <pre> 983 * foo.txt --> "txt" 984 * a/b/c.jpg --> "jpg" 985 * a/b.txt/c --> "" 986 * a/b/c --> "" 987 * </pre> 988 * <p> 989 * The output will be the same irrespective of the machine that the code is running on. 990 * 991 * @param filename the filename to retrieve the extension of. 992 * @return the extension of the file or an empty string if none exists or <code>null</code> 993 * if the filename is <code>null</code>. 994 */ 995 public static String getExtension(String filename) { 996 if (filename == null) { 997 return null; 998 } 999 int index = indexOfExtension(filename); 1000 if (index == -1) { 1001 return ""; 1002 } else { 1003 return filename.substring(index + 1); 1004 } 1005 } 1006 1007 //----------------------------------------------------------------------- 1008 /** 1009 * Removes the extension from a filename. 1010 * <p> 1011 * This method returns the textual part of the filename before the last dot. 1012 * There must be no directory separator after the dot. 1013 * <pre> 1014 * foo.txt --> foo 1015 * a\b\c.jpg --> a\b\c 1016 * a\b\c --> a\b\c 1017 * a.b\c --> a.b\c 1018 * </pre> 1019 * <p> 1020 * The output will be the same irrespective of the machine that the code is running on. 
1021 * 1022 * @param filename the filename to query, null returns null 1023 * @return the filename minus the extension 1024 */ 1025 public static String removeExtension(String filename) { 1026 if (filename == null) { 1027 return null; 1028 } 1029 int index = indexOfExtension(filename); 1030 if (index == -1) { 1031 return filename; 1032 } else { 1033 return filename.substring(0, index); 1034 } 1035 } 1036 1037 //----------------------------------------------------------------------- 1038 /** 1039 * Checks whether two filenames are equal exactly. 1040 * <p> 1041 * No processing is performed on the filenames other than comparison, 1042 * thus this is merely a null-safe case-sensitive equals. 1043 * 1044 * @param filename1 the first filename to query, may be null 1045 * @param filename2 the second filename to query, may be null 1046 * @return true if the filenames are equal, null equals null 1047 * @see IOCase#SENSITIVE 1048 */ 1049 public static boolean equals(String filename1, String filename2) { 1050 return equals(filename1, filename2, false, IOCase.SENSITIVE); 1051 } 1052 1053 /** 1054 * Checks whether two filenames are equal using the case rules of the system. 1055 * <p> 1056 * No processing is performed on the filenames other than comparison. 1057 * The check is case-sensitive on Unix and case-insensitive on Windows. 1058 * 1059 * @param filename1 the first filename to query, may be null 1060 * @param filename2 the second filename to query, may be null 1061 * @return true if the filenames are equal, null equals null 1062 * @see IOCase#SYSTEM 1063 */ 1064 public static boolean equalsOnSystem(String filename1, String filename2) { 1065 return equals(filename1, filename2, false, IOCase.SYSTEM); 1066 } 1067 1068 //----------------------------------------------------------------------- 1069 /** 1070 * Checks whether two filenames are equal after both have been normalized. 1071 * <p> 1072 * Both filenames are first passed to {@link #normalize(String)}. 
1073 * The check is then performed in a case-sensitive manner. 1074 * 1075 * @param filename1 the first filename to query, may be null 1076 * @param filename2 the second filename to query, may be null 1077 * @return true if the filenames are equal, null equals null 1078 * @see IOCase#SENSITIVE 1079 */ 1080 public static boolean equalsNormalized(String filename1, String filename2) { 1081 return equals(filename1, filename2, true, IOCase.SENSITIVE); 1082 } 1083 1084 /** 1085 * Checks whether two filenames are equal after both have been normalized 1086 * and using the case rules of the system. 1087 * <p> 1088 * Both filenames are first passed to {@link #normalize(String)}. 1089 * The check is then performed case-sensitive on Unix and 1090 * case-insensitive on Windows. 1091 * 1092 * @param filename1 the first filename to query, may be null 1093 * @param filename2 the second filename to query, may be null 1094 * @return true if the filenames are equal, null equals null 1095 * @see IOCase#SYSTEM 1096 */ 1097 public static boolean equalsNormalizedOnSystem(String filename1, String filename2) { 1098 return equals(filename1, filename2, true, IOCase.SYSTEM); 1099 } 1100 1101 /** 1102 * Checks whether two filenames are equal, optionally normalizing and providing 1103 * control over the case-sensitivity. 
1104 * 1105 * @param filename1 the first filename to query, may be null 1106 * @param filename2 the second filename to query, may be null 1107 * @param normalized whether to normalize the filenames 1108 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1109 * @return true if the filenames are equal, null equals null 1110 * @since 1.3 1111 */ 1112 public static boolean equals( 1113 String filename1, String filename2, 1114 boolean normalized, IOCase caseSensitivity) { 1115 1116 if (filename1 == null || filename2 == null) { 1117 return filename1 == null && filename2 == null; 1118 } 1119 if (normalized) { 1120 filename1 = normalize(filename1); 1121 filename2 = normalize(filename2); 1122 if (filename1 == null || filename2 == null) { 1123 throw new NullPointerException( 1124 "Error normalizing one or both of the file names"); 1125 } 1126 } 1127 if (caseSensitivity == null) { 1128 caseSensitivity = IOCase.SENSITIVE; 1129 } 1130 return caseSensitivity.checkEquals(filename1, filename2); 1131 } 1132 1133 //----------------------------------------------------------------------- 1134 /** 1135 * Checks whether the extension of the filename is that specified. 1136 * <p> 1137 * This method obtains the extension as the textual part of the filename 1138 * after the last dot. There must be no directory separator after the dot. 1139 * The extension check is case-sensitive on all platforms. 
1140 * 1141 * @param filename the filename to query, null returns false 1142 * @param extension the extension to check for, null or empty checks for no extension 1143 * @return true if the filename has the specified extension 1144 */ 1145 public static boolean isExtension(String filename, String extension) { 1146 if (filename == null) { 1147 return false; 1148 } 1149 if (extension == null || extension.length() == 0) { 1150 return indexOfExtension(filename) == -1; 1151 } 1152 String fileExt = getExtension(filename); 1153 return fileExt.equals(extension); 1154 } 1155 1156 /** 1157 * Checks whether the extension of the filename is one of those specified. 1158 * <p> 1159 * This method obtains the extension as the textual part of the filename 1160 * after the last dot. There must be no directory separator after the dot. 1161 * The extension check is case-sensitive on all platforms. 1162 * 1163 * @param filename the filename to query, null returns false 1164 * @param extensions the extensions to check for, null checks for no extension 1165 * @return true if the filename is one of the extensions 1166 */ 1167 public static boolean isExtension(String filename, String[] extensions) { 1168 if (filename == null) { 1169 return false; 1170 } 1171 if (extensions == null || extensions.length == 0) { 1172 return indexOfExtension(filename) == -1; 1173 } 1174 String fileExt = getExtension(filename); 1175 for (String extension : extensions) { 1176 if (fileExt.equals(extension)) { 1177 return true; 1178 } 1179 } 1180 return false; 1181 } 1182 1183 /** 1184 * Checks whether the extension of the filename is one of those specified. 1185 * <p> 1186 * This method obtains the extension as the textual part of the filename 1187 * after the last dot. There must be no directory separator after the dot. 1188 * The extension check is case-sensitive on all platforms. 
1189 * 1190 * @param filename the filename to query, null returns false 1191 * @param extensions the extensions to check for, null checks for no extension 1192 * @return true if the filename is one of the extensions 1193 */ 1194 public static boolean isExtension(String filename, Collection<String> extensions) { 1195 if (filename == null) { 1196 return false; 1197 } 1198 if (extensions == null || extensions.isEmpty()) { 1199 return indexOfExtension(filename) == -1; 1200 } 1201 String fileExt = getExtension(filename); 1202 for (String extension : extensions) { 1203 if (fileExt.equals(extension)) { 1204 return true; 1205 } 1206 } 1207 return false; 1208 } 1209 1210 //----------------------------------------------------------------------- 1211 /** 1212 * Checks a filename to see if it matches the specified wildcard matcher, 1213 * always testing case-sensitive. 1214 * <p> 1215 * The wildcard matcher uses the characters '?' and '*' to represent a 1216 * single or multiple (zero or more) wildcard characters. 1217 * This is the same as often found on Dos/Unix command lines. 1218 * The check is case-sensitive always. 1219 * <pre> 1220 * wildcardMatch("c.txt", "*.txt") --> true 1221 * wildcardMatch("c.txt", "*.jpg") --> false 1222 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1223 * wildcardMatch("c.txt", "*.???") --> true 1224 * wildcardMatch("c.txt", "*.????") --> false 1225 * </pre> 1226 * N.B. the sequence "*?" does not work properly at present in match strings. 
1227 * 1228 * @param filename the filename to match on 1229 * @param wildcardMatcher the wildcard string to match against 1230 * @return true if the filename matches the wilcard string 1231 * @see IOCase#SENSITIVE 1232 */ 1233 public static boolean wildcardMatch(String filename, String wildcardMatcher) { 1234 return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE); 1235 } 1236 1237 /** 1238 * Checks a filename to see if it matches the specified wildcard matcher 1239 * using the case rules of the system. 1240 * <p> 1241 * The wildcard matcher uses the characters '?' and '*' to represent a 1242 * single or multiple (zero or more) wildcard characters. 1243 * This is the same as often found on Dos/Unix command lines. 1244 * The check is case-sensitive on Unix and case-insensitive on Windows. 1245 * <pre> 1246 * wildcardMatch("c.txt", "*.txt") --> true 1247 * wildcardMatch("c.txt", "*.jpg") --> false 1248 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1249 * wildcardMatch("c.txt", "*.???") --> true 1250 * wildcardMatch("c.txt", "*.????") --> false 1251 * </pre> 1252 * N.B. the sequence "*?" does not work properly at present in match strings. 1253 * 1254 * @param filename the filename to match on 1255 * @param wildcardMatcher the wildcard string to match against 1256 * @return true if the filename matches the wilcard string 1257 * @see IOCase#SYSTEM 1258 */ 1259 public static boolean wildcardMatchOnSystem(String filename, String wildcardMatcher) { 1260 return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM); 1261 } 1262 1263 /** 1264 * Checks a filename to see if it matches the specified wildcard matcher 1265 * allowing control over case-sensitivity. 1266 * <p> 1267 * The wildcard matcher uses the characters '?' and '*' to represent a 1268 * single or multiple (zero or more) wildcard characters. 1269 * N.B. the sequence "*?" does not work properly at present in match strings. 
1270 * 1271 * @param filename the filename to match on 1272 * @param wildcardMatcher the wildcard string to match against 1273 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1274 * @return true if the filename matches the wilcard string 1275 * @since 1.3 1276 */ 1277 public static boolean wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity) { 1278 if (filename == null && wildcardMatcher == null) { 1279 return true; 1280 } 1281 if (filename == null || wildcardMatcher == null) { 1282 return false; 1283 } 1284 if (caseSensitivity == null) { 1285 caseSensitivity = IOCase.SENSITIVE; 1286 } 1287 String[] wcs = splitOnTokens(wildcardMatcher); 1288 boolean anyChars = false; 1289 int textIdx = 0; 1290 int wcsIdx = 0; 1291 Stack<int[]> backtrack = new Stack<int[]>(); 1292 1293 // loop around a backtrack stack, to handle complex * matching 1294 do { 1295 if (backtrack.size() > 0) { 1296 int[] array = backtrack.pop(); 1297 wcsIdx = array[0]; 1298 textIdx = array[1]; 1299 anyChars = true; 1300 } 1301 1302 // loop whilst tokens and text left to process 1303 while (wcsIdx < wcs.length) { 1304 1305 if (wcs[wcsIdx].equals("?")) { 1306 // ? 
so move to next text char 1307 textIdx++; 1308 if (textIdx > filename.length()) { 1309 break; 1310 } 1311 anyChars = false; 1312 1313 } else if (wcs[wcsIdx].equals("*")) { 1314 // set any chars status 1315 anyChars = true; 1316 if (wcsIdx == wcs.length - 1) { 1317 textIdx = filename.length(); 1318 } 1319 1320 } else { 1321 // matching text token 1322 if (anyChars) { 1323 // any chars then try to locate text token 1324 textIdx = caseSensitivity.checkIndexOf(filename, textIdx, wcs[wcsIdx]); 1325 if (textIdx == -1) { 1326 // token not found 1327 break; 1328 } 1329 int repeat = caseSensitivity.checkIndexOf(filename, textIdx + 1, wcs[wcsIdx]); 1330 if (repeat >= 0) { 1331 backtrack.push(new int[] {wcsIdx, repeat}); 1332 } 1333 } else { 1334 // matching from current position 1335 if (!caseSensitivity.checkRegionMatches(filename, textIdx, wcs[wcsIdx])) { 1336 // couldnt match token 1337 break; 1338 } 1339 } 1340 1341 // matched text token, move text index to end of matched token 1342 textIdx += wcs[wcsIdx].length(); 1343 anyChars = false; 1344 } 1345 1346 wcsIdx++; 1347 } 1348 1349 // full match 1350 if (wcsIdx == wcs.length && textIdx == filename.length()) { 1351 return true; 1352 } 1353 1354 } while (backtrack.size() > 0); 1355 1356 return false; 1357 } 1358 1359 /** 1360 * Splits a string into a number of tokens. 1361 * The text is split by '?' and '*'. 1362 * Where multiple '*' occur consecutively they are collapsed into a single '*'. 
1363 * 1364 * @param text the text to split 1365 * @return the array of tokens, never null 1366 */ 1367 static String[] splitOnTokens(String text) { 1368 // used by wildcardMatch 1369 // package level so a unit test may run on this 1370 1371 if (text.indexOf('?') == -1 && text.indexOf('*') == -1) { 1372 return new String[] { text }; 1373 } 1374 1375 char[] array = text.toCharArray(); 1376 ArrayList<String> list = new ArrayList<String>(); 1377 StringBuilder buffer = new StringBuilder(); 1378 for (int i = 0; i < array.length; i++) { 1379 if (array[i] == '?' || array[i] == '*') { 1380 if (buffer.length() != 0) { 1381 list.add(buffer.toString()); 1382 buffer.setLength(0); 1383 } 1384 if (array[i] == '?') { 1385 list.add("?"); 1386 } else if (list.isEmpty() || 1387 i > 0 && list.get(list.size() - 1).equals("*") == false) { 1388 list.add("*"); 1389 } 1390 } else { 1391 buffer.append(array[i]); 1392 } 1393 } 1394 if (buffer.length() != 0) { 1395 list.add(buffer.toString()); 1396 } 1397 1398 return list.toArray( new String[ list.size() ] ); 1399 } 1400 1401 }