/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io;

import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Stack;

/**
 * General filename and filepath manipulation utilities.
 * <p>
 * When dealing with filenames you can hit problems when moving from a Windows
 * based development machine to a Unix based production machine.
 * This class aims to help avoid those problems.
 * <p>
 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
 * using JDK {@link java.io.File File} objects and the two argument constructor
 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
 * <p>
 * Most methods on this class are designed to work the same on both Unix and Windows.
 * Those that don't include 'System', 'Unix' or 'Windows' in their name.
 * <p>
 * Most methods recognise both separators (forward and back), and both
 * sets of prefixes. See the javadoc of each method for details.
 * <p>
 * This class defines six components within a filename
 * (example C:\dev\project\file.txt):
 * <ul>
 * <li>the prefix - C:\</li>
 * <li>the path - dev\project\</li>
 * <li>the full path - C:\dev\project\</li>
 * <li>the name - file.txt</li>
 * <li>the base name - file</li>
 * <li>the extension - txt</li>
 * </ul>
 * Note that this class works best if directory filenames end with a separator.
 * If you omit the last separator, it is impossible to determine if the filename
 * corresponds to a file or a directory. As a result, we have chosen to say
 * it corresponds to a file.
 * <p>
 * This class only supports Unix and Windows style names.
 * Prefixes are matched as follows:
 * <pre>
 * Windows:
 * a\b\c.txt           --> ""          --> relative
 * \a\b\c.txt          --> "\"         --> current drive absolute
 * C:a\b\c.txt         --> "C:"        --> drive relative
 * C:\a\b\c.txt        --> "C:\"       --> absolute
 * \\server\a\b\c.txt  --> "\\server\" --> UNC
 *
 * Unix:
 * a/b/c.txt           --> ""          --> relative
 * /a/b/c.txt          --> "/"         --> absolute
 * ~/a/b/c.txt         --> "~/"        --> current user
 * ~                   --> "~/"        --> current user (slash added)
 * ~user/a/b/c.txt     --> "~user/"    --> named user
 * ~user               --> "~user/"    --> named user (slash added)
 * </pre>
 * Both prefix styles are matched always, irrespective of the machine that you are
 * currently running on.
 * <p>
 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
 *
 * @author <a href="mailto:burton@relativity.yi.org">Kevin A.
Burton</A> 080 * @author <a href="mailto:sanders@apache.org">Scott Sanders</a> 081 * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a> 082 * @author <a href="mailto:Christoph.Reck@dlr.de">Christoph.Reck</a> 083 * @author <a href="mailto:peter@apache.org">Peter Donald</a> 084 * @author <a href="mailto:jefft@apache.org">Jeff Turner</a> 085 * @author Matthew Hawthorne 086 * @author Martin Cooper 087 * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a> 088 * @author Stephen Colebourne 089 * @version $Id: FilenameUtils.java 1004077 2010-10-04 00:58:42Z niallp $ 090 * @since Commons IO 1.1 091 */ 092 public class FilenameUtils { 093 094 /** 095 * The extension separator character. 096 * @since Commons IO 1.4 097 */ 098 public static final char EXTENSION_SEPARATOR = '.'; 099 100 /** 101 * The extension separator String. 102 * @since Commons IO 1.4 103 */ 104 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR); 105 106 /** 107 * The Unix separator character. 108 */ 109 private static final char UNIX_SEPARATOR = '/'; 110 111 /** 112 * The Windows separator character. 113 */ 114 private static final char WINDOWS_SEPARATOR = '\\'; 115 116 /** 117 * The system separator character. 118 */ 119 private static final char SYSTEM_SEPARATOR = File.separatorChar; 120 121 /** 122 * The separator character that is the opposite of the system separator. 123 */ 124 private static final char OTHER_SEPARATOR; 125 static { 126 if (isSystemWindows()) { 127 OTHER_SEPARATOR = UNIX_SEPARATOR; 128 } else { 129 OTHER_SEPARATOR = WINDOWS_SEPARATOR; 130 } 131 } 132 133 /** 134 * Instances should NOT be constructed in standard programming. 135 */ 136 public FilenameUtils() { 137 super(); 138 } 139 140 //----------------------------------------------------------------------- 141 /** 142 * Determines if Windows file system is in use. 
143 * 144 * @return true if the system is Windows 145 */ 146 static boolean isSystemWindows() { 147 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR; 148 } 149 150 //----------------------------------------------------------------------- 151 /** 152 * Checks if the character is a separator. 153 * 154 * @param ch the character to check 155 * @return true if it is a separator character 156 */ 157 private static boolean isSeparator(char ch) { 158 return (ch == UNIX_SEPARATOR) || (ch == WINDOWS_SEPARATOR); 159 } 160 161 //----------------------------------------------------------------------- 162 /** 163 * Normalizes a path, removing double and single dot path steps. 164 * <p> 165 * This method normalizes a path to a standard format. 166 * The input may contain separators in either Unix or Windows format. 167 * The output will contain separators in the format of the system. 168 * <p> 169 * A trailing slash will be retained. 170 * A double slash will be merged to a single slash (but UNC names are handled). 171 * A single dot path segment will be removed. 172 * A double dot will cause that path segment and the one before to be removed. 173 * If the double dot has no parent path segment to work with, <code>null</code> 174 * is returned. 175 * <p> 176 * The output will be the same on both Unix and Windows except 177 * for the separator character. 178 * <pre> 179 * /foo// --> /foo/ 180 * /foo/./ --> /foo/ 181 * /foo/../bar --> /bar 182 * /foo/../bar/ --> /bar/ 183 * /foo/../bar/../baz --> /baz 184 * //foo//./bar --> /foo/bar 185 * /../ --> null 186 * ../foo --> null 187 * foo/bar/.. 
--> foo/ 188 * foo/../../bar --> null 189 * foo/../bar --> bar 190 * //server/foo/../bar --> //server/bar 191 * //server/../bar --> null 192 * C:\foo\..\bar --> C:\bar 193 * C:\..\bar --> null 194 * ~/foo/../bar/ --> ~/bar/ 195 * ~/../bar --> null 196 * </pre> 197 * (Note the file separator returned will be correct for Windows/Unix) 198 * 199 * @param filename the filename to normalize, null returns null 200 * @return the normalized filename, or null if invalid 201 */ 202 public static String normalize(String filename) { 203 return doNormalize(filename, SYSTEM_SEPARATOR, true); 204 } 205 /** 206 * Normalizes a path, removing double and single dot path steps. 207 * <p> 208 * This method normalizes a path to a standard format. 209 * The input may contain separators in either Unix or Windows format. 210 * The output will contain separators in the format specified. 211 * <p> 212 * A trailing slash will be retained. 213 * A double slash will be merged to a single slash (but UNC names are handled). 214 * A single dot path segment will be removed. 215 * A double dot will cause that path segment and the one before to be removed. 216 * If the double dot has no parent path segment to work with, <code>null</code> 217 * is returned. 218 * <p> 219 * The output will be the same on both Unix and Windows except 220 * for the separator character. 221 * <pre> 222 * /foo// --> /foo/ 223 * /foo/./ --> /foo/ 224 * /foo/../bar --> /bar 225 * /foo/../bar/ --> /bar/ 226 * /foo/../bar/../baz --> /baz 227 * //foo//./bar --> /foo/bar 228 * /../ --> null 229 * ../foo --> null 230 * foo/bar/.. --> foo/ 231 * foo/../../bar --> null 232 * foo/../bar --> bar 233 * //server/foo/../bar --> //server/bar 234 * //server/../bar --> null 235 * C:\foo\..\bar --> C:\bar 236 * C:\..\bar --> null 237 * ~/foo/../bar/ --> ~/bar/ 238 * ~/../bar --> null 239 * </pre> 240 * The output will be the same on both Unix and Windows including 241 * the separator character. 
242 * 243 * @param filename the filename to normalize, null returns null 244 * @param unixSeparator <code>true</code> if a unix separator should 245 * be used or <code>false</code> if a windows separator should be used. 246 * @return the normalized filename, or null if invalid 247 * @since Commons IO 2.0 248 */ 249 public static String normalize(String filename, boolean unixSeparator) { 250 char separator = (unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR); 251 return doNormalize(filename, separator, true); 252 } 253 254 //----------------------------------------------------------------------- 255 /** 256 * Normalizes a path, removing double and single dot path steps, 257 * and removing any final directory separator. 258 * <p> 259 * This method normalizes a path to a standard format. 260 * The input may contain separators in either Unix or Windows format. 261 * The output will contain separators in the format of the system. 262 * <p> 263 * A trailing slash will be removed. 264 * A double slash will be merged to a single slash (but UNC names are handled). 265 * A single dot path segment will be removed. 266 * A double dot will cause that path segment and the one before to be removed. 267 * If the double dot has no parent path segment to work with, <code>null</code> 268 * is returned. 269 * <p> 270 * The output will be the same on both Unix and Windows except 271 * for the separator character. 272 * <pre> 273 * /foo// --> /foo 274 * /foo/./ --> /foo 275 * /foo/../bar --> /bar 276 * /foo/../bar/ --> /bar 277 * /foo/../bar/../baz --> /baz 278 * //foo//./bar --> /foo/bar 279 * /../ --> null 280 * ../foo --> null 281 * foo/bar/.. 
--> foo 282 * foo/../../bar --> null 283 * foo/../bar --> bar 284 * //server/foo/../bar --> //server/bar 285 * //server/../bar --> null 286 * C:\foo\..\bar --> C:\bar 287 * C:\..\bar --> null 288 * ~/foo/../bar/ --> ~/bar 289 * ~/../bar --> null 290 * </pre> 291 * (Note the file separator returned will be correct for Windows/Unix) 292 * 293 * @param filename the filename to normalize, null returns null 294 * @return the normalized filename, or null if invalid 295 */ 296 public static String normalizeNoEndSeparator(String filename) { 297 return doNormalize(filename, SYSTEM_SEPARATOR, false); 298 } 299 300 /** 301 * Normalizes a path, removing double and single dot path steps, 302 * and removing any final directory separator. 303 * <p> 304 * This method normalizes a path to a standard format. 305 * The input may contain separators in either Unix or Windows format. 306 * The output will contain separators in the format specified. 307 * <p> 308 * A trailing slash will be removed. 309 * A double slash will be merged to a single slash (but UNC names are handled). 310 * A single dot path segment will be removed. 311 * A double dot will cause that path segment and the one before to be removed. 312 * If the double dot has no parent path segment to work with, <code>null</code> 313 * is returned. 314 * <p> 315 * The output will be the same on both Unix and Windows including 316 * the separator character. 317 * <pre> 318 * /foo// --> /foo 319 * /foo/./ --> /foo 320 * /foo/../bar --> /bar 321 * /foo/../bar/ --> /bar 322 * /foo/../bar/../baz --> /baz 323 * //foo//./bar --> /foo/bar 324 * /../ --> null 325 * ../foo --> null 326 * foo/bar/.. 
--> foo 327 * foo/../../bar --> null 328 * foo/../bar --> bar 329 * //server/foo/../bar --> //server/bar 330 * //server/../bar --> null 331 * C:\foo\..\bar --> C:\bar 332 * C:\..\bar --> null 333 * ~/foo/../bar/ --> ~/bar 334 * ~/../bar --> null 335 * </pre> 336 * 337 * @param filename the filename to normalize, null returns null 338 * @param unixSeparator <code>true</code> if a unix separator should 339 * be used or <code>false</code> if a windows separtor should be used. 340 * @return the normalized filename, or null if invalid 341 * @since Commons IO 2.0 342 */ 343 public static String normalizeNoEndSeparator(String filename, boolean unixSeparator) { 344 char separator = (unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR); 345 return doNormalize(filename, separator, false); 346 } 347 348 /** 349 * Internal method to perform the normalization. 350 * 351 * @param filename the filename 352 * @param separator The separator character to use 353 * @param keepSeparator true to keep the final separator 354 * @return the normalized filename 355 */ 356 private static String doNormalize(String filename, char separator, boolean keepSeparator) { 357 if (filename == null) { 358 return null; 359 } 360 int size = filename.length(); 361 if (size == 0) { 362 return filename; 363 } 364 int prefix = getPrefixLength(filename); 365 if (prefix < 0) { 366 return null; 367 } 368 369 char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy 370 filename.getChars(0, filename.length(), array, 0); 371 372 // fix separators throughout 373 char otherSeparator = (separator == SYSTEM_SEPARATOR ? 
OTHER_SEPARATOR : SYSTEM_SEPARATOR); 374 for (int i = 0; i < array.length; i++) { 375 if (array[i] == otherSeparator) { 376 array[i] = separator; 377 } 378 } 379 380 // add extra separator on the end to simplify code below 381 boolean lastIsDirectory = true; 382 if (array[size - 1] != separator) { 383 array[size++] = separator; 384 lastIsDirectory = false; 385 } 386 387 // adjoining slashes 388 for (int i = prefix + 1; i < size; i++) { 389 if (array[i] == separator && array[i - 1] == separator) { 390 System.arraycopy(array, i, array, i - 1, size - i); 391 size--; 392 i--; 393 } 394 } 395 396 // dot slash 397 for (int i = prefix + 1; i < size; i++) { 398 if (array[i] == separator && array[i - 1] == '.' && 399 (i == prefix + 1 || array[i - 2] == separator)) { 400 if (i == size - 1) { 401 lastIsDirectory = true; 402 } 403 System.arraycopy(array, i + 1, array, i - 1, size - i); 404 size -=2; 405 i--; 406 } 407 } 408 409 // double dot slash 410 outer: 411 for (int i = prefix + 2; i < size; i++) { 412 if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' 
&& 413 (i == prefix + 2 || array[i - 3] == separator)) { 414 if (i == prefix + 2) { 415 return null; 416 } 417 if (i == size - 1) { 418 lastIsDirectory = true; 419 } 420 int j; 421 for (j = i - 4 ; j >= prefix; j--) { 422 if (array[j] == separator) { 423 // remove b/../ from a/b/../c 424 System.arraycopy(array, i + 1, array, j + 1, size - i); 425 size -= (i - j); 426 i = j + 1; 427 continue outer; 428 } 429 } 430 // remove a/../ from a/../c 431 System.arraycopy(array, i + 1, array, prefix, size - i); 432 size -= (i + 1 - prefix); 433 i = prefix + 1; 434 } 435 } 436 437 if (size <= 0) { // should never be less than 0 438 return ""; 439 } 440 if (size <= prefix) { // should never be less than prefix 441 return new String(array, 0, size); 442 } 443 if (lastIsDirectory && keepSeparator) { 444 return new String(array, 0, size); // keep trailing separator 445 } 446 return new String(array, 0, size - 1); // lose trailing separator 447 } 448 449 //----------------------------------------------------------------------- 450 /** 451 * Concatenates a filename to a base path using normal command line style rules. 452 * <p> 453 * The effect is equivalent to resultant directory after changing 454 * directory to the first argument, followed by changing directory to 455 * the second argument. 456 * <p> 457 * The first argument is the base path, the second is the path to concatenate. 458 * The returned path is always normalized via {@link #normalize(String)}, 459 * thus <code>..</code> is handled. 460 * <p> 461 * If <code>pathToAdd</code> is absolute (has an absolute prefix), then 462 * it will be normalized and returned. 463 * Otherwise, the paths will be joined, normalized and returned. 464 * <p> 465 * The output will be the same on both Unix and Windows except 466 * for the separator character. 
467 * <pre> 468 * /foo/ + bar --> /foo/bar 469 * /foo + bar --> /foo/bar 470 * /foo + /bar --> /bar 471 * /foo + C:/bar --> C:/bar 472 * /foo + C:bar --> C:bar (*) 473 * /foo/a/ + ../bar --> foo/bar 474 * /foo/ + ../../bar --> null 475 * /foo/ + /bar --> /bar 476 * /foo/.. + /bar --> /bar 477 * /foo + bar/c.txt --> /foo/bar/c.txt 478 * /foo/c.txt + bar --> /foo/c.txt/bar (!) 479 * </pre> 480 * (*) Note that the Windows relative drive prefix is unreliable when 481 * used with this method. 482 * (!) Note that the first parameter must be a path. If it ends with a name, then 483 * the name will be built into the concatenated path. If this might be a problem, 484 * use {@link #getFullPath(String)} on the base path argument. 485 * 486 * @param basePath the base path to attach to, always treated as a path 487 * @param fullFilenameToAdd the filename (or path) to attach to the base 488 * @return the concatenated path, or null if invalid 489 */ 490 public static String concat(String basePath, String fullFilenameToAdd) { 491 int prefix = getPrefixLength(fullFilenameToAdd); 492 if (prefix < 0) { 493 return null; 494 } 495 if (prefix > 0) { 496 return normalize(fullFilenameToAdd); 497 } 498 if (basePath == null) { 499 return null; 500 } 501 int len = basePath.length(); 502 if (len == 0) { 503 return normalize(fullFilenameToAdd); 504 } 505 char ch = basePath.charAt(len - 1); 506 if (isSeparator(ch)) { 507 return normalize(basePath + fullFilenameToAdd); 508 } else { 509 return normalize(basePath + '/' + fullFilenameToAdd); 510 } 511 } 512 513 //----------------------------------------------------------------------- 514 /** 515 * Converts all separators to the Unix separator of forward slash. 
516 * 517 * @param path the path to be changed, null ignored 518 * @return the updated path 519 */ 520 public static String separatorsToUnix(String path) { 521 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) { 522 return path; 523 } 524 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR); 525 } 526 527 /** 528 * Converts all separators to the Windows separator of backslash. 529 * 530 * @param path the path to be changed, null ignored 531 * @return the updated path 532 */ 533 public static String separatorsToWindows(String path) { 534 if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) { 535 return path; 536 } 537 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR); 538 } 539 540 /** 541 * Converts all separators to the system separator. 542 * 543 * @param path the path to be changed, null ignored 544 * @return the updated path 545 */ 546 public static String separatorsToSystem(String path) { 547 if (path == null) { 548 return null; 549 } 550 if (isSystemWindows()) { 551 return separatorsToWindows(path); 552 } else { 553 return separatorsToUnix(path); 554 } 555 } 556 557 //----------------------------------------------------------------------- 558 /** 559 * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>. 560 * <p> 561 * This method will handle a file in either Unix or Windows format. 562 * <p> 563 * The prefix length includes the first slash in the full filename 564 * if applicable. Thus, it is possible that the length returned is greater 565 * than the length of the input string. 
566 * <pre> 567 * Windows: 568 * a\b\c.txt --> "" --> relative 569 * \a\b\c.txt --> "\" --> current drive absolute 570 * C:a\b\c.txt --> "C:" --> drive relative 571 * C:\a\b\c.txt --> "C:\" --> absolute 572 * \\server\a\b\c.txt --> "\\server\" --> UNC 573 * 574 * Unix: 575 * a/b/c.txt --> "" --> relative 576 * /a/b/c.txt --> "/" --> absolute 577 * ~/a/b/c.txt --> "~/" --> current user 578 * ~ --> "~/" --> current user (slash added) 579 * ~user/a/b/c.txt --> "~user/" --> named user 580 * ~user --> "~user/" --> named user (slash added) 581 * </pre> 582 * <p> 583 * The output will be the same irrespective of the machine that the code is running on. 584 * ie. both Unix and Windows prefixes are matched regardless. 585 * 586 * @param filename the filename to find the prefix in, null returns -1 587 * @return the length of the prefix, -1 if invalid or null 588 */ 589 public static int getPrefixLength(String filename) { 590 if (filename == null) { 591 return -1; 592 } 593 int len = filename.length(); 594 if (len == 0) { 595 return 0; 596 } 597 char ch0 = filename.charAt(0); 598 if (ch0 == ':') { 599 return -1; 600 } 601 if (len == 1) { 602 if (ch0 == '~') { 603 return 2; // return a length greater than the input 604 } 605 return (isSeparator(ch0) ? 1 : 0); 606 } else { 607 if (ch0 == '~') { 608 int posUnix = filename.indexOf(UNIX_SEPARATOR, 1); 609 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1); 610 if (posUnix == -1 && posWin == -1) { 611 return len + 1; // return a length greater than the input 612 } 613 posUnix = (posUnix == -1 ? posWin : posUnix); 614 posWin = (posWin == -1 ? 
posUnix : posWin); 615 return Math.min(posUnix, posWin) + 1; 616 } 617 char ch1 = filename.charAt(1); 618 if (ch1 == ':') { 619 ch0 = Character.toUpperCase(ch0); 620 if (ch0 >= 'A' && ch0 <= 'Z') { 621 if (len == 2 || isSeparator(filename.charAt(2)) == false) { 622 return 2; 623 } 624 return 3; 625 } 626 return -1; 627 628 } else if (isSeparator(ch0) && isSeparator(ch1)) { 629 int posUnix = filename.indexOf(UNIX_SEPARATOR, 2); 630 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2); 631 if ((posUnix == -1 && posWin == -1) || posUnix == 2 || posWin == 2) { 632 return -1; 633 } 634 posUnix = (posUnix == -1 ? posWin : posUnix); 635 posWin = (posWin == -1 ? posUnix : posWin); 636 return Math.min(posUnix, posWin) + 1; 637 } else { 638 return (isSeparator(ch0) ? 1 : 0); 639 } 640 } 641 } 642 643 /** 644 * Returns the index of the last directory separator character. 645 * <p> 646 * This method will handle a file in either Unix or Windows format. 647 * The position of the last forward or backslash is returned. 648 * <p> 649 * The output will be the same irrespective of the machine that the code is running on. 650 * 651 * @param filename the filename to find the last path separator in, null returns -1 652 * @return the index of the last separator character, or -1 if there 653 * is no such character 654 */ 655 public static int indexOfLastSeparator(String filename) { 656 if (filename == null) { 657 return -1; 658 } 659 int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR); 660 int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR); 661 return Math.max(lastUnixPos, lastWindowsPos); 662 } 663 664 /** 665 * Returns the index of the last extension separator character, which is a dot. 666 * <p> 667 * This method also checks that there is no directory separator after the last dot. 668 * To do this it uses {@link #indexOfLastSeparator(String)} which will 669 * handle a file in either Unix or Windows format. 
670 * <p> 671 * The output will be the same irrespective of the machine that the code is running on. 672 * 673 * @param filename the filename to find the last path separator in, null returns -1 674 * @return the index of the last separator character, or -1 if there 675 * is no such character 676 */ 677 public static int indexOfExtension(String filename) { 678 if (filename == null) { 679 return -1; 680 } 681 int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR); 682 int lastSeparator = indexOfLastSeparator(filename); 683 return (lastSeparator > extensionPos ? -1 : extensionPos); 684 } 685 686 //----------------------------------------------------------------------- 687 /** 688 * Gets the prefix from a full filename, such as <code>C:/</code> 689 * or <code>~/</code>. 690 * <p> 691 * This method will handle a file in either Unix or Windows format. 692 * The prefix includes the first slash in the full filename where applicable. 693 * <pre> 694 * Windows: 695 * a\b\c.txt --> "" --> relative 696 * \a\b\c.txt --> "\" --> current drive absolute 697 * C:a\b\c.txt --> "C:" --> drive relative 698 * C:\a\b\c.txt --> "C:\" --> absolute 699 * \\server\a\b\c.txt --> "\\server\" --> UNC 700 * 701 * Unix: 702 * a/b/c.txt --> "" --> relative 703 * /a/b/c.txt --> "/" --> absolute 704 * ~/a/b/c.txt --> "~/" --> current user 705 * ~ --> "~/" --> current user (slash added) 706 * ~user/a/b/c.txt --> "~user/" --> named user 707 * ~user --> "~user/" --> named user (slash added) 708 * </pre> 709 * <p> 710 * The output will be the same irrespective of the machine that the code is running on. 711 * ie. both Unix and Windows prefixes are matched regardless. 
712 * 713 * @param filename the filename to query, null returns null 714 * @return the prefix of the file, null if invalid 715 */ 716 public static String getPrefix(String filename) { 717 if (filename == null) { 718 return null; 719 } 720 int len = getPrefixLength(filename); 721 if (len < 0) { 722 return null; 723 } 724 if (len > filename.length()) { 725 return filename + UNIX_SEPARATOR; // we know this only happens for unix 726 } 727 return filename.substring(0, len); 728 } 729 730 /** 731 * Gets the path from a full filename, which excludes the prefix. 732 * <p> 733 * This method will handle a file in either Unix or Windows format. 734 * The method is entirely text based, and returns the text before and 735 * including the last forward or backslash. 736 * <pre> 737 * C:\a\b\c.txt --> a\b\ 738 * ~/a/b/c.txt --> a/b/ 739 * a.txt --> "" 740 * a/b/c --> a/b/ 741 * a/b/c/ --> a/b/c/ 742 * </pre> 743 * <p> 744 * The output will be the same irrespective of the machine that the code is running on. 745 * <p> 746 * This method drops the prefix from the result. 747 * See {@link #getFullPath(String)} for the method that retains the prefix. 748 * 749 * @param filename the filename to query, null returns null 750 * @return the path of the file, an empty string if none exists, null if invalid 751 */ 752 public static String getPath(String filename) { 753 return doGetPath(filename, 1); 754 } 755 756 /** 757 * Gets the path from a full filename, which excludes the prefix, and 758 * also excluding the final directory separator. 759 * <p> 760 * This method will handle a file in either Unix or Windows format. 761 * The method is entirely text based, and returns the text before the 762 * last forward or backslash. 763 * <pre> 764 * C:\a\b\c.txt --> a\b 765 * ~/a/b/c.txt --> a/b 766 * a.txt --> "" 767 * a/b/c --> a/b 768 * a/b/c/ --> a/b/c 769 * </pre> 770 * <p> 771 * The output will be the same irrespective of the machine that the code is running on. 
772 * <p> 773 * This method drops the prefix from the result. 774 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. 775 * 776 * @param filename the filename to query, null returns null 777 * @return the path of the file, an empty string if none exists, null if invalid 778 */ 779 public static String getPathNoEndSeparator(String filename) { 780 return doGetPath(filename, 0); 781 } 782 783 /** 784 * Does the work of getting the path. 785 * 786 * @param filename the filename 787 * @param separatorAdd 0 to omit the end separator, 1 to return it 788 * @return the path 789 */ 790 private static String doGetPath(String filename, int separatorAdd) { 791 if (filename == null) { 792 return null; 793 } 794 int prefix = getPrefixLength(filename); 795 if (prefix < 0) { 796 return null; 797 } 798 int index = indexOfLastSeparator(filename); 799 int endIndex = index+separatorAdd; 800 if (prefix >= filename.length() || index < 0 || prefix >= endIndex) { 801 return ""; 802 } 803 return filename.substring(prefix, endIndex); 804 } 805 806 /** 807 * Gets the full path from a full filename, which is the prefix + path. 808 * <p> 809 * This method will handle a file in either Unix or Windows format. 810 * The method is entirely text based, and returns the text before and 811 * including the last forward or backslash. 812 * <pre> 813 * C:\a\b\c.txt --> C:\a\b\ 814 * ~/a/b/c.txt --> ~/a/b/ 815 * a.txt --> "" 816 * a/b/c --> a/b/ 817 * a/b/c/ --> a/b/c/ 818 * C: --> C: 819 * C:\ --> C:\ 820 * ~ --> ~/ 821 * ~/ --> ~/ 822 * ~user --> ~user/ 823 * ~user/ --> ~user/ 824 * </pre> 825 * <p> 826 * The output will be the same irrespective of the machine that the code is running on. 
827 * 828 * @param filename the filename to query, null returns null 829 * @return the path of the file, an empty string if none exists, null if invalid 830 */ 831 public static String getFullPath(String filename) { 832 return doGetFullPath(filename, true); 833 } 834 835 /** 836 * Gets the full path from a full filename, which is the prefix + path, 837 * and also excluding the final directory separator. 838 * <p> 839 * This method will handle a file in either Unix or Windows format. 840 * The method is entirely text based, and returns the text before the 841 * last forward or backslash. 842 * <pre> 843 * C:\a\b\c.txt --> C:\a\b 844 * ~/a/b/c.txt --> ~/a/b 845 * a.txt --> "" 846 * a/b/c --> a/b 847 * a/b/c/ --> a/b/c 848 * C: --> C: 849 * C:\ --> C:\ 850 * ~ --> ~ 851 * ~/ --> ~ 852 * ~user --> ~user 853 * ~user/ --> ~user 854 * </pre> 855 * <p> 856 * The output will be the same irrespective of the machine that the code is running on. 857 * 858 * @param filename the filename to query, null returns null 859 * @return the path of the file, an empty string if none exists, null if invalid 860 */ 861 public static String getFullPathNoEndSeparator(String filename) { 862 return doGetFullPath(filename, false); 863 } 864 865 /** 866 * Does the work of getting the path. 867 * 868 * @param filename the filename 869 * @param includeSeparator true to include the end separator 870 * @return the path 871 */ 872 private static String doGetFullPath(String filename, boolean includeSeparator) { 873 if (filename == null) { 874 return null; 875 } 876 int prefix = getPrefixLength(filename); 877 if (prefix < 0) { 878 return null; 879 } 880 if (prefix >= filename.length()) { 881 if (includeSeparator) { 882 return getPrefix(filename); // add end slash if necessary 883 } else { 884 return filename; 885 } 886 } 887 int index = indexOfLastSeparator(filename); 888 if (index < 0) { 889 return filename.substring(0, prefix); 890 } 891 int end = index + (includeSeparator ? 
1 : 0); 892 if (end == 0) { 893 end++; 894 } 895 return filename.substring(0, end); 896 } 897 898 /** 899 * Gets the name minus the path from a full filename. 900 * <p> 901 * This method will handle a file in either Unix or Windows format. 902 * The text after the last forward or backslash is returned. 903 * <pre> 904 * a/b/c.txt --> c.txt 905 * a.txt --> a.txt 906 * a/b/c --> c 907 * a/b/c/ --> "" 908 * </pre> 909 * <p> 910 * The output will be the same irrespective of the machine that the code is running on. 911 * 912 * @param filename the filename to query, null returns null 913 * @return the name of the file without the path, or an empty string if none exists 914 */ 915 public static String getName(String filename) { 916 if (filename == null) { 917 return null; 918 } 919 int index = indexOfLastSeparator(filename); 920 return filename.substring(index + 1); 921 } 922 923 /** 924 * Gets the base name, minus the full path and extension, from a full filename. 925 * <p> 926 * This method will handle a file in either Unix or Windows format. 927 * The text after the last forward or backslash and before the last dot is returned. 928 * <pre> 929 * a/b/c.txt --> c 930 * a.txt --> a 931 * a/b/c --> c 932 * a/b/c/ --> "" 933 * </pre> 934 * <p> 935 * The output will be the same irrespective of the machine that the code is running on. 936 * 937 * @param filename the filename to query, null returns null 938 * @return the name of the file without the path, or an empty string if none exists 939 */ 940 public static String getBaseName(String filename) { 941 return removeExtension(getName(filename)); 942 } 943 944 /** 945 * Gets the extension of a filename. 946 * <p> 947 * This method returns the textual part of the filename after the last dot. 948 * There must be no directory separator after the dot. 
949 * <pre> 950 * foo.txt --> "txt" 951 * a/b/c.jpg --> "jpg" 952 * a/b.txt/c --> "" 953 * a/b/c --> "" 954 * </pre> 955 * <p> 956 * The output will be the same irrespective of the machine that the code is running on. 957 * 958 * @param filename the filename to retrieve the extension of. 959 * @return the extension of the file or an empty string if none exists or <code>null</code> 960 * if the filename is <code>null</code>. 961 */ 962 public static String getExtension(String filename) { 963 if (filename == null) { 964 return null; 965 } 966 int index = indexOfExtension(filename); 967 if (index == -1) { 968 return ""; 969 } else { 970 return filename.substring(index + 1); 971 } 972 } 973 974 //----------------------------------------------------------------------- 975 /** 976 * Removes the extension from a filename. 977 * <p> 978 * This method returns the textual part of the filename before the last dot. 979 * There must be no directory separator after the dot. 980 * <pre> 981 * foo.txt --> foo 982 * a\b\c.jpg --> a\b\c 983 * a\b\c --> a\b\c 984 * a.b\c --> a.b\c 985 * </pre> 986 * <p> 987 * The output will be the same irrespective of the machine that the code is running on. 988 * 989 * @param filename the filename to query, null returns null 990 * @return the filename minus the extension 991 */ 992 public static String removeExtension(String filename) { 993 if (filename == null) { 994 return null; 995 } 996 int index = indexOfExtension(filename); 997 if (index == -1) { 998 return filename; 999 } else { 1000 return filename.substring(0, index); 1001 } 1002 } 1003 1004 //----------------------------------------------------------------------- 1005 /** 1006 * Checks whether two filenames are equal exactly. 1007 * <p> 1008 * No processing is performed on the filenames other than comparison, 1009 * thus this is merely a null-safe case-sensitive equals. 
1010 * 1011 * @param filename1 the first filename to query, may be null 1012 * @param filename2 the second filename to query, may be null 1013 * @return true if the filenames are equal, null equals null 1014 * @see IOCase#SENSITIVE 1015 */ 1016 public static boolean equals(String filename1, String filename2) { 1017 return equals(filename1, filename2, false, IOCase.SENSITIVE); 1018 } 1019 1020 /** 1021 * Checks whether two filenames are equal using the case rules of the system. 1022 * <p> 1023 * No processing is performed on the filenames other than comparison. 1024 * The check is case-sensitive on Unix and case-insensitive on Windows. 1025 * 1026 * @param filename1 the first filename to query, may be null 1027 * @param filename2 the second filename to query, may be null 1028 * @return true if the filenames are equal, null equals null 1029 * @see IOCase#SYSTEM 1030 */ 1031 public static boolean equalsOnSystem(String filename1, String filename2) { 1032 return equals(filename1, filename2, false, IOCase.SYSTEM); 1033 } 1034 1035 //----------------------------------------------------------------------- 1036 /** 1037 * Checks whether two filenames are equal after both have been normalized. 1038 * <p> 1039 * Both filenames are first passed to {@link #normalize(String)}. 1040 * The check is then performed in a case-sensitive manner. 1041 * 1042 * @param filename1 the first filename to query, may be null 1043 * @param filename2 the second filename to query, may be null 1044 * @return true if the filenames are equal, null equals null 1045 * @see IOCase#SENSITIVE 1046 */ 1047 public static boolean equalsNormalized(String filename1, String filename2) { 1048 return equals(filename1, filename2, true, IOCase.SENSITIVE); 1049 } 1050 1051 /** 1052 * Checks whether two filenames are equal after both have been normalized 1053 * and using the case rules of the system. 1054 * <p> 1055 * Both filenames are first passed to {@link #normalize(String)}. 
1056 * The check is then performed case-sensitive on Unix and 1057 * case-insensitive on Windows. 1058 * 1059 * @param filename1 the first filename to query, may be null 1060 * @param filename2 the second filename to query, may be null 1061 * @return true if the filenames are equal, null equals null 1062 * @see IOCase#SYSTEM 1063 */ 1064 public static boolean equalsNormalizedOnSystem(String filename1, String filename2) { 1065 return equals(filename1, filename2, true, IOCase.SYSTEM); 1066 } 1067 1068 /** 1069 * Checks whether two filenames are equal, optionally normalizing and providing 1070 * control over the case-sensitivity. 1071 * 1072 * @param filename1 the first filename to query, may be null 1073 * @param filename2 the second filename to query, may be null 1074 * @param normalized whether to normalize the filenames 1075 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1076 * @return true if the filenames are equal, null equals null 1077 * @since Commons IO 1.3 1078 */ 1079 public static boolean equals( 1080 String filename1, String filename2, 1081 boolean normalized, IOCase caseSensitivity) { 1082 1083 if (filename1 == null || filename2 == null) { 1084 return (filename1 == null && filename2 == null); 1085 } 1086 if (normalized) { 1087 filename1 = normalize(filename1); 1088 filename2 = normalize(filename2); 1089 if (filename1 == null || filename2 == null) { 1090 throw new NullPointerException( 1091 "Error normalizing one or both of the file names"); 1092 } 1093 } 1094 if (caseSensitivity == null) { 1095 caseSensitivity = IOCase.SENSITIVE; 1096 } 1097 return caseSensitivity.checkEquals(filename1, filename2); 1098 } 1099 1100 //----------------------------------------------------------------------- 1101 /** 1102 * Checks whether the extension of the filename is that specified. 1103 * <p> 1104 * This method obtains the extension as the textual part of the filename 1105 * after the last dot. 
There must be no directory separator after the dot. 1106 * The extension check is case-sensitive on all platforms. 1107 * 1108 * @param filename the filename to query, null returns false 1109 * @param extension the extension to check for, null or empty checks for no extension 1110 * @return true if the filename has the specified extension 1111 */ 1112 public static boolean isExtension(String filename, String extension) { 1113 if (filename == null) { 1114 return false; 1115 } 1116 if (extension == null || extension.length() == 0) { 1117 return (indexOfExtension(filename) == -1); 1118 } 1119 String fileExt = getExtension(filename); 1120 return fileExt.equals(extension); 1121 } 1122 1123 /** 1124 * Checks whether the extension of the filename is one of those specified. 1125 * <p> 1126 * This method obtains the extension as the textual part of the filename 1127 * after the last dot. There must be no directory separator after the dot. 1128 * The extension check is case-sensitive on all platforms. 1129 * 1130 * @param filename the filename to query, null returns false 1131 * @param extensions the extensions to check for, null checks for no extension 1132 * @return true if the filename is one of the extensions 1133 */ 1134 public static boolean isExtension(String filename, String[] extensions) { 1135 if (filename == null) { 1136 return false; 1137 } 1138 if (extensions == null || extensions.length == 0) { 1139 return (indexOfExtension(filename) == -1); 1140 } 1141 String fileExt = getExtension(filename); 1142 for (String extension : extensions) { 1143 if (fileExt.equals(extension)) { 1144 return true; 1145 } 1146 } 1147 return false; 1148 } 1149 1150 /** 1151 * Checks whether the extension of the filename is one of those specified. 1152 * <p> 1153 * This method obtains the extension as the textual part of the filename 1154 * after the last dot. There must be no directory separator after the dot. 1155 * The extension check is case-sensitive on all platforms. 
1156 * 1157 * @param filename the filename to query, null returns false 1158 * @param extensions the extensions to check for, null checks for no extension 1159 * @return true if the filename is one of the extensions 1160 */ 1161 public static boolean isExtension(String filename, Collection<String> extensions) { 1162 if (filename == null) { 1163 return false; 1164 } 1165 if (extensions == null || extensions.isEmpty()) { 1166 return (indexOfExtension(filename) == -1); 1167 } 1168 String fileExt = getExtension(filename); 1169 for (String extension : extensions) { 1170 if (fileExt.equals(extension)) { 1171 return true; 1172 } 1173 } 1174 return false; 1175 } 1176 1177 //----------------------------------------------------------------------- 1178 /** 1179 * Checks a filename to see if it matches the specified wildcard matcher, 1180 * always testing case-sensitive. 1181 * <p> 1182 * The wildcard matcher uses the characters '?' and '*' to represent a 1183 * single or multiple (zero or more) wildcard characters. 1184 * This is the same as often found on Dos/Unix command lines. 1185 * The check is case-sensitive always. 1186 * <pre> 1187 * wildcardMatch("c.txt", "*.txt") --> true 1188 * wildcardMatch("c.txt", "*.jpg") --> false 1189 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1190 * wildcardMatch("c.txt", "*.???") --> true 1191 * wildcardMatch("c.txt", "*.????") --> false 1192 * </pre> 1193 * N.B. the sequence "*?" does not work properly at present in match strings. 
1194 * 1195 * @param filename the filename to match on 1196 * @param wildcardMatcher the wildcard string to match against 1197 * @return true if the filename matches the wilcard string 1198 * @see IOCase#SENSITIVE 1199 */ 1200 public static boolean wildcardMatch(String filename, String wildcardMatcher) { 1201 return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE); 1202 } 1203 1204 /** 1205 * Checks a filename to see if it matches the specified wildcard matcher 1206 * using the case rules of the system. 1207 * <p> 1208 * The wildcard matcher uses the characters '?' and '*' to represent a 1209 * single or multiple (zero or more) wildcard characters. 1210 * This is the same as often found on Dos/Unix command lines. 1211 * The check is case-sensitive on Unix and case-insensitive on Windows. 1212 * <pre> 1213 * wildcardMatch("c.txt", "*.txt") --> true 1214 * wildcardMatch("c.txt", "*.jpg") --> false 1215 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1216 * wildcardMatch("c.txt", "*.???") --> true 1217 * wildcardMatch("c.txt", "*.????") --> false 1218 * </pre> 1219 * N.B. the sequence "*?" does not work properly at present in match strings. 1220 * 1221 * @param filename the filename to match on 1222 * @param wildcardMatcher the wildcard string to match against 1223 * @return true if the filename matches the wilcard string 1224 * @see IOCase#SYSTEM 1225 */ 1226 public static boolean wildcardMatchOnSystem(String filename, String wildcardMatcher) { 1227 return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM); 1228 } 1229 1230 /** 1231 * Checks a filename to see if it matches the specified wildcard matcher 1232 * allowing control over case-sensitivity. 1233 * <p> 1234 * The wildcard matcher uses the characters '?' and '*' to represent a 1235 * single or multiple (zero or more) wildcard characters. 1236 * N.B. the sequence "*?" does not work properly at present in match strings. 
1237 * 1238 * @param filename the filename to match on 1239 * @param wildcardMatcher the wildcard string to match against 1240 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1241 * @return true if the filename matches the wilcard string 1242 * @since Commons IO 1.3 1243 */ 1244 public static boolean wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity) { 1245 if (filename == null && wildcardMatcher == null) { 1246 return true; 1247 } 1248 if (filename == null || wildcardMatcher == null) { 1249 return false; 1250 } 1251 if (caseSensitivity == null) { 1252 caseSensitivity = IOCase.SENSITIVE; 1253 } 1254 String[] wcs = splitOnTokens(wildcardMatcher); 1255 boolean anyChars = false; 1256 int textIdx = 0; 1257 int wcsIdx = 0; 1258 Stack<int[]> backtrack = new Stack<int[]>(); 1259 1260 // loop around a backtrack stack, to handle complex * matching 1261 do { 1262 if (backtrack.size() > 0) { 1263 int[] array = backtrack.pop(); 1264 wcsIdx = array[0]; 1265 textIdx = array[1]; 1266 anyChars = true; 1267 } 1268 1269 // loop whilst tokens and text left to process 1270 while (wcsIdx < wcs.length) { 1271 1272 if (wcs[wcsIdx].equals("?")) { 1273 // ? 
so move to next text char 1274 textIdx++; 1275 if (textIdx > filename.length()) { 1276 break; 1277 } 1278 anyChars = false; 1279 1280 } else if (wcs[wcsIdx].equals("*")) { 1281 // set any chars status 1282 anyChars = true; 1283 if (wcsIdx == wcs.length - 1) { 1284 textIdx = filename.length(); 1285 } 1286 1287 } else { 1288 // matching text token 1289 if (anyChars) { 1290 // any chars then try to locate text token 1291 textIdx = caseSensitivity.checkIndexOf(filename, textIdx, wcs[wcsIdx]); 1292 if (textIdx == -1) { 1293 // token not found 1294 break; 1295 } 1296 int repeat = caseSensitivity.checkIndexOf(filename, textIdx + 1, wcs[wcsIdx]); 1297 if (repeat >= 0) { 1298 backtrack.push(new int[] {wcsIdx, repeat}); 1299 } 1300 } else { 1301 // matching from current position 1302 if (!caseSensitivity.checkRegionMatches(filename, textIdx, wcs[wcsIdx])) { 1303 // couldnt match token 1304 break; 1305 } 1306 } 1307 1308 // matched text token, move text index to end of matched token 1309 textIdx += wcs[wcsIdx].length(); 1310 anyChars = false; 1311 } 1312 1313 wcsIdx++; 1314 } 1315 1316 // full match 1317 if (wcsIdx == wcs.length && textIdx == filename.length()) { 1318 return true; 1319 } 1320 1321 } while (backtrack.size() > 0); 1322 1323 return false; 1324 } 1325 1326 /** 1327 * Splits a string into a number of tokens. 1328 * The text is split by '?' and '*'. 1329 * Where multiple '*' occur consecutively they are collapsed into a single '*'. 
1330 * 1331 * @param text the text to split 1332 * @return the array of tokens, never null 1333 */ 1334 static String[] splitOnTokens(String text) { 1335 // used by wildcardMatch 1336 // package level so a unit test may run on this 1337 1338 if (text.indexOf('?') == -1 && text.indexOf('*') == -1) { 1339 return new String[] { text }; 1340 } 1341 1342 char[] array = text.toCharArray(); 1343 ArrayList<String> list = new ArrayList<String>(); 1344 StringBuilder buffer = new StringBuilder(); 1345 for (int i = 0; i < array.length; i++) { 1346 if (array[i] == '?' || array[i] == '*') { 1347 if (buffer.length() != 0) { 1348 list.add(buffer.toString()); 1349 buffer.setLength(0); 1350 } 1351 if (array[i] == '?') { 1352 list.add("?"); 1353 } else if (list.size() == 0 || 1354 (i > 0 && list.get(list.size() - 1).equals("*") == false)) { 1355 list.add("*"); 1356 } 1357 } else { 1358 buffer.append(array[i]); 1359 } 1360 } 1361 if (buffer.length() != 0) { 1362 list.add(buffer.toString()); 1363 } 1364 1365 return list.toArray( new String[ list.size() ] ); 1366 } 1367 1368 }