001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io; 018 019import java.io.File; 020import java.util.ArrayDeque; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.Deque; 025import java.util.List; 026import java.util.regex.Matcher; 027import java.util.regex.Pattern; 028import java.util.stream.Stream; 029 030/** 031 * General file name and file path manipulation utilities. 032 * <p> 033 * When dealing with file names you can hit problems when moving from a Windows 034 * based development machine to a Unix based production machine. 035 * This class aims to help avoid those problems. 036 * </p> 037 * <p> 038 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by 039 * using JDK {@link java.io.File File} objects and the two argument constructor 040 * {@link java.io.File#File(java.io.File, String) File(File,String)}. 041 * </p> 042 * <p> 043 * Most methods on this class are designed to work the same on both Unix and Windows. 044 * Those that don't include 'System', 'Unix' or 'Windows' in their name. 
045 * </p> 046 * <p> 047 * Most methods recognize both separators (forward and back), and both 048 * sets of prefixes. See the Javadoc of each method for details. 049 * </p> 050 * <p> 051 * This class defines six components within a file name 052 * (example C:\dev\project\file.txt): 053 * </p> 054 * <ul> 055 * <li>the prefix - C:\</li> 056 * <li>the path - dev\project\</li> 057 * <li>the full path - C:\dev\project\</li> 058 * <li>the name - file.txt</li> 059 * <li>the base name - file</li> 060 * <li>the extension - txt</li> 061 * </ul> 062 * <p> 063 * Note that this class works best if directory file names end with a separator. 064 * If you omit the last separator, it is impossible to determine if the file name 065 * corresponds to a file or a directory. As a result, we have chosen to say 066 * it corresponds to a file. 067 * </p> 068 * <p> 069 * This class only supports Unix and Windows style names. 070 * Prefixes are matched as follows: 071 * </p> 072 * <pre> 073 * Windows: 074 * a\b\c.txt --> "" --> relative 075 * \a\b\c.txt --> "\" --> current drive absolute 076 * C:a\b\c.txt --> "C:" --> drive relative 077 * C:\a\b\c.txt --> "C:\" --> absolute 078 * \\server\a\b\c.txt --> "\\server\" --> UNC 079 * 080 * Unix: 081 * a/b/c.txt --> "" --> relative 082 * /a/b/c.txt --> "/" --> absolute 083 * ~/a/b/c.txt --> "~/" --> current user 084 * ~ --> "~/" --> current user (slash added) 085 * ~user/a/b/c.txt --> "~user/" --> named user 086 * ~user --> "~user/" --> named user (slash added) 087 * </pre> 088 * <p> 089 * Both prefix styles are matched always, irrespective of the machine that you are 090 * currently running on. 091 * </p> 092 * <p> 093 * Provenance: Excalibur, Alexandria, Tomcat, Commons-Utils. 
* </p>
 *
 * @since 1.1
 */
public class FilenameUtils {

    /** Shared zero-length {@code String} array. */
    private static final String[] EMPTY_STRING_ARRAY = {};

    /** The empty string, returned by several methods when a requested component does not exist. */
    private static final String EMPTY_STRING = "";

    /** Index value meaning "no such position"; also used as the "invalid" result of the prefix/index lookups. */
    private static final int NOT_FOUND = -1;

    /**
     * The extension separator character.
     * @since 1.4
     */
    public static final char EXTENSION_SEPARATOR = '.';

    /**
     * The extension separator String.
     * @since 1.4
     */
    public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);

    /**
     * The Unix separator character.
     */
    private static final char UNIX_NAME_SEPARATOR = '/';

    /**
     * The Windows separator character.
     */
    private static final char WINDOWS_NAME_SEPARATOR = '\\';

    /**
     * The system separator character.
     */
    private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar;

    /**
     * The separator character that is the opposite of the system separator.
     */
    private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR);

    /** Matches four dot-separated groups of one to three decimal digits, capturing each group. */
    private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");

    // NOTE(review): presumably the largest value accepted for one IPv4 octet; the validation code is outside this chunk.
    private static final int IPV4_MAX_OCTET_VALUE = 255;

    // NOTE(review): presumably the maximum number of hex groups in an IPv6 address; confirm against the validator.
    private static final int IPV6_MAX_HEX_GROUPS = 8;

    // NOTE(review): presumably the maximum number of hex digits in one IPv6 group; confirm against the validator.
    private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;

    /** The largest value that fits in 16 unsigned bits. */
    private static final int MAX_UNSIGNED_SHORT = 0xffff;

    /** Radix for hexadecimal parsing. */
    private static final int BASE_16 = 16;

    /**
     * Matches a string that starts with an ASCII letter or digit and continues with ASCII letters, digits or hyphens.
     * NOTE(review): presumably applied to one dot-separated label of a host name; the caller is outside this chunk.
     */
    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");

    /**
     * Concatenates a fileName to a base path using normal command line style rules.
     * <p>
     * The effect is equivalent to the resultant directory after changing
     * directory to the first argument, followed by changing directory to
     * the second argument.
     * </p>
     * <p>
     * The first argument is the base path, the second is the path to concatenate.
161 * The returned path is always normalized via {@link #normalize(String)}, 162 * thus {@code ..} is handled. 163 * </p> 164 * <p> 165 * If {@code pathToAdd} is absolute (has an absolute prefix), then 166 * it will be normalized and returned. 167 * Otherwise, the paths will be joined, normalized and returned. 168 * </p> 169 * <p> 170 * The output will be the same on both Unix and Windows except 171 * for the separator character. 172 * </p> 173 * <pre> 174 * /foo/ + bar --> /foo/bar 175 * /foo + bar --> /foo/bar 176 * /foo + /bar --> /bar 177 * /foo + C:/bar --> C:/bar 178 * /foo + C:bar --> C:bar [1] 179 * /foo/a/ + ../bar --> /foo/bar 180 * /foo/ + ../../bar --> null 181 * /foo/ + /bar --> /bar 182 * /foo/.. + /bar --> /bar 183 * /foo + bar/c.txt --> /foo/bar/c.txt 184 * /foo/c.txt + bar --> /foo/c.txt/bar [2] 185 * </pre> 186 * <p> 187 * [1] Note that the Windows relative drive prefix is unreliable when 188 * used with this method. 189 * </p> 190 * <p> 191 * [2] Note that the first parameter must be a path. If it ends with a name, then 192 * the name will be built into the concatenated path. If this might be a problem, 193 * use {@link #getFullPath(String)} on the base path argument. 
194 * </p> 195 * 196 * @param basePath the base path to attach to, always treated as a path 197 * @param fullFileNameToAdd the fileName (or path) to attach to the base 198 * @return the concatenated path, or null if invalid 199 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 200 */ 201 public static String concat(final String basePath, final String fullFileNameToAdd) { 202 final int prefix = getPrefixLength(fullFileNameToAdd); 203 if (prefix < 0) { 204 return null; 205 } 206 if (prefix > 0) { 207 return normalize(fullFileNameToAdd); 208 } 209 if (basePath == null) { 210 return null; 211 } 212 final int len = basePath.length(); 213 if (len == 0) { 214 return normalize(fullFileNameToAdd); 215 } 216 final char ch = basePath.charAt(len - 1); 217 if (isSeparator(ch)) { 218 return normalize(basePath + fullFileNameToAdd); 219 } 220 return normalize(basePath + '/' + fullFileNameToAdd); 221 } 222 223 /** 224 * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory). 225 * <p> 226 * The files names are expected to be normalized. 227 * </p> 228 * 229 * Edge cases: 230 * <ul> 231 * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li> 232 * <li>A directory does not contain itself: return false</li> 233 * <li>A null child file is not contained in any parent: return false</li> 234 * </ul> 235 * 236 * @param canonicalParent 237 * the file to consider as the parent. 238 * @param canonicalChild 239 * the file to consider as the child. 240 * @return true is the candidate leaf is under by the specified composite. False otherwise. 
241 * @since 2.2 242 * @see FileUtils#directoryContains(File, File) 243 */ 244 public static boolean directoryContains(final String canonicalParent, final String canonicalChild) { 245 if (isEmpty(canonicalParent) || isEmpty(canonicalChild)) { 246 return false; 247 } 248 249 if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { 250 return false; 251 } 252 253 final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR); 254 final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator; 255 256 return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator); 257 } 258 259 /** 260 * Does the work of getting the path. 261 * 262 * @param fileName the fileName 263 * @param includeSeparator true to include the end separator 264 * @return the path 265 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 266 */ 267 private static String doGetFullPath(final String fileName, final boolean includeSeparator) { 268 if (fileName == null) { 269 return null; 270 } 271 final int prefix = getPrefixLength(fileName); 272 if (prefix < 0) { 273 return null; 274 } 275 if (prefix >= fileName.length()) { 276 if (includeSeparator) { 277 return getPrefix(fileName); // add end slash if necessary 278 } 279 return fileName; 280 } 281 final int index = indexOfLastSeparator(fileName); 282 if (index < 0) { 283 return fileName.substring(0, prefix); 284 } 285 int end = index + (includeSeparator ? 1 : 0); 286 if (end == 0) { 287 end++; 288 } 289 return fileName.substring(0, end); 290 } 291 292 /** 293 * Does the work of getting the path. 
*
     * @param fileName  the fileName, null returns null
     * @param separatorAdd  0 to omit the end separator, 1 to return it
     * @return the path without the prefix; an empty string if there is no path part;
     *         null if the fileName is null or its prefix is invalid
     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
     */
    private static String doGetPath(final String fileName, final int separatorAdd) {
        if (fileName == null) {
            return null;
        }
        final int prefix = getPrefixLength(fileName);
        if (prefix < 0) {
            return null;
        }
        final int index = indexOfLastSeparator(fileName);
        final int endIndex = index + separatorAdd;
        // no path part: the name is only a prefix, has no separator, or the last separator belongs to the prefix
        if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
            return EMPTY_STRING;
        }
        return requireNonNullChars(fileName.substring(prefix, endIndex));
    }

    /**
     * Internal method to perform the normalization.
     * <p>
     * The name is copied into a working array and rewritten in place in four passes: all separators are
     * converted to {@code separator}, adjoining separators after the prefix are collapsed, {@code ./}
     * segments are dropped, and {@code ../} segments are resolved against the preceding segment.
     * </p>
     *
     * @param fileName  the fileName, null returns null, an empty string is returned unchanged
     * @param separator  The separator character to use
     * @param keepSeparator  true to keep the final separator
     * @return the normalized fileName, or null if the prefix is invalid or a {@code ..} segment
     *         would step above the prefix
     * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
     */
    private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
        if (fileName == null) {
            return null;
        }

        requireNonNullChars(fileName);

        // size tracks the number of live characters in the working array and shrinks as segments are removed
        int size = fileName.length();
        if (size == 0) {
            return fileName;
        }
        final int prefix = getPrefixLength(fileName);
        if (prefix < 0) {
            return null;
        }

        final char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy
        fileName.getChars(0, fileName.length(), array, 0);

        // fix separators throughout
        // (the two spare slots hold '\0' and therefore never match a separator)
        final char otherSeparator = flipSeparator(separator);
        for (int i = 0; i < array.length; i++) {
            if (array[i] == otherSeparator) {
                array[i] = separator;
            }
        }

        // add extra separator on the end to simplify code below
        boolean lastIsDirectory = true;
        if (array[size - 1] != separator) {
            array[size++] = separator;
            lastIsDirectory = false;
        }

        // adjoining slashes
        // If we get here, prefix can only be 0 or greater, size 1 or greater
        // If prefix is 0, set loop start to 1 to prevent index errors
        // Starting at the prefix leaves separators inside the prefix untouched
        for (int i = prefix != 0 ? prefix : 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == separator) {
                // shift the tail left by one, overwriting the duplicate separator
                System.arraycopy(array, i, array, i - 1, size - i);
                size--;
                i--;
            }
        }

        // dot slash
        for (int i = prefix + 1; i < size; i++) {
            // a "./" segment: the dot directly follows the prefix or another separator
            if (array[i] == separator && array[i - 1] == '.' &&
                    (i == prefix + 1 || array[i - 2] == separator)) {
                if (i == size - 1) {
                    // a trailing "." names a directory
                    lastIsDirectory = true;
                }
                System.arraycopy(array, i + 1, array, i - 1, size - i);
                size -=2;
                i--;
            }
        }

        // double dot slash
        outer:
        for (int i = prefix + 2; i < size; i++) {
            // a "../" segment: the dots directly follow the prefix or another separator
            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
                    (i == prefix + 2 || array[i - 3] == separator)) {
                if (i == prefix + 2) {
                    // ".." directly after the prefix has no parent segment to remove
                    return null;
                }
                if (i == size - 1) {
                    // a trailing ".." names a directory
                    lastIsDirectory = true;
                }
                int j;
                // search backwards for the separator that starts the segment preceding ".."
                for (j = i - 4 ; j >= prefix; j--) {
                    if (array[j] == separator) {
                        // remove b/../ from a/b/../c
                        System.arraycopy(array, i + 1, array, j + 1, size - i);
                        size -= i - j;
                        i = j + 1;
                        continue outer;
                    }
                }
                // remove a/../ from a/../c
                System.arraycopy(array, i + 1, array, prefix, size - i);
                size -= i + 1 - prefix;
                i = prefix + 1;
            }
        }

        if (size <= 0) { // should never be less than 0
            return EMPTY_STRING;
        }
        if (size <= prefix) { // should never be less than prefix
            return new String(array, 0, size);
        }
        if (lastIsDirectory && keepSeparator) {
            return new String(array, 0, size); // keep trailing separator
        }
        return new String(array, 0, size - 1); // lose trailing separator
    }

    /**
     * Checks whether two fileNames are equal exactly.
425 * <p> 426 * No processing is performed on the fileNames other than comparison, 427 * thus this is merely a null-safe case-sensitive equals. 428 * </p> 429 * 430 * @param fileName1 the first fileName to query, may be null 431 * @param fileName2 the second fileName to query, may be null 432 * @return true if the fileNames are equal, null equals null 433 * @see IOCase#SENSITIVE 434 */ 435 public static boolean equals(final String fileName1, final String fileName2) { 436 return equals(fileName1, fileName2, false, IOCase.SENSITIVE); 437 } 438 439 /** 440 * Checks whether two fileNames are equal, optionally normalizing and providing 441 * control over the case-sensitivity. 442 * 443 * @param fileName1 the first fileName to query, may be null 444 * @param fileName2 the second fileName to query, may be null 445 * @param normalize whether to normalize the fileNames 446 * @param ioCase what case sensitivity rule to use, null means case-sensitive 447 * @return true if the fileNames are equal, null equals null 448 * @since 1.3 449 */ 450 public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) { 451 452 if (fileName1 == null || fileName2 == null) { 453 return fileName1 == null && fileName2 == null; 454 } 455 if (normalize) { 456 fileName1 = normalize(fileName1); 457 if (fileName1 == null) { 458 return false; 459 } 460 fileName2 = normalize(fileName2); 461 if (fileName2 == null) { 462 return false; 463 } 464 } 465 return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2); 466 } 467 468 /** 469 * Checks whether two fileNames are equal after both have been normalized. 470 * <p> 471 * Both fileNames are first passed to {@link #normalize(String)}. 472 * The check is then performed in a case-sensitive manner. 
473 * </p> 474 * 475 * @param fileName1 the first fileName to query, may be null 476 * @param fileName2 the second fileName to query, may be null 477 * @return true if the fileNames are equal, null equals null 478 * @see IOCase#SENSITIVE 479 */ 480 public static boolean equalsNormalized(final String fileName1, final String fileName2) { 481 return equals(fileName1, fileName2, true, IOCase.SENSITIVE); 482 } 483 484 /** 485 * Checks whether two fileNames are equal after both have been normalized 486 * and using the case rules of the system. 487 * <p> 488 * Both fileNames are first passed to {@link #normalize(String)}. 489 * The check is then performed case-sensitive on Unix and 490 * case-insensitive on Windows. 491 * </p> 492 * 493 * @param fileName1 the first fileName to query, may be null 494 * @param fileName2 the second fileName to query, may be null 495 * @return true if the fileNames are equal, null equals null 496 * @see IOCase#SYSTEM 497 */ 498 public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) { 499 return equals(fileName1, fileName2, true, IOCase.SYSTEM); 500 } 501 502 /** 503 * Checks whether two fileNames are equal using the case rules of the system. 504 * <p> 505 * No processing is performed on the fileNames other than comparison. 506 * The check is case-sensitive on Unix and case-insensitive on Windows. 507 * </p> 508 * 509 * @param fileName1 the first fileName to query, may be null 510 * @param fileName2 the second fileName to query, may be null 511 * @return true if the fileNames are equal, null equals null 512 * @see IOCase#SYSTEM 513 */ 514 public static boolean equalsOnSystem(final String fileName1, final String fileName2) { 515 return equals(fileName1, fileName2, false, IOCase.SYSTEM); 516 } 517 518 /** 519 * Flips the Windows name separator to Linux and vice-versa. 520 * 521 * @param ch The Windows or Linux name separator. 522 * @return The Windows or Linux name separator. 
523 */ 524 static char flipSeparator(final char ch) { 525 if (ch == UNIX_NAME_SEPARATOR) { 526 return WINDOWS_NAME_SEPARATOR; 527 } 528 if (ch == WINDOWS_NAME_SEPARATOR) { 529 return UNIX_NAME_SEPARATOR; 530 } 531 throw new IllegalArgumentException(String.valueOf(ch)); 532 } 533 534 /** 535 * Special handling for NTFS ADS: Don't accept colon in the fileName. 536 * 537 * @param fileName a file name 538 * @return ADS offsets. 539 */ 540 private static int getAdsCriticalOffset(final String fileName) { 541 // Step 1: Remove leading path segments. 542 final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR); 543 final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR); 544 if (offset1 == -1) { 545 if (offset2 == -1) { 546 return 0; 547 } 548 return offset2 + 1; 549 } 550 if (offset2 == -1) { 551 return offset1 + 1; 552 } 553 return Math.max(offset1, offset2) + 1; 554 } 555 556 /** 557 * Gets the base name, minus the full path and extension, from a full fileName. 558 * <p> 559 * This method will handle a file in either Unix or Windows format. 560 * The text after the last forward or backslash and before the last dot is returned. 561 * </p> 562 * <pre> 563 * a/b/c.txt --> c 564 * a.txt --> a 565 * a/b/c --> c 566 * a/b/c/ --> "" 567 * </pre> 568 * <p> 569 * The output will be the same irrespective of the machine that the code is running on. 570 * </p> 571 * 572 * @param fileName the fileName to query, null returns null 573 * @return the name of the file without the path, or an empty string if none exists 574 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000}) 575 */ 576 public static String getBaseName(final String fileName) { 577 return removeExtension(getName(fileName)); 578 } 579 580 /** 581 * Gets the extension of a fileName. 582 * <p> 583 * This method returns the textual part of the fileName after the last dot. 584 * There must be no directory separator after the dot. 
585 * </p> 586 * <pre> 587 * foo.txt --> "txt" 588 * a/b/c.jpg --> "jpg" 589 * a/b.txt/c --> "" 590 * a/b/c --> "" 591 * </pre> 592 * <p> 593 * The output will be the same irrespective of the machine that the code is running on, with the 594 * exception of a possible {@link IllegalArgumentException} on Windows (see below). 595 * </p> 596 * <p> 597 * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt". 598 * In this case, the name wouldn't be the name of a file, but the identifier of an 599 * alternate data stream (bar.txt) on the file foo.exe. The method used to return 600 * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing 601 * an {@link IllegalArgumentException} for names like this. 602 * </p> 603 * 604 * @param fileName the fileName to retrieve the extension of. 605 * @return the extension of the file or an empty string if none exists or {@code null} 606 * if the fileName is {@code null}. 607 * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact, 608 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 609 */ 610 public static String getExtension(final String fileName) throws IllegalArgumentException { 611 if (fileName == null) { 612 return null; 613 } 614 final int index = indexOfExtension(fileName); 615 if (index == NOT_FOUND) { 616 return EMPTY_STRING; 617 } 618 return fileName.substring(index + 1); 619 } 620 621 /** 622 * Gets the full path from a full fileName, which is the prefix + path. 623 * <p> 624 * This method will handle a file in either Unix or Windows format. 625 * The method is entirely text based, and returns the text before and 626 * including the last forward or backslash. 
627 * </p> 628 * <pre> 629 * C:\a\b\c.txt --> C:\a\b\ 630 * ~/a/b/c.txt --> ~/a/b/ 631 * a.txt --> "" 632 * a/b/c --> a/b/ 633 * a/b/c/ --> a/b/c/ 634 * C: --> C: 635 * C:\ --> C:\ 636 * ~ --> ~/ 637 * ~/ --> ~/ 638 * ~user --> ~user/ 639 * ~user/ --> ~user/ 640 * </pre> 641 * <p> 642 * The output will be the same irrespective of the machine that the code is running on. 643 * </p> 644 * 645 * @param fileName the fileName to query, null returns null 646 * @return the path of the file, an empty string if none exists, null if invalid 647 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 648 */ 649 public static String getFullPath(final String fileName) { 650 return doGetFullPath(fileName, true); 651 } 652 653 /** 654 * Gets the full path from a full fileName, which is the prefix + path, 655 * and also excluding the final directory separator. 656 * <p> 657 * This method will handle a file in either Unix or Windows format. 658 * The method is entirely text based, and returns the text before the 659 * last forward or backslash. 660 * </p> 661 * <pre> 662 * C:\a\b\c.txt --> C:\a\b 663 * ~/a/b/c.txt --> ~/a/b 664 * a.txt --> "" 665 * a/b/c --> a/b 666 * a/b/c/ --> a/b/c 667 * C: --> C: 668 * C:\ --> C:\ 669 * ~ --> ~ 670 * ~/ --> ~ 671 * ~user --> ~user 672 * ~user/ --> ~user 673 * </pre> 674 * <p> 675 * The output will be the same irrespective of the machine that the code is running on. 676 * </p> 677 * 678 * @param fileName the fileName to query, null returns null 679 * @return the path of the file, an empty string if none exists, null if invalid 680 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 681 */ 682 public static String getFullPathNoEndSeparator(final String fileName) { 683 return doGetFullPath(fileName, false); 684 } 685 686 /** 687 * Gets the name minus the path from a full fileName. 
688 * <p> 689 * This method will handle a file in either Unix or Windows format. 690 * The text after the last forward or backslash is returned. 691 * </p> 692 * <pre> 693 * a/b/c.txt --> c.txt 694 * a.txt --> a.txt 695 * a/b/c --> c 696 * a/b/c/ --> "" 697 * </pre> 698 * <p> 699 * The output will be the same irrespective of the machine that the code is running on. 700 * </p> 701 * 702 * @param fileName the fileName to query, null returns null 703 * @return the name of the file without the path, or an empty string if none exists 704 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000}) 705 */ 706 public static String getName(final String fileName) { 707 if (fileName == null) { 708 return null; 709 } 710 return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1); 711 } 712 713 /** 714 * Gets the path from a full fileName, which excludes the prefix. 715 * <p> 716 * This method will handle a file in either Unix or Windows format. 717 * The method is entirely text based, and returns the text before and 718 * including the last forward or backslash. 719 * </p> 720 * <pre> 721 * C:\a\b\c.txt --> a\b\ 722 * ~/a/b/c.txt --> a/b/ 723 * a.txt --> "" 724 * a/b/c --> a/b/ 725 * a/b/c/ --> a/b/c/ 726 * </pre> 727 * <p> 728 * The output will be the same irrespective of the machine that the code is running on. 729 * </p> 730 * <p> 731 * This method drops the prefix from the result. 732 * See {@link #getFullPath(String)} for the method that retains the prefix. 
733 * </p> 734 * 735 * @param fileName the fileName to query, null returns null 736 * @return the path of the file, an empty string if none exists, null if invalid 737 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 738 */ 739 public static String getPath(final String fileName) { 740 return doGetPath(fileName, 1); 741 } 742 743 /** 744 * Gets the path from a full fileName, which excludes the prefix, and 745 * also excluding the final directory separator. 746 * <p> 747 * This method will handle a file in either Unix or Windows format. 748 * The method is entirely text based, and returns the text before the 749 * last forward or backslash. 750 * </p> 751 * <pre> 752 * C:\a\b\c.txt --> a\b 753 * ~/a/b/c.txt --> a/b 754 * a.txt --> "" 755 * a/b/c --> a/b 756 * a/b/c/ --> a/b/c 757 * </pre> 758 * <p> 759 * The output will be the same irrespective of the machine that the code is running on. 760 * </p> 761 * <p> 762 * This method drops the prefix from the result. 763 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. 764 * </p> 765 * 766 * @param fileName the fileName to query, null returns null 767 * @return the path of the file, an empty string if none exists, null if invalid 768 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 769 */ 770 public static String getPathNoEndSeparator(final String fileName) { 771 return doGetPath(fileName, 0); 772 } 773 774 /** 775 * Gets the prefix from a full fileName, such as {@code C:/} 776 * or {@code ~/}. 777 * <p> 778 * This method will handle a file in either Unix or Windows format. 779 * The prefix includes the first slash in the full fileName where applicable. 
780 * </p> 781 * <pre> 782 * Windows: 783 * a\b\c.txt --> "" --> relative 784 * \a\b\c.txt --> "\" --> current drive absolute 785 * C:a\b\c.txt --> "C:" --> drive relative 786 * C:\a\b\c.txt --> "C:\" --> absolute 787 * \\server\a\b\c.txt --> "\\server\" --> UNC 788 * 789 * Unix: 790 * a/b/c.txt --> "" --> relative 791 * /a/b/c.txt --> "/" --> absolute 792 * ~/a/b/c.txt --> "~/" --> current user 793 * ~ --> "~/" --> current user (slash added) 794 * ~user/a/b/c.txt --> "~user/" --> named user 795 * ~user --> "~user/" --> named user (slash added) 796 * </pre> 797 * <p> 798 * The output will be the same irrespective of the machine that the code is running on. 799 * ie. both Unix and Windows prefixes are matched regardless. 800 * </p> 801 * 802 * @param fileName the fileName to query, null returns null 803 * @return the prefix of the file, null if invalid 804 * @throws IllegalArgumentException if the result contains the null character ({@code U+0000}) 805 */ 806 public static String getPrefix(final String fileName) { 807 if (fileName == null) { 808 return null; 809 } 810 final int len = getPrefixLength(fileName); 811 if (len < 0) { 812 return null; 813 } 814 if (len > fileName.length()) { 815 requireNonNullChars(fileName); 816 return fileName + UNIX_NAME_SEPARATOR; 817 } 818 return requireNonNullChars(fileName.substring(0, len)); 819 } 820 821 /** 822 * Returns the length of the fileName prefix, such as {@code C:/} or {@code ~/}. 823 * <p> 824 * This method will handle a file in either Unix or Windows format. 825 * </p> 826 * <p> 827 * The prefix length includes the first slash in the full fileName 828 * if applicable. Thus, it is possible that the length returned is greater 829 * than the length of the input string. 
830 * </p> 831 * <pre> 832 * Windows: 833 * a\b\c.txt --> 0 --> relative 834 * \a\b\c.txt --> 1 --> current drive absolute 835 * C:a\b\c.txt --> 2 --> drive relative 836 * C:\a\b\c.txt --> 3 --> absolute 837 * \\server\a\b\c.txt --> 9 --> UNC 838 * \\\a\b\c.txt --> -1 --> error 839 * 840 * Unix: 841 * a/b/c.txt --> 0 --> relative 842 * /a/b/c.txt --> 1 --> absolute 843 * ~/a/b/c.txt --> 2 --> current user 844 * ~ --> 2 --> current user (slash added) 845 * ~user/a/b/c.txt --> 6 --> named user 846 * ~user --> 6 --> named user (slash added) 847 * //server/a/b/c.txt --> 9 848 * ///a/b/c.txt --> -1 --> error 849 * C: --> 0 --> valid file name as only null character and / are reserved characters 850 * </pre> 851 * <p> 852 * The output will be the same irrespective of the machine that the code is running on. 853 * ie. both Unix and Windows prefixes are matched regardless. 854 * </p> 855 * <p> 856 * Note that a leading // (or \\) is used to indicate a UNC name on Windows. 857 * These must be followed by a server name, so double-slashes are not collapsed 858 * to a single slash at the start of the fileName. 859 * </p> 860 * 861 * @param fileName the fileName to find the prefix in, null returns -1 862 * @return the length of the prefix, -1 if invalid or null 863 */ 864 public static int getPrefixLength(final String fileName) { 865 if (fileName == null) { 866 return NOT_FOUND; 867 } 868 final int len = fileName.length(); 869 if (len == 0) { 870 return 0; 871 } 872 char ch0 = fileName.charAt(0); 873 if (ch0 == ':') { 874 return NOT_FOUND; 875 } 876 if (len == 1) { 877 if (ch0 == '~') { 878 return 2; // return a length greater than the input 879 } 880 return isSeparator(ch0) ? 
1 : 0; 881 } 882 if (ch0 == '~') { 883 int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1); 884 int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1); 885 if (posUnix == NOT_FOUND && posWin == NOT_FOUND) { 886 return len + 1; // return a length greater than the input 887 } 888 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 889 posWin = posWin == NOT_FOUND ? posUnix : posWin; 890 return Math.min(posUnix, posWin) + 1; 891 } 892 final char ch1 = fileName.charAt(1); 893 if (ch1 == ':') { 894 ch0 = Character.toUpperCase(ch0); 895 if (ch0 >= 'A' && ch0 <= 'Z') { 896 if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) { 897 return 0; 898 } 899 if (len == 2 || !isSeparator(fileName.charAt(2))) { 900 return 2; 901 } 902 return 3; 903 } 904 if (ch0 == UNIX_NAME_SEPARATOR) { 905 return 1; 906 } 907 return NOT_FOUND; 908 909 } 910 if (!isSeparator(ch0) || !isSeparator(ch1)) { 911 return isSeparator(ch0) ? 1 : 0; 912 } 913 int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2); 914 int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2); 915 if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) { 916 return NOT_FOUND; 917 } 918 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 919 posWin = posWin == NOT_FOUND ? posUnix : posWin; 920 final int pos = Math.min(posUnix, posWin) + 1; 921 final String hostnamePart = fileName.substring(2, pos - 1); 922 return isValidHostName(hostnamePart) ? pos : NOT_FOUND; 923 } 924 925 /** 926 * Returns the index of the last extension separator character, which is a dot. 927 * <p> 928 * This method also checks that there is no directory separator after the last dot. To do this it uses 929 * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format. 930 * </p> 931 * <p> 932 * The output will be the same irrespective of the machine that the code is running on, with the 933 * exception of a possible {@link IllegalArgumentException} on Windows (see below). 
934 * </p> 935 * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt". 936 * In this case, the name wouldn't be the name of a file, but the identifier of an 937 * alternate data stream (bar.txt) on the file foo.exe. The method used to return 938 * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing 939 * an {@link IllegalArgumentException} for names like this. 940 * 941 * @param fileName 942 * the fileName to find the last extension separator in, null returns -1 943 * @return the index of the last extension separator character, or -1 if there is no such character 944 * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact, 945 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 946 */ 947 public static int indexOfExtension(final String fileName) throws IllegalArgumentException { 948 if (fileName == null) { 949 return NOT_FOUND; 950 } 951 if (isSystemWindows()) { 952 // Special handling for NTFS ADS: Don't accept colon in the fileName. 953 final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName)); 954 if (offset != -1) { 955 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden."); 956 } 957 } 958 final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR); 959 final int lastSeparator = indexOfLastSeparator(fileName); 960 return lastSeparator > extensionPos ? NOT_FOUND : extensionPos; 961 } 962 963 /** 964 * Returns the index of the last directory separator character. 965 * <p> 966 * This method will handle a file in either Unix or Windows format. 967 * The position of the last forward or backslash is returned. 968 * <p> 969 * The output will be the same irrespective of the machine that the code is running on. 
970 * 971 * @param fileName the fileName to find the last path separator in, null returns -1 972 * @return the index of the last separator character, or -1 if there 973 * is no such character 974 */ 975 public static int indexOfLastSeparator(final String fileName) { 976 if (fileName == null) { 977 return NOT_FOUND; 978 } 979 final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR); 980 final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR); 981 return Math.max(lastUnixPos, lastWindowsPos); 982 } 983 984 private static boolean isEmpty(final String string) { 985 return string == null || string.isEmpty(); 986 } 987 988 /** 989 * Checks whether the extension of the fileName is one of those specified. 990 * <p> 991 * This method obtains the extension as the textual part of the fileName 992 * after the last dot. There must be no directory separator after the dot. 993 * The extension check is case-sensitive on all platforms. 994 * 995 * @param fileName the fileName to query, null returns false 996 * @param extensions the extensions to check for, null checks for no extension 997 * @return true if the fileName is one of the extensions 998 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000}) 999 */ 1000 public static boolean isExtension(final String fileName, final Collection<String> extensions) { 1001 if (fileName == null) { 1002 return false; 1003 } 1004 requireNonNullChars(fileName); 1005 1006 if (extensions == null || extensions.isEmpty()) { 1007 return indexOfExtension(fileName) == NOT_FOUND; 1008 } 1009 return extensions.contains(getExtension(fileName)); 1010 } 1011 1012 /** 1013 * Checks whether the extension of the fileName is that specified. 1014 * <p> 1015 * This method obtains the extension as the textual part of the fileName 1016 * after the last dot. There must be no directory separator after the dot. 1017 * The extension check is case-sensitive on all platforms. 
1018 * 1019 * @param fileName the fileName to query, null returns false 1020 * @param extension the extension to check for, null or empty checks for no extension 1021 * @return true if the fileName has the specified extension 1022 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000}) 1023 */ 1024 public static boolean isExtension(final String fileName, final String extension) { 1025 if (fileName == null) { 1026 return false; 1027 } 1028 requireNonNullChars(fileName); 1029 1030 if (isEmpty(extension)) { 1031 return indexOfExtension(fileName) == NOT_FOUND; 1032 } 1033 return getExtension(fileName).equals(extension); 1034 } 1035 1036 /** 1037 * Checks whether the extension of the fileName is one of those specified. 1038 * <p> 1039 * This method obtains the extension as the textual part of the fileName 1040 * after the last dot. There must be no directory separator after the dot. 1041 * The extension check is case-sensitive on all platforms. 1042 * 1043 * @param fileName the fileName to query, null returns false 1044 * @param extensions the extensions to check for, null checks for no extension 1045 * @return true if the fileName is one of the extensions 1046 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000}) 1047 */ 1048 public static boolean isExtension(final String fileName, final String... extensions) { 1049 if (fileName == null) { 1050 return false; 1051 } 1052 requireNonNullChars(fileName); 1053 1054 if (extensions == null || extensions.length == 0) { 1055 return indexOfExtension(fileName) == NOT_FOUND; 1056 } 1057 final String fileExt = getExtension(fileName); 1058 return Stream.of(extensions).anyMatch(fileExt::equals); 1059 } 1060 1061 /** 1062 * Checks whether a given string represents a valid IPv4 address. 
1063 * 1064 * @param name the name to validate 1065 * @return true if the given name is a valid IPv4 address 1066 */ 1067 // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address 1068 private static boolean isIPv4Address(final String name) { 1069 final Matcher m = IPV4_PATTERN.matcher(name); 1070 if (!m.matches() || m.groupCount() != 4) { 1071 return false; 1072 } 1073 1074 // verify that address subgroups are legal 1075 for (int i = 1; i <= 4; i++) { 1076 final String ipSegment = m.group(i); 1077 final int iIpSegment = Integer.parseInt(ipSegment); 1078 if (iIpSegment > IPV4_MAX_OCTET_VALUE) { 1079 return false; 1080 } 1081 1082 if (ipSegment.length() > 1 && ipSegment.startsWith("0")) { 1083 return false; 1084 } 1085 1086 } 1087 1088 return true; 1089 } 1090 1091 // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address 1092 /** 1093 * Checks whether a given string represents a valid IPv6 address. 1094 * 1095 * @param inet6Address the name to validate 1096 * @return true if the given name is a valid IPv6 address 1097 */ 1098 private static boolean isIPv6Address(final String inet6Address) { 1099 final boolean containsCompressedZeroes = inet6Address.contains("::"); 1100 if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) { 1101 return false; 1102 } 1103 if (inet6Address.startsWith(":") && !inet6Address.startsWith("::") 1104 || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) { 1105 return false; 1106 } 1107 String[] octets = inet6Address.split(":"); 1108 if (containsCompressedZeroes) { 1109 final List<String> octetList = new ArrayList<>(Arrays.asList(octets)); 1110 if (inet6Address.endsWith("::")) { 1111 // String.split() drops ending empty segments 1112 octetList.add(""); 1113 } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) { 1114 octetList.remove(0); 1115 } 1116 octets = octetList.toArray(EMPTY_STRING_ARRAY); 1117 } 
1118 if (octets.length > IPV6_MAX_HEX_GROUPS) { 1119 return false; 1120 } 1121 int validOctets = 0; 1122 int emptyOctets = 0; // consecutive empty chunks 1123 for (int index = 0; index < octets.length; index++) { 1124 final String octet = octets[index]; 1125 if (octet.isEmpty()) { 1126 emptyOctets++; 1127 if (emptyOctets > 1) { 1128 return false; 1129 } 1130 } else { 1131 emptyOctets = 0; 1132 // Is last chunk an IPv4 address? 1133 if (index == octets.length - 1 && octet.contains(".")) { 1134 if (!isIPv4Address(octet)) { 1135 return false; 1136 } 1137 validOctets += 2; 1138 continue; 1139 } 1140 if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) { 1141 return false; 1142 } 1143 final int octetInt; 1144 try { 1145 octetInt = Integer.parseInt(octet, BASE_16); 1146 } catch (final NumberFormatException e) { 1147 return false; 1148 } 1149 if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) { 1150 return false; 1151 } 1152 } 1153 validOctets++; 1154 } 1155 return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes); 1156 } 1157 1158 /** 1159 * Checks whether a given string is a valid host name according to 1160 * RFC 3986 - not accepting IP addresses. 1161 * 1162 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" 1163 * @param name the hostname to validate 1164 * @return true if the given name is a valid host name 1165 */ 1166 private static boolean isRFC3986HostName(final String name) { 1167 final String[] parts = name.split("\\.", -1); 1168 for (int i = 0; i < parts.length; i++) { 1169 if (parts[i].isEmpty()) { 1170 // trailing dot is legal, otherwise we've hit a .. sequence 1171 return i == parts.length - 1; 1172 } 1173 if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) { 1174 return false; 1175 } 1176 } 1177 return true; 1178 } 1179 1180 /** 1181 * Checks if the character is a separator. 
1182 * 1183 * @param ch the character to check 1184 * @return true if it is a separator character 1185 */ 1186 private static boolean isSeparator(final char ch) { 1187 return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR; 1188 } 1189 1190 /** 1191 * Determines if Windows file system is in use. 1192 * 1193 * @return true if the system is Windows 1194 */ 1195 static boolean isSystemWindows() { 1196 return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR; 1197 } 1198 1199 /** 1200 * Checks whether a given string is a valid host name according to 1201 * RFC 3986. 1202 * 1203 * <p>Accepted are IP addresses (v4 and v6) as well as what the 1204 * RFC calls a "reg-name". Percent encoded names don't seem to be 1205 * valid names in UNC paths.</p> 1206 * 1207 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" 1208 * @param name the hostname to validate 1209 * @return true if the given name is a valid host name 1210 */ 1211 private static boolean isValidHostName(final String name) { 1212 return isIPv6Address(name) || isRFC3986HostName(name); 1213 } 1214 1215 /** 1216 * Normalizes a path, removing double and single dot path steps. 1217 * <p> 1218 * This method normalizes a path to a standard format. 1219 * The input may contain separators in either Unix or Windows format. 1220 * The output will contain separators in the format of the system. 1221 * <p> 1222 * A trailing slash will be retained. 1223 * A double slash will be merged to a single slash (but UNC names are handled). 1224 * A single dot path segment will be removed. 1225 * A double dot will cause that path segment and the one before to be removed. 1226 * If the double dot has no parent path segment to work with, {@code null} 1227 * is returned. 1228 * <p> 1229 * The output will be the same on both Unix and Windows except 1230 * for the separator character. 
1231 * <pre> 1232 * /foo// --> /foo/ 1233 * /foo/./ --> /foo/ 1234 * /foo/../bar --> /bar 1235 * /foo/../bar/ --> /bar/ 1236 * /foo/../bar/../baz --> /baz 1237 * //foo//./bar --> //foo/bar 1238 * /../ --> null 1239 * ../foo --> null 1240 * foo/bar/.. --> foo/ 1241 * foo/../../bar --> null 1242 * foo/../bar --> bar 1243 * //server/foo/../bar --> //server/bar 1244 * //server/../bar --> null 1245 * C:\foo\..\bar --> C:\bar 1246 * C:\..\bar --> null 1247 * ~/foo/../bar/ --> ~/bar/ 1248 * ~/../bar --> null 1249 * </pre> 1250 * (Note the file separator returned will be correct for Windows/Unix) 1251 * 1252 * @param fileName the fileName to normalize, null returns null 1253 * @return the normalized fileName, or null if invalid 1254 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000}) 1255 */ 1256 public static String normalize(final String fileName) { 1257 return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true); 1258 } 1259 1260 /** 1261 * Normalizes a path, removing double and single dot path steps. 1262 * <p> 1263 * This method normalizes a path to a standard format. 1264 * The input may contain separators in either Unix or Windows format. 1265 * The output will contain separators in the format specified. 1266 * <p> 1267 * A trailing slash will be retained. 1268 * A double slash will be merged to a single slash (but UNC names are handled). 1269 * A single dot path segment will be removed. 1270 * A double dot will cause that path segment and the one before to be removed. 1271 * If the double dot has no parent path segment to work with, {@code null} 1272 * is returned. 1273 * <p> 1274 * The output will be the same on both Unix and Windows except 1275 * for the separator character. 1276 * <pre> 1277 * /foo// --> /foo/ 1278 * /foo/./ --> /foo/ 1279 * /foo/../bar --> /bar 1280 * /foo/../bar/ --> /bar/ 1281 * /foo/../bar/../baz --> /baz 1282 * //foo//./bar --> /foo/bar 1283 * /../ --> null 1284 * ../foo --> null 1285 * foo/bar/.. 
--> foo/ 1286 * foo/../../bar --> null 1287 * foo/../bar --> bar 1288 * //server/foo/../bar --> //server/bar 1289 * //server/../bar --> null 1290 * C:\foo\..\bar --> C:\bar 1291 * C:\..\bar --> null 1292 * ~/foo/../bar/ --> ~/bar/ 1293 * ~/../bar --> null 1294 * </pre> 1295 * The output will be the same on both Unix and Windows including 1296 * the separator character. 1297 * 1298 * @param fileName the fileName to normalize, null returns null 1299 * @param unixSeparator {@code true} if a Unix separator should 1300 * be used or {@code false} if a Windows separator should be used. 1301 * @return the normalized fileName, or null if invalid 1302 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000}) 1303 * @since 2.0 1304 */ 1305 public static String normalize(final String fileName, final boolean unixSeparator) { 1306 return doNormalize(fileName, toSeparator(unixSeparator), true); 1307 } 1308 1309 /** 1310 * Normalizes a path, removing double and single dot path steps, 1311 * and removing any final directory separator. 1312 * <p> 1313 * This method normalizes a path to a standard format. 1314 * The input may contain separators in either Unix or Windows format. 1315 * The output will contain separators in the format of the system. 1316 * <p> 1317 * A trailing slash will be removed. 1318 * A double slash will be merged to a single slash (but UNC names are handled). 1319 * A single dot path segment will be removed. 1320 * A double dot will cause that path segment and the one before to be removed. 1321 * If the double dot has no parent path segment to work with, {@code null} 1322 * is returned. 1323 * <p> 1324 * The output will be the same on both Unix and Windows except 1325 * for the separator character. 
1326 * <pre> 1327 * /foo// --> /foo 1328 * /foo/./ --> /foo 1329 * /foo/../bar --> /bar 1330 * /foo/../bar/ --> /bar 1331 * /foo/../bar/../baz --> /baz 1332 * //foo//./bar --> /foo/bar 1333 * /../ --> null 1334 * ../foo --> null 1335 * foo/bar/.. --> foo 1336 * foo/../../bar --> null 1337 * foo/../bar --> bar 1338 * //server/foo/../bar --> //server/bar 1339 * //server/../bar --> null 1340 * C:\foo\..\bar --> C:\bar 1341 * C:\..\bar --> null 1342 * ~/foo/../bar/ --> ~/bar 1343 * ~/../bar --> null 1344 * </pre> 1345 * (Note the file separator returned will be correct for Windows/Unix) 1346 * 1347 * @param fileName the fileName to normalize, null returns null 1348 * @return the normalized fileName, or null if invalid 1349 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000}) 1350 */ 1351 public static String normalizeNoEndSeparator(final String fileName) { 1352 return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false); 1353 } 1354 1355 /** 1356 * Normalizes a path, removing double and single dot path steps, 1357 * and removing any final directory separator. 1358 * <p> 1359 * This method normalizes a path to a standard format. 1360 * The input may contain separators in either Unix or Windows format. 1361 * The output will contain separators in the format specified. 1362 * <p> 1363 * A trailing slash will be removed. 1364 * A double slash will be merged to a single slash (but UNC names are handled). 1365 * A single dot path segment will be removed. 1366 * A double dot will cause that path segment and the one before to be removed. 1367 * If the double dot has no parent path segment to work with, {@code null} 1368 * is returned. 1369 * <p> 1370 * The output will be the same on both Unix and Windows including 1371 * the separator character. 
1372 * <pre> 1373 * /foo// --> /foo 1374 * /foo/./ --> /foo 1375 * /foo/../bar --> /bar 1376 * /foo/../bar/ --> /bar 1377 * /foo/../bar/../baz --> /baz 1378 * //foo//./bar --> /foo/bar 1379 * /../ --> null 1380 * ../foo --> null 1381 * foo/bar/.. --> foo 1382 * foo/../../bar --> null 1383 * foo/../bar --> bar 1384 * //server/foo/../bar --> //server/bar 1385 * //server/../bar --> null 1386 * C:\foo\..\bar --> C:\bar 1387 * C:\..\bar --> null 1388 * ~/foo/../bar/ --> ~/bar 1389 * ~/../bar --> null 1390 * </pre> 1391 * 1392 * @param fileName the fileName to normalize, null returns null 1393 * @param unixSeparator {@code true} if a Unix separator should 1394 * be used or {@code false} if a Windows separator should be used. 1395 * @return the normalized fileName, or null if invalid 1396 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000}) 1397 * @since 2.0 1398 */ 1399 public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) { 1400 return doNormalize(fileName, toSeparator(unixSeparator), false); 1401 } 1402 1403 /** 1404 * Removes the extension from a fileName. 1405 * <p> 1406 * This method returns the textual part of the fileName before the last dot. 1407 * There must be no directory separator after the dot. 1408 * <pre> 1409 * foo.txt --> foo 1410 * a\b\c.jpg --> a\b\c 1411 * a\b\c --> a\b\c 1412 * a.b\c --> a.b\c 1413 * </pre> 1414 * <p> 1415 * The output will be the same irrespective of the machine that the code is running on. 
1416 * 1417 * @param fileName the fileName to query, null returns null 1418 * @return the fileName minus the extension 1419 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000}) 1420 */ 1421 public static String removeExtension(final String fileName) { 1422 if (fileName == null) { 1423 return null; 1424 } 1425 requireNonNullChars(fileName); 1426 1427 final int index = indexOfExtension(fileName); 1428 if (index == NOT_FOUND) { 1429 return fileName; 1430 } 1431 return fileName.substring(0, index); 1432 } 1433 1434 /** 1435 * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions. 1436 * 1437 * This may be used for poison byte attacks. 1438 * 1439 * @param path the path to check 1440 * @return The input 1441 * @throws IllegalArgumentException if path contains the null character ({@code U+0000}) 1442 */ 1443 private static String requireNonNullChars(final String path) { 1444 if (path.indexOf(0) >= 0) { 1445 throw new IllegalArgumentException( 1446 "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it"); 1447 } 1448 return path; 1449 } 1450 1451 /** 1452 * Converts all separators to the system separator. 1453 * 1454 * @param path the path to be changed, null ignored. 1455 * @return the updated path. 1456 */ 1457 public static String separatorsToSystem(final String path) { 1458 return FileSystem.getCurrent().normalizeSeparators(path); 1459 } 1460 1461 /** 1462 * Converts all separators to the Unix separator of forward slash. 1463 * 1464 * @param path the path to be changed, null ignored. 1465 * @return the new path. 1466 */ 1467 public static String separatorsToUnix(final String path) { 1468 return FileSystem.LINUX.normalizeSeparators(path); 1469 } 1470 1471 /** 1472 * Converts all separators to the Windows separator of backslash. 
1473 * 1474 * @param path the path to be changed, null ignored. 1475 * @return the updated path. 1476 */ 1477 public static String separatorsToWindows(final String path) { 1478 return FileSystem.WINDOWS.normalizeSeparators(path); 1479 } 1480 1481 /** 1482 * Splits a string into a number of tokens. 1483 * The text is split by '?' and '*'. 1484 * Where multiple '*' occur consecutively they are collapsed into a single '*'. 1485 * 1486 * @param text the text to split 1487 * @return the array of tokens, never null 1488 */ 1489 static String[] splitOnTokens(final String text) { 1490 // used by wildcardMatch 1491 // package level so a unit test may run on this 1492 1493 if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) { 1494 return new String[] { text }; 1495 } 1496 1497 final char[] array = text.toCharArray(); 1498 final ArrayList<String> list = new ArrayList<>(); 1499 final StringBuilder buffer = new StringBuilder(); 1500 char prevChar = 0; 1501 for (final char ch : array) { 1502 if (ch == '?' || ch == '*') { 1503 if (buffer.length() != 0) { 1504 list.add(buffer.toString()); 1505 buffer.setLength(0); 1506 } 1507 if (ch == '?') { 1508 list.add("?"); 1509 } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*' 1510 list.add("*"); 1511 } 1512 } else { 1513 buffer.append(ch); 1514 } 1515 prevChar = ch; 1516 } 1517 if (buffer.length() != 0) { 1518 list.add(buffer.toString()); 1519 } 1520 1521 return list.toArray(EMPTY_STRING_ARRAY); 1522 } 1523 1524 /** 1525 * Returns '/' if given true, '\\' otherwise. 1526 * 1527 * @param unixSeparator which separator to return. 1528 * @return '/' if given true, '\\' otherwise. 1529 */ 1530 private static char toSeparator(final boolean unixSeparator) { 1531 return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR; 1532 } 1533 1534 /** 1535 * Checks a fileName to see if it matches the specified wildcard matcher, 1536 * always testing case-sensitive. 
1537 * <p> 1538 * The wildcard matcher uses the characters '?' and '*' to represent a 1539 * single or multiple (zero or more) wildcard characters. 1540 * This is the same as often found on DOS/Unix command lines. 1541 * The check is case-sensitive always. 1542 * <pre> 1543 * wildcardMatch("c.txt", "*.txt") --> true 1544 * wildcardMatch("c.txt", "*.jpg") --> false 1545 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1546 * wildcardMatch("c.txt", "*.???") --> true 1547 * wildcardMatch("c.txt", "*.????") --> false 1548 * </pre> 1549 * N.B. the sequence "*?" does not work properly at present in match strings. 1550 * 1551 * @param fileName the fileName to match on 1552 * @param wildcardMatcher the wildcard string to match against 1553 * @return true if the fileName matches the wildcard string 1554 * @see IOCase#SENSITIVE 1555 */ 1556 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) { 1557 return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE); 1558 } 1559 1560 /** 1561 * Checks a fileName to see if it matches the specified wildcard matcher 1562 * allowing control over case-sensitivity. 1563 * <p> 1564 * The wildcard matcher uses the characters '?' and '*' to represent a 1565 * single or multiple (zero or more) wildcard characters. 1566 * N.B. the sequence "*?" does not work properly at present in match strings. 
1567 * 1568 * @param fileName the fileName to match on 1569 * @param wildcardMatcher the wildcard string to match against 1570 * @param ioCase what case sensitivity rule to use, null means case-sensitive 1571 * @return true if the fileName matches the wildcard string 1572 * @since 1.3 1573 */ 1574 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) { 1575 if (fileName == null && wildcardMatcher == null) { 1576 return true; 1577 } 1578 if (fileName == null || wildcardMatcher == null) { 1579 return false; 1580 } 1581 ioCase = IOCase.value(ioCase, IOCase.SENSITIVE); 1582 final String[] wcs = splitOnTokens(wildcardMatcher); 1583 boolean anyChars = false; 1584 int textIdx = 0; 1585 int wcsIdx = 0; 1586 final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length); 1587 1588 // loop around a backtrack stack, to handle complex * matching 1589 do { 1590 if (!backtrack.isEmpty()) { 1591 final int[] array = backtrack.pop(); 1592 wcsIdx = array[0]; 1593 textIdx = array[1]; 1594 anyChars = true; 1595 } 1596 1597 // loop whilst tokens and text left to process 1598 while (wcsIdx < wcs.length) { 1599 1600 if (wcs[wcsIdx].equals("?")) { 1601 // ? 
so move to next text char 1602 textIdx++; 1603 if (textIdx > fileName.length()) { 1604 break; 1605 } 1606 anyChars = false; 1607 1608 } else if (wcs[wcsIdx].equals("*")) { 1609 // set any chars status 1610 anyChars = true; 1611 if (wcsIdx == wcs.length - 1) { 1612 textIdx = fileName.length(); 1613 } 1614 1615 } else { 1616 // matching text token 1617 if (anyChars) { 1618 // any chars then try to locate text token 1619 textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]); 1620 if (textIdx == NOT_FOUND) { 1621 // token not found 1622 break; 1623 } 1624 final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]); 1625 if (repeat >= 0) { 1626 backtrack.push(new int[] {wcsIdx, repeat}); 1627 } 1628 } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) { 1629 // matching from current position 1630 // couldn't match token 1631 break; 1632 } 1633 1634 // matched text token, move text index to end of matched token 1635 textIdx += wcs[wcsIdx].length(); 1636 anyChars = false; 1637 } 1638 1639 wcsIdx++; 1640 } 1641 1642 // full match 1643 if (wcsIdx == wcs.length && textIdx == fileName.length()) { 1644 return true; 1645 } 1646 1647 } while (!backtrack.isEmpty()); 1648 1649 return false; 1650 } 1651 1652 /** 1653 * Checks a fileName to see if it matches the specified wildcard matcher 1654 * using the case rules of the system. 1655 * <p> 1656 * The wildcard matcher uses the characters '?' and '*' to represent a 1657 * single or multiple (zero or more) wildcard characters. 1658 * This is the same as often found on DOS/Unix command lines. 1659 * The check is case-sensitive on Unix and case-insensitive on Windows. 1660 * <pre> 1661 * wildcardMatch("c.txt", "*.txt") --> true 1662 * wildcardMatch("c.txt", "*.jpg") --> false 1663 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1664 * wildcardMatch("c.txt", "*.???") --> true 1665 * wildcardMatch("c.txt", "*.????") --> false 1666 * </pre> 1667 * N.B. the sequence "*?" 
does not work properly at present in match strings.
     *
     * @param fileName  the fileName to match on
     * @param wildcardMatcher  the wildcard string to match against
     * @return true if the fileName matches the wildcard string
     * @see IOCase#SYSTEM
     */
    public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
        final boolean matched = wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
        return matched;
    }

    /**
     * Instances should NOT be constructed in standard programming.
     */
    public FilenameUtils() {
        // intentionally empty
    }
}