001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io; 018 019import java.io.File; 020import java.io.IOException; 021import java.util.ArrayDeque; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collection; 025import java.util.Deque; 026import java.util.List; 027import java.util.Objects; 028import java.util.regex.Matcher; 029import java.util.regex.Pattern; 030 031/** 032 * General file name and file path manipulation utilities. 033 * <p> 034 * When dealing with file names you can hit problems when moving from a Windows 035 * based development machine to a Unix based production machine. 036 * This class aims to help avoid those problems. 037 * <p> 038 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by 039 * using JDK {@link java.io.File File} objects and the two argument constructor 040 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}. 041 * <p> 042 * Most methods on this class are designed to work the same on both Unix and Windows. 043 * Those that don't include 'System', 'Unix' or 'Windows' in their name. 
044 * <p> 045 * Most methods recognise both separators (forward and back), and both 046 * sets of prefixes. See the javadoc of each method for details. 047 * <p> 048 * This class defines six components within a file name 049 * (example C:\dev\project\file.txt): 050 * <ul> 051 * <li>the prefix - C:\</li> 052 * <li>the path - dev\project\</li> 053 * <li>the full path - C:\dev\project\</li> 054 * <li>the name - file.txt</li> 055 * <li>the base name - file</li> 056 * <li>the extension - txt</li> 057 * </ul> 058 * Note that this class works best if directory file names end with a separator. 059 * If you omit the last separator, it is impossible to determine if the file name 060 * corresponds to a file or a directory. As a result, we have chosen to say 061 * it corresponds to a file. 062 * <p> 063 * This class only supports Unix and Windows style names. 064 * Prefixes are matched as follows: 065 * <pre> 066 * Windows: 067 * a\b\c.txt --> "" --> relative 068 * \a\b\c.txt --> "\" --> current drive absolute 069 * C:a\b\c.txt --> "C:" --> drive relative 070 * C:\a\b\c.txt --> "C:\" --> absolute 071 * \\server\a\b\c.txt --> "\\server\" --> UNC 072 * 073 * Unix: 074 * a/b/c.txt --> "" --> relative 075 * /a/b/c.txt --> "/" --> absolute 076 * ~/a/b/c.txt --> "~/" --> current user 077 * ~ --> "~/" --> current user (slash added) 078 * ~user/a/b/c.txt --> "~user/" --> named user 079 * ~user --> "~user/" --> named user (slash added) 080 * </pre> 081 * Both prefix styles are matched always, irrespective of the machine that you are 082 * currently running on. 083 * <p> 084 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils. 085 * 086 * @since 1.1 087 */ 088public class FilenameUtils { 089 090 private static final String[] EMPTY_STRING_ARRAY = new String[0]; 091 092 private static final String EMPTY_STRING = ""; 093 094 private static final int NOT_FOUND = -1; 095 096 /** 097 * The extension separator character. 
098 * @since 1.4 099 */ 100 public static final char EXTENSION_SEPARATOR = '.'; 101 102 /** 103 * The extension separator String. 104 * @since 1.4 105 */ 106 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR); 107 108 /** 109 * The Unix separator character. 110 */ 111 private static final char UNIX_SEPARATOR = '/'; 112 113 /** 114 * The Windows separator character. 115 */ 116 private static final char WINDOWS_SEPARATOR = '\\'; 117 118 /** 119 * The system separator character. 120 */ 121 private static final char SYSTEM_SEPARATOR = File.separatorChar; 122 123 /** 124 * The separator character that is the opposite of the system separator. 125 */ 126 private static final char OTHER_SEPARATOR; 127 static { 128 if (isSystemWindows()) { 129 OTHER_SEPARATOR = UNIX_SEPARATOR; 130 } else { 131 OTHER_SEPARATOR = WINDOWS_SEPARATOR; 132 } 133 } 134 135 /** 136 * Instances should NOT be constructed in standard programming. 137 */ 138 public FilenameUtils() { 139 super(); 140 } 141 142 //----------------------------------------------------------------------- 143 /** 144 * Determines if Windows file system is in use. 145 * 146 * @return true if the system is Windows 147 */ 148 static boolean isSystemWindows() { 149 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR; 150 } 151 152 //----------------------------------------------------------------------- 153 /** 154 * Checks if the character is a separator. 155 * 156 * @param ch the character to check 157 * @return true if it is a separator character 158 */ 159 private static boolean isSeparator(final char ch) { 160 return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR; 161 } 162 163 //----------------------------------------------------------------------- 164 /** 165 * Normalizes a path, removing double and single dot path steps. 166 * <p> 167 * This method normalizes a path to a standard format. 168 * The input may contain separators in either Unix or Windows format. 
169 * The output will contain separators in the format of the system. 170 * <p> 171 * A trailing slash will be retained. 172 * A double slash will be merged to a single slash (but UNC names are handled). 173 * A single dot path segment will be removed. 174 * A double dot will cause that path segment and the one before to be removed. 175 * If the double dot has no parent path segment to work with, {@code null} 176 * is returned. 177 * <p> 178 * The output will be the same on both Unix and Windows except 179 * for the separator character. 180 * <pre> 181 * /foo// --> /foo/ 182 * /foo/./ --> /foo/ 183 * /foo/../bar --> /bar 184 * /foo/../bar/ --> /bar/ 185 * /foo/../bar/../baz --> /baz 186 * //foo//./bar --> /foo/bar 187 * /../ --> null 188 * ../foo --> null 189 * foo/bar/.. --> foo/ 190 * foo/../../bar --> null 191 * foo/../bar --> bar 192 * //server/foo/../bar --> //server/bar 193 * //server/../bar --> null 194 * C:\foo\..\bar --> C:\bar 195 * C:\..\bar --> null 196 * ~/foo/../bar/ --> ~/bar/ 197 * ~/../bar --> null 198 * </pre> 199 * (Note the file separator returned will be correct for Windows/Unix) 200 * 201 * @param fileName the fileName to normalize, null returns null 202 * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed 203 */ 204 public static String normalize(final String fileName) { 205 return doNormalize(fileName, SYSTEM_SEPARATOR, true); 206 } 207 /** 208 * Normalizes a path, removing double and single dot path steps. 209 * <p> 210 * This method normalizes a path to a standard format. 211 * The input may contain separators in either Unix or Windows format. 212 * The output will contain separators in the format specified. 213 * <p> 214 * A trailing slash will be retained. 215 * A double slash will be merged to a single slash (but UNC names are handled). 216 * A single dot path segment will be removed. 217 * A double dot will cause that path segment and the one before to be removed. 
218 * If the double dot has no parent path segment to work with, {@code null} 219 * is returned. 220 * <p> 221 * The output will be the same on both Unix and Windows except 222 * for the separator character. 223 * <pre> 224 * /foo// --> /foo/ 225 * /foo/./ --> /foo/ 226 * /foo/../bar --> /bar 227 * /foo/../bar/ --> /bar/ 228 * /foo/../bar/../baz --> /baz 229 * //foo//./bar --> /foo/bar 230 * /../ --> null 231 * ../foo --> null 232 * foo/bar/.. --> foo/ 233 * foo/../../bar --> null 234 * foo/../bar --> bar 235 * //server/foo/../bar --> //server/bar 236 * //server/../bar --> null 237 * C:\foo\..\bar --> C:\bar 238 * C:\..\bar --> null 239 * ~/foo/../bar/ --> ~/bar/ 240 * ~/../bar --> null 241 * </pre> 242 * The output will be the same on both Unix and Windows including 243 * the separator character. 244 * 245 * @param fileName the fileName to normalize, null returns null 246 * @param unixSeparator {@code true} if a unix separator should 247 * be used or {@code false} if a windows separator should be used. 248 * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed 249 * @since 2.0 250 */ 251 public static String normalize(final String fileName, final boolean unixSeparator) { 252 final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; 253 return doNormalize(fileName, separator, true); 254 } 255 256 //----------------------------------------------------------------------- 257 /** 258 * Normalizes a path, removing double and single dot path steps, 259 * and removing any final directory separator. 260 * <p> 261 * This method normalizes a path to a standard format. 262 * The input may contain separators in either Unix or Windows format. 263 * The output will contain separators in the format of the system. 264 * <p> 265 * A trailing slash will be removed. 266 * A double slash will be merged to a single slash (but UNC names are handled). 267 * A single dot path segment will be removed. 
268 * A double dot will cause that path segment and the one before to be removed. 269 * If the double dot has no parent path segment to work with, {@code null} 270 * is returned. 271 * <p> 272 * The output will be the same on both Unix and Windows except 273 * for the separator character. 274 * <pre> 275 * /foo// --> /foo 276 * /foo/./ --> /foo 277 * /foo/../bar --> /bar 278 * /foo/../bar/ --> /bar 279 * /foo/../bar/../baz --> /baz 280 * //foo//./bar --> /foo/bar 281 * /../ --> null 282 * ../foo --> null 283 * foo/bar/.. --> foo 284 * foo/../../bar --> null 285 * foo/../bar --> bar 286 * //server/foo/../bar --> //server/bar 287 * //server/../bar --> null 288 * C:\foo\..\bar --> C:\bar 289 * C:\..\bar --> null 290 * ~/foo/../bar/ --> ~/bar 291 * ~/../bar --> null 292 * </pre> 293 * (Note the file separator returned will be correct for Windows/Unix) 294 * 295 * @param fileName the fileName to normalize, null returns null 296 * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed 297 */ 298 public static String normalizeNoEndSeparator(final String fileName) { 299 return doNormalize(fileName, SYSTEM_SEPARATOR, false); 300 } 301 302 /** 303 * Normalizes a path, removing double and single dot path steps, 304 * and removing any final directory separator. 305 * <p> 306 * This method normalizes a path to a standard format. 307 * The input may contain separators in either Unix or Windows format. 308 * The output will contain separators in the format specified. 309 * <p> 310 * A trailing slash will be removed. 311 * A double slash will be merged to a single slash (but UNC names are handled). 312 * A single dot path segment will be removed. 313 * A double dot will cause that path segment and the one before to be removed. 314 * If the double dot has no parent path segment to work with, {@code null} 315 * is returned. 316 * <p> 317 * The output will be the same on both Unix and Windows including 318 * the separator character. 
319 * <pre> 320 * /foo// --> /foo 321 * /foo/./ --> /foo 322 * /foo/../bar --> /bar 323 * /foo/../bar/ --> /bar 324 * /foo/../bar/../baz --> /baz 325 * //foo//./bar --> /foo/bar 326 * /../ --> null 327 * ../foo --> null 328 * foo/bar/.. --> foo 329 * foo/../../bar --> null 330 * foo/../bar --> bar 331 * //server/foo/../bar --> //server/bar 332 * //server/../bar --> null 333 * C:\foo\..\bar --> C:\bar 334 * C:\..\bar --> null 335 * ~/foo/../bar/ --> ~/bar 336 * ~/../bar --> null 337 * </pre> 338 * 339 * @param fileName the fileName to normalize, null returns null 340 * @param unixSeparator {@code true} if a unix separator should 341 * be used or {@code false} if a windows separator should be used. 342 * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed 343 * @since 2.0 344 */ 345 public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) { 346 final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; 347 return doNormalize(fileName, separator, false); 348 } 349 350 /** 351 * Internal method to perform the normalization. 352 * 353 * @param fileName the fileName 354 * @param separator The separator character to use 355 * @param keepSeparator true to keep the final separator 356 * @return the normalized fileName. Null bytes inside string will be removed. 
357 */ 358 private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) { 359 if (fileName == null) { 360 return null; 361 } 362 363 failIfNullBytePresent(fileName); 364 365 int size = fileName.length(); 366 if (size == 0) { 367 return fileName; 368 } 369 final int prefix = getPrefixLength(fileName); 370 if (prefix < 0) { 371 return null; 372 } 373 374 final char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy 375 fileName.getChars(0, fileName.length(), array, 0); 376 377 // fix separators throughout 378 final char otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR; 379 for (int i = 0; i < array.length; i++) { 380 if (array[i] == otherSeparator) { 381 array[i] = separator; 382 } 383 } 384 385 // add extra separator on the end to simplify code below 386 boolean lastIsDirectory = true; 387 if (array[size - 1] != separator) { 388 array[size++] = separator; 389 lastIsDirectory = false; 390 } 391 392 // adjoining slashes 393 for (int i = prefix + 1; i < size; i++) { 394 if (array[i] == separator && array[i - 1] == separator) { 395 System.arraycopy(array, i, array, i - 1, size - i); 396 size--; 397 i--; 398 } 399 } 400 401 // dot slash 402 for (int i = prefix + 1; i < size; i++) { 403 if (array[i] == separator && array[i - 1] == '.' && 404 (i == prefix + 1 || array[i - 2] == separator)) { 405 if (i == size - 1) { 406 lastIsDirectory = true; 407 } 408 System.arraycopy(array, i + 1, array, i - 1, size - i); 409 size -=2; 410 i--; 411 } 412 } 413 414 // double dot slash 415 outer: 416 for (int i = prefix + 2; i < size; i++) { 417 if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' 
&& 418 (i == prefix + 2 || array[i - 3] == separator)) { 419 if (i == prefix + 2) { 420 return null; 421 } 422 if (i == size - 1) { 423 lastIsDirectory = true; 424 } 425 int j; 426 for (j = i - 4 ; j >= prefix; j--) { 427 if (array[j] == separator) { 428 // remove b/../ from a/b/../c 429 System.arraycopy(array, i + 1, array, j + 1, size - i); 430 size -= i - j; 431 i = j + 1; 432 continue outer; 433 } 434 } 435 // remove a/../ from a/../c 436 System.arraycopy(array, i + 1, array, prefix, size - i); 437 size -= i + 1 - prefix; 438 i = prefix + 1; 439 } 440 } 441 442 if (size <= 0) { // should never be less than 0 443 return EMPTY_STRING; 444 } 445 if (size <= prefix) { // should never be less than prefix 446 return new String(array, 0, size); 447 } 448 if (lastIsDirectory && keepSeparator) { 449 return new String(array, 0, size); // keep trailing separator 450 } 451 return new String(array, 0, size - 1); // lose trailing separator 452 } 453 454 //----------------------------------------------------------------------- 455 /** 456 * Concatenates a fileName to a base path using normal command line style rules. 457 * <p> 458 * The effect is equivalent to resultant directory after changing 459 * directory to the first argument, followed by changing directory to 460 * the second argument. 461 * <p> 462 * The first argument is the base path, the second is the path to concatenate. 463 * The returned path is always normalized via {@link #normalize(String)}, 464 * thus <code>..</code> is handled. 465 * <p> 466 * If <code>pathToAdd</code> is absolute (has an absolute prefix), then 467 * it will be normalized and returned. 468 * Otherwise, the paths will be joined, normalized and returned. 469 * <p> 470 * The output will be the same on both Unix and Windows except 471 * for the separator character. 
472 * <pre> 473 * /foo/ + bar --> /foo/bar 474 * /foo + bar --> /foo/bar 475 * /foo + /bar --> /bar 476 * /foo + C:/bar --> C:/bar 477 * /foo + C:bar --> C:bar (*) 478 * /foo/a/ + ../bar --> /foo/bar 479 * /foo/ + ../../bar --> null 480 * /foo/ + /bar --> /bar 481 * /foo/.. + /bar --> /bar 482 * /foo + bar/c.txt --> /foo/bar/c.txt 483 * /foo/c.txt + bar --> /foo/c.txt/bar (!) 484 * </pre> 485 * (*) Note that the Windows relative drive prefix is unreliable when 486 * used with this method. 487 * (!) Note that the first parameter must be a path. If it ends with a name, then 488 * the name will be built into the concatenated path. If this might be a problem, 489 * use {@link #getFullPath(String)} on the base path argument. 490 * 491 * @param basePath the base path to attach to, always treated as a path 492 * @param fullFileNameToAdd the fileName (or path) to attach to the base 493 * @return the concatenated path, or null if invalid. Null bytes inside string will be removed 494 */ 495 public static String concat(final String basePath, final String fullFileNameToAdd) { 496 final int prefix = getPrefixLength(fullFileNameToAdd); 497 if (prefix < 0) { 498 return null; 499 } 500 if (prefix > 0) { 501 return normalize(fullFileNameToAdd); 502 } 503 if (basePath == null) { 504 return null; 505 } 506 final int len = basePath.length(); 507 if (len == 0) { 508 return normalize(fullFileNameToAdd); 509 } 510 final char ch = basePath.charAt(len - 1); 511 if (isSeparator(ch)) { 512 return normalize(basePath + fullFileNameToAdd); 513 } 514 return normalize(basePath + '/' + fullFileNameToAdd); 515 } 516 517 /** 518 * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory). 519 * <p> 520 * The files names are expected to be normalized. 
521 * </p> 522 * 523 * Edge cases: 524 * <ul> 525 * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li> 526 * <li>A directory does not contain itself: return false</li> 527 * <li>A null child file is not contained in any parent: return false</li> 528 * </ul> 529 * 530 * @param canonicalParent 531 * the file to consider as the parent. 532 * @param canonicalChild 533 * the file to consider as the child. 534 * @return true is the candidate leaf is under by the specified composite. False otherwise. 535 * @throws IOException 536 * if an IO error occurs while checking the files. 537 * @since 2.2 538 * @see FileUtils#directoryContains(File, File) 539 */ 540 public static boolean directoryContains(final String canonicalParent, final String canonicalChild) 541 throws IOException { 542 543 // Fail fast against NullPointerException 544 if (canonicalParent == null) { 545 throw new IllegalArgumentException("Directory must not be null"); 546 } 547 548 if (canonicalChild == null) { 549 return false; 550 } 551 552 if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { 553 return false; 554 } 555 556 return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent); 557 } 558 559 //----------------------------------------------------------------------- 560 /** 561 * Converts all separators to the Unix separator of forward slash. 562 * 563 * @param path the path to be changed, null ignored 564 * @return the updated path 565 */ 566 public static String separatorsToUnix(final String path) { 567 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == NOT_FOUND) { 568 return path; 569 } 570 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR); 571 } 572 573 /** 574 * Converts all separators to the Windows separator of backslash. 
575 * 576 * @param path the path to be changed, null ignored 577 * @return the updated path 578 */ 579 public static String separatorsToWindows(final String path) { 580 if (path == null || path.indexOf(UNIX_SEPARATOR) == NOT_FOUND) { 581 return path; 582 } 583 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR); 584 } 585 586 /** 587 * Converts all separators to the system separator. 588 * 589 * @param path the path to be changed, null ignored 590 * @return the updated path 591 */ 592 public static String separatorsToSystem(final String path) { 593 if (path == null) { 594 return null; 595 } 596 return isSystemWindows() ? separatorsToWindows(path) : separatorsToUnix(path); 597 } 598 599 //----------------------------------------------------------------------- 600 /** 601 * Returns the length of the fileName prefix, such as <code>C:/</code> or <code>~/</code>. 602 * <p> 603 * This method will handle a file in either Unix or Windows format. 604 * <p> 605 * The prefix length includes the first slash in the full fileName 606 * if applicable. Thus, it is possible that the length returned is greater 607 * than the length of the input string. 608 * <pre> 609 * Windows: 610 * a\b\c.txt --> "" --> relative 611 * \a\b\c.txt --> "\" --> current drive absolute 612 * C:a\b\c.txt --> "C:" --> drive relative 613 * C:\a\b\c.txt --> "C:\" --> absolute 614 * \\server\a\b\c.txt --> "\\server\" --> UNC 615 * \\\a\b\c.txt --> error, length = -1 616 * 617 * Unix: 618 * a/b/c.txt --> "" --> relative 619 * /a/b/c.txt --> "/" --> absolute 620 * ~/a/b/c.txt --> "~/" --> current user 621 * ~ --> "~/" --> current user (slash added) 622 * ~user/a/b/c.txt --> "~user/" --> named user 623 * ~user --> "~user/" --> named user (slash added) 624 * //server/a/b/c.txt --> "//server/" 625 * ///a/b/c.txt --> error, length = -1 626 * </pre> 627 * <p> 628 * The output will be the same irrespective of the machine that the code is running on. 629 * ie. both Unix and Windows prefixes are matched regardless. 
630 * 631 * Note that a leading // (or \\) is used to indicate a UNC name on Windows. 632 * These must be followed by a server name, so double-slashes are not collapsed 633 * to a single slash at the start of the fileName. 634 * 635 * @param fileName the fileName to find the prefix in, null returns -1 636 * @return the length of the prefix, -1 if invalid or null 637 */ 638 public static int getPrefixLength(final String fileName) { 639 if (fileName == null) { 640 return NOT_FOUND; 641 } 642 final int len = fileName.length(); 643 if (len == 0) { 644 return 0; 645 } 646 char ch0 = fileName.charAt(0); 647 if (ch0 == ':') { 648 return NOT_FOUND; 649 } 650 if (len == 1) { 651 if (ch0 == '~') { 652 return 2; // return a length greater than the input 653 } 654 return isSeparator(ch0) ? 1 : 0; 655 } 656 if (ch0 == '~') { 657 int posUnix = fileName.indexOf(UNIX_SEPARATOR, 1); 658 int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 1); 659 if (posUnix == NOT_FOUND && posWin == NOT_FOUND) { 660 return len + 1; // return a length greater than the input 661 } 662 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 663 posWin = posWin == NOT_FOUND ? posUnix : posWin; 664 return Math.min(posUnix, posWin) + 1; 665 } 666 final char ch1 = fileName.charAt(1); 667 if (ch1 == ':') { 668 ch0 = Character.toUpperCase(ch0); 669 if (ch0 >= 'A' && ch0 <= 'Z') { 670 if (len == 2 || isSeparator(fileName.charAt(2)) == false) { 671 return 2; 672 } 673 return 3; 674 } else if (ch0 == UNIX_SEPARATOR) { 675 return 1; 676 } 677 return NOT_FOUND; 678 679 } else if (isSeparator(ch0) && isSeparator(ch1)) { 680 int posUnix = fileName.indexOf(UNIX_SEPARATOR, 2); 681 int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 2); 682 if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) { 683 return NOT_FOUND; 684 } 685 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 686 posWin = posWin == NOT_FOUND ? 
posUnix : posWin; 687 final int pos = Math.min(posUnix, posWin) + 1; 688 final String hostnamePart = fileName.substring(2, pos - 1); 689 return isValidHostName(hostnamePart) ? pos : NOT_FOUND; 690 } else { 691 return isSeparator(ch0) ? 1 : 0; 692 } 693 } 694 695 /** 696 * Returns the index of the last directory separator character. 697 * <p> 698 * This method will handle a file in either Unix or Windows format. 699 * The position of the last forward or backslash is returned. 700 * <p> 701 * The output will be the same irrespective of the machine that the code is running on. 702 * 703 * @param fileName the fileName to find the last path separator in, null returns -1 704 * @return the index of the last separator character, or -1 if there 705 * is no such character 706 */ 707 public static int indexOfLastSeparator(final String fileName) { 708 if (fileName == null) { 709 return NOT_FOUND; 710 } 711 final int lastUnixPos = fileName.lastIndexOf(UNIX_SEPARATOR); 712 final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_SEPARATOR); 713 return Math.max(lastUnixPos, lastWindowsPos); 714 } 715 716 /** 717 * Returns the index of the last extension separator character, which is a dot. 718 * <p> 719 * This method also checks that there is no directory separator after the last dot. To do this it uses 720 * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format. 721 * </p> 722 * <p> 723 * The output will be the same irrespective of the machine that the code is running on, with the 724 * exception of a possible {@link IllegalArgumentException} on Windows (see below). 725 * </p> 726 * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt". 727 * In this case, the name wouldn't be the name of a file, but the identifier of an 728 * alternate data stream (bar.txt) on the file foo.exe. The method used to return 729 * ".txt" here, which would be misleading. 
Commons IO 2.7, and later versions, are throwing 730 * an {@link IllegalArgumentException} for names like this. 731 * 732 * @param fileName 733 * the fileName to find the last extension separator in, null returns -1 734 * @return the index of the last extension separator character, or -1 if there is no such character 735 * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact, 736 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 737 */ 738 public static int indexOfExtension(final String fileName) throws IllegalArgumentException { 739 if (fileName == null) { 740 return NOT_FOUND; 741 } 742 if (isSystemWindows()) { 743 // Special handling for NTFS ADS: Don't accept colon in the fileName. 744 final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName)); 745 if (offset != -1) { 746 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden."); 747 } 748 } 749 final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR); 750 final int lastSeparator = indexOfLastSeparator(fileName); 751 return lastSeparator > extensionPos ? NOT_FOUND : extensionPos; 752 } 753 754 //----------------------------------------------------------------------- 755 /** 756 * Gets the prefix from a full fileName, such as <code>C:/</code> 757 * or <code>~/</code>. 758 * <p> 759 * This method will handle a file in either Unix or Windows format. 760 * The prefix includes the first slash in the full fileName where applicable. 
761 * <pre> 762 * Windows: 763 * a\b\c.txt --> "" --> relative 764 * \a\b\c.txt --> "\" --> current drive absolute 765 * C:a\b\c.txt --> "C:" --> drive relative 766 * C:\a\b\c.txt --> "C:\" --> absolute 767 * \\server\a\b\c.txt --> "\\server\" --> UNC 768 * 769 * Unix: 770 * a/b/c.txt --> "" --> relative 771 * /a/b/c.txt --> "/" --> absolute 772 * ~/a/b/c.txt --> "~/" --> current user 773 * ~ --> "~/" --> current user (slash added) 774 * ~user/a/b/c.txt --> "~user/" --> named user 775 * ~user --> "~user/" --> named user (slash added) 776 * </pre> 777 * <p> 778 * The output will be the same irrespective of the machine that the code is running on. 779 * ie. both Unix and Windows prefixes are matched regardless. 780 * 781 * @param fileName the fileName to query, null returns null 782 * @return the prefix of the file, null if invalid. Null bytes inside string will be removed 783 */ 784 public static String getPrefix(final String fileName) { 785 if (fileName == null) { 786 return null; 787 } 788 final int len = getPrefixLength(fileName); 789 if (len < 0) { 790 return null; 791 } 792 if (len > fileName.length()) { 793 failIfNullBytePresent(fileName + UNIX_SEPARATOR); 794 return fileName + UNIX_SEPARATOR; 795 } 796 final String path = fileName.substring(0, len); 797 failIfNullBytePresent(path); 798 return path; 799 } 800 801 /** 802 * Gets the path from a full fileName, which excludes the prefix. 803 * <p> 804 * This method will handle a file in either Unix or Windows format. 805 * The method is entirely text based, and returns the text before and 806 * including the last forward or backslash. 807 * <pre> 808 * C:\a\b\c.txt --> a\b\ 809 * ~/a/b/c.txt --> a/b/ 810 * a.txt --> "" 811 * a/b/c --> a/b/ 812 * a/b/c/ --> a/b/c/ 813 * </pre> 814 * <p> 815 * The output will be the same irrespective of the machine that the code is running on. 816 * <p> 817 * This method drops the prefix from the result. 818 * See {@link #getFullPath(String)} for the method that retains the prefix. 
819 * 820 * @param fileName the fileName to query, null returns null 821 * @return the path of the file, an empty string if none exists, null if invalid. 822 * Null bytes inside string will be removed 823 */ 824 public static String getPath(final String fileName) { 825 return doGetPath(fileName, 1); 826 } 827 828 /** 829 * Gets the path from a full fileName, which excludes the prefix, and 830 * also excluding the final directory separator. 831 * <p> 832 * This method will handle a file in either Unix or Windows format. 833 * The method is entirely text based, and returns the text before the 834 * last forward or backslash. 835 * <pre> 836 * C:\a\b\c.txt --> a\b 837 * ~/a/b/c.txt --> a/b 838 * a.txt --> "" 839 * a/b/c --> a/b 840 * a/b/c/ --> a/b/c 841 * </pre> 842 * <p> 843 * The output will be the same irrespective of the machine that the code is running on. 844 * <p> 845 * This method drops the prefix from the result. 846 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. 847 * 848 * @param fileName the fileName to query, null returns null 849 * @return the path of the file, an empty string if none exists, null if invalid. 850 * Null bytes inside string will be removed 851 */ 852 public static String getPathNoEndSeparator(final String fileName) { 853 return doGetPath(fileName, 0); 854 } 855 856 /** 857 * Does the work of getting the path. 858 * 859 * @param fileName the fileName 860 * @param separatorAdd 0 to omit the end separator, 1 to return it 861 * @return the path. 
Null bytes inside string will be removed 862 */ 863 private static String doGetPath(final String fileName, final int separatorAdd) { 864 if (fileName == null) { 865 return null; 866 } 867 final int prefix = getPrefixLength(fileName); 868 if (prefix < 0) { 869 return null; 870 } 871 final int index = indexOfLastSeparator(fileName); 872 final int endIndex = index+separatorAdd; 873 if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) { 874 return EMPTY_STRING; 875 } 876 final String path = fileName.substring(prefix, endIndex); 877 failIfNullBytePresent(path); 878 return path; 879 } 880 881 /** 882 * Gets the full path from a full fileName, which is the prefix + path. 883 * <p> 884 * This method will handle a file in either Unix or Windows format. 885 * The method is entirely text based, and returns the text before and 886 * including the last forward or backslash. 887 * <pre> 888 * C:\a\b\c.txt --> C:\a\b\ 889 * ~/a/b/c.txt --> ~/a/b/ 890 * a.txt --> "" 891 * a/b/c --> a/b/ 892 * a/b/c/ --> a/b/c/ 893 * C: --> C: 894 * C:\ --> C:\ 895 * ~ --> ~/ 896 * ~/ --> ~/ 897 * ~user --> ~user/ 898 * ~user/ --> ~user/ 899 * </pre> 900 * <p> 901 * The output will be the same irrespective of the machine that the code is running on. 902 * 903 * @param fileName the fileName to query, null returns null 904 * @return the path of the file, an empty string if none exists, null if invalid 905 */ 906 public static String getFullPath(final String fileName) { 907 return doGetFullPath(fileName, true); 908 } 909 910 /** 911 * Gets the full path from a full fileName, which is the prefix + path, 912 * and also excluding the final directory separator. 913 * <p> 914 * This method will handle a file in either Unix or Windows format. 915 * The method is entirely text based, and returns the text before the 916 * last forward or backslash. 
917 * <pre> 918 * C:\a\b\c.txt --> C:\a\b 919 * ~/a/b/c.txt --> ~/a/b 920 * a.txt --> "" 921 * a/b/c --> a/b 922 * a/b/c/ --> a/b/c 923 * C: --> C: 924 * C:\ --> C:\ 925 * ~ --> ~ 926 * ~/ --> ~ 927 * ~user --> ~user 928 * ~user/ --> ~user 929 * </pre> 930 * <p> 931 * The output will be the same irrespective of the machine that the code is running on. 932 * 933 * @param fileName the fileName to query, null returns null 934 * @return the path of the file, an empty string if none exists, null if invalid 935 */ 936 public static String getFullPathNoEndSeparator(final String fileName) { 937 return doGetFullPath(fileName, false); 938 } 939 940 /** 941 * Does the work of getting the path. 942 * 943 * @param fileName the fileName 944 * @param includeSeparator true to include the end separator 945 * @return the path 946 */ 947 private static String doGetFullPath(final String fileName, final boolean includeSeparator) { 948 if (fileName == null) { 949 return null; 950 } 951 final int prefix = getPrefixLength(fileName); 952 if (prefix < 0) { 953 return null; 954 } 955 if (prefix >= fileName.length()) { 956 if (includeSeparator) { 957 return getPrefix(fileName); // add end slash if necessary 958 } 959 return fileName; 960 } 961 final int index = indexOfLastSeparator(fileName); 962 if (index < 0) { 963 return fileName.substring(0, prefix); 964 } 965 int end = index + (includeSeparator ? 1 : 0); 966 if (end == 0) { 967 end++; 968 } 969 return fileName.substring(0, end); 970 } 971 972 /** 973 * Gets the name minus the path from a full fileName. 974 * <p> 975 * This method will handle a file in either Unix or Windows format. 976 * The text after the last forward or backslash is returned. 977 * <pre> 978 * a/b/c.txt --> c.txt 979 * a.txt --> a.txt 980 * a/b/c --> c 981 * a/b/c/ --> "" 982 * </pre> 983 * <p> 984 * The output will be the same irrespective of the machine that the code is running on. 
985 * 986 * @param fileName the fileName to query, null returns null 987 * @return the name of the file without the path, or an empty string if none exists. 988 * Null bytes inside string will be removed 989 */ 990 public static String getName(final String fileName) { 991 if (fileName == null) { 992 return null; 993 } 994 failIfNullBytePresent(fileName); 995 final int index = indexOfLastSeparator(fileName); 996 return fileName.substring(index + 1); 997 } 998 999 /** 1000 * Check the input for null bytes, a sign of unsanitized data being passed to to file level functions. 1001 * 1002 * This may be used for poison byte attacks. 1003 * @param path the path to check 1004 */ 1005 private static void failIfNullBytePresent(final String path) { 1006 final int len = path.length(); 1007 for (int i = 0; i < len; i++) { 1008 if (path.charAt(i) == 0) { 1009 throw new IllegalArgumentException("Null byte present in file/path name. There are no " + 1010 "known legitimate use cases for such data, but several injection attacks may use it"); 1011 } 1012 } 1013 } 1014 1015 /** 1016 * Gets the base name, minus the full path and extension, from a full fileName. 1017 * <p> 1018 * This method will handle a file in either Unix or Windows format. 1019 * The text after the last forward or backslash and before the last dot is returned. 1020 * <pre> 1021 * a/b/c.txt --> c 1022 * a.txt --> a 1023 * a/b/c --> c 1024 * a/b/c/ --> "" 1025 * </pre> 1026 * <p> 1027 * The output will be the same irrespective of the machine that the code is running on. 1028 * 1029 * @param fileName the fileName to query, null returns null 1030 * @return the name of the file without the path, or an empty string if none exists. Null bytes inside string 1031 * will be removed 1032 */ 1033 public static String getBaseName(final String fileName) { 1034 return removeExtension(getName(fileName)); 1035 } 1036 1037 /** 1038 * Gets the extension of a fileName. 
1039 * <p> 1040 * This method returns the textual part of the fileName after the last dot. 1041 * There must be no directory separator after the dot. 1042 * <pre> 1043 * foo.txt --> "txt" 1044 * a/b/c.jpg --> "jpg" 1045 * a/b.txt/c --> "" 1046 * a/b/c --> "" 1047 * </pre> 1048 * <p> 1049 * The output will be the same irrespective of the machine that the code is running on, with the 1050 * exception of a possible {@link IllegalArgumentException} on Windows (see below). 1051 * </p> 1052 * <p> 1053 * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt". 1054 * In this case, the name wouldn't be the name of a file, but the identifier of an 1055 * alternate data stream (bar.txt) on the file foo.exe. The method used to return 1056 * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing 1057 * an {@link IllegalArgumentException} for names like this. 1058 * 1059 * @param fileName the fileName to retrieve the extension of. 1060 * @return the extension of the file or an empty string if none exists or {@code null} 1061 * if the fileName is {@code null}. 1062 * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact, 1063 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 1064 */ 1065 public static String getExtension(final String fileName) throws IllegalArgumentException { 1066 if (fileName == null) { 1067 return null; 1068 } 1069 final int index = indexOfExtension(fileName); 1070 if (index == NOT_FOUND) { 1071 return EMPTY_STRING; 1072 } 1073 return fileName.substring(index + 1); 1074 } 1075 1076 /** 1077 * Special handling for NTFS ADS: Don't accept colon in the fileName. 1078 * 1079 * @param fileName a file name 1080 * @return ADS offsets. 1081 */ 1082 private static int getAdsCriticalOffset(final String fileName) { 1083 // Step 1: Remove leading path segments. 
1084 final int offset1 = fileName.lastIndexOf(SYSTEM_SEPARATOR); 1085 final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR); 1086 if (offset1 == -1) { 1087 if (offset2 == -1) { 1088 return 0; 1089 } 1090 return offset2 + 1; 1091 } 1092 if (offset2 == -1) { 1093 return offset1 + 1; 1094 } 1095 return Math.max(offset1, offset2) + 1; 1096 } 1097 1098 //----------------------------------------------------------------------- 1099 /** 1100 * Removes the extension from a fileName. 1101 * <p> 1102 * This method returns the textual part of the fileName before the last dot. 1103 * There must be no directory separator after the dot. 1104 * <pre> 1105 * foo.txt --> foo 1106 * a\b\c.jpg --> a\b\c 1107 * a\b\c --> a\b\c 1108 * a.b\c --> a.b\c 1109 * </pre> 1110 * <p> 1111 * The output will be the same irrespective of the machine that the code is running on. 1112 * 1113 * @param fileName the fileName to query, null returns null 1114 * @return the fileName minus the extension 1115 */ 1116 public static String removeExtension(final String fileName) { 1117 if (fileName == null) { 1118 return null; 1119 } 1120 failIfNullBytePresent(fileName); 1121 1122 final int index = indexOfExtension(fileName); 1123 if (index == NOT_FOUND) { 1124 return fileName; 1125 } 1126 return fileName.substring(0, index); 1127 } 1128 1129 //----------------------------------------------------------------------- 1130 /** 1131 * Checks whether two fileNames are equal exactly. 1132 * <p> 1133 * No processing is performed on the fileNames other than comparison, 1134 * thus this is merely a null-safe case-sensitive equals. 
1135 * 1136 * @param fileName1 the first fileName to query, may be null 1137 * @param fileName2 the second fileName to query, may be null 1138 * @return true if the fileNames are equal, null equals null 1139 * @see IOCase#SENSITIVE 1140 */ 1141 public static boolean equals(final String fileName1, final String fileName2) { 1142 return equals(fileName1, fileName2, false, IOCase.SENSITIVE); 1143 } 1144 1145 /** 1146 * Checks whether two fileNames are equal using the case rules of the system. 1147 * <p> 1148 * No processing is performed on the fileNames other than comparison. 1149 * The check is case-sensitive on Unix and case-insensitive on Windows. 1150 * 1151 * @param fileName1 the first fileName to query, may be null 1152 * @param fileName2 the second fileName to query, may be null 1153 * @return true if the fileNames are equal, null equals null 1154 * @see IOCase#SYSTEM 1155 */ 1156 public static boolean equalsOnSystem(final String fileName1, final String fileName2) { 1157 return equals(fileName1, fileName2, false, IOCase.SYSTEM); 1158 } 1159 1160 //----------------------------------------------------------------------- 1161 /** 1162 * Checks whether two fileNames are equal after both have been normalized. 1163 * <p> 1164 * Both fileNames are first passed to {@link #normalize(String)}. 1165 * The check is then performed in a case-sensitive manner. 1166 * 1167 * @param fileName1 the first fileName to query, may be null 1168 * @param fileName2 the second fileName to query, may be null 1169 * @return true if the fileNames are equal, null equals null 1170 * @see IOCase#SENSITIVE 1171 */ 1172 public static boolean equalsNormalized(final String fileName1, final String fileName2) { 1173 return equals(fileName1, fileName2, true, IOCase.SENSITIVE); 1174 } 1175 1176 /** 1177 * Checks whether two fileNames are equal after both have been normalized 1178 * and using the case rules of the system. 1179 * <p> 1180 * Both fileNames are first passed to {@link #normalize(String)}. 
1181 * The check is then performed case-sensitive on Unix and 1182 * case-insensitive on Windows. 1183 * 1184 * @param fileName1 the first fileName to query, may be null 1185 * @param fileName2 the second fileName to query, may be null 1186 * @return true if the fileNames are equal, null equals null 1187 * @see IOCase#SYSTEM 1188 */ 1189 public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) { 1190 return equals(fileName1, fileName2, true, IOCase.SYSTEM); 1191 } 1192 1193 /** 1194 * Checks whether two fileNames are equal, optionally normalizing and providing 1195 * control over the case-sensitivity. 1196 * 1197 * @param fileName1 the first fileName to query, may be null 1198 * @param fileName2 the second fileName to query, may be null 1199 * @param normalized whether to normalize the fileNames 1200 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1201 * @return true if the fileNames are equal, null equals null 1202 * @since 1.3 1203 */ 1204 public static boolean equals( 1205 String fileName1, String fileName2, 1206 final boolean normalized, IOCase caseSensitivity) { 1207 1208 if (fileName1 == null || fileName2 == null) { 1209 return fileName1 == null && fileName2 == null; 1210 } 1211 if (normalized) { 1212 fileName1 = normalize(fileName1); 1213 fileName2 = normalize(fileName2); 1214 Objects.requireNonNull(fileName1, "Error normalizing one or both of the file names"); 1215 Objects.requireNonNull(fileName2, "Error normalizing one or both of the file names"); 1216 } 1217 if (caseSensitivity == null) { 1218 caseSensitivity = IOCase.SENSITIVE; 1219 } 1220 return caseSensitivity.checkEquals(fileName1, fileName2); 1221 } 1222 1223 //----------------------------------------------------------------------- 1224 /** 1225 * Checks whether the extension of the fileName is that specified. 1226 * <p> 1227 * This method obtains the extension as the textual part of the fileName 1228 * after the last dot. 
There must be no directory separator after the dot. 1229 * The extension check is case-sensitive on all platforms. 1230 * 1231 * @param fileName the fileName to query, null returns false 1232 * @param extension the extension to check for, null or empty checks for no extension 1233 * @return true if the fileName has the specified extension 1234 * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes 1235 */ 1236 public static boolean isExtension(final String fileName, final String extension) { 1237 if (fileName == null) { 1238 return false; 1239 } 1240 failIfNullBytePresent(fileName); 1241 1242 if (extension == null || extension.isEmpty()) { 1243 return indexOfExtension(fileName) == NOT_FOUND; 1244 } 1245 final String fileExt = getExtension(fileName); 1246 return fileExt.equals(extension); 1247 } 1248 1249 /** 1250 * Checks whether the extension of the fileName is one of those specified. 1251 * <p> 1252 * This method obtains the extension as the textual part of the fileName 1253 * after the last dot. There must be no directory separator after the dot. 1254 * The extension check is case-sensitive on all platforms. 1255 * 1256 * @param fileName the fileName to query, null returns false 1257 * @param extensions the extensions to check for, null checks for no extension 1258 * @return true if the fileName is one of the extensions 1259 * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes 1260 */ 1261 public static boolean isExtension(final String fileName, final String... 
extensions) { 1262 if (fileName == null) { 1263 return false; 1264 } 1265 failIfNullBytePresent(fileName); 1266 1267 if (extensions == null || extensions.length == 0) { 1268 return indexOfExtension(fileName) == NOT_FOUND; 1269 } 1270 final String fileExt = getExtension(fileName); 1271 for (final String extension : extensions) { 1272 if (fileExt.equals(extension)) { 1273 return true; 1274 } 1275 } 1276 return false; 1277 } 1278 1279 /** 1280 * Checks whether the extension of the fileName is one of those specified. 1281 * <p> 1282 * This method obtains the extension as the textual part of the fileName 1283 * after the last dot. There must be no directory separator after the dot. 1284 * The extension check is case-sensitive on all platforms. 1285 * 1286 * @param fileName the fileName to query, null returns false 1287 * @param extensions the extensions to check for, null checks for no extension 1288 * @return true if the fileName is one of the extensions 1289 * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes 1290 */ 1291 public static boolean isExtension(final String fileName, final Collection<String> extensions) { 1292 if (fileName == null) { 1293 return false; 1294 } 1295 failIfNullBytePresent(fileName); 1296 1297 if (extensions == null || extensions.isEmpty()) { 1298 return indexOfExtension(fileName) == NOT_FOUND; 1299 } 1300 final String fileExt = getExtension(fileName); 1301 for (final String extension : extensions) { 1302 if (fileExt.equals(extension)) { 1303 return true; 1304 } 1305 } 1306 return false; 1307 } 1308 1309 //----------------------------------------------------------------------- 1310 /** 1311 * Checks a fileName to see if it matches the specified wildcard matcher, 1312 * always testing case-sensitive. 1313 * <p> 1314 * The wildcard matcher uses the characters '?' and '*' to represent a 1315 * single or multiple (zero or more) wildcard characters. 
1316 * This is the same as often found on Dos/Unix command lines. 1317 * The check is case-sensitive always. 1318 * <pre> 1319 * wildcardMatch("c.txt", "*.txt") --> true 1320 * wildcardMatch("c.txt", "*.jpg") --> false 1321 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1322 * wildcardMatch("c.txt", "*.???") --> true 1323 * wildcardMatch("c.txt", "*.????") --> false 1324 * </pre> 1325 * N.B. the sequence "*?" does not work properly at present in match strings. 1326 * 1327 * @param fileName the fileName to match on 1328 * @param wildcardMatcher the wildcard string to match against 1329 * @return true if the fileName matches the wildcard string 1330 * @see IOCase#SENSITIVE 1331 */ 1332 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) { 1333 return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE); 1334 } 1335 1336 /** 1337 * Checks a fileName to see if it matches the specified wildcard matcher 1338 * using the case rules of the system. 1339 * <p> 1340 * The wildcard matcher uses the characters '?' and '*' to represent a 1341 * single or multiple (zero or more) wildcard characters. 1342 * This is the same as often found on Dos/Unix command lines. 1343 * The check is case-sensitive on Unix and case-insensitive on Windows. 1344 * <pre> 1345 * wildcardMatch("c.txt", "*.txt") --> true 1346 * wildcardMatch("c.txt", "*.jpg") --> false 1347 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1348 * wildcardMatch("c.txt", "*.???") --> true 1349 * wildcardMatch("c.txt", "*.????") --> false 1350 * </pre> 1351 * N.B. the sequence "*?" does not work properly at present in match strings. 
*
     * @param fileName  the fileName to match on
     * @param wildcardMatcher  the wildcard string to match against
     * @return true if the fileName matches the wildcard string
     * @see IOCase#SYSTEM
     */
    public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
        return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
    }

    /**
     * Checks a fileName to see if it matches the specified wildcard matcher
     * allowing control over case-sensitivity.
     * <p>
     * The wildcard matcher uses the characters '?' and '*' to represent a
     * single or multiple (zero or more) wildcard characters.
     * N.B. the sequence "*?" does not work properly at present in match strings.
     * <p>
     * The matcher string is tokenized with {@link #splitOnTokens(String)} and the
     * tokens are consumed left to right against the fileName. When a text token
     * that follows a '*' occurs more than once in the remaining text, the later
     * occurrence is remembered on a stack and retried if the first attempt fails.
     *
     * @param fileName  the fileName to match on, may be null
     * @param wildcardMatcher  the wildcard string to match against, may be null
     * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
     * @return true if the fileName matches the wildcard string; two nulls match,
     *  a single null never matches
     * @since 1.3
     */
    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase caseSensitivity) {
        if (fileName == null && wildcardMatcher == null) {
            return true;
        }
        if (fileName == null || wildcardMatcher == null) {
            return false;
        }
        if (caseSensitivity == null) {
            caseSensitivity = IOCase.SENSITIVE;
        }
        final String[] wcs = splitOnTokens(wildcardMatcher);
        // true while the previous token was '*', i.e. the next text token may
        // be found anywhere at or after textIdx rather than exactly at textIdx
        boolean anyChars = false;
        int textIdx = 0;
        int wcsIdx = 0;
        // each entry is {token index, text index} of an alternative position to retry
        final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);

        // loop around a backtrack stack, to handle complex * matching
        do {
            if (!backtrack.isEmpty()) {
                // resume at a later occurrence of a text token recorded earlier
                final int[] array = backtrack.pop();
                wcsIdx = array[0];
                textIdx = array[1];
                anyChars = true;
            }

            // loop whilst tokens and text left to process
            while (wcsIdx < wcs.length) {

                if (wcs[wcsIdx].equals("?")) {
                    // ? so move to next text char
                    textIdx++;
                    if (textIdx > fileName.length()) {
                        // ran past the end of the text: nothing left for '?' to consume
                        break;
                    }
                    anyChars = false;

                } else if (wcs[wcsIdx].equals("*")) {
                    // set any chars status
                    anyChars = true;
                    if (wcsIdx == wcs.length - 1) {
                        // a trailing '*' swallows the rest of the text
                        textIdx = fileName.length();
                    }

                } else {
                    // matching text token
                    if (anyChars) {
                        // any chars then try to locate text token
                        textIdx = caseSensitivity.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
                        if (textIdx == NOT_FOUND) {
                            // token not found
                            break;
                        }
                        // remember the next occurrence so it can be tried if this one fails
                        final int repeat = caseSensitivity.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
                        if (repeat >= 0) {
                            backtrack.push(new int[] {wcsIdx, repeat});
                        }
                    } else {
                        // matching from current position
                        if (!caseSensitivity.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
                            // could not match token
                            break;
                        }
                    }

                    // matched text token, move text index to end of matched token
                    textIdx += wcs[wcsIdx].length();
                    anyChars = false;
                }

                wcsIdx++;
            }

            // full match: every token consumed and every text character accounted for
            if (wcsIdx == wcs.length && textIdx == fileName.length()) {
                return true;
            }

        } while (!backtrack.isEmpty());

        return false;
    }

    /**
     * Splits a string into a number of tokens.
     * The text is split by '?' and '*'.
     * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1462 * 1463 * @param text the text to split 1464 * @return the array of tokens, never null 1465 */ 1466 static String[] splitOnTokens(final String text) { 1467 // used by wildcardMatch 1468 // package level so a unit test may run on this 1469 1470 if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) { 1471 return new String[] { text }; 1472 } 1473 1474 final char[] array = text.toCharArray(); 1475 final ArrayList<String> list = new ArrayList<>(); 1476 final StringBuilder buffer = new StringBuilder(); 1477 char prevChar = 0; 1478 for (final char ch : array) { 1479 if (ch == '?' || ch == '*') { 1480 if (buffer.length() != 0) { 1481 list.add(buffer.toString()); 1482 buffer.setLength(0); 1483 } 1484 if (ch == '?') { 1485 list.add("?"); 1486 } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*' 1487 list.add("*"); 1488 } 1489 } else { 1490 buffer.append(ch); 1491 } 1492 prevChar = ch; 1493 } 1494 if (buffer.length() != 0) { 1495 list.add(buffer.toString()); 1496 } 1497 1498 return list.toArray(EMPTY_STRING_ARRAY); 1499 } 1500 1501 /** 1502 * Checks whether a given string is a valid host name according to 1503 * RFC 3986. 1504 * 1505 * <p>Accepted are IP addresses (v4 and v6) as well as what the 1506 * RFC calls a "reg-name". Percent encoded names don't seem to be 1507 * valid names in UNC paths.</p> 1508 * 1509 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" 1510 * @param name the hostname to validate 1511 * @return true if the given name is a valid host name 1512 */ 1513 private static boolean isValidHostName(final String name) { 1514 return isIPv6Address(name) || isRFC3986HostName(name); 1515 } 1516 1517 private static final Pattern IPV4_PATTERN = 1518 Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$"); 1519 private static final int IPV4_MAX_OCTET_VALUE = 255; 1520 1521 /** 1522 * Checks whether a given string represents a valid IPv4 address. 
1523 * 1524 * @param name the name to validate 1525 * @return true if the given name is a valid IPv4 address 1526 */ 1527 // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address 1528 private static boolean isIPv4Address(final String name) { 1529 final Matcher m = IPV4_PATTERN.matcher(name); 1530 if (!m.matches() || m.groupCount() != 4) { 1531 return false; 1532 } 1533 1534 // verify that address subgroups are legal 1535 for (int i = 1; i <= 4; i++) { 1536 final String ipSegment = m.group(i); 1537 final int iIpSegment = Integer.parseInt(ipSegment); 1538 if (iIpSegment > IPV4_MAX_OCTET_VALUE) { 1539 return false; 1540 } 1541 1542 if (ipSegment.length() > 1 && ipSegment.startsWith("0")) { 1543 return false; 1544 } 1545 1546 } 1547 1548 return true; 1549 } 1550 1551 private static final int IPV6_MAX_HEX_GROUPS = 8; 1552 private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4; 1553 private static final int MAX_UNSIGNED_SHORT = 0xffff; 1554 private static final int BASE_16 = 16; 1555 1556 // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address 1557 /** 1558 * Checks whether a given string represents a valid IPv6 address. 
*
     * <p>At most one "::" (compressed zeroes) is allowed, the last group may be a
     * dotted IPv4 address (counted as two groups), and every other group must be
     * one to four ASCII hexadecimal digits. Signs and non-ASCII digits are rejected.</p>
     *
     * @param inet6Address the name to validate
     * @return true if the given name is a valid IPv6 address
     */
    private static boolean isIPv6Address(final String inet6Address) {
        final boolean containsCompressedZeroes = inet6Address.contains("::");
        if (containsCompressedZeroes && (inet6Address.indexOf("::") != inet6Address.lastIndexOf("::"))) {
            // "::" may appear only once
            return false;
        }
        if ((inet6Address.startsWith(":") && !inet6Address.startsWith("::"))
                || (inet6Address.endsWith(":") && !inet6Address.endsWith("::"))) {
            // a single leading or trailing colon is never legal
            return false;
        }
        String[] octets = inet6Address.split(":");
        if (containsCompressedZeroes) {
            final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
            if (inet6Address.endsWith("::")) {
                // String.split() drops ending empty segments
                octetList.add("");
            } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
                // a leading "::" yields two empty segments; keep only one
                octetList.remove(0);
            }
            octets = octetList.toArray(EMPTY_STRING_ARRAY);
        }
        if (octets.length > IPV6_MAX_HEX_GROUPS) {
            return false;
        }
        int validOctets = 0;
        int emptyOctets = 0; // consecutive empty chunks
        for (int index = 0; index < octets.length; index++) {
            final String octet = octets[index];
            if (octet.length() == 0) {
                emptyOctets++;
                if (emptyOctets > 1) {
                    return false;
                }
            } else {
                emptyOctets = 0;
                // Is last chunk an IPv4 address?
                if (index == octets.length - 1 && octet.contains(".")) {
                    if (!isIPv4Address(octet)) {
                        return false;
                    }
                    // an embedded IPv4 address occupies the space of two hex groups
                    validOctets += 2;
                    continue;
                }
                if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
                    return false;
                }
                // Integer.parseInt accepts a leading '+'/'-' and non-ASCII digits,
                // none of which may appear in an IPv6 group, so screen the characters first.
                if (!isAsciiHexDigits(octet)) {
                    return false;
                }
                int octetInt = 0;
                try {
                    octetInt = Integer.parseInt(octet, BASE_16);
                } catch (final NumberFormatException e) {
                    // not expected after the character screening above; kept as a safety net
                    return false;
                }
                if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
                    return false;
                }
            }
            validOctets++;
        }
        // exactly eight groups, or fewer when "::" stands in for the missing ones
        return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
    }

    /**
     * Checks that every character of the given text is an ASCII hexadecimal digit.
     *
     * @param text the text to inspect, must not be null
     * @return true if all characters are in 0-9, a-f or A-F (vacuously true for an empty text)
     */
    private static boolean isAsciiHexDigits(final String text) {
        for (int i = 0; i < text.length(); i++) {
            final char ch = text.charAt(i);
            final boolean hex = (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
            if (!hex) {
                return false;
            }
        }
        return true;
    }

    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");

    /**
     * Checks whether a given string is a valid host name according to
     * RFC 3986 - not accepting IP addresses.
     * <p>
     * The name is split on dots; each label must start with an ASCII letter or
     * digit and continue with letters, digits or hyphens. An empty label is only
     * tolerated in last position (trailing dot).
     *
     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
     * @param name the hostname to validate
     * @return true if the given name is a valid host name
     */
    private static boolean isRFC3986HostName(final String name) {
        // limit -1 keeps trailing empty strings so a trailing dot is visible
        final String[] parts = name.split("\\.", -1);
        for (int i = 0; i < parts.length; i++) {
            if (parts[i].length() == 0) {
                // trailing dot is legal, otherwise we've hit a .. sequence
                return i == parts.length - 1;
            }
            if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
                return false;
            }
        }
        return true;
    }
}