FilenameUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.io;

  18. import java.io.File;
  19. import java.util.ArrayDeque;
  20. import java.util.ArrayList;
  21. import java.util.Arrays;
  22. import java.util.Collection;
  23. import java.util.Deque;
  24. import java.util.List;
  25. import java.util.regex.Matcher;
  26. import java.util.regex.Pattern;
  27. import java.util.stream.Stream;

  28. /**
  29.  * General file name and file path manipulation utilities. The methods in this class
  30.  * operate on strings that represent relative or absolute paths. Nothing in this class
  31.  * ever accesses the file system, or depends on whether a path points to a file that exists.
  32.  * <p>
  33.  * When dealing with file names, you can hit problems when moving from a Windows
  34.  * based development machine to a Unix based production machine.
  35.  * This class aims to help avoid those problems.
  36.  * </p>
  37.  * <p>
  38.  * <strong>NOTE</strong>: You may be able to avoid using this class entirely simply by
  39.  * using JDK {@link File File} objects and the two argument constructor
  40.  * {@link File#File(java.io.File, String) File(File,String)}.
  41.  * </p>
  42.  * <p>
  43.  * Most methods in this class are designed to work the same on both Unix and Windows.
  44.  * Those that don't include 'System', 'Unix', or 'Windows' in their name.
  45.  * </p>
  46.  * <p>
  47.  * Most methods recognize both separators (forward and backslashes), and both
  48.  * sets of prefixes. See the Javadoc of each method for details.
  49.  * </p>
  50.  * <p>
  51.  * This class defines six components within a path (sometimes called a file name or a full file name).
  52.  * Given an absolute Windows path such as C:\dev\project\file.txt they are:
  53.  * </p>
  54.  * <ul>
  55.  * <li>the full file name, or just file name - C:\dev\project\file.txt</li>
  56.  * <li>the prefix - C:\</li>
  57.  * <li>the path - dev\project\</li>
  58.  * <li>the full path - C:\dev\project\</li>
  59.  * <li>the name - file.txt</li>
  60.  * <li>the base name - file</li>
  61.  * <li>the extension - txt</li>
  62.  * </ul>
  63.  * <p>
  64.  * Given an absolute Unix path such as /dev/project/file.txt they are:
  65.  * </p>
  66.  * <ul>
  67.  * <li>the full file name, or just file name - /dev/project/file.txt</li>
  68.  * <li>the prefix - /</li>
  69.  * <li>the path - dev/project/</li>
  70.  * <li>the full path - /dev/project/</li>
  71.  * <li>the name - file.txt</li>
  72.  * <li>the base name - file</li>
  73.  * <li>the extension - txt</li>
  74.  * </ul>
  75.  * <p>
  76.  * Given a relative Windows path such as dev\project\file.txt they are:
  77.  * </p>
  78.  * <ul>
  79.  * <li>the full file name, or just file name - dev\project\file.txt</li>
  80.  * <li>the prefix - "" (empty, because the path is relative)</li>
  81.  * <li>the path - dev\project\</li>
  82.  * <li>the full path - dev\project\</li>
  83.  * <li>the name - file.txt</li>
  84.  * <li>the base name - file</li>
  85.  * <li>the extension - txt</li>
  86.  * </ul>
  87.  * <p>
  88.  * Given a relative Unix path such as dev/project/file.txt they are:
  89.  * </p>
  90.  * <ul>
  91.  * <li>the full file name, or just file name - dev/project/file.txt</li>
  92.  * <li>the prefix - "" (empty, because the path is relative)</li>
  93.  * <li>the path - dev/project/</li>
  94.  * <li>the full path - dev/project/</li>
  95.  * <li>the name - file.txt</li>
  96.  * <li>the base name - file</li>
  97.  * <li>the extension - txt</li>
  98.  * </ul>
  99.  *
  100.  *
  101.  * <p>
  102.  * This class works best if directory names end with a separator.
  103.  * If you omit the last separator, it is impossible to determine if the last component
  104.  * corresponds to a file or a directory. This class treats final components
  105.  * that do not end with a separator as files, not directories.
  106.  * </p>
  107.  * <p>
  108.  * This class only supports Unix and Windows style names.
  109.  * Prefixes are matched as follows:
  110.  * </p>
  111.  * <pre>
  112.  * Windows:
  113.  * a\b\c.txt           --&gt; ""          --&gt; relative
  114.  * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
  115.  * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
  116.  * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
  117.  * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
  118.  *
  119.  * Unix:
  120.  * a/b/c.txt           --&gt; ""          --&gt; relative
  121.  * /a/b/c.txt          --&gt; "/"         --&gt; absolute
  122.  * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
  123.  * ~                   --&gt; "~/"        --&gt; current user (slash added)
  124.  * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
  125.  * ~user               --&gt; "~user/"    --&gt; named user (slash added)
  126.  * </pre>
  127.  * <p>
  128.  * Both prefix styles are matched, irrespective of the machine that you are
  129.  * currently running on.
  130.  * </p>
  131.  *
  132.  * @since 1.1
  133.  */
  134. public class FilenameUtils {

  135.     private static final String[] EMPTY_STRING_ARRAY = {};

  136.     private static final String EMPTY_STRING = "";

  137.     private static final int NOT_FOUND = -1;

  138.     /**
  139.      * The extension separator character.
  140.      * @since 1.4
  141.      */
  142.     public static final char EXTENSION_SEPARATOR = '.';

  143.     /**
  144.      * The extension separator String.
  145.      * @since 1.4
  146.      */
  147.     public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);

  148.     /**
  149.      * The Unix separator character.
  150.      */
  151.     private static final char UNIX_NAME_SEPARATOR = '/';

  152.     /**
  153.      * The Windows separator character.
  154.      */
  155.     private static final char WINDOWS_NAME_SEPARATOR = '\\';

  156.     /**
  157.      * The system separator character.
  158.      */
  159.     private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar;

  160.     /**
  161.      * The separator character that is the opposite of the system separator.
  162.      */
  163.     private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR);

  164.     private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");

  165.     private static final int IPV4_MAX_OCTET_VALUE = 255;

  166.     private static final int IPV6_MAX_HEX_GROUPS = 8;

  167.     private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;

  168.     private static final int MAX_UNSIGNED_SHORT = 0xffff;

  169.     private static final int BASE_16 = 16;

  170.     private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");

  171.     /**
  172.      * Concatenates a fileName to a base path using normal command line style rules.
  173.      * <p>
  174.      * The effect is equivalent to resultant directory after changing
  175.      * directory to the first argument, followed by changing directory to
  176.      * the second argument.
  177.      * </p>
  178.      * <p>
  179.      * The first argument is the base path, the second is the path to concatenate.
  180.      * The returned path is always normalized via {@link #normalize(String)},
  181.      * thus {@code ..} is handled.
  182.      * </p>
  183.      * <p>
  184.      * If {@code pathToAdd} is absolute (has an absolute prefix), then
  185.      * it will be normalized and returned.
  186.      * Otherwise, the paths will be joined, normalized and returned.
  187.      * </p>
  188.      * <p>
  189.      * The output will be the same on both Unix and Windows except
  190.      * for the separator character.
  191.      * </p>
  192.      * <pre>
  193.      * /foo/      + bar        --&gt;  /foo/bar
  194.      * /foo       + bar        --&gt;  /foo/bar
  195.      * /foo       + /bar       --&gt;  /bar
  196.      * /foo       + C:/bar     --&gt;  C:/bar
  197.      * /foo       + C:bar      --&gt;  C:bar [1]
  198.      * /foo/a/    + ../bar     --&gt;  /foo/bar
  199.      * /foo/      + ../../bar  --&gt;  null
  200.      * /foo/      + /bar       --&gt;  /bar
  201.      * /foo/..    + /bar       --&gt;  /bar
  202.      * /foo       + bar/c.txt  --&gt;  /foo/bar/c.txt
  203.      * /foo/c.txt + bar        --&gt;  /foo/c.txt/bar [2]
  204.      * </pre>
  205.      * <p>
  206.      * [1] Note that the Windows relative drive prefix is unreliable when
  207.      * used with this method.
  208.      * </p>
  209.      * <p>
  210.      * [2] Note that the first parameter must be a path. If it ends with a name, then
  211.      * the name will be built into the concatenated path. If this might be a problem,
  212.      * use {@link #getFullPath(String)} on the base path argument.
  213.      * </p>
  214.      *
  215.      * @param basePath  the base path to attach to, always treated as a path
  216.      * @param fullFileNameToAdd  the file name (or path) to attach to the base
  217.      * @return the concatenated path, or null if invalid
  218.      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
  219.      */
  220.     public static String concat(final String basePath, final String fullFileNameToAdd) {
  221.         final int prefix = getPrefixLength(fullFileNameToAdd);
  222.         if (prefix < 0) {
  223.             return null;
  224.         }
  225.         if (prefix > 0) {
  226.             return normalize(fullFileNameToAdd);
  227.         }
  228.         if (basePath == null) {
  229.             return null;
  230.         }
  231.         final int len = basePath.length();
  232.         if (len == 0) {
  233.             return normalize(fullFileNameToAdd);
  234.         }
  235.         final char ch = basePath.charAt(len - 1);
  236.         if (isSeparator(ch)) {
  237.             return normalize(basePath + fullFileNameToAdd);
  238.         }
  239.         return normalize(basePath + '/' + fullFileNameToAdd);
  240.     }

  241.     /**
  242.      * Determines whether the {@code parent} directory contains the {@code child} (a file or directory).
  243.      * This does not read from the file system, and there is no guarantee or expectation that
  244.      * these paths actually exist.
  245.      * <p>
  246.      * The file names are expected to be normalized.
  247.      * </p>
  248.      *
  249.      * Edge cases:
  250.      * <ul>
  251.      * <li>A null or empty parent directory contains nothing: return false</li>
  252.      * <li>A directory does not contain itself: return false</li>
  253.      * <li>A null child file is not contained in any parent: return false</li>
  254.      * </ul>
  255.      *
  256.      * @param canonicalParent the path string to consider as the parent.
  257.      * @param canonicalChild the path string to consider as the child.
  258.      * @return true if the candidate leaf is under the specified composite. False otherwise.
  259.      * @since 2.2
  260.      * @see FileUtils#directoryContains(File, File)
  261.      */
  262.     public static boolean directoryContains(final String canonicalParent, final String canonicalChild) {
  263.         if (isEmpty(canonicalParent) || isEmpty(canonicalChild)) {
  264.             return false;
  265.         }

  266.         if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
  267.             return false;
  268.         }

  269.         final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR);
  270.         final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator;

  271.         return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator);
  272.     }

  273.     /**
  274.      * Does the work of getting the path.
  275.      *
  276.      * @param fileName  the file name
  277.      * @param includeSeparator  true to include the end separator
  278.      * @return the path
  279.      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
  280.      */
  281.     private static String doGetFullPath(final String fileName, final boolean includeSeparator) {
  282.         if (fileName == null) {
  283.             return null;
  284.         }
  285.         final int prefix = getPrefixLength(fileName);
  286.         if (prefix < 0) {
  287.             return null;
  288.         }
  289.         if (prefix >= fileName.length()) {
  290.             if (includeSeparator) {
  291.                 return getPrefix(fileName);  // add end slash if necessary
  292.             }
  293.             return fileName;
  294.         }
  295.         final int index = indexOfLastSeparator(fileName);
  296.         if (index < 0) {
  297.             return fileName.substring(0, prefix);
  298.         }
  299.         int end = index + (includeSeparator ?  1 : 0);
  300.         if (end == 0) {
  301.             end++;
  302.         }
  303.         return fileName.substring(0, end);
  304.     }

  305.     /**
  306.      * Does the work of getting the path.
  307.      *
  308.      * @param fileName  the file name
  309.      * @param separatorAdd  0 to omit the end separator, 1 to return it
  310.      * @return the path
  311.      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
  312.      */
  313.     private static String doGetPath(final String fileName, final int separatorAdd) {
  314.         if (fileName == null) {
  315.             return null;
  316.         }
  317.         final int prefix = getPrefixLength(fileName);
  318.         if (prefix < 0) {
  319.             return null;
  320.         }
  321.         final int index = indexOfLastSeparator(fileName);
  322.         final int endIndex = index + separatorAdd;
  323.         if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
  324.             return EMPTY_STRING;
  325.         }
  326.         return requireNonNullChars(fileName.substring(prefix, endIndex));
  327.     }

  328.     /**
  329.      * Internal method to perform the normalization.
  330.      *
  331.      * @param fileName  the file name
  332.      * @param separator The separator character to use
  333.      * @param keepSeparator  true to keep the final separator
  334.      * @return the normalized fileName
  335.      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
  336.      */
  337.     private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
  338.         if (fileName == null) {
  339.             return null;
  340.         }

  341.         requireNonNullChars(fileName);

  342.         int size = fileName.length();
  343.         if (size == 0) {
  344.             return fileName;
  345.         }
  346.         final int prefix = getPrefixLength(fileName);
  347.         if (prefix < 0) {
  348.             return null;
  349.         }

  350.         final char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
  351.         fileName.getChars(0, fileName.length(), array, 0);

  352.         // fix separators throughout
  353.         final char otherSeparator = flipSeparator(separator);
  354.         for (int i = 0; i < array.length; i++) {
  355.             if (array[i] == otherSeparator) {
  356.                 array[i] = separator;
  357.             }
  358.         }

  359.         // add extra separator on the end to simplify code below
  360.         boolean lastIsDirectory = true;
  361.         if (array[size - 1] != separator) {
  362.             array[size++] = separator;
  363.             lastIsDirectory = false;
  364.         }

  365.         // adjoining slashes
  366.         // If we get here, prefix can only be 0 or greater, size 1 or greater
  367.         // If prefix is 0, set loop start to 1 to prevent index errors
  368.         for (int i = prefix != 0 ? prefix : 1; i < size; i++) {
  369.             if (array[i] == separator && array[i - 1] == separator) {
  370.                 System.arraycopy(array, i, array, i - 1, size - i);
  371.                 size--;
  372.                 i--;
  373.             }
  374.         }

  375.         // period slash
  376.         for (int i = prefix + 1; i < size; i++) {
  377.             if (array[i] == separator && array[i - 1] == '.' &&
  378.                     (i == prefix + 1 || array[i - 2] == separator)) {
  379.                 if (i == size - 1) {
  380.                     lastIsDirectory = true;
  381.                 }
  382.                 System.arraycopy(array, i + 1, array, i - 1, size - i);
  383.                 size -= 2;
  384.                 i--;
  385.             }
  386.         }

  387.         // double period slash
  388.         outer:
  389.         for (int i = prefix + 2; i < size; i++) {
  390.             if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
  391.                     (i == prefix + 2 || array[i - 3] == separator)) {
  392.                 if (i == prefix + 2) {
  393.                     return null;
  394.                 }
  395.                 if (i == size - 1) {
  396.                     lastIsDirectory = true;
  397.                 }
  398.                 int j;
  399.                 for (j = i - 4 ; j >= prefix; j--) {
  400.                     if (array[j] == separator) {
  401.                         // remove b/../ from a/b/../c
  402.                         System.arraycopy(array, i + 1, array, j + 1, size - i);
  403.                         size -= i - j;
  404.                         i = j + 1;
  405.                         continue outer;
  406.                     }
  407.                 }
  408.                 // remove a/../ from a/../c
  409.                 System.arraycopy(array, i + 1, array, prefix, size - i);
  410.                 size -= i + 1 - prefix;
  411.                 i = prefix + 1;
  412.             }
  413.         }

  414.         if (size <= 0) {  // should never be less than 0
  415.             return EMPTY_STRING;
  416.         }
  417.         if (size <= prefix) {  // should never be less than prefix
  418.             return new String(array, 0, size);
  419.         }
  420.         if (lastIsDirectory && keepSeparator) {
  421.             return new String(array, 0, size);  // keep trailing separator
  422.         }
  423.         return new String(array, 0, size - 1);  // lose trailing separator
  424.     }

  425.     /**
  426.      * Checks whether two file names are exactly equal.
  427.      * <p>
  428.      * No processing is performed on the file names other than comparison.
  429.      * This is merely a null-safe case-sensitive string equality.
  430.      * </p>
  431.      *
  432.      * @param fileName1  the first file name, may be null
  433.      * @param fileName2  the second file name, may be null
  434.      * @return true if the file names are equal, null equals null
  435.      * @see IOCase#SENSITIVE
  436.      */
  437.     public static boolean equals(final String fileName1, final String fileName2) {
  438.         return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
  439.     }

  440.     /**
  441.      * Checks whether two file names are equal, optionally normalizing and providing
  442.      * control over the case-sensitivity.
  443.      *
  444.      * @param fileName1  the first file name, may be null
  445.      * @param fileName2  the second file name, may be null
  446.      * @param normalize  whether to normalize the file names
  447.      * @param ioCase  what case sensitivity rule to use, null means case-sensitive
  448.      * @return true if the file names are equal, null equals null
  449.      * @since 1.3
  450.      */
  451.     public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) {

  452.         if (fileName1 == null || fileName2 == null) {
  453.             return fileName1 == null && fileName2 == null;
  454.         }
  455.         if (normalize) {
  456.             fileName1 = normalize(fileName1);
  457.             if (fileName1 == null) {
  458.                 return false;
  459.             }
  460.             fileName2 = normalize(fileName2);
  461.             if (fileName2 == null) {
  462.                 return false;
  463.             }
  464.         }
  465.         return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2);
  466.     }

  467.     /**
  468.      * Checks whether two file names are equal after both have been normalized.
  469.      * <p>
  470.      * Both file names are first passed to {@link #normalize(String)}.
  471.      * The check is then performed in a case-sensitive manner.
  472.      * </p>
  473.      *
  474.      * @param fileName1  the first file name, may be null
  475.      * @param fileName2  the second file name, may be null
  476.      * @return true if the file names are equal, null equals null
  477.      * @see IOCase#SENSITIVE
  478.      */
  479.     public static boolean equalsNormalized(final String fileName1, final String fileName2) {
  480.         return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
  481.     }

  482.     /**
  483.      * Checks whether two file names are equal using the case rules of the system
  484.      * after both have been normalized.
  485.      * <p>
  486.      * Both file names are first passed to {@link #normalize(String)}.
  487.      * The check is then performed case-sensitively on Unix and
  488.      * case-insensitively on Windows.
  489.      * </p>
  490.      *
  491.      * @param fileName1  the first file name, may be null
  492.      * @param fileName2  the second file name, may be null
  493.      * @return true if the file names are equal, null equals null
  494.      * @see IOCase#SYSTEM
  495.      */
  496.     public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
  497.         return equals(fileName1, fileName2, true, IOCase.SYSTEM);
  498.     }

  499.     /**
  500.      * Checks whether two file names are equal using the case rules of the system.
  501.      * <p>
  502.      * No processing is performed on the file names other than comparison.
  503.      * The check is case-sensitive on Unix and case-insensitive on Windows.
  504.      * </p>
  505.      *
  506.      * @param fileName1  the first file name, may be null
  507.      * @param fileName2  the second file name, may be null
  508.      * @return true if the file names are equal, null equals null
  509.      * @see IOCase#SYSTEM
  510.      */
  511.     public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
  512.         return equals(fileName1, fileName2, false, IOCase.SYSTEM);
  513.     }

  514.     /**
  515.      * Flips the Windows name separator to Linux and vice-versa.
  516.      *
  517.      * @param ch The Windows or Linux name separator.
  518.      * @return The Windows or Linux name separator.
  519.      */
  520.     static char flipSeparator(final char ch) {
  521.         if (ch == UNIX_NAME_SEPARATOR) {
  522.             return WINDOWS_NAME_SEPARATOR;
  523.         }
  524.         if (ch == WINDOWS_NAME_SEPARATOR) {
  525.             return UNIX_NAME_SEPARATOR;
  526.         }
  527.         throw new IllegalArgumentException(String.valueOf(ch));
  528.     }

  529.     /**
  530.      * Special handling for NTFS ADS: Don't accept colon in the file name.
  531.      *
  532.      * @param fileName a file name
  533.      * @return the index just past the last name separator (of either style), or 0 if the file name contains none.
  534.      */
  535.     private static int getAdsCriticalOffset(final String fileName) {
  536.         // Step 1: Remove leading path segments.
  537.         final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR);
  538.         final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR);
  539.         if (offset1 == -1) {
  540.             if (offset2 == -1) {
  541.                 return 0;
  542.             }
  543.             return offset2 + 1;
  544.         }
  545.         if (offset2 == -1) {
  546.             return offset1 + 1;
  547.         }
  548.         return Math.max(offset1, offset2) + 1;
  549.     }

  550.     /**
  551.      * Gets the base name, minus the full path and extension, from a full file name.
  552.      * <p>
  553.      * This method will handle a path in either Unix or Windows format.
  554.      * The text after the last forward or backslash and before the last period is returned.
  555.      * </p>
  556.      * <pre>
  557.      * a/b/c.txt --&gt; c
  558.      * a\b\c.txt --&gt; c
  559.      * a/b/c.foo.txt --&gt; c.foo
  560.      * a.txt     --&gt; a
  561.      * a/b/c     --&gt; c
  562.      * a/b/c/    --&gt; ""
  563.      * </pre>
  564.      * <p>
  565.      * The output will be the same irrespective of the machine that the code is running on.
  566.      * </p>
  567.      *
  568.      * @param fileName  the file name, null returns null
  569.      * @return the name of the file without the path, or an empty string if none exists
  570.      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
  571.      */
  572.     public static String getBaseName(final String fileName) {
  573.         return removeExtension(getName(fileName));
  574.     }

  575.     /**
  576.      * Gets the extension of a file name.
  577.      * <p>
  578.      * This method returns the textual part of the file name after the last period.
  579.      * There must be no directory separator after the period.
  580.      * </p>
  581.      * <pre>
  582.      * foo.txt      --&gt; "txt"
  583.      * a/b/c.jpg    --&gt; "jpg"
  584.      * a/b.txt/c    --&gt; ""
  585.      * a/b/c        --&gt; ""
  586.      * </pre>
  587.      * <p>
  588.      * The output will be the same irrespective of the machine that the code is running on, with the
  589.      * exception of a possible {@link IllegalArgumentException} on Windows (see below).
  590.      * </p>
  591.      * <p>
  592.      * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
  593.      * In this case, the name wouldn't be the name of a file, but the identifier of an
  594.      * alternate data stream (bar.txt) on the file foo.exe. The method used to return
  595.      * ".txt" here, which would be misleading. Commons IO 2.7 and later throw
  596.      * an {@link IllegalArgumentException} for names like this.
  597.      * </p>
  598.      *
  599.      * @param fileName the file name to retrieve the extension of.
  600.      * @return the extension of the file or an empty string if none exists or {@code null}
  601.      * if the file name is {@code null}.
  602.      * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
  603.      * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
  604.      */
  605.     public static String getExtension(final String fileName) throws IllegalArgumentException {
  606.         if (fileName == null) {
  607.             return null;
  608.         }
  609.         final int index = indexOfExtension(fileName);
  610.         if (index == NOT_FOUND) {
  611.             return EMPTY_STRING;
  612.         }
  613.         return fileName.substring(index + 1);
  614.     }

  615.     /**
  616.      * Gets the full path (prefix + path) from a full file name.
  617.      * <p>
  618.      * This method will handle a file in either Unix or Windows format.
  619.      * The method is entirely text based, and returns the text before and
  620.      * including the last forward or backslash.
  621.      * </p>
  622.      * <pre>
  623.      * C:\a\b\c.txt --&gt; C:\a\b\
  624.      * ~/a/b/c.txt  --&gt; ~/a/b/
  625.      * a.txt        --&gt; ""
  626.      * a/b/c        --&gt; a/b/
  627.      * a/b/c/       --&gt; a/b/c/
  628.      * C:           --&gt; C:
  629.      * C:\          --&gt; C:\
  630.      * ~            --&gt; ~/
  631.      * ~/           --&gt; ~/
  632.      * ~user        --&gt; ~user/
  633.      * ~user/       --&gt; ~user/
  634.      * </pre>
  635.      * <p>
  636.      * The output will be the same irrespective of the machine that the code is running on.
  637.      * </p>
  638.      *
  639.      * @param fileName  the file name, null returns null
  640.      * @return the path of the file, an empty string if none exists, null if invalid
  641.      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
  642.      */
  643.     public static String getFullPath(final String fileName) {
  644.         return doGetFullPath(fileName, true);
  645.     }

  646.     /**
  647.      * Gets the full path (prefix + path) from a full file name,
  648.      * excluding the final directory separator.
  649.      * <p>
  650.      * This method will handle a file in either Unix or Windows format.
  651.      * The method is entirely text based, and returns the text before the
  652.      * last forward or backslash.
  653.      * </p>
  654.      * <pre>
  655.      * C:\a\b\c.txt --&gt; C:\a\b
  656.      * ~/a/b/c.txt  --&gt; ~/a/b
  657.      * a.txt        --&gt; ""
  658.      * a/b/c        --&gt; a/b
  659.      * a/b/c/       --&gt; a/b/c
  660.      * C:           --&gt; C:
  661.      * C:\          --&gt; C:\
  662.      * ~            --&gt; ~
  663.      * ~/           --&gt; ~
  664.      * ~user        --&gt; ~user
  665.      * ~user/       --&gt; ~user
  666.      * </pre>
  667.      * <p>
  668.      * The output will be the same irrespective of the machine that the code is running on.
  669.      * </p>
  670.      *
  671.      * @param fileName  the file name, null returns null
  672.      * @return the path of the file, an empty string if none exists, null if invalid
  673.      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
  674.      */
  675.     public static String getFullPathNoEndSeparator(final String fileName) {
  676.         return doGetFullPath(fileName, false);
  677.     }

  678.     /**
  679.      * Gets the name minus the path from a full file name.
  680.      * <p>
  681.      * This method will handle a file in either Unix or Windows format.
  682.      * The text after the last forward or backslash is returned.
  683.      * </p>
  684.      * <pre>
  685.      * a/b/c.txt --&gt; c.txt
  686.      * a\b\c.txt --&gt; c.txt
  687.      * a.txt     --&gt; a.txt
  688.      * a/b/c     --&gt; c
  689.      * a/b/c/    --&gt; ""
  690.      * </pre>
  691.      * <p>
  692.      * The output will be the same irrespective of the machine that the code is running on.
  693.      * </p>
  694.      *
  695.      * @param fileName  the file name, null returns null
  696.      * @return the name of the file without the path, or an empty string if none exists
  697.      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
  698.      */
  699.     public static String getName(final String fileName) {
  700.         if (fileName == null) {
  701.             return null;
  702.         }
  703.         return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1);
  704.     }

  705.     /**
  706.      * Gets the path from a full file name, which excludes the prefix and the name.
  707.      * <p>
  708.      * This method will handle a file in either Unix or Windows format.
  709.      * The method is entirely text based, and returns the text before and
  710.      * including the last forward or backslash.
  711.      * </p>
  712.      * <pre>
  713.      * C:\a\b\c.txt --&gt; a\b\
  714.      * ~/a/b/c.txt  --&gt; a/b/
  715.      * a.txt        --&gt; ""
  716.      * a/b/c        --&gt; a/b/
  717.      * a/b/c/       --&gt; a/b/c/
  718.      * </pre>
  719.      * <p>
  720.      * The output will be the same irrespective of the machine that the code is running on.
  721.      * </p>
  722.      * <p>
  723.      * This method drops the prefix from the result.
  724.      * See {@link #getFullPath(String)} for the method that retains the prefix.
  725.      * </p>
  726.      *
  727.      * @param fileName  the file name, null returns null
  728.      * @return the path of the file, an empty string if none exists, null if invalid
  729.      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
  730.      */
  731.     public static String getPath(final String fileName) {
  732.         return doGetPath(fileName, 1);
  733.     }

  734.     /**
  735.      * Gets the path (which excludes the prefix) from a full file name, and
  736.      * also excluding the final directory separator.
  737.      * <p>
  738.      * This method will handle a file in either Unix or Windows format.
  739.      * The method is entirely text based, and returns the text before the
  740.      * last forward or backslash.
  741.      * </p>
  742.      * <pre>
  743.      * C:\a\b\c.txt --&gt; a\b
  744.      * ~/a/b/c.txt  --&gt; a/b
  745.      * a.txt        --&gt; ""
  746.      * a/b/c        --&gt; a/b
  747.      * a/b/c/       --&gt; a/b/c
  748.      * </pre>
  749.      * <p>
  750.      * The output will be the same irrespective of the machine that the code is running on.
  751.      * </p>
  752.      * <p>
  753.      * This method drops the prefix from the result.
  754.      * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
  755.      * </p>
  756.      *
  757.      * @param fileName  the file name, null returns null
  758.      * @return the path of the file, an empty string if none exists, null if invalid
  759.      * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
  760.      */
  761.     public static String getPathNoEndSeparator(final String fileName) {
  762.         return doGetPath(fileName, 0);
  763.     }

  764.     /**
  765.      * Gets the prefix such as {@code C:/} or {@code ~/} from a full file name,
  766.      * <p>
  767.      * This method will handle a file in either Unix or Windows format.
  768.      * The prefix includes the first slash in the full file name where applicable.
  769.      * </p>
  770.      * <pre>
  771.      * Windows:
  772.      * a\b\c.txt           --&gt; ""          --&gt; relative
  773.      * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
  774.      * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
  775.      * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
  776.      * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
  777.      *
  778.      * Unix:
  779.      * a/b/c.txt           --&gt; ""          --&gt; relative
  780.      * /a/b/c.txt          --&gt; "/"         --&gt; absolute
  781.      * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
  782.      * ~                   --&gt; "~/"        --&gt; current user (slash added)
  783.      * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
  784.      * ~user               --&gt; "~user/"    --&gt; named user (slash added)
  785.      * </pre>
  786.      * <p>
  787.      * The output will be the same irrespective of the machine that the code is running on.
  788.      * ie. both Unix and Windows prefixes are matched regardless.
  789.      * </p>
  790.      *
  791.      * @param fileName  the file name, null returns null
  792.      * @return the prefix of the file, null if invalid
  793.      * @throws IllegalArgumentException if the result contains the null character ({@code U+0000})
  794.      */
  795.     public static String getPrefix(final String fileName) {
  796.         if (fileName == null) {
  797.             return null;
  798.         }
  799.         final int len = getPrefixLength(fileName);
  800.         if (len < 0) {
  801.             return null;
  802.         }
  803.         if (len > fileName.length()) {
  804.             requireNonNullChars(fileName);
  805.             return fileName + UNIX_NAME_SEPARATOR;
  806.         }
  807.         return requireNonNullChars(fileName.substring(0, len));
  808.     }

  809.     /**
  810.      * Returns the length of the file name prefix, such as {@code C:/} or {@code ~/}.
  811.      * <p>
  812.      * This method will handle a file in either Unix or Windows format.
  813.      * </p>
  814.      * <p>
  815.      * The prefix length includes the first slash in the full file name
  816.      * if applicable. Thus, it is possible that the length returned is greater
  817.      * than the length of the input string.
  818.      * </p>
  819.      * <pre>
  820.      * Windows:
  821.      * a\b\c.txt           --&gt; 0           --&gt; relative
  822.      * \a\b\c.txt          --&gt; 1           --&gt; current drive absolute
  823.      * C:a\b\c.txt         --&gt; 2           --&gt; drive relative
  824.      * C:\a\b\c.txt        --&gt; 3           --&gt; absolute
  825.      * \\server\a\b\c.txt  --&gt; 9           --&gt; UNC
  826.      * \\\a\b\c.txt        --&gt; -1          --&gt; error
  827.      *
  828.      * Unix:
  829.      * a/b/c.txt           --&gt; 0           --&gt; relative
  830.      * /a/b/c.txt          --&gt; 1           --&gt; absolute
  831.      * ~/a/b/c.txt         --&gt; 2           --&gt; current user
  832.      * ~                   --&gt; 2           --&gt; current user (slash added)
  833.      * ~user/a/b/c.txt     --&gt; 6           --&gt; named user
  834.      * ~user               --&gt; 6           --&gt; named user (slash added)
  835.      * //server/a/b/c.txt  --&gt; 9
  836.      * ///a/b/c.txt        --&gt; -1          --&gt; error
  837.      * C:                  --&gt; 0           --&gt; valid file name as only null character and / are reserved characters
  838.      * </pre>
  839.      * <p>
  840.      * The output will be the same irrespective of the machine that the code is running on.
  841.      * ie. both Unix and Windows prefixes are matched regardless.
  842.      * </p>
  843.      * <p>
  844.      * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
  845.      * These must be followed by a server name, so double-slashes are not collapsed
  846.      * to a single slash at the start of the file name.
  847.      * </p>
  848.      *
  849.      * @param fileName  the file name to find the prefix in, null returns -1
  850.      * @return the length of the prefix, -1 if invalid or null
  851.      */
  852.     public static int getPrefixLength(final String fileName) {
  853.         if (fileName == null) {
  854.             return NOT_FOUND;
  855.         }
  856.         final int len = fileName.length();
  857.         if (len == 0) {
  858.             return 0;
  859.         }
  860.         char ch0 = fileName.charAt(0);
  861.         if (ch0 == ':') {
  862.             return NOT_FOUND;
  863.         }
  864.         if (len == 1) {
  865.             if (ch0 == '~') {
  866.                 return 2;  // return a length greater than the input
  867.             }
  868.             return isSeparator(ch0) ? 1 : 0;
  869.         }
  870.         if (ch0 == '~') {
  871.             int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1);
  872.             int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1);
  873.             if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
  874.                 return len + 1;  // return a length greater than the input
  875.             }
  876.             posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
  877.             posWin = posWin == NOT_FOUND ? posUnix : posWin;
  878.             return Math.min(posUnix, posWin) + 1;
  879.         }
  880.         final char ch1 = fileName.charAt(1);
  881.         if (ch1 == ':') {
  882.             ch0 = Character.toUpperCase(ch0);
  883.             if (ch0 >= 'A' && ch0 <= 'Z') {
  884.                 if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
  885.                     return 0;
  886.                 }
  887.                 if (len == 2 || !isSeparator(fileName.charAt(2))) {
  888.                     return 2;
  889.                 }
  890.                 return 3;
  891.             }
  892.             if (ch0 == UNIX_NAME_SEPARATOR) {
  893.                 return 1;
  894.             }
  895.             return NOT_FOUND;

  896.         }
  897.         if (!isSeparator(ch0) || !isSeparator(ch1)) {
  898.             return isSeparator(ch0) ? 1 : 0;
  899.         }
  900.         int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2);
  901.         int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2);
  902.         if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
  903.             return NOT_FOUND;
  904.         }
  905.         posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
  906.         posWin = posWin == NOT_FOUND ? posUnix : posWin;
  907.         final int pos = Math.min(posUnix, posWin) + 1;
  908.         final String hostnamePart = fileName.substring(2, pos - 1);
  909.         return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
  910.     }

  911.     /**
  912.      * Returns the index of the last extension separator character, which is a period.
  913.      * <p>
  914.      * This method also checks that there is no directory separator after the last period. To do this it uses
  915.      * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
  916.      * </p>
  917.      * <p>
  918.      * The output will be the same irrespective of the machine that the code is running on, with the
  919.      * exception of a possible {@link IllegalArgumentException} on Windows (see below).
  920.      * </p>
  921.      * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
  922.      * In this case, the name wouldn't be the name of a file, but the identifier of an
  923.      * alternate data stream (bar.txt) on the file foo.exe. The method used to return
  924.      * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
  925.      * an {@link IllegalArgumentException} for names like this.
  926.      *
  927.      * @param fileName
  928.      *            the file name to find the last extension separator in, null returns -1
  929.      * @return the index of the last extension separator character, or -1 if there is no such character
  930.      * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
  931.      * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
  932.      */
  933.     public static int indexOfExtension(final String fileName) throws IllegalArgumentException {
  934.         if (fileName == null) {
  935.             return NOT_FOUND;
  936.         }
  937.         if (isSystemWindows()) {
  938.             // Special handling for NTFS ADS: Don't accept colon in the file name.
  939.             final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
  940.             if (offset != -1) {
  941.                 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
  942.             }
  943.         }
  944.         final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
  945.         final int lastSeparator = indexOfLastSeparator(fileName);
  946.         return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
  947.     }

  948.     /**
  949.      * Returns the index of the last directory separator character.
  950.      * <p>
  951.      * This method will handle a file in either Unix or Windows format.
  952.      * The position of the last forward or backslash is returned.
  953.      * <p>
  954.      * The output will be the same irrespective of the machine that the code is running on.
  955.      *
  956.      * @param fileName  the file name to find the last path separator in, null returns -1
  957.      * @return the index of the last separator character, or -1 if there
  958.      * is no such character
  959.      */
  960.     public static int indexOfLastSeparator(final String fileName) {
  961.         if (fileName == null) {
  962.             return NOT_FOUND;
  963.         }
  964.         final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR);
  965.         final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR);
  966.         return Math.max(lastUnixPos, lastWindowsPos);
  967.     }

  968.     private static boolean isEmpty(final String string) {
  969.         return string == null || string.isEmpty();
  970.     }

  971.     /**
  972.      * Checks whether the extension of the file name is one of those specified.
  973.      * <p>
  974.      * This method obtains the extension as the textual part of the file name
  975.      * after the last period. There must be no directory separator after the period.
  976.      * The extension check is case-sensitive on all platforms.
  977.      *
  978.      * @param fileName  the file name, null returns false
  979.      * @param extensions  the extensions to check for, null checks for no extension
  980.      * @return true if the file name is one of the extensions
  981.      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
  982.      */
  983.     public static boolean isExtension(final String fileName, final Collection<String> extensions) {
  984.         if (fileName == null) {
  985.             return false;
  986.         }
  987.         requireNonNullChars(fileName);

  988.         if (extensions == null || extensions.isEmpty()) {
  989.             return indexOfExtension(fileName) == NOT_FOUND;
  990.         }
  991.         return extensions.contains(getExtension(fileName));
  992.     }

  993.     /**
  994.      * Checks whether the extension of the file name is that specified.
  995.      * <p>
  996.      * This method obtains the extension as the textual part of the file name
  997.      * after the last period. There must be no directory separator after the period.
  998.      * The extension check is case-sensitive on all platforms.
  999.      *
  1000.      * @param fileName  the file name, null returns false
  1001.      * @param extension  the extension to check for, null or empty checks for no extension
  1002.      * @return true if the file name has the specified extension
  1003.      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
  1004.      */
  1005.     public static boolean isExtension(final String fileName, final String extension) {
  1006.         if (fileName == null) {
  1007.             return false;
  1008.         }
  1009.         requireNonNullChars(fileName);

  1010.         if (isEmpty(extension)) {
  1011.             return indexOfExtension(fileName) == NOT_FOUND;
  1012.         }
  1013.         return getExtension(fileName).equals(extension);
  1014.     }

  1015.     /**
  1016.      * Checks whether the extension of the file name is one of those specified.
  1017.      * <p>
  1018.      * This method obtains the extension as the textual part of the file name
  1019.      * after the last period. There must be no directory separator after the period.
  1020.      * The extension check is case-sensitive on all platforms.
  1021.      *
  1022.      * @param fileName  the file name, null returns false
  1023.      * @param extensions  the extensions to check for, null checks for no extension
  1024.      * @return true if the file name is one of the extensions
  1025.      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
  1026.      */
  1027.     public static boolean isExtension(final String fileName, final String... extensions) {
  1028.         if (fileName == null) {
  1029.             return false;
  1030.         }
  1031.         requireNonNullChars(fileName);

  1032.         if (extensions == null || extensions.length == 0) {
  1033.             return indexOfExtension(fileName) == NOT_FOUND;
  1034.         }
  1035.         final String fileExt = getExtension(fileName);
  1036.         return Stream.of(extensions).anyMatch(fileExt::equals);
  1037.     }

  1038.     /**
  1039.      * Checks whether a given string represents a valid IPv4 address.
  1040.      *
  1041.      * @param name the name to validate
  1042.      * @return true if the given name is a valid IPv4 address
  1043.      */
  1044.     // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
  1045.     private static boolean isIPv4Address(final String name) {
  1046.         final Matcher m = IPV4_PATTERN.matcher(name);
  1047.         if (!m.matches() || m.groupCount() != 4) {
  1048.             return false;
  1049.         }

  1050.         // verify that address subgroups are legal
  1051.         for (int i = 1; i <= 4; i++) {
  1052.             final String ipSegment = m.group(i);
  1053.             final int iIpSegment = Integer.parseInt(ipSegment);
  1054.             if (iIpSegment > IPV4_MAX_OCTET_VALUE) {
  1055.                 return false;
  1056.             }

  1057.             if (ipSegment.length() > 1 && ipSegment.startsWith("0")) {
  1058.                 return false;
  1059.             }

  1060.         }

  1061.         return true;
  1062.     }

  1063.     // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
  1064.     /**
  1065.      * Checks whether a given string represents a valid IPv6 address.
  1066.      *
  1067.      * @param inet6Address the name to validate
  1068.      * @return true if the given name is a valid IPv6 address
  1069.      */
  1070.     private static boolean isIPv6Address(final String inet6Address) {
  1071.         final boolean containsCompressedZeroes = inet6Address.contains("::");
  1072.         if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) {
  1073.             return false;
  1074.         }
  1075.         if (inet6Address.startsWith(":") && !inet6Address.startsWith("::")
  1076.                 || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) {
  1077.             return false;
  1078.         }
  1079.         String[] octets = inet6Address.split(":");
  1080.         if (containsCompressedZeroes) {
  1081.             final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
  1082.             if (inet6Address.endsWith("::")) {
  1083.                 // String.split() drops ending empty segments
  1084.                 octetList.add("");
  1085.             } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
  1086.                 octetList.remove(0);
  1087.             }
  1088.             octets = octetList.toArray(EMPTY_STRING_ARRAY);
  1089.         }
  1090.         if (octets.length > IPV6_MAX_HEX_GROUPS) {
  1091.             return false;
  1092.         }
  1093.         int validOctets = 0;
  1094.         int emptyOctets = 0; // consecutive empty chunks
  1095.         for (int index = 0; index < octets.length; index++) {
  1096.             final String octet = octets[index];
  1097.             if (octet.isEmpty()) {
  1098.                 emptyOctets++;
  1099.                 if (emptyOctets > 1) {
  1100.                     return false;
  1101.                 }
  1102.             } else {
  1103.                 emptyOctets = 0;
  1104.                 // Is last chunk an IPv4 address?
  1105.                 if (index == octets.length - 1 && octet.contains(".")) {
  1106.                     if (!isIPv4Address(octet)) {
  1107.                         return false;
  1108.                     }
  1109.                     validOctets += 2;
  1110.                     continue;
  1111.                 }
  1112.                 if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
  1113.                     return false;
  1114.                 }
  1115.                 final int octetInt;
  1116.                 try {
  1117.                     octetInt = Integer.parseInt(octet, BASE_16);
  1118.                 } catch (final NumberFormatException e) {
  1119.                     return false;
  1120.                 }
  1121.                 if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
  1122.                     return false;
  1123.                 }
  1124.             }
  1125.             validOctets++;
  1126.         }
  1127.         return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
  1128.     }

  1129.     /**
  1130.      * Checks whether a given string is a valid host name according to
  1131.      * RFC 3986 - not accepting IP addresses.
  1132.      *
  1133.      * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
  1134.      * @param name the hostname to validate
  1135.      * @return true if the given name is a valid host name
  1136.      */
  1137.     private static boolean isRFC3986HostName(final String name) {
  1138.         final String[] parts = name.split("\\.", -1);
  1139.         for (int i = 0; i < parts.length; i++) {
  1140.             if (parts[i].isEmpty()) {
  1141.                 // trailing period is legal, otherwise we've hit a .. sequence
  1142.                 return i == parts.length - 1;
  1143.             }
  1144.             if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
  1145.                 return false;
  1146.             }
  1147.         }
  1148.         return true;
  1149.     }

  1150.     /**
  1151.      * Checks if the character is a separator.
  1152.      *
  1153.      * @param ch  the character to check
  1154.      * @return true if it is a separator character
  1155.      */
  1156.     private static boolean isSeparator(final char ch) {
  1157.         return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR;
  1158.     }

  1159.     /**
  1160.      * Determines if Windows file system is in use.
  1161.      *
  1162.      * @return true if the system is Windows
  1163.      */
  1164.     static boolean isSystemWindows() {
  1165.         return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR;
  1166.     }

  1167.     /**
  1168.      * Checks whether a given string is a valid host name according to
  1169.      * RFC 3986.
  1170.      *
  1171.      * <p>Accepted are IP addresses (v4 and v6) as well as what the
  1172.      * RFC calls a "reg-name". Percent encoded names don't seem to be
  1173.      * valid names in UNC paths.</p>
  1174.      *
  1175.      * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
  1176.      * @param name the hostname to validate
  1177.      * @return true if the given name is a valid host name
  1178.      */
  1179.     private static boolean isValidHostName(final String name) {
  1180.         return isIPv6Address(name) || isRFC3986HostName(name);
  1181.     }

  1182.     /**
  1183.      * Normalizes a path, removing double and single period path steps.
  1184.      * <p>
  1185.      * This method normalizes a path to a standard format.
  1186.      * The input may contain separators in either Unix or Windows format.
  1187.      * The output will contain separators in the format of the system.
  1188.      * <p>
  1189.      * A trailing slash will be retained.
  1190.      * A double slash will be merged to a single slash (but UNC names are handled).
  1191.      * A single period path segment will be removed.
  1192.      * A double period will cause that path segment and the one before to be removed.
  1193.      * If the double period has no parent path segment, {@code null} is returned.
  1194.      * <p>
  1195.      * The output will be the same on both Unix and Windows except
  1196.      * for the separator character.
  1197.      * <pre>
  1198.      * /foo//               --&gt;   /foo/
  1199.      * /foo/./              --&gt;   /foo/
  1200.      * /foo/../bar          --&gt;   /bar
  1201.      * /foo/../bar/         --&gt;   /bar/
  1202.      * /foo/../bar/../baz   --&gt;   /baz
  1203.      * //foo//./bar         --&gt;   //foo/bar
  1204.      * /../                 --&gt;   null
  1205.      * ../foo               --&gt;   null
  1206.      * foo/bar/..           --&gt;   foo/
  1207.      * foo/../../bar        --&gt;   null
  1208.      * foo/../bar           --&gt;   bar
  1209.      * //server/foo/../bar  --&gt;   //server/bar
  1210.      * //server/../bar      --&gt;   null
  1211.      * C:\foo\..\bar        --&gt;   C:\bar
  1212.      * C:\..\bar            --&gt;   null
  1213.      * ~/foo/../bar/        --&gt;   ~/bar/
  1214.      * ~/../bar             --&gt;   null
  1215.      * </pre>
  1216.      * (Note the file separator will be correct for Windows/Unix.)
  1217.      *
  1218.      * @param fileName  the file name to normalize, null returns null
  1219.      * @return the normalized fileName, or null if invalid
  1220.      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
  1221.      */
  1222.     public static String normalize(final String fileName) {
  1223.         return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true);
  1224.     }

  1225.     /**
  1226.      * Normalizes a path, removing double and single period path steps.
  1227.      * <p>
  1228.      * This method normalizes a path to a standard format.
  1229.      * The input may contain separators in either Unix or Windows format.
  1230.      * The output will contain separators in the format specified.
  1231.      * <p>
  1232.      * A trailing slash will be retained.
  1233.      * A double slash will be merged to a single slash (but UNC names are handled).
  1234.      * A single period path segment will be removed.
  1235.      * A double period will cause that path segment and the one before to be removed.
  1236.      * If the double period has no parent path segment to work with, {@code null}
  1237.      * is returned.
  1238.      * <p>
  1239.      * The output will be the same on both Unix and Windows except
  1240.      * for the separator character.
  1241.      * <pre>
  1242.      * /foo//               --&gt;   /foo/
  1243.      * /foo/./              --&gt;   /foo/
  1244.      * /foo/../bar          --&gt;   /bar
  1245.      * /foo/../bar/         --&gt;   /bar/
  1246.      * /foo/../bar/../baz   --&gt;   /baz
  1247.      * //foo//./bar         --&gt;   /foo/bar
  1248.      * /../                 --&gt;   null
  1249.      * ../foo               --&gt;   null
  1250.      * foo/bar/..           --&gt;   foo/
  1251.      * foo/../../bar        --&gt;   null
  1252.      * foo/../bar           --&gt;   bar
  1253.      * //server/foo/../bar  --&gt;   //server/bar
  1254.      * //server/../bar      --&gt;   null
  1255.      * C:\foo\..\bar        --&gt;   C:\bar
  1256.      * C:\..\bar            --&gt;   null
  1257.      * ~/foo/../bar/        --&gt;   ~/bar/
  1258.      * ~/../bar             --&gt;   null
  1259.      * </pre>
  1260.      * The output will be the same on both Unix and Windows including
  1261.      * the separator character.
  1262.      *
  1263.      * @param fileName  the file name to normalize, null returns null
  1264.      * @param unixSeparator {@code true} if a Unix separator should
  1265.      * be used or {@code false} if a Windows separator should be used.
  1266.      * @return the normalized fileName, or null if invalid
  1267.      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
  1268.      * @since 2.0
  1269.      */
  1270.     public static String normalize(final String fileName, final boolean unixSeparator) {
  1271.         return doNormalize(fileName, toSeparator(unixSeparator), true);
  1272.     }

  1273.     /**
  1274.      * Normalizes a path, removing double and single period path steps,
  1275.      * and removing any final directory separator.
  1276.      * <p>
  1277.      * This method normalizes a path to a standard format.
  1278.      * The input may contain separators in either Unix or Windows format.
  1279.      * The output will contain separators in the format of the system.
  1280.      * <p>
  1281.      * A trailing slash will be removed.
  1282.      * A double slash will be merged to a single slash (but UNC names are handled).
  1283.      * A single period path segment will be removed.
  1284.      * A double period will cause that path segment and the one before to be removed.
  1285.      * If the double period has no parent path segment to work with, {@code null}
  1286.      * is returned.
  1287.      * <p>
  1288.      * The output will be the same on both Unix and Windows except
  1289.      * for the separator character.
  1290.      * <pre>
  1291.      * /foo//               --&gt;   /foo
  1292.      * /foo/./              --&gt;   /foo
  1293.      * /foo/../bar          --&gt;   /bar
  1294.      * /foo/../bar/         --&gt;   /bar
  1295.      * /foo/../bar/../baz   --&gt;   /baz
  1296.      * //foo//./bar         --&gt;   /foo/bar
  1297.      * /../                 --&gt;   null
  1298.      * ../foo               --&gt;   null
  1299.      * foo/bar/..           --&gt;   foo
  1300.      * foo/../../bar        --&gt;   null
  1301.      * foo/../bar           --&gt;   bar
  1302.      * //server/foo/../bar  --&gt;   //server/bar
  1303.      * //server/../bar      --&gt;   null
  1304.      * C:\foo\..\bar        --&gt;   C:\bar
  1305.      * C:\..\bar            --&gt;   null
  1306.      * ~/foo/../bar/        --&gt;   ~/bar
  1307.      * ~/../bar             --&gt;   null
  1308.      * </pre>
  1309.      * (Note the file separator returned will be correct for Windows/Unix)
  1310.      *
  1311.      * @param fileName  the file name to normalize, null returns null
  1312.      * @return the normalized fileName, or null if invalid
  1313.      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
  1314.      */
  1315.     public static String normalizeNoEndSeparator(final String fileName) {
  1316.         return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false);
  1317.     }

  1318.     /**
  1319.      * Normalizes a path, removing double and single period path steps,
  1320.      * and removing any final directory separator.
  1321.      * <p>
  1322.      * This method normalizes a path to a standard format.
  1323.      * The input may contain separators in either Unix or Windows format.
  1324.      * The output will contain separators in the format specified.
  1325.      * <p>
  1326.      * A trailing slash will be removed.
  1327.      * A double slash will be merged to a single slash (but UNC names are handled).
  1328.      * A single period path segment will be removed.
  1329.      * A double period will cause that path segment and the one before to be removed.
  1330.      * If the double period has no parent path segment to work with, {@code null}
  1331.      * is returned.
  1332.      * <p>
  1333.      * The output will be the same on both Unix and Windows including
  1334.      * the separator character.
  1335.      * <pre>
  1336.      * /foo//               --&gt;   /foo
  1337.      * /foo/./              --&gt;   /foo
  1338.      * /foo/../bar          --&gt;   /bar
  1339.      * /foo/../bar/         --&gt;   /bar
  1340.      * /foo/../bar/../baz   --&gt;   /baz
  1341.      * //foo//./bar         --&gt;   /foo/bar
  1342.      * /../                 --&gt;   null
  1343.      * ../foo               --&gt;   null
  1344.      * foo/bar/..           --&gt;   foo
  1345.      * foo/../../bar        --&gt;   null
  1346.      * foo/../bar           --&gt;   bar
  1347.      * //server/foo/../bar  --&gt;   //server/bar
  1348.      * //server/../bar      --&gt;   null
  1349.      * C:\foo\..\bar        --&gt;   C:\bar
  1350.      * C:\..\bar            --&gt;   null
  1351.      * ~/foo/../bar/        --&gt;   ~/bar
  1352.      * ~/../bar             --&gt;   null
  1353.      * </pre>
  1354.      *
  1355.      * @param fileName  the file name to normalize, null returns null
  1356.      * @param unixSeparator {@code true} if a Unix separator should
  1357.      * be used or {@code false} if a Windows separator should be used.
  1358.      * @return the normalized fileName, or null if invalid
  1359.      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
  1360.      * @since 2.0
  1361.      */
  1362.     public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
  1363.          return doNormalize(fileName, toSeparator(unixSeparator), false);
  1364.     }

  1365.     /**
  1366.      * Removes the extension from a fileName.
  1367.      * <p>
  1368.      * This method returns the textual part of the file name before the last period.
  1369.      * There must be no directory separator after the period.
  1370.      * <pre>
  1371.      * foo.txt    --&gt; foo
  1372.      * .txt       --&gt; "" (empty string)
  1373.      * a\b\c.jpg  --&gt; a\b\c
  1374.      * /a/b/c.jpg --&gt; /a/b/c
  1375.      * a\b\c      --&gt; a\b\c
  1376.      * a.b\c      --&gt; a.b\c
  1377.      * </pre>
  1378.      * <p>
  1379.      * The output will be the same irrespective of the machine that the code is running on.
  1380.      *
  1381.      * @param fileName  the file name, null returns null
  1382.      * @return the file name minus the extension
  1383.      * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
  1384.      */
  1385.     public static String removeExtension(final String fileName) {
  1386.         if (fileName == null) {
  1387.             return null;
  1388.         }
  1389.         requireNonNullChars(fileName);

  1390.         final int index = indexOfExtension(fileName);
  1391.         if (index == NOT_FOUND) {
  1392.             return fileName;
  1393.         }
  1394.         return fileName.substring(0, index);
  1395.     }

  1396.     /**
  1397.      * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions.
  1398.      *
  1399.      * This may be used to defend against poison byte attacks.
  1400.      *
  1401.      * @param path the path to check
  1402.      * @return The input
  1403.      * @throws IllegalArgumentException if path contains the null character ({@code U+0000})
  1404.      */
  1405.     private static String requireNonNullChars(final String path) {
  1406.         if (path.indexOf(0) >= 0) {
  1407.             throw new IllegalArgumentException(
  1408.                 "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it");
  1409.         }
  1410.         return path;
  1411.     }

  1412.     /**
  1413.      * Converts all separators to the system separator.
  1414.      *
  1415.      * @param path the path to be changed, null ignored.
  1416.      * @return the updated path.
  1417.      */
  1418.     public static String separatorsToSystem(final String path) {
  1419.         return FileSystem.getCurrent().normalizeSeparators(path);
  1420.     }

  1421.     /**
  1422.      * Converts all separators to the Unix separator of forward slash.
  1423.      *
  1424.      * @param path the path to be changed, null ignored.
  1425.      * @return the new path.
  1426.      */
  1427.     public static String separatorsToUnix(final String path) {
  1428.         return FileSystem.LINUX.normalizeSeparators(path);
  1429.     }

  1430.     /**
  1431.      * Converts all separators to the Windows separator of backslash.
  1432.      *
  1433.      * @param path the path to be changed, null ignored.
  1434.      * @return the updated path.
  1435.      */
  1436.     public static String separatorsToWindows(final String path) {
  1437.         return FileSystem.WINDOWS.normalizeSeparators(path);
  1438.     }

  1439.     /**
  1440.      * Splits a string into a number of tokens.
  1441.      * The text is split by '?' and '*'.
  1442.      * Where multiple '*' occur consecutively they are collapsed into a single '*'.
  1443.      *
  1444.      * @param text  the text to split
  1445.      * @return the array of tokens, never null
  1446.      */
  1447.     static String[] splitOnTokens(final String text) {
  1448.         // used by wildcardMatch
  1449.         // package level so a unit test may run on this

  1450.         if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
  1451.             return new String[] { text };
  1452.         }

  1453.         final char[] array = text.toCharArray();
  1454.         final ArrayList<String> list = new ArrayList<>();
  1455.         final StringBuilder buffer = new StringBuilder();
  1456.         char prevChar = 0;
  1457.         for (final char ch : array) {
  1458.             if (ch == '?' || ch == '*') {
  1459.                 if (buffer.length() != 0) {
  1460.                     list.add(buffer.toString());
  1461.                     buffer.setLength(0);
  1462.                 }
  1463.                 if (ch == '?') {
  1464.                     list.add("?");
  1465.                 } else if (prevChar != '*') { // ch == '*' here; check if previous char was '*'
  1466.                     list.add("*");
  1467.                 }
  1468.             } else {
  1469.                 buffer.append(ch);
  1470.             }
  1471.             prevChar = ch;
  1472.         }
  1473.         if (buffer.length() != 0) {
  1474.             list.add(buffer.toString());
  1475.         }

  1476.         return list.toArray(EMPTY_STRING_ARRAY);
  1477.     }

  1478.     /**
  1479.      * Returns '/' if given true, '\\' otherwise.
  1480.      *
  1481.      * @param unixSeparator which separator to return.
  1482.      * @return '/' if given true, '\\' otherwise.
  1483.      */
  1484.     private static char toSeparator(final boolean unixSeparator) {
  1485.         return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR;
  1486.     }

  1487.     /**
  1488.      * Checks a fileName to see if it matches the specified wildcard matcher,
  1489.      * always testing case-sensitive.
  1490.      * <p>
  1491.      * The wildcard matcher uses the characters '?' and '*' to represent a
  1492.      * single or multiple (zero or more) wildcard characters.
  1493.      * This is the same as often found on DOS/Unix command lines.
  1494.      * The check is case-sensitive always.
  1495.      * <pre>
  1496.      * wildcardMatch("c.txt", "*.txt")      --&gt; true
  1497.      * wildcardMatch("c.txt", "*.jpg")      --&gt; false
  1498.      * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
  1499.      * wildcardMatch("c.txt", "*.???")      --&gt; true
  1500.      * wildcardMatch("c.txt", "*.????")     --&gt; false
  1501.      * </pre>
  1502.      * The sequence "*?" does not work properly at present in match strings.
  1503.      *
  1504.      * @param fileName  the file name to match on
  1505.      * @param wildcardMatcher  the wildcard string to match against
  1506.      * @return true if the file name matches the wildcard string
  1507.      * @see IOCase#SENSITIVE
  1508.      */
  1509.     public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
  1510.         return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
  1511.     }

  1512.     /**
  1513.      * Checks a fileName to see if it matches the specified wildcard matcher
  1514.      * allowing control over case-sensitivity.
  1515.      * <p>
  1516.      * The wildcard matcher uses the characters '?' and '*' to represent a
  1517.      * single or multiple (zero or more) wildcard characters.
  1518.      * The sequence "*?" does not work properly at present in match strings.
  1519.      *
  1520.      * @param fileName  the file name to match on
  1521.      * @param wildcardMatcher  the wildcard string to match against
  1522.      * @param ioCase  what case sensitivity rule to use, null means case-sensitive
  1523.      * @return true if the file name matches the wildcard string
  1524.      * @since 1.3
  1525.      */
  1526.     public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) {
  1527.         if (fileName == null && wildcardMatcher == null) {
  1528.             return true;
  1529.         }
  1530.         if (fileName == null || wildcardMatcher == null) {
  1531.             return false;
  1532.         }
  1533.         ioCase = IOCase.value(ioCase, IOCase.SENSITIVE);
  1534.         final String[] wcs = splitOnTokens(wildcardMatcher);
  1535.         boolean anyChars = false;
  1536.         int textIdx = 0;
  1537.         int wcsIdx = 0;
  1538.         final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);

  1539.         // loop around a backtrack stack, to handle complex * matching
  1540.         do {
  1541.             if (!backtrack.isEmpty()) {
  1542.                 final int[] array = backtrack.pop();
  1543.                 wcsIdx = array[0];
  1544.                 textIdx = array[1];
  1545.                 anyChars = true;
  1546.             }

  1547.             // loop whilst tokens and text left to process
  1548.             while (wcsIdx < wcs.length) {

  1549.                 if (wcs[wcsIdx].equals("?")) {
  1550.                     // ? so move to next text char
  1551.                     textIdx++;
  1552.                     if (textIdx > fileName.length()) {
  1553.                         break;
  1554.                     }
  1555.                     anyChars = false;

  1556.                 } else if (wcs[wcsIdx].equals("*")) {
  1557.                     // set any chars status
  1558.                     anyChars = true;
  1559.                     if (wcsIdx == wcs.length - 1) {
  1560.                         textIdx = fileName.length();
  1561.                     }

  1562.                 } else {
  1563.                     // matching text token
  1564.                     if (anyChars) {
  1565.                         // any chars then try to locate text token
  1566.                         textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
  1567.                         if (textIdx == NOT_FOUND) {
  1568.                             // token not found
  1569.                             break;
  1570.                         }
  1571.                         final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
  1572.                         if (repeat >= 0) {
  1573.                             backtrack.push(new int[] {wcsIdx, repeat});
  1574.                         }
  1575.                     } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
  1576.                         // matching from current position
  1577.                         // couldn't match token
  1578.                         break;
  1579.                     }

  1580.                     // matched text token, move text index to end of matched token
  1581.                     textIdx += wcs[wcsIdx].length();
  1582.                     anyChars = false;
  1583.                 }

  1584.                 wcsIdx++;
  1585.             }

  1586.             // full match
  1587.             if (wcsIdx == wcs.length && textIdx == fileName.length()) {
  1588.                 return true;
  1589.             }

  1590.         } while (!backtrack.isEmpty());

  1591.         return false;
  1592.     }

  1593.     /**
  1594.      * Checks a fileName to see if it matches the specified wildcard matcher
  1595.      * using the case rules of the system.
  1596.      * <p>
  1597.      * The wildcard matcher uses the characters '?' and '*' to represent a
  1598.      * single or multiple (zero or more) wildcard characters.
  1599.      * This is the same as often found on DOS/Unix command lines.
  1600.      * The check is case-sensitive on Unix and case-insensitive on Windows.
  1601.      * <pre>
  1602.      * wildcardMatch("c.txt", "*.txt")      --&gt; true
  1603.      * wildcardMatch("c.txt", "*.jpg")      --&gt; false
  1604.      * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
  1605.      * wildcardMatch("c.txt", "*.???")      --&gt; true
  1606.      * wildcardMatch("c.txt", "*.????")     --&gt; false
  1607.      * </pre>
  1608.      * The sequence "*?" does not work properly at present in match strings.
  1609.      *
  1610.      * @param fileName  the file name to match on
  1611.      * @param wildcardMatcher  the wildcard string to match against
  1612.      * @return true if the file name matches the wildcard string
  1613.      * @see IOCase#SYSTEM
  1614.      */
  1615.     public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
  1616.         return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
  1617.     }

    /**
     * Instances should NOT be constructed in standard programming.
     * <p>
     * Call the static methods directly instead, for example
     * {@code FilenameUtils.removeExtension(name)}.
     * </p>
     *
     * @deprecated TODO Make private in 3.0.
     */
    @Deprecated
    public FilenameUtils() {
        // empty
    }
  1627. }