UnixFTPEntryParser.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.net.ftp.parser;

  18. import java.text.ParseException;
  19. import java.util.List;
  20. import java.util.regex.Pattern;

  21. import org.apache.commons.net.ftp.FTPClientConfig;
  22. import org.apache.commons.net.ftp.FTPFile;

  23. /**
  24.  * Implementation FTPFileEntryParser and FTPFileListParser for standard UNIX Systems.
  25.  *
  26.  * This class is based on the logic of Daniel Savarese's DefaultFTPListParser, but adapted to use regular expressions and to fit the new FTPFileEntryParser
  27.  * interface.
  28.  *
  29.  * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for usage instructions)
  30.  */
  31. public class UnixFTPEntryParser extends ConfigurableFTPFileEntryParserImpl {

  32.     static final String DEFAULT_DATE_FORMAT = "MMM d yyyy"; // Nov 9 2001

  33.     static final String DEFAULT_RECENT_DATE_FORMAT = "MMM d HH:mm"; // Nov 9 20:06

  34.     static final String NUMERIC_DATE_FORMAT = "yyyy-MM-dd HH:mm"; // 2001-11-09 20:06

  35.     // Suffixes used in Japanese listings after the numeric values
  36.     private static final String JA_MONTH = "\u6708";
  37.     private static final String JA_DAY = "\u65e5";
  38.     private static final String JA_YEAR = "\u5e74";

  39.     private static final String DEFAULT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' yyyy'" + JA_YEAR + "'"; // 6月 3日 2003年

  40.     private static final String DEFAULT_RECENT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' HH:mm"; // 8月 17日 20:10

  41.     private static final Pattern TOTAL_PATTERN = Pattern.compile("^total \\d+$");

  42.     /**
  43.      * Some Linux distributions are now shipping an FTP server which formats file listing dates in an all-numeric format: <code>"yyyy-MM-dd HH:mm</code>. This
  44.      * is a very welcome development, and hopefully it will soon become the standard. However, since it is so new, for now, and possibly forever, we merely
  45.      * accommodate it, but do not make it the default.
  46.      * <p>
  47.      * For now end users may specify this format only via <code>UnixFTPEntryParser(FTPClientConfig)</code>. Steve Cohen - 2005-04-17
  48.      */
  49.     public static final FTPClientConfig NUMERIC_DATE_CONFIG = new FTPClientConfig(FTPClientConfig.SYST_UNIX, NUMERIC_DATE_FORMAT, null);

  50.     /**
  51.      * this is the regular expression used by this parser.
  52.      *
  53.      * Permissions: r the file is readable w the file is writable x the file is executable - the indicated permission is not granted L mandatory locking occurs
  54.      * during access (the set-group-ID bit is on and the group execution bit is off) s the set-user-ID or set-group-ID bit is on, and the corresponding user or
  55.      * group execution bit is also on S undefined bit-state (the set-user-ID bit is on and the user execution bit is off) t the 1000 (octal) bit, or sticky bit,
  56.      * is on [see chmod(1)], and execution is on T the 1000 bit is turned on, and execution is off (undefined bit-state) e z/OS external link bit. Final letter
  57.      * may be appended: + file has extended security attributes (e.g. ACL) Note: local listings on MacOSX also use '@'; this is not allowed for here as does not
  58.      * appear to be shown by FTP servers {@code @} file has extended attributes
  59.      */
  60.     private static final String REGEX = "([bcdelfmpSs-])" // file type
  61.             + "(((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-])))\\+?" // permissions

  62.             + "\\s*" // separator TODO why allow it to be omitted??

  63.             + "(\\d+)" // link count

  64.             + "\\s+" // separator

  65.             + "(?:(\\S+(?:\\s\\S+)*?)\\s+)?" // owner name (optional spaces)
  66.             + "(?:(\\S+(?:\\s\\S+)*)\\s+)?" // group name (optional spaces)
  67.             + "(\\d+(?:,\\s*\\d+)?)" // size or n,m

  68.             + "\\s+" // separator

  69.             /*
  70.              * numeric or standard format date: yyyy-mm-dd (expecting hh:mm to follow) MMM [d]d [d]d MMM N.B. use non-space for MMM to allow for languages such
  71.              * as German which use diacritics (e.g. umlaut) in some abbreviations. Japanese uses numeric day and month with suffixes to distinguish them [d]dXX
  72.              * [d]dZZ
  73.              */
  74.             + "(" + "(?:\\d+[-/]\\d+[-/]\\d+)" + // yyyy-mm-dd
  75.             "|(?:\\S{3}\\s+\\d{1,2})" + // MMM [d]d
  76.             "|(?:\\d{1,2}\\s+\\S{3})" + // [d]d MMM
  77.             "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2}" + JA_DAY + ")" + ")"

  78.             + "\\s+" // separator

  79.             /*
  80.              * year (for non-recent standard format) - yyyy or time (for numeric or recent standard format) [h]h:mm or Japanese year - yyyyXX
  81.              */
  82.             + "((?:\\d+(?::\\d+)?)|(?:\\d{4}" + JA_YEAR + "))" // (20)

  83.             + "\\s" // separator

  84.             + "(.*)"; // the rest (21)

  85.     // if true, leading spaces are trimmed from file names
  86.     // this was the case for the original implementation
  87.     final boolean trimLeadingSpaces; // package protected for access from test code

  88.     /**
  89.      * The default constructor for a UnixFTPEntryParser object.
  90.      *
  91.      * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
  92.      *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
  93.      */
  94.     public UnixFTPEntryParser() {
  95.         this(null);
  96.     }

  97.     /**
  98.      * This constructor allows the creation of a UnixFTPEntryParser object with something other than the default configuration.
  99.      *
  100.      * @param config The {@link FTPClientConfig configuration} object used to configure this parser.
  101.      * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
  102.      *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
  103.      * @since 1.4
  104.      */
  105.     public UnixFTPEntryParser(final FTPClientConfig config) {
  106.         this(config, false);
  107.     }

  108.     /**
  109.      * This constructor allows the creation of a UnixFTPEntryParser object with something other than the default configuration.
  110.      *
  111.      * @param config            The {@link FTPClientConfig configuration} object used to configure this parser.
  112.      * @param trimLeadingSpaces if {@code true}, trim leading spaces from file names
  113.      * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
  114.      *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
  115.      * @since 3.4
  116.      */
  117.     public UnixFTPEntryParser(final FTPClientConfig config, final boolean trimLeadingSpaces) {
  118.         super(REGEX);
  119.         configure(config);
  120.         this.trimLeadingSpaces = trimLeadingSpaces;
  121.     }

  122.     /**
  123.      * Defines a default configuration to be used when this class is instantiated without a {@link FTPClientConfig FTPClientConfig} parameter being specified.
  124.      *
  125.      * @return the default configuration for this parser.
  126.      */
  127.     @Override
  128.     protected FTPClientConfig getDefaultConfiguration() {
  129.         return new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT, DEFAULT_RECENT_DATE_FORMAT);
  130.     }

  131.     /**
  132.      * Parses a line of a unix (standard) FTP server file listing and converts it into a usable format in the form of an <code>FTPFile</code> instance. If the
  133.      * file listing line doesn't describe a file, <code>null</code> is returned, otherwise a <code>FTPFile</code> instance representing the files in the
  134.      * directory is returned.
  135.      *
  136.      * @param entry A line of text from the file listing
  137.      * @return An FTPFile instance corresponding to the supplied entry
  138.      */
  139.     @Override
  140.     public FTPFile parseFTPEntry(final String entry) {
  141.         final FTPFile file = new FTPFile();
  142.         file.setRawListing(entry);
  143.         final int type;
  144.         boolean isDevice = false;

  145.         if (matches(entry)) {
  146.             final String typeStr = group(1);
  147.             final String hardLinkCount = group(15);
  148.             final String usr = group(16);
  149.             final String grp = group(17);
  150.             final String filesize = group(18);
  151.             final String datestr = group(19) + " " + group(20);
  152.             String name = group(21);
  153.             if (trimLeadingSpaces) {
  154.                 name = name.replaceFirst("^\\s+", "");
  155.             }

  156.             try {
  157.                 if (group(19).contains(JA_MONTH)) { // special processing for Japanese format
  158.                     final FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl();
  159.                     jaParser.configure(new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_JA, DEFAULT_RECENT_DATE_FORMAT_JA));
  160.                     file.setTimestamp(jaParser.parseTimestamp(datestr));
  161.                 } else {
  162.                     file.setTimestamp(super.parseTimestamp(datestr));
  163.                 }
  164.             } catch (final ParseException e) {
  165.                 // intentionally do nothing
  166.             }

  167.             // A 'whiteout' file is an ARTIFICIAL entry in any of several types of
  168.             // 'translucent' filesystems, of which a 'union' filesystem is one.

  169.             // bcdelfmpSs-
  170.             switch (typeStr.charAt(0)) {
  171.             case 'd':
  172.                 type = FTPFile.DIRECTORY_TYPE;
  173.                 break;
  174.             case 'e': // NET-39 => z/OS external link
  175.                 type = FTPFile.SYMBOLIC_LINK_TYPE;
  176.                 break;
  177.             case 'l':
  178.                 type = FTPFile.SYMBOLIC_LINK_TYPE;
  179.                 break;
  180.             case 'b':
  181.             case 'c':
  182.                 isDevice = true;
  183.                 type = FTPFile.FILE_TYPE; // TODO change this if DEVICE_TYPE implemented
  184.                 break;
  185.             case 'f':
  186.             case '-':
  187.                 type = FTPFile.FILE_TYPE;
  188.                 break;
  189.             default: // e.g. ? and w = whiteout
  190.                 type = FTPFile.UNKNOWN_TYPE;
  191.             }

  192.             file.setType(type);

  193.             int g = 4;
  194.             for (int access = 0; access < 3; access++, g += 4) {
  195.                 // Use != '-' to avoid having to check for suid and sticky bits
  196.                 file.setPermission(access, FTPFile.READ_PERMISSION, !group(g).equals("-"));
  197.                 file.setPermission(access, FTPFile.WRITE_PERMISSION, !group(g + 1).equals("-"));

  198.                 final String execPerm = group(g + 2);
  199.                 file.setPermission(access, FTPFile.EXECUTE_PERMISSION, !execPerm.equals("-") && !Character.isUpperCase(execPerm.charAt(0)));
  200.             }

  201.             if (!isDevice) {
  202.                 try {
  203.                     file.setHardLinkCount(Integer.parseInt(hardLinkCount));
  204.                 } catch (final NumberFormatException e) {
  205.                     // intentionally do nothing
  206.                 }
  207.             }

  208.             file.setUser(usr);
  209.             file.setGroup(grp);

  210.             try {
  211.                 file.setSize(Long.parseLong(filesize));
  212.             } catch (final NumberFormatException e) {
  213.                 // intentionally do nothing
  214.             }

  215.             // oddball cases like symbolic links, file names
  216.             // with spaces in them.
  217.             if (type == FTPFile.SYMBOLIC_LINK_TYPE) {

  218.                 final int end = name.indexOf(" -> ");
  219.                 // Give up if no link indicator is present
  220.                 if (end == -1) {
  221.                     file.setName(name);
  222.                 } else {
  223.                     file.setName(name.substring(0, end));
  224.                     file.setLink(name.substring(end + 4));
  225.                 }

  226.             } else {
  227.                 file.setName(name);
  228.             }
  229.             return file;
  230.         }
  231.         return null;
  232.     }

  233.     /**
  234.      * Preparses the list to discard "total nnn" lines.
  235.      */
  236.     @Override
  237.     public List<String> preParse(final List<String> original) {
  238.         // NET-389
  239.         original.removeIf(entry -> TOTAL_PATTERN.matcher(entry).matches());
  240.         return original;
  241.     }

  242. }