/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  
18  package org.apache.commons.net.ftp.parser;
19  
20  import java.text.ParseException;
21  import java.util.List;
22  
23  import org.apache.commons.net.ftp.FTPClientConfig;
24  import org.apache.commons.net.ftp.FTPFile;
25  
26  /**
27   * Implementation FTPFileEntryParser and FTPFileListParser for standard Unix Systems.
28   *
29   * This class is based on the logic of Daniel Savarese's DefaultFTPListParser, but adapted to use regular expressions and to fit the new FTPFileEntryParser
30   * interface.
31   *
32   * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for usage instructions)
33   */
34  public class UnixFTPEntryParser extends ConfigurableFTPFileEntryParserImpl {
35  
36      static final String DEFAULT_DATE_FORMAT = "MMM d yyyy"; // Nov 9 2001
37  
38      static final String DEFAULT_RECENT_DATE_FORMAT = "MMM d HH:mm"; // Nov 9 20:06
39  
40      static final String NUMERIC_DATE_FORMAT = "yyyy-MM-dd HH:mm"; // 2001-11-09 20:06
41  
42      // Suffixes used in Japanese listings after the numeric values
43      private static final String JA_MONTH = "\u6708";
44      private static final String JA_DAY = "\u65e5";
45      private static final String JA_YEAR = "\u5e74";
46  
47      private static final String DEFAULT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' yyyy'" + JA_YEAR + "'"; // 6月 3日 2003年
48  
49      private static final String DEFAULT_RECENT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' HH:mm"; // 8月 17日 20:10
50  
51      /**
52       * Some Linux distributions are now shipping an FTP server which formats file listing dates in an all-numeric format: <code>"yyyy-MM-dd HH:mm</code>. This
53       * is a very welcome development, and hopefully it will soon become the standard. However, since it is so new, for now, and possibly forever, we merely
54       * accommodate it, but do not make it the default.
55       * <p>
56       * For now end users may specify this format only via <code>UnixFTPEntryParser(FTPClientConfig)</code>. Steve Cohen - 2005-04-17
57       */
58      public static final FTPClientConfig NUMERIC_DATE_CONFIG = new FTPClientConfig(FTPClientConfig.SYST_UNIX, NUMERIC_DATE_FORMAT, null);
59  
60      /**
61       * this is the regular expression used by this parser.
62       *
63       * Permissions: r the file is readable w the file is writable x the file is executable - the indicated permission is not granted L mandatory locking occurs
64       * during access (the set-group-ID bit is on and the group execution bit is off) s the set-user-ID or set-group-ID bit is on, and the corresponding user or
65       * group execution bit is also on S undefined bit-state (the set-user-ID bit is on and the user execution bit is off) t the 1000 (octal) bit, or sticky bit,
66       * is on [see chmod(1)], and execution is on T the 1000 bit is turned on, and execution is off (undefined bit-state) e z/OS external link bit. Final letter
67       * may be appended: + file has extended security attributes (e.g. ACL) Note: local listings on MacOSX also use '@'; this is not allowed for here as does not
68       * appear to be shown by FTP servers {@code @} file has extended attributes
69       */
70      private static final String REGEX = "([bcdelfmpSs-])" // file type
71              + "(((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-])))\\+?" // permissions
72  
73              + "\\s*" // separator TODO why allow it to be omitted??
74  
75              + "(\\d+)" // link count
76  
77              + "\\s+" // separator
78  
79              + "(?:(\\S+(?:\\s\\S+)*?)\\s+)?" // owner name (optional spaces)
80              + "(?:(\\S+(?:\\s\\S+)*)\\s+)?" // group name (optional spaces)
81              + "(\\d+(?:,\\s*\\d+)?)" // size or n,m
82  
83              + "\\s+" // separator
84  
85              /*
86               * numeric or standard format date: yyyy-mm-dd (expecting hh:mm to follow) MMM [d]d [d]d MMM N.B. use non-space for MMM to allow for languages such
87               * as German which use diacritics (e.g. umlaut) in some abbreviations. Japanese uses numeric day and month with suffixes to distinguish them [d]dXX
88               * [d]dZZ
89               */
90              + "(" + "(?:\\d+[-/]\\d+[-/]\\d+)" + // yyyy-mm-dd
91              "|(?:\\S{3}\\s+\\d{1,2})" + // MMM [d]d
92              "|(?:\\d{1,2}\\s+\\S{3})" + // [d]d MMM
93              "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2}" + JA_DAY + ")" + ")"
94  
95              + "\\s+" // separator
96  
97              /*
98               * year (for non-recent standard format) - yyyy or time (for numeric or recent standard format) [h]h:mm or Japanese year - yyyyXX
99               */
100             + "((?:\\d+(?::\\d+)?)|(?:\\d{4}" + JA_YEAR + "))" // (20)
101 
102             + "\\s" // separator
103 
104             + "(.*)"; // the rest (21)
105 
106     // if true, leading spaces are trimmed from file names
107     // this was the case for the original implementation
108     final boolean trimLeadingSpaces; // package protected for access from test code
109 
110     /**
111      * The default constructor for a UnixFTPEntryParser object.
112      *
113      * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
114      *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
115      */
116     public UnixFTPEntryParser() {
117         this(null);
118     }
119 
120     /**
121      * This constructor allows the creation of a UnixFTPEntryParser object with something other than the default configuration.
122      *
123      * @param config The {@link FTPClientConfig configuration} object used to configure this parser.
124      * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
125      *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
126      * @since 1.4
127      */
128     public UnixFTPEntryParser(final FTPClientConfig config) {
129         this(config, false);
130     }
131 
132     /**
133      * This constructor allows the creation of a UnixFTPEntryParser object with something other than the default configuration.
134      *
135      * @param config            The {@link FTPClientConfig configuration} object used to configure this parser.
136      * @param trimLeadingSpaces if {@code true}, trim leading spaces from file names
137      * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
138      *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
139      * @since 3.4
140      */
141     public UnixFTPEntryParser(final FTPClientConfig config, final boolean trimLeadingSpaces) {
142         super(REGEX);
143         configure(config);
144         this.trimLeadingSpaces = trimLeadingSpaces;
145     }
146 
147     /**
148      * Defines a default configuration to be used when this class is instantiated without a {@link FTPClientConfig FTPClientConfig} parameter being specified.
149      *
150      * @return the default configuration for this parser.
151      */
152     @Override
153     protected FTPClientConfig getDefaultConfiguration() {
154         return new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT, DEFAULT_RECENT_DATE_FORMAT);
155     }
156 
157     /**
158      * Parses a line of a unix (standard) FTP server file listing and converts it into a usable format in the form of an <code> FTPFile </code> instance. If the
159      * file listing line doesn't describe a file, <code> null </code> is returned, otherwise a <code> FTPFile </code> instance representing the files in the
160      * directory is returned.
161      *
162      * @param entry A line of text from the file listing
163      * @return An FTPFile instance corresponding to the supplied entry
164      */
165     @Override
166     public FTPFile parseFTPEntry(final String entry) {
167         final FTPFile file = new FTPFile();
168         file.setRawListing(entry);
169         final int type;
170         boolean isDevice = false;
171 
172         if (matches(entry)) {
173             final String typeStr = group(1);
174             final String hardLinkCount = group(15);
175             final String usr = group(16);
176             final String grp = group(17);
177             final String filesize = group(18);
178             final String datestr = group(19) + " " + group(20);
179             String name = group(21);
180             if (trimLeadingSpaces) {
181                 name = name.replaceFirst("^\\s+", "");
182             }
183 
184             try {
185                 if (group(19).contains(JA_MONTH)) { // special processing for Japanese format
186                     final FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl();
187                     jaParser.configure(new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_JA, DEFAULT_RECENT_DATE_FORMAT_JA));
188                     file.setTimestamp(jaParser.parseTimestamp(datestr));
189                 } else {
190                     file.setTimestamp(super.parseTimestamp(datestr));
191                 }
192             } catch (final ParseException e) {
193                 // intentionally do nothing
194             }
195 
196             // A 'whiteout' file is an ARTIFICIAL entry in any of several types of
197             // 'translucent' filesystems, of which a 'union' filesystem is one.
198 
199             // bcdelfmpSs-
200             switch (typeStr.charAt(0)) {
201             case 'd':
202                 type = FTPFile.DIRECTORY_TYPE;
203                 break;
204             case 'e': // NET-39 => z/OS external link
205                 type = FTPFile.SYMBOLIC_LINK_TYPE;
206                 break;
207             case 'l':
208                 type = FTPFile.SYMBOLIC_LINK_TYPE;
209                 break;
210             case 'b':
211             case 'c':
212                 isDevice = true;
213                 type = FTPFile.FILE_TYPE; // TODO change this if DEVICE_TYPE implemented
214                 break;
215             case 'f':
216             case '-':
217                 type = FTPFile.FILE_TYPE;
218                 break;
219             default: // e.g. ? and w = whiteout
220                 type = FTPFile.UNKNOWN_TYPE;
221             }
222 
223             file.setType(type);
224 
225             int g = 4;
226             for (int access = 0; access < 3; access++, g += 4) {
227                 // Use != '-' to avoid having to check for suid and sticky bits
228                 file.setPermission(access, FTPFile.READ_PERMISSION, !group(g).equals("-"));
229                 file.setPermission(access, FTPFile.WRITE_PERMISSION, !group(g + 1).equals("-"));
230 
231                 final String execPerm = group(g + 2);
232                 file.setPermission(access, FTPFile.EXECUTE_PERMISSION, !execPerm.equals("-") && !Character.isUpperCase(execPerm.charAt(0)));
233             }
234 
235             if (!isDevice) {
236                 try {
237                     file.setHardLinkCount(Integer.parseInt(hardLinkCount));
238                 } catch (final NumberFormatException e) {
239                     // intentionally do nothing
240                 }
241             }
242 
243             file.setUser(usr);
244             file.setGroup(grp);
245 
246             try {
247                 file.setSize(Long.parseLong(filesize));
248             } catch (final NumberFormatException e) {
249                 // intentionally do nothing
250             }
251 
252             // oddball cases like symbolic links, file names
253             // with spaces in them.
254             if (type == FTPFile.SYMBOLIC_LINK_TYPE) {
255 
256                 final int end = name.indexOf(" -> ");
257                 // Give up if no link indicator is present
258                 if (end == -1) {
259                     file.setName(name);
260                 } else {
261                     file.setName(name.substring(0, end));
262                     file.setLink(name.substring(end + 4));
263                 }
264 
265             } else {
266                 file.setName(name);
267             }
268             return file;
269         }
270         return null;
271     }
272 
273     /**
274      * Preparse the list to discard "total nnn" lines
275      */
276     @Override
277     public List<String> preParse(final List<String> original) {
278         // NET-389
279         original.removeIf(entry -> entry.matches("^total \\d+$"));
280         return original;
281     }
282 
283 }