View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.net.ftp.parser;
import java.text.ParseException;
import java.util.List;
import java.util.ListIterator;
import java.util.regex.Pattern;

import org.apache.commons.net.ftp.FTPClientConfig;
import org.apache.commons.net.ftp.FTPFile;
25  
26  /**
27   * Implementation of FTPFileEntryParser and FTPFileListParser for standard
28   * Unix Systems.
29   *
30   * This class is based on the logic of Daniel Savarese's
31   * DefaultFTPListParser, but adapted to use regular expressions and to fit the
32   * new FTPFileEntryParser interface.
33   * @version $Id: UnixFTPEntryParser.java 1781925 2017-02-06 16:43:40Z sebb $
34   * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for usage instructions)
35   */
36  public class UnixFTPEntryParser extends ConfigurableFTPFileEntryParserImpl
37  {
38  
39      static final String DEFAULT_DATE_FORMAT
40          = "MMM d yyyy"; //Nov 9 2001
41  
42      static final String DEFAULT_RECENT_DATE_FORMAT
43          = "MMM d HH:mm"; //Nov 9 20:06
44  
45      static final String NUMERIC_DATE_FORMAT
46          = "yyyy-MM-dd HH:mm"; //2001-11-09 20:06
47  
48      // Suffixes used in Japanese listings after the numeric values
49      private static final String JA_MONTH = "\u6708";
50      private static final String JA_DAY   = "\u65e5";
51      private static final String JA_YEAR  = "\u5e74";
52  
53      private static final String DEFAULT_DATE_FORMAT_JA
54          = "M'" + JA_MONTH + "' d'" + JA_DAY + "' yyyy'" + JA_YEAR + "'"; //6月 3日 2003年
55  
56      private static final String DEFAULT_RECENT_DATE_FORMAT_JA
57          = "M'" + JA_MONTH + "' d'" + JA_DAY + "' HH:mm"; //8月 17日 20:10
58  
59      /**
60       * Some Linux distributions are now shipping an FTP server which formats
61       * file listing dates in an all-numeric format:
62       * <code>"yyyy-MM-dd HH:mm</code>.
63       * This is a very welcome development,  and hopefully it will soon become
64       * the standard.  However, since it is so new, for now, and possibly
65       * forever, we merely accomodate it, but do not make it the default.
66       * <p>
67       * For now end users may specify this format only via
68       * <code>UnixFTPEntryParser(FTPClientConfig)</code>.
69       * Steve Cohen - 2005-04-17
70       */
71      public static final FTPClientConfig NUMERIC_DATE_CONFIG =
72          new FTPClientConfig(
73                  FTPClientConfig.SYST_UNIX,
74                  NUMERIC_DATE_FORMAT,
75                  null);
76  
77      /**
78       * this is the regular expression used by this parser.
79       *
80       * Permissions:
81       *    r   the file is readable
82       *    w   the file is writable
83       *    x   the file is executable
84       *    -   the indicated permission is not granted
85       *    L   mandatory locking occurs during access (the set-group-ID bit is
86       *        on and the group execution bit is off)
87       *    s   the set-user-ID or set-group-ID bit is on, and the corresponding
88       *        user or group execution bit is also on
89       *    S   undefined bit-state (the set-user-ID bit is on and the user
90       *        execution bit is off)
91       *    t   the 1000 (octal) bit, or sticky bit, is on [see chmod(1)], and
92       *        execution is on
93       *    T   the 1000 bit is turned on, and execution is off (undefined bit-
94       *        state)
95       *    e   z/OS external link bit
96       *    Final letter may be appended:
97       *    +   file has extended security attributes (e.g. ACL)
98       *    Note: local listings on MacOSX also use '@';
99       *    this is not allowed for here as does not appear to be shown by FTP servers
100      *    {@code @}   file has extended attributes
101      */
102     private static final String REGEX =
103         "([bcdelfmpSs-])" // file type
104         +"(((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-])))\\+?" // permissions
105 
106         + "\\s*"                                        // separator TODO why allow it to be omitted??
107 
108         + "(\\d+)"                                      // link count
109 
110         + "\\s+" // separator
111 
112         + "(?:(\\S+(?:\\s\\S+)*?)\\s+)?"                // owner name (optional spaces)
113         + "(?:(\\S+(?:\\s\\S+)*)\\s+)?"                 // group name (optional spaces)
114         + "(\\d+(?:,\\s*\\d+)?)"                        // size or n,m
115 
116         + "\\s+" // separator
117 
118         /*
119          * numeric or standard format date:
120          *   yyyy-mm-dd (expecting hh:mm to follow)
121          *   MMM [d]d
122          *   [d]d MMM
123          *   N.B. use non-space for MMM to allow for languages such as German which use
124          *   diacritics (e.g. umlaut) in some abbreviations.
125          *   Japanese uses numeric day and month with suffixes to distinguish them
126          *   [d]dXX [d]dZZ
127         */
128         + "("+
129             "(?:\\d+[-/]\\d+[-/]\\d+)" + // yyyy-mm-dd
130             "|(?:\\S{3}\\s+\\d{1,2})" +  // MMM [d]d
131             "|(?:\\d{1,2}\\s+\\S{3})" + // [d]d MMM
132             "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2}" + JA_DAY + ")"+
133            ")"
134 
135         + "\\s+" // separator
136 
137         /*
138            year (for non-recent standard format) - yyyy
139            or time (for numeric or recent standard format) [h]h:mm
140            or Japanese year - yyyyXX
141         */
142         + "((?:\\d+(?::\\d+)?)|(?:\\d{4}" + JA_YEAR + "))" // (20)
143 
144         + "\\s" // separator
145 
146         + "(.*)"; // the rest (21)
147 
148 
149     // if true, leading spaces are trimmed from file names
150     // this was the case for the original implementation
151     final boolean trimLeadingSpaces; // package protected for access from test code
152 
153     /**
154      * The default constructor for a UnixFTPEntryParser object.
155      *
156      * @throws IllegalArgumentException
157      * Thrown if the regular expression is unparseable.  Should not be seen
158      * under normal conditions.  It it is seen, this is a sign that
159      * <code>REGEX</code> is  not a valid regular expression.
160      */
161     public UnixFTPEntryParser()
162     {
163         this(null);
164     }
165 
166     /**
167      * This constructor allows the creation of a UnixFTPEntryParser object with
168      * something other than the default configuration.
169      *
170      * @param config The {@link FTPClientConfig configuration} object used to
171      * configure this parser.
172      * @throws IllegalArgumentException
173      * Thrown if the regular expression is unparseable.  Should not be seen
174      * under normal conditions.  It it is seen, this is a sign that
175      * <code>REGEX</code> is  not a valid regular expression.
176      * @since 1.4
177      */
178     public UnixFTPEntryParser(FTPClientConfig config)
179     {
180         this(config, false);
181     }
182 
183     /**
184      * This constructor allows the creation of a UnixFTPEntryParser object with
185      * something other than the default configuration.
186      *
187      * @param config The {@link FTPClientConfig configuration} object used to
188      * configure this parser.
189      * @param trimLeadingSpaces if {@code true}, trim leading spaces from file names
190      * @throws IllegalArgumentException
191      * Thrown if the regular expression is unparseable.  Should not be seen
192      * under normal conditions.  It it is seen, this is a sign that
193      * <code>REGEX</code> is  not a valid regular expression.
194      * @since 3.4
195      */
196     public UnixFTPEntryParser(FTPClientConfig config, boolean trimLeadingSpaces)
197     {
198         super(REGEX);
199         configure(config);
200         this.trimLeadingSpaces = trimLeadingSpaces;
201     }
202 
203     /**
204      * Preparse the list to discard "total nnn" lines
205      */
206     @Override
207     public List<String> preParse(List<String> original) {
208         ListIterator<String> iter = original.listIterator();
209         while (iter.hasNext()) {
210             String entry = iter.next();
211             if (entry.matches("^total \\d+$")) { // NET-389
212                 iter.remove();
213             }
214         }
215         return original;
216     }
217 
218     /**
219      * Parses a line of a unix (standard) FTP server file listing and converts
220      * it into a usable format in the form of an <code> FTPFile </code>
221      * instance.  If the file listing line doesn't describe a file,
222      * <code> null </code> is returned, otherwise a <code> FTPFile </code>
223      * instance representing the files in the directory is returned.
224      *
225      * @param entry A line of text from the file listing
226      * @return An FTPFile instance corresponding to the supplied entry
227      */
228     @Override
229     public FTPFile parseFTPEntry(String entry) {
230         FTPFile file = new FTPFile();
231         file.setRawListing(entry);
232         int type;
233         boolean isDevice = false;
234 
235         if (matches(entry))
236         {
237             String typeStr = group(1);
238             String hardLinkCount = group(15);
239             String usr = group(16);
240             String grp = group(17);
241             String filesize = group(18);
242             String datestr = group(19) + " " + group(20);
243             String name = group(21);
244             if (trimLeadingSpaces) {
245                 name = name.replaceFirst("^\\s+", "");
246             }
247 
248             try
249             {
250                 if (group(19).contains(JA_MONTH)) { // special processing for Japanese format
251                     FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl();
252                     jaParser.configure(new FTPClientConfig(
253                             FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_JA, DEFAULT_RECENT_DATE_FORMAT_JA));
254                     file.setTimestamp(jaParser.parseTimestamp(datestr));
255                 } else {
256                     file.setTimestamp(super.parseTimestamp(datestr));
257                 }
258             }
259             catch (ParseException e)
260             {
261                  // intentionally do nothing
262             }
263 
264             // A 'whiteout' file is an ARTIFICIAL entry in any of several types of
265             // 'translucent' filesystems, of which a 'union' filesystem is one.
266 
267             // bcdelfmpSs-
268             switch (typeStr.charAt(0))
269             {
270             case 'd':
271                 type = FTPFile.DIRECTORY_TYPE;
272                 break;
273             case 'e': // NET-39 => z/OS external link
274                 type = FTPFile.SYMBOLIC_LINK_TYPE;
275                 break;
276             case 'l':
277                 type = FTPFile.SYMBOLIC_LINK_TYPE;
278                 break;
279             case 'b':
280             case 'c':
281                 isDevice = true;
282                 type = FTPFile.FILE_TYPE; // TODO change this if DEVICE_TYPE implemented
283                 break;
284             case 'f':
285             case '-':
286                 type = FTPFile.FILE_TYPE;
287                 break;
288             default: // e.g. ? and w = whiteout
289                 type = FTPFile.UNKNOWN_TYPE;
290             }
291 
292             file.setType(type);
293 
294             int g = 4;
295             for (int access = 0; access < 3; access++, g += 4)
296             {
297                 // Use != '-' to avoid having to check for suid and sticky bits
298                 file.setPermission(access, FTPFile.READ_PERMISSION,
299                                    (!group(g).equals("-")));
300                 file.setPermission(access, FTPFile.WRITE_PERMISSION,
301                                    (!group(g + 1).equals("-")));
302 
303                 String execPerm = group(g + 2);
304                 if (!execPerm.equals("-") && !Character.isUpperCase(execPerm.charAt(0)))
305                 {
306                     file.setPermission(access, FTPFile.EXECUTE_PERMISSION, true);
307                 }
308                 else
309                 {
310                     file.setPermission(access, FTPFile.EXECUTE_PERMISSION, false);
311                 }
312             }
313 
314             if (!isDevice)
315             {
316                 try
317                 {
318                     file.setHardLinkCount(Integer.parseInt(hardLinkCount));
319                 }
320                 catch (NumberFormatException e)
321                 {
322                     // intentionally do nothing
323                 }
324             }
325 
326             file.setUser(usr);
327             file.setGroup(grp);
328 
329             try
330             {
331                 file.setSize(Long.parseLong(filesize));
332             }
333             catch (NumberFormatException e)
334             {
335                 // intentionally do nothing
336             }
337 
338             // oddball cases like symbolic links, file names
339             // with spaces in them.
340             if (type == FTPFile.SYMBOLIC_LINK_TYPE)
341             {
342 
343                 int end = name.indexOf(" -> ");
344                 // Give up if no link indicator is present
345                 if (end == -1)
346                 {
347                     file.setName(name);
348                 }
349                 else
350                 {
351                     file.setName(name.substring(0, end));
352                     file.setLink(name.substring(end + 4));
353                 }
354 
355             }
356             else
357             {
358                 file.setName(name);
359             }
360             return file;
361         }
362         return null;
363     }
364 
365     /**
366      * Defines a default configuration to be used when this class is
367      * instantiated without a {@link  FTPClientConfig  FTPClientConfig}
368      * parameter being specified.
369      * @return the default configuration for this parser.
370      */
371     @Override
372     protected FTPClientConfig getDefaultConfiguration() {
373         return new FTPClientConfig(
374                 FTPClientConfig.SYST_UNIX,
375                 DEFAULT_DATE_FORMAT,
376                 DEFAULT_RECENT_DATE_FORMAT);
377     }
378 
379 }