/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.net.ftp.parser;

import java.text.ParseException;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.commons.net.ftp.FTPClientConfig;
import org.apache.commons.net.ftp.FTPFile;

27  /**
28   * Implementation FTPFileEntryParser and FTPFileListParser for standard Unix Systems.
29   * <p>
30   * This class is based on the logic of Daniel Savarese's DefaultFTPListParser, but adapted to use regular expressions and to fit the new FTPFileEntryParser
31   * interface.
32   * </p>
33   *
34   * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for usage instructions)
35   */
36  public class UnixFTPEntryParser extends ConfigurableFTPFileEntryParserImpl {
37  
38      static final String DEFAULT_DATE_FORMAT = "MMM d yyyy"; // Nov 9 2001
39  
40      static final String DEFAULT_RECENT_DATE_FORMAT = "MMM d HH:mm"; // Nov 9 20:06
41  
42      static final String NUMERIC_DATE_FORMAT = "yyyy-MM-dd HH:mm"; // 2001-11-09 20:06
43  
44      // Suffixes used in Japanese listings after the numeric values
45      private static final String JA_MONTH = "\u6708";
46      private static final String JA_DAY = "\u65e5";
47      private static final String JA_YEAR = "\u5e74";
48  
49      private static final String DEFAULT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' yyyy'" + JA_YEAR + "'"; // 6月 3日 2003年
50      private static final String DEFAULT_DATE_FORMAT_CN = "M'" + JA_MONTH + "' d yyyy"; // 6月 3 2003
51  
52      private static final String DEFAULT_RECENT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' HH:mm"; // 8月 17日 20:10
53      private static final String DEFAULT_RECENT_DATE_FORMAT_CN = "M'" + JA_MONTH + "' d HH:mm"; // 8月 17 20:10
54  
55      private static final Pattern TOTAL_PATTERN = Pattern.compile("^total \\d+$");
56  
57      /**
58       * Some Linux distributions are now shipping an FTP server which formats file listing dates in an all-numeric format: {@code "yyyy-MM-dd HH:mm}. This
59       * is a very welcome development, and hopefully it will soon become the standard. However, since it is so new, for now, and possibly forever, we merely
60       * accommodate it, but do not make it the default.
61       * <p>
62       * For now end users may specify this format only via {@code UnixFTPEntryParser(FTPClientConfig)}. Steve Cohen - 2005-04-17
63       * </p>
64       */
65      public static final FTPClientConfig NUMERIC_DATE_CONFIG = new FTPClientConfig(FTPClientConfig.SYST_UNIX, NUMERIC_DATE_FORMAT, null);
66  
67      /**
68       * Regular expression used by this parser.
69       * <p>
70       * Permissions:
71       * </p>
72       * <ul>
73       * <li>r the file is readable</li>
74       * <li>w the file is writable</li>
75       * <li>x the file is executable</li>
76       * <li>- the indicated permission is not granted</li>
77       * <li>L mandatory locking occurs</li> during access (the set-group-ID bit is on and the group execution bit is off)</li>
78       * <li>s the set-user-ID or set-group-ID bit is on, and the corresponding user or group execution bit is also on
79       * <li>S undefined bit-state (the set-user-ID bit is on and the user execution bit is off)</li>
80       * <li>t the 1000 (octal) bit, or sticky bit, is on [see chmod(1)], and execution is on</li>
81       * <li>T the 1000 bit is turned on, and execution is off (undefined bit-state)</li>
82       * <li>e z/OS external link bit.</li>
83       * <p>
84       * Final letter may be appended: + file has extended security attributes (e.g. ACL) Note: local listings on MacOSX also use '@'; this is not allowed for
85       * here as does not appear to be shown by FTP servers {@code @} file has extended attributes
86       * </p>
87       */
88      private static final String REGEX = "([bcdelfmpSs-])" // file type
89              + "(((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-])))\\+?" // permissions
90              + "\\s*" // separator TODO why allow it to be omitted??
91              + "(\\d+)" // link count
92              + "\\s+" // separator
93              + "(?:(\\S+(?:\\s\\S+)*?)\\s+)?" // owner name (optional spaces)
94              + "(?:(\\S+(?:\\s\\S+)*)\\s+)?" // group name (optional spaces)
95              + "(\\d+(?:,\\s*\\d+)?)" // size or n,m
96              + "\\s+" // separator
97              /*
98               * numeric or standard format date: yyyy-mm-dd (expecting hh:mm to follow) MMM [d]d [d]d MMM Use non-space for MMM to allow for languages such
99               * as German which use diacritics (e.g. umlaut) in some abbreviations. Japanese uses numeric day and month with suffixes to distinguish them [d]dXX
100              * [d]dZZ
101              */
102             + "(" + "(?:\\d+[-/]\\d+[-/]\\d+)" + // yyyy-mm-dd
103             "|(?:\\S{3}\\s+\\d{1,2})" + // MMM [d]d
104             "|(?:\\d{1,2}\\s+\\S{3})" + // [d]d MMM
105             "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2}" + JA_DAY + ")" +
106             "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2})"
107             + ")"
108             + "\\s+" // separator
109             /*
110              * year (for non-recent standard format) - yyyy or time (for numeric or recent standard format) [h]h:mm or Japanese year - yyyyXX
111              */
112             + "((?:\\d+(?::\\d+)?)|(?:\\d{4}" + JA_YEAR + "))" // (20)
113             + "\\s" // separator
114             + "(.*)"; // the rest (21)
115 
116     /**
117      * Whether leading spaces are trimmed from file names this was the case for the original implementation.
118      */
119     final boolean trimLeadingSpaces; // package protected for access from test code
120 
121     /**
122      * Constructs a new instance.
123      *
124      * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
125      *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
126      */
127     public UnixFTPEntryParser() {
128         this(null);
129     }
130 
131     /**
132      * Constructs a new instance with something other than the default configuration.
133      *
134      * @param config The {@link FTPClientConfig configuration} object used to configure this parser.
135      * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
136      *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
137      * @since 1.4
138      */
139     public UnixFTPEntryParser(final FTPClientConfig config) {
140         this(config, false);
141     }
142 
143     /**
144      * Constructs a new instance  with something other than the default configuration.
145      *
146      * @param config            The {@link FTPClientConfig configuration} object used to configure this parser.
147      * @param trimLeadingSpaces if {@code true}, trim leading spaces from file names
148      * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
149      *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
150      * @since 3.4
151      */
152     public UnixFTPEntryParser(final FTPClientConfig config, final boolean trimLeadingSpaces) {
153         super(REGEX);
154         configure(config);
155         this.trimLeadingSpaces = trimLeadingSpaces;
156     }
157 
158     /**
159      * Gets a new default configuration to be used when this class is instantiated without a {@link FTPClientConfig FTPClientConfig} parameter being specified.
160      *
161      * @return the default configuration for this parser.
162      */
163     @Override
164     protected FTPClientConfig getDefaultConfiguration() {
165         return new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT, DEFAULT_RECENT_DATE_FORMAT);
166     }
167 
168     /**
169      * Parses a line of a Unix (standard) FTP server file listing and converts it into a usable format in the form of an {@code FTPFile} instance. If the
170      * file listing line doesn't describe a file, {@code null} is returned, otherwise a {@code FTPFile} instance representing the files in the
171      * directory is returned.
172      *
173      * @param entry A line of text from the file listing
174      * @return An FTPFile instance corresponding to the supplied entry
175      */
176     @Override
177     public FTPFile parseFTPEntry(final String entry) {
178         final FTPFile file = new FTPFile();
179         file.setRawListing(entry);
180         final int type;
181         boolean isDevice = false;
182         if (matches(entry)) {
183             final String typeStr = group(1);
184             final String hardLinkCount = group(15);
185             final String usr = group(16);
186             final String grp = group(17);
187             final String fileSize = group(18);
188             final String datestr = group(19) + " " + group(20);
189             String name = group(21);
190             if (trimLeadingSpaces) {
191                 name = name.replaceFirst("^\\s+", "");
192             }
193             try {
194                 if (group(19).contains(JA_MONTH) && group(19).contains(JA_DAY)) { // special processing for Japanese format
195                     final FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl();
196                     jaParser.configure(new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_JA, DEFAULT_RECENT_DATE_FORMAT_JA));
197                     file.setTimestamp(jaParser.parseTimestamp(datestr));
198                 } else if (group(19).contains(JA_MONTH) && !group(19).contains(JA_DAY)) {
199                     final FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl();
200                     jaParser.configure(new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_CN, DEFAULT_RECENT_DATE_FORMAT_CN));
201                     file.setTimestamp(jaParser.parseTimestamp(datestr));
202                 } else {
203                     file.setTimestamp(super.parseTimestamp(datestr));
204                 }
205             } catch (final ParseException e) {
206                 // intentionally do nothing
207             }
208             // A 'whiteout' file is an ARTIFICIAL entry in any of several types of
209             // 'translucent' filesystems, of which a 'union' filesystem is one.
210             // bcdelfmpSs-
211             switch (typeStr.charAt(0)) {
212             case 'd':
213                 type = FTPFile.DIRECTORY_TYPE;
214                 break;
215             case 'e': // NET-39 => z/OS external link
216                 type = FTPFile.SYMBOLIC_LINK_TYPE;
217                 break;
218             case 'l':
219                 type = FTPFile.SYMBOLIC_LINK_TYPE;
220                 break;
221             case 'b':
222             case 'c':
223                 isDevice = true;
224                 type = FTPFile.FILE_TYPE; // TODO change this if DEVICE_TYPE implemented
225                 break;
226             case 'f':
227             case '-':
228                 type = FTPFile.FILE_TYPE;
229                 break;
230             default: // e.g. ? and w = whiteout
231                 type = FTPFile.UNKNOWN_TYPE;
232             }
233             file.setType(type);
234             int g = 4;
235             for (int access = 0; access < 3; access++, g += 4) {
236                 // Use != '-' to avoid having to check for suid and sticky bits
237                 file.setPermission(access, FTPFile.READ_PERMISSION, !group(g).equals("-"));
238                 file.setPermission(access, FTPFile.WRITE_PERMISSION, !group(g + 1).equals("-"));
239                 final String execPerm = group(g + 2);
240                 file.setPermission(access, FTPFile.EXECUTE_PERMISSION, !execPerm.equals("-") && !Character.isUpperCase(execPerm.charAt(0)));
241             }
242             if (!isDevice) {
243                 try {
244                     file.setHardLinkCount(Integer.parseInt(hardLinkCount));
245                 } catch (final NumberFormatException e) {
246                     // intentionally do nothing
247                 }
248             }
249             file.setUser(usr);
250             file.setGroup(grp);
251             try {
252                 file.setSize(Long.parseLong(fileSize));
253             } catch (final NumberFormatException e) {
254                 // intentionally do nothing
255             }
256             // oddball cases like symbolic links, file names
257             // with spaces in them.
258             if (type == FTPFile.SYMBOLIC_LINK_TYPE) {
259                 final int end = name.indexOf(" -> ");
260                 // Give up if no link indicator is present
261                 if (end == -1) {
262                     file.setName(name);
263                 } else {
264                     file.setName(name.substring(0, end));
265                     file.setLink(name.substring(end + 4));
266                 }
267             } else {
268                 file.setName(name);
269             }
270             return file;
271         }
272         return null;
273     }
274 
275     /**
276      * Preparses the list to discard "total nnn" lines.
277      */
278     @Override
279     public List<String> preParse(final List<String> original) {
280         // NET-389
281         original.removeIf(entry -> TOTAL_PATTERN.matcher(entry).matches());
282         return original;
283     }
284 
285 }