1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.net.ftp.parser;
19
20 import java.text.ParseException;
21 import java.util.List;
22 import java.util.regex.Pattern;
23
24 import org.apache.commons.net.ftp.FTPClientConfig;
25 import org.apache.commons.net.ftp.FTPFile;
26
27 /**
28 * Implementation of FTPFileEntryParser and FTPFileListParser for standard Unix Systems.
29 * <p>
30 * This class is based on the logic of Daniel Savarese's DefaultFTPListParser, but adapted to use regular expressions and to fit the new FTPFileEntryParser
31 * interface.
32 * </p>
33 *
34 * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for usage instructions)
35 */
36 public class UnixFTPEntryParser extends ConfigurableFTPFileEntryParserImpl {
37
38 static final String DEFAULT_DATE_FORMAT = "MMM d yyyy"; // Nov 9 2001
39
40 static final String DEFAULT_RECENT_DATE_FORMAT = "MMM d HH:mm"; // Nov 9 20:06
41
42 static final String NUMERIC_DATE_FORMAT = "yyyy-MM-dd HH:mm"; // 2001-11-09 20:06
43
44 // Suffixes used in Japanese listings after the numeric values
45 private static final String JA_MONTH = "\u6708";
46 private static final String JA_DAY = "\u65e5";
47 private static final String JA_YEAR = "\u5e74";
48
49 private static final String DEFAULT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' yyyy'" + JA_YEAR + "'"; // 6月 3日 2003年
50
51 private static final String DEFAULT_RECENT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' HH:mm"; // 8月 17日 20:10
52
53 private static final Pattern TOTAL_PATTERN = Pattern.compile("^total \\d+$");
54
55 /**
56 * Some Linux distributions are now shipping an FTP server which formats file listing dates in an all-numeric format: {@code "yyyy-MM-dd HH:mm}. This
57 * is a very welcome development, and hopefully it will soon become the standard. However, since it is so new, for now, and possibly forever, we merely
58 * accommodate it, but do not make it the default.
59 * <p>
60 * For now end users may specify this format only via {@code UnixFTPEntryParser(FTPClientConfig)}. Steve Cohen - 2005-04-17
61 * </p>
62 */
63 public static final FTPClientConfig NUMERIC_DATE_CONFIG = new FTPClientConfig(FTPClientConfig.SYST_UNIX, NUMERIC_DATE_FORMAT, null);
64
65 /**
66 * Regular expression used by this parser.
67 * <p>
68 * Permissions:
69 * </p>
70 * <ul>
71 * <li>r the file is readable</li>
72 * <li>w the file is writable</li>
73 * <li>x the file is executable</li>
74 * <li>- the indicated permission is not granted</li>
75 * <li>L mandatory locking occurs</li> during access (the set-group-ID bit is on and the group execution bit is off)</li>
76 * <li>s the set-user-ID or set-group-ID bit is on, and the corresponding user or group execution bit is also on
77 * <li>S undefined bit-state (the set-user-ID bit is on and the user execution bit is off)</li>
78 * <li>t the 1000 (octal) bit, or sticky bit, is on [see chmod(1)], and execution is on</li>
79 * <li>T the 1000 bit is turned on, and execution is off (undefined bit-state)</li>
80 * <li>e z/OS external link bit.</li>
81 * <p>
82 * Final letter may be appended: + file has extended security attributes (e.g. ACL) Note: local listings on MacOSX also use '@'; this is not allowed for
83 * here as does not appear to be shown by FTP servers {@code @} file has extended attributes
84 * </p>
85 */
86 private static final String REGEX = "([bcdelfmpSs-])" // file type
87 + "(((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-])))\\+?" // permissions
88 + "\\s*" // separator TODO why allow it to be omitted??
89 + "(\\d+)" // link count
90 + "\\s+" // separator
91 + "(?:(\\S+(?:\\s\\S+)*?)\\s+)?" // owner name (optional spaces)
92 + "(?:(\\S+(?:\\s\\S+)*)\\s+)?" // group name (optional spaces)
93 + "(\\d+(?:,\\s*\\d+)?)" // size or n,m
94 + "\\s+" // separator
95 /*
96 * numeric or standard format date: yyyy-mm-dd (expecting hh:mm to follow) MMM [d]d [d]d MMM Use non-space for MMM to allow for languages such
97 * as German which use diacritics (e.g. umlaut) in some abbreviations. Japanese uses numeric day and month with suffixes to distinguish them [d]dXX
98 * [d]dZZ
99 */
100 + "(" + "(?:\\d+[-/]\\d+[-/]\\d+)" + // yyyy-mm-dd
101 "|(?:\\S{3}\\s+\\d{1,2})" + // MMM [d]d
102 "|(?:\\d{1,2}\\s+\\S{3})" + // [d]d MMM
103 "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2}" + JA_DAY + ")" + ")"
104 + "\\s+" // separator
105 /*
106 * year (for non-recent standard format) - yyyy or time (for numeric or recent standard format) [h]h:mm or Japanese year - yyyyXX
107 */
108 + "((?:\\d+(?::\\d+)?)|(?:\\d{4}" + JA_YEAR + "))" // (20)
109 + "\\s" // separator
110 + "(.*)"; // the rest (21)
111
112 /**
113 * Whether leading spaces are trimmed from file names this was the case for the original implementation.
114 */
115 final boolean trimLeadingSpaces; // package protected for access from test code
116
117 /**
118 * Constructs a new instance.
119 *
120 * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
121 * If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
122 */
123 public UnixFTPEntryParser() {
124 this(null);
125 }
126
127 /**
128 * Constructs a new instance with something other than the default configuration.
129 *
130 * @param config The {@link FTPClientConfig configuration} object used to configure this parser.
131 * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
132 * If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
133 * @since 1.4
134 */
135 public UnixFTPEntryParser(final FTPClientConfig config) {
136 this(config, false);
137 }
138
139 /**
140 * Constructs a new instance with something other than the default configuration.
141 *
142 * @param config The {@link FTPClientConfig configuration} object used to configure this parser.
143 * @param trimLeadingSpaces if {@code true}, trim leading spaces from file names
144 * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
145 * If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
146 * @since 3.4
147 */
148 public UnixFTPEntryParser(final FTPClientConfig config, final boolean trimLeadingSpaces) {
149 super(REGEX);
150 configure(config);
151 this.trimLeadingSpaces = trimLeadingSpaces;
152 }
153
154 /**
155 * Gets a new default configuration to be used when this class is instantiated without a {@link FTPClientConfig FTPClientConfig} parameter being specified.
156 *
157 * @return the default configuration for this parser.
158 */
159 @Override
160 protected FTPClientConfig getDefaultConfiguration() {
161 return new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT, DEFAULT_RECENT_DATE_FORMAT);
162 }
163
164 /**
165 * Parses a line of a Unix (standard) FTP server file listing and converts it into a usable format in the form of an {@code FTPFile} instance. If the
166 * file listing line doesn't describe a file, {@code null} is returned, otherwise a {@code FTPFile} instance representing the files in the
167 * directory is returned.
168 *
169 * @param entry A line of text from the file listing
170 * @return An FTPFile instance corresponding to the supplied entry
171 */
172 @Override
173 public FTPFile parseFTPEntry(final String entry) {
174 final FTPFile file = new FTPFile();
175 file.setRawListing(entry);
176 final int type;
177 boolean isDevice = false;
178 if (matches(entry)) {
179 final String typeStr = group(1);
180 final String hardLinkCount = group(15);
181 final String usr = group(16);
182 final String grp = group(17);
183 final String fileSize = group(18);
184 final String datestr = group(19) + " " + group(20);
185 String name = group(21);
186 if (trimLeadingSpaces) {
187 name = name.replaceFirst("^\\s+", "");
188 }
189 try {
190 if (group(19).contains(JA_MONTH)) { // special processing for Japanese format
191 final FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl();
192 jaParser.configure(new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_JA, DEFAULT_RECENT_DATE_FORMAT_JA));
193 file.setTimestamp(jaParser.parseTimestamp(datestr));
194 } else {
195 file.setTimestamp(super.parseTimestamp(datestr));
196 }
197 } catch (final ParseException e) {
198 // intentionally do nothing
199 }
200 // A 'whiteout' file is an ARTIFICIAL entry in any of several types of
201 // 'translucent' filesystems, of which a 'union' filesystem is one.
202 // bcdelfmpSs-
203 switch (typeStr.charAt(0)) {
204 case 'd':
205 type = FTPFile.DIRECTORY_TYPE;
206 break;
207 case 'e': // NET-39 => z/OS external link
208 type = FTPFile.SYMBOLIC_LINK_TYPE;
209 break;
210 case 'l':
211 type = FTPFile.SYMBOLIC_LINK_TYPE;
212 break;
213 case 'b':
214 case 'c':
215 isDevice = true;
216 type = FTPFile.FILE_TYPE; // TODO change this if DEVICE_TYPE implemented
217 break;
218 case 'f':
219 case '-':
220 type = FTPFile.FILE_TYPE;
221 break;
222 default: // e.g. ? and w = whiteout
223 type = FTPFile.UNKNOWN_TYPE;
224 }
225 file.setType(type);
226 int g = 4;
227 for (int access = 0; access < 3; access++, g += 4) {
228 // Use != '-' to avoid having to check for suid and sticky bits
229 file.setPermission(access, FTPFile.READ_PERMISSION, !group(g).equals("-"));
230 file.setPermission(access, FTPFile.WRITE_PERMISSION, !group(g + 1).equals("-"));
231 final String execPerm = group(g + 2);
232 file.setPermission(access, FTPFile.EXECUTE_PERMISSION, !execPerm.equals("-") && !Character.isUpperCase(execPerm.charAt(0)));
233 }
234 if (!isDevice) {
235 try {
236 file.setHardLinkCount(Integer.parseInt(hardLinkCount));
237 } catch (final NumberFormatException e) {
238 // intentionally do nothing
239 }
240 }
241 file.setUser(usr);
242 file.setGroup(grp);
243 try {
244 file.setSize(Long.parseLong(fileSize));
245 } catch (final NumberFormatException e) {
246 // intentionally do nothing
247 }
248 // oddball cases like symbolic links, file names
249 // with spaces in them.
250 if (type == FTPFile.SYMBOLIC_LINK_TYPE) {
251 final int end = name.indexOf(" -> ");
252 // Give up if no link indicator is present
253 if (end == -1) {
254 file.setName(name);
255 } else {
256 file.setName(name.substring(0, end));
257 file.setLink(name.substring(end + 4));
258 }
259 } else {
260 file.setName(name);
261 }
262 return file;
263 }
264 return null;
265 }
266
267 /**
268 * Preparses the list to discard "total nnn" lines.
269 */
270 @Override
271 public List<String> preParse(final List<String> original) {
272 // NET-389
273 original.removeIf(entry -> TOTAL_PATTERN.matcher(entry).matches());
274 return original;
275 }
276
277 }