001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.net.ftp.parser; 019 020import java.text.ParseException; 021import java.util.List; 022import java.util.regex.Pattern; 023 024import org.apache.commons.net.ftp.FTPClientConfig; 025import org.apache.commons.net.ftp.FTPFile; 026 027/** 028 * Implementation FTPFileEntryParser and FTPFileListParser for standard Unix Systems. 029 * <p> 030 * This class is based on the logic of Daniel Savarese's DefaultFTPListParser, but adapted to use regular expressions and to fit the new FTPFileEntryParser 031 * interface. 032 * </p> 033 * 034 * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for usage instructions) 035 */ 036public class UnixFTPEntryParser extends ConfigurableFTPFileEntryParserImpl { 037 038 static final String DEFAULT_DATE_FORMAT = "MMM d yyyy"; // Nov 9 2001 039 040 static final String DEFAULT_RECENT_DATE_FORMAT = "MMM d HH:mm"; // Nov 9 20:06 041 042 static final String NUMERIC_DATE_FORMAT = "yyyy-MM-dd HH:mm"; // 2001-11-09 20:06 043 044 // Suffixes used in Japanese listings after the numeric values 045 private static final String JA_MONTH = "\u6708"; 046 private static final String JA_DAY = "\u65e5"; 047 private static final String JA_YEAR = "\u5e74"; 048 049 private static final String DEFAULT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' yyyy'" + JA_YEAR + "'"; // 6月 3日 2003年 050 private static final String DEFAULT_DATE_FORMAT_CN = "M'" + JA_MONTH + "' d yyyy"; // 6月 3 2003 051 052 private static final String DEFAULT_RECENT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' HH:mm"; // 8月 17日 20:10 053 private static final String DEFAULT_RECENT_DATE_FORMAT_CN = "M'" + JA_MONTH + "' d HH:mm"; // 8月 17 20:10 054 055 private static final Pattern TOTAL_PATTERN = Pattern.compile("^total \\d+$"); 056 057 /** 058 * Some Linux distributions are now shipping an FTP server which formats file listing dates in an all-numeric format: {@code "yyyy-MM-dd HH:mm}. This 059 * is a very welcome development, and hopefully it will soon become the standard. However, since it is so new, for now, and possibly forever, we merely 060 * accommodate it, but do not make it the default. 061 * <p> 062 * For now end users may specify this format only via {@code UnixFTPEntryParser(FTPClientConfig)}. Steve Cohen - 2005-04-17 063 * </p> 064 */ 065 public static final FTPClientConfig NUMERIC_DATE_CONFIG = new FTPClientConfig(FTPClientConfig.SYST_UNIX, NUMERIC_DATE_FORMAT, null); 066 067 /** 068 * Regular expression used by this parser. 069 * <p> 070 * Permissions: 071 * </p> 072 * <ul> 073 * <li>r the file is readable</li> 074 * <li>w the file is writable</li> 075 * <li>x the file is executable</li> 076 * <li>- the indicated permission is not granted</li> 077 * <li>L mandatory locking occurs</li> during access (the set-group-ID bit is on and the group execution bit is off)</li> 078 * <li>s the set-user-ID or set-group-ID bit is on, and the corresponding user or group execution bit is also on 079 * <li>S undefined bit-state (the set-user-ID bit is on and the user execution bit is off)</li> 080 * <li>t the 1000 (octal) bit, or sticky bit, is on [see chmod(1)], and execution is on</li> 081 * <li>T the 1000 bit is turned on, and execution is off (undefined bit-state)</li> 082 * <li>e z/OS external link bit.</li> 083 * <p> 084 * Final letter may be appended: + file has extended security attributes (e.g. ACL) Note: local listings on MacOSX also use '@'; this is not allowed for 085 * here as does not appear to be shown by FTP servers {@code @} file has extended attributes 086 * </p> 087 */ 088 private static final String REGEX = "([bcdelfmpSs-])" // file type 089 + "(((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-])))\\+?" // permissions 090 + "\\s*" // separator TODO why allow it to be omitted?? 091 + "(\\d+)" // link count 092 + "\\s+" // separator 093 + "(?:(\\S+(?:\\s\\S+)*?)\\s+)?" // owner name (optional spaces) 094 + "(?:(\\S+(?:\\s\\S+)*)\\s+)?" // group name (optional spaces) 095 + "(\\d+(?:,\\s*\\d+)?)" // size or n,m 096 + "\\s+" // separator 097 /* 098 * numeric or standard format date: yyyy-mm-dd (expecting hh:mm to follow) MMM [d]d [d]d MMM Use non-space for MMM to allow for languages such 099 * as German which use diacritics (e.g. umlaut) in some abbreviations. Japanese uses numeric day and month with suffixes to distinguish them [d]dXX 100 * [d]dZZ 101 */ 102 + "(" + "(?:\\d+[-/]\\d+[-/]\\d+)" + // yyyy-mm-dd 103 "|(?:\\S{3}\\s+\\d{1,2})" + // MMM [d]d 104 "|(?:\\d{1,2}\\s+\\S{3})" + // [d]d MMM 105 "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2}" + JA_DAY + ")" + 106 "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2})" 107 + ")" 108 + "\\s+" // separator 109 /* 110 * year (for non-recent standard format) - yyyy or time (for numeric or recent standard format) [h]h:mm or Japanese year - yyyyXX 111 */ 112 + "((?:\\d+(?::\\d+)?)|(?:\\d{4}" + JA_YEAR + "))" // (20) 113 + "\\s" // separator 114 + "(.*)"; // the rest (21) 115 116 /** 117 * Whether leading spaces are trimmed from file names this was the case for the original implementation. 118 */ 119 final boolean trimLeadingSpaces; // package protected for access from test code 120 121 /** 122 * Constructs a new instance. 123 * 124 * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions. 125 * If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression. 126 */ 127 public UnixFTPEntryParser() { 128 this(null); 129 } 130 131 /** 132 * Constructs a new instance with something other than the default configuration. 133 * 134 * @param config The {@link FTPClientConfig configuration} object used to configure this parser. 135 * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions. 136 * If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression. 137 * @since 1.4 138 */ 139 public UnixFTPEntryParser(final FTPClientConfig config) { 140 this(config, false); 141 } 142 143 /** 144 * Constructs a new instance with something other than the default configuration. 145 * 146 * @param config The {@link FTPClientConfig configuration} object used to configure this parser. 147 * @param trimLeadingSpaces if {@code true}, trim leading spaces from file names 148 * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions. 149 * If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression. 150 * @since 3.4 151 */ 152 public UnixFTPEntryParser(final FTPClientConfig config, final boolean trimLeadingSpaces) { 153 super(REGEX); 154 configure(config); 155 this.trimLeadingSpaces = trimLeadingSpaces; 156 } 157 158 /** 159 * Gets a new default configuration to be used when this class is instantiated without a {@link FTPClientConfig FTPClientConfig} parameter being specified. 160 * 161 * @return the default configuration for this parser. 162 */ 163 @Override 164 protected FTPClientConfig getDefaultConfiguration() { 165 return new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT, DEFAULT_RECENT_DATE_FORMAT); 166 } 167 168 /** 169 * Parses a line of a Unix (standard) FTP server file listing and converts it into a usable format in the form of an {@code FTPFile} instance. If the 170 * file listing line doesn't describe a file, {@code null} is returned, otherwise a {@code FTPFile} instance representing the files in the 171 * directory is returned. 172 * 173 * @param entry A line of text from the file listing 174 * @return An FTPFile instance corresponding to the supplied entry 175 */ 176 @Override 177 public FTPFile parseFTPEntry(final String entry) { 178 final FTPFile file = new FTPFile(); 179 file.setRawListing(entry); 180 final int type; 181 boolean isDevice = false; 182 if (matches(entry)) { 183 final String typeStr = group(1); 184 final String hardLinkCount = group(15); 185 final String usr = group(16); 186 final String grp = group(17); 187 final String fileSize = group(18); 188 final String datestr = group(19) + " " + group(20); 189 String name = group(21); 190 if (trimLeadingSpaces) { 191 name = name.replaceFirst("^\\s+", ""); 192 } 193 try { 194 if (group(19).contains(JA_MONTH) && group(19).contains(JA_DAY)) { // special processing for Japanese format 195 final FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl(); 196 jaParser.configure(new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_JA, DEFAULT_RECENT_DATE_FORMAT_JA)); 197 file.setTimestamp(jaParser.parseTimestamp(datestr)); 198 } else if (group(19).contains(JA_MONTH) && !group(19).contains(JA_DAY)) { 199 final FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl(); 200 jaParser.configure(new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_CN, DEFAULT_RECENT_DATE_FORMAT_CN)); 201 file.setTimestamp(jaParser.parseTimestamp(datestr)); 202 } else { 203 file.setTimestamp(super.parseTimestamp(datestr)); 204 } 205 } catch (final ParseException e) { 206 // intentionally do nothing 207 } 208 // A 'whiteout' file is an ARTIFICIAL entry in any of several types of 209 // 'translucent' filesystems, of which a 'union' filesystem is one. 210 // bcdelfmpSs- 211 switch (typeStr.charAt(0)) { 212 case 'd': 213 type = FTPFile.DIRECTORY_TYPE; 214 break; 215 case 'e': // NET-39 => z/OS external link 216 type = FTPFile.SYMBOLIC_LINK_TYPE; 217 break; 218 case 'l': 219 type = FTPFile.SYMBOLIC_LINK_TYPE; 220 break; 221 case 'b': 222 case 'c': 223 isDevice = true; 224 type = FTPFile.FILE_TYPE; // TODO change this if DEVICE_TYPE implemented 225 break; 226 case 'f': 227 case '-': 228 type = FTPFile.FILE_TYPE; 229 break; 230 default: // e.g. ? and w = whiteout 231 type = FTPFile.UNKNOWN_TYPE; 232 } 233 file.setType(type); 234 int g = 4; 235 for (int access = 0; access < 3; access++, g += 4) { 236 // Use != '-' to avoid having to check for suid and sticky bits 237 file.setPermission(access, FTPFile.READ_PERMISSION, !group(g).equals("-")); 238 file.setPermission(access, FTPFile.WRITE_PERMISSION, !group(g + 1).equals("-")); 239 final String execPerm = group(g + 2); 240 file.setPermission(access, FTPFile.EXECUTE_PERMISSION, !execPerm.equals("-") && !Character.isUpperCase(execPerm.charAt(0))); 241 } 242 if (!isDevice) { 243 try { 244 file.setHardLinkCount(Integer.parseInt(hardLinkCount)); 245 } catch (final NumberFormatException e) { 246 // intentionally do nothing 247 } 248 } 249 file.setUser(usr); 250 file.setGroup(grp); 251 try { 252 file.setSize(Long.parseLong(fileSize)); 253 } catch (final NumberFormatException e) { 254 // intentionally do nothing 255 } 256 // oddball cases like symbolic links, file names 257 // with spaces in them. 258 if (type == FTPFile.SYMBOLIC_LINK_TYPE) { 259 final int end = name.indexOf(" -> "); 260 // Give up if no link indicator is present 261 if (end == -1) { 262 file.setName(name); 263 } else { 264 file.setName(name.substring(0, end)); 265 file.setLink(name.substring(end + 4)); 266 } 267 } else { 268 file.setName(name); 269 } 270 return file; 271 } 272 return null; 273 } 274 275 /** 276 * Preparses the list to discard "total nnn" lines. 277 */ 278 @Override 279 public List<String> preParse(final List<String> original) { 280 // NET-389 281 original.removeIf(entry -> TOTAL_PATTERN.matcher(entry).matches()); 282 return original; 283 } 284 285}