001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.ftp.parser;
019import java.text.ParseException;
020import java.util.List;
021import java.util.ListIterator;
022
023import org.apache.commons.net.ftp.FTPClientConfig;
024import org.apache.commons.net.ftp.FTPFile;
025
026/**
027 * Implementation FTPFileEntryParser and FTPFileListParser for standard
028 * Unix Systems.
029 *
030 * This class is based on the logic of Daniel Savarese's
031 * DefaultFTPListParser, but adapted to use regular expressions and to fit the
032 * new FTPFileEntryParser interface.
033 * @version $Id: UnixFTPEntryParser.java 1781925 2017-02-06 16:43:40Z sebb $
034 * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for usage instructions)
035 */
036public class UnixFTPEntryParser extends ConfigurableFTPFileEntryParserImpl
037{
038
039    static final String DEFAULT_DATE_FORMAT
040        = "MMM d yyyy"; //Nov 9 2001
041
042    static final String DEFAULT_RECENT_DATE_FORMAT
043        = "MMM d HH:mm"; //Nov 9 20:06
044
045    static final String NUMERIC_DATE_FORMAT
046        = "yyyy-MM-dd HH:mm"; //2001-11-09 20:06
047
048    // Suffixes used in Japanese listings after the numeric values
049    private static final String JA_MONTH = "\u6708";
050    private static final String JA_DAY   = "\u65e5";
051    private static final String JA_YEAR  = "\u5e74";
052
053    private static final String DEFAULT_DATE_FORMAT_JA
054        = "M'" + JA_MONTH + "' d'" + JA_DAY + "' yyyy'" + JA_YEAR + "'"; //6月 3日 2003年
055
056    private static final String DEFAULT_RECENT_DATE_FORMAT_JA
057        = "M'" + JA_MONTH + "' d'" + JA_DAY + "' HH:mm"; //8月 17日 20:10
058
059    /**
060     * Some Linux distributions are now shipping an FTP server which formats
061     * file listing dates in an all-numeric format:
062     * <code>"yyyy-MM-dd HH:mm</code>.
063     * This is a very welcome development,  and hopefully it will soon become
064     * the standard.  However, since it is so new, for now, and possibly
065     * forever, we merely accomodate it, but do not make it the default.
066     * <p>
067     * For now end users may specify this format only via
068     * <code>UnixFTPEntryParser(FTPClientConfig)</code>.
069     * Steve Cohen - 2005-04-17
070     */
071    public static final FTPClientConfig NUMERIC_DATE_CONFIG =
072        new FTPClientConfig(
073                FTPClientConfig.SYST_UNIX,
074                NUMERIC_DATE_FORMAT,
075                null);
076
077    /**
078     * this is the regular expression used by this parser.
079     *
080     * Permissions:
081     *    r   the file is readable
082     *    w   the file is writable
083     *    x   the file is executable
084     *    -   the indicated permission is not granted
085     *    L   mandatory locking occurs during access (the set-group-ID bit is
086     *        on and the group execution bit is off)
087     *    s   the set-user-ID or set-group-ID bit is on, and the corresponding
088     *        user or group execution bit is also on
089     *    S   undefined bit-state (the set-user-ID bit is on and the user
090     *        execution bit is off)
091     *    t   the 1000 (octal) bit, or sticky bit, is on [see chmod(1)], and
092     *        execution is on
093     *    T   the 1000 bit is turned on, and execution is off (undefined bit-
094     *        state)
095     *    e   z/OS external link bit
096     *    Final letter may be appended:
097     *    +   file has extended security attributes (e.g. ACL)
098     *    Note: local listings on MacOSX also use '@';
099     *    this is not allowed for here as does not appear to be shown by FTP servers
100     *    {@code @}   file has extended attributes
101     */
102    private static final String REGEX =
103        "([bcdelfmpSs-])" // file type
104        +"(((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-])))\\+?" // permissions
105
106        + "\\s*"                                        // separator TODO why allow it to be omitted??
107
108        + "(\\d+)"                                      // link count
109
110        + "\\s+" // separator
111
112        + "(?:(\\S+(?:\\s\\S+)*?)\\s+)?"                // owner name (optional spaces)
113        + "(?:(\\S+(?:\\s\\S+)*)\\s+)?"                 // group name (optional spaces)
114        + "(\\d+(?:,\\s*\\d+)?)"                        // size or n,m
115
116        + "\\s+" // separator
117
118        /*
119         * numeric or standard format date:
120         *   yyyy-mm-dd (expecting hh:mm to follow)
121         *   MMM [d]d
122         *   [d]d MMM
123         *   N.B. use non-space for MMM to allow for languages such as German which use
124         *   diacritics (e.g. umlaut) in some abbreviations.
125         *   Japanese uses numeric day and month with suffixes to distinguish them
126         *   [d]dXX [d]dZZ
127        */
128        + "("+
129            "(?:\\d+[-/]\\d+[-/]\\d+)" + // yyyy-mm-dd
130            "|(?:\\S{3}\\s+\\d{1,2})" +  // MMM [d]d
131            "|(?:\\d{1,2}\\s+\\S{3})" + // [d]d MMM
132            "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2}" + JA_DAY + ")"+
133           ")"
134
135        + "\\s+" // separator
136
137        /*
138           year (for non-recent standard format) - yyyy
139           or time (for numeric or recent standard format) [h]h:mm
140           or Japanese year - yyyyXX
141        */
142        + "((?:\\d+(?::\\d+)?)|(?:\\d{4}" + JA_YEAR + "))" // (20)
143
144        + "\\s" // separator
145
146        + "(.*)"; // the rest (21)
147
148
149    // if true, leading spaces are trimmed from file names
150    // this was the case for the original implementation
151    final boolean trimLeadingSpaces; // package protected for access from test code
152
153    /**
154     * The default constructor for a UnixFTPEntryParser object.
155     *
156     * @throws IllegalArgumentException
157     * Thrown if the regular expression is unparseable.  Should not be seen
158     * under normal conditions.  It it is seen, this is a sign that
159     * <code>REGEX</code> is  not a valid regular expression.
160     */
161    public UnixFTPEntryParser()
162    {
163        this(null);
164    }
165
166    /**
167     * This constructor allows the creation of a UnixFTPEntryParser object with
168     * something other than the default configuration.
169     *
170     * @param config The {@link FTPClientConfig configuration} object used to
171     * configure this parser.
172     * @throws IllegalArgumentException
173     * Thrown if the regular expression is unparseable.  Should not be seen
174     * under normal conditions.  It it is seen, this is a sign that
175     * <code>REGEX</code> is  not a valid regular expression.
176     * @since 1.4
177     */
178    public UnixFTPEntryParser(FTPClientConfig config)
179    {
180        this(config, false);
181    }
182
183    /**
184     * This constructor allows the creation of a UnixFTPEntryParser object with
185     * something other than the default configuration.
186     *
187     * @param config The {@link FTPClientConfig configuration} object used to
188     * configure this parser.
189     * @param trimLeadingSpaces if {@code true}, trim leading spaces from file names
190     * @throws IllegalArgumentException
191     * Thrown if the regular expression is unparseable.  Should not be seen
192     * under normal conditions.  It it is seen, this is a sign that
193     * <code>REGEX</code> is  not a valid regular expression.
194     * @since 3.4
195     */
196    public UnixFTPEntryParser(FTPClientConfig config, boolean trimLeadingSpaces)
197    {
198        super(REGEX);
199        configure(config);
200        this.trimLeadingSpaces = trimLeadingSpaces;
201    }
202
203    /**
204     * Preparse the list to discard "total nnn" lines
205     */
206    @Override
207    public List<String> preParse(List<String> original) {
208        ListIterator<String> iter = original.listIterator();
209        while (iter.hasNext()) {
210            String entry = iter.next();
211            if (entry.matches("^total \\d+$")) { // NET-389
212                iter.remove();
213            }
214        }
215        return original;
216    }
217
218    /**
219     * Parses a line of a unix (standard) FTP server file listing and converts
220     * it into a usable format in the form of an <code> FTPFile </code>
221     * instance.  If the file listing line doesn't describe a file,
222     * <code> null </code> is returned, otherwise a <code> FTPFile </code>
223     * instance representing the files in the directory is returned.
224     *
225     * @param entry A line of text from the file listing
226     * @return An FTPFile instance corresponding to the supplied entry
227     */
228    @Override
229    public FTPFile parseFTPEntry(String entry) {
230        FTPFile file = new FTPFile();
231        file.setRawListing(entry);
232        int type;
233        boolean isDevice = false;
234
235        if (matches(entry))
236        {
237            String typeStr = group(1);
238            String hardLinkCount = group(15);
239            String usr = group(16);
240            String grp = group(17);
241            String filesize = group(18);
242            String datestr = group(19) + " " + group(20);
243            String name = group(21);
244            if (trimLeadingSpaces) {
245                name = name.replaceFirst("^\\s+", "");
246            }
247
248            try
249            {
250                if (group(19).contains(JA_MONTH)) { // special processing for Japanese format
251                    FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl();
252                    jaParser.configure(new FTPClientConfig(
253                            FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_JA, DEFAULT_RECENT_DATE_FORMAT_JA));
254                    file.setTimestamp(jaParser.parseTimestamp(datestr));
255                } else {
256                    file.setTimestamp(super.parseTimestamp(datestr));
257                }
258            }
259            catch (ParseException e)
260            {
261                 // intentionally do nothing
262            }
263
264            // A 'whiteout' file is an ARTIFICIAL entry in any of several types of
265            // 'translucent' filesystems, of which a 'union' filesystem is one.
266
267            // bcdelfmpSs-
268            switch (typeStr.charAt(0))
269            {
270            case 'd':
271                type = FTPFile.DIRECTORY_TYPE;
272                break;
273            case 'e': // NET-39 => z/OS external link
274                type = FTPFile.SYMBOLIC_LINK_TYPE;
275                break;
276            case 'l':
277                type = FTPFile.SYMBOLIC_LINK_TYPE;
278                break;
279            case 'b':
280            case 'c':
281                isDevice = true;
282                type = FTPFile.FILE_TYPE; // TODO change this if DEVICE_TYPE implemented
283                break;
284            case 'f':
285            case '-':
286                type = FTPFile.FILE_TYPE;
287                break;
288            default: // e.g. ? and w = whiteout
289                type = FTPFile.UNKNOWN_TYPE;
290            }
291
292            file.setType(type);
293
294            int g = 4;
295            for (int access = 0; access < 3; access++, g += 4)
296            {
297                // Use != '-' to avoid having to check for suid and sticky bits
298                file.setPermission(access, FTPFile.READ_PERMISSION,
299                                   (!group(g).equals("-")));
300                file.setPermission(access, FTPFile.WRITE_PERMISSION,
301                                   (!group(g + 1).equals("-")));
302
303                String execPerm = group(g + 2);
304                if (!execPerm.equals("-") && !Character.isUpperCase(execPerm.charAt(0)))
305                {
306                    file.setPermission(access, FTPFile.EXECUTE_PERMISSION, true);
307                }
308                else
309                {
310                    file.setPermission(access, FTPFile.EXECUTE_PERMISSION, false);
311                }
312            }
313
314            if (!isDevice)
315            {
316                try
317                {
318                    file.setHardLinkCount(Integer.parseInt(hardLinkCount));
319                }
320                catch (NumberFormatException e)
321                {
322                    // intentionally do nothing
323                }
324            }
325
326            file.setUser(usr);
327            file.setGroup(grp);
328
329            try
330            {
331                file.setSize(Long.parseLong(filesize));
332            }
333            catch (NumberFormatException e)
334            {
335                // intentionally do nothing
336            }
337
338            // oddball cases like symbolic links, file names
339            // with spaces in them.
340            if (type == FTPFile.SYMBOLIC_LINK_TYPE)
341            {
342
343                int end = name.indexOf(" -> ");
344                // Give up if no link indicator is present
345                if (end == -1)
346                {
347                    file.setName(name);
348                }
349                else
350                {
351                    file.setName(name.substring(0, end));
352                    file.setLink(name.substring(end + 4));
353                }
354
355            }
356            else
357            {
358                file.setName(name);
359            }
360            return file;
361        }
362        return null;
363    }
364
365    /**
366     * Defines a default configuration to be used when this class is
367     * instantiated without a {@link  FTPClientConfig  FTPClientConfig}
368     * parameter being specified.
369     * @return the default configuration for this parser.
370     */
371    @Override
372    protected FTPClientConfig getDefaultConfiguration() {
373        return new FTPClientConfig(
374                FTPClientConfig.SYST_UNIX,
375                DEFAULT_DATE_FORMAT,
376                DEFAULT_RECENT_DATE_FORMAT);
377    }
378
379}