001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.ftp.parser;
import java.text.ParseException;
import java.util.List;
import java.util.ListIterator;
import java.util.regex.Pattern;

import org.apache.commons.net.ftp.FTPClientConfig;
import org.apache.commons.net.ftp.FTPFile;
025
026/**
027 * Implementation FTPFileEntryParser and FTPFileListParser for standard
028 * Unix Systems.
029 *
030 * This class is based on the logic of Daniel Savarese's
031 * DefaultFTPListParser, but adapted to use regular expressions and to fit the
032 * new FTPFileEntryParser interface.
033 * @version $Id: UnixFTPEntryParser.java 1741829 2016-05-01 00:24:44Z sebb $
034 * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for usage instructions)
035 */
036public class UnixFTPEntryParser extends ConfigurableFTPFileEntryParserImpl
037{
038
039    static final String DEFAULT_DATE_FORMAT
040        = "MMM d yyyy"; //Nov 9 2001
041
042    static final String DEFAULT_RECENT_DATE_FORMAT
043        = "MMM d HH:mm"; //Nov 9 20:06
044
045    static final String NUMERIC_DATE_FORMAT
046        = "yyyy-MM-dd HH:mm"; //2001-11-09 20:06
047
048    /**
049     * Some Linux distributions are now shipping an FTP server which formats
050     * file listing dates in an all-numeric format:
051     * <code>"yyyy-MM-dd HH:mm</code>.
052     * This is a very welcome development,  and hopefully it will soon become
053     * the standard.  However, since it is so new, for now, and possibly
054     * forever, we merely accomodate it, but do not make it the default.
055     * <p>
056     * For now end users may specify this format only via
057     * <code>UnixFTPEntryParser(FTPClientConfig)</code>.
058     * Steve Cohen - 2005-04-17
059     */
060    public static final FTPClientConfig NUMERIC_DATE_CONFIG =
061        new FTPClientConfig(
062                FTPClientConfig.SYST_UNIX,
063                NUMERIC_DATE_FORMAT,
064                null, null, null, null);
065
066    /**
067     * this is the regular expression used by this parser.
068     *
069     * Permissions:
070     *    r   the file is readable
071     *    w   the file is writable
072     *    x   the file is executable
073     *    -   the indicated permission is not granted
074     *    L   mandatory locking occurs during access (the set-group-ID bit is
075     *        on and the group execution bit is off)
076     *    s   the set-user-ID or set-group-ID bit is on, and the corresponding
077     *        user or group execution bit is also on
078     *    S   undefined bit-state (the set-user-ID bit is on and the user
079     *        execution bit is off)
080     *    t   the 1000 (octal) bit, or sticky bit, is on [see chmod(1)], and
081     *        execution is on
082     *    T   the 1000 bit is turned on, and execution is off (undefined bit-
083     *        state)
084     *    e   z/OS external link bit
085     *    Final letter may be appended:
086     *    +   file has extended security attributes (e.g. ACL)
087     *    Note: local listings on MacOSX also use '@';
088     *    this is not allowed for here as does not appear to be shown by FTP servers
089     *    {@code @}   file has extended attributes
090     */
091    private static final String REGEX =
092        "([bcdelfmpSs-])" // file type
093        +"(((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-])))\\+?" // permissions
094
095        + "\\s*"                                        // separator TODO why allow it to be omitted??
096
097        + "(\\d+)"                                      // link count
098
099        + "\\s+" // separator
100
101        + "(?:(\\S+(?:\\s\\S+)*?)\\s+)?"                // owner name (optional spaces)
102        + "(?:(\\S+(?:\\s\\S+)*)\\s+)?"                 // group name (optional spaces)
103        + "(\\d+(?:,\\s*\\d+)?)"                        // size or n,m
104
105        + "\\s+" // separator
106
107        /*
108         * numeric or standard format date:
109         *   yyyy-mm-dd (expecting hh:mm to follow)
110         *   MMM [d]d
111         *   [d]d MMM
112         *   N.B. use non-space for MMM to allow for languages such as German which use
113         *   diacritics (e.g. umlaut) in some abbreviations.
114        */
115        + "((?:\\d+[-/]\\d+[-/]\\d+)|(?:\\S{3}\\s+\\d{1,2})|(?:\\d{1,2}\\s+\\S{3}))"
116
117        + "\\s+" // separator
118
119        /*
120           year (for non-recent standard format) - yyyy
121           or time (for numeric or recent standard format) [h]h:mm
122        */
123        + "(\\d+(?::\\d+)?)" // (20)
124
125        + "\\s" // separator
126
127        + "(.*)"; // the rest (21)
128
129
130    // if true, leading spaces are trimmed from file names
131    // this was the case for the original implementation
132    private final boolean trimLeadingSpaces;
133
134    /**
135     * The default constructor for a UnixFTPEntryParser object.
136     *
137     * @exception IllegalArgumentException
138     * Thrown if the regular expression is unparseable.  Should not be seen
139     * under normal conditions.  It it is seen, this is a sign that
140     * <code>REGEX</code> is  not a valid regular expression.
141     */
142    public UnixFTPEntryParser()
143    {
144        this(null);
145    }
146
147    /**
148     * This constructor allows the creation of a UnixFTPEntryParser object with
149     * something other than the default configuration.
150     *
151     * @param config The {@link FTPClientConfig configuration} object used to
152     * configure this parser.
153     * @exception IllegalArgumentException
154     * Thrown if the regular expression is unparseable.  Should not be seen
155     * under normal conditions.  It it is seen, this is a sign that
156     * <code>REGEX</code> is  not a valid regular expression.
157     * @since 1.4
158     */
159    public UnixFTPEntryParser(FTPClientConfig config)
160    {
161        this(config, false);
162    }
163
164    /**
165     * This constructor allows the creation of a UnixFTPEntryParser object with
166     * something other than the default configuration.
167     *
168     * @param config The {@link FTPClientConfig configuration} object used to
169     * configure this parser.
170     * @param trimLeadingSpaces if {@code true}, trim leading spaces from file names
171     * @exception IllegalArgumentException
172     * Thrown if the regular expression is unparseable.  Should not be seen
173     * under normal conditions.  It it is seen, this is a sign that
174     * <code>REGEX</code> is  not a valid regular expression.
175     * @since 3.4
176     */
177    public UnixFTPEntryParser(FTPClientConfig config, boolean trimLeadingSpaces)
178    {
179        super(REGEX);
180        configure(config);
181        this.trimLeadingSpaces = trimLeadingSpaces;
182    }
183
184    /**
185     * Preparse the list to discard "total nnn" lines
186     */
187    @Override
188    public List<String> preParse(List<String> original) {
189        ListIterator<String> iter = original.listIterator();
190        while (iter.hasNext()) {
191            String entry = iter.next();
192            if (entry.matches("^total \\d+$")) { // NET-389
193                iter.remove();
194            }
195        }
196        return original;
197    }
198
199    /**
200     * Parses a line of a unix (standard) FTP server file listing and converts
201     * it into a usable format in the form of an <code> FTPFile </code>
202     * instance.  If the file listing line doesn't describe a file,
203     * <code> null </code> is returned, otherwise a <code> FTPFile </code>
204     * instance representing the files in the directory is returned.
205     *
206     * @param entry A line of text from the file listing
207     * @return An FTPFile instance corresponding to the supplied entry
208     */
209    @Override
210    public FTPFile parseFTPEntry(String entry) {
211        FTPFile file = new FTPFile();
212        file.setRawListing(entry);
213        int type;
214        boolean isDevice = false;
215
216        if (matches(entry))
217        {
218            String typeStr = group(1);
219            String hardLinkCount = group(15);
220            String usr = group(16);
221            String grp = group(17);
222            String filesize = group(18);
223            String datestr = group(19) + " " + group(20);
224            String name = group(21);
225            if (trimLeadingSpaces) {
226                name = name.replaceFirst("^\\s+", "");
227            }
228
229            try
230            {
231                file.setTimestamp(super.parseTimestamp(datestr));
232            }
233            catch (ParseException e)
234            {
235                 // intentionally do nothing
236            }
237
238            // A 'whiteout' file is an ARTIFICIAL entry in any of several types of
239            // 'translucent' filesystems, of which a 'union' filesystem is one.
240
241            // bcdelfmpSs-
242            switch (typeStr.charAt(0))
243            {
244            case 'd':
245                type = FTPFile.DIRECTORY_TYPE;
246                break;
247            case 'e': // NET-39 => z/OS external link
248                type = FTPFile.SYMBOLIC_LINK_TYPE;
249                break;
250            case 'l':
251                type = FTPFile.SYMBOLIC_LINK_TYPE;
252                break;
253            case 'b':
254            case 'c':
255                isDevice = true;
256                type = FTPFile.FILE_TYPE; // TODO change this if DEVICE_TYPE implemented
257                break;
258            case 'f':
259            case '-':
260                type = FTPFile.FILE_TYPE;
261                break;
262            default: // e.g. ? and w = whiteout
263                type = FTPFile.UNKNOWN_TYPE;
264            }
265
266            file.setType(type);
267
268            int g = 4;
269            for (int access = 0; access < 3; access++, g += 4)
270            {
271                // Use != '-' to avoid having to check for suid and sticky bits
272                file.setPermission(access, FTPFile.READ_PERMISSION,
273                                   (!group(g).equals("-")));
274                file.setPermission(access, FTPFile.WRITE_PERMISSION,
275                                   (!group(g + 1).equals("-")));
276
277                String execPerm = group(g + 2);
278                if (!execPerm.equals("-") && !Character.isUpperCase(execPerm.charAt(0)))
279                {
280                    file.setPermission(access, FTPFile.EXECUTE_PERMISSION, true);
281                }
282                else
283                {
284                    file.setPermission(access, FTPFile.EXECUTE_PERMISSION, false);
285                }
286            }
287
288            if (!isDevice)
289            {
290                try
291                {
292                    file.setHardLinkCount(Integer.parseInt(hardLinkCount));
293                }
294                catch (NumberFormatException e)
295                {
296                    // intentionally do nothing
297                }
298            }
299
300            file.setUser(usr);
301            file.setGroup(grp);
302
303            try
304            {
305                file.setSize(Long.parseLong(filesize));
306            }
307            catch (NumberFormatException e)
308            {
309                // intentionally do nothing
310            }
311
312            // oddball cases like symbolic links, file names
313            // with spaces in them.
314            if (type == FTPFile.SYMBOLIC_LINK_TYPE)
315            {
316
317                int end = name.indexOf(" -> ");
318                // Give up if no link indicator is present
319                if (end == -1)
320                {
321                    file.setName(name);
322                }
323                else
324                {
325                    file.setName(name.substring(0, end));
326                    file.setLink(name.substring(end + 4));
327                }
328
329            }
330            else
331            {
332                file.setName(name);
333            }
334            return file;
335        }
336        return null;
337    }
338
339    /**
340     * Defines a default configuration to be used when this class is
341     * instantiated without a {@link  FTPClientConfig  FTPClientConfig}
342     * parameter being specified.
343     * @return the default configuration for this parser.
344     */
345    @Override
346    protected FTPClientConfig getDefaultConfiguration() {
347        return new FTPClientConfig(
348                FTPClientConfig.SYST_UNIX,
349                DEFAULT_DATE_FORMAT,
350                DEFAULT_RECENT_DATE_FORMAT,
351                null, null, null);
352    }
353
354}