MVSFTPEntryParser.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.net.ftp.parser;

  18. import java.text.ParseException;
  19. import java.util.List;

  20. import org.apache.commons.net.ftp.Configurable;
  21. import org.apache.commons.net.ftp.FTPClientConfig;
  22. import org.apache.commons.net.ftp.FTPFile;
  23. import org.apache.commons.net.ftp.FTPFileEntryParser;

  24. /**
  25.  * Implements {@link FTPFileEntryParser} and {@link Configurable} for IBM zOS/MVS Systems.
  26.  *
  27.  * @see FTPFileEntryParser Usage instructions.
  28.  */
  29. public class MVSFTPEntryParser extends ConfigurableFTPFileEntryParserImpl {

  30.     static final int UNKNOWN_LIST_TYPE = -1;
  31.     static final int FILE_LIST_TYPE = 0;
  32.     static final int MEMBER_LIST_TYPE = 1;
  33.     static final int UNIX_LIST_TYPE = 2;
  34.     static final int JES_LEVEL_1_LIST_TYPE = 3;
  35.     static final int JES_LEVEL_2_LIST_TYPE = 4;

  36.     /**
  37.      * Dates are ignored for file lists, but are used for member lists where possible
  38.      */
  39.     static final String DEFAULT_DATE_FORMAT = "yyyy/MM/dd HH:mm"; // 2001/09/18
  40.                                                                   // 13:52

  41.     /**
  42.      * Matches these entries:
  43.      *
  44.      * <pre>
  45.      *  Volume Unit    Referred Ext Used Recfm Lrecl BlkSz Dsorg Dsname
  46.      *  B10142 3390   2006/03/20  2   31  F       80    80  PS   MDI.OKL.WORK
  47.      * </pre>
  48.      *
  49.      * @see <a href= "https://www.ibm.com/support/knowledgecenter/zosbasics/com.ibm.zos.zconcepts/zconcepts_159.htm">Data set record formats</a>
  50.      */
  51.     static final String FILE_LIST_REGEX = "\\S+\\s+" + // volume
  52.                                                        // ignored
  53.             "\\S+\\s+" + // unit - ignored
  54.             "\\S+\\s+" + // access date - ignored
  55.             "\\S+\\s+" + // extents -ignored
  56.             // If the values are too large, the fields may be merged (NET-639)
  57.             "(?:\\S+\\s+)?" + // used - ignored
  58.             "\\S+\\s+" + // recfm - ignored
  59.             "\\S+\\s+" + // logical record length -ignored
  60.             "\\S+\\s+" + // block size - ignored
  61.             "(PS|PO|PO-E)\\s+" + // Dataset organisation. Many exist
  62.             // but only support: PS, PO, PO-E
  63.             "(\\S+)\\s*"; // Dataset Name (file name)

  64.     /**
  65.      * Matches these entries:
  66.      *
  67.      * <pre>
  68.      *   Name      VV.MM   Created       Changed      Size  Init   Mod   Id
  69.      *   TBSHELF   01.03 2002/09/12 2002/10/11 09:37    11    11     0 KIL001
  70.      * </pre>
  71.      */
  72.     static final String MEMBER_LIST_REGEX = "(\\S+)\\s+" + // name
  73.             "\\S+\\s+" + // version, modification (ignored)
  74.             "\\S+\\s+" + // create date (ignored)
  75.             "(\\S+)\\s+" + // modification date
  76.             "(\\S+)\\s+" + // modification time
  77.             "\\S+\\s+" + // size in lines (ignored)
  78.             "\\S+\\s+" + // size in lines at creation(ignored)
  79.             "\\S+\\s+" + // lines modified (ignored)
  80.             "\\S+\\s*"; // id of user who modified (ignored)

  81.     /**
  82.      * Matches these entries, note: no header:
  83.      *
  84.      * <pre>
  85.      *   IBMUSER1  JOB01906  OUTPUT    3 Spool Files
  86.      *   012345678901234567890123456789012345678901234
  87.      *             1         2         3         4
  88.      * </pre>
  89.      */
  90.     static final String JES_LEVEL_1_LIST_REGEX = "(\\S+)\\s+" + // job name ignored
  91.             "(\\S+)\\s+" + // job number
  92.             "(\\S+)\\s+" + // job status (OUTPUT,INPUT,ACTIVE)
  93.             "(\\S+)\\s+" + // number of spool files
  94.             "(\\S+)\\s+" + // Text "Spool" ignored
  95.             "(\\S+)\\s*" // Text "Files" ignored
  96.     ;

  97.     /**
  98.      * JES INTERFACE LEVEL 2 parser Matches these entries:
  99.      *
  100.      * <pre>
  101.      * JOBNAME  JOBID    OWNER    STATUS CLASS
  102.      * IBMUSER1 JOB01906 IBMUSER  OUTPUT A        RC=0000 3 spool files
  103.      * IBMUSER  TSU01830 IBMUSER  OUTPUT TSU      ABEND=522 3 spool files
  104.      * </pre>
  105.      *
  106.      * Sample output from FTP session:
  107.      *
  108.      * <pre>
  109.      * ftp> quote site filetype=jes
  110.      * 200 SITE command was accepted
  111.      * ftp> ls
  112.      * 200 Port request OK.
  113.      * 125 List started OK for JESJOBNAME=IBMUSER*, JESSTATUS=ALL and JESOWNER=IBMUSER
  114.      * JOBNAME  JOBID    OWNER    STATUS CLASS
  115.      * IBMUSER1 JOB01906 IBMUSER  OUTPUT A        RC=0000 3 spool files
  116.      * IBMUSER  TSU01830 IBMUSER  OUTPUT TSU      ABEND=522 3 spool files
  117.      * 250 List completed successfully.
  118.      * ftp> ls job01906
  119.      * 200 Port request OK.
  120.      * 125 List started OK for JESJOBNAME=IBMUSER*, JESSTATUS=ALL and JESOWNER=IBMUSER
  121.      * JOBNAME  JOBID    OWNER    STATUS CLASS
  122.      * IBMUSER1 JOB01906 IBMUSER  OUTPUT A        RC=0000
  123.      * --------
  124.      * ID  STEPNAME PROCSTEP C DDNAME   BYTE-COUNT
  125.      * 001 JES2              A JESMSGLG       858
  126.      * 002 JES2              A JESJCL         128
  127.      * 003 JES2              A JESYSMSG       443
  128.      * 3 spool files
  129.      * 250 List completed successfully.
  130.      * </pre>
  131.      */

  132.     static final String JES_LEVEL_2_LIST_REGEX = "(\\S+)\\s+" + // job name ignored
  133.             "(\\S+)\\s+" + // job number
  134.             "(\\S+)\\s+" + // owner ignored
  135.             "(\\S+)\\s+" + // job status (OUTPUT,INPUT,ACTIVE) ignored
  136.             "(\\S+)\\s+" + // job class ignored
  137.             "(\\S+).*" // rest ignored
  138.     ;

  139.     private int isType = UNKNOWN_LIST_TYPE;

  140.     /**
  141.      * Fallback parser for Unix-style listings
  142.      */
  143.     private UnixFTPEntryParser unixFTPEntryParser;

  144.     /*
  145.      * --------------------------------------------------------------------- Very brief and incomplete description of the zOS/MVS-file system. (Note: "zOS" is
  146.      * the operating system on the mainframe, and is the new name for MVS)
  147.      *
  148.      * The file system on the mainframe does not have hierarchical structure as for example the unix file system. For a more comprehensive description,
  149.      * please refer to the IBM manuals
  150.      *
  151.      * @LINK: https://publibfp.boulder.ibm.com/cgi-bin/bookmgr/BOOKS/dgt2d440/CONTENTS
  152.      *
  153.      *
  154.      * Dataset names =============
  155.      *
  156.      * A dataset name consist of a number of qualifiers separated by '.', each qualifier can be at most 8 characters, and the total length of a dataset can be
  157.      * max 44 characters including the dots.
  158.      *
  159.      *
  160.      * Dataset organisation ====================
  161.      *
  162.      * A dataset represents a piece of storage allocated on one or more disks. The structure of the storage is described with the field dataset organisation
  163.      * (DSORG). There are a number of dataset organisations, but only two are usable for FTP transfer.
  164.      *
  165.      * DSORG: PS: sequential, or flat file PO: partitioned dataset PO-E: extended partitioned dataset
  166.      *
  167.      * The PS file is just a flat file, as you would find it on the unix file system.
  168.      *
  169.      * The PO and PO-E files, can be compared to a single level directory structure. A PO file consist of a number of dataset members, or files if you will. It
  170.      * is possible to CD into the file, and to retrieve the individual members.
  171.      *
  172.      *
  173.      * Dataset record format =====================
  174.      *
  175.      * The physical layout of the dataset is described on the dataset itself. There are a number of record formats (RECFM), but just a few is relevant for the
  176.      * FTP transfer.
  177.      *
  178.      * Any one beginning with either F or V can safely be used by FTP transfer. All others should only be used with great care. F means a fixed number of
  179.      * records per allocated storage, and V means a variable number of records.
  180.      *
  181.      *
  182.      * Other notes ===========
  183.      *
  184.      * The file system supports automatically backup and retrieval of datasets. If a file is backed up, the ftp LIST command will return: ARCIVE Not Direct
  185.      * Access Device KJ.IOP998.ERROR.PL.UNITTEST
  186.      *
  187.      *
  188.      * Implementation notes ====================
  189.      *
  190.      * Only datasets that have dsorg PS, PO or PO-E and have recfm beginning with F or V or U, is fully parsed.
  191.      *
  192.      * The following fields in FTPFile is used: FTPFile.Rawlisting: Always set. FTPFile.Type: DIRECTORY_TYPE or FILE_TYPE or UNKNOWN FTPFile.Name: name
  193.      * FTPFile.Timestamp: change time or null
  194.      *
  195.      *
  196.      *
  197.      * Additional information ======================
  198.      *
  199.      * The MVS ftp server supports a number of features via the FTP interface. The features are controlled with the FTP command quote site
  200.      * filetype=<SEQ|JES|DB2> SEQ is the default and used for normal file transfer JES is used to interact with the Job Entry Subsystem (JES) similar to a job
  201.      * scheduler DB2 is used to interact with a DB2 subsystem
  202.      *
  203.      * This parser supports SEQ and JES.
  204.      */

  205.     /**
  206.      * The sole constructor for a MVSFTPEntryParser object.
  207.      */
  208.     public MVSFTPEntryParser() {
  209.         super(""); // note the regex is set in preParse.
  210.         super.configure(null); // configure parser with default configurations
  211.     }

  212.     /*
  213.      * @return
  214.      */
  215.     @Override
  216.     protected FTPClientConfig getDefaultConfiguration() {
  217.         return new FTPClientConfig(FTPClientConfig.SYST_MVS, DEFAULT_DATE_FORMAT, null);
  218.     }

  219.     /**
  220.      * Parses entries representing a dataset list.
  221.      * <pre>
  222.      * Format of ZOS/MVS file list: 1 2 3 4 5 6 7 8 9 10
  223.      * Volume Unit Referred Ext Used Recfm Lrecl BlkSz Dsorg Dsname
  224.      * B10142 3390 2006/03/20 2 31 F 80 80 PS MDI.OKL.WORK
  225.      * ARCIVE Not Direct Access Device KJ.IOP998.ERROR.PL.UNITTEST
  226.      * B1N231 3390 2006/03/20 1 15 VB 256 27998 PO PLU
  227.      * B1N231 3390 2006/03/20 1 15 VB 256 27998 PO-E PLB
  228.      * Migrated                                                HLQ.DATASET.NAME
  229.      * </pre>
  230.      * <pre>
  231.      * ----------------------------------- Group within Regex [1] Volume [2] Unit [3] Referred [4] Ext: number of extents [5] Used [6] Recfm: Record format [7]
  232.      * Lrecl: Logical record length [8] BlkSz: Block size [9] Dsorg: Dataset organisation. Many exists but only support: PS, PO, PO-E [10] Dsname: Dataset name
  233.      * </pre>
  234.      *
  235.      * @param entry zosDirectoryEntry
  236.      * @return null: entry was not parsed.
  237.      */
  238.     private FTPFile parseFileList(final String entry) {
  239.         if (matches(entry)) {
  240.             final FTPFile file = new FTPFile();
  241.             file.setRawListing(entry);
  242.             final String name = group(2);
  243.             final String dsorg = group(1);
  244.             file.setName(name);

  245.             // DSORG
  246.             if ("PS".equals(dsorg)) {
  247.                 file.setType(FTPFile.FILE_TYPE);
  248.             } else if ("PO".equals(dsorg) || "PO-E".equals(dsorg)) {
  249.                 // regex already ruled out anything other than PO or PO-E
  250.                 file.setType(FTPFile.DIRECTORY_TYPE);
  251.             } else {
  252.                 return null;
  253.             }

  254.             return file;
  255.         }

  256.         final boolean migrated = entry.startsWith("Migrated");
  257.         if (migrated || entry.startsWith("ARCIVE")) {
  258.             // Type of file is unknown for migrated datasets
  259.             final FTPFile file = new FTPFile();
  260.             file.setRawListing(entry);
  261.             file.setType(FTPFile.UNKNOWN_TYPE);
  262.             file.setName(entry.split("\\s+")[migrated ? 1 : 5]);
  263.             return file;
  264.         }

  265.         return null;
  266.     }

  267.     /**
  268.      * Parses a line of a z/OS - MVS FTP server file listing and converts it into a usable format in the form of an <code>FTPFile</code> instance. If the
  269.      * file listing line doesn't describe a file, then <code>null</code> is returned. Otherwise, a <code>FTPFile</code> instance representing the file is
  270.      * returned.
  271.      *
  272.      * @param entry A line of text from the file listing
  273.      * @return An FTPFile instance corresponding to the supplied entry
  274.      */
  275.     @Override
  276.     public FTPFile parseFTPEntry(final String entry) {
  277.         switch (isType) {
  278.         case FILE_LIST_TYPE:
  279.             return parseFileList(entry);
  280.         case MEMBER_LIST_TYPE:
  281.             return parseMemberList(entry);
  282.         case UNIX_LIST_TYPE:
  283.             return unixFTPEntryParser.parseFTPEntry(entry);
  284.         case JES_LEVEL_1_LIST_TYPE:
  285.             return parseJeslevel1List(entry);
  286.         case JES_LEVEL_2_LIST_TYPE:
  287.             return parseJeslevel2List(entry);
  288.         default:
  289.             break;
  290.         }

  291.         return null;
  292.     }

  293.     /**
  294.      * Matches these entries, note: no header:
  295.      *
  296.      * <pre>
  297.      * [1]      [2]      [3]   [4] [5]
  298.      * IBMUSER1 JOB01906 OUTPUT 3 Spool Files
  299.      * 012345678901234567890123456789012345678901234
  300.      *           1         2         3         4
  301.      * -------------------------------------------
  302.      * Group in regex
  303.      * [1] Job name
  304.      * [2] Job number
  305.      * [3] Job status (INPUT,ACTIVE,OUTPUT)
  306.      * [4] Number of sysout files
  307.      * [5] The string "Spool Files"
  308.      * </pre>
  309.      *
  310.      * @param entry zosDirectoryEntry
  311.      * @return null: entry was not parsed.
  312.      */
  313.     private FTPFile parseJeslevel1List(final String entry) {
  314.         return parseJeslevelList(entry, 3);
  315.     }

  316.     /**
  317.      * Matches these entries:
  318.      *
  319.      * <pre>
  320.      * [1]      [2]      [3]     [4]    [5]
  321.      * JOBNAME  JOBID    OWNER   STATUS CLASS
  322.      * IBMUSER1 JOB01906 IBMUSER OUTPUT A       RC=0000 3 spool files
  323.      * IBMUSER  TSU01830 IBMUSER OUTPUT TSU     ABEND=522 3 spool files
  324.      * 012345678901234567890123456789012345678901234
  325.      *           1         2         3         4
  326.      * -------------------------------------------
  327.      * Group in regex
  328.      * [1] Job name
  329.      * [2] Job number
  330.      * [3] Owner
  331.      * [4] Job status (INPUT,ACTIVE,OUTPUT)
  332.      * [5] Job Class
  333.      * [6] The rest
  334.      * </pre>
  335.      *
  336.      * @param entry zosDirectoryEntry
  337.      * @return null: entry was not parsed.
  338.      */
  339.     private FTPFile parseJeslevel2List(final String entry) {
  340.         return parseJeslevelList(entry, 4);
  341.     }

  342.     private FTPFile parseJeslevelList(final String entry, final int matchNum) {
  343.         if (matches(entry)) {
  344.             final FTPFile file = new FTPFile();
  345.             if (group(matchNum).equalsIgnoreCase("OUTPUT")) {
  346.                 file.setRawListing(entry);
  347.                 final String name = group(2); /* Job Number, used by GET */
  348.                 file.setName(name);
  349.                 file.setType(FTPFile.FILE_TYPE);
  350.                 return file;
  351.             }
  352.         }
  353.         return null;
  354.     }

  355.     /**
  356.      * Parses entries within a partitioned dataset.
  357.      *
  358.      * Format of a memberlist within a PDS:
  359.      *
  360.      * <pre>
  361.      *    0         1        2          3        4     5     6      7    8
  362.      *   Name      VV.MM   Created       Changed      Size  Init   Mod   Id
  363.      *   TBSHELF   01.03 2002/09/12 2002/10/11 09:37    11    11     0 KIL001
  364.      *   TBTOOL    01.12 2002/09/12 2004/11/26 19:54    51    28     0 KIL001
  365.      *
  366.      * -------------------------------------------
  367.      * [1] Name
  368.      * [2] VV.MM: Version . modification
  369.      * [3] Created: yyyy / MM / dd
  370.      * [4,5] Changed: yyyy / MM / dd HH:mm
  371.      * [6] Size: number of lines
  372.      * [7] Init: number of lines when first created
  373.      * [8] Mod: number of modified lines a last save
  374.      * [9] Id: User id for last update
  375.      * </pre>
  376.      *
  377.      * @param entry zosDirectoryEntry
  378.      * @return null: entry was not parsed.
  379.      */
  380.     private FTPFile parseMemberList(final String entry) {
  381.         final FTPFile file = new FTPFile();
  382.         if (matches(entry)) {
  383.             file.setRawListing(entry);
  384.             final String name = group(1);
  385.             final String datestr = group(2) + " " + group(3);
  386.             file.setName(name);
  387.             file.setType(FTPFile.FILE_TYPE);
  388.             try {
  389.                 file.setTimestamp(super.parseTimestamp(datestr));
  390.             } catch (final ParseException e) {
  391.                 // just ignore parsing errors.
  392.                 // TODO check this is ok
  393.                 // Drop thru to try simple parser
  394.             }
  395.             return file;
  396.         }

  397.         /*
  398.          * Assigns the name to the first word of the entry. Only to be used from a safe context, for example from a memberlist, where the regex for some reason
  399.          * fails. Then just assign the name field of FTPFile.
  400.          */
  401.         if (entry != null && !entry.trim().isEmpty()) {
  402.             file.setRawListing(entry);
  403.             final String name = entry.split(" ")[0];
  404.             file.setName(name);
  405.             file.setType(FTPFile.FILE_TYPE);
  406.             return file;
  407.         }
  408.         return null;
  409.     }

  410.     /**
  411.      * Pre-parses is called as part of the interface. Per definition, it is called before the parsing takes place. Three kinds of lists are recognized:
  412.      * <ul>
  413.      *     <li>z/OS-MVS File lists,</li>
  414.      *     <li>z/OS-MVS Member lists,</li>
  415.      *     <li>unix file lists.</li>
  416.      * </ul>
  417.      * @since 2.0
  418.      */
  419.     @Override
  420.     public List<String> preParse(final List<String> orig) {
  421.         // simply remove the header line. Composite logic will take care of the
  422.         // two different types of
  423.         // list in short order.
  424.         if (orig != null && !orig.isEmpty()) {
  425.             final String header = orig.get(0);
  426.             if (header.contains("Volume") && header.contains("Dsname")) {
  427.                 setType(FILE_LIST_TYPE);
  428.                 super.setRegex(FILE_LIST_REGEX);
  429.             } else if (header.contains("Name") && header.contains("Id")) {
  430.                 setType(MEMBER_LIST_TYPE);
  431.                 super.setRegex(MEMBER_LIST_REGEX);
  432.             } else if (header.startsWith("total")) {
  433.                 setType(UNIX_LIST_TYPE);
  434.                 unixFTPEntryParser = new UnixFTPEntryParser();
  435.             } else if (header.indexOf("Spool Files") >= 30) {
  436.                 setType(JES_LEVEL_1_LIST_TYPE);
  437.                 super.setRegex(JES_LEVEL_1_LIST_REGEX);
  438.             } else if (header.startsWith("JOBNAME") && header.indexOf("JOBID") > 8) { // header contains JOBNAME JOBID OWNER // STATUS CLASS
  439.                 setType(JES_LEVEL_2_LIST_TYPE);
  440.                 super.setRegex(JES_LEVEL_2_LIST_REGEX);
  441.             } else {
  442.                 setType(UNKNOWN_LIST_TYPE);
  443.             }

  444.             if (isType != JES_LEVEL_1_LIST_TYPE) { // remove header is necessary
  445.                 orig.remove(0);
  446.             }
  447.         }

  448.         return orig;
  449.     }

  450.     /**
  451.      * Sets the type of listing being processed.
  452.      *
  453.      * @param type The listing type.
  454.      */
  455.     void setType(final int type) {
  456.         isType = type;
  457.     }

  458. }