IMAPExportMbox.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.net.examples.mail;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.net.PrintCommandListener;
import org.apache.commons.net.ProtocolCommandEvent;
import org.apache.commons.net.imap.IMAP;
import org.apache.commons.net.imap.IMAP.IMAPChunkListener;
import org.apache.commons.net.imap.IMAPClient;
import org.apache.commons.net.imap.IMAPReply;

  44. /**
  45.  * This is an example program demonstrating how to use the IMAP[S]Client class. This program connects to an IMAP[S] server and exports selected messages from a
  46.  * folder into an mbox file.
  47.  * <p>
  48.  * Usage: IMAPExportMbox imap[s]://user:password@host[:port]/folder/path [+|-]&lt;mboxfile&gt; [sequence-set] [item-names]
  49.  * <p>
  50.  * An example sequence-set might be:
  51.  * <ul>
  52.  * <li>11,2,3:10,20:*</li>
  53.  * <li>1:* - this is the default</li>
  54.  * </ul>
  55.  * <p>
  56.  * Some example item-names might be:
  57.  * <ul>
  58.  * <li>BODY.PEEK[HEADER]</li>
  59.  * <li>'BODY.PEEK[HEADER.FIELDS (SUBJECT)]'</li>
  60.  * <li>ALL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE)'</li>
  61.  * <li>FAST - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE)'</li>
  62.  * <li>FULL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE BODY)'</li>
  63.  * <li>ENVELOPE X-GM-LABELS</li>
  64.  * <li>'(INTERNALDATE BODY.PEEK[])' - this is the default</li>
  65.  * </ul>
  66.  * <p>
  67.  * Macro names cannot be combined with anything else; they must be used alone.<br>
  68.  * Note that using BODY will set the \Seen flag. This is why the default uses BODY.PEEK[].<br>
  69.  * The item name X-GM-LABELS is a Google Mail extension; it shows the labels for a message.<br>
  70.  * For example:<br>
  71.  * IMAPExportMbox imaps://user:password@imap.googlemail.com/messages_for_export exported.mbox 1:10,20<br>
  72.  * IMAPExportMbox imaps://user:password@imap.googlemail.com/messages_for_export exported.mbox 3 ENVELOPE X-GM-LABELS<br>
  73.  * <p>
  74.  * The sequence-set is passed unmodified to the FETCH command.<br>
  75.  * The item names are wrapped in parentheses if more than one is provided. Otherwise, the parameter is assumed to be wrapped if necessary.<br>
  76.  * Parameters with spaces must be quoted otherwise the OS shell will normally treat them as separate parameters.<br>
  77.  * Also, the listener that writes the mailbox only captures the multi-line responses (e.g. ones that include BODY references). It does not capture the output
  78.  * from FETCH commands using item names such as ENVELOPE or FLAGS that return a single line response.
  79.  */
  80. public final class IMAPExportMbox {

  81.     private static final class MboxListener implements IMAPChunkListener {

  82.         private final BufferedWriter bufferedWriter;
  83.         volatile AtomicInteger total = new AtomicInteger();
  84.         volatile String lastFetched;
  85.         volatile List<String> missingIds = new ArrayList<>();
  86.         volatile long lastSeq = -1;
  87.         private final String lineSeparator;
  88.         private final SimpleDateFormat DATE_FORMAT // for mbox From_ lines
  89.                 = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy");

  90.         // e.g. INTERNALDATE "27-Oct-2013 07:43:24 +0000"
  91.         // for parsing INTERNALDATE
  92.         private final SimpleDateFormat IDPARSE = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss Z");
  93.         private final boolean printHash;
  94.         private final boolean printMarker;
  95.         private final boolean checkSequence;

  96.         MboxListener(final BufferedWriter bufferedWriter, final String lineSeparator, final boolean printHash, final boolean printMarker,
  97.                 final boolean checkSequence) {
  98.             this.lineSeparator = lineSeparator;
  99.             this.printHash = printHash;
  100.             this.printMarker = printMarker;
  101.             DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("GMT"));
  102.             this.bufferedWriter = bufferedWriter;
  103.             this.checkSequence = checkSequence;
  104.         }

  105.         @Override
  106.         public boolean chunkReceived(final IMAP imap) {
  107.             final String[] replyStrings = imap.getReplyStrings();
  108.             Date received = new Date();
  109.             final String firstLine = replyStrings[0];
  110.             Matcher m = PATID.matcher(firstLine);
  111.             if (m.lookingAt()) { // found a match
  112.                 final String date = m.group(PATID_DATE_GROUP);
  113.                 try {
  114.                     received = IDPARSE.parse(date);
  115.                 } catch (final ParseException e) {
  116.                     System.err.println(e);
  117.                 }
  118.             } else {
  119.                 System.err.println("No timestamp found in: " + firstLine + "  - using current time");
  120.             }
  121.             String replyTo = "MAILER-DAEMON"; // default
  122.             for (int i = 1; i < replyStrings.length - 1; i++) {
  123.                 final String line = replyStrings[i];
  124.                 if (line.startsWith("Return-Path: ")) {
  125.                     final String[] parts = line.split(" ", 2);
  126.                     if (!parts[1].equals("<>")) { // Don't replace default with blank
  127.                         replyTo = parts[1];
  128.                         if (replyTo.startsWith("<")) {
  129.                             if (replyTo.endsWith(">")) {
  130.                                 replyTo = replyTo.substring(1, replyTo.length() - 1); // drop <> wrapper
  131.                             } else {
  132.                                 System.err.println("Unexpected Return-path: '" + line + "' in " + firstLine);
  133.                             }
  134.                         }
  135.                     }
  136.                     break;
  137.                 }
  138.             }
  139.             try {
  140.                 // Add initial mbox header line
  141.                 bufferedWriter.append("From ");
  142.                 bufferedWriter.append(replyTo);
  143.                 bufferedWriter.append(' ');
  144.                 bufferedWriter.append(DATE_FORMAT.format(received));
  145.                 bufferedWriter.append(lineSeparator);
  146.                 // Debug
  147.                 bufferedWriter.append("X-IMAP-Response: ").append(firstLine).append(lineSeparator);
  148.                 if (printMarker) {
  149.                     System.err.println("[" + total + "] " + firstLine);
  150.                 }
  151.                 // Skip first and last lines
  152.                 for (int i = 1; i < replyStrings.length - 1; i++) {
  153.                     final String line = replyStrings[i];
  154.                     if (startsWith(line, PATFROM)) {
  155.                         bufferedWriter.append('>'); // Escape a From_ line
  156.                     }
  157.                     bufferedWriter.append(line);
  158.                     bufferedWriter.append(lineSeparator);
  159.                 }
  160.                 // The last line ends with the trailing closing ")" which needs to be stripped
  161.                 final String lastLine = replyStrings[replyStrings.length - 1];
  162.                 final int lastLength = lastLine.length();
  163.                 if (lastLength > 1) { // there's some content, we need to save it
  164.                     bufferedWriter.append(lastLine, 0, lastLength - 1);
  165.                     bufferedWriter.append(lineSeparator);
  166.                 }
  167.                 bufferedWriter.append(lineSeparator); // blank line between entries
  168.             } catch (final IOException e) {
  169.                 e.printStackTrace();
  170.                 throw new UncheckedIOException(e); // chunkReceived cannot throw a checked Exception
  171.             }
  172.             lastFetched = firstLine;
  173.             total.incrementAndGet();
  174.             if (checkSequence) {
  175.                 m = PATSEQ.matcher(firstLine);
  176.                 if (m.lookingAt()) { // found a match
  177.                     final long msgSeq = Long.parseLong(m.group(PATSEQ_SEQUENCE_GROUP)); // Cannot fail to parse
  178.                     if (lastSeq != -1) {
  179.                         final long missing = msgSeq - lastSeq - 1;
  180.                         if (missing != 0) {
  181.                             for (long j = lastSeq + 1; j < msgSeq; j++) {
  182.                                 missingIds.add(String.valueOf(j));
  183.                             }
  184.                             System.err.println("*** Sequence error: current=" + msgSeq + " previous=" + lastSeq + " Missing=" + missing);
  185.                         }
  186.                     }
  187.                     lastSeq = msgSeq;
  188.                 }
  189.             }
  190.             if (printHash) {
  191.                 System.err.print(".");
  192.             }
  193.             return true;
  194.         }

  195.         public void close() throws IOException {
  196.             if (bufferedWriter != null) {
  197.                 bufferedWriter.close();
  198.             }
  199.         }
  200.     }

  201.     private static final String CRLF = "\r\n";
  202.     private static final String LF = "\n";

  203.     private static final String EOL_DEFAULT = System.lineSeparator();
  204.     private static final Pattern PATFROM = Pattern.compile(">*From "); // unescaped From_
  205.     // e.g. * nnn (INTERNALDATE "27-Oct-2013 07:43:24 +0000" BODY[] {nn} ...)
  206.     private static final Pattern PATID = // INTERNALDATE
  207.             Pattern.compile(".*INTERNALDATE \"(\\d\\d-\\w{3}-\\d{4} \\d\\d:\\d\\d:\\d\\d [+-]\\d+)\"");

  208.     private static final int PATID_DATE_GROUP = 1;
  209.     private static final Pattern PATSEQ = Pattern.compile("\\* (\\d+) "); // Sequence number

  210.     private static final int PATSEQ_SEQUENCE_GROUP = 1;

  211.     // e.g. * 382 EXISTS
  212.     private static final Pattern PATEXISTS = Pattern.compile("\\* (\\d+) EXISTS"); // Response from SELECT

  213.     // AAAC NO [TEMPFAIL] FETCH Temporary failure on server [CODE: WBL]
  214.     private static final Pattern PATTEMPFAIL = Pattern.compile("[A-Z]{4} NO \\[TEMPFAIL\\] FETCH .*");
  215.     private static final int CONNECT_TIMEOUT = 10; // Seconds

  216.     private static final int READ_TIMEOUT = 10;

  217.     public static void main(final String[] args) throws IOException, URISyntaxException {
  218.         int connect_timeout = CONNECT_TIMEOUT;
  219.         int read_timeout = READ_TIMEOUT;

  220.         int argIdx = 0;
  221.         String eol = EOL_DEFAULT;
  222.         boolean printHash = false;
  223.         boolean printMarker = false;
  224.         int retryWaitSecs = 0;

  225.         for (argIdx = 0; argIdx < args.length; argIdx++) {
  226.             if (args[argIdx].equals("-c")) {
  227.                 connect_timeout = Integer.parseInt(args[++argIdx]);
  228.             } else if (args[argIdx].equals("-r")) {
  229.                 read_timeout = Integer.parseInt(args[++argIdx]);
  230.             } else if (args[argIdx].equals("-R")) {
  231.                 retryWaitSecs = Integer.parseInt(args[++argIdx]);
  232.             } else if (args[argIdx].equals("-LF")) {
  233.                 eol = LF;
  234.             } else if (args[argIdx].equals("-CRLF")) {
  235.                 eol = CRLF;
  236.             } else if (args[argIdx].equals("-.")) {
  237.                 printHash = true;
  238.             } else if (args[argIdx].equals("-X")) {
  239.                 printMarker = true;
  240.             } else {
  241.                 break;
  242.             }
  243.         }

  244.         final int argCount = args.length - argIdx;

  245.         if (argCount < 2) {
  246.             System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]"
  247.                     + " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]");
  248.             System.err.println("\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)");
  249.             System.err.println("\t-c connect timeout in seconds (default 10)");
  250.             System.err.println("\t-r read timeout in seconds (default 10)");
  251.             System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)");
  252.             System.err.println("\t-. print a . for each complete message received");
  253.             System.err.println("\t-X print the X-IMAP line for each complete message received");
  254.             System.err.println("\tthe mboxfile is where the messages are stored; use '-' to write to standard output.");
  255.             System.err.println("\tPrefix file name with '+' to append to the file. Prefix with '-' to allow overwrite.");
  256.             System.err.println("\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*");
  257.             System.err.println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]"
  258.                     + " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])");
  259.             System.exit(1);
  260.         }

  261.         final String uriString = args[argIdx++];
  262.         URI uri;
  263.         try {
  264.             uri = URI.create(uriString);
  265.         } catch (final IllegalArgumentException e) { // cannot parse the path as is; let's pull it apart and try again
  266.             final Matcher m = Pattern.compile("(imaps?://[^/]+)(/.*)").matcher(uriString);
  267.             if (!m.matches()) {
  268.                 throw e;
  269.             }
  270.             uri = URI.create(m.group(1)); // Just the scheme and auth parts
  271.             uri = new URI(uri.getScheme(), uri.getAuthority(), m.group(2), null, null);
  272.         }
  273.         final String file = args[argIdx++];
  274.         String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*";
  275.         final String itemNames;
  276.         // Handle 0, 1 or multiple item names
  277.         if (argCount > 3) {
  278.             if (argCount > 4) {
  279.                 final StringBuilder sb = new StringBuilder();
  280.                 sb.append("(");
  281.                 for (int i = 4; i <= argCount; i++) {
  282.                     if (i > 4) {
  283.                         sb.append(" ");
  284.                     }
  285.                     sb.append(args[argIdx++]);
  286.                 }
  287.                 sb.append(")");
  288.                 itemNames = sb.toString();
  289.             } else {
  290.                 itemNames = args[argIdx++];
  291.             }
  292.         } else {
  293.             itemNames = "(INTERNALDATE BODY.PEEK[])";
  294.         }

  295.         final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)"); // are we expecting a sequence?
  296.         final MboxListener mboxListener;
  297.         if (file.equals("-")) {
  298.             mboxListener = null;
  299.         } else if (file.startsWith("+")) {
  300.             final Path mboxPath = Paths.get(file.substring(1));
  301.             System.out.println("Appending to file " + mboxPath);
  302.             mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE, StandardOpenOption.APPEND),
  303.                     eol, printHash, printMarker, checkSequence);
  304.         } else if (file.startsWith("-")) {
  305.             final Path mboxPath = Paths.get(file.substring(1));
  306.             System.out.println("Writing to file " + mboxPath);
  307.             mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE), eol, printHash, printMarker,
  308.                     checkSequence);
  309.         } else {
  310.             final Path mboxPath = Paths.get(file);
  311.             if (Files.exists(mboxPath) && Files.size(mboxPath) > 0) {
  312.                 throw new IOException("mailbox file: " + mboxPath + " already exists and is non-empty!");
  313.             }
  314.             System.out.println("Creating file " + mboxPath);
  315.             mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE), eol, printHash, printMarker,
  316.                     checkSequence);
  317.         }

  318.         final String path = uri.getPath();
  319.         if (path == null || path.length() < 1) {
  320.             throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
  321.         }
  322.         final String folder = path.substring(1); // skip the leading /

  323.         // suppress login details
  324.         final PrintCommandListener listener = new PrintCommandListener(System.out, true) {
  325.             @Override
  326.             public void protocolReplyReceived(final ProtocolCommandEvent event) {
  327.                 if (event.getReplyCode() != IMAPReply.PARTIAL) { // This is dealt with by the chunk listener
  328.                     super.protocolReplyReceived(event);
  329.                 }
  330.             }
  331.         };

  332.         // Connect and login
  333.         final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener);

  334.         String maxIndexInFolder = null;

  335.         try {

  336.             imap.setSoTimeout(read_timeout * 1000);

  337.             if (!imap.select(folder)) {
  338.                 throw new IOException("Could not select folder: " + folder);
  339.             }

  340.             for (final String line : imap.getReplyStrings()) {
  341.                 maxIndexInFolder = matches(line, PATEXISTS, 1);
  342.                 if (maxIndexInFolder != null) {
  343.                     break;
  344.                 }
  345.             }

  346.             if (mboxListener != null) {
  347.                 imap.setChunkListener(mboxListener);
  348.             } // else the command listener displays the full output without processing

  349.             while (true) {
  350.                 final boolean ok = imap.fetch(sequenceSet, itemNames);
  351.                 // If the fetch failed, can we retry?
  352.                 if (ok || retryWaitSecs <= 0 || mboxListener == null || !checkSequence) {
  353.                     break;
  354.                 }
  355.                 final String replyString = imap.getReplyString(); // includes EOL
  356.                 if (!startsWith(replyString, PATTEMPFAIL)) {
  357.                     throw new IOException("FETCH " + sequenceSet + " " + itemNames + " failed with " + replyString);
  358.                 }
  359.                 System.err.println("Temporary error detected, will retry in " + retryWaitSecs + "seconds");
  360.                 sequenceSet = mboxListener.lastSeq + 1 + ":*";
  361.                 try {
  362.                     Thread.sleep(retryWaitSecs * 1000);
  363.                 } catch (final InterruptedException e) {
  364.                     // ignored
  365.                 }
  366.             }

  367.         } catch (final IOException ioe) {
  368.             final String count = mboxListener == null ? "?" : mboxListener.total.toString();
  369.             System.err.println("FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages ");
  370.             if (mboxListener != null) {
  371.                 System.err.println("Last complete response seen: " + mboxListener.lastFetched);
  372.             }
  373.             throw ioe;
  374.         } finally {

  375.             if (printHash) {
  376.                 System.err.println();
  377.             }

  378.             if (mboxListener != null) {
  379.                 mboxListener.close();
  380.                 final Iterator<String> missingIds = mboxListener.missingIds.iterator();
  381.                 if (missingIds.hasNext()) {
  382.                     final StringBuilder sb = new StringBuilder();
  383.                     for (;;) {
  384.                         sb.append(missingIds.next());
  385.                         if (!missingIds.hasNext()) {
  386.                             break;
  387.                         }
  388.                         sb.append(",");
  389.                     }
  390.                     System.err.println("*** Missing ids: " + sb.toString());
  391.                 }
  392.             }
  393.             imap.logout();
  394.             imap.disconnect();
  395.         }
  396.         if (mboxListener != null) {
  397.             System.out.println("Processed " + mboxListener.total + " messages.");
  398.         }
  399.         if (maxIndexInFolder != null) {
  400.             System.out.println("Folder contained " + maxIndexInFolder + " messages.");
  401.         }
  402.     }

  403.     private static String matches(final String input, final Pattern pat, final int index) {
  404.         final Matcher m = pat.matcher(input);
  405.         if (m.lookingAt()) {
  406.             return m.group(index);
  407.         }
  408.         return null;
  409.     }

  410.     private static boolean startsWith(final String input, final Pattern pat) {
  411.         final Matcher m = pat.matcher(input);
  412.         return m.lookingAt();
  413.     }
  414. }