IMAPImportMbox.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.net.examples.mail;

  18. import java.io.BufferedReader;
  19. import java.io.File;
  20. import java.io.IOException;
  21. import java.net.URI;
  22. import java.nio.charset.Charset;
  23. import java.nio.file.Files;
  24. import java.nio.file.Paths;
  25. import java.util.ArrayList;
  26. import java.util.BitSet;
  27. import java.util.List;
  28. import java.util.regex.Matcher;
  29. import java.util.regex.Pattern;

  30. import org.apache.commons.net.imap.IMAPClient;

  31. /**
  32.  * This is an example program demonstrating how to use the IMAP[S]Client class. This program connects to a IMAP[S] server and imports messages into the folder
  33.  * from an mbox file.
  34.  * <p>
  35.  * Usage: IMAPImportMbox imap[s]://user:password@host[:port]/folder/path <mboxfile> [selectors]
  36.  * <p>
  37.  * An example selector might be:
  38.  * <ul>
  39.  * <li>1,2,3,7-10</li>
  40.  * <li>-142986- : this is useful for retrieving messages by apmail number, which appears as From xyz-return-142986-apmail-...</li>
  41.  * </ul>
  42.  * <p>
  43.  * For example:<br>
  44.  * IMAPImportMbox imaps://user:pass@imap.googlemail.com/imported_messages 201401.mbox 1-10,20 -142986-
  45.  */
  46. public final class IMAPImportMbox {

  47.     private static final String CRLF = "\r\n";
  48.     private static final Pattern PATFROM = Pattern.compile(">+From "); // escaped From

  49.     private static String getDate(final String msg) {
  50.         // From SENDER Fri Sep 13 17:04:01 2019
  51.         final Pattern FROM_RE = Pattern.compile("From \\S+ +\\S+ (\\S+)  ?(\\S+) (\\S+) (\\S+)");
  52.         // [Fri] Sep 13 HMS 2019
  53.         // output date: 13-Sep-2019 17:04:01 +0000
  54.         String date = null;
  55.         final Matcher m = FROM_RE.matcher(msg);
  56.         if (m.lookingAt()) {
  57.             date = m.group(2) + "-" + m.group(1) + "-" + m.group(4) + " " + m.group(3) + " +0000";
  58.         }
  59.         return date;
  60.     }

  61.     /**
  62.      * Is at least one entry in the list contained in the string?
  63.      *
  64.      * @param contains the list of strings to look for
  65.      * @param string   the String to check against
  66.      * @return true if at least one entry in the contains list is contained in the string
  67.      */
  68.     private static boolean listContains(final List<String> contains, final String string) {
  69.         for (final String entry : contains) {
  70.             if (string.contains(entry)) {
  71.                 return true;
  72.             }
  73.         }
  74.         return false;
  75.     }

  76.     public static void main(final String[] args) throws IOException {
  77.         if (args.length < 2) {
  78.             System.err.println("Usage: IMAPImportMbox imap[s]://user:password@host[:port]/folder/path <mboxfile> [selectors]");
  79.             System.err
  80.                     .println("\tWhere: a selector is a list of numbers/number ranges - 1,2,3-10" + " - or a list of strings to match in the initial From line");
  81.             System.exit(1);
  82.         }

  83.         final URI uri = URI.create(args[0]);
  84.         final String file = args[1];

  85.         final File mbox = new File(file);
  86.         if (!mbox.isFile() || !mbox.canRead()) {
  87.             throw new IOException("Cannot read mailbox file: " + mbox);
  88.         }

  89.         final String path = uri.getPath();
  90.         if (path == null || path.length() < 1) {
  91.             throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
  92.         }
  93.         final String folder = path.substring(1); // skip the leading /

  94.         final List<String> contains = new ArrayList<>(); // list of strings to find
  95.         final BitSet msgNums = new BitSet(); // list of message numbers

  96.         for (int i = 2; i < args.length; i++) {
  97.             final String arg = args[i];
  98.             if (arg.matches("\\d+(-\\d+)?(,\\d+(-\\d+)?)*")) { // number,m-n
  99.                 for (final String entry : arg.split(",")) {
  100.                     final String[] parts = entry.split("-");
  101.                     if (parts.length == 2) { // m-n
  102.                         final int low = Integer.parseInt(parts[0]);
  103.                         final int high = Integer.parseInt(parts[1]);
  104.                         for (int j = low; j <= high; j++) {
  105.                             msgNums.set(j);
  106.                         }
  107.                     } else {
  108.                         msgNums.set(Integer.parseInt(entry));
  109.                     }
  110.                 }
  111.             } else {
  112.                 contains.add(arg); // not a number/number range
  113.             }
  114.         }
  115. //        System.out.println(msgNums.toString());
  116. //        System.out.println(java.util.Arrays.toString(contains.toArray()));

  117.         // Connect and login
  118.         final IMAPClient imap = IMAPUtils.imapLogin(uri, 10000, null);

  119.         int total = 0;
  120.         int loaded = 0;
  121.         try {
  122.             imap.setSoTimeout(6000);
  123.             boolean wanted = false; // Skip any leading rubbish
  124.             final StringBuilder sb = new StringBuilder();
  125.             try (BufferedReader br = Files.newBufferedReader(Paths.get(file), Charset.defaultCharset())) {
  126.                 String line;
  127.                 while ((line = br.readLine()) != null) {
  128.                     if (line.startsWith("From ")) { // start of message; i.e. end of previous (if any)
  129.                         if (process(sb, imap, folder, total)) { // process previous message (if any)
  130.                             loaded++;
  131.                         }
  132.                         sb.setLength(0);
  133.                         total++;
  134.                         wanted = wanted(total, line, msgNums, contains);
  135.                     } else if (startsWith(line, PATFROM)) { // Unescape ">+From " in body text
  136.                         line = line.substring(1);
  137.                     }
  138.                     // TODO process first Received: line to determine arrival date?
  139.                     if (wanted) {
  140.                         sb.append(line);
  141.                         sb.append(CRLF);
  142.                     }
  143.                 }
  144.             }
  145.             if (wanted && process(sb, imap, folder, total)) { // last message (if any)
  146.                 loaded++;
  147.             }
  148.         } catch (final IOException e) {
  149.             System.out.println("Error processing msg: " + total + " " + imap.getReplyString());
  150.             e.printStackTrace();
  151.             System.exit(10);
  152.             return;
  153.         } finally {
  154.             imap.logout();
  155.             imap.disconnect();
  156.         }
  157.         System.out.println("Processed " + total + " messages, loaded " + loaded);
  158.     }

  159.     private static boolean process(final StringBuilder sb, final IMAPClient imap, final String folder, final int msgNum) throws IOException {
  160.         final int length = sb.length();
  161.         final boolean haveMessage = length > 2;
  162.         if (haveMessage) {
  163.             System.out.println("MsgNum: " + msgNum + " Length " + length);
  164.             sb.setLength(length - 2); // drop trailing CRLF (mbox format has trailing blank line)
  165.             final String msg = sb.toString();
  166.             if (!imap.append(folder, null, getDate(msg), msg)) {
  167.                 throw new IOException("Failed to import message: " + msgNum + " " + imap.getReplyString());
  168.             }
  169.         }
  170.         return haveMessage;
  171.     }

  172.     private static boolean startsWith(final String input, final Pattern pat) {
  173.         final Matcher m = pat.matcher(input);
  174.         return m.lookingAt();
  175.     }

  176.     /**
  177.      * Is the message wanted?
  178.      *
  179.      * @param msgNum   the message number
  180.      * @param line     the {@code From} line
  181.      * @param msgNums  the list of wanted message numbers
  182.      * @param contains the list of strings to be contained
  183.      * @return true if the message is wanted
  184.      */
  185.     private static boolean wanted(final int msgNum, final String line, final BitSet msgNums, final List<String> contains) {
  186.         return msgNums.isEmpty() && contains.isEmpty() // no selectors
  187.                 || msgNums.get(msgNum) // matches message number
  188.                 || listContains(contains, line); // contains string
  189.     }

  190. }