View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.net.examples.mail;
19  
20  import java.io.BufferedReader;
21  import java.io.File;
22  import java.io.IOException;
23  import java.net.URI;
24  import java.nio.charset.Charset;
25  import java.nio.file.Files;
26  import java.nio.file.Paths;
27  import java.util.ArrayList;
28  import java.util.BitSet;
29  import java.util.List;
30  import java.util.regex.Matcher;
31  import java.util.regex.Pattern;
32  
33  import org.apache.commons.net.imap.IMAPClient;
34  
35  /**
36   * This is an example program demonstrating how to use the IMAP[S]Client class. This program connects to a IMAP[S] server and imports messages into the folder
37   * from an mbox file.
38   * <p>
39   * Usage: IMAPImportMbox imap[s]://user:password@host[:port]/folder/path <mboxfile> [selectors]
40   * <p>
41   * An example selector might be:
42   * <ul>
43   * <li>1,2,3,7-10</li>
44   * <li>-142986- : this is useful for retrieving messages by apmail number, which appears as From xyz-return-142986-apmail-...</li>
45   * </ul>
46   * <p>
47   * For example:<br>
48   * IMAPImportMbox imaps://user:pass@imap.googlemail.com/imported_messages 201401.mbox 1-10,20 -142986-
49   */
50  public final class IMAPImportMbox {
51  
52      private static final String CRLF = "\r\n";
53      private static final Pattern PATFROM = Pattern.compile(">+From "); // escaped From
54  
55      private static String getDate(final String msg) {
56          // From SENDER Fri Sep 13 17:04:01 2019
57          final Pattern FROM_RE = Pattern.compile("From \\S+ +\\S+ (\\S+)  ?(\\S+) (\\S+) (\\S+)");
58          // [Fri] Sep 13 HMS 2019
59          // output date: 13-Sep-2019 17:04:01 +0000
60          String date = null;
61          final Matcher m = FROM_RE.matcher(msg);
62          if (m.lookingAt()) {
63              date = m.group(2) + "-" + m.group(1) + "-" + m.group(4) + " " + m.group(3) + " +0000";
64          }
65          return date;
66      }
67  
68      /**
69       * Is at least one entry in the list contained in the string?
70       *
71       * @param contains the list of strings to look for
72       * @param string   the String to check against
73       * @return true if at least one entry in the contains list is contained in the string
74       */
75      private static boolean listContains(final List<String> contains, final String string) {
76          for (final String entry : contains) {
77              if (string.contains(entry)) {
78                  return true;
79              }
80          }
81          return false;
82      }
83  
84      public static void main(final String[] args) throws IOException {
85          if (args.length < 2) {
86              System.err.println("Usage: IMAPImportMbox imap[s]://user:password@host[:port]/folder/path <mboxfile> [selectors]");
87              System.err
88                      .println("\tWhere: a selector is a list of numbers/number ranges - 1,2,3-10" + " - or a list of strings to match in the initial From line");
89              System.exit(1);
90          }
91  
92          final URI uri = URI.create(args[0]);
93          final String file = args[1];
94  
95          final File mbox = new File(file);
96          if (!mbox.isFile() || !mbox.canRead()) {
97              throw new IOException("Cannot read mailbox file: " + mbox);
98          }
99  
100         final String path = uri.getPath();
101         if (path == null || path.length() < 1) {
102             throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
103         }
104         final String folder = path.substring(1); // skip the leading /
105 
106         final List<String> contains = new ArrayList<>(); // list of strings to find
107         final BitSet msgNums = new BitSet(); // list of message numbers
108 
109         for (int i = 2; i < args.length; i++) {
110             final String arg = args[i];
111             if (arg.matches("\\d+(-\\d+)?(,\\d+(-\\d+)?)*")) { // number,m-n
112                 for (final String entry : arg.split(",")) {
113                     final String[] parts = entry.split("-");
114                     if (parts.length == 2) { // m-n
115                         final int low = Integer.parseInt(parts[0]);
116                         final int high = Integer.parseInt(parts[1]);
117                         for (int j = low; j <= high; j++) {
118                             msgNums.set(j);
119                         }
120                     } else {
121                         msgNums.set(Integer.parseInt(entry));
122                     }
123                 }
124             } else {
125                 contains.add(arg); // not a number/number range
126             }
127         }
128 //        System.out.println(msgNums.toString());
129 //        System.out.println(java.util.Arrays.toString(contains.toArray()));
130 
131         // Connect and login
132         final IMAPClient imap = IMAPUtils.imapLogin(uri, 10000, null);
133 
134         int total = 0;
135         int loaded = 0;
136         try {
137             imap.setSoTimeout(6000);
138             boolean wanted = false; // Skip any leading rubbish
139             final StringBuilder sb = new StringBuilder();
140             try (BufferedReader br = Files.newBufferedReader(Paths.get(file), Charset.defaultCharset())) {
141                 String line;
142                 while ((line = br.readLine()) != null) {
143                     if (line.startsWith("From ")) { // start of message; i.e. end of previous (if any)
144                         if (process(sb, imap, folder, total)) { // process previous message (if any)
145                             loaded++;
146                         }
147                         sb.setLength(0);
148                         total++;
149                         wanted = wanted(total, line, msgNums, contains);
150                     } else if (startsWith(line, PATFROM)) { // Unescape ">+From " in body text
151                         line = line.substring(1);
152                     }
153                     // TODO process first Received: line to determine arrival date?
154                     if (wanted) {
155                         sb.append(line);
156                         sb.append(CRLF);
157                     }
158                 }
159             }
160             if (wanted && process(sb, imap, folder, total)) { // last message (if any)
161                 loaded++;
162             }
163         } catch (final IOException e) {
164             System.out.println("Error processing msg: " + total + " " + imap.getReplyString());
165             e.printStackTrace();
166             System.exit(10);
167             return;
168         } finally {
169             imap.logout();
170             imap.disconnect();
171         }
172         System.out.println("Processed " + total + " messages, loaded " + loaded);
173     }
174 
175     private static boolean process(final StringBuilder sb, final IMAPClient imap, final String folder, final int msgNum) throws IOException {
176         final int length = sb.length();
177         final boolean haveMessage = length > 2;
178         if (haveMessage) {
179             System.out.println("MsgNum: " + msgNum + " Length " + length);
180             sb.setLength(length - 2); // drop trailing CRLF (mbox format has trailing blank line)
181             final String msg = sb.toString();
182             if (!imap.append(folder, null, getDate(msg), msg)) {
183                 throw new IOException("Failed to import message: " + msgNum + " " + imap.getReplyString());
184             }
185         }
186         return haveMessage;
187     }
188 
189     private static boolean startsWith(final String input, final Pattern pat) {
190         final Matcher m = pat.matcher(input);
191         return m.lookingAt();
192     }
193 
194     /**
195      * Is the message wanted?
196      *
197      * @param msgNum   the message number
198      * @param line     the {@code From} line
199      * @param msgNums  the list of wanted message numbers
200      * @param contains the list of strings to be contained
201      * @return true if the message is wanted
202      */
203     private static boolean wanted(final int msgNum, final String line, final BitSet msgNums, final List<String> contains) {
204         return msgNums.isEmpty() && contains.isEmpty() // no selectors
205                 || msgNums.get(msgNum) // matches message number
206                 || listContains(contains, line); // contains string
207     }
208 
209 }