View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.net.examples.mail;
19  
20  import java.io.BufferedReader;
21  import java.io.File;
22  import java.io.FileReader;
23  import java.io.IOException;
24  import java.net.URI;
25  import java.util.ArrayList;
26  import java.util.BitSet;
27  import java.util.List;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import org.apache.commons.net.imap.IMAPClient;
32  
33  /**
34   * This is an example program demonstrating how to use the IMAP[S]Client class.
35   * This program connects to a IMAP[S] server and imports messages into the folder from an mbox file.
36   * <p>
37   * Usage: IMAPImportMbox imap[s]://user:password@host[:port]/folder/path <mboxfile> [selectors]
38   * <p>
39   * An example selector might be:
40   * <ul>
41   * <li>1,2,3,7-10</li>
42   * <li>-142986- : this is useful for retrieving messages by apmail number, which appears as From xyz-return-142986-apmail-...</li>
43   * </ul>
44   * <p>
45   * For example:<br>
46   * IMAPImportMbox imaps://user:pass@imap.googlemail.com/imported_messages 201401.mbox 1-10,20 -142986-
47   */
48  public final class IMAPImportMbox
49  {
50  
51      private static final String CRLF = "\r\n";
52      private static final Pattern PATFROM = Pattern.compile(">+From "); // escaped From
53  
54      public static void main(final String[] args) throws IOException
55      {
56          if (args.length < 2)
57          {
58              System.err.println("Usage: IMAPImportMbox imap[s]://user:password@host[:port]/folder/path <mboxfile> [selectors]");
59              System.err.println("\tWhere: a selector is a list of numbers/number ranges - 1,2,3-10" +
60                                 " - or a list of strings to match in the initial From line");
61              System.exit(1);
62          }
63  
64          final URI uri      = URI.create(args[0]);
65          final String file  = args[1];
66  
67          final File mbox = new File(file);
68          if (!mbox.isFile() || !mbox.canRead()) {
69              throw new IOException("Cannot read mailbox file: " + mbox);
70          }
71  
72          final String path = uri.getPath();
73          if (path == null || path.length() < 1) {
74              throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
75          }
76          final String folder = path.substring(1); // skip the leading /
77  
78          final List<String> contains = new ArrayList<>(); // list of strings to find
79          final BitSet msgNums = new BitSet(); // list of message numbers
80  
81          for(int i = 2; i < args.length; i++) {
82              final String arg = args[i];
83              if (arg.matches("\\d+(-\\d+)?(,\\d+(-\\d+)?)*")) { // number,m-n
84                  for(final String entry : arg.split(",")) {
85                      final String []parts = entry.split("-");
86                      if (parts.length == 2) { // m-n
87                          final int low = Integer.parseInt(parts[0]);
88                          final int high = Integer.parseInt(parts[1]);
89                          for(int j=low; j <= high; j++) {
90                              msgNums.set(j);
91                          }
92                      } else {
93                          msgNums.set(Integer.parseInt(entry));
94                      }
95                  }
96              } else {
97                  contains.add(arg); // not a number/number range
98              }
99          }
100 //        System.out.println(msgNums.toString());
101 //        System.out.println(java.util.Arrays.toString(contains.toArray()));
102 
103         // Connect and login
104         final IMAPClient imap = IMAPUtils.imapLogin(uri, 10000, null);
105 
106         int total = 0;
107         int loaded = 0;
108         try {
109             imap.setSoTimeout(6000);
110 
111             final BufferedReader br = new BufferedReader(new FileReader(file)); // TODO charset?
112 
113             String line;
114             final StringBuilder sb = new StringBuilder();
115             boolean wanted = false; // Skip any leading rubbish
116             while((line=br.readLine())!=null) {
117                 if (line.startsWith("From ")) { // start of message; i.e. end of previous (if any)
118                     if (process(sb, imap, folder, total)) { // process previous message (if any)
119                         loaded++;
120                     }
121                     sb.setLength(0);
122                     total ++;
123                     wanted = wanted(total, line, msgNums, contains);
124                 } else if (startsWith(line, PATFROM)) { // Unescape ">+From " in body text
125                     line = line.substring(1);
126                 }
127                 // TODO process first Received: line to determine arrival date?
128                 if (wanted) {
129                     sb.append(line);
130                     sb.append(CRLF);
131                 }
132             }
133             br.close();
134             if (wanted && process(sb, imap, folder, total)) { // last message (if any)
135                 loaded++;
136             }
137         } catch (final IOException e) {
138             System.out.println("Error processing msg: " + total + " " + imap.getReplyString());
139             e.printStackTrace();
140             System.exit(10);
141             return;
142         } finally {
143             imap.logout();
144             imap.disconnect();
145         }
146         System.out.println("Processed " + total + " messages, loaded " + loaded);
147     }
148 
149     private static boolean startsWith(final String input, final Pattern pat) {
150         final Matcher m = pat.matcher(input);
151         return m.lookingAt();
152     }
153 
154     private static String getDate(final String msg) {
155                                               // From SENDER Fri Sep 13 17:04:01 2019
156         final Pattern FROM_RE = Pattern.compile("From \\S+ +\\S+ (\\S+)  ?(\\S+) (\\S+) (\\S+)");
157         //                                                 [Fri]   Sep      13     HMS   2019
158         // output date: 13-Sep-2019 17:04:01 +0000
159         String date = null;
160         final Matcher m = FROM_RE.matcher(msg);
161         if (m.lookingAt()) {
162             date = m.group(2)+"-"+m.group(1)+"-"+m.group(4)+" "+m.group(3)+" +0000";
163         }
164         return date;
165     }
166 
167     private static boolean process(final StringBuilder sb, final IMAPClient imap, final String folder
168             ,final int msgNum) throws IOException {
169         final int length = sb.length();
170         final boolean haveMessage = length > 2;
171         if (haveMessage) {
172             System.out.println("MsgNum: " + msgNum +" Length " + length);
173             sb.setLength(length-2); // drop trailing CRLF (mbox format has trailing blank line)
174             final String msg = sb.toString();
175             if (!imap.append(folder, null, getDate(msg), msg)) {
176                 throw new IOException("Failed to import message: " + msgNum + " " + imap.getReplyString());
177             }
178         }
179         return haveMessage;
180     }
181 
182     /**
183      * Is the message wanted?
184      *
185      * @param msgNum the message number
186      * @param line the From line
187      * @param msgNums the list of wanted message numbers
188      * @param contains the list of strings to be contained
189      * @return true if the message is wanted
190      */
191     private static boolean wanted(final int msgNum, final String line, final BitSet msgNums, final List<String> contains) {
192         return (msgNums.isEmpty() && contains.isEmpty()) // no selectors
193              || msgNums.get(msgNum) // matches message number
194              || listContains(contains, line); // contains string
195     }
196 
197     /**
198      * Is at least one entry in the list contained in the string?
199      * @param contains the list of strings to look for
200      * @param string the String to check against
201      * @return true if at least one entry in the contains list is contained in the string
202      */
203     private static boolean listContains(final List<String> contains, final String string) {
204         for(final String entry : contains) {
205             if (string.contains(entry)) {
206                 return true;
207             }
208         }
209         return false;
210     }
211 
212 }