/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.net.examples.mail;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.net.PrintCommandListener;
import org.apache.commons.net.ProtocolCommandEvent;
import org.apache.commons.net.imap.IMAP;
import org.apache.commons.net.imap.IMAP.IMAPChunkListener;
import org.apache.commons.net.imap.IMAPClient;
import org.apache.commons.net.imap.IMAPReply;

44  /**
45   * This is an example program demonstrating how to use the IMAP[S]Client class.
46   * This program connects to a IMAP[S] server and exports selected messages from a folder into an mbox file.
47   * <p>
48   * Usage: IMAPExportMbox imap[s]://user:password@host[:port]/folder/path <mboxfile> [sequence-set] [item-names]
49   * <p>
50   * An example sequence-set might be:
51   * <ul>
52   * <li>11,2,3:10,20:*</li>
53   * <li>1:* - this is the default</li>
54   * </ul>
55   * <p>
56   * Some example item-names might be:
57   * <ul>
58   * <li>BODY.PEEK[HEADER]</li>
59   * <li>'BODY.PEEK[HEADER.FIELDS (SUBJECT)]'</li>
60   * <li>ALL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE)'</li>
61   * <li>FAST - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE)'</li>
62   * <li>FULL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE BODY)'</li>
63   * <li>ENVELOPE X-GM-LABELS</li>
64   * <li>'(INTERNALDATE BODY.PEEK[])' - this is the default</li>
65   * </ul>
66   * <p>
67   * Macro names cannot be combined with anything else; they must be used alone.<br>
68   * Note that using BODY will set the \Seen flag. This is why the default uses BODY.PEEK[].<br>
69   * The item name X-GM-LABELS is a Google Mail extension; it shows the labels for a message.<br>
70   * For example:<br>
71   * IMAPExportMbox imaps://username:password@imap.googlemail.com/messages_for_export exported.mbox 1:10,20<br>
72   * IMAPExportMbox imaps://username:password@imap.googlemail.com/messages_for_export exported.mbox 3 ENVELOPE X-GM-LABELS<br>
73   * <p>
74   * The sequence-set is passed unmodified to the FETCH command.<br>
75   * The item names are wrapped in parentheses if more than one is provided.
76   * Otherwise, the parameter is assumed to be wrapped if necessary.<br>
77   * Parameters with spaces must be quoted otherwise the OS shell will normally treat them as separate parameters.<br>
78   * Also the listener that writes the mailbox only captures the multi-line responses (e.g. ones that include BODY references).
79   * It does not capture the output from FETCH commands using item names such as ENVELOPE or FLAGS that return a single line response.
80   */
81  public final class IMAPExportMbox
82  {
83  
84      private static final String CRLF = "\r\n";
85      private static final String LF = "\n";
86      private static final String EOL_DEFAULT = System.getProperty("line.separator");
87  
88      private static final Pattern PATFROM = Pattern.compile(">*From "); // unescaped From_
89      // e.g. * nnn (INTERNALDATE "27-Oct-2013 07:43:24 +0000"  BODY[] {nn} ...)
90      private static final Pattern PATID = // INTERNALDATE
91              Pattern.compile(".*INTERNALDATE \"(\\d\\d-\\w{3}-\\d{4} \\d\\d:\\d\\d:\\d\\d [+-]\\d+)\"");
92      private static final int PATID_DATE_GROUP = 1;
93  
94      private static final Pattern PATSEQ = Pattern.compile("\\* (\\d+) "); // Sequence number
95      private static final int PATSEQ_SEQUENCE_GROUP = 1;
96  
97      // e.g. * 382 EXISTS
98      private static final Pattern PATEXISTS = Pattern.compile("\\* (\\d+) EXISTS"); // Response from SELECT
99  
100     // AAAC NO [TEMPFAIL] FETCH Temporary failure on server [CODE: WBL]
101     private static final Pattern PATTEMPFAIL = Pattern.compile("[A-Z]{4} NO \\[TEMPFAIL\\] FETCH .*");
102 
103     private static final int CONNECT_TIMEOUT = 10; // Seconds
104     private static final int READ_TIMEOUT = 10;
105 
106     public static void main(final String[] args) throws IOException, URISyntaxException
107     {
108         int connect_timeout = CONNECT_TIMEOUT;
109         int read_timeout = READ_TIMEOUT;
110 
111         int argIdx = 0;
112         String eol = EOL_DEFAULT;
113         boolean printHash = false;
114         boolean printMarker = false;
115         int retryWaitSecs = 0;
116 
117         for(argIdx = 0; argIdx < args.length; argIdx++) {
118             if (args[argIdx].equals("-c")) {
119                 connect_timeout = Integer.parseInt(args[++argIdx]);
120             } else if (args[argIdx].equals("-r")) {
121                 read_timeout = Integer.parseInt(args[++argIdx]);
122             } else if (args[argIdx].equals("-R")) {
123                 retryWaitSecs = Integer.parseInt(args[++argIdx]);
124             } else if (args[argIdx].equals("-LF")) {
125                 eol = LF;
126             } else if (args[argIdx].equals("-CRLF")) {
127                 eol = CRLF;
128             } else if (args[argIdx].equals("-.")) {
129                 printHash = true;
130             } else if (args[argIdx].equals("-X")) {
131                 printMarker = true;
132             } else {
133                 break;
134             }
135         }
136 
137         final int argCount = args.length - argIdx;
138 
139         if (argCount < 2)
140         {
141             System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]" +
142                                " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]");
143             System.err.println("\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)");
144             System.err.println("\t-c connect timeout in seconds (default 10)");
145             System.err.println("\t-r read timeout in seconds (default 10)");
146             System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)");
147             System.err.println("\t-. print a . for each complete message received");
148             System.err.println("\t-X print the X-IMAP line for each complete message received");
149             System.err.println("\tthe mboxfile is where the messages are stored; use '-' to write to standard output.");
150             System.err.println("\tPrefix file name with '+' to append to the file. Prefix with '-' to allow overwrite.");
151             System.err.println("\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*");
152             System.err.println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]" +
153                                " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])");
154             System.exit(1);
155         }
156 
157         final String uriString = args[argIdx++];
158         URI uri;
159         try {
160             uri = URI.create(uriString);
161         } catch(final IllegalArgumentException e) { // cannot parse the path as is; let's pull it apart and try again
162             final Matcher m = Pattern.compile("(imaps?://[^/]+)(/.*)").matcher(uriString);
163             if (m.matches()) {
164                 uri = URI.create(m.group(1)); // Just the scheme and auth parts
165                 uri = new URI(uri.getScheme(), uri.getAuthority(), m.group(2), null, null);
166             } else {
167                 throw e;
168             }
169         }
170         final String file  = args[argIdx++];
171         String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*";
172         final String itemNames;
173         // Handle 0, 1 or multiple item names
174         if (argCount > 3) {
175             if (argCount > 4) {
176                 final StringBuilder sb = new StringBuilder();
177                 sb.append("(");
178                 for(int i=4; i <= argCount; i++) {
179                     if (i>4) {
180                         sb.append(" ");
181                     }
182                     sb.append(args[argIdx++]);
183                 }
184                 sb.append(")");
185                 itemNames = sb.toString();
186             } else {
187                 itemNames = args[argIdx++];
188             }
189         } else {
190             itemNames = "(INTERNALDATE BODY.PEEK[])";
191         }
192 
193         final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)"); // are we expecting a sequence?
194         final MboxListener mboxListener;
195         if (file.equals("-")) {
196             mboxListener = null;
197         } else if (file.startsWith("+")) {
198             final File mbox = new File(file.substring(1));
199             System.out.println("Appending to file " + mbox);
200             mboxListener = new MboxListener(
201                 new BufferedWriter(new FileWriter(mbox, true)), eol, printHash, printMarker, checkSequence);
202         } else if (file.startsWith("-")) {
203             final File mbox = new File(file.substring(1));
204             System.out.println("Writing to file " + mbox);
205             mboxListener = new MboxListener(
206                 new BufferedWriter(new FileWriter(mbox, false)), eol, printHash, printMarker, checkSequence);
207         } else {
208             final File mboxFile = new File(file);
209             if (mboxFile.exists() && mboxFile.length() > 0) {
210                 throw new IOException("mailbox file: " + mboxFile + " already exists and is non-empty!");
211             }
212             System.out.println("Creating file " + mboxFile);
213             mboxListener = new MboxListener(new BufferedWriter(new FileWriter(mboxFile)), eol, printHash, printMarker,
214                     checkSequence);
215         }
216 
217         final String path = uri.getPath();
218         if (path == null || path.length() < 1) {
219             throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
220         }
221         final String folder = path.substring(1); // skip the leading /
222 
223         // suppress login details
224         final PrintCommandListenertener.html#PrintCommandListener">PrintCommandListener listener = new PrintCommandListener(System.out, true) {
225             @Override
226             public void protocolReplyReceived(final ProtocolCommandEvent event) {
227                 if (event.getReplyCode() != IMAPReply.PARTIAL){ // This is dealt with by the chunk listener
228                     super.protocolReplyReceived(event);
229                 }
230             }
231         };
232 
233         // Connect and login
234         final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener);
235 
236         String maxIndexInFolder = null;
237 
238         try {
239 
240             imap.setSoTimeout(read_timeout * 1000);
241 
242             if (!imap.select(folder)){
243                 throw new IOException("Could not select folder: " + folder);
244             }
245 
246             for(final String line : imap.getReplyStrings()) {
247                 maxIndexInFolder = matches(line, PATEXISTS, 1);
248                 if (maxIndexInFolder != null) {
249                     break;
250                 }
251             }
252 
253             if (mboxListener != null) {
254                 imap.setChunkListener(mboxListener);
255             } // else the command listener displays the full output without processing
256 
257 
258             while (true) {
259                 final boolean ok = imap.fetch(sequenceSet, itemNames);
260                 // If the fetch failed, can we retry?
261                 if (!ok && retryWaitSecs > 0 && mboxListener != null && checkSequence) {
262                     final String replyString = imap.getReplyString(); //includes EOL
263                     if (startsWith(replyString, PATTEMPFAIL)) {
264                         System.err.println("Temporary error detected, will retry in " + retryWaitSecs + "seconds");
265                         sequenceSet = mboxListener.lastSeq+1+":*";
266                         try {
267                             Thread.sleep(retryWaitSecs * 1000);
268                         } catch (final InterruptedException e) {
269                             // ignored
270                         }
271                     } else {
272                         throw new IOException("FETCH " + sequenceSet + " " + itemNames+ " failed with " + replyString);
273                     }
274                 } else {
275                     break;
276                 }
277             }
278 
279         } catch (final IOException ioe) {
280             final String count = mboxListener == null ? "?" : mboxListener.total.toString();
281             System.err.println(
282                     "FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages ");
283             if (mboxListener != null) {
284                 System.err.println("Last complete response seen: "+mboxListener.lastFetched);
285             }
286             throw ioe;
287         } finally {
288 
289             if (printHash) {
290                 System.err.println();
291             }
292 
293             if (mboxListener != null) {
294                 mboxListener.close();
295                 final Iterator<String> missingIds = mboxListener.missingIds.iterator();
296                 if (missingIds.hasNext()) {
297                     final StringBuilder sb = new StringBuilder();
298                     for(;;) {
299                         sb.append(missingIds.next());
300                         if (!missingIds.hasNext()) {
301                             break;
302                         }
303                         sb.append(",");
304                     }
305                     System.err.println("*** Missing ids: " + sb.toString());
306                 }
307             }
308             imap.logout();
309             imap.disconnect();
310         }
311         if (mboxListener != null) {
312             System.out.println("Processed " + mboxListener.total + " messages.");
313         }
314         if (maxIndexInFolder != null) {
315             System.out.println("Folder contained " + maxIndexInFolder + " messages.");
316         }
317     }
318 
319     private static boolean startsWith(final String input, final Pattern pat) {
320         final Matcher m = pat.matcher(input);
321         return m.lookingAt();
322     }
323 
324     private static String matches(final String input, final Pattern pat, final int index) {
325         final Matcher m = pat.matcher(input);
326         if (m.lookingAt()) {
327             return m.group(index);
328         }
329         return null;
330     }
331 
332     private static class MboxListener implements IMAPChunkListener {
333 
334         private final BufferedWriter bufferedWriter;
335         volatile AtomicInteger total = new AtomicInteger();
336         volatile String lastFetched;
337         volatile List<String> missingIds = new ArrayList<>();
338         volatile long lastSeq = -1;
339         private final String lineSeparator;
340         private final SimpleDateFormat DATE_FORMAT // for mbox From_ lines
341             = new SimpleDateFormat("EEE MMM dd HH:mm:ss YYYY");
342 
343         // e.g. INTERNALDATE "27-Oct-2013 07:43:24 +0000"
344         // for parsing INTERNALDATE
345         private final SimpleDateFormat IDPARSE = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss Z");
346         private final boolean printHash;
347         private final boolean printMarker;
348         private final boolean checkSequence;
349 
350         MboxListener(final BufferedWriter bufferedWriter, final String lineSeparator, final boolean printHash,
351             final boolean printMarker, final boolean checkSequence) {
352             this.lineSeparator = lineSeparator;
353             this.printHash = printHash;
354             this.printMarker = printMarker;
355             DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("GMT"));
356             this.bufferedWriter = bufferedWriter;
357             this.checkSequence = checkSequence;
358         }
359 
360         @Override
361         public boolean chunkReceived(final IMAP imap) {
362             final String[] replyStrings = imap.getReplyStrings();
363             Date received = new Date();
364             final String firstLine = replyStrings[0];
365             Matcher m = PATID.matcher(firstLine);
366             if (m.lookingAt()) { // found a match
367                 final String date = m.group(PATID_DATE_GROUP);
368                 try {
369                     received=IDPARSE.parse(date);
370                 } catch (final ParseException e) {
371                     System.err.println(e);
372                 }
373             } else {
374                 System.err.println("No timestamp found in: " + firstLine + "  - using current time");
375             }
376             String replyTo = "MAILER-DAEMON"; // default
377             for(int i=1; i< replyStrings.length - 1; i++) {
378                 final String line = replyStrings[i];
379                 if (line.startsWith("Return-Path: ")) {
380                     final String[] parts = line.split(" ", 2);
381                     if (!parts[1].equals("<>")) {// Don't replace default with blank
382                         replyTo = parts[1];
383                         if (replyTo.startsWith("<")) {
384                             if (replyTo.endsWith(">")) {
385                                 replyTo = replyTo.substring(1,replyTo.length()-1); // drop <> wrapper
386                             } else {
387                                 System.err.println("Unexpected Return-path: '" + line+ "' in " + firstLine);
388                             }
389                         }
390                     }
391                     break;
392                 }
393             }
394             try {
395                 // Add initial mbox header line
396                 bufferedWriter.append("From ");
397                 bufferedWriter.append(replyTo);
398                 bufferedWriter.append(' ');
399                 bufferedWriter.append(DATE_FORMAT.format(received));
400                 bufferedWriter.append(lineSeparator);
401                 // Debug
402                 bufferedWriter.append("X-IMAP-Response: ").append(firstLine).append(lineSeparator);
403                 if (printMarker) {
404                     System.err.println("[" + total + "] " + firstLine);
405                 }
406                 // Skip first and last lines
407                 for(int i=1; i< replyStrings.length - 1; i++) {
408                     final String line = replyStrings[i];
409                         if (startsWith(line, PATFROM)) {
410                             bufferedWriter.append('>'); // Escape a From_ line
411                         }
412                         bufferedWriter.append(line);
413                         bufferedWriter.append(lineSeparator);
414                 }
415                 // The last line ends with the trailing closing ")" which needs to be stripped
416                 final String lastLine = replyStrings[replyStrings.length-1];
417                 final int lastLength = lastLine.length();
418                 if (lastLength > 1) { // there's some content, we need to save it
419                     bufferedWriter.append(lastLine, 0, lastLength-1);
420                     bufferedWriter.append(lineSeparator);
421                 }
422                 bufferedWriter.append(lineSeparator); // blank line between entries
423             } catch (final IOException e) {
424                 e.printStackTrace();
425                 throw new RuntimeException(e); // chunkReceived cannot throw a checked Exception
426             }
427             lastFetched = firstLine;
428             total.incrementAndGet();
429             if (checkSequence) {
430                 m = PATSEQ.matcher(firstLine);
431                 if (m.lookingAt()) { // found a match
432                     final long msgSeq = Long.parseLong(m.group(PATSEQ_SEQUENCE_GROUP)); // Cannot fail to parse
433                     if (lastSeq != -1) {
434                         final long missing = msgSeq - lastSeq - 1;
435                         if (missing != 0) {
436                             for(long j = lastSeq + 1; j < msgSeq; j++) {
437                                 missingIds.add(String.valueOf(j));
438                             }
439                             System.err.println(
440                                 "*** Sequence error: current=" + msgSeq + " previous=" + lastSeq + " Missing=" + missing);
441                         }
442                     }
443                     lastSeq = msgSeq;
444                 }
445             }
446             if (printHash) {
447                 System.err.print(".");
448             }
449             return true;
450         }
451 
452         public void close() throws IOException {
453             if (bufferedWriter != null) {
454                 bufferedWriter.close();
455             }
456         }
457     }
458 }