View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.net.examples.mail;
19  
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;
import org.apache.commons.net.PrintCommandListener;
import org.apache.commons.net.ProtocolCommandEvent;
import org.apache.commons.net.imap.IMAP;
import org.apache.commons.net.imap.IMAP.IMAPChunkListener;
import org.apache.commons.net.imap.IMAPClient;
import org.apache.commons.net.imap.IMAPReply;
48  
49  /**
50   * This is an example program demonstrating how to use the IMAP[S]Client class. This program connects to an IMAP[S] server and exports selected messages from a
51   * folder into an mbox file.
52   * <p>
53   * Usage: IMAPExportMbox imap[s]://user:password@host[:port]/folder/path &lt;mboxfile&gt; [sequence-set] [item-names]
54   * <p>
55   * An example sequence-set might be:
56   * <ul>
57   * <li>11,2,3:10,20:*</li>
58   * <li>1:* - this is the default</li>
59   * </ul>
60   * <p>
61   * Some example item-names might be:
62   * <ul>
63   * <li>BODY.PEEK[HEADER]</li>
64   * <li>'BODY.PEEK[HEADER.FIELDS (SUBJECT)]'</li>
65   * <li>ALL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE)'</li>
66   * <li>FAST - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE)'</li>
67   * <li>FULL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE BODY)'</li>
68   * <li>ENVELOPE X-GM-LABELS</li>
69   * <li>'(INTERNALDATE BODY.PEEK[])' - this is the default</li>
70   * </ul>
71   * <p>
72   * Macro names cannot be combined with anything else; they must be used alone.<br>
73   * Note that using BODY will set the \Seen flag. This is why the default uses BODY.PEEK[].<br>
74   * The item name X-GM-LABELS is a Google Mail extension; it shows the labels for a message.<br>
75   * For example:<br>
76   * IMAPExportMbox imaps://user:password@imap.googlemail.com/messages_for_export exported.mbox 1:10,20<br>
77   * IMAPExportMbox imaps://user:password@imap.googlemail.com/messages_for_export exported.mbox 3 ENVELOPE X-GM-LABELS<br>
78   * <p>
79   * The sequence-set is passed unmodified to the FETCH command.<br>
80   * The item names are wrapped in parentheses if more than one is provided. Otherwise, the parameter is assumed to be wrapped if necessary.<br>
81   * Parameters with spaces must be quoted otherwise the OS shell will normally treat them as separate parameters.<br>
82   * Also, the listener that writes the mailbox only captures the multi-line responses (e.g. ones that include BODY references). It does not capture the output
83   * from FETCH commands using item names such as ENVELOPE or FLAGS that return a single line response.
84   */
85  public final class IMAPExportMbox {
86  
87      private static final class MboxListener implements IMAPChunkListener {
88  
89          private final BufferedWriter bufferedWriter;
90          volatile AtomicInteger total = new AtomicInteger();
91          volatile String lastFetched;
92          volatile List<String> missingIds = new ArrayList<>();
93          volatile long lastSeq = -1;
94          private final String lineSeparator;
95          private final SimpleDateFormat DATE_FORMAT // for mbox From_ lines
96                  = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy");
97  
98          // e.g. INTERNALDATE "27-Oct-2013 07:43:24 +0000"
99          // for parsing INTERNALDATE
100         private final SimpleDateFormat IDPARSE = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss Z");
101         private final boolean printHash;
102         private final boolean printMarker;
103         private final boolean checkSequence;
104 
105         MboxListener(final BufferedWriter bufferedWriter, final String lineSeparator, final boolean printHash, final boolean printMarker,
106                 final boolean checkSequence) {
107             this.lineSeparator = lineSeparator;
108             this.printHash = printHash;
109             this.printMarker = printMarker;
110             DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("GMT"));
111             this.bufferedWriter = bufferedWriter;
112             this.checkSequence = checkSequence;
113         }
114 
115         @Override
116         public boolean chunkReceived(final IMAP imap) {
117             final String[] replyStrings = imap.getReplyStrings();
118             Date received = new Date();
119             final String firstLine = replyStrings[0];
120             Matcher m = PATID.matcher(firstLine);
121             if (m.lookingAt()) { // found a match
122                 final String date = m.group(PATID_DATE_GROUP);
123                 try {
124                     received = IDPARSE.parse(date);
125                 } catch (final ParseException e) {
126                     System.err.println(e);
127                 }
128             } else {
129                 System.err.println("No timestamp found in: " + firstLine + "  - using current time");
130             }
131             String replyTo = "MAILER-DAEMON"; // default
132             for (int i = 1; i < replyStrings.length - 1; i++) {
133                 final String line = replyStrings[i];
134                 if (line.startsWith("Return-Path: ")) {
135                     final String[] parts = line.split(" ", 2);
136                     if (!parts[1].equals("<>")) { // Don't replace default with blank
137                         replyTo = parts[1];
138                         if (replyTo.startsWith("<")) {
139                             if (replyTo.endsWith(">")) {
140                                 replyTo = replyTo.substring(1, replyTo.length() - 1); // drop <> wrapper
141                             } else {
142                                 System.err.println("Unexpected Return-path: '" + line + "' in " + firstLine);
143                             }
144                         }
145                     }
146                     break;
147                 }
148             }
149             try {
150                 // Add initial mbox header line
151                 bufferedWriter.append("From ");
152                 bufferedWriter.append(replyTo);
153                 bufferedWriter.append(' ');
154                 bufferedWriter.append(DATE_FORMAT.format(received));
155                 bufferedWriter.append(lineSeparator);
156                 // Debug
157                 bufferedWriter.append("X-IMAP-Response: ").append(firstLine).append(lineSeparator);
158                 if (printMarker) {
159                     System.err.println("[" + total + "] " + firstLine);
160                 }
161                 // Skip first and last lines
162                 for (int i = 1; i < replyStrings.length - 1; i++) {
163                     final String line = replyStrings[i];
164                     if (startsWith(line, PATFROM)) {
165                         bufferedWriter.append('>'); // Escape a From_ line
166                     }
167                     bufferedWriter.append(line);
168                     bufferedWriter.append(lineSeparator);
169                 }
170                 // The last line ends with the trailing closing ")" which needs to be stripped
171                 final String lastLine = replyStrings[replyStrings.length - 1];
172                 final int lastLength = lastLine.length();
173                 if (lastLength > 1) { // there's some content, we need to save it
174                     bufferedWriter.append(lastLine, 0, lastLength - 1);
175                     bufferedWriter.append(lineSeparator);
176                 }
177                 bufferedWriter.append(lineSeparator); // blank line between entries
178             } catch (final IOException e) {
179                 e.printStackTrace();
180                 throw new UncheckedIOException(e); // chunkReceived cannot throw a checked Exception
181             }
182             lastFetched = firstLine;
183             total.incrementAndGet();
184             if (checkSequence) {
185                 m = PATSEQ.matcher(firstLine);
186                 if (m.lookingAt()) { // found a match
187                     final long msgSeq = Long.parseLong(m.group(PATSEQ_SEQUENCE_GROUP)); // Cannot fail to parse
188                     if (lastSeq != -1) {
189                         final long missing = msgSeq - lastSeq - 1;
190                         if (missing != 0) {
191                             for (long j = lastSeq + 1; j < msgSeq; j++) {
192                                 missingIds.add(String.valueOf(j));
193                             }
194                             System.err.println("*** Sequence error: current=" + msgSeq + " previous=" + lastSeq + " Missing=" + missing);
195                         }
196                     }
197                     lastSeq = msgSeq;
198                 }
199             }
200             if (printHash) {
201                 System.err.print(".");
202             }
203             return true;
204         }
205 
206         public void close() throws IOException {
207             IOUtils.close(bufferedWriter);
208         }
209     }
210 
211     private static final String CRLF = "\r\n";
212     private static final String LF = "\n";
213 
214     private static final String EOL_DEFAULT = System.lineSeparator();
215     private static final Pattern PATFROM = Pattern.compile(">*From "); // unescaped From_
216     // e.g. * nnn (INTERNALDATE "27-Oct-2013 07:43:24 +0000" BODY[] {nn} ...)
217     private static final Pattern PATID = // INTERNALDATE
218             Pattern.compile(".*INTERNALDATE \"(\\d\\d-\\w{3}-\\d{4} \\d\\d:\\d\\d:\\d\\d [+-]\\d+)\"");
219 
220     private static final int PATID_DATE_GROUP = 1;
221     private static final Pattern PATSEQ = Pattern.compile("\\* (\\d+) "); // Sequence number
222 
223     private static final int PATSEQ_SEQUENCE_GROUP = 1;
224 
225     // e.g. * 382 EXISTS
226     private static final Pattern PATEXISTS = Pattern.compile("\\* (\\d+) EXISTS"); // Response from SELECT
227 
228     // AAAC NO [TEMPFAIL] FETCH Temporary failure on server [CODE: WBL]
229     private static final Pattern PATTEMPFAIL = Pattern.compile("[A-Z]{4} NO \\[TEMPFAIL\\] FETCH .*");
230     private static final int CONNECT_TIMEOUT = 10; // Seconds
231 
232     private static final int READ_TIMEOUT = 10;
233 
234     public static void main(final String[] args) throws IOException, URISyntaxException {
235         int connect_timeout = CONNECT_TIMEOUT;
236         int read_timeout = READ_TIMEOUT;
237 
238         int argIdx = 0;
239         String eol = EOL_DEFAULT;
240         boolean printHash = false;
241         boolean printMarker = false;
242         int retryWaitSecs = 0;
243 
244         for (argIdx = 0; argIdx < args.length; argIdx++) {
245             if (args[argIdx].equals("-c")) {
246                 connect_timeout = Integer.parseInt(args[++argIdx]);
247             } else if (args[argIdx].equals("-r")) {
248                 read_timeout = Integer.parseInt(args[++argIdx]);
249             } else if (args[argIdx].equals("-R")) {
250                 retryWaitSecs = Integer.parseInt(args[++argIdx]);
251             } else if (args[argIdx].equals("-LF")) {
252                 eol = LF;
253             } else if (args[argIdx].equals("-CRLF")) {
254                 eol = CRLF;
255             } else if (args[argIdx].equals("-.")) {
256                 printHash = true;
257             } else if (args[argIdx].equals("-X")) {
258                 printMarker = true;
259             } else {
260                 break;
261             }
262         }
263 
264         final int argCount = args.length - argIdx;
265 
266         if (argCount < 2) {
267             System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]"
268                     + " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]");
269             System.err.println("\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)");
270             System.err.println("\t-c connect timeout in seconds (default 10)");
271             System.err.println("\t-r read timeout in seconds (default 10)");
272             System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)");
273             System.err.println("\t-. print a . for each complete message received");
274             System.err.println("\t-X print the X-IMAP line for each complete message received");
275             System.err.println("\tthe mboxfile is where the messages are stored; use '-' to write to standard output.");
276             System.err.println("\tPrefix file name with '+' to append to the file. Prefix with '-' to allow overwrite.");
277             System.err.println("\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*");
278             System.err.println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]"
279                     + " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])");
280             System.exit(1);
281         }
282 
283         final String uriString = args[argIdx++];
284         URI uri;
285         try {
286             uri = URI.create(uriString);
287         } catch (final IllegalArgumentException e) { // cannot parse the path as is; let's pull it apart and try again
288             final Matcher m = Pattern.compile("(imaps?://[^/]+)(/.*)").matcher(uriString);
289             if (!m.matches()) {
290                 throw e;
291             }
292             uri = URI.create(m.group(1)); // Just the scheme and auth parts
293             uri = new URI(uri.getScheme(), uri.getAuthority(), m.group(2), null, null);
294         }
295         final String file = args[argIdx++];
296         String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*";
297         final String itemNames;
298         // Handle 0, 1 or multiple item names
299         if (argCount > 3) {
300             if (argCount > 4) {
301                 final StringBuilder sb = new StringBuilder();
302                 sb.append("(");
303                 for (int i = 4; i <= argCount; i++) {
304                     if (i > 4) {
305                         sb.append(" ");
306                     }
307                     sb.append(args[argIdx++]);
308                 }
309                 sb.append(")");
310                 itemNames = sb.toString();
311             } else {
312                 itemNames = args[argIdx++];
313             }
314         } else {
315             itemNames = "(INTERNALDATE BODY.PEEK[])";
316         }
317 
318         final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)"); // are we expecting a sequence?
319         final MboxListener mboxListener;
320         if (file.equals("-")) {
321             mboxListener = null;
322         } else if (file.startsWith("+")) {
323             final Path mboxPath = Paths.get(file.substring(1));
324             System.out.println("Appending to file " + mboxPath);
325             mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE, StandardOpenOption.APPEND),
326                     eol, printHash, printMarker, checkSequence);
327         } else if (file.startsWith("-")) {
328             final Path mboxPath = Paths.get(file.substring(1));
329             System.out.println("Writing to file " + mboxPath);
330             mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE), eol, printHash, printMarker,
331                     checkSequence);
332         } else {
333             final Path mboxPath = Paths.get(file);
334             if (Files.exists(mboxPath) && Files.size(mboxPath) > 0) {
335                 throw new IOException("mailbox file: " + mboxPath + " already exists and is non-empty!");
336             }
337             System.out.println("Creating file " + mboxPath);
338             mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE), eol, printHash, printMarker,
339                     checkSequence);
340         }
341 
342         final String path = uri.getPath();
343         if (path == null || path.length() < 1) {
344             throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
345         }
346         final String folder = path.substring(1); // skip the leading /
347 
348         // suppress login details
349         final PrintCommandListener listener = new PrintCommandListener(System.out, true) {
350             @Override
351             public void protocolReplyReceived(final ProtocolCommandEvent event) {
352                 if (event.getReplyCode() != IMAPReply.PARTIAL) { // This is dealt with by the chunk listener
353                     super.protocolReplyReceived(event);
354                 }
355             }
356         };
357 
358         // Connect and login
359         final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener);
360 
361         String maxIndexInFolder = null;
362 
363         try {
364 
365             imap.setSoTimeout(read_timeout * 1000);
366 
367             if (!imap.select(folder)) {
368                 throw new IOException("Could not select folder: " + folder);
369             }
370 
371             for (final String line : imap.getReplyStrings()) {
372                 maxIndexInFolder = matches(line, PATEXISTS, 1);
373                 if (maxIndexInFolder != null) {
374                     break;
375                 }
376             }
377 
378             if (mboxListener != null) {
379                 imap.setChunkListener(mboxListener);
380             } // else the command listener displays the full output without processing
381 
382             while (true) {
383                 final boolean ok = imap.fetch(sequenceSet, itemNames);
384                 // If the fetch failed, can we retry?
385                 if (ok || retryWaitSecs <= 0 || mboxListener == null || !checkSequence) {
386                     break;
387                 }
388                 final String replyString = imap.getReplyString(); // includes EOL
389                 if (!startsWith(replyString, PATTEMPFAIL)) {
390                     throw new IOException("FETCH " + sequenceSet + " " + itemNames + " failed with " + replyString);
391                 }
392                 System.err.println("Temporary error detected, will retry in " + retryWaitSecs + "seconds");
393                 sequenceSet = mboxListener.lastSeq + 1 + ":*";
394                 try {
395                     Thread.sleep(retryWaitSecs * 1000);
396                 } catch (final InterruptedException e) {
397                     // ignored
398                 }
399             }
400 
401         } catch (final IOException ioe) {
402             final String count = mboxListener == null ? "?" : mboxListener.total.toString();
403             System.err.println("FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages ");
404             if (mboxListener != null) {
405                 System.err.println("Last complete response seen: " + mboxListener.lastFetched);
406             }
407             throw ioe;
408         } finally {
409 
410             if (printHash) {
411                 System.err.println();
412             }
413 
414             if (mboxListener != null) {
415                 mboxListener.close();
416                 final Iterator<String> missingIds = mboxListener.missingIds.iterator();
417                 if (missingIds.hasNext()) {
418                     final StringBuilder sb = new StringBuilder();
419                     for (;;) {
420                         sb.append(missingIds.next());
421                         if (!missingIds.hasNext()) {
422                             break;
423                         }
424                         sb.append(",");
425                     }
426                     System.err.println("*** Missing ids: " + sb.toString());
427                 }
428             }
429             imap.logout();
430             imap.disconnect();
431         }
432         if (mboxListener != null) {
433             System.out.println("Processed " + mboxListener.total + " messages.");
434         }
435         if (maxIndexInFolder != null) {
436             System.out.println("Folder contained " + maxIndexInFolder + " messages.");
437         }
438     }
439 
440     private static String matches(final String input, final Pattern pat, final int index) {
441         final Matcher m = pat.matcher(input);
442         if (m.lookingAt()) {
443             return m.group(index);
444         }
445         return null;
446     }
447 
448     private static boolean startsWith(final String input, final Pattern pat) {
449         final Matcher m = pat.matcher(input);
450         return m.lookingAt();
451     }
452 }