1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.net.examples.mail;
19
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.net.PrintCommandListener;
import org.apache.commons.net.ProtocolCommandEvent;
import org.apache.commons.net.imap.IMAP;
import org.apache.commons.net.imap.IMAP.IMAPChunkListener;
import org.apache.commons.net.imap.IMAPClient;
import org.apache.commons.net.imap.IMAPReply;
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81 public final class IMAPExportMbox
82 {
83
84 private static final String CRLF = "\r\n";
85 private static final String LF = "\n";
86 private static final String EOL_DEFAULT = System.getProperty("line.separator");
87
88 private static final Pattern PATFROM = Pattern.compile(">*From ");
89
90 private static final Pattern PATID =
91 Pattern.compile(".*INTERNALDATE \"(\\d\\d-\\w{3}-\\d{4} \\d\\d:\\d\\d:\\d\\d [+-]\\d+)\"");
92 private static final int PATID_DATE_GROUP = 1;
93
94 private static final Pattern PATSEQ = Pattern.compile("\\* (\\d+) ");
95 private static final int PATSEQ_SEQUENCE_GROUP = 1;
96
97
98 private static final Pattern PATEXISTS = Pattern.compile("\\* (\\d+) EXISTS");
99
100
101 private static final Pattern PATTEMPFAIL = Pattern.compile("[A-Z]{4} NO \\[TEMPFAIL\\] FETCH .*");
102
103 private static final int CONNECT_TIMEOUT = 10;
104 private static final int READ_TIMEOUT = 10;
105
106 public static void main(final String[] args) throws IOException, URISyntaxException
107 {
108 int connect_timeout = CONNECT_TIMEOUT;
109 int read_timeout = READ_TIMEOUT;
110
111 int argIdx = 0;
112 String eol = EOL_DEFAULT;
113 boolean printHash = false;
114 boolean printMarker = false;
115 int retryWaitSecs = 0;
116
117 for(argIdx = 0; argIdx < args.length; argIdx++) {
118 if (args[argIdx].equals("-c")) {
119 connect_timeout = Integer.parseInt(args[++argIdx]);
120 } else if (args[argIdx].equals("-r")) {
121 read_timeout = Integer.parseInt(args[++argIdx]);
122 } else if (args[argIdx].equals("-R")) {
123 retryWaitSecs = Integer.parseInt(args[++argIdx]);
124 } else if (args[argIdx].equals("-LF")) {
125 eol = LF;
126 } else if (args[argIdx].equals("-CRLF")) {
127 eol = CRLF;
128 } else if (args[argIdx].equals("-.")) {
129 printHash = true;
130 } else if (args[argIdx].equals("-X")) {
131 printMarker = true;
132 } else {
133 break;
134 }
135 }
136
137 final int argCount = args.length - argIdx;
138
139 if (argCount < 2)
140 {
141 System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]" +
142 " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]");
143 System.err.println("\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)");
144 System.err.println("\t-c connect timeout in seconds (default 10)");
145 System.err.println("\t-r read timeout in seconds (default 10)");
146 System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)");
147 System.err.println("\t-. print a . for each complete message received");
148 System.err.println("\t-X print the X-IMAP line for each complete message received");
149 System.err.println("\tthe mboxfile is where the messages are stored; use '-' to write to standard output.");
150 System.err.println("\tPrefix file name with '+' to append to the file. Prefix with '-' to allow overwrite.");
151 System.err.println("\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*");
152 System.err.println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]" +
153 " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])");
154 System.exit(1);
155 }
156
157 final String uriString = args[argIdx++];
158 URI uri;
159 try {
160 uri = URI.create(uriString);
161 } catch(final IllegalArgumentException e) {
162 final Matcher m = Pattern.compile("(imaps?://[^/]+)(/.*)").matcher(uriString);
163 if (m.matches()) {
164 uri = URI.create(m.group(1));
165 uri = new URI(uri.getScheme(), uri.getAuthority(), m.group(2), null, null);
166 } else {
167 throw e;
168 }
169 }
170 final String file = args[argIdx++];
171 String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*";
172 final String itemNames;
173
174 if (argCount > 3) {
175 if (argCount > 4) {
176 final StringBuilder sb = new StringBuilder();
177 sb.append("(");
178 for(int i=4; i <= argCount; i++) {
179 if (i>4) {
180 sb.append(" ");
181 }
182 sb.append(args[argIdx++]);
183 }
184 sb.append(")");
185 itemNames = sb.toString();
186 } else {
187 itemNames = args[argIdx++];
188 }
189 } else {
190 itemNames = "(INTERNALDATE BODY.PEEK[])";
191 }
192
193 final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)");
194 final MboxListener mboxListener;
195 if (file.equals("-")) {
196 mboxListener = null;
197 } else if (file.startsWith("+")) {
198 final File mbox = new File(file.substring(1));
199 System.out.println("Appending to file " + mbox);
200 mboxListener = new MboxListener(
201 new BufferedWriter(new FileWriter(mbox, true)), eol, printHash, printMarker, checkSequence);
202 } else if (file.startsWith("-")) {
203 final File mbox = new File(file.substring(1));
204 System.out.println("Writing to file " + mbox);
205 mboxListener = new MboxListener(
206 new BufferedWriter(new FileWriter(mbox, false)), eol, printHash, printMarker, checkSequence);
207 } else {
208 final File mboxFile = new File(file);
209 if (mboxFile.exists() && mboxFile.length() > 0) {
210 throw new IOException("mailbox file: " + mboxFile + " already exists and is non-empty!");
211 }
212 System.out.println("Creating file " + mboxFile);
213 mboxListener = new MboxListener(new BufferedWriter(new FileWriter(mboxFile)), eol, printHash, printMarker,
214 checkSequence);
215 }
216
217 final String path = uri.getPath();
218 if (path == null || path.length() < 1) {
219 throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
220 }
221 final String folder = path.substring(1);
222
223
224 final PrintCommandListenertener.html#PrintCommandListener">PrintCommandListener listener = new PrintCommandListener(System.out, true) {
225 @Override
226 public void protocolReplyReceived(final ProtocolCommandEvent event) {
227 if (event.getReplyCode() != IMAPReply.PARTIAL){
228 super.protocolReplyReceived(event);
229 }
230 }
231 };
232
233
234 final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener);
235
236 String maxIndexInFolder = null;
237
238 try {
239
240 imap.setSoTimeout(read_timeout * 1000);
241
242 if (!imap.select(folder)){
243 throw new IOException("Could not select folder: " + folder);
244 }
245
246 for(final String line : imap.getReplyStrings()) {
247 maxIndexInFolder = matches(line, PATEXISTS, 1);
248 if (maxIndexInFolder != null) {
249 break;
250 }
251 }
252
253 if (mboxListener != null) {
254 imap.setChunkListener(mboxListener);
255 }
256
257
258 while (true) {
259 final boolean ok = imap.fetch(sequenceSet, itemNames);
260
261 if (!ok && retryWaitSecs > 0 && mboxListener != null && checkSequence) {
262 final String replyString = imap.getReplyString();
263 if (startsWith(replyString, PATTEMPFAIL)) {
264 System.err.println("Temporary error detected, will retry in " + retryWaitSecs + "seconds");
265 sequenceSet = mboxListener.lastSeq+1+":*";
266 try {
267 Thread.sleep(retryWaitSecs * 1000);
268 } catch (final InterruptedException e) {
269
270 }
271 } else {
272 throw new IOException("FETCH " + sequenceSet + " " + itemNames+ " failed with " + replyString);
273 }
274 } else {
275 break;
276 }
277 }
278
279 } catch (final IOException ioe) {
280 final String count = mboxListener == null ? "?" : mboxListener.total.toString();
281 System.err.println(
282 "FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages ");
283 if (mboxListener != null) {
284 System.err.println("Last complete response seen: "+mboxListener.lastFetched);
285 }
286 throw ioe;
287 } finally {
288
289 if (printHash) {
290 System.err.println();
291 }
292
293 if (mboxListener != null) {
294 mboxListener.close();
295 final Iterator<String> missingIds = mboxListener.missingIds.iterator();
296 if (missingIds.hasNext()) {
297 final StringBuilder sb = new StringBuilder();
298 for(;;) {
299 sb.append(missingIds.next());
300 if (!missingIds.hasNext()) {
301 break;
302 }
303 sb.append(",");
304 }
305 System.err.println("*** Missing ids: " + sb.toString());
306 }
307 }
308 imap.logout();
309 imap.disconnect();
310 }
311 if (mboxListener != null) {
312 System.out.println("Processed " + mboxListener.total + " messages.");
313 }
314 if (maxIndexInFolder != null) {
315 System.out.println("Folder contained " + maxIndexInFolder + " messages.");
316 }
317 }
318
319 private static boolean startsWith(final String input, final Pattern pat) {
320 final Matcher m = pat.matcher(input);
321 return m.lookingAt();
322 }
323
324 private static String matches(final String input, final Pattern pat, final int index) {
325 final Matcher m = pat.matcher(input);
326 if (m.lookingAt()) {
327 return m.group(index);
328 }
329 return null;
330 }
331
332 private static class MboxListener implements IMAPChunkListener {
333
334 private final BufferedWriter bufferedWriter;
335 volatile AtomicInteger total = new AtomicInteger();
336 volatile String lastFetched;
337 volatile List<String> missingIds = new ArrayList<>();
338 volatile long lastSeq = -1;
339 private final String lineSeparator;
340 private final SimpleDateFormat DATE_FORMAT
341 = new SimpleDateFormat("EEE MMM dd HH:mm:ss YYYY");
342
343
344
345 private final SimpleDateFormat IDPARSE = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss Z");
346 private final boolean printHash;
347 private final boolean printMarker;
348 private final boolean checkSequence;
349
350 MboxListener(final BufferedWriter bufferedWriter, final String lineSeparator, final boolean printHash,
351 final boolean printMarker, final boolean checkSequence) {
352 this.lineSeparator = lineSeparator;
353 this.printHash = printHash;
354 this.printMarker = printMarker;
355 DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("GMT"));
356 this.bufferedWriter = bufferedWriter;
357 this.checkSequence = checkSequence;
358 }
359
360 @Override
361 public boolean chunkReceived(final IMAP imap) {
362 final String[] replyStrings = imap.getReplyStrings();
363 Date received = new Date();
364 final String firstLine = replyStrings[0];
365 Matcher m = PATID.matcher(firstLine);
366 if (m.lookingAt()) {
367 final String date = m.group(PATID_DATE_GROUP);
368 try {
369 received=IDPARSE.parse(date);
370 } catch (final ParseException e) {
371 System.err.println(e);
372 }
373 } else {
374 System.err.println("No timestamp found in: " + firstLine + " - using current time");
375 }
376 String replyTo = "MAILER-DAEMON";
377 for(int i=1; i< replyStrings.length - 1; i++) {
378 final String line = replyStrings[i];
379 if (line.startsWith("Return-Path: ")) {
380 final String[] parts = line.split(" ", 2);
381 if (!parts[1].equals("<>")) {
382 replyTo = parts[1];
383 if (replyTo.startsWith("<")) {
384 if (replyTo.endsWith(">")) {
385 replyTo = replyTo.substring(1,replyTo.length()-1);
386 } else {
387 System.err.println("Unexpected Return-path: '" + line+ "' in " + firstLine);
388 }
389 }
390 }
391 break;
392 }
393 }
394 try {
395
396 bufferedWriter.append("From ");
397 bufferedWriter.append(replyTo);
398 bufferedWriter.append(' ');
399 bufferedWriter.append(DATE_FORMAT.format(received));
400 bufferedWriter.append(lineSeparator);
401
402 bufferedWriter.append("X-IMAP-Response: ").append(firstLine).append(lineSeparator);
403 if (printMarker) {
404 System.err.println("[" + total + "] " + firstLine);
405 }
406
407 for(int i=1; i< replyStrings.length - 1; i++) {
408 final String line = replyStrings[i];
409 if (startsWith(line, PATFROM)) {
410 bufferedWriter.append('>');
411 }
412 bufferedWriter.append(line);
413 bufferedWriter.append(lineSeparator);
414 }
415
416 final String lastLine = replyStrings[replyStrings.length-1];
417 final int lastLength = lastLine.length();
418 if (lastLength > 1) {
419 bufferedWriter.append(lastLine, 0, lastLength-1);
420 bufferedWriter.append(lineSeparator);
421 }
422 bufferedWriter.append(lineSeparator);
423 } catch (final IOException e) {
424 e.printStackTrace();
425 throw new RuntimeException(e);
426 }
427 lastFetched = firstLine;
428 total.incrementAndGet();
429 if (checkSequence) {
430 m = PATSEQ.matcher(firstLine);
431 if (m.lookingAt()) {
432 final long msgSeq = Long.parseLong(m.group(PATSEQ_SEQUENCE_GROUP));
433 if (lastSeq != -1) {
434 final long missing = msgSeq - lastSeq - 1;
435 if (missing != 0) {
436 for(long j = lastSeq + 1; j < msgSeq; j++) {
437 missingIds.add(String.valueOf(j));
438 }
439 System.err.println(
440 "*** Sequence error: current=" + msgSeq + " previous=" + lastSeq + " Missing=" + missing);
441 }
442 }
443 lastSeq = msgSeq;
444 }
445 }
446 if (printHash) {
447 System.err.print(".");
448 }
449 return true;
450 }
451
452 public void close() throws IOException {
453 if (bufferedWriter != null) {
454 bufferedWriter.close();
455 }
456 }
457 }
458 }