1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.net.examples.mail;
19
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.net.PrintCommandListener;
import org.apache.commons.net.ProtocolCommandEvent;
import org.apache.commons.net.imap.IMAP;
import org.apache.commons.net.imap.IMAP.IMAPChunkListener;
import org.apache.commons.net.imap.IMAPClient;
import org.apache.commons.net.imap.IMAPReply;
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81 public final class IMAPExportMbox {
82
83 private static class MboxListener implements IMAPChunkListener {
84
85 private final BufferedWriter bufferedWriter;
86 volatile AtomicInteger total = new AtomicInteger();
87 volatile String lastFetched;
88 volatile List<String> missingIds = new ArrayList<>();
89 volatile long lastSeq = -1;
90 private final String lineSeparator;
91 private final SimpleDateFormat DATE_FORMAT
92 = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy");
93
94
95
96 private final SimpleDateFormat IDPARSE = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss Z");
97 private final boolean printHash;
98 private final boolean printMarker;
99 private final boolean checkSequence;
100
101 MboxListener(final BufferedWriter bufferedWriter, final String lineSeparator, final boolean printHash, final boolean printMarker,
102 final boolean checkSequence) {
103 this.lineSeparator = lineSeparator;
104 this.printHash = printHash;
105 this.printMarker = printMarker;
106 DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("GMT"));
107 this.bufferedWriter = bufferedWriter;
108 this.checkSequence = checkSequence;
109 }
110
111 @Override
112 public boolean chunkReceived(final IMAP imap) {
113 final String[] replyStrings = imap.getReplyStrings();
114 Date received = new Date();
115 final String firstLine = replyStrings[0];
116 Matcher m = PATID.matcher(firstLine);
117 if (m.lookingAt()) {
118 final String date = m.group(PATID_DATE_GROUP);
119 try {
120 received = IDPARSE.parse(date);
121 } catch (final ParseException e) {
122 System.err.println(e);
123 }
124 } else {
125 System.err.println("No timestamp found in: " + firstLine + " - using current time");
126 }
127 String replyTo = "MAILER-DAEMON";
128 for (int i = 1; i < replyStrings.length - 1; i++) {
129 final String line = replyStrings[i];
130 if (line.startsWith("Return-Path: ")) {
131 final String[] parts = line.split(" ", 2);
132 if (!parts[1].equals("<>")) {
133 replyTo = parts[1];
134 if (replyTo.startsWith("<")) {
135 if (replyTo.endsWith(">")) {
136 replyTo = replyTo.substring(1, replyTo.length() - 1);
137 } else {
138 System.err.println("Unexpected Return-path: '" + line + "' in " + firstLine);
139 }
140 }
141 }
142 break;
143 }
144 }
145 try {
146
147 bufferedWriter.append("From ");
148 bufferedWriter.append(replyTo);
149 bufferedWriter.append(' ');
150 bufferedWriter.append(DATE_FORMAT.format(received));
151 bufferedWriter.append(lineSeparator);
152
153 bufferedWriter.append("X-IMAP-Response: ").append(firstLine).append(lineSeparator);
154 if (printMarker) {
155 System.err.println("[" + total + "] " + firstLine);
156 }
157
158 for (int i = 1; i < replyStrings.length - 1; i++) {
159 final String line = replyStrings[i];
160 if (startsWith(line, PATFROM)) {
161 bufferedWriter.append('>');
162 }
163 bufferedWriter.append(line);
164 bufferedWriter.append(lineSeparator);
165 }
166
167 final String lastLine = replyStrings[replyStrings.length - 1];
168 final int lastLength = lastLine.length();
169 if (lastLength > 1) {
170 bufferedWriter.append(lastLine, 0, lastLength - 1);
171 bufferedWriter.append(lineSeparator);
172 }
173 bufferedWriter.append(lineSeparator);
174 } catch (final IOException e) {
175 e.printStackTrace();
176 throw new UncheckedIOException(e);
177 }
178 lastFetched = firstLine;
179 total.incrementAndGet();
180 if (checkSequence) {
181 m = PATSEQ.matcher(firstLine);
182 if (m.lookingAt()) {
183 final long msgSeq = Long.parseLong(m.group(PATSEQ_SEQUENCE_GROUP));
184 if (lastSeq != -1) {
185 final long missing = msgSeq - lastSeq - 1;
186 if (missing != 0) {
187 for (long j = lastSeq + 1; j < msgSeq; j++) {
188 missingIds.add(String.valueOf(j));
189 }
190 System.err.println("*** Sequence error: current=" + msgSeq + " previous=" + lastSeq + " Missing=" + missing);
191 }
192 }
193 lastSeq = msgSeq;
194 }
195 }
196 if (printHash) {
197 System.err.print(".");
198 }
199 return true;
200 }
201
202 public void close() throws IOException {
203 if (bufferedWriter != null) {
204 bufferedWriter.close();
205 }
206 }
207 }
208
209 private static final String CRLF = "\r\n";
210 private static final String LF = "\n";
211
212 private static final String EOL_DEFAULT = System.lineSeparator();
213 private static final Pattern PATFROM = Pattern.compile(">*From ");
214
215 private static final Pattern PATID =
216 Pattern.compile(".*INTERNALDATE \"(\\d\\d-\\w{3}-\\d{4} \\d\\d:\\d\\d:\\d\\d [+-]\\d+)\"");
217
218 private static final int PATID_DATE_GROUP = 1;
219 private static final Pattern PATSEQ = Pattern.compile("\\* (\\d+) ");
220
221 private static final int PATSEQ_SEQUENCE_GROUP = 1;
222
223
224 private static final Pattern PATEXISTS = Pattern.compile("\\* (\\d+) EXISTS");
225
226
227 private static final Pattern PATTEMPFAIL = Pattern.compile("[A-Z]{4} NO \\[TEMPFAIL\\] FETCH .*");
228 private static final int CONNECT_TIMEOUT = 10;
229
230 private static final int READ_TIMEOUT = 10;
231
232 public static void main(final String[] args) throws IOException, URISyntaxException {
233 int connect_timeout = CONNECT_TIMEOUT;
234 int read_timeout = READ_TIMEOUT;
235
236 int argIdx = 0;
237 String eol = EOL_DEFAULT;
238 boolean printHash = false;
239 boolean printMarker = false;
240 int retryWaitSecs = 0;
241
242 for (argIdx = 0; argIdx < args.length; argIdx++) {
243 if (args[argIdx].equals("-c")) {
244 connect_timeout = Integer.parseInt(args[++argIdx]);
245 } else if (args[argIdx].equals("-r")) {
246 read_timeout = Integer.parseInt(args[++argIdx]);
247 } else if (args[argIdx].equals("-R")) {
248 retryWaitSecs = Integer.parseInt(args[++argIdx]);
249 } else if (args[argIdx].equals("-LF")) {
250 eol = LF;
251 } else if (args[argIdx].equals("-CRLF")) {
252 eol = CRLF;
253 } else if (args[argIdx].equals("-.")) {
254 printHash = true;
255 } else if (args[argIdx].equals("-X")) {
256 printMarker = true;
257 } else {
258 break;
259 }
260 }
261
262 final int argCount = args.length - argIdx;
263
264 if (argCount < 2) {
265 System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]"
266 + " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]");
267 System.err.println("\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)");
268 System.err.println("\t-c connect timeout in seconds (default 10)");
269 System.err.println("\t-r read timeout in seconds (default 10)");
270 System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)");
271 System.err.println("\t-. print a . for each complete message received");
272 System.err.println("\t-X print the X-IMAP line for each complete message received");
273 System.err.println("\tthe mboxfile is where the messages are stored; use '-' to write to standard output.");
274 System.err.println("\tPrefix file name with '+' to append to the file. Prefix with '-' to allow overwrite.");
275 System.err.println("\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*");
276 System.err.println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]"
277 + " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])");
278 System.exit(1);
279 }
280
281 final String uriString = args[argIdx++];
282 URI uri;
283 try {
284 uri = URI.create(uriString);
285 } catch (final IllegalArgumentException e) {
286 final Matcher m = Pattern.compile("(imaps?://[^/]+)(/.*)").matcher(uriString);
287 if (!m.matches()) {
288 throw e;
289 }
290 uri = URI.create(m.group(1));
291 uri = new URI(uri.getScheme(), uri.getAuthority(), m.group(2), null, null);
292 }
293 final String file = args[argIdx++];
294 String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*";
295 final String itemNames;
296
297 if (argCount > 3) {
298 if (argCount > 4) {
299 final StringBuilder sb = new StringBuilder();
300 sb.append("(");
301 for (int i = 4; i <= argCount; i++) {
302 if (i > 4) {
303 sb.append(" ");
304 }
305 sb.append(args[argIdx++]);
306 }
307 sb.append(")");
308 itemNames = sb.toString();
309 } else {
310 itemNames = args[argIdx++];
311 }
312 } else {
313 itemNames = "(INTERNALDATE BODY.PEEK[])";
314 }
315
316 final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)");
317 final MboxListener mboxListener;
318 if (file.equals("-")) {
319 mboxListener = null;
320 } else if (file.startsWith("+")) {
321 final File mbox = new File(file.substring(1));
322 System.out.println("Appending to file " + mbox);
323 mboxListener = new MboxListener(new BufferedWriter(new FileWriter(mbox, true)), eol, printHash, printMarker, checkSequence);
324 } else if (file.startsWith("-")) {
325 final File mbox = new File(file.substring(1));
326 System.out.println("Writing to file " + mbox);
327 mboxListener = new MboxListener(new BufferedWriter(new FileWriter(mbox, false)), eol, printHash, printMarker, checkSequence);
328 } else {
329 final File mboxFile = new File(file);
330 if (mboxFile.exists() && mboxFile.length() > 0) {
331 throw new IOException("mailbox file: " + mboxFile + " already exists and is non-empty!");
332 }
333 System.out.println("Creating file " + mboxFile);
334 mboxListener = new MboxListener(new BufferedWriter(new FileWriter(mboxFile)), eol, printHash, printMarker, checkSequence);
335 }
336
337 final String path = uri.getPath();
338 if (path == null || path.length() < 1) {
339 throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
340 }
341 final String folder = path.substring(1);
342
343
344 final PrintCommandListener listener = new PrintCommandListener(System.out, true) {
345 @Override
346 public void protocolReplyReceived(final ProtocolCommandEvent event) {
347 if (event.getReplyCode() != IMAPReply.PARTIAL) {
348 super.protocolReplyReceived(event);
349 }
350 }
351 };
352
353
354 final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener);
355
356 String maxIndexInFolder = null;
357
358 try {
359
360 imap.setSoTimeout(read_timeout * 1000);
361
362 if (!imap.select(folder)) {
363 throw new IOException("Could not select folder: " + folder);
364 }
365
366 for (final String line : imap.getReplyStrings()) {
367 maxIndexInFolder = matches(line, PATEXISTS, 1);
368 if (maxIndexInFolder != null) {
369 break;
370 }
371 }
372
373 if (mboxListener != null) {
374 imap.setChunkListener(mboxListener);
375 }
376
377 while (true) {
378 final boolean ok = imap.fetch(sequenceSet, itemNames);
379
380 if (ok || retryWaitSecs <= 0 || mboxListener == null || !checkSequence) {
381 break;
382 }
383 final String replyString = imap.getReplyString();
384 if (!startsWith(replyString, PATTEMPFAIL)) {
385 throw new IOException("FETCH " + sequenceSet + " " + itemNames + " failed with " + replyString);
386 }
387 System.err.println("Temporary error detected, will retry in " + retryWaitSecs + "seconds");
388 sequenceSet = mboxListener.lastSeq + 1 + ":*";
389 try {
390 Thread.sleep(retryWaitSecs * 1000);
391 } catch (final InterruptedException e) {
392
393 }
394 }
395
396 } catch (final IOException ioe) {
397 final String count = mboxListener == null ? "?" : mboxListener.total.toString();
398 System.err.println("FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages ");
399 if (mboxListener != null) {
400 System.err.println("Last complete response seen: " + mboxListener.lastFetched);
401 }
402 throw ioe;
403 } finally {
404
405 if (printHash) {
406 System.err.println();
407 }
408
409 if (mboxListener != null) {
410 mboxListener.close();
411 final Iterator<String> missingIds = mboxListener.missingIds.iterator();
412 if (missingIds.hasNext()) {
413 final StringBuilder sb = new StringBuilder();
414 for (;;) {
415 sb.append(missingIds.next());
416 if (!missingIds.hasNext()) {
417 break;
418 }
419 sb.append(",");
420 }
421 System.err.println("*** Missing ids: " + sb.toString());
422 }
423 }
424 imap.logout();
425 imap.disconnect();
426 }
427 if (mboxListener != null) {
428 System.out.println("Processed " + mboxListener.total + " messages.");
429 }
430 if (maxIndexInFolder != null) {
431 System.out.println("Folder contained " + maxIndexInFolder + " messages.");
432 }
433 }
434
435 private static String matches(final String input, final Pattern pat, final int index) {
436 final Matcher m = pat.matcher(input);
437 if (m.lookingAt()) {
438 return m.group(index);
439 }
440 return null;
441 }
442
443 private static boolean startsWith(final String input, final Pattern pat) {
444 final Matcher m = pat.matcher(input);
445 return m.lookingAt();
446 }
447 }