1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.net.examples.mail;
19
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;
import org.apache.commons.net.PrintCommandListener;
import org.apache.commons.net.ProtocolCommandEvent;
import org.apache.commons.net.imap.IMAP;
import org.apache.commons.net.imap.IMAP.IMAPChunkListener;
import org.apache.commons.net.imap.IMAPClient;
import org.apache.commons.net.imap.IMAPReply;
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85 public final class IMAPExportMbox {
86
87 private static final class MboxListener implements IMAPChunkListener {
88
89 private final BufferedWriter bufferedWriter;
90 volatile AtomicInteger total = new AtomicInteger();
91 volatile String lastFetched;
92 volatile List<String> missingIds = new ArrayList<>();
93 volatile long lastSeq = -1;
94 private final String lineSeparator;
95 private final SimpleDateFormat DATE_FORMAT
96 = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy");
97
98
99
100 private final SimpleDateFormat IDPARSE = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss Z");
101 private final boolean printHash;
102 private final boolean printMarker;
103 private final boolean checkSequence;
104
105 MboxListener(final BufferedWriter bufferedWriter, final String lineSeparator, final boolean printHash, final boolean printMarker,
106 final boolean checkSequence) {
107 this.lineSeparator = lineSeparator;
108 this.printHash = printHash;
109 this.printMarker = printMarker;
110 DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("GMT"));
111 this.bufferedWriter = bufferedWriter;
112 this.checkSequence = checkSequence;
113 }
114
115 @Override
116 public boolean chunkReceived(final IMAP imap) {
117 final String[] replyStrings = imap.getReplyStrings();
118 Date received = new Date();
119 final String firstLine = replyStrings[0];
120 Matcher m = PATID.matcher(firstLine);
121 if (m.lookingAt()) {
122 final String date = m.group(PATID_DATE_GROUP);
123 try {
124 received = IDPARSE.parse(date);
125 } catch (final ParseException e) {
126 System.err.println(e);
127 }
128 } else {
129 System.err.println("No timestamp found in: " + firstLine + " - using current time");
130 }
131 String replyTo = "MAILER-DAEMON";
132 for (int i = 1; i < replyStrings.length - 1; i++) {
133 final String line = replyStrings[i];
134 if (line.startsWith("Return-Path: ")) {
135 final String[] parts = line.split(" ", 2);
136 if (!parts[1].equals("<>")) {
137 replyTo = parts[1];
138 if (replyTo.startsWith("<")) {
139 if (replyTo.endsWith(">")) {
140 replyTo = replyTo.substring(1, replyTo.length() - 1);
141 } else {
142 System.err.println("Unexpected Return-path: '" + line + "' in " + firstLine);
143 }
144 }
145 }
146 break;
147 }
148 }
149 try {
150
151 bufferedWriter.append("From ");
152 bufferedWriter.append(replyTo);
153 bufferedWriter.append(' ');
154 bufferedWriter.append(DATE_FORMAT.format(received));
155 bufferedWriter.append(lineSeparator);
156
157 bufferedWriter.append("X-IMAP-Response: ").append(firstLine).append(lineSeparator);
158 if (printMarker) {
159 System.err.println("[" + total + "] " + firstLine);
160 }
161
162 for (int i = 1; i < replyStrings.length - 1; i++) {
163 final String line = replyStrings[i];
164 if (startsWith(line, PATFROM)) {
165 bufferedWriter.append('>');
166 }
167 bufferedWriter.append(line);
168 bufferedWriter.append(lineSeparator);
169 }
170
171 final String lastLine = replyStrings[replyStrings.length - 1];
172 final int lastLength = lastLine.length();
173 if (lastLength > 1) {
174 bufferedWriter.append(lastLine, 0, lastLength - 1);
175 bufferedWriter.append(lineSeparator);
176 }
177 bufferedWriter.append(lineSeparator);
178 } catch (final IOException e) {
179 e.printStackTrace();
180 throw new UncheckedIOException(e);
181 }
182 lastFetched = firstLine;
183 total.incrementAndGet();
184 if (checkSequence) {
185 m = PATSEQ.matcher(firstLine);
186 if (m.lookingAt()) {
187 final long msgSeq = Long.parseLong(m.group(PATSEQ_SEQUENCE_GROUP));
188 if (lastSeq != -1) {
189 final long missing = msgSeq - lastSeq - 1;
190 if (missing != 0) {
191 for (long j = lastSeq + 1; j < msgSeq; j++) {
192 missingIds.add(String.valueOf(j));
193 }
194 System.err.println("*** Sequence error: current=" + msgSeq + " previous=" + lastSeq + " Missing=" + missing);
195 }
196 }
197 lastSeq = msgSeq;
198 }
199 }
200 if (printHash) {
201 System.err.print(".");
202 }
203 return true;
204 }
205
206 public void close() throws IOException {
207 IOUtils.close(bufferedWriter);
208 }
209 }
210
211 private static final String CRLF = "\r\n";
212 private static final String LF = "\n";
213
214 private static final String EOL_DEFAULT = System.lineSeparator();
215 private static final Pattern PATFROM = Pattern.compile(">*From ");
216
217 private static final Pattern PATID =
218 Pattern.compile(".*INTERNALDATE \"(\\d\\d-\\w{3}-\\d{4} \\d\\d:\\d\\d:\\d\\d [+-]\\d+)\"");
219
220 private static final int PATID_DATE_GROUP = 1;
221 private static final Pattern PATSEQ = Pattern.compile("\\* (\\d+) ");
222
223 private static final int PATSEQ_SEQUENCE_GROUP = 1;
224
225
226 private static final Pattern PATEXISTS = Pattern.compile("\\* (\\d+) EXISTS");
227
228
229 private static final Pattern PATTEMPFAIL = Pattern.compile("[A-Z]{4} NO \\[TEMPFAIL\\] FETCH .*");
230 private static final int CONNECT_TIMEOUT = 10;
231
232 private static final int READ_TIMEOUT = 10;
233
234 public static void main(final String[] args) throws IOException, URISyntaxException {
235 int connect_timeout = CONNECT_TIMEOUT;
236 int read_timeout = READ_TIMEOUT;
237
238 int argIdx = 0;
239 String eol = EOL_DEFAULT;
240 boolean printHash = false;
241 boolean printMarker = false;
242 int retryWaitSecs = 0;
243
244 for (argIdx = 0; argIdx < args.length; argIdx++) {
245 if (args[argIdx].equals("-c")) {
246 connect_timeout = Integer.parseInt(args[++argIdx]);
247 } else if (args[argIdx].equals("-r")) {
248 read_timeout = Integer.parseInt(args[++argIdx]);
249 } else if (args[argIdx].equals("-R")) {
250 retryWaitSecs = Integer.parseInt(args[++argIdx]);
251 } else if (args[argIdx].equals("-LF")) {
252 eol = LF;
253 } else if (args[argIdx].equals("-CRLF")) {
254 eol = CRLF;
255 } else if (args[argIdx].equals("-.")) {
256 printHash = true;
257 } else if (args[argIdx].equals("-X")) {
258 printMarker = true;
259 } else {
260 break;
261 }
262 }
263
264 final int argCount = args.length - argIdx;
265
266 if (argCount < 2) {
267 System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]"
268 + " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]");
269 System.err.println("\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)");
270 System.err.println("\t-c connect timeout in seconds (default 10)");
271 System.err.println("\t-r read timeout in seconds (default 10)");
272 System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)");
273 System.err.println("\t-. print a . for each complete message received");
274 System.err.println("\t-X print the X-IMAP line for each complete message received");
275 System.err.println("\tthe mboxfile is where the messages are stored; use '-' to write to standard output.");
276 System.err.println("\tPrefix file name with '+' to append to the file. Prefix with '-' to allow overwrite.");
277 System.err.println("\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*");
278 System.err.println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]"
279 + " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])");
280 System.exit(1);
281 }
282
283 final String uriString = args[argIdx++];
284 URI uri;
285 try {
286 uri = URI.create(uriString);
287 } catch (final IllegalArgumentException e) {
288 final Matcher m = Pattern.compile("(imaps?://[^/]+)(/.*)").matcher(uriString);
289 if (!m.matches()) {
290 throw e;
291 }
292 uri = URI.create(m.group(1));
293 uri = new URI(uri.getScheme(), uri.getAuthority(), m.group(2), null, null);
294 }
295 final String file = args[argIdx++];
296 String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*";
297 final String itemNames;
298
299 if (argCount > 3) {
300 if (argCount > 4) {
301 final StringBuilder sb = new StringBuilder();
302 sb.append("(");
303 for (int i = 4; i <= argCount; i++) {
304 if (i > 4) {
305 sb.append(" ");
306 }
307 sb.append(args[argIdx++]);
308 }
309 sb.append(")");
310 itemNames = sb.toString();
311 } else {
312 itemNames = args[argIdx++];
313 }
314 } else {
315 itemNames = "(INTERNALDATE BODY.PEEK[])";
316 }
317
318 final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)");
319 final MboxListener mboxListener;
320 if (file.equals("-")) {
321 mboxListener = null;
322 } else if (file.startsWith("+")) {
323 final Path mboxPath = Paths.get(file.substring(1));
324 System.out.println("Appending to file " + mboxPath);
325 mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE, StandardOpenOption.APPEND),
326 eol, printHash, printMarker, checkSequence);
327 } else if (file.startsWith("-")) {
328 final Path mboxPath = Paths.get(file.substring(1));
329 System.out.println("Writing to file " + mboxPath);
330 mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE), eol, printHash, printMarker,
331 checkSequence);
332 } else {
333 final Path mboxPath = Paths.get(file);
334 if (Files.exists(mboxPath) && Files.size(mboxPath) > 0) {
335 throw new IOException("mailbox file: " + mboxPath + " already exists and is non-empty!");
336 }
337 System.out.println("Creating file " + mboxPath);
338 mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE), eol, printHash, printMarker,
339 checkSequence);
340 }
341
342 final String path = uri.getPath();
343 if (path == null || path.length() < 1) {
344 throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
345 }
346 final String folder = path.substring(1);
347
348
349 final PrintCommandListener listener = new PrintCommandListener(System.out, true) {
350 @Override
351 public void protocolReplyReceived(final ProtocolCommandEvent event) {
352 if (event.getReplyCode() != IMAPReply.PARTIAL) {
353 super.protocolReplyReceived(event);
354 }
355 }
356 };
357
358
359 final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener);
360
361 String maxIndexInFolder = null;
362
363 try {
364
365 imap.setSoTimeout(read_timeout * 1000);
366
367 if (!imap.select(folder)) {
368 throw new IOException("Could not select folder: " + folder);
369 }
370
371 for (final String line : imap.getReplyStrings()) {
372 maxIndexInFolder = matches(line, PATEXISTS, 1);
373 if (maxIndexInFolder != null) {
374 break;
375 }
376 }
377
378 if (mboxListener != null) {
379 imap.setChunkListener(mboxListener);
380 }
381
382 while (true) {
383 final boolean ok = imap.fetch(sequenceSet, itemNames);
384
385 if (ok || retryWaitSecs <= 0 || mboxListener == null || !checkSequence) {
386 break;
387 }
388 final String replyString = imap.getReplyString();
389 if (!startsWith(replyString, PATTEMPFAIL)) {
390 throw new IOException("FETCH " + sequenceSet + " " + itemNames + " failed with " + replyString);
391 }
392 System.err.println("Temporary error detected, will retry in " + retryWaitSecs + "seconds");
393 sequenceSet = mboxListener.lastSeq + 1 + ":*";
394 try {
395 Thread.sleep(retryWaitSecs * 1000);
396 } catch (final InterruptedException e) {
397
398 }
399 }
400
401 } catch (final IOException ioe) {
402 final String count = mboxListener == null ? "?" : mboxListener.total.toString();
403 System.err.println("FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages ");
404 if (mboxListener != null) {
405 System.err.println("Last complete response seen: " + mboxListener.lastFetched);
406 }
407 throw ioe;
408 } finally {
409
410 if (printHash) {
411 System.err.println();
412 }
413
414 if (mboxListener != null) {
415 mboxListener.close();
416 final Iterator<String> missingIds = mboxListener.missingIds.iterator();
417 if (missingIds.hasNext()) {
418 final StringBuilder sb = new StringBuilder();
419 for (;;) {
420 sb.append(missingIds.next());
421 if (!missingIds.hasNext()) {
422 break;
423 }
424 sb.append(",");
425 }
426 System.err.println("*** Missing ids: " + sb.toString());
427 }
428 }
429 imap.logout();
430 imap.disconnect();
431 }
432 if (mboxListener != null) {
433 System.out.println("Processed " + mboxListener.total + " messages.");
434 }
435 if (maxIndexInFolder != null) {
436 System.out.println("Folder contained " + maxIndexInFolder + " messages.");
437 }
438 }
439
440 private static String matches(final String input, final Pattern pat, final int index) {
441 final Matcher m = pat.matcher(input);
442 if (m.lookingAt()) {
443 return m.group(index);
444 }
445 return null;
446 }
447
448 private static boolean startsWith(final String input, final Pattern pat) {
449 final Matcher m = pat.matcher(input);
450 return m.lookingAt();
451 }
452 }