1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.mail2.javax.util;
18
19 import java.io.IOException;
20 import java.io.UnsupportedEncodingException;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.Collections;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Map;
28
29 import javax.activation.DataSource;
30 import javax.mail.Address;
31 import javax.mail.Message;
32 import javax.mail.MessagingException;
33 import javax.mail.Multipart;
34 import javax.mail.Part;
35 import javax.mail.internet.ContentType;
36 import javax.mail.internet.InternetAddress;
37 import javax.mail.internet.MimeBodyPart;
38 import javax.mail.internet.MimeMessage;
39 import javax.mail.internet.MimePart;
40 import javax.mail.internet.MimeUtility;
41 import javax.mail.internet.ParseException;
42
43 import org.apache.commons.mail2.javax.activation.InputStreamDataSource;
44
45 /**
46 * Parses a MimeMessage and stores the individual parts such a plain text, HTML text and attachments.
47 *
48 * @since 1.3
49 */
50 public class MimeMessageParser {
51
52 /** The MimeMessage to convert. */
53 private final MimeMessage mimeMessage;
54
55 /** Plain mail content from MimeMessage. */
56 private String plainContent;
57
58 /** HTML mail content from MimeMessage. */
59 private String htmlContent;
60
61 /** List of attachments of MimeMessage. */
62 private final List<DataSource> attachmentList;
63
64 /** Attachments stored by their content-id. */
65 private final Map<String, DataSource> cidMap;
66
67 /** Is this a Multipart email. */
68 private boolean isMultiPart;
69
70 /**
71 * Constructs an instance with the MimeMessage to be extracted.
72 *
73 * @param mimeMessage the message to parse
74 */
75 public MimeMessageParser(final MimeMessage mimeMessage) {
76 this.attachmentList = new ArrayList<>();
77 this.cidMap = new HashMap<>();
78 this.mimeMessage = mimeMessage;
79 this.isMultiPart = false;
80 }
81
82 private List<Address> asList(final Address[] recipients) {
83 return recipients != null ? Arrays.asList(recipients) : new ArrayList<>();
84 }
85
86 /**
87 * Parses the MimePart to create a DataSource.
88 *
89 * @param parent the parent multi-part
90 * @param part the current part to be processed
91 * @return the DataSource
92 * @throws MessagingException creating the DataSource failed
93 * @throws IOException error getting InputStream or unsupported encoding
94 */
95 @SuppressWarnings("resource") // Caller closes InputStream
96 protected DataSource createDataSource(final Multipart parent, final MimePart part) throws MessagingException, IOException {
97 final DataSource dataSource = part.getDataHandler().getDataSource();
98 final String contentType = getBaseMimeType(dataSource.getContentType());
99 final String dataSourceName = getDataSourceName(part, dataSource);
100 return new InputStreamDataSource(dataSource.getInputStream(), contentType, dataSourceName);
101 }
102
103 /**
104 * Find an attachment using its content-id.
105 * <p>
106 * The content-id must be stripped of any angle brackets, i.e. "part1" instead of "<part1>".
107 * </p>
108 *
109 * @param cid the content-id of the attachment
110 * @return the corresponding datasource or null if nothing was found
111 * @since 1.3.4
112 */
113 public DataSource findAttachmentByCid(final String cid) {
114 return cidMap.get(cid);
115 }
116
117 /**
118 * Find an attachment using its name.
119 *
120 * @param name the name of the attachment
121 * @return the corresponding datasource or null if nothing was found
122 */
123 public DataSource findAttachmentByName(final String name) {
124 for (final DataSource dataSource : getAttachmentList()) {
125 if (name.equalsIgnoreCase(dataSource.getName())) {
126 return dataSource;
127 }
128 }
129 return null;
130 }
131
132 /**
133 * Gets the attachment list.
134 *
135 * @return Returns the attachment list.
136 */
137 public List<DataSource> getAttachmentList() {
138 return attachmentList;
139 }
140
141 /**
142 * Gets the MIME type.
143 *
144 * @param fullMimeType the mime type from the mail API
145 * @return the real mime type
146 */
147 private String getBaseMimeType(final String fullMimeType) {
148 final int pos = fullMimeType.indexOf(';');
149 return pos < 0 ? fullMimeType : fullMimeType.substring(0, pos);
150 }
151
152 /**
153 * Gets the BCC Address list.
154 *
155 * @return the 'BCC' recipients of the message
156 * @throws MessagingException determining the recipients failed
157 */
158 public List<Address> getBcc() throws MessagingException {
159 return asList(mimeMessage.getRecipients(Message.RecipientType.BCC));
160 }
161
162 /**
163 * Gets the CC Address list.
164 *
165 * @return the 'CC' recipients of the message
166 * @throws MessagingException determining the recipients failed
167 */
168 public List<Address> getCc() throws MessagingException {
169 return asList(mimeMessage.getRecipients(Message.RecipientType.CC));
170 }
171
172 /**
173 * Returns a collection of all content-ids in the parsed message.
174 * <p>
175 * The content-ids are stripped of any angle brackets, i.e. "part1" instead of "<part1>".
176 * </p>
177 *
178 * @return the collection of content ids.
179 * @since 1.3.4
180 */
181 public Collection<String> getContentIds() {
182 return Collections.unmodifiableSet(cidMap.keySet());
183 }
184
185 /**
186 * Determines the name of the data source if it is not already set.
187 *
188 * @param part the mail part
189 * @param dataSource the data source
190 * @return the name of the data source or {@code null} if no name can be determined
191 * @throws MessagingException accessing the part failed
192 * @throws UnsupportedEncodingException decoding the text failed
193 */
194 protected String getDataSourceName(final Part part, final DataSource dataSource) throws MessagingException, UnsupportedEncodingException {
195 String result = dataSource.getName();
196 if (isEmpty(result)) {
197 result = part.getFileName();
198 }
199 if (!isEmpty(result)) {
200 result = MimeUtility.decodeText(result);
201 } else {
202 result = null;
203 }
204 return result;
205 }
206
207 /**
208 * Gets the FROM field.
209 *
210 * @return the FROM field of the message
211 * @throws MessagingException parsing the mime message failed
212 */
213 public String getFrom() throws MessagingException {
214 final Address[] addresses = mimeMessage.getFrom();
215 if (isEmpty(addresses)) {
216 return null;
217 }
218 return ((InternetAddress) addresses[0]).getAddress();
219 }
220
221 /**
222 * Gets the htmlContent if any.
223 *
224 * @return Returns the htmlContent if any
225 */
226 public String getHtmlContent() {
227 return htmlContent;
228 }
229
230 /**
231 * Gets the MimeMessage.
232 *
233 * @return Returns the mimeMessage.
234 */
235 public MimeMessage getMimeMessage() {
236 return mimeMessage;
237 }
238
239 /**
240 * Gets the plain content if any.
241 *
242 * @return Returns the plainContent if any
243 */
244 public String getPlainContent() {
245 return plainContent;
246 }
247
248 /**
249 * Gets the 'replyTo' address of the email.
250 *
251 * @return the 'replyTo' address of the email
252 * @throws MessagingException parsing the mime message failed
253 */
254 public String getReplyTo() throws MessagingException {
255 final Address[] addresses = mimeMessage.getReplyTo();
256 if (isEmpty(addresses)) {
257 return null;
258 }
259 return ((InternetAddress) addresses[0]).getAddress();
260 }
261
262 /**
263 * Gets the MIME message subject.
264 *
265 * @return the MIME message subject.
266 * @throws MessagingException parsing the mime message failed.
267 */
268 public String getSubject() throws MessagingException {
269 return mimeMessage.getSubject();
270 }
271
272 /**
273 * Gets the MIME message 'to' list.
274 *
275 * @return the 'to' recipients of the message.
276 * @throws MessagingException determining the recipients failed
277 */
278 public List<Address> getTo() throws MessagingException {
279 return asList(mimeMessage.getRecipients(Message.RecipientType.TO));
280 }
281
282 /**
283 * Tests if attachments are present.
284 *
285 * @return true if attachments are present.
286 */
287 public boolean hasAttachments() {
288 return !attachmentList.isEmpty();
289 }
290
291 /**
292 * Tests is HTML content is present.
293 *
294 * @return true if HTML content is present.
295 */
296 public boolean hasHtmlContent() {
297 return htmlContent != null;
298 }
299
300 /**
301 * Tests is plain content is present.
302 *
303 * @return true if a plain content is present.
304 */
305 public boolean hasPlainContent() {
306 return plainContent != null;
307 }
308
309 private boolean isEmpty(final Object[] array) {
310 return array == null || array.length == 0;
311 }
312
313 private boolean isEmpty(final String result) {
314 return result == null || result.isEmpty();
315 }
316
317 /**
318 * Tests whether the MimePart contains an object of the given mime type.
319 *
320 * @param part the current MimePart
321 * @param mimeType the mime type to check
322 * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise
323 * @throws MessagingException parsing the MimeMessage failed
324 */
325 private boolean isMimeType(final MimePart part, final String mimeType) throws MessagingException {
326 // Do not use part.isMimeType(String) as it is broken for MimeBodyPart
327 // and does not really check the actual content type.
328 try {
329 return new ContentType(part.getDataHandler().getContentType()).match(mimeType);
330 } catch (final ParseException ex) {
331 return part.getContentType().equalsIgnoreCase(mimeType);
332 }
333 }
334
335 /**
336 * Tests whether this is multipart.
337 *
338 * @return Returns the isMultiPart.
339 */
340 public boolean isMultipart() {
341 return isMultiPart;
342 }
343
344 /**
345 * Does the actual extraction.
346 *
347 * @return this instance
348 * @throws MessagingException parsing the mime message failed
349 * @throws IOException parsing the mime message failed
350 */
351 public MimeMessageParser parse() throws MessagingException, IOException {
352 parse(null, mimeMessage);
353 return this;
354 }
355
356 /**
357 * Extracts the content of a MimeMessage recursively.
358 *
359 * @param parent the parent multi-part
360 * @param part the current MimePart
361 * @throws MessagingException parsing the MimeMessage failed
362 * @throws IOException parsing the MimeMessage failed
363 */
364 protected void parse(final Multipart parent, final MimePart part) throws MessagingException, IOException {
365 if (isMimeType(part, "text/plain") && plainContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) {
366 plainContent = (String) part.getContent();
367 } else if (isMimeType(part, "text/html") && htmlContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) {
368 htmlContent = (String) part.getContent();
369 } else if (isMimeType(part, "multipart/*")) {
370 isMultiPart = true;
371 final Multipart multipart = (Multipart) part.getContent();
372 final int count = multipart.getCount();
373 // iterate over all MimeBodyPart
374 for (int i = 0; i < count; i++) {
375 parse(multipart, (MimeBodyPart) multipart.getBodyPart(i));
376 }
377 } else {
378 final String cid = stripContentId(part.getContentID());
379 final DataSource dataSource = createDataSource(parent, part);
380 if (cid != null) {
381 cidMap.put(cid, dataSource);
382 }
383 attachmentList.add(dataSource);
384 }
385 }
386
387 /**
388 * Strips the content id of any whitespace and angle brackets.
389 *
390 * @param contentId the string to strip
391 * @return a stripped version of the content id
392 */
393 private String stripContentId(final String contentId) {
394 return contentId == null ? null : contentId.trim().replaceAll("[\\<\\>]", "");
395 }
396 }