/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.mail.util;
18  
19  import java.io.BufferedInputStream;
20  import java.io.BufferedOutputStream;
21  import java.io.ByteArrayOutputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.UnsupportedEncodingException;
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.Collection;
28  import java.util.Collections;
29  import java.util.HashMap;
30  import java.util.List;
31  import java.util.Map;
32  
33  import javax.activation.DataHandler;
34  import javax.activation.DataSource;
35  import javax.mail.Message;
36  import javax.mail.MessagingException;
37  import javax.mail.Multipart;
38  import javax.mail.Part;
39  import javax.mail.internet.ContentType;
40  import javax.mail.internet.InternetAddress;
41  import javax.mail.internet.MimeBodyPart;
42  import javax.mail.internet.MimeMessage;
43  import javax.mail.internet.MimePart;
44  import javax.mail.internet.MimeUtility;
45  import javax.mail.internet.ParseException;
46  import javax.mail.util.ByteArrayDataSource;
47  
48  /**
49   * Parses a MimeMessage and stores the individual parts such a plain text,
50   * HTML text and attachments.
51   *
52   * @since 1.3
53   */
54  public class MimeMessageParser
55  {
56      /** The MimeMessage to convert */
57      private final MimeMessage mimeMessage;
58  
59      /** Plain mail content from MimeMessage */
60      private String plainContent;
61  
62      /** Html mail content from MimeMessage */
63      private String htmlContent;
64  
65      /** List of attachments of MimeMessage */
66      private final List<DataSource> attachmentList;
67  
68      /** Attachments stored by their content-id */
69      private final Map<String, DataSource> cidMap;
70  
71      /** Is this a Multipart email */
72      private boolean isMultiPart;
73  
74      /**
75       * Constructs an instance with the MimeMessage to be extracted.
76       *
77       * @param message the message to parse
78       */
79      public MimeMessageParser(final MimeMessage message)
80      {
81          attachmentList = new ArrayList<DataSource>();
82          cidMap = new HashMap<String, DataSource>();
83          this.mimeMessage = message;
84          this.isMultiPart = false;
85      }
86  
87      /**
88       * Does the actual extraction.
89       *
90       * @return this instance
91       * @throws Exception parsing the mime message failed
92       */
93      public MimeMessageParser parse() throws Exception
94      {
95          this.parse(null, mimeMessage);
96          return this;
97      }
98  
99      /**
100      * @return the 'to' recipients of the message
101      * @throws Exception determining the recipients failed
102      */
103     public List<javax.mail.Address> getTo() throws Exception
104     {
105         final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.TO);
106         return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
107     }
108 
109     /**
110      * @return the 'cc' recipients of the message
111      * @throws Exception determining the recipients failed
112      */
113     public List<javax.mail.Address> getCc() throws Exception
114     {
115         final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.CC);
116         return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
117     }
118 
119     /**
120      * @return the 'bcc' recipients of the message
121      * @throws Exception determining the recipients failed
122      */
123     public List<javax.mail.Address> getBcc() throws Exception
124     {
125         final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.BCC);
126         return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
127     }
128 
129     /**
130      * @return the 'from' field of the message
131      * @throws Exception parsing the mime message failed
132      */
133     public String getFrom() throws Exception
134     {
135         final javax.mail.Address[] addresses = this.mimeMessage.getFrom();
136         if (addresses == null || addresses.length == 0)
137         {
138             return null;
139         }
140         return ((InternetAddress) addresses[0]).getAddress();
141     }
142 
143     /**
144      * @return the 'replyTo' address of the email
145      * @throws Exception parsing the mime message failed
146      */
147     public String getReplyTo() throws Exception
148     {
149         final javax.mail.Address[] addresses = this.mimeMessage.getReplyTo();
150         if (addresses == null || addresses.length == 0)
151         {
152             return null;
153         }
154         return ((InternetAddress) addresses[0]).getAddress();
155     }
156 
157     /**
158      * @return the mail subject
159      * @throws Exception parsing the mime message failed
160      */
161     public String getSubject() throws Exception
162     {
163         return this.mimeMessage.getSubject();
164     }
165 
166     /**
167      * Extracts the content of a MimeMessage recursively.
168      *
169      * @param parent the parent multi-part
170      * @param part   the current MimePart
171      * @throws MessagingException parsing the MimeMessage failed
172      * @throws IOException        parsing the MimeMessage failed
173      */
174     protected void parse(final Multipart parent, final MimePart part)
175         throws MessagingException, IOException
176     {
177         if (isMimeType(part, "text/plain") && plainContent == null
178                 && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition()))
179         {
180             plainContent = (String) part.getContent();
181         }
182         else
183         {
184             if (isMimeType(part, "text/html") && htmlContent == null
185                     && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition()))
186             {
187                 htmlContent = (String) part.getContent();
188             }
189             else
190             {
191                 if (isMimeType(part, "multipart/*"))
192                 {
193                     this.isMultiPart = true;
194                     final Multipart mp = (Multipart) part.getContent();
195                     final int count = mp.getCount();
196 
197                     // iterate over all MimeBodyPart
198 
199                     for (int i = 0; i < count; i++)
200                     {
201                         parse(mp, (MimeBodyPart) mp.getBodyPart(i));
202                     }
203                 }
204                 else
205                 {
206                     final String cid = stripContentId(part.getContentID());
207                     final DataSource ds = createDataSource(parent, part);
208                     if (cid != null)
209                     {
210                         this.cidMap.put(cid, ds);
211                     }
212                     this.attachmentList.add(ds);
213                 }
214             }
215         }
216     }
217 
218     /**
219      * Strips the content id of any whitespace and angle brackets.
220      * @param contentId the string to strip
221      * @return a stripped version of the content id
222      */
223     private String stripContentId(final String contentId)
224     {
225         if (contentId == null)
226         {
227             return null;
228         }
229         return contentId.trim().replaceAll("[\\<\\>]", "");
230     }
231 
232     /**
233      * Checks whether the MimePart contains an object of the given mime type.
234      *
235      * @param part     the current MimePart
236      * @param mimeType the mime type to check
237      * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise
238      * @throws MessagingException parsing the MimeMessage failed
239      * @throws IOException        parsing the MimeMessage failed
240      */
241     private boolean isMimeType(final MimePart part, final String mimeType)
242         throws MessagingException, IOException
243     {
244         // Do not use part.isMimeType(String) as it is broken for MimeBodyPart
245         // and does not really check the actual content type.
246 
247         try
248         {
249             final ContentType ct = new ContentType(part.getDataHandler().getContentType());
250             return ct.match(mimeType);
251         }
252         catch (final ParseException ex)
253         {
254             return part.getContentType().equalsIgnoreCase(mimeType);
255         }
256     }
257 
258     /**
259      * Parses the MimePart to create a DataSource.
260      *
261      * @param parent the parent multi-part
262      * @param part   the current part to be processed
263      * @return the DataSource
264      * @throws MessagingException creating the DataSource failed
265      * @throws IOException        creating the DataSource failed
266      */
267     protected DataSource createDataSource(final Multipart parent, final MimePart part)
268         throws MessagingException, IOException
269     {
270         final DataHandler dataHandler = part.getDataHandler();
271         final DataSource dataSource = dataHandler.getDataSource();
272         final String contentType = getBaseMimeType(dataSource.getContentType());
273         final byte[] content = this.getContent(dataSource.getInputStream());
274         final ByteArrayDataSource result = new ByteArrayDataSource(content, contentType);
275         final String dataSourceName = getDataSourceName(part, dataSource);
276 
277         result.setName(dataSourceName);
278         return result;
279     }
280 
281     /** @return Returns the mimeMessage. */
282     public MimeMessage getMimeMessage()
283     {
284         return mimeMessage;
285     }
286 
287     /** @return Returns the isMultiPart. */
288     public boolean isMultipart()
289     {
290         return isMultiPart;
291     }
292 
293     /** @return Returns the plainContent if any */
294     public String getPlainContent()
295     {
296         return plainContent;
297     }
298 
299     /** @return Returns the attachmentList. */
300     public List<DataSource> getAttachmentList()
301     {
302         return attachmentList;
303     }
304 
305     /**
306      * Returns a collection of all content-ids in the parsed message.
307      * <p>
308      * The content-ids are stripped of any angle brackets, i.e. "part1" instead
309      * of "&lt;part1&gt;".
310      *
311      * @return the collection of content ids.
312      * @since 1.3.4
313      */
314     public Collection<String> getContentIds()
315     {
316         return Collections.unmodifiableSet(cidMap.keySet());
317     }
318 
319     /** @return Returns the htmlContent if any */
320     public String getHtmlContent()
321     {
322         return htmlContent;
323     }
324 
325     /** @return true if a plain content is available */
326     public boolean hasPlainContent()
327     {
328         return this.plainContent != null;
329     }
330 
331     /** @return true if HTML content is available */
332     public boolean hasHtmlContent()
333     {
334         return this.htmlContent != null;
335     }
336 
337     /** @return true if attachments are available */
338     public boolean hasAttachments()
339     {
340         return this.attachmentList.size() > 0;
341     }
342 
343     /**
344      * Find an attachment using its name.
345      *
346      * @param name the name of the attachment
347      * @return the corresponding datasource or null if nothing was found
348      */
349     public DataSource findAttachmentByName(final String name)
350     {
351         DataSource dataSource;
352 
353         for (int i = 0; i < getAttachmentList().size(); i++)
354         {
355             dataSource = getAttachmentList().get(i);
356             if (name.equalsIgnoreCase(dataSource.getName()))
357             {
358                 return dataSource;
359             }
360         }
361 
362         return null;
363     }
364 
365     /**
366      * Find an attachment using its content-id.
367      * <p>
368      * The content-id must be stripped of any angle brackets,
369      * i.e. "part1" instead of "&lt;part1&gt;".
370      *
371      * @param cid the content-id of the attachment
372      * @return the corresponding datasource or null if nothing was found
373      * @since 1.3.4
374      */
375     public DataSource findAttachmentByCid(final String cid)
376     {
377         final DataSource dataSource = cidMap.get(cid);
378         return dataSource;
379     }
380 
381     /**
382      * Determines the name of the data source if it is not already set.
383      *
384      * @param part the mail part
385      * @param dataSource the data source
386      * @return the name of the data source or {@code null} if no name can be determined
387      * @throws MessagingException accessing the part failed
388      * @throws UnsupportedEncodingException decoding the text failed
389      */
390     protected String getDataSourceName(final Part part, final DataSource dataSource)
391         throws MessagingException, UnsupportedEncodingException
392     {
393         String result = dataSource.getName();
394 
395         if (result == null || result.length() == 0)
396         {
397             result = part.getFileName();
398         }
399 
400         if (result != null && result.length() > 0)
401         {
402             result = MimeUtility.decodeText(result);
403         }
404         else
405         {
406             result = null;
407         }
408 
409         return result;
410     }
411 
412     /**
413      * Read the content of the input stream.
414      *
415      * @param is the input stream to process
416      * @return the content of the input stream
417      * @throws IOException reading the input stream failed
418      */
419     private byte[] getContent(final InputStream is)
420         throws IOException
421     {
422         int ch;
423         byte[] result;
424 
425         final ByteArrayOutputStream os = new ByteArrayOutputStream();
426         final BufferedInputStream isReader = new BufferedInputStream(is);
427         final BufferedOutputStream osWriter = new BufferedOutputStream(os);
428 
429         while ((ch = isReader.read()) != -1)
430         {
431             osWriter.write(ch);
432         }
433 
434         osWriter.flush();
435         result = os.toByteArray();
436         osWriter.close();
437 
438         return result;
439     }
440 
441     /**
442      * Parses the mimeType.
443      *
444      * @param fullMimeType the mime type from the mail api
445      * @return the real mime type
446      */
447     private String getBaseMimeType(final String fullMimeType)
448     {
449         final int pos = fullMimeType.indexOf(';');
450         if (pos >= 0)
451         {
452             return fullMimeType.substring(0, pos);
453         }
454         return fullMimeType;
455     }
456 }