001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.mail.util;
018
019import java.io.BufferedInputStream;
020import java.io.BufferedOutputStream;
021import java.io.ByteArrayOutputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.UnsupportedEncodingException;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.Collection;
028import java.util.Collections;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032
033import javax.activation.DataHandler;
034import javax.activation.DataSource;
035import javax.mail.Message;
036import javax.mail.MessagingException;
037import javax.mail.Multipart;
038import javax.mail.Part;
039import javax.mail.internet.ContentType;
040import javax.mail.internet.InternetAddress;
041import javax.mail.internet.MimeBodyPart;
042import javax.mail.internet.MimeMessage;
043import javax.mail.internet.MimePart;
044import javax.mail.internet.MimeUtility;
045import javax.mail.internet.ParseException;
046import javax.mail.util.ByteArrayDataSource;
047
048/**
049 * Parses a MimeMessage and stores the individual parts such a plain text,
050 * HTML text and attachments.
051 *
052 * @since 1.3
053 * @version $Id: MimeMessageParser.html 952467 2015-05-23 18:45:36Z tn $
054 */
055public class MimeMessageParser
056{
057    /** The MimeMessage to convert */
058    private final MimeMessage mimeMessage;
059
060    /** Plain mail content from MimeMessage */
061    private String plainContent;
062
063    /** Html mail content from MimeMessage */
064    private String htmlContent;
065
066    /** List of attachments of MimeMessage */
067    private final List<DataSource> attachmentList;
068
069    /** Attachments stored by their content-id */
070    private final Map<String, DataSource> cidMap;
071
072    /** Is this a Multipart email */
073    private boolean isMultiPart;
074
075    /**
076     * Constructs an instance with the MimeMessage to be extracted.
077     *
078     * @param message the message to parse
079     */
080    public MimeMessageParser(final MimeMessage message)
081    {
082        attachmentList = new ArrayList<DataSource>();
083        cidMap = new HashMap<String, DataSource>();
084        this.mimeMessage = message;
085        this.isMultiPart = false;
086    }
087
088    /**
089     * Does the actual extraction.
090     *
091     * @return this instance
092     * @throws Exception parsing the mime message failed
093     */
094    public MimeMessageParser parse() throws Exception
095    {
096        this.parse(null, mimeMessage);
097        return this;
098    }
099
100    /**
101     * @return the 'to' recipients of the message
102     * @throws Exception determining the recipients failed
103     */
104    public List<javax.mail.Address> getTo() throws Exception
105    {
106        final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.TO);
107        return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
108    }
109
110    /**
111     * @return the 'cc' recipients of the message
112     * @throws Exception determining the recipients failed
113     */
114    public List<javax.mail.Address> getCc() throws Exception
115    {
116        final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.CC);
117        return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
118    }
119
120    /**
121     * @return the 'bcc' recipients of the message
122     * @throws Exception determining the recipients failed
123     */
124    public List<javax.mail.Address> getBcc() throws Exception
125    {
126        final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.BCC);
127        return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
128    }
129
130    /**
131     * @return the 'from' field of the message
132     * @throws Exception parsing the mime message failed
133     */
134    public String getFrom() throws Exception
135    {
136        final javax.mail.Address[] addresses = this.mimeMessage.getFrom();
137        if (addresses == null || addresses.length == 0)
138        {
139            return null;
140        }
141        return ((InternetAddress) addresses[0]).getAddress();
142    }
143
144    /**
145     * @return the 'replyTo' address of the email
146     * @throws Exception parsing the mime message failed
147     */
148    public String getReplyTo() throws Exception
149    {
150        final javax.mail.Address[] addresses = this.mimeMessage.getReplyTo();
151        if (addresses == null || addresses.length == 0)
152        {
153            return null;
154        }
155        return ((InternetAddress) addresses[0]).getAddress();
156    }
157
158    /**
159     * @return the mail subject
160     * @throws Exception parsing the mime message failed
161     */
162    public String getSubject() throws Exception
163    {
164        return this.mimeMessage.getSubject();
165    }
166
167    /**
168     * Extracts the content of a MimeMessage recursively.
169     *
170     * @param parent the parent multi-part
171     * @param part   the current MimePart
172     * @throws MessagingException parsing the MimeMessage failed
173     * @throws IOException        parsing the MimeMessage failed
174     */
175    protected void parse(final Multipart parent, final MimePart part)
176        throws MessagingException, IOException
177    {
178        if (isMimeType(part, "text/plain") && plainContent == null
179                && !MimePart.ATTACHMENT.equalsIgnoreCase(part.getDisposition()))
180        {
181            plainContent = (String) part.getContent();
182        }
183        else
184        {
185            if (isMimeType(part, "text/html") && htmlContent == null
186                    && !MimePart.ATTACHMENT.equalsIgnoreCase(part.getDisposition()))
187            {
188                htmlContent = (String) part.getContent();
189            }
190            else
191            {
192                if (isMimeType(part, "multipart/*"))
193                {
194                    this.isMultiPart = true;
195                    final Multipart mp = (Multipart) part.getContent();
196                    final int count = mp.getCount();
197
198                    // iterate over all MimeBodyPart
199
200                    for (int i = 0; i < count; i++)
201                    {
202                        parse(mp, (MimeBodyPart) mp.getBodyPart(i));
203                    }
204                }
205                else
206                {
207                    final String cid = stripContentId(part.getContentID());
208                    final DataSource ds = createDataSource(parent, part);
209                    if (cid != null)
210                    {
211                        this.cidMap.put(cid, ds);
212                    }
213                    this.attachmentList.add(ds);
214                }
215            }
216        }
217    }
218
219    /**
220     * Strips the content id of any whitespace and angle brackets.
221     * @param contentId the string to strip
222     * @return a stripped version of the content id
223     */
224    private String stripContentId(final String contentId)
225    {
226        if (contentId == null)
227        {
228            return null;
229        }
230        return contentId.trim().replaceAll("[\\<\\>]", "");
231    }
232
233    /**
234     * Checks whether the MimePart contains an object of the given mime type.
235     *
236     * @param part     the current MimePart
237     * @param mimeType the mime type to check
238     * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise
239     * @throws MessagingException parsing the MimeMessage failed
240     * @throws IOException        parsing the MimeMessage failed
241     */
242    private boolean isMimeType(final MimePart part, final String mimeType)
243        throws MessagingException, IOException
244    {
245        // Do not use part.isMimeType(String) as it is broken for MimeBodyPart
246        // and does not really check the actual content type.
247
248        try
249        {
250            final ContentType ct = new ContentType(part.getDataHandler().getContentType());
251            return ct.match(mimeType);
252        }
253        catch (final ParseException ex)
254        {
255            return part.getContentType().equalsIgnoreCase(mimeType);
256        }
257    }
258
259    /**
260     * Parses the MimePart to create a DataSource.
261     *
262     * @param parent the parent multi-part
263     * @param part   the current part to be processed
264     * @return the DataSource
265     * @throws MessagingException creating the DataSource failed
266     * @throws IOException        creating the DataSource failed
267     */
268    protected DataSource createDataSource(final Multipart parent, final MimePart part)
269        throws MessagingException, IOException
270    {
271        final DataHandler dataHandler = part.getDataHandler();
272        final DataSource dataSource = dataHandler.getDataSource();
273        final String contentType = getBaseMimeType(dataSource.getContentType());
274        final byte[] content = this.getContent(dataSource.getInputStream());
275        final ByteArrayDataSource result = new ByteArrayDataSource(content, contentType);
276        final String dataSourceName = getDataSourceName(part, dataSource);
277
278        result.setName(dataSourceName);
279        return result;
280    }
281
282    /** @return Returns the mimeMessage. */
283    public MimeMessage getMimeMessage()
284    {
285        return mimeMessage;
286    }
287
288    /** @return Returns the isMultiPart. */
289    public boolean isMultipart()
290    {
291        return isMultiPart;
292    }
293
294    /** @return Returns the plainContent if any */
295    public String getPlainContent()
296    {
297        return plainContent;
298    }
299
300    /** @return Returns the attachmentList. */
301    public List<DataSource> getAttachmentList()
302    {
303        return attachmentList;
304    }
305
306    /**
307     * Returns a collection of all content-ids in the parsed message.
308     * <p>
309     * The content-ids are stripped of any angle brackets, i.e. "part1" instead
310     * of "&lt;part1&gt;".
311     *
312     * @return the collection of content ids.
313     * @since 1.3.4
314     */
315    public Collection<String> getContentIds()
316    {
317        return Collections.unmodifiableSet(cidMap.keySet());
318    }
319
320    /** @return Returns the htmlContent if any */
321    public String getHtmlContent()
322    {
323        return htmlContent;
324    }
325
326    /** @return true if a plain content is available */
327    public boolean hasPlainContent()
328    {
329        return this.plainContent != null;
330    }
331
332    /** @return true if HTML content is available */
333    public boolean hasHtmlContent()
334    {
335        return this.htmlContent != null;
336    }
337
338    /** @return true if attachments are available */
339    public boolean hasAttachments()
340    {
341        return this.attachmentList.size() > 0;
342    }
343
344    /**
345     * Find an attachment using its name.
346     *
347     * @param name the name of the attachment
348     * @return the corresponding datasource or null if nothing was found
349     */
350    public DataSource findAttachmentByName(final String name)
351    {
352        DataSource dataSource;
353
354        for (int i = 0; i < getAttachmentList().size(); i++)
355        {
356            dataSource = getAttachmentList().get(i);
357            if (name.equalsIgnoreCase(dataSource.getName()))
358            {
359                return dataSource;
360            }
361        }
362
363        return null;
364    }
365
366    /**
367     * Find an attachment using its content-id.
368     * <p>
369     * The content-id must be stripped of any angle brackets,
370     * i.e. "part1" instead of "&lt;part1&gt;".
371     *
372     * @param cid the content-id of the attachment
373     * @return the corresponding datasource or null if nothing was found
374     * @since 1.3.4
375     */
376    public DataSource findAttachmentByCid(final String cid)
377    {
378        final DataSource dataSource = cidMap.get(cid);
379        return dataSource;
380    }
381
382    /**
383     * Determines the name of the data source if it is not already set.
384     *
385     * @param part the mail part
386     * @param dataSource the data source
387     * @return the name of the data source or {@code null} if no name can be determined
388     * @throws MessagingException accessing the part failed
389     * @throws UnsupportedEncodingException decoding the text failed
390     */
391    protected String getDataSourceName(final Part part, final DataSource dataSource)
392        throws MessagingException, UnsupportedEncodingException
393    {
394        String result = dataSource.getName();
395
396        if (result == null || result.length() == 0)
397        {
398            result = part.getFileName();
399        }
400
401        if (result != null && result.length() > 0)
402        {
403            result = MimeUtility.decodeText(result);
404        }
405        else
406        {
407            result = null;
408        }
409
410        return result;
411    }
412
413    /**
414     * Read the content of the input stream.
415     *
416     * @param is the input stream to process
417     * @return the content of the input stream
418     * @throws IOException reading the input stream failed
419     */
420    private byte[] getContent(final InputStream is)
421        throws IOException
422    {
423        int ch;
424        byte[] result;
425
426        final ByteArrayOutputStream os = new ByteArrayOutputStream();
427        final BufferedInputStream isReader = new BufferedInputStream(is);
428        final BufferedOutputStream osWriter = new BufferedOutputStream(os);
429
430        while ((ch = isReader.read()) != -1)
431        {
432            osWriter.write(ch);
433        }
434
435        osWriter.flush();
436        result = os.toByteArray();
437        osWriter.close();
438
439        return result;
440    }
441
442    /**
443     * Parses the mimeType.
444     *
445     * @param fullMimeType the mime type from the mail api
446     * @return the real mime type
447     */
448    private String getBaseMimeType(final String fullMimeType)
449    {
450        final int pos = fullMimeType.indexOf(';');
451        if (pos >= 0)
452        {
453            return fullMimeType.substring(0, pos);
454        }
455        return fullMimeType;
456    }
457}