001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.mail.util;
018
019import java.io.BufferedInputStream;
020import java.io.BufferedOutputStream;
021import java.io.ByteArrayOutputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.UnsupportedEncodingException;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.List;
028
029import javax.activation.DataHandler;
030import javax.activation.DataSource;
031import javax.mail.Message;
032import javax.mail.MessagingException;
033import javax.mail.Multipart;
034import javax.mail.Part;
035import javax.mail.internet.ContentType;
036import javax.mail.internet.InternetAddress;
037import javax.mail.internet.MimeBodyPart;
038import javax.mail.internet.MimeMessage;
039import javax.mail.internet.MimePart;
040import javax.mail.internet.MimeUtility;
041import javax.mail.internet.ParseException;
042import javax.mail.util.ByteArrayDataSource;
043
044/**
045 * Parses a MimeMessage and stores the individual parts such a plain text,
046 * HTML text and attachments.
047 *
048 * @since 1.3
049 * @version $Id: MimeMessageParser.java 1606710 2014-06-30 12:29:13Z ggregory $
050 */
051public class MimeMessageParser
052{
053    /** The MimeMessage to convert */
054    private final MimeMessage mimeMessage;
055
056    /** Plain mail content from MimeMessage */
057    private String plainContent;
058
059    /** Html mail content from MimeMessage */
060    private String htmlContent;
061
062    /** List of attachments of MimeMessage */
063    private final List<DataSource> attachmentList;
064
065    /** Is this a Multipart email */
066    private boolean isMultiPart;
067
068    /**
069     * Constructs an instance with the MimeMessage to be extracted.
070     *
071     * @param message the message to parse
072     */
073    public MimeMessageParser(final MimeMessage message)
074    {
075        attachmentList = new ArrayList<DataSource>();
076        this.mimeMessage = message;
077        this.isMultiPart = false;
078    }
079
080    /**
081     * Does the actual extraction.
082     *
083     * @return this instance
084     * @throws Exception parsing the mime message failed
085     */
086    public MimeMessageParser parse() throws Exception
087    {
088        this.parse(null, mimeMessage);
089        return this;
090    }
091
092    /**
093     * @return the 'to' recipients of the message
094     * @throws Exception determining the recipients failed
095     */
096    public List<javax.mail.Address> getTo() throws Exception
097    {
098        final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.TO);
099        return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
100    }
101
102    /**
103     * @return the 'cc' recipients of the message
104     * @throws Exception determining the recipients failed
105     */
106    public List<javax.mail.Address> getCc() throws Exception
107    {
108        final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.CC);
109        return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
110    }
111
112    /**
113     * @return the 'bcc' recipients of the message
114     * @throws Exception determining the recipients failed
115     */
116    public List<javax.mail.Address> getBcc() throws Exception
117    {
118        final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.BCC);
119        return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
120    }
121
122    /**
123     * @return the 'from' field of the message
124     * @throws Exception parsing the mime message failed
125     */
126    public String getFrom() throws Exception
127    {
128        final javax.mail.Address[] addresses = this.mimeMessage.getFrom();
129        if (addresses == null || addresses.length == 0)
130        {
131            return null;
132        }
133        else
134        {
135            return ((InternetAddress) addresses[0]).getAddress();
136        }
137    }
138
139    /**
140     * @return the 'replyTo' address of the email
141     * @throws Exception parsing the mime message failed
142     */
143    public String getReplyTo() throws Exception
144    {
145        final javax.mail.Address[] addresses = this.mimeMessage.getReplyTo();
146        if (addresses == null || addresses.length == 0)
147        {
148            return null;
149        }
150        else
151        {
152            return ((InternetAddress) addresses[0]).getAddress();
153        }
154    }
155
156    /**
157     * @return the mail subject
158     * @throws Exception parsing the mime message failed
159     */
160    public String getSubject() throws Exception
161    {
162        return this.mimeMessage.getSubject();
163    }
164
165    /**
166     * Extracts the content of a MimeMessage recursively.
167     *
168     * @param parent the parent multi-part
169     * @param part   the current MimePart
170     * @throws MessagingException parsing the MimeMessage failed
171     * @throws IOException        parsing the MimeMessage failed
172     */
173    protected void parse(final Multipart parent, final MimePart part)
174        throws MessagingException, IOException
175    {
176        if (isMimeType(part, "text/plain") && plainContent == null
177                && !MimePart.ATTACHMENT.equalsIgnoreCase(part.getDisposition()))
178        {
179            plainContent = (String) part.getContent();
180        }
181        else
182        {
183            if (isMimeType(part, "text/html") && htmlContent == null
184                    && !MimePart.ATTACHMENT.equalsIgnoreCase(part.getDisposition()))
185            {
186                htmlContent = (String) part.getContent();
187            }
188            else
189            {
190                if (isMimeType(part, "multipart/*"))
191                {
192                    this.isMultiPart = true;
193                    final Multipart mp = (Multipart) part.getContent();
194                    final int count = mp.getCount();
195
196                    // iterate over all MimeBodyPart
197
198                    for (int i = 0; i < count; i++)
199                    {
200                        parse(mp, (MimeBodyPart) mp.getBodyPart(i));
201                    }
202                }
203                else
204                {
205                    this.attachmentList.add(createDataSource(parent, part));
206                }
207            }
208        }
209    }
210
211    /**
212     * Checks whether the MimePart contains an object of the given mime type.
213     *
214     * @param part     the current MimePart
215     * @param mimeType the mime type to check
216     * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise
217     * @throws MessagingException parsing the MimeMessage failed
218     * @throws IOException        parsing the MimeMessage failed
219     */
220    private boolean isMimeType(final MimePart part, final String mimeType)
221        throws MessagingException, IOException
222    {
223        // Do not use part.isMimeType(String) as it is broken for MimeBodyPart
224        // and does not really check the actual content type.
225
226        try
227        {
228            final ContentType ct = new ContentType(part.getDataHandler().getContentType());
229            return ct.match(mimeType);
230        }
231        catch (final ParseException ex)
232        {
233            return part.getContentType().equalsIgnoreCase(mimeType);
234        }
235    }
236
237    /**
238     * Parses the MimePart to create a DataSource.
239     *
240     * @param parent the parent multi-part
241     * @param part   the current part to be processed
242     * @return the DataSource
243     * @throws MessagingException creating the DataSource failed
244     * @throws IOException        creating the DataSource failed
245     */
246    protected DataSource createDataSource(final Multipart parent, final MimePart part)
247        throws MessagingException, IOException
248    {
249        final DataHandler dataHandler = part.getDataHandler();
250        final DataSource dataSource = dataHandler.getDataSource();
251        final String contentType = getBaseMimeType(dataSource.getContentType());
252        final byte[] content = this.getContent(dataSource.getInputStream());
253        final ByteArrayDataSource result = new ByteArrayDataSource(content, contentType);
254        final String dataSourceName = getDataSourceName(part, dataSource);
255
256        result.setName(dataSourceName);
257        return result;
258    }
259
260    /** @return Returns the mimeMessage. */
261    public MimeMessage getMimeMessage()
262    {
263        return mimeMessage;
264    }
265
266    /** @return Returns the isMultiPart. */
267    public boolean isMultipart()
268    {
269        return isMultiPart;
270    }
271
272    /** @return Returns the plainContent if any */
273    public String getPlainContent()
274    {
275        return plainContent;
276    }
277
278    /** @return Returns the attachmentList. */
279    public List<DataSource> getAttachmentList()
280    {
281        return attachmentList;
282    }
283
284    /** @return Returns the htmlContent if any */
285    public String getHtmlContent()
286    {
287        return htmlContent;
288    }
289
290    /** @return true if a plain content is available */
291    public boolean hasPlainContent()
292    {
293        return this.plainContent != null;
294    }
295
296    /** @return true if HTML content is available */
297    public boolean hasHtmlContent()
298    {
299        return this.htmlContent != null;
300    }
301
302    /** @return true if attachments are available */
303    public boolean hasAttachments()
304    {
305        return this.attachmentList.size() > 0;
306    }
307
308    /**
309     * Find an attachment using its name.
310     *
311     * @param name the name of the attachment
312     * @return the corresponding datasource or null if nothing was found
313     */
314    public DataSource findAttachmentByName(final String name)
315    {
316        DataSource dataSource;
317
318        for (int i = 0; i < getAttachmentList().size(); i++)
319        {
320            dataSource = getAttachmentList().get(i);
321            if (name.equalsIgnoreCase(dataSource.getName()))
322            {
323                return dataSource;
324            }
325        }
326
327        return null;
328    }
329
330    /**
331     * Determines the name of the data source if it is not already set.
332     *
333     * @param part the mail part
334     * @param dataSource the data source
335     * @return the name of the data source or {@code null} if no name can be determined
336     * @throws MessagingException accessing the part failed
337     * @throws UnsupportedEncodingException decoding the text failed
338     */
339    protected String getDataSourceName(final Part part, final DataSource dataSource)
340        throws MessagingException, UnsupportedEncodingException
341    {
342        String result = dataSource.getName();
343
344        if (result == null || result.length() == 0)
345        {
346            result = part.getFileName();
347        }
348
349        if (result != null && result.length() > 0)
350        {
351            result = MimeUtility.decodeText(result);
352        }
353        else
354        {
355            result = null;
356        }
357
358        return result;
359    }
360
361    /**
362     * Read the content of the input stream.
363     *
364     * @param is the input stream to process
365     * @return the content of the input stream
366     * @throws IOException reading the input stream failed
367     */
368    private byte[] getContent(final InputStream is)
369        throws IOException
370    {
371        int ch;
372        byte[] result;
373
374        final ByteArrayOutputStream os = new ByteArrayOutputStream();
375        final BufferedInputStream isReader = new BufferedInputStream(is);
376        final BufferedOutputStream osWriter = new BufferedOutputStream(os);
377
378        while ((ch = isReader.read()) != -1)
379        {
380            osWriter.write(ch);
381        }
382
383        osWriter.flush();
384        result = os.toByteArray();
385        osWriter.close();
386
387        return result;
388    }
389
390    /**
391     * Parses the mimeType.
392     *
393     * @param fullMimeType the mime type from the mail api
394     * @return the real mime type
395     */
396    private String getBaseMimeType(final String fullMimeType)
397    {
398        final int pos = fullMimeType.indexOf(';');
399        if (pos >= 0)
400        {
401            return fullMimeType.substring(0, pos);
402        }
403        else
404        {
405            return fullMimeType;
406        }
407    }
408}