001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.mail.util;
018
019import java.io.BufferedInputStream;
020import java.io.BufferedOutputStream;
021import java.io.ByteArrayOutputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.UnsupportedEncodingException;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.Collection;
028import java.util.Collections;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032
033import javax.activation.DataHandler;
034import javax.activation.DataSource;
035import javax.mail.Message;
036import javax.mail.MessagingException;
037import javax.mail.Multipart;
038import javax.mail.Part;
039import javax.mail.internet.ContentType;
040import javax.mail.internet.InternetAddress;
041import javax.mail.internet.MimeBodyPart;
042import javax.mail.internet.MimeMessage;
043import javax.mail.internet.MimePart;
044import javax.mail.internet.MimeUtility;
045import javax.mail.internet.ParseException;
046import javax.mail.util.ByteArrayDataSource;
047
048/**
049 * Parses a MimeMessage and stores the individual parts such a plain text,
050 * HTML text and attachments.
051 *
052 * @since 1.3
053 */
054public class MimeMessageParser
055{
056    /** The MimeMessage to convert */
057    private final MimeMessage mimeMessage;
058
059    /** Plain mail content from MimeMessage */
060    private String plainContent;
061
062    /** Html mail content from MimeMessage */
063    private String htmlContent;
064
065    /** List of attachments of MimeMessage */
066    private final List<DataSource> attachmentList;
067
068    /** Attachments stored by their content-id */
069    private final Map<String, DataSource> cidMap;
070
071    /** Is this a Multipart email */
072    private boolean isMultiPart;
073
074    /**
075     * Constructs an instance with the MimeMessage to be extracted.
076     *
077     * @param message the message to parse
078     */
079    public MimeMessageParser(final MimeMessage message)
080    {
081        attachmentList = new ArrayList<DataSource>();
082        cidMap = new HashMap<String, DataSource>();
083        this.mimeMessage = message;
084        this.isMultiPart = false;
085    }
086
087    /**
088     * Does the actual extraction.
089     *
090     * @return this instance
091     * @throws Exception parsing the mime message failed
092     */
093    public MimeMessageParser parse() throws Exception
094    {
095        this.parse(null, mimeMessage);
096        return this;
097    }
098
099    /**
100     * @return the 'to' recipients of the message
101     * @throws Exception determining the recipients failed
102     */
103    public List<javax.mail.Address> getTo() throws Exception
104    {
105        final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.TO);
106        return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
107    }
108
109    /**
110     * @return the 'cc' recipients of the message
111     * @throws Exception determining the recipients failed
112     */
113    public List<javax.mail.Address> getCc() throws Exception
114    {
115        final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.CC);
116        return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
117    }
118
119    /**
120     * @return the 'bcc' recipients of the message
121     * @throws Exception determining the recipients failed
122     */
123    public List<javax.mail.Address> getBcc() throws Exception
124    {
125        final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.BCC);
126        return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>();
127    }
128
129    /**
130     * @return the 'from' field of the message
131     * @throws Exception parsing the mime message failed
132     */
133    public String getFrom() throws Exception
134    {
135        final javax.mail.Address[] addresses = this.mimeMessage.getFrom();
136        if (addresses == null || addresses.length == 0)
137        {
138            return null;
139        }
140        return ((InternetAddress) addresses[0]).getAddress();
141    }
142
143    /**
144     * @return the 'replyTo' address of the email
145     * @throws Exception parsing the mime message failed
146     */
147    public String getReplyTo() throws Exception
148    {
149        final javax.mail.Address[] addresses = this.mimeMessage.getReplyTo();
150        if (addresses == null || addresses.length == 0)
151        {
152            return null;
153        }
154        return ((InternetAddress) addresses[0]).getAddress();
155    }
156
157    /**
158     * @return the mail subject
159     * @throws Exception parsing the mime message failed
160     */
161    public String getSubject() throws Exception
162    {
163        return this.mimeMessage.getSubject();
164    }
165
166    /**
167     * Extracts the content of a MimeMessage recursively.
168     *
169     * @param parent the parent multi-part
170     * @param part   the current MimePart
171     * @throws MessagingException parsing the MimeMessage failed
172     * @throws IOException        parsing the MimeMessage failed
173     */
174    protected void parse(final Multipart parent, final MimePart part)
175        throws MessagingException, IOException
176    {
177        if (isMimeType(part, "text/plain") && plainContent == null
178                && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition()))
179        {
180            plainContent = (String) part.getContent();
181        }
182        else
183        {
184            if (isMimeType(part, "text/html") && htmlContent == null
185                    && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition()))
186            {
187                htmlContent = (String) part.getContent();
188            }
189            else
190            {
191                if (isMimeType(part, "multipart/*"))
192                {
193                    this.isMultiPart = true;
194                    final Multipart mp = (Multipart) part.getContent();
195                    final int count = mp.getCount();
196
197                    // iterate over all MimeBodyPart
198
199                    for (int i = 0; i < count; i++)
200                    {
201                        parse(mp, (MimeBodyPart) mp.getBodyPart(i));
202                    }
203                }
204                else
205                {
206                    final String cid = stripContentId(part.getContentID());
207                    final DataSource ds = createDataSource(parent, part);
208                    if (cid != null)
209                    {
210                        this.cidMap.put(cid, ds);
211                    }
212                    this.attachmentList.add(ds);
213                }
214            }
215        }
216    }
217
218    /**
219     * Strips the content id of any whitespace and angle brackets.
220     * @param contentId the string to strip
221     * @return a stripped version of the content id
222     */
223    private String stripContentId(final String contentId)
224    {
225        if (contentId == null)
226        {
227            return null;
228        }
229        return contentId.trim().replaceAll("[\\<\\>]", "");
230    }
231
232    /**
233     * Checks whether the MimePart contains an object of the given mime type.
234     *
235     * @param part     the current MimePart
236     * @param mimeType the mime type to check
237     * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise
238     * @throws MessagingException parsing the MimeMessage failed
239     * @throws IOException        parsing the MimeMessage failed
240     */
241    private boolean isMimeType(final MimePart part, final String mimeType)
242        throws MessagingException, IOException
243    {
244        // Do not use part.isMimeType(String) as it is broken for MimeBodyPart
245        // and does not really check the actual content type.
246
247        try
248        {
249            final ContentType ct = new ContentType(part.getDataHandler().getContentType());
250            return ct.match(mimeType);
251        }
252        catch (final ParseException ex)
253        {
254            return part.getContentType().equalsIgnoreCase(mimeType);
255        }
256    }
257
258    /**
259     * Parses the MimePart to create a DataSource.
260     *
261     * @param parent the parent multi-part
262     * @param part   the current part to be processed
263     * @return the DataSource
264     * @throws MessagingException creating the DataSource failed
265     * @throws IOException        creating the DataSource failed
266     */
267    protected DataSource createDataSource(final Multipart parent, final MimePart part)
268        throws MessagingException, IOException
269    {
270        final DataHandler dataHandler = part.getDataHandler();
271        final DataSource dataSource = dataHandler.getDataSource();
272        final String contentType = getBaseMimeType(dataSource.getContentType());
273        final byte[] content = this.getContent(dataSource.getInputStream());
274        final ByteArrayDataSource result = new ByteArrayDataSource(content, contentType);
275        final String dataSourceName = getDataSourceName(part, dataSource);
276
277        result.setName(dataSourceName);
278        return result;
279    }
280
281    /** @return Returns the mimeMessage. */
282    public MimeMessage getMimeMessage()
283    {
284        return mimeMessage;
285    }
286
287    /** @return Returns the isMultiPart. */
288    public boolean isMultipart()
289    {
290        return isMultiPart;
291    }
292
293    /** @return Returns the plainContent if any */
294    public String getPlainContent()
295    {
296        return plainContent;
297    }
298
299    /** @return Returns the attachmentList. */
300    public List<DataSource> getAttachmentList()
301    {
302        return attachmentList;
303    }
304
305    /**
306     * Returns a collection of all content-ids in the parsed message.
307     * <p>
308     * The content-ids are stripped of any angle brackets, i.e. "part1" instead
309     * of "&lt;part1&gt;".
310     *
311     * @return the collection of content ids.
312     * @since 1.3.4
313     */
314    public Collection<String> getContentIds()
315    {
316        return Collections.unmodifiableSet(cidMap.keySet());
317    }
318
319    /** @return Returns the htmlContent if any */
320    public String getHtmlContent()
321    {
322        return htmlContent;
323    }
324
325    /** @return true if a plain content is available */
326    public boolean hasPlainContent()
327    {
328        return this.plainContent != null;
329    }
330
331    /** @return true if HTML content is available */
332    public boolean hasHtmlContent()
333    {
334        return this.htmlContent != null;
335    }
336
337    /** @return true if attachments are available */
338    public boolean hasAttachments()
339    {
340        return this.attachmentList.size() > 0;
341    }
342
343    /**
344     * Find an attachment using its name.
345     *
346     * @param name the name of the attachment
347     * @return the corresponding datasource or null if nothing was found
348     */
349    public DataSource findAttachmentByName(final String name)
350    {
351        DataSource dataSource;
352
353        for (int i = 0; i < getAttachmentList().size(); i++)
354        {
355            dataSource = getAttachmentList().get(i);
356            if (name.equalsIgnoreCase(dataSource.getName()))
357            {
358                return dataSource;
359            }
360        }
361
362        return null;
363    }
364
365    /**
366     * Find an attachment using its content-id.
367     * <p>
368     * The content-id must be stripped of any angle brackets,
369     * i.e. "part1" instead of "&lt;part1&gt;".
370     *
371     * @param cid the content-id of the attachment
372     * @return the corresponding datasource or null if nothing was found
373     * @since 1.3.4
374     */
375    public DataSource findAttachmentByCid(final String cid)
376    {
377        final DataSource dataSource = cidMap.get(cid);
378        return dataSource;
379    }
380
381    /**
382     * Determines the name of the data source if it is not already set.
383     *
384     * @param part the mail part
385     * @param dataSource the data source
386     * @return the name of the data source or {@code null} if no name can be determined
387     * @throws MessagingException accessing the part failed
388     * @throws UnsupportedEncodingException decoding the text failed
389     */
390    protected String getDataSourceName(final Part part, final DataSource dataSource)
391        throws MessagingException, UnsupportedEncodingException
392    {
393        String result = dataSource.getName();
394
395        if (result == null || result.length() == 0)
396        {
397            result = part.getFileName();
398        }
399
400        if (result != null && result.length() > 0)
401        {
402            result = MimeUtility.decodeText(result);
403        }
404        else
405        {
406            result = null;
407        }
408
409        return result;
410    }
411
412    /**
413     * Read the content of the input stream.
414     *
415     * @param is the input stream to process
416     * @return the content of the input stream
417     * @throws IOException reading the input stream failed
418     */
419    private byte[] getContent(final InputStream is)
420        throws IOException
421    {
422        int ch;
423        byte[] result;
424
425        final ByteArrayOutputStream os = new ByteArrayOutputStream();
426        final BufferedInputStream isReader = new BufferedInputStream(is);
427        final BufferedOutputStream osWriter = new BufferedOutputStream(os);
428
429        while ((ch = isReader.read()) != -1)
430        {
431            osWriter.write(ch);
432        }
433
434        osWriter.flush();
435        result = os.toByteArray();
436        osWriter.close();
437
438        return result;
439    }
440
441    /**
442     * Parses the mimeType.
443     *
444     * @param fullMimeType the mime type from the mail api
445     * @return the real mime type
446     */
447    private String getBaseMimeType(final String fullMimeType)
448    {
449        final int pos = fullMimeType.indexOf(';');
450        if (pos >= 0)
451        {
452            return fullMimeType.substring(0, pos);
453        }
454        return fullMimeType;
455    }
456}