MimeMessageParser.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *     http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.mail2.jakarta.util;

  18. import java.io.IOException;
  19. import java.io.UnsupportedEncodingException;
  20. import java.util.ArrayList;
  21. import java.util.Arrays;
  22. import java.util.Collection;
  23. import java.util.Collections;
  24. import java.util.HashMap;
  25. import java.util.List;
  26. import java.util.Map;

  27. import org.apache.commons.mail2.jakarta.activation.InputStreamDataSource;

  28. import jakarta.activation.DataSource;
  29. import jakarta.mail.Address;
  30. import jakarta.mail.Message;
  31. import jakarta.mail.MessagingException;
  32. import jakarta.mail.Multipart;
  33. import jakarta.mail.Part;
  34. import jakarta.mail.internet.ContentType;
  35. import jakarta.mail.internet.InternetAddress;
  36. import jakarta.mail.internet.MimeBodyPart;
  37. import jakarta.mail.internet.MimeMessage;
  38. import jakarta.mail.internet.MimePart;
  39. import jakarta.mail.internet.MimeUtility;
  40. import jakarta.mail.internet.ParseException;

  41. /**
  42.  * Parses a MimeMessage and stores the individual parts such a plain text, HTML text and attachments.
  43.  *
  44.  * @since 1.3
  45.  */
  46. public class MimeMessageParser {

  47.     /** The MimeMessage to convert. */
  48.     private final MimeMessage mimeMessage;

  49.     /** Plain mail content from MimeMessage. */
  50.     private String plainContent;

  51.     /** HTML mail content from MimeMessage. */
  52.     private String htmlContent;

  53.     /** List of attachments of MimeMessage. */
  54.     private final List<DataSource> attachmentList;

  55.     /** Attachments stored by their content-id. */
  56.     private final Map<String, DataSource> cidMap;

  57.     /** Is this a Multipart email. */
  58.     private boolean isMultiPart;

  59.     /**
  60.      * Constructs an instance with the MimeMessage to be extracted.
  61.      *
  62.      * @param mimeMessage the message to parse
  63.      */
  64.     public MimeMessageParser(final MimeMessage mimeMessage) {
  65.         this.attachmentList = new ArrayList<>();
  66.         this.cidMap = new HashMap<>();
  67.         this.mimeMessage = mimeMessage;
  68.         this.isMultiPart = false;
  69.     }

  70.     private List<Address> asList(final Address[] recipients) {
  71.         return recipients != null ? Arrays.asList(recipients) : new ArrayList<>();
  72.     }

  73.     /**
  74.      * Parses the MimePart to create a DataSource.
  75.      *
  76.      * @param parent the parent multi-part
  77.      * @param part   the current part to be processed
  78.      * @return the DataSource
  79.      * @throws MessagingException creating the DataSource failed
  80.      * @throws IOException        error getting InputStream or unsupported encoding
  81.      */
  82.     @SuppressWarnings("resource") // Caller closes InputStream
  83.     protected DataSource createDataSource(final Multipart parent, final MimePart part) throws MessagingException, IOException {
  84.         final DataSource dataSource = part.getDataHandler().getDataSource();
  85.         final String contentType = getBaseMimeType(dataSource.getContentType());
  86.         final String dataSourceName = getDataSourceName(part, dataSource);
  87.         return new InputStreamDataSource(dataSource.getInputStream(), contentType, dataSourceName);
  88.     }

  89.     /**
  90.      * Find an attachment using its content-id.
  91.      * <p>
  92.      * The content-id must be stripped of any angle brackets, i.e. "part1" instead of "&lt;part1&gt;".
  93.      * </p>
  94.      *
  95.      * @param cid the content-id of the attachment
  96.      * @return the corresponding datasource or null if nothing was found
  97.      * @since 1.3.4
  98.      */
  99.     public DataSource findAttachmentByCid(final String cid) {
  100.         return cidMap.get(cid);
  101.     }

  102.     /**
  103.      * Find an attachment using its name.
  104.      *
  105.      * @param name the name of the attachment
  106.      * @return the corresponding datasource or null if nothing was found
  107.      */
  108.     public DataSource findAttachmentByName(final String name) {
  109.         for (final DataSource dataSource : getAttachmentList()) {
  110.             if (name.equalsIgnoreCase(dataSource.getName())) {
  111.                 return dataSource;
  112.             }
  113.         }
  114.         return null;
  115.     }

  116.     /**
  117.      * Gets the attachment list.
  118.      *
  119.      * @return Returns the attachment list.
  120.      */
  121.     public List<DataSource> getAttachmentList() {
  122.         return attachmentList;
  123.     }

  124.     /**
  125.      * Gets the MIME type.
  126.      *
  127.      * @param fullMimeType the mime type from the mail API
  128.      * @return the real mime type
  129.      */
  130.     private String getBaseMimeType(final String fullMimeType) {
  131.         final int pos = fullMimeType.indexOf(';');
  132.         return pos < 0 ? fullMimeType : fullMimeType.substring(0, pos);
  133.     }

  134.     /**
  135.      * Gets the BCC Address list.
  136.      *
  137.      * @return the 'BCC' recipients of the message
  138.      * @throws MessagingException determining the recipients failed
  139.      */
  140.     public List<Address> getBcc() throws MessagingException {
  141.         return asList(mimeMessage.getRecipients(Message.RecipientType.BCC));
  142.     }

  143.     /**
  144.      * Gets the CC Address list.
  145.      *
  146.      * @return the 'CC' recipients of the message
  147.      * @throws MessagingException determining the recipients failed
  148.      */
  149.     public List<Address> getCc() throws MessagingException {
  150.         return asList(mimeMessage.getRecipients(Message.RecipientType.CC));
  151.     }

  152.     /**
  153.      * Returns a collection of all content-ids in the parsed message.
  154.      * <p>
  155.      * The content-ids are stripped of any angle brackets, i.e. "part1" instead of "&lt;part1&gt;".
  156.      * </p>
  157.      *
  158.      * @return the collection of content ids.
  159.      * @since 1.3.4
  160.      */
  161.     public Collection<String> getContentIds() {
  162.         return Collections.unmodifiableSet(cidMap.keySet());
  163.     }

  164.     /**
  165.      * Determines the name of the data source if it is not already set.
  166.      *
  167.      * @param part       the mail part
  168.      * @param dataSource the data source
  169.      * @return the name of the data source or {@code null} if no name can be determined
  170.      * @throws MessagingException           accessing the part failed
  171.      * @throws UnsupportedEncodingException decoding the text failed
  172.      */
  173.     protected String getDataSourceName(final Part part, final DataSource dataSource) throws MessagingException, UnsupportedEncodingException {
  174.         String result = dataSource.getName();
  175.         if (isEmpty(result)) {
  176.             result = part.getFileName();
  177.         }
  178.         if (!isEmpty(result)) {
  179.             result = MimeUtility.decodeText(result);
  180.         } else {
  181.             result = null;
  182.         }
  183.         return result;
  184.     }

  185.     /**
  186.      * Gets the FROM field.
  187.      *
  188.      * @return the FROM field of the message
  189.      * @throws MessagingException parsing the mime message failed
  190.      */
  191.     public String getFrom() throws MessagingException {
  192.         final Address[] addresses = mimeMessage.getFrom();
  193.         if (isEmpty(addresses)) {
  194.             return null;
  195.         }
  196.         return ((InternetAddress) addresses[0]).getAddress();
  197.     }

  198.     /**
  199.      * Gets the htmlContent if any.
  200.      *
  201.      * @return Returns the htmlContent if any
  202.      */
  203.     public String getHtmlContent() {
  204.         return htmlContent;
  205.     }

  206.     /**
  207.      * Gets the MimeMessage.
  208.      *
  209.      * @return Returns the mimeMessage.
  210.      */
  211.     public MimeMessage getMimeMessage() {
  212.         return mimeMessage;
  213.     }

  214.     /**
  215.      * Gets the plain content if any.
  216.      *
  217.      * @return Returns the plainContent if any
  218.      */
  219.     public String getPlainContent() {
  220.         return plainContent;
  221.     }

  222.     /**
  223.      * Gets the 'replyTo' address of the email.
  224.      *
  225.      * @return the 'replyTo' address of the email
  226.      * @throws MessagingException parsing the mime message failed
  227.      */
  228.     public String getReplyTo() throws MessagingException {
  229.         final Address[] addresses = mimeMessage.getReplyTo();
  230.         if (isEmpty(addresses)) {
  231.             return null;
  232.         }
  233.         return ((InternetAddress) addresses[0]).getAddress();
  234.     }

  235.     /**
  236.      * Gets the MIME message subject.
  237.      *
  238.      * @return the MIME message subject.
  239.      * @throws MessagingException parsing the mime message failed.
  240.      */
  241.     public String getSubject() throws MessagingException {
  242.         return mimeMessage.getSubject();
  243.     }

  244.     /**
  245.      * Gets the MIME message 'to' list.
  246.      *
  247.      * @return the 'to' recipients of the message.
  248.      * @throws MessagingException determining the recipients failed
  249.      */
  250.     public List<Address> getTo() throws MessagingException {
  251.         return asList(mimeMessage.getRecipients(Message.RecipientType.TO));
  252.     }

  253.     /**
  254.      * Tests if attachments are present.
  255.      *
  256.      * @return true if attachments are present.
  257.      */
  258.     public boolean hasAttachments() {
  259.         return !attachmentList.isEmpty();
  260.     }

  261.     /**
  262.      * Tests is HTML content is present.
  263.      *
  264.      * @return true if HTML content is present.
  265.      */
  266.     public boolean hasHtmlContent() {
  267.         return htmlContent != null;
  268.     }

  269.     /**
  270.      * Tests is plain content is present.
  271.      *
  272.      * @return true if a plain content is present.
  273.      */
  274.     public boolean hasPlainContent() {
  275.         return plainContent != null;
  276.     }

  277.     private boolean isEmpty(final Object[] array) {
  278.         return array == null || array.length == 0;
  279.     }

  280.     private boolean isEmpty(final String result) {
  281.         return result == null || result.isEmpty();
  282.     }

  283.     /**
  284.      * Tests whether the MimePart contains an object of the given mime type.
  285.      *
  286.      * @param part     the current MimePart
  287.      * @param mimeType the mime type to check
  288.      * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise
  289.      * @throws MessagingException parsing the MimeMessage failed
  290.      */
  291.     private boolean isMimeType(final MimePart part, final String mimeType) throws MessagingException {
  292.         // Do not use part.isMimeType(String) as it is broken for MimeBodyPart
  293.         // and does not really check the actual content type.
  294.         try {
  295.             return new ContentType(part.getDataHandler().getContentType()).match(mimeType);
  296.         } catch (final ParseException ex) {
  297.             return part.getContentType().equalsIgnoreCase(mimeType);
  298.         }
  299.     }

  300.     /**
  301.      * Tests whether this is multipart.
  302.      *
  303.      * @return Returns the isMultiPart.
  304.      */
  305.     public boolean isMultipart() {
  306.         return isMultiPart;
  307.     }

  308.     /**
  309.      * Does the actual extraction.
  310.      *
  311.      * @return this instance
  312.      * @throws MessagingException parsing the mime message failed
  313.      * @throws IOException        parsing the mime message failed
  314.      */
  315.     public MimeMessageParser parse() throws MessagingException, IOException {
  316.         parse(null, mimeMessage);
  317.         return this;
  318.     }

  319.     /**
  320.      * Extracts the content of a MimeMessage recursively.
  321.      *
  322.      * @param parent the parent multi-part
  323.      * @param part   the current MimePart
  324.      * @throws MessagingException parsing the MimeMessage failed
  325.      * @throws IOException        parsing the MimeMessage failed
  326.      */
  327.     protected void parse(final Multipart parent, final MimePart part) throws MessagingException, IOException {
  328.         if (isMimeType(part, "text/plain") && plainContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) {
  329.             plainContent = (String) part.getContent();
  330.         } else if (isMimeType(part, "text/html") && htmlContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) {
  331.             htmlContent = (String) part.getContent();
  332.         } else if (isMimeType(part, "multipart/*")) {
  333.             isMultiPart = true;
  334.             final Multipart multipart = (Multipart) part.getContent();
  335.             final int count = multipart.getCount();
  336.             // iterate over all MimeBodyPart
  337.             for (int i = 0; i < count; i++) {
  338.                 parse(multipart, (MimeBodyPart) multipart.getBodyPart(i));
  339.             }
  340.         } else {
  341.             final String cid = stripContentId(part.getContentID());
  342.             final DataSource dataSource = createDataSource(parent, part);
  343.             if (cid != null) {
  344.                 cidMap.put(cid, dataSource);
  345.             }
  346.             attachmentList.add(dataSource);
  347.         }
  348.     }

  349.     /**
  350.      * Strips the content id of any whitespace and angle brackets.
  351.      *
  352.      * @param contentId the string to strip
  353.      * @return a stripped version of the content id
  354.      */
  355.     private String stripContentId(final String contentId) {
  356.         return contentId == null ? null : contentId.trim().replaceAll("[\\<\\>]", "");
  357.     }
  358. }