ImageHtmlEmail.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *     http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.mail2.javax;

  18. import java.io.IOException;
  19. import java.util.HashMap;
  20. import java.util.Map;
  21. import java.util.regex.Matcher;
  22. import java.util.regex.Pattern;

  23. import javax.activation.DataSource;

  24. import org.apache.commons.mail2.core.EmailException;
  25. import org.apache.commons.mail2.core.EmailUtils;

  26. /**
  27.  * <p>
  28.  * Small wrapper class on top of HtmlEmail which encapsulates the required logic to retrieve images that are contained in "&lt;img src=../&gt;" elements in the
  29.  * HTML code. This is done by replacing all img-src-elements with "cid:"-entries and embedding images in the email.
  30.  * </p>
  31.  * <p>
  32.  * For local files the class tries to either load them via an absolute path or - if available - use a relative path starting from a base directory. For files
  33.  * that are not found locally, the implementation tries to download the element and link it in.
  34.  * </p>
  35.  * <p>
  36.  * The image loading is done by an instance of {@code DataSourceResolver} which has to be provided by the caller.
  37.  * </p>
  38.  *
  39.  * @since 1.3
  40.  */
  41. public class ImageHtmlEmail extends HtmlEmail {
  42.     // Regular Expression to find all <IMG SRC="..."> entries in an HTML
  43.     // document.It needs to cater for various things, like more whitespaces
  44.     // including newlines on any place, HTML is not case sensitive and there
  45.     // can be arbitrary text between "IMG" and "SRC" like IDs and other things.

  46.     /** Regexp for extracting {@code <img>} tags */
  47.     public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";

  48.     /** Regexp for extracting {@code <script>} tags */
  49.     public static final String REGEX_SCRIPT_SRC = "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";

  50.     // this pattern looks for the HTML image tag which indicates embedded images,
  51.     // the grouping is necessary to allow to replace the element with the CID

  52.     /** Pattern for extracting {@code <img>} tags */
  53.     private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);

  54.     /** Pattern for extracting {@code <script>} tags */
  55.     private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);

  56.     /** Resolve the images and script resources to a DataSource */
  57.     private DataSourceResolver dataSourceResolver;

  58.     /**
  59.      * Constructs a new instance.
  60.      */
  61.     public ImageHtmlEmail() {
  62.         // empty
  63.     }

  64.     /**
  65.      * Does the work of actually building the MimeMessage.
  66.      *
  67.      * @see org.apache.commons.mail2.javax.HtmlEmail#buildMimeMessage()
  68.      * @throws EmailException building the MimeMessage failed
  69.      */
  70.     @Override
  71.     public void buildMimeMessage() throws EmailException {
  72.         try {
  73.             // embed all the matching image and script resources within the email
  74.             String temp = replacePattern(getHtml(), IMG_PATTERN);
  75.             temp = replacePattern(temp, SCRIPT_PATTERN);
  76.             setHtmlMsg(temp);
  77.             super.buildMimeMessage();
  78.         } catch (final IOException e) {
  79.             throw new EmailException("Building the MimeMessage failed", e);
  80.         }
  81.     }

  82.     /**
  83.      * Gets the data source resolver.
  84.      *
  85.      * @return the resolver
  86.      */
  87.     public DataSourceResolver getDataSourceResolver() {
  88.         return dataSourceResolver;
  89.     }

  90.     /**
  91.      * Replace the regexp matching resource locations with "cid:..." references.
  92.      *
  93.      * @param htmlMessage the HTML message to analyze
  94.      * @param pattern     the regular expression to find resources
  95.      * @return the HTML message containing "cid" references
  96.      * @throws EmailException creating the email failed
  97.      * @throws IOException    resolving the resources failed
  98.      */
  99.     private String replacePattern(final String htmlMessage, final Pattern pattern) throws EmailException, IOException {
  100.         DataSource dataSource;
  101.         final StringBuffer stringBuffer = new StringBuffer();

  102.         // maps "cid" --> name
  103.         final Map<String, String> cidCache = new HashMap<>();

  104.         // maps "name" --> dataSource
  105.         final Map<String, DataSource> dataSourceCache = new HashMap<>();

  106.         // in the String, replace all "img src" with a CID and embed the related
  107.         // image file if we find it.
  108.         final Matcher matcher = pattern.matcher(htmlMessage);

  109.         // the matcher returns all instances one by one
  110.         while (matcher.find()) {
  111.             // in the RegEx we have the <src> element as second "group"
  112.             final String resourceLocation = matcher.group(2);

  113.             // avoid loading the same data source more than once
  114.             if (dataSourceCache.get(resourceLocation) == null) {
  115.                 // in lenient mode we might get a 'null' data source if the resource was not found
  116.                 dataSource = getDataSourceResolver().resolve(resourceLocation);

  117.                 if (dataSource != null) {
  118.                     dataSourceCache.put(resourceLocation, dataSource);
  119.                 }
  120.             } else {
  121.                 dataSource = dataSourceCache.get(resourceLocation);
  122.             }

  123.             if (dataSource != null) {
  124.                 String name = dataSource.getName();
  125.                 if (EmailUtils.isEmpty(name)) {
  126.                     name = resourceLocation;
  127.                 }

  128.                 String cid = cidCache.get(name);

  129.                 if (cid == null) {
  130.                     cid = embed(dataSource, name);
  131.                     cidCache.put(name, cid);
  132.                 }

  133.                 // if we embedded something, then we need to replace the URL with
  134.                 // the CID, otherwise the Matcher takes care of adding the
  135.                 // non-replaced text afterwards, so no else is necessary here!
  136.                 matcher.appendReplacement(stringBuffer, Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3)));
  137.             }
  138.         }

  139.         // append the remaining items...
  140.         matcher.appendTail(stringBuffer);

  141.         cidCache.clear();
  142.         dataSourceCache.clear();

  143.         return stringBuffer.toString();
  144.     }

  145.     /**
  146.      * Sets the data source resolver.
  147.      *
  148.      * @param dataSourceResolver the resolver
  149.      */
  150.     public void setDataSourceResolver(final DataSourceResolver dataSourceResolver) {
  151.         this.dataSourceResolver = dataSourceResolver;
  152.     }
  153. }