/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.mail;
18  
19  import javax.activation.DataSource;
20  import java.io.IOException;
21  import java.util.HashMap;
22  import java.util.Map;
23  import java.util.regex.Matcher;
24  import java.util.regex.Pattern;
25  
26  /**
27   * <p>Small wrapper class on top of HtmlEmail which encapsulates the required logic
28   * to retrieve images that are contained in "&lt;img src=../&gt;" elements in the HTML
29   * code. This is done by replacing all img-src-elements with "cid:"-entries and
30   * embedding images in the email.
31   * </p>
32   * <p>
33   * For local files the class tries to either load them via an absolute path or -
34   * if available - use a relative path starting from a base directory. For files
35   * that are not found locally, the implementation tries to download
36   * the element and link it in.
37   * </p>
38   * <p>
39   * The image loading is done by an instance of <code>DataSourceResolver</code>
40   * which has to be provided by the caller.
41   * </p>
42   *
43   * @since 1.3
44   */
45  public class ImageHtmlEmail extends HtmlEmail
46  {
47      // Regular Expression to find all <IMG SRC="..."> entries in an HTML
48      // document.It needs to cater for various things, like more whitespaces
49      // including newlines on any place, HTML is not case sensitive and there
50      // can be arbitrary text between "IMG" and "SRC" like IDs and other things.
51  
52      /** Regexp for extracting {@code <img>} tags */
53      public static final String REGEX_IMG_SRC =
54              "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
55  
56      /** regexp for extracting {@code <script>} tags */
57      public static final String REGEX_SCRIPT_SRC =
58              "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
59  
60      // this pattern looks for the HTML image tag which indicates embedded images,
61      // the grouping is necessary to allow to replace the element with the CID
62  
63      /** pattern for extracting <img> tags */
64      private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);
65  
66      /** pattern for extracting <script> tags */
67      private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);
68  
69      /** resolve the images and script resources to a DataSource */
70      private DataSourceResolver dataSourceResolver;
71  
72      /**
73       * Get the data source resolver.
74       *
75       * @return the resolver
76       */
77      public DataSourceResolver getDataSourceResolver()
78      {
79          return dataSourceResolver;
80      }
81  
82      /**
83       * Set the data source resolver.
84       *
85       * @param dataSourceResolver the resolver
86       */
87      public void setDataSourceResolver(final DataSourceResolver dataSourceResolver)
88      {
89          this.dataSourceResolver = dataSourceResolver;
90      }
91  
92       /**
93        * Does the work of actually building the MimeMessage.
94        *
95        * @see org.apache.commons.mail.HtmlEmail#buildMimeMessage()
96        * @throws EmailException building the MimeMessage failed
97        */
98      @Override
99      public void buildMimeMessage() throws EmailException
100     {
101         try
102         {
103             // embed all the matching image and script resources within the email
104             String temp = replacePattern(super.html, IMG_PATTERN);
105             temp = replacePattern(temp, SCRIPT_PATTERN);
106             setHtmlMsg(temp);
107             super.buildMimeMessage();
108         }
109         catch (final IOException e)
110         {
111             throw new EmailException("Building the MimeMessage failed", e);
112         }
113     }
114 
115     /**
116      * Replace the regexp matching resource locations with "cid:..." references.
117      *
118      * @param htmlMessage the HTML message to analyze
119      * @param pattern the regular expression to find resources
120      * @return the HTML message containing "cid" references
121      * @throws EmailException creating the email failed
122      * @throws IOException resolving the resources failed
123      */
124     private String replacePattern(final String htmlMessage, final Pattern pattern)
125             throws EmailException, IOException
126     {
127         DataSource dataSource;
128         final StringBuffer stringBuffer = new StringBuffer();
129 
130         // maps "cid" --> name
131         final Map<String, String> cidCache = new HashMap<String, String>();
132 
133         // maps "name" --> dataSource
134         final Map<String, DataSource> dataSourceCache = new HashMap<String, DataSource>();
135 
136         // in the String, replace all "img src" with a CID and embed the related
137         // image file if we find it.
138         final Matcher matcher = pattern.matcher(htmlMessage);
139 
140         // the matcher returns all instances one by one
141         while (matcher.find())
142         {
143             // in the RegEx we have the <src> element as second "group"
144             final String resourceLocation = matcher.group(2);
145 
146             // avoid loading the same data source more than once
147             if (dataSourceCache.get(resourceLocation) == null)
148             {
149                 // in lenient mode we might get a 'null' data source if the resource was not found
150                 dataSource = getDataSourceResolver().resolve(resourceLocation);
151 
152                 if (dataSource != null)
153                 {
154                     dataSourceCache.put(resourceLocation, dataSource);
155                 }
156             }
157             else
158             {
159                 dataSource = dataSourceCache.get(resourceLocation);
160             }
161 
162             if (dataSource != null)
163             {
164                 String name = dataSource.getName();
165                 if (EmailUtils.isEmpty(name))
166                 {
167                     name = resourceLocation;
168                 }
169 
170                 String cid = cidCache.get(name);
171 
172                 if (cid == null)
173                 {
174                     cid = embed(dataSource, name);
175                     cidCache.put(name, cid);
176                 }
177 
178                 // if we embedded something, then we need to replace the URL with
179                 // the CID, otherwise the Matcher takes care of adding the
180                 // non-replaced text afterwards, so no else is necessary here!
181                 matcher.appendReplacement(stringBuffer,
182                         Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3)));
183             }
184         }
185 
186         // append the remaining items...
187         matcher.appendTail(stringBuffer);
188 
189         cidCache.clear();
190         dataSourceCache.clear();
191 
192         return stringBuffer.toString();
193     }
194 }