View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.mail;
18  
19  import javax.activation.DataSource;
20  import java.io.IOException;
21  import java.util.HashMap;
22  import java.util.Map;
23  import java.util.regex.Matcher;
24  import java.util.regex.Pattern;
25  
26  /**
27   * Small wrapper class on top of HtmlEmail which encapsulates the required logic
28   * to retrieve images that are contained in "<img src=../>" elements in the HTML
29   * code. This is done by replacing all img-src-elements with "cid:"-entries and
30   * embedding images in the email.
31   * </br>
32   * For local files the class tries to either load them via an absolute path or -
33   * if available - use a relative path starting from a base directory. For files
34   * that are not found locally, the implementation tries to download
35   * the element and link it in.
36   * </br>
37   * The image loading is done by an instance of <code>DataSourceResolver</code>
38   * which has to be provided by the caller.
39   * </br>
40   *
41   * @since 1.3
42   * @version $Id: ImageHtmlEmail.java 1448981 2013-02-22 10:40:34Z tn $
43   */
44  public class ImageHtmlEmail extends HtmlEmail
45  {
46      // Regular Expression to find all <IMG SRC="..."> entries in an HTML
47      // document.It needs to cater for various things, like more whitespaces
48      // including newlines on any place, HTML is not case sensitive and there
49      // can be arbitrary text between "IMG" and "SRC" like IDs and other things.
50  
51      /** regexp for extracting <img> tags */
52      public static final String REGEX_IMG_SRC =
53              "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
54  
55      /** regexp for extracting <script> tags */
56      public static final String REGEX_SCRIPT_SRC =
57              "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
58  
59      // this pattern looks for the HTML image tag which indicates embedded images,
60      // the grouping is necessary to allow to replace the element with the CID
61  
62      /** pattern for extracting <img> tags */
63      private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);
64  
65      /** pattern for extracting <script> tags */
66      private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);
67  
68      /** resolve the images and script resources to a DataSource */
69      private DataSourceResolver dataSourceResolver;
70  
71      /**
72       * Get the data source resolver.
73       *
74       * @return the resolver
75       */
76      public DataSourceResolver getDataSourceResolver()
77      {
78          return dataSourceResolver;
79      }
80  
81      /**
82       * Set the data source resolver.
83       *
84       * @param dataSourceResolver the resolver
85       */
86      public void setDataSourceResolver(DataSourceResolver dataSourceResolver)
87      {
88          this.dataSourceResolver = dataSourceResolver;
89      }
90  
91       /**
92        * Does the work of actually building the MimeMessage.
93        *
94        * @see org.apache.commons.mail.HtmlEmail#buildMimeMessage()
95        * @throws EmailException building the MimeMessage failed
96        */
97      @Override
98      public void buildMimeMessage() throws EmailException
99      {
100         try
101         {
102             // embed all the matching image and script resources within the email
103             String temp = replacePattern(super.html, IMG_PATTERN);
104             temp = replacePattern(temp, SCRIPT_PATTERN);
105             setHtmlMsg(temp);
106             super.buildMimeMessage();
107         }
108         catch (IOException e)
109         {
110             throw new EmailException("Building the MimeMessage failed", e);
111         }
112     }
113 
114     /**
115      * Replace the regexp matching resource locations with "cid:..." references.
116      *
117      * @param htmlMessage the HTML message to analyze
118      * @param pattern the regular expression to find resources
119      * @return the HTML message containing "cid" references
120      * @throws EmailException creating the email failed
121      * @throws IOException resolving the resources failed
122      */
123     private String replacePattern(final String htmlMessage, final Pattern pattern)
124             throws EmailException, IOException
125     {
126         DataSource dataSource;
127         StringBuffer stringBuffer = new StringBuffer();
128 
129         // maps "cid" --> name
130         Map<String, String> cidCache = new HashMap<String, String>();
131 
132         // maps "name" --> dataSource
133         Map<String, DataSource> dataSourceCache = new HashMap<String, DataSource>();
134 
135         // in the String, replace all "img src" with a CID and embed the related
136         // image file if we find it.
137         Matcher matcher = pattern.matcher(htmlMessage);
138 
139         // the matcher returns all instances one by one
140         while (matcher.find())
141         {
142             // in the RegEx we have the <src> element as second "group"
143             String resourceLocation = matcher.group(2);
144 
145             // avoid loading the same data source more than once
146             if (dataSourceCache.get(resourceLocation) == null)
147             {
148                 // in lenient mode we might get a 'null' data source if the resource was not found
149                 dataSource = getDataSourceResolver().resolve(resourceLocation);
150 
151                 if (dataSource != null)
152                 {
153                     dataSourceCache.put(resourceLocation, dataSource);
154                 }
155             }
156             else
157             {
158                 dataSource = dataSourceCache.get(resourceLocation);
159             }
160 
161             if (dataSource != null)
162             {
163                 String name = dataSource.getName();
164                 if (EmailUtils.isEmpty(name))
165                 {
166                     name = resourceLocation;
167                 }
168 
169                 String cid = cidCache.get(name);
170 
171                 if (cid == null)
172                 {
173                     cid = embed(dataSource, dataSource.getName());
174                     cidCache.put(name, cid);
175                 }
176 
177                 // if we embedded something, then we need to replace the URL with
178                 // the CID, otherwise the Matcher takes care of adding the
179                 // non-replaced text afterwards, so no else is necessary here!
180                 matcher.appendReplacement(stringBuffer, matcher.group(1) + "cid:" + cid + matcher.group(3));
181             }
182         }
183 
184         // append the remaining items...
185         matcher.appendTail(stringBuffer);
186 
187         cidCache.clear();
188         dataSourceCache.clear();
189 
190         return stringBuffer.toString();
191     }
192 }