1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.mail;
18
19 import javax.activation.DataSource;
20 import java.io.IOException;
21 import java.util.HashMap;
22 import java.util.Map;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25
26 /**
27 * Small wrapper class on top of HtmlEmail which encapsulates the required logic
28 * to retrieve images that are contained in "<img src=../>" elements in the HTML
29 * code. This is done by replacing all img-src-elements with "cid:"-entries and
30 * embedding images in the email.
31 * </br>
32 * For local files the class tries to either load them via an absolute path or -
33 * if available - use a relative path starting from a base directory. For files
34 * that are not found locally, the implementation tries to download
35 * the element and link it in.
36 * </br>
37 * The image loading is done by an instance of <code>DataSourceResolver</code>
38 * which has to be provided by the caller.
39 * </br>
40 *
41 * @since 1.3
42 * @version $Id: ImageHtmlEmail.java 1448981 2013-02-22 10:40:34Z tn $
43 */
44 public class ImageHtmlEmail extends HtmlEmail
45 {
46 // Regular Expression to find all <IMG SRC="..."> entries in an HTML
47 // document.It needs to cater for various things, like more whitespaces
48 // including newlines on any place, HTML is not case sensitive and there
49 // can be arbitrary text between "IMG" and "SRC" like IDs and other things.
50
51 /** regexp for extracting <img> tags */
52 public static final String REGEX_IMG_SRC =
53 "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
54
55 /** regexp for extracting <script> tags */
56 public static final String REGEX_SCRIPT_SRC =
57 "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
58
59 // this pattern looks for the HTML image tag which indicates embedded images,
60 // the grouping is necessary to allow to replace the element with the CID
61
62 /** pattern for extracting <img> tags */
63 private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);
64
65 /** pattern for extracting <script> tags */
66 private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);
67
68 /** resolve the images and script resources to a DataSource */
69 private DataSourceResolver dataSourceResolver;
70
71 /**
72 * Get the data source resolver.
73 *
74 * @return the resolver
75 */
76 public DataSourceResolver getDataSourceResolver()
77 {
78 return dataSourceResolver;
79 }
80
81 /**
82 * Set the data source resolver.
83 *
84 * @param dataSourceResolver the resolver
85 */
86 public void setDataSourceResolver(DataSourceResolver dataSourceResolver)
87 {
88 this.dataSourceResolver = dataSourceResolver;
89 }
90
91 /**
92 * Does the work of actually building the MimeMessage.
93 *
94 * @see org.apache.commons.mail.HtmlEmail#buildMimeMessage()
95 * @throws EmailException building the MimeMessage failed
96 */
97 @Override
98 public void buildMimeMessage() throws EmailException
99 {
100 try
101 {
102 // embed all the matching image and script resources within the email
103 String temp = replacePattern(super.html, IMG_PATTERN);
104 temp = replacePattern(temp, SCRIPT_PATTERN);
105 setHtmlMsg(temp);
106 super.buildMimeMessage();
107 }
108 catch (IOException e)
109 {
110 throw new EmailException("Building the MimeMessage failed", e);
111 }
112 }
113
114 /**
115 * Replace the regexp matching resource locations with "cid:..." references.
116 *
117 * @param htmlMessage the HTML message to analyze
118 * @param pattern the regular expression to find resources
119 * @return the HTML message containing "cid" references
120 * @throws EmailException creating the email failed
121 * @throws IOException resolving the resources failed
122 */
123 private String replacePattern(final String htmlMessage, final Pattern pattern)
124 throws EmailException, IOException
125 {
126 DataSource dataSource;
127 StringBuffer stringBuffer = new StringBuffer();
128
129 // maps "cid" --> name
130 Map<String, String> cidCache = new HashMap<String, String>();
131
132 // maps "name" --> dataSource
133 Map<String, DataSource> dataSourceCache = new HashMap<String, DataSource>();
134
135 // in the String, replace all "img src" with a CID and embed the related
136 // image file if we find it.
137 Matcher matcher = pattern.matcher(htmlMessage);
138
139 // the matcher returns all instances one by one
140 while (matcher.find())
141 {
142 // in the RegEx we have the <src> element as second "group"
143 String resourceLocation = matcher.group(2);
144
145 // avoid loading the same data source more than once
146 if (dataSourceCache.get(resourceLocation) == null)
147 {
148 // in lenient mode we might get a 'null' data source if the resource was not found
149 dataSource = getDataSourceResolver().resolve(resourceLocation);
150
151 if (dataSource != null)
152 {
153 dataSourceCache.put(resourceLocation, dataSource);
154 }
155 }
156 else
157 {
158 dataSource = dataSourceCache.get(resourceLocation);
159 }
160
161 if (dataSource != null)
162 {
163 String name = dataSource.getName();
164 if (EmailUtils.isEmpty(name))
165 {
166 name = resourceLocation;
167 }
168
169 String cid = cidCache.get(name);
170
171 if (cid == null)
172 {
173 cid = embed(dataSource, dataSource.getName());
174 cidCache.put(name, cid);
175 }
176
177 // if we embedded something, then we need to replace the URL with
178 // the CID, otherwise the Matcher takes care of adding the
179 // non-replaced text afterwards, so no else is necessary here!
180 matcher.appendReplacement(stringBuffer, matcher.group(1) + "cid:" + cid + matcher.group(3));
181 }
182 }
183
184 // append the remaining items...
185 matcher.appendTail(stringBuffer);
186
187 cidCache.clear();
188 dataSourceCache.clear();
189
190 return stringBuffer.toString();
191 }
192 }