1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.mail2.javax;
18
19 import java.io.IOException;
20 import java.util.HashMap;
21 import java.util.Map;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import javax.activation.DataSource;
26
27 import org.apache.commons.mail2.core.EmailException;
28 import org.apache.commons.mail2.core.EmailUtils;
29
30 /**
31 * <p>
32 * Small wrapper class on top of HtmlEmail which encapsulates the required logic to retrieve images that are contained in "<img src=../>" elements in the
33 * HTML code. This is done by replacing all img-src-elements with "cid:"-entries and embedding images in the email.
34 * </p>
35 * <p>
36 * For local files the class tries to either load them via an absolute path or - if available - use a relative path starting from a base directory. For files
37 * that are not found locally, the implementation tries to download the element and link it in.
38 * </p>
39 * <p>
40 * The image loading is done by an instance of {@code DataSourceResolver} which has to be provided by the caller.
41 * </p>
42 *
43 * @since 1.3
44 */
45 public class ImageHtmlEmail extends HtmlEmail {
46 // Regular Expression to find all <IMG SRC="..."> entries in an HTML
47 // document.It needs to cater for various things, like more whitespaces
48 // including newlines on any place, HTML is not case sensitive and there
49 // can be arbitrary text between "IMG" and "SRC" like IDs and other things.
50
51 /** Regexp for extracting {@code <img>} tags */
52 public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
53
54 /** Regexp for extracting {@code <script>} tags */
55 public static final String REGEX_SCRIPT_SRC = "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
56
57 // this pattern looks for the HTML image tag which indicates embedded images,
58 // the grouping is necessary to allow to replace the element with the CID
59
60 /** Pattern for extracting {@code <img>} tags */
61 private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);
62
63 /** Pattern for extracting {@code <script>} tags */
64 private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);
65
66 /** Resolve the images and script resources to a DataSource */
67 private DataSourceResolver dataSourceResolver;
68
69 /**
70 * Constructs a new instance.
71 */
72 public ImageHtmlEmail() {
73 // empty
74 }
75
76 /**
77 * Does the work of actually building the MimeMessage.
78 *
79 * @see org.apache.commons.mail2.javax.HtmlEmail#buildMimeMessage()
80 * @throws EmailException building the MimeMessage failed
81 */
82 @Override
83 public void buildMimeMessage() throws EmailException {
84 try {
85 // embed all the matching image and script resources within the email
86 String temp = replacePattern(getHtml(), IMG_PATTERN);
87 temp = replacePattern(temp, SCRIPT_PATTERN);
88 setHtmlMsg(temp);
89 super.buildMimeMessage();
90 } catch (final IOException e) {
91 throw new EmailException("Building the MimeMessage failed", e);
92 }
93 }
94
95 /**
96 * Gets the data source resolver.
97 *
98 * @return the resolver
99 */
100 public DataSourceResolver getDataSourceResolver() {
101 return dataSourceResolver;
102 }
103
104 /**
105 * Replace the regexp matching resource locations with "cid:..." references.
106 *
107 * @param htmlMessage the HTML message to analyze
108 * @param pattern the regular expression to find resources
109 * @return the HTML message containing "cid" references
110 * @throws EmailException creating the email failed
111 * @throws IOException resolving the resources failed
112 */
113 private String replacePattern(final String htmlMessage, final Pattern pattern) throws EmailException, IOException {
114 DataSource dataSource;
115 final StringBuffer stringBuffer = new StringBuffer();
116
117 // maps "cid" --> name
118 final Map<String, String> cidCache = new HashMap<>();
119
120 // maps "name" --> dataSource
121 final Map<String, DataSource> dataSourceCache = new HashMap<>();
122
123 // in the String, replace all "img src" with a CID and embed the related
124 // image file if we find it.
125 final Matcher matcher = pattern.matcher(htmlMessage);
126
127 // the matcher returns all instances one by one
128 while (matcher.find()) {
129 // in the RegEx we have the <src> element as second "group"
130 final String resourceLocation = matcher.group(2);
131
132 // avoid loading the same data source more than once
133 if (dataSourceCache.get(resourceLocation) == null) {
134 // in lenient mode we might get a 'null' data source if the resource was not found
135 dataSource = getDataSourceResolver().resolve(resourceLocation);
136
137 if (dataSource != null) {
138 dataSourceCache.put(resourceLocation, dataSource);
139 }
140 } else {
141 dataSource = dataSourceCache.get(resourceLocation);
142 }
143
144 if (dataSource != null) {
145 String name = dataSource.getName();
146 if (EmailUtils.isEmpty(name)) {
147 name = resourceLocation;
148 }
149
150 String cid = cidCache.get(name);
151
152 if (cid == null) {
153 cid = embed(dataSource, name);
154 cidCache.put(name, cid);
155 }
156
157 // if we embedded something, then we need to replace the URL with
158 // the CID, otherwise the Matcher takes care of adding the
159 // non-replaced text afterwards, so no else is necessary here!
160 matcher.appendReplacement(stringBuffer, Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3)));
161 }
162 }
163
164 // append the remaining items...
165 matcher.appendTail(stringBuffer);
166
167 cidCache.clear();
168 dataSourceCache.clear();
169
170 return stringBuffer.toString();
171 }
172
173 /**
174 * Sets the data source resolver.
175 *
176 * @param dataSourceResolver the resolver
177 */
178 public void setDataSourceResolver(final DataSourceResolver dataSourceResolver) {
179 this.dataSourceResolver = dataSourceResolver;
180 }
181 }