1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.mail; 18 19 import javax.activation.DataSource; 20 import java.io.IOException; 21 import java.util.HashMap; 22 import java.util.Map; 23 import java.util.regex.Matcher; 24 import java.util.regex.Pattern; 25 26 /** 27 * <p>Small wrapper class on top of HtmlEmail which encapsulates the required logic 28 * to retrieve images that are contained in "<img src=../>" elements in the HTML 29 * code. This is done by replacing all img-src-elements with "cid:"-entries and 30 * embedding images in the email. 31 * </p> 32 * <p> 33 * For local files the class tries to either load them via an absolute path or - 34 * if available - use a relative path starting from a base directory. For files 35 * that are not found locally, the implementation tries to download 36 * the element and link it in. 37 * </p> 38 * <p> 39 * The image loading is done by an instance of <code>DataSourceResolver</code> 40 * which has to be provided by the caller. 41 * </p> 42 * 43 * @since 1.3 44 */ 45 public class ImageHtmlEmail extends HtmlEmail 46 { 47 // Regular Expression to find all <IMG SRC="..."> entries in an HTML 48 // document.It needs to cater for various things, like more whitespaces 49 // including newlines on any place, HTML is not case sensitive and there 50 // can be arbitrary text between "IMG" and "SRC" like IDs and other things. 51 52 /** Regexp for extracting {@code <img>} tags */ 53 public static final String REGEX_IMG_SRC = 54 "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])"; 55 56 /** regexp for extracting {@code <script>} tags */ 57 public static final String REGEX_SCRIPT_SRC = 58 "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])"; 59 60 // this pattern looks for the HTML image tag which indicates embedded images, 61 // the grouping is necessary to allow to replace the element with the CID 62 63 /** pattern for extracting <img> tags */ 64 private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC); 65 66 /** pattern for extracting <script> tags */ 67 private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC); 68 69 /** resolve the images and script resources to a DataSource */ 70 private DataSourceResolver dataSourceResolver; 71 72 /** 73 * Get the data source resolver. 74 * 75 * @return the resolver 76 */ 77 public DataSourceResolver getDataSourceResolver() 78 { 79 return dataSourceResolver; 80 } 81 82 /** 83 * Set the data source resolver. 84 * 85 * @param dataSourceResolver the resolver 86 */ 87 public void setDataSourceResolver(final DataSourceResolver dataSourceResolver) 88 { 89 this.dataSourceResolver = dataSourceResolver; 90 } 91 92 /** 93 * Does the work of actually building the MimeMessage. 94 * 95 * @see org.apache.commons.mail.HtmlEmail#buildMimeMessage() 96 * @throws EmailException building the MimeMessage failed 97 */ 98 @Override 99 public void buildMimeMessage() throws EmailException 100 { 101 try 102 { 103 // embed all the matching image and script resources within the email 104 String temp = replacePattern(super.html, IMG_PATTERN); 105 temp = replacePattern(temp, SCRIPT_PATTERN); 106 setHtmlMsg(temp); 107 super.buildMimeMessage(); 108 } 109 catch (final IOException e) 110 { 111 throw new EmailException("Building the MimeMessage failed", e); 112 } 113 } 114 115 /** 116 * Replace the regexp matching resource locations with "cid:..." references. 117 * 118 * @param htmlMessage the HTML message to analyze 119 * @param pattern the regular expression to find resources 120 * @return the HTML message containing "cid" references 121 * @throws EmailException creating the email failed 122 * @throws IOException resolving the resources failed 123 */ 124 private String replacePattern(final String htmlMessage, final Pattern pattern) 125 throws EmailException, IOException 126 { 127 DataSource dataSource; 128 final StringBuffer stringBuffer = new StringBuffer(); 129 130 // maps "cid" --> name 131 final Map<String, String> cidCache = new HashMap<String, String>(); 132 133 // maps "name" --> dataSource 134 final Map<String, DataSource> dataSourceCache = new HashMap<String, DataSource>(); 135 136 // in the String, replace all "img src" with a CID and embed the related 137 // image file if we find it. 138 final Matcher matcher = pattern.matcher(htmlMessage); 139 140 // the matcher returns all instances one by one 141 while (matcher.find()) 142 { 143 // in the RegEx we have the <src> element as second "group" 144 final String resourceLocation = matcher.group(2); 145 146 // avoid loading the same data source more than once 147 if (dataSourceCache.get(resourceLocation) == null) 148 { 149 // in lenient mode we might get a 'null' data source if the resource was not found 150 dataSource = getDataSourceResolver().resolve(resourceLocation); 151 152 if (dataSource != null) 153 { 154 dataSourceCache.put(resourceLocation, dataSource); 155 } 156 } 157 else 158 { 159 dataSource = dataSourceCache.get(resourceLocation); 160 } 161 162 if (dataSource != null) 163 { 164 String name = dataSource.getName(); 165 if (EmailUtils.isEmpty(name)) 166 { 167 name = resourceLocation; 168 } 169 170 String cid = cidCache.get(name); 171 172 if (cid == null) 173 { 174 cid = embed(dataSource, name); 175 cidCache.put(name, cid); 176 } 177 178 // if we embedded something, then we need to replace the URL with 179 // the CID, otherwise the Matcher takes care of adding the 180 // non-replaced text afterwards, so no else is necessary here! 181 matcher.appendReplacement(stringBuffer, 182 Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3))); 183 } 184 } 185 186 // append the remaining items... 187 matcher.appendTail(stringBuffer); 188 189 cidCache.clear(); 190 dataSourceCache.clear(); 191 192 return stringBuffer.toString(); 193 } 194 }