001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.mail; 018 019import javax.activation.DataSource; 020import java.io.IOException; 021import java.util.HashMap; 022import java.util.Map; 023import java.util.regex.Matcher; 024import java.util.regex.Pattern; 025 026/** 027 * <p>Small wrapper class on top of HtmlEmail which encapsulates the required logic 028 * to retrieve images that are contained in "<img src=../>" elements in the HTML 029 * code. This is done by replacing all img-src-elements with "cid:"-entries and 030 * embedding images in the email. 031 * </p> 032 * <p> 033 * For local files the class tries to either load them via an absolute path or - 034 * if available - use a relative path starting from a base directory. For files 035 * that are not found locally, the implementation tries to download 036 * the element and link it in. 037 * </p> 038 * <p> 039 * The image loading is done by an instance of <code>DataSourceResolver</code> 040 * which has to be provided by the caller. 041 * </p> 042 * 043 * @since 1.3 044 * @version $Id: ImageHtmlEmail.html 952467 2015-05-23 18:45:36Z tn $ 045 */ 046public class ImageHtmlEmail extends HtmlEmail 047{ 048 // Regular Expression to find all <IMG SRC="..."> entries in an HTML 049 // document.It needs to cater for various things, like more whitespaces 050 // including newlines on any place, HTML is not case sensitive and there 051 // can be arbitrary text between "IMG" and "SRC" like IDs and other things. 052 053 /** Regexp for extracting {@code <img>} tags */ 054 public static final String REGEX_IMG_SRC = 055 "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])"; 056 057 /** regexp for extracting {@code <script>} tags */ 058 public static final String REGEX_SCRIPT_SRC = 059 "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])"; 060 061 // this pattern looks for the HTML image tag which indicates embedded images, 062 // the grouping is necessary to allow to replace the element with the CID 063 064 /** pattern for extracting <img> tags */ 065 private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC); 066 067 /** pattern for extracting <script> tags */ 068 private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC); 069 070 /** resolve the images and script resources to a DataSource */ 071 private DataSourceResolver dataSourceResolver; 072 073 /** 074 * Get the data source resolver. 075 * 076 * @return the resolver 077 */ 078 public DataSourceResolver getDataSourceResolver() 079 { 080 return dataSourceResolver; 081 } 082 083 /** 084 * Set the data source resolver. 085 * 086 * @param dataSourceResolver the resolver 087 */ 088 public void setDataSourceResolver(final DataSourceResolver dataSourceResolver) 089 { 090 this.dataSourceResolver = dataSourceResolver; 091 } 092 093 /** 094 * Does the work of actually building the MimeMessage. 095 * 096 * @see org.apache.commons.mail.HtmlEmail#buildMimeMessage() 097 * @throws EmailException building the MimeMessage failed 098 */ 099 @Override 100 public void buildMimeMessage() throws EmailException 101 { 102 try 103 { 104 // embed all the matching image and script resources within the email 105 String temp = replacePattern(super.html, IMG_PATTERN); 106 temp = replacePattern(temp, SCRIPT_PATTERN); 107 setHtmlMsg(temp); 108 super.buildMimeMessage(); 109 } 110 catch (final IOException e) 111 { 112 throw new EmailException("Building the MimeMessage failed", e); 113 } 114 } 115 116 /** 117 * Replace the regexp matching resource locations with "cid:..." references. 118 * 119 * @param htmlMessage the HTML message to analyze 120 * @param pattern the regular expression to find resources 121 * @return the HTML message containing "cid" references 122 * @throws EmailException creating the email failed 123 * @throws IOException resolving the resources failed 124 */ 125 private String replacePattern(final String htmlMessage, final Pattern pattern) 126 throws EmailException, IOException 127 { 128 DataSource dataSource; 129 final StringBuffer stringBuffer = new StringBuffer(); 130 131 // maps "cid" --> name 132 final Map<String, String> cidCache = new HashMap<String, String>(); 133 134 // maps "name" --> dataSource 135 final Map<String, DataSource> dataSourceCache = new HashMap<String, DataSource>(); 136 137 // in the String, replace all "img src" with a CID and embed the related 138 // image file if we find it. 139 final Matcher matcher = pattern.matcher(htmlMessage); 140 141 // the matcher returns all instances one by one 142 while (matcher.find()) 143 { 144 // in the RegEx we have the <src> element as second "group" 145 final String resourceLocation = matcher.group(2); 146 147 // avoid loading the same data source more than once 148 if (dataSourceCache.get(resourceLocation) == null) 149 { 150 // in lenient mode we might get a 'null' data source if the resource was not found 151 dataSource = getDataSourceResolver().resolve(resourceLocation); 152 153 if (dataSource != null) 154 { 155 dataSourceCache.put(resourceLocation, dataSource); 156 } 157 } 158 else 159 { 160 dataSource = dataSourceCache.get(resourceLocation); 161 } 162 163 if (dataSource != null) 164 { 165 String name = dataSource.getName(); 166 if (EmailUtils.isEmpty(name)) 167 { 168 name = resourceLocation; 169 } 170 171 String cid = cidCache.get(name); 172 173 if (cid == null) 174 { 175 cid = embed(dataSource, name); 176 cidCache.put(name, cid); 177 } 178 179 // if we embedded something, then we need to replace the URL with 180 // the CID, otherwise the Matcher takes care of adding the 181 // non-replaced text afterwards, so no else is necessary here! 182 matcher.appendReplacement(stringBuffer, 183 Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3))); 184 } 185 } 186 187 // append the remaining items... 188 matcher.appendTail(stringBuffer); 189 190 cidCache.clear(); 191 dataSourceCache.clear(); 192 193 return stringBuffer.toString(); 194 } 195}