/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.mail;

import javax.activation.DataSource;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p>Small wrapper class on top of HtmlEmail which encapsulates the required logic
 * to retrieve images that are contained in {@code <img src=../>} elements in the HTML
 * code. This is done by replacing all img-src-elements with "cid:"-entries and
 * embedding images in the email. Resources referenced by {@code <script src=../>}
 * elements are processed in the same way.
 * </p>
 * <p>
 * For local files the class tries to either load them via an absolute path or -
 * if available - use a relative path starting from a base directory. For files
 * that are not found locally, the implementation tries to download
 * the element and link it in.
 * </p>
 * <p>
 * The image loading is done by an instance of <code>DataSourceResolver</code>
 * which has to be provided by the caller.
 * </p>
 *
 * @since 1.3
 */
public class ImageHtmlEmail extends HtmlEmail
{
    // Regular Expression to find all <IMG SRC="..."> entries in an HTML
    // document. It needs to cater for various things, like more whitespaces
    // including newlines on any place, HTML is not case sensitive and there
    // can be arbitrary text between "IMG" and "SRC" like IDs and other things.
    //
    // Both expressions define three groups: (1) everything from the opening
    // tag up to and including the opening quote of the src attribute, (2) the
    // resource location itself and (3) the closing quote.

    /** Regexp for extracting {@code <img>} tags */
    public static final String REGEX_IMG_SRC =
            "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";

    /** regexp for extracting {@code <script>} tags */
    public static final String REGEX_SCRIPT_SRC =
            "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";

    // this pattern looks for the HTML image tag which indicates embedded images,
    // the grouping is necessary to allow to replace the element with the CID

    /** pattern for extracting {@code <img>} tags */
    private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);

    /** pattern for extracting {@code <script>} tags */
    private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);

    /** resolve the images and script resources to a DataSource */
    private DataSourceResolver dataSourceResolver;

    /**
     * Get the data source resolver.
     *
     * @return the resolver, or {@code null} if none has been set
     */
    public DataSourceResolver getDataSourceResolver()
    {
        return dataSourceResolver;
    }

    /**
     * Set the data source resolver.
     *
     * @param dataSourceResolver the resolver
     */
    public void setDataSourceResolver(final DataSourceResolver dataSourceResolver)
    {
        this.dataSourceResolver = dataSourceResolver;
    }

    /**
     * Does the work of actually building the MimeMessage. Before delegating
     * to the superclass, every resource matched by the image and script
     * patterns is embedded and its location is rewritten to a "cid:" reference.
     * If no HTML content is present the rewriting step is skipped and the
     * superclass is invoked directly.
     *
     * @see org.apache.commons.mail.HtmlEmail#buildMimeMessage()
     * @throws EmailException building the MimeMessage failed
     */
    @Override
    public void buildMimeMessage() throws EmailException
    {
        try
        {
            // Without HTML content there is nothing to scan, and handing a
            // null string to the matcher would fail with a NullPointerException
            // before the superclass gets a chance to build the message.
            if (super.html != null)
            {
                // embed all the matching image and script resources within the email
                String temp = replacePattern(super.html, IMG_PATTERN);
                temp = replacePattern(temp, SCRIPT_PATTERN);
                setHtmlMsg(temp);
            }

            super.buildMimeMessage();
        }
        catch (final IOException e)
        {
            throw new EmailException("Building the MimeMessage failed", e);
        }
    }

    /**
     * Replace the regexp matching resource locations with "cid:..." references.
     * <p>
     * Each match is resolved through the configured data source resolver. When
     * the resolver yields no data source for a location, that match is left
     * untouched in the returned HTML. A resolver must have been set if the
     * message contains at least one match, otherwise a
     * {@code NullPointerException} is raised.
     * </p>
     *
     * @param htmlMessage the HTML message to analyze
     * @param pattern the regular expression to find resources
     * @return the HTML message containing "cid" references
     * @throws EmailException creating the email failed
     * @throws IOException resolving the resources failed
     */
    private String replacePattern(final String htmlMessage, final Pattern pattern)
            throws EmailException, IOException
    {
        // StringBuffer is what Matcher.appendReplacement() accepts on all Java versions
        final StringBuffer stringBuffer = new StringBuffer();

        // maps "name" --> cid
        final Map<String, String> cidCache = new HashMap<String, String>();

        // maps "resource location" --> dataSource
        final Map<String, DataSource> dataSourceCache = new HashMap<String, DataSource>();

        // in the String, replace all "img src" with a CID and embed the related
        // image file if we find it.
        final Matcher matcher = pattern.matcher(htmlMessage);

        // the matcher returns all instances one by one
        while (matcher.find())
        {
            // in the RegEx we have the <src> element as second "group"
            final String resourceLocation = matcher.group(2);

            // avoid loading the same data source more than once
            DataSource dataSource = dataSourceCache.get(resourceLocation);

            if (dataSource == null)
            {
                // in lenient mode we might get a 'null' data source if the resource was not found
                dataSource = getDataSourceResolver().resolve(resourceLocation);

                if (dataSource != null)
                {
                    dataSourceCache.put(resourceLocation, dataSource);
                }
            }

            if (dataSource != null)
            {
                // fall back to the resource location when the data source has no name
                String name = dataSource.getName();
                if (EmailUtils.isEmpty(name))
                {
                    name = resourceLocation;
                }

                // embed every distinct name only once and reuse its CID afterwards
                String cid = cidCache.get(name);

                if (cid == null)
                {
                    cid = embed(dataSource, name);
                    cidCache.put(name, cid);
                }

                // if we embedded something, then we need to replace the URL with
                // the CID, otherwise the Matcher takes care of adding the
                // non-replaced text afterwards, so no else is necessary here!
                // quoteReplacement() keeps '$' and '\' in the original markup literal.
                matcher.appendReplacement(stringBuffer,
                        Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3)));
            }
        }

        // append the remaining items...
        matcher.appendTail(stringBuffer);

        return stringBuffer.toString();
    }
}