001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.mail;
018
019import javax.activation.DataSource;
020import java.io.IOException;
021import java.util.HashMap;
022import java.util.Map;
023import java.util.regex.Matcher;
024import java.util.regex.Pattern;
025
026/**
027 * <p>Small wrapper class on top of HtmlEmail which encapsulates the required logic
028 * to retrieve images that are contained in "&lt;img src=../&gt;" elements in the HTML
029 * code. This is done by replacing all img-src-elements with "cid:"-entries and
030 * embedding images in the email.
031 * </p>
032 * <p>
033 * For local files the class tries to either load them via an absolute path or -
034 * if available - use a relative path starting from a base directory. For files
035 * that are not found locally, the implementation tries to download
036 * the element and link it in.
037 * </p>
038 * <p>
039 * The image loading is done by an instance of <code>DataSourceResolver</code>
040 * which has to be provided by the caller.
041 * </p>
042 *
043 * @since 1.3
044 * @version $Id: ImageHtmlEmail.html 952467 2015-05-23 18:45:36Z tn $
045 */
046public class ImageHtmlEmail extends HtmlEmail
047{
048    // Regular Expression to find all <IMG SRC="..."> entries in an HTML
049    // document.It needs to cater for various things, like more whitespaces
050    // including newlines on any place, HTML is not case sensitive and there
051    // can be arbitrary text between "IMG" and "SRC" like IDs and other things.
052
053    /** Regexp for extracting {@code <img>} tags */
054    public static final String REGEX_IMG_SRC =
055            "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
056
057    /** regexp for extracting {@code <script>} tags */
058    public static final String REGEX_SCRIPT_SRC =
059            "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
060
061    // this pattern looks for the HTML image tag which indicates embedded images,
062    // the grouping is necessary to allow to replace the element with the CID
063
064    /** pattern for extracting <img> tags */
065    private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);
066
067    /** pattern for extracting <script> tags */
068    private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);
069
070    /** resolve the images and script resources to a DataSource */
071    private DataSourceResolver dataSourceResolver;
072
073    /**
074     * Get the data source resolver.
075     *
076     * @return the resolver
077     */
078    public DataSourceResolver getDataSourceResolver()
079    {
080        return dataSourceResolver;
081    }
082
083    /**
084     * Set the data source resolver.
085     *
086     * @param dataSourceResolver the resolver
087     */
088    public void setDataSourceResolver(final DataSourceResolver dataSourceResolver)
089    {
090        this.dataSourceResolver = dataSourceResolver;
091    }
092
093     /**
094      * Does the work of actually building the MimeMessage.
095      *
096      * @see org.apache.commons.mail.HtmlEmail#buildMimeMessage()
097      * @throws EmailException building the MimeMessage failed
098      */
099    @Override
100    public void buildMimeMessage() throws EmailException
101    {
102        try
103        {
104            // embed all the matching image and script resources within the email
105            String temp = replacePattern(super.html, IMG_PATTERN);
106            temp = replacePattern(temp, SCRIPT_PATTERN);
107            setHtmlMsg(temp);
108            super.buildMimeMessage();
109        }
110        catch (final IOException e)
111        {
112            throw new EmailException("Building the MimeMessage failed", e);
113        }
114    }
115
116    /**
117     * Replace the regexp matching resource locations with "cid:..." references.
118     *
119     * @param htmlMessage the HTML message to analyze
120     * @param pattern the regular expression to find resources
121     * @return the HTML message containing "cid" references
122     * @throws EmailException creating the email failed
123     * @throws IOException resolving the resources failed
124     */
125    private String replacePattern(final String htmlMessage, final Pattern pattern)
126            throws EmailException, IOException
127    {
128        DataSource dataSource;
129        final StringBuffer stringBuffer = new StringBuffer();
130
131        // maps "cid" --> name
132        final Map<String, String> cidCache = new HashMap<String, String>();
133
134        // maps "name" --> dataSource
135        final Map<String, DataSource> dataSourceCache = new HashMap<String, DataSource>();
136
137        // in the String, replace all "img src" with a CID and embed the related
138        // image file if we find it.
139        final Matcher matcher = pattern.matcher(htmlMessage);
140
141        // the matcher returns all instances one by one
142        while (matcher.find())
143        {
144            // in the RegEx we have the <src> element as second "group"
145            final String resourceLocation = matcher.group(2);
146
147            // avoid loading the same data source more than once
148            if (dataSourceCache.get(resourceLocation) == null)
149            {
150                // in lenient mode we might get a 'null' data source if the resource was not found
151                dataSource = getDataSourceResolver().resolve(resourceLocation);
152
153                if (dataSource != null)
154                {
155                    dataSourceCache.put(resourceLocation, dataSource);
156                }
157            }
158            else
159            {
160                dataSource = dataSourceCache.get(resourceLocation);
161            }
162
163            if (dataSource != null)
164            {
165                String name = dataSource.getName();
166                if (EmailUtils.isEmpty(name))
167                {
168                    name = resourceLocation;
169                }
170
171                String cid = cidCache.get(name);
172
173                if (cid == null)
174                {
175                    cid = embed(dataSource, name);
176                    cidCache.put(name, cid);
177                }
178
179                // if we embedded something, then we need to replace the URL with
180                // the CID, otherwise the Matcher takes care of adding the
181                // non-replaced text afterwards, so no else is necessary here!
182                matcher.appendReplacement(stringBuffer,
183                        Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3)));
184            }
185        }
186
187        // append the remaining items...
188        matcher.appendTail(stringBuffer);
189
190        cidCache.clear();
191        dataSourceCache.clear();
192
193        return stringBuffer.toString();
194    }
195}