001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.mail.util; 018 019import java.io.BufferedInputStream; 020import java.io.BufferedOutputStream; 021import java.io.ByteArrayOutputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.UnsupportedEncodingException; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.Collection; 028import java.util.Collections; 029import java.util.HashMap; 030import java.util.List; 031import java.util.Map; 032 033import javax.activation.DataHandler; 034import javax.activation.DataSource; 035import javax.mail.Message; 036import javax.mail.MessagingException; 037import javax.mail.Multipart; 038import javax.mail.Part; 039import javax.mail.internet.ContentType; 040import javax.mail.internet.InternetAddress; 041import javax.mail.internet.MimeBodyPart; 042import javax.mail.internet.MimeMessage; 043import javax.mail.internet.MimePart; 044import javax.mail.internet.MimeUtility; 045import javax.mail.internet.ParseException; 046import javax.mail.util.ByteArrayDataSource; 047 048/** 049 * Parses a MimeMessage and stores the individual parts such a plain text, 050 * HTML text and attachments. 051 * 052 * @since 1.3 053 * @version $Id: MimeMessageParser.html 952467 2015-05-23 18:45:36Z tn $ 054 */ 055public class MimeMessageParser 056{ 057 /** The MimeMessage to convert */ 058 private final MimeMessage mimeMessage; 059 060 /** Plain mail content from MimeMessage */ 061 private String plainContent; 062 063 /** Html mail content from MimeMessage */ 064 private String htmlContent; 065 066 /** List of attachments of MimeMessage */ 067 private final List<DataSource> attachmentList; 068 069 /** Attachments stored by their content-id */ 070 private final Map<String, DataSource> cidMap; 071 072 /** Is this a Multipart email */ 073 private boolean isMultiPart; 074 075 /** 076 * Constructs an instance with the MimeMessage to be extracted. 077 * 078 * @param message the message to parse 079 */ 080 public MimeMessageParser(final MimeMessage message) 081 { 082 attachmentList = new ArrayList<DataSource>(); 083 cidMap = new HashMap<String, DataSource>(); 084 this.mimeMessage = message; 085 this.isMultiPart = false; 086 } 087 088 /** 089 * Does the actual extraction. 090 * 091 * @return this instance 092 * @throws Exception parsing the mime message failed 093 */ 094 public MimeMessageParser parse() throws Exception 095 { 096 this.parse(null, mimeMessage); 097 return this; 098 } 099 100 /** 101 * @return the 'to' recipients of the message 102 * @throws Exception determining the recipients failed 103 */ 104 public List<javax.mail.Address> getTo() throws Exception 105 { 106 final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.TO); 107 return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>(); 108 } 109 110 /** 111 * @return the 'cc' recipients of the message 112 * @throws Exception determining the recipients failed 113 */ 114 public List<javax.mail.Address> getCc() throws Exception 115 { 116 final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.CC); 117 return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>(); 118 } 119 120 /** 121 * @return the 'bcc' recipients of the message 122 * @throws Exception determining the recipients failed 123 */ 124 public List<javax.mail.Address> getBcc() throws Exception 125 { 126 final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.BCC); 127 return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>(); 128 } 129 130 /** 131 * @return the 'from' field of the message 132 * @throws Exception parsing the mime message failed 133 */ 134 public String getFrom() throws Exception 135 { 136 final javax.mail.Address[] addresses = this.mimeMessage.getFrom(); 137 if (addresses == null || addresses.length == 0) 138 { 139 return null; 140 } 141 return ((InternetAddress) addresses[0]).getAddress(); 142 } 143 144 /** 145 * @return the 'replyTo' address of the email 146 * @throws Exception parsing the mime message failed 147 */ 148 public String getReplyTo() throws Exception 149 { 150 final javax.mail.Address[] addresses = this.mimeMessage.getReplyTo(); 151 if (addresses == null || addresses.length == 0) 152 { 153 return null; 154 } 155 return ((InternetAddress) addresses[0]).getAddress(); 156 } 157 158 /** 159 * @return the mail subject 160 * @throws Exception parsing the mime message failed 161 */ 162 public String getSubject() throws Exception 163 { 164 return this.mimeMessage.getSubject(); 165 } 166 167 /** 168 * Extracts the content of a MimeMessage recursively. 169 * 170 * @param parent the parent multi-part 171 * @param part the current MimePart 172 * @throws MessagingException parsing the MimeMessage failed 173 * @throws IOException parsing the MimeMessage failed 174 */ 175 protected void parse(final Multipart parent, final MimePart part) 176 throws MessagingException, IOException 177 { 178 if (isMimeType(part, "text/plain") && plainContent == null 179 && !MimePart.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) 180 { 181 plainContent = (String) part.getContent(); 182 } 183 else 184 { 185 if (isMimeType(part, "text/html") && htmlContent == null 186 && !MimePart.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) 187 { 188 htmlContent = (String) part.getContent(); 189 } 190 else 191 { 192 if (isMimeType(part, "multipart/*")) 193 { 194 this.isMultiPart = true; 195 final Multipart mp = (Multipart) part.getContent(); 196 final int count = mp.getCount(); 197 198 // iterate over all MimeBodyPart 199 200 for (int i = 0; i < count; i++) 201 { 202 parse(mp, (MimeBodyPart) mp.getBodyPart(i)); 203 } 204 } 205 else 206 { 207 final String cid = stripContentId(part.getContentID()); 208 final DataSource ds = createDataSource(parent, part); 209 if (cid != null) 210 { 211 this.cidMap.put(cid, ds); 212 } 213 this.attachmentList.add(ds); 214 } 215 } 216 } 217 } 218 219 /** 220 * Strips the content id of any whitespace and angle brackets. 221 * @param contentId the string to strip 222 * @return a stripped version of the content id 223 */ 224 private String stripContentId(final String contentId) 225 { 226 if (contentId == null) 227 { 228 return null; 229 } 230 return contentId.trim().replaceAll("[\\<\\>]", ""); 231 } 232 233 /** 234 * Checks whether the MimePart contains an object of the given mime type. 235 * 236 * @param part the current MimePart 237 * @param mimeType the mime type to check 238 * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise 239 * @throws MessagingException parsing the MimeMessage failed 240 * @throws IOException parsing the MimeMessage failed 241 */ 242 private boolean isMimeType(final MimePart part, final String mimeType) 243 throws MessagingException, IOException 244 { 245 // Do not use part.isMimeType(String) as it is broken for MimeBodyPart 246 // and does not really check the actual content type. 247 248 try 249 { 250 final ContentType ct = new ContentType(part.getDataHandler().getContentType()); 251 return ct.match(mimeType); 252 } 253 catch (final ParseException ex) 254 { 255 return part.getContentType().equalsIgnoreCase(mimeType); 256 } 257 } 258 259 /** 260 * Parses the MimePart to create a DataSource. 261 * 262 * @param parent the parent multi-part 263 * @param part the current part to be processed 264 * @return the DataSource 265 * @throws MessagingException creating the DataSource failed 266 * @throws IOException creating the DataSource failed 267 */ 268 protected DataSource createDataSource(final Multipart parent, final MimePart part) 269 throws MessagingException, IOException 270 { 271 final DataHandler dataHandler = part.getDataHandler(); 272 final DataSource dataSource = dataHandler.getDataSource(); 273 final String contentType = getBaseMimeType(dataSource.getContentType()); 274 final byte[] content = this.getContent(dataSource.getInputStream()); 275 final ByteArrayDataSource result = new ByteArrayDataSource(content, contentType); 276 final String dataSourceName = getDataSourceName(part, dataSource); 277 278 result.setName(dataSourceName); 279 return result; 280 } 281 282 /** @return Returns the mimeMessage. */ 283 public MimeMessage getMimeMessage() 284 { 285 return mimeMessage; 286 } 287 288 /** @return Returns the isMultiPart. */ 289 public boolean isMultipart() 290 { 291 return isMultiPart; 292 } 293 294 /** @return Returns the plainContent if any */ 295 public String getPlainContent() 296 { 297 return plainContent; 298 } 299 300 /** @return Returns the attachmentList. */ 301 public List<DataSource> getAttachmentList() 302 { 303 return attachmentList; 304 } 305 306 /** 307 * Returns a collection of all content-ids in the parsed message. 308 * <p> 309 * The content-ids are stripped of any angle brackets, i.e. "part1" instead 310 * of "<part1>". 311 * 312 * @return the collection of content ids. 313 * @since 1.3.4 314 */ 315 public Collection<String> getContentIds() 316 { 317 return Collections.unmodifiableSet(cidMap.keySet()); 318 } 319 320 /** @return Returns the htmlContent if any */ 321 public String getHtmlContent() 322 { 323 return htmlContent; 324 } 325 326 /** @return true if a plain content is available */ 327 public boolean hasPlainContent() 328 { 329 return this.plainContent != null; 330 } 331 332 /** @return true if HTML content is available */ 333 public boolean hasHtmlContent() 334 { 335 return this.htmlContent != null; 336 } 337 338 /** @return true if attachments are available */ 339 public boolean hasAttachments() 340 { 341 return this.attachmentList.size() > 0; 342 } 343 344 /** 345 * Find an attachment using its name. 346 * 347 * @param name the name of the attachment 348 * @return the corresponding datasource or null if nothing was found 349 */ 350 public DataSource findAttachmentByName(final String name) 351 { 352 DataSource dataSource; 353 354 for (int i = 0; i < getAttachmentList().size(); i++) 355 { 356 dataSource = getAttachmentList().get(i); 357 if (name.equalsIgnoreCase(dataSource.getName())) 358 { 359 return dataSource; 360 } 361 } 362 363 return null; 364 } 365 366 /** 367 * Find an attachment using its content-id. 368 * <p> 369 * The content-id must be stripped of any angle brackets, 370 * i.e. "part1" instead of "<part1>". 371 * 372 * @param cid the content-id of the attachment 373 * @return the corresponding datasource or null if nothing was found 374 * @since 1.3.4 375 */ 376 public DataSource findAttachmentByCid(final String cid) 377 { 378 final DataSource dataSource = cidMap.get(cid); 379 return dataSource; 380 } 381 382 /** 383 * Determines the name of the data source if it is not already set. 384 * 385 * @param part the mail part 386 * @param dataSource the data source 387 * @return the name of the data source or {@code null} if no name can be determined 388 * @throws MessagingException accessing the part failed 389 * @throws UnsupportedEncodingException decoding the text failed 390 */ 391 protected String getDataSourceName(final Part part, final DataSource dataSource) 392 throws MessagingException, UnsupportedEncodingException 393 { 394 String result = dataSource.getName(); 395 396 if (result == null || result.length() == 0) 397 { 398 result = part.getFileName(); 399 } 400 401 if (result != null && result.length() > 0) 402 { 403 result = MimeUtility.decodeText(result); 404 } 405 else 406 { 407 result = null; 408 } 409 410 return result; 411 } 412 413 /** 414 * Read the content of the input stream. 415 * 416 * @param is the input stream to process 417 * @return the content of the input stream 418 * @throws IOException reading the input stream failed 419 */ 420 private byte[] getContent(final InputStream is) 421 throws IOException 422 { 423 int ch; 424 byte[] result; 425 426 final ByteArrayOutputStream os = new ByteArrayOutputStream(); 427 final BufferedInputStream isReader = new BufferedInputStream(is); 428 final BufferedOutputStream osWriter = new BufferedOutputStream(os); 429 430 while ((ch = isReader.read()) != -1) 431 { 432 osWriter.write(ch); 433 } 434 435 osWriter.flush(); 436 result = os.toByteArray(); 437 osWriter.close(); 438 439 return result; 440 } 441 442 /** 443 * Parses the mimeType. 444 * 445 * @param fullMimeType the mime type from the mail api 446 * @return the real mime type 447 */ 448 private String getBaseMimeType(final String fullMimeType) 449 { 450 final int pos = fullMimeType.indexOf(';'); 451 if (pos >= 0) 452 { 453 return fullMimeType.substring(0, pos); 454 } 455 return fullMimeType; 456 } 457}