001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.mail.util; 018 019import java.io.BufferedInputStream; 020import java.io.BufferedOutputStream; 021import java.io.ByteArrayOutputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.UnsupportedEncodingException; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.Collection; 028import java.util.Collections; 029import java.util.HashMap; 030import java.util.List; 031import java.util.Map; 032 033import javax.activation.DataHandler; 034import javax.activation.DataSource; 035import javax.mail.Message; 036import javax.mail.MessagingException; 037import javax.mail.Multipart; 038import javax.mail.Part; 039import javax.mail.internet.ContentType; 040import javax.mail.internet.InternetAddress; 041import javax.mail.internet.MimeBodyPart; 042import javax.mail.internet.MimeMessage; 043import javax.mail.internet.MimePart; 044import javax.mail.internet.MimeUtility; 045import javax.mail.internet.ParseException; 046import javax.mail.util.ByteArrayDataSource; 047 048/** 049 * Parses a MimeMessage and stores the individual parts such a plain text, 050 * HTML text and attachments. 051 * 052 * @since 1.3 053 */ 054public class MimeMessageParser 055{ 056 /** The MimeMessage to convert */ 057 private final MimeMessage mimeMessage; 058 059 /** Plain mail content from MimeMessage */ 060 private String plainContent; 061 062 /** Html mail content from MimeMessage */ 063 private String htmlContent; 064 065 /** List of attachments of MimeMessage */ 066 private final List<DataSource> attachmentList; 067 068 /** Attachments stored by their content-id */ 069 private final Map<String, DataSource> cidMap; 070 071 /** Is this a Multipart email */ 072 private boolean isMultiPart; 073 074 /** 075 * Constructs an instance with the MimeMessage to be extracted. 076 * 077 * @param message the message to parse 078 */ 079 public MimeMessageParser(final MimeMessage message) 080 { 081 attachmentList = new ArrayList<DataSource>(); 082 cidMap = new HashMap<String, DataSource>(); 083 this.mimeMessage = message; 084 this.isMultiPart = false; 085 } 086 087 /** 088 * Does the actual extraction. 089 * 090 * @return this instance 091 * @throws Exception parsing the mime message failed 092 */ 093 public MimeMessageParser parse() throws Exception 094 { 095 this.parse(null, mimeMessage); 096 return this; 097 } 098 099 /** 100 * @return the 'to' recipients of the message 101 * @throws Exception determining the recipients failed 102 */ 103 public List<javax.mail.Address> getTo() throws Exception 104 { 105 final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.TO); 106 return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>(); 107 } 108 109 /** 110 * @return the 'cc' recipients of the message 111 * @throws Exception determining the recipients failed 112 */ 113 public List<javax.mail.Address> getCc() throws Exception 114 { 115 final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.CC); 116 return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>(); 117 } 118 119 /** 120 * @return the 'bcc' recipients of the message 121 * @throws Exception determining the recipients failed 122 */ 123 public List<javax.mail.Address> getBcc() throws Exception 124 { 125 final javax.mail.Address[] recipients = this.mimeMessage.getRecipients(Message.RecipientType.BCC); 126 return recipients != null ? Arrays.asList(recipients) : new ArrayList<javax.mail.Address>(); 127 } 128 129 /** 130 * @return the 'from' field of the message 131 * @throws Exception parsing the mime message failed 132 */ 133 public String getFrom() throws Exception 134 { 135 final javax.mail.Address[] addresses = this.mimeMessage.getFrom(); 136 if (addresses == null || addresses.length == 0) 137 { 138 return null; 139 } 140 return ((InternetAddress) addresses[0]).getAddress(); 141 } 142 143 /** 144 * @return the 'replyTo' address of the email 145 * @throws Exception parsing the mime message failed 146 */ 147 public String getReplyTo() throws Exception 148 { 149 final javax.mail.Address[] addresses = this.mimeMessage.getReplyTo(); 150 if (addresses == null || addresses.length == 0) 151 { 152 return null; 153 } 154 return ((InternetAddress) addresses[0]).getAddress(); 155 } 156 157 /** 158 * @return the mail subject 159 * @throws Exception parsing the mime message failed 160 */ 161 public String getSubject() throws Exception 162 { 163 return this.mimeMessage.getSubject(); 164 } 165 166 /** 167 * Extracts the content of a MimeMessage recursively. 168 * 169 * @param parent the parent multi-part 170 * @param part the current MimePart 171 * @throws MessagingException parsing the MimeMessage failed 172 * @throws IOException parsing the MimeMessage failed 173 */ 174 protected void parse(final Multipart parent, final MimePart part) 175 throws MessagingException, IOException 176 { 177 if (isMimeType(part, "text/plain") && plainContent == null 178 && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) 179 { 180 plainContent = (String) part.getContent(); 181 } 182 else 183 { 184 if (isMimeType(part, "text/html") && htmlContent == null 185 && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) 186 { 187 htmlContent = (String) part.getContent(); 188 } 189 else 190 { 191 if (isMimeType(part, "multipart/*")) 192 { 193 this.isMultiPart = true; 194 final Multipart mp = (Multipart) part.getContent(); 195 final int count = mp.getCount(); 196 197 // iterate over all MimeBodyPart 198 199 for (int i = 0; i < count; i++) 200 { 201 parse(mp, (MimeBodyPart) mp.getBodyPart(i)); 202 } 203 } 204 else 205 { 206 final String cid = stripContentId(part.getContentID()); 207 final DataSource ds = createDataSource(parent, part); 208 if (cid != null) 209 { 210 this.cidMap.put(cid, ds); 211 } 212 this.attachmentList.add(ds); 213 } 214 } 215 } 216 } 217 218 /** 219 * Strips the content id of any whitespace and angle brackets. 220 * @param contentId the string to strip 221 * @return a stripped version of the content id 222 */ 223 private String stripContentId(final String contentId) 224 { 225 if (contentId == null) 226 { 227 return null; 228 } 229 return contentId.trim().replaceAll("[\\<\\>]", ""); 230 } 231 232 /** 233 * Checks whether the MimePart contains an object of the given mime type. 234 * 235 * @param part the current MimePart 236 * @param mimeType the mime type to check 237 * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise 238 * @throws MessagingException parsing the MimeMessage failed 239 * @throws IOException parsing the MimeMessage failed 240 */ 241 private boolean isMimeType(final MimePart part, final String mimeType) 242 throws MessagingException, IOException 243 { 244 // Do not use part.isMimeType(String) as it is broken for MimeBodyPart 245 // and does not really check the actual content type. 246 247 try 248 { 249 final ContentType ct = new ContentType(part.getDataHandler().getContentType()); 250 return ct.match(mimeType); 251 } 252 catch (final ParseException ex) 253 { 254 return part.getContentType().equalsIgnoreCase(mimeType); 255 } 256 } 257 258 /** 259 * Parses the MimePart to create a DataSource. 260 * 261 * @param parent the parent multi-part 262 * @param part the current part to be processed 263 * @return the DataSource 264 * @throws MessagingException creating the DataSource failed 265 * @throws IOException creating the DataSource failed 266 */ 267 protected DataSource createDataSource(final Multipart parent, final MimePart part) 268 throws MessagingException, IOException 269 { 270 final DataHandler dataHandler = part.getDataHandler(); 271 final DataSource dataSource = dataHandler.getDataSource(); 272 final String contentType = getBaseMimeType(dataSource.getContentType()); 273 final byte[] content = this.getContent(dataSource.getInputStream()); 274 final ByteArrayDataSource result = new ByteArrayDataSource(content, contentType); 275 final String dataSourceName = getDataSourceName(part, dataSource); 276 277 result.setName(dataSourceName); 278 return result; 279 } 280 281 /** @return Returns the mimeMessage. */ 282 public MimeMessage getMimeMessage() 283 { 284 return mimeMessage; 285 } 286 287 /** @return Returns the isMultiPart. */ 288 public boolean isMultipart() 289 { 290 return isMultiPart; 291 } 292 293 /** @return Returns the plainContent if any */ 294 public String getPlainContent() 295 { 296 return plainContent; 297 } 298 299 /** @return Returns the attachmentList. */ 300 public List<DataSource> getAttachmentList() 301 { 302 return attachmentList; 303 } 304 305 /** 306 * Returns a collection of all content-ids in the parsed message. 307 * <p> 308 * The content-ids are stripped of any angle brackets, i.e. "part1" instead 309 * of "<part1>". 310 * 311 * @return the collection of content ids. 312 * @since 1.3.4 313 */ 314 public Collection<String> getContentIds() 315 { 316 return Collections.unmodifiableSet(cidMap.keySet()); 317 } 318 319 /** @return Returns the htmlContent if any */ 320 public String getHtmlContent() 321 { 322 return htmlContent; 323 } 324 325 /** @return true if a plain content is available */ 326 public boolean hasPlainContent() 327 { 328 return this.plainContent != null; 329 } 330 331 /** @return true if HTML content is available */ 332 public boolean hasHtmlContent() 333 { 334 return this.htmlContent != null; 335 } 336 337 /** @return true if attachments are available */ 338 public boolean hasAttachments() 339 { 340 return this.attachmentList.size() > 0; 341 } 342 343 /** 344 * Find an attachment using its name. 345 * 346 * @param name the name of the attachment 347 * @return the corresponding datasource or null if nothing was found 348 */ 349 public DataSource findAttachmentByName(final String name) 350 { 351 DataSource dataSource; 352 353 for (int i = 0; i < getAttachmentList().size(); i++) 354 { 355 dataSource = getAttachmentList().get(i); 356 if (name.equalsIgnoreCase(dataSource.getName())) 357 { 358 return dataSource; 359 } 360 } 361 362 return null; 363 } 364 365 /** 366 * Find an attachment using its content-id. 367 * <p> 368 * The content-id must be stripped of any angle brackets, 369 * i.e. "part1" instead of "<part1>". 370 * 371 * @param cid the content-id of the attachment 372 * @return the corresponding datasource or null if nothing was found 373 * @since 1.3.4 374 */ 375 public DataSource findAttachmentByCid(final String cid) 376 { 377 final DataSource dataSource = cidMap.get(cid); 378 return dataSource; 379 } 380 381 /** 382 * Determines the name of the data source if it is not already set. 383 * 384 * @param part the mail part 385 * @param dataSource the data source 386 * @return the name of the data source or {@code null} if no name can be determined 387 * @throws MessagingException accessing the part failed 388 * @throws UnsupportedEncodingException decoding the text failed 389 */ 390 protected String getDataSourceName(final Part part, final DataSource dataSource) 391 throws MessagingException, UnsupportedEncodingException 392 { 393 String result = dataSource.getName(); 394 395 if (result == null || result.length() == 0) 396 { 397 result = part.getFileName(); 398 } 399 400 if (result != null && result.length() > 0) 401 { 402 result = MimeUtility.decodeText(result); 403 } 404 else 405 { 406 result = null; 407 } 408 409 return result; 410 } 411 412 /** 413 * Read the content of the input stream. 414 * 415 * @param is the input stream to process 416 * @return the content of the input stream 417 * @throws IOException reading the input stream failed 418 */ 419 private byte[] getContent(final InputStream is) 420 throws IOException 421 { 422 int ch; 423 byte[] result; 424 425 final ByteArrayOutputStream os = new ByteArrayOutputStream(); 426 final BufferedInputStream isReader = new BufferedInputStream(is); 427 final BufferedOutputStream osWriter = new BufferedOutputStream(os); 428 429 while ((ch = isReader.read()) != -1) 430 { 431 osWriter.write(ch); 432 } 433 434 osWriter.flush(); 435 result = os.toByteArray(); 436 osWriter.close(); 437 438 return result; 439 } 440 441 /** 442 * Parses the mimeType. 443 * 444 * @param fullMimeType the mime type from the mail api 445 * @return the real mime type 446 */ 447 private String getBaseMimeType(final String fullMimeType) 448 { 449 final int pos = fullMimeType.indexOf(';'); 450 if (pos >= 0) 451 { 452 return fullMimeType.substring(0, pos); 453 } 454 return fullMimeType; 455 } 456}