001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.fileupload2.core; 018 019import java.io.IOException; 020import java.nio.charset.Charset; 021import java.nio.charset.StandardCharsets; 022import java.util.ArrayList; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Locale; 026import java.util.Map; 027import java.util.Objects; 028 029import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder; 030import org.apache.commons.io.IOUtils; 031 032/** 033 * High level API for processing file uploads. 034 * <p> 035 * This class handles multiple files per single HTML widget, sent using {@code multipart/mixed} encoding type, as specified by 036 * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Use {@link #parseRequest(RequestContext)} to acquire a list of {@link FileItem}s associated with 037 * a given HTML widget. 038 * </p> 039 * <p> 040 * How the data for individual parts is stored is determined by the factory used to create them; a given part may be in memory, on disk, or somewhere else. 041 * </p> 042 * 043 * @param <R> The request context type. 044 * @param <I> The FileItem type. 045 * @param <F> the FileItemFactory type. 046 */ 047public abstract class AbstractFileUpload<R, I extends FileItem<I>, F extends FileItemFactory<I>> { 048 049 /** 050 * Boundary parameter key. 051 */ 052 private static final String BOUNDARY_KEY = "boundary"; 053 054 /** 055 * Name parameter key. 056 */ 057 private static final String NAME_KEY = "name"; 058 059 /** 060 * File name parameter key. 061 */ 062 private static final String FILENAME_KEY = "filename"; 063 064 /** 065 * HTTP content type header name. 066 */ 067 public static final String CONTENT_TYPE = "Content-type"; 068 069 /** 070 * HTTP content disposition header name. 071 */ 072 public static final String CONTENT_DISPOSITION = "Content-disposition"; 073 074 /** 075 * HTTP content length header name. 076 */ 077 public static final String CONTENT_LENGTH = "Content-length"; 078 079 /** 080 * Content-disposition value for form data. 081 */ 082 public static final String FORM_DATA = "form-data"; 083 084 /** 085 * Content-disposition value for file attachment. 086 */ 087 public static final String ATTACHMENT = "attachment"; 088 089 /** 090 * Part of HTTP content type header. 091 */ 092 public static final String MULTIPART = "multipart/"; 093 094 /** 095 * HTTP content type header for multipart forms. 096 */ 097 public static final String MULTIPART_FORM_DATA = "multipart/form-data"; 098 099 /** 100 * HTTP content type header for multiple uploads. 101 */ 102 public static final String MULTIPART_MIXED = "multipart/mixed"; 103 104 /** 105 * Utility method that determines whether the request contains multipart content. 106 * <p> 107 * <strong>NOTE:</strong> This method will be moved to the {@code ServletFileUpload} class after the FileUpload 1.1 release. Unfortunately, since this 108 * method is static, it is not possible to provide its replacement until this method is removed. 109 * </p> 110 * 111 * @param ctx The request context to be evaluated. Must be non-null. 112 * @return {@code true} if the request is multipart; {@code false} otherwise. 113 */ 114 public static final boolean isMultipartContent(final RequestContext ctx) { 115 final var contentType = ctx.getContentType(); 116 if (contentType == null) { 117 return false; 118 } 119 return contentType.toLowerCase(Locale.ENGLISH).startsWith(MULTIPART); 120 } 121 122 /** 123 * The maximum size permitted for the complete request, as opposed to {@link #fileSizeMax}. A value of -1 indicates no maximum. 124 */ 125 private long sizeMax = -1; 126 127 /** 128 * The maximum size permitted for a single uploaded file, as opposed to {@link #sizeMax}. A value of -1 indicates no maximum. 129 */ 130 private long fileSizeMax = -1; 131 132 /** 133 * The maximum permitted number of files that may be uploaded in a single request. A value of -1 indicates no maximum. 134 */ 135 private long fileCountMax = -1; 136 137 /** 138 * The content encoding to use when reading part headers. 139 */ 140 private Charset headerCharset; 141 142 /** 143 * The progress listener. 144 */ 145 private ProgressListener progressListener = ProgressListener.NOP; 146 147 /** 148 * The factory to use to create new form items. 149 */ 150 private F fileItemFactory; 151 152 /** 153 * Gets the boundary from the {@code Content-type} header. 154 * 155 * @param contentType The value of the content type header from which to extract the boundary value. 156 * @return The boundary, as a byte array. 157 */ 158 public byte[] getBoundary(final String contentType) { 159 final var parser = new ParameterParser(); 160 parser.setLowerCaseNames(true); 161 // Parameter parser can handle null input 162 final var params = parser.parse(contentType, new char[] { ';', ',' }); 163 final var boundaryStr = params.get(BOUNDARY_KEY); 164 return boundaryStr != null ? boundaryStr.getBytes(StandardCharsets.ISO_8859_1) : null; 165 } 166 167 /** 168 * Gets the field name from the {@code Content-disposition} header. 169 * 170 * @param headers A {@code Map} containing the HTTP request headers. 171 * @return The field name for the current {@code encapsulation}. 172 */ 173 public String getFieldName(final FileItemHeaders headers) { 174 return getFieldName(headers.getHeader(CONTENT_DISPOSITION)); 175 } 176 177 /** 178 * Gets the field name, which is given by the content-disposition header. 179 * 180 * @param contentDisposition The content-dispositions header value. 181 * @return The field name. 182 */ 183 private String getFieldName(final String contentDisposition) { 184 String fieldName = null; 185 if (contentDisposition != null && contentDisposition.toLowerCase(Locale.ENGLISH).startsWith(FORM_DATA)) { 186 final var parser = new ParameterParser(); 187 parser.setLowerCaseNames(true); 188 // Parameter parser can handle null input 189 final var params = parser.parse(contentDisposition, ';'); 190 fieldName = params.get(NAME_KEY); 191 if (fieldName != null) { 192 fieldName = fieldName.trim(); 193 } 194 } 195 return fieldName; 196 } 197 198 /** 199 * Gets the maximum number of files allowed in a single request. 200 * 201 * @return The maximum number of files allowed in a single request. 202 */ 203 public long getFileCountMax() { 204 return fileCountMax; 205 } 206 207 /** 208 * Gets the factory class used when creating file items. 209 * 210 * @return The factory class for new file items. 211 */ 212 public F getFileItemFactory() { 213 return fileItemFactory; 214 } 215 216 /** 217 * Gets the file name from the {@code Content-disposition} header. 218 * 219 * @param headers The HTTP headers object. 220 * 221 * @return The file name for the current {@code encapsulation}. 222 */ 223 public String getFileName(final FileItemHeaders headers) { 224 return getFileName(headers.getHeader(CONTENT_DISPOSITION)); 225 } 226 227 /** 228 * Gets the given content-disposition headers file name. 229 * 230 * @param contentDisposition The content-disposition headers value. 231 * @return The file name 232 */ 233 private String getFileName(final String contentDisposition) { 234 String fileName = null; 235 if (contentDisposition != null) { 236 final var cdl = contentDisposition.toLowerCase(Locale.ENGLISH); 237 if (cdl.startsWith(FORM_DATA) || cdl.startsWith(ATTACHMENT)) { 238 final var parser = new ParameterParser(); 239 parser.setLowerCaseNames(true); 240 // Parameter parser can handle null input 241 final var params = parser.parse(contentDisposition, ';'); 242 if (params.containsKey(FILENAME_KEY)) { 243 fileName = params.get(FILENAME_KEY); 244 if (fileName != null) { 245 fileName = fileName.trim(); 246 } else { 247 // Even if there is no value, the parameter is present, 248 // so we return an empty file name rather than no file 249 // name. 250 fileName = ""; 251 } 252 } 253 } 254 } 255 return fileName; 256 } 257 258 /** 259 * Gets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}. 260 * 261 * @see #setFileSizeMax(long) 262 * @return Maximum size of a single uploaded file. 263 */ 264 public long getFileSizeMax() { 265 return fileSizeMax; 266 } 267 268 /** 269 * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the request encoding is used. If 270 * that is also not specified, or {@code null}, the platform default encoding is used. 271 * 272 * @return The encoding used to read part headers. 273 */ 274 public Charset getHeaderCharset() { 275 return headerCharset; 276 } 277 278 /** 279 * Gets a file item iterator. 280 * 281 * @param request The servlet request to be parsed. 282 * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted. 283 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 284 * @throws IOException An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the 285 * uploaded content. 286 */ 287 public abstract FileItemInputIterator getItemIterator(R request) throws FileUploadException, IOException; 288 289 /** 290 * Gets an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 291 * 292 * @param requestContext The context for the request to be parsed. 293 * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted. 294 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 295 * @throws IOException An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the 296 * uploaded content. 297 */ 298 public FileItemInputIterator getItemIterator(final RequestContext requestContext) throws FileUploadException, IOException { 299 return new FileItemInputIteratorImpl(this, requestContext); 300 } 301 302 /** 303 * Parses the {@code header-part} and returns as key/value pairs. 304 * <p> 305 * If there are multiple headers of the same names, the name will map to a comma-separated list containing the values. 306 * </p> 307 * 308 * @param headerPart The {@code header-part} of the current {@code encapsulation}. 309 * @return A {@code Map} containing the parsed HTTP request headers. 310 */ 311 public FileItemHeaders getParsedHeaders(final String headerPart) { 312 final var len = headerPart.length(); 313 final var headers = newFileItemHeaders(); 314 var start = 0; 315 for (;;) { 316 var end = parseEndOfLine(headerPart, start); 317 if (start == end) { 318 break; 319 } 320 final var header = new StringBuilder(headerPart.substring(start, end)); 321 start = end + 2; 322 while (start < len) { 323 var nonWs = start; 324 while (nonWs < len) { 325 final var c = headerPart.charAt(nonWs); 326 if (c != ' ' && c != '\t') { 327 break; 328 } 329 ++nonWs; 330 } 331 if (nonWs == start) { 332 break; 333 } 334 // Continuation line found 335 end = parseEndOfLine(headerPart, nonWs); 336 header.append(' ').append(headerPart, nonWs, end); 337 start = end + 2; 338 } 339 parseHeaderLine(headers, header.toString()); 340 } 341 return headers; 342 } 343 344 /** 345 * Gets the progress listener. 346 * 347 * @return The progress listener, if any, or null. 348 */ 349 public ProgressListener getProgressListener() { 350 return progressListener; 351 } 352 353 /** 354 * Gets the maximum allowed size of a complete request, as opposed to {@link #getFileSizeMax()}. 355 * 356 * @return The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit. 357 * @see #setSizeMax(long) 358 * 359 */ 360 public long getSizeMax() { 361 return sizeMax; 362 } 363 364 /** 365 * Creates a new instance of {@link FileItemHeaders}. 366 * 367 * @return The new instance. 368 */ 369 protected FileItemHeaders newFileItemHeaders() { 370 return AbstractFileItemBuilder.newFileItemHeaders(); 371 } 372 373 /** 374 * Skips bytes until the end of the current line. 375 * 376 * @param headerPart The headers, which are being parsed. 377 * @param end Index of the last byte, which has yet been processed. 378 * @return Index of the \r\n sequence, which indicates end of line. 379 */ 380 private int parseEndOfLine(final String headerPart, final int end) { 381 var index = end; 382 for (;;) { 383 final var offset = headerPart.indexOf('\r', index); 384 if (offset == -1 || offset + 1 >= headerPart.length()) { 385 throw new IllegalStateException("Expected headers to be terminated by an empty line."); 386 } 387 if (headerPart.charAt(offset + 1) == '\n') { 388 return offset; 389 } 390 index = offset + 1; 391 } 392 } 393 394 /** 395 * Parses the next header line. 396 * 397 * @param headers String with all headers. 398 * @param header Map where to store the current header. 399 */ 400 private void parseHeaderLine(final FileItemHeaders headers, final String header) { 401 final var colonOffset = header.indexOf(':'); 402 if (colonOffset == -1) { 403 // This header line is malformed, skip it. 404 return; 405 } 406 final var headerName = header.substring(0, colonOffset).trim(); 407 final var headerValue = header.substring(colonOffset + 1).trim(); 408 headers.addHeader(headerName, headerValue); 409 } 410 411 /** 412 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 413 * 414 * @param request The servlet request to be parsed. 415 * @return A map of {@code FileItem} instances parsed from the request. 416 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 417 */ 418 public abstract Map<String, List<I>> parseParameterMap(R request) throws FileUploadException; 419 420 /** 421 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 422 * 423 * @param ctx The context for the request to be parsed. 424 * @return A map of {@code FileItem} instances parsed from the request. 425 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 426 */ 427 public Map<String, List<I>> parseParameterMap(final RequestContext ctx) throws FileUploadException { 428 final var items = parseRequest(ctx); 429 final Map<String, List<I>> itemsMap = new HashMap<>(items.size()); 430 431 for (final I fileItem : items) { 432 final var fieldName = fileItem.getFieldName(); 433 final var mappedItems = itemsMap.computeIfAbsent(fieldName, k -> new ArrayList<>()); 434 mappedItems.add(fileItem); 435 } 436 437 return itemsMap; 438 } 439 440 /** 441 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 442 * 443 * @param request The servlet request to be parsed. 444 * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted. 445 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 446 */ 447 public abstract List<I> parseRequest(R request) throws FileUploadException; 448 449 /** 450 * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 451 * 452 * @param requestContext The context for the request to be parsed. 453 * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted. 454 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 455 */ 456 public List<I> parseRequest(final RequestContext requestContext) throws FileUploadException { 457 final List<I> itemList = new ArrayList<>(); 458 var successful = false; 459 try { 460 final var fileItemFactory = Objects.requireNonNull(getFileItemFactory(), "No FileItemFactory has been set."); 461 final var buffer = new byte[IOUtils.DEFAULT_BUFFER_SIZE]; 462 getItemIterator(requestContext).forEachRemaining(fileItemInput -> { 463 if (itemList.size() == fileCountMax) { 464 // The next item will exceed the limit. 465 throw new FileUploadFileCountLimitException(ATTACHMENT, getFileCountMax(), itemList.size()); 466 } 467 // Don't use getName() here to prevent an InvalidFileNameException. 468 // @formatter:off 469 final var fileItem = fileItemFactory.fileItemBuilder() 470 .setFieldName(fileItemInput.getFieldName()) 471 .setContentType(fileItemInput.getContentType()) 472 .setFormField(fileItemInput.isFormField()) 473 .setFileName(fileItemInput.getName()) 474 .setFileItemHeaders(fileItemInput.getHeaders()) 475 .get(); 476 // @formatter:on 477 itemList.add(fileItem); 478 try (var inputStream = fileItemInput.getInputStream(); 479 var outputStream = fileItem.getOutputStream()) { 480 IOUtils.copyLarge(inputStream, outputStream, buffer); 481 } catch (final FileUploadException e) { 482 throw e; 483 } catch (final IOException e) { 484 throw new FileUploadException(String.format("Processing of %s request failed. %s", MULTIPART_FORM_DATA, e.getMessage()), e); 485 } 486 }); 487 successful = true; 488 return itemList; 489 } catch (final FileUploadException e) { 490 throw e; 491 } catch (final IOException e) { 492 throw new FileUploadException(e.getMessage(), e); 493 } finally { 494 if (!successful) { 495 for (final I fileItem : itemList) { 496 try { 497 fileItem.delete(); 498 } catch (final Exception ignored) { 499 // ignored TODO perhaps add to tracker delete failure list somehow? 500 } 501 } 502 } 503 } 504 } 505 506 /** 507 * Sets the maximum number of files allowed per request. 508 * 509 * @param fileCountMax The new limit. {@code -1} means no limit. 510 */ 511 public void setFileCountMax(final long fileCountMax) { 512 this.fileCountMax = fileCountMax; 513 } 514 515 /** 516 * Sets the factory class to use when creating file items. 517 * 518 * @param factory The factory class for new file items. 519 */ 520 public void setFileItemFactory(final F factory) { 521 this.fileItemFactory = factory; 522 } 523 524 /** 525 * Sets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}. 526 * 527 * @see #getFileSizeMax() 528 * @param fileSizeMax Maximum size of a single uploaded file. 529 */ 530 public void setFileSizeMax(final long fileSizeMax) { 531 this.fileSizeMax = fileSizeMax; 532 } 533 534 /** 535 * Specifies the character encoding to be used when reading the headers of individual part. When not specified, or {@code null}, the request encoding is 536 * used. If that is also not specified, or {@code null}, the platform default encoding is used. 537 * 538 * @param headerCharset The encoding used to read part headers. 539 */ 540 public void setHeaderCharset(final Charset headerCharset) { 541 this.headerCharset = headerCharset; 542 } 543 544 /** 545 * Sets the progress listener. 546 * 547 * @param progressListener The progress listener, if any. Defaults to null. 548 */ 549 public void setProgressListener(final ProgressListener progressListener) { 550 this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP; 551 } 552 553 /** 554 * Sets the maximum allowed size of a complete request, as opposed to {@link #setFileSizeMax(long)}. 555 * 556 * @param sizeMax The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit. 557 * @see #getSizeMax() 558 */ 559 public void setSizeMax(final long sizeMax) { 560 this.sizeMax = sizeMax; 561 } 562 563}