AbstractFileUpload.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.fileupload2.core;

  18. import java.io.IOException;
  19. import java.nio.charset.Charset;
  20. import java.nio.charset.StandardCharsets;
  21. import java.util.ArrayList;
  22. import java.util.HashMap;
  23. import java.util.List;
  24. import java.util.Locale;
  25. import java.util.Map;
  26. import java.util.Objects;

  27. import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
  28. import org.apache.commons.io.IOUtils;

  29. /**
  30.  * High level API for processing file uploads.
  31.  * <p>
  32.  * This class handles multiple files per single HTML widget, sent using {@code multipart/mixed} encoding type, as specified by
  33.  * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Use {@link #parseRequest(RequestContext)} to acquire a list of {@link FileItem}s associated with
  34.  * a given HTML widget.
  35.  * </p>
  36.  * <p>
  37.  * How the data for individual parts is stored is determined by the factory used to create them; a given part may be in memory, on disk, or somewhere else.
  38.  * </p>
  39.  *
  40.  * @param <R> The request context type.
  41.  * @param <I> The FileItem type.
  42.  * @param <F> the FileItemFactory type.
  43.  */
  44. public abstract class AbstractFileUpload<R, I extends FileItem<I>, F extends FileItemFactory<I>> {

  45.     /**
  46.      * Boundary parameter key.
  47.      */
  48.     private static final String BOUNDARY_KEY = "boundary";

  49.     /**
  50.      * Name parameter key.
  51.      */
  52.     private static final String NAME_KEY = "name";

  53.     /**
  54.      * File name parameter key.
  55.      */
  56.     private static final String FILENAME_KEY = "filename";

  57.     /**
  58.      * HTTP content type header name.
  59.      */
  60.     public static final String CONTENT_TYPE = "Content-type";

  61.     /**
  62.      * HTTP content disposition header name.
  63.      */
  64.     public static final String CONTENT_DISPOSITION = "Content-disposition";

  65.     /**
  66.      * HTTP content length header name.
  67.      */
  68.     public static final String CONTENT_LENGTH = "Content-length";

  69.     /**
  70.      * Content-disposition value for form data.
  71.      */
  72.     public static final String FORM_DATA = "form-data";

  73.     /**
  74.      * Content-disposition value for file attachment.
  75.      */
  76.     public static final String ATTACHMENT = "attachment";

  77.     /**
  78.      * Part of HTTP content type header.
  79.      */
  80.     public static final String MULTIPART = "multipart/";

  81.     /**
  82.      * HTTP content type header for multipart forms.
  83.      */
  84.     public static final String MULTIPART_FORM_DATA = "multipart/form-data";

  85.     /**
  86.      * HTTP content type header for multiple uploads.
  87.      */
  88.     public static final String MULTIPART_MIXED = "multipart/mixed";

  89.     /**
  90.      * Utility method that determines whether the request contains multipart content.
  91.      * <p>
  92.      * <strong>NOTE:</strong> This method will be moved to the {@code ServletFileUpload} class after the FileUpload 1.1 release. Unfortunately, since this
  93.      * method is static, it is not possible to provide its replacement until this method is removed.
  94.      * </p>
  95.      *
  96.      * @param ctx The request context to be evaluated. Must be non-null.
  97.      * @return {@code true} if the request is multipart; {@code false} otherwise.
  98.      */
  99.     public static final boolean isMultipartContent(final RequestContext ctx) {
  100.         final var contentType = ctx.getContentType();
  101.         if (contentType == null) {
  102.             return false;
  103.         }
  104.         return contentType.toLowerCase(Locale.ROOT).startsWith(MULTIPART);
  105.     }

  106.     /**
  107.      * The maximum size permitted for the complete request, as opposed to {@link #fileSizeMax}. A value of -1 indicates no maximum.
  108.      */
  109.     private long sizeMax = -1;

  110.     /**
  111.      * The maximum size permitted for a single uploaded file, as opposed to {@link #sizeMax}. A value of -1 indicates no maximum.
  112.      */
  113.     private long fileSizeMax = -1;

  114.     /**
  115.      * The maximum permitted number of files that may be uploaded in a single request. A value of -1 indicates no maximum.
  116.      */
  117.     private long fileCountMax = -1;

  118.     /**
  119.      * The content encoding to use when reading part headers.
  120.      */
  121.     private Charset headerCharset;

  122.     /**
  123.      * The progress listener.
  124.      */
  125.     private ProgressListener progressListener = ProgressListener.NOP;

  126.     /**
  127.      * The factory to use to create new form items.
  128.      */
  129.     private F fileItemFactory;

  130.     /**
  131.      * Constructs a new instance for subclasses.
  132.      */
  133.     public AbstractFileUpload() {
  134.         // empty
  135.     }

  136.     /**
  137.      * Gets the boundary from the {@code Content-type} header.
  138.      *
  139.      * @param contentType The value of the content type header from which to extract the boundary value.
  140.      * @return The boundary, as a byte array.
  141.      */
  142.     public byte[] getBoundary(final String contentType) {
  143.         final var parser = new ParameterParser();
  144.         parser.setLowerCaseNames(true);
  145.         // Parameter parser can handle null input
  146.         final var params = parser.parse(contentType, new char[] { ';', ',' });
  147.         final var boundaryStr = params.get(BOUNDARY_KEY);
  148.         return boundaryStr != null ? boundaryStr.getBytes(StandardCharsets.ISO_8859_1) : null;
  149.     }

  150.     /**
  151.      * Gets the field name from the {@code Content-disposition} header.
  152.      *
  153.      * @param headers A {@code Map} containing the HTTP request headers.
  154.      * @return The field name for the current {@code encapsulation}.
  155.      */
  156.     public String getFieldName(final FileItemHeaders headers) {
  157.         return getFieldName(headers.getHeader(CONTENT_DISPOSITION));
  158.     }

  159.     /**
  160.      * Gets the field name, which is given by the content-disposition header.
  161.      *
  162.      * @param contentDisposition The content-dispositions header value.
  163.      * @return The field name.
  164.      */
  165.     private String getFieldName(final String contentDisposition) {
  166.         String fieldName = null;
  167.         if (contentDisposition != null && contentDisposition.toLowerCase(Locale.ROOT).startsWith(FORM_DATA)) {
  168.             final var parser = new ParameterParser();
  169.             parser.setLowerCaseNames(true);
  170.             // Parameter parser can handle null input
  171.             final var params = parser.parse(contentDisposition, ';');
  172.             fieldName = params.get(NAME_KEY);
  173.             if (fieldName != null) {
  174.                 fieldName = fieldName.trim();
  175.             }
  176.         }
  177.         return fieldName;
  178.     }

  179.     /**
  180.      * Gets the maximum number of files allowed in a single request.
  181.      *
  182.      * @return The maximum number of files allowed in a single request.
  183.      */
  184.     public long getFileCountMax() {
  185.         return fileCountMax;
  186.     }

  187.     /**
  188.      * Gets the factory class used when creating file items.
  189.      *
  190.      * @return The factory class for new file items.
  191.      */
  192.     public F getFileItemFactory() {
  193.         return fileItemFactory;
  194.     }

  195.     /**
  196.      * Gets the file name from the {@code Content-disposition} header.
  197.      *
  198.      * @param headers The HTTP headers object.
  199.      * @return The file name for the current {@code encapsulation}.
  200.      */
  201.     public String getFileName(final FileItemHeaders headers) {
  202.         return getFileName(headers.getHeader(CONTENT_DISPOSITION));
  203.     }

  204.     /**
  205.      * Gets the given content-disposition headers file name.
  206.      *
  207.      * @param contentDisposition The content-disposition headers value.
  208.      * @return The file name
  209.      */
  210.     private String getFileName(final String contentDisposition) {
  211.         String fileName = null;
  212.         if (contentDisposition != null) {
  213.             final var cdl = contentDisposition.toLowerCase(Locale.ROOT);
  214.             if (cdl.startsWith(FORM_DATA) || cdl.startsWith(ATTACHMENT)) {
  215.                 final var parser = new ParameterParser();
  216.                 parser.setLowerCaseNames(true);
  217.                 // Parameter parser can handle null input
  218.                 final var params = parser.parse(contentDisposition, ';');
  219.                 if (params.containsKey(FILENAME_KEY)) {
  220.                     fileName = params.get(FILENAME_KEY);
  221.                     if (fileName != null) {
  222.                         fileName = fileName.trim();
  223.                     } else {
  224.                         // Even if there is no value, the parameter is present,
  225.                         // so we return an empty file name rather than no file
  226.                         // name.
  227.                         fileName = "";
  228.                     }
  229.                 }
  230.             }
  231.         }
  232.         return fileName;
  233.     }

  234.     /**
  235.      * Gets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
  236.      *
  237.      * @see #setFileSizeMax(long)
  238.      * @return Maximum size of a single uploaded file.
  239.      */
  240.     public long getFileSizeMax() {
  241.         return fileSizeMax;
  242.     }

  243.     /**
  244.      * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the request encoding is used. If
  245.      * that is also not specified, or {@code null}, the platform default encoding is used.
  246.      *
  247.      * @return The encoding used to read part headers.
  248.      */
  249.     public Charset getHeaderCharset() {
  250.         return headerCharset;
  251.     }

  252.     /**
  253.      * Gets a file item iterator.
  254.      *
  255.      * @param request The servlet request to be parsed.
  256.      * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
  257.      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
  258.      * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
  259.      *                             uploaded content.
  260.      */
  261.     public abstract FileItemInputIterator getItemIterator(R request) throws FileUploadException, IOException;

  262.     /**
  263.      * Gets an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
  264.      *
  265.      * @param requestContext The context for the request to be parsed.
  266.      * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
  267.      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
  268.      * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
  269.      *                             uploaded content.
  270.      */
  271.     public FileItemInputIterator getItemIterator(final RequestContext requestContext) throws FileUploadException, IOException {
  272.         return new FileItemInputIteratorImpl(this, requestContext);
  273.     }

  274.     /**
  275.      * Parses the {@code header-part} and returns as key/value pairs.
  276.      * <p>
  277.      * If there are multiple headers of the same names, the name will map to a comma-separated list containing the values.
  278.      * </p>
  279.      *
  280.      * @param headerPart The {@code header-part} of the current {@code encapsulation}.
  281.      * @return A {@code Map} containing the parsed HTTP request headers.
  282.      */
  283.     public FileItemHeaders getParsedHeaders(final String headerPart) {
  284.         final var len = headerPart.length();
  285.         final var headers = newFileItemHeaders();
  286.         var start = 0;
  287.         for (;;) {
  288.             var end = parseEndOfLine(headerPart, start);
  289.             if (start == end) {
  290.                 break;
  291.             }
  292.             final var header = new StringBuilder(headerPart.substring(start, end));
  293.             start = end + 2;
  294.             while (start < len) {
  295.                 var nonWs = start;
  296.                 while (nonWs < len) {
  297.                     final var c = headerPart.charAt(nonWs);
  298.                     if (c != ' ' && c != '\t') {
  299.                         break;
  300.                     }
  301.                     ++nonWs;
  302.                 }
  303.                 if (nonWs == start) {
  304.                     break;
  305.                 }
  306.                 // Continuation line found
  307.                 end = parseEndOfLine(headerPart, nonWs);
  308.                 header.append(' ').append(headerPart, nonWs, end);
  309.                 start = end + 2;
  310.             }
  311.             parseHeaderLine(headers, header.toString());
  312.         }
  313.         return headers;
  314.     }

  315.     /**
  316.      * Gets the progress listener.
  317.      *
  318.      * @return The progress listener, if any, or null.
  319.      */
  320.     public ProgressListener getProgressListener() {
  321.         return progressListener;
  322.     }

  323.     /**
  324.      * Gets the maximum allowed size of a complete request, as opposed to {@link #getFileSizeMax()}.
  325.      *
  326.      * @return The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
  327.      * @see #setSizeMax(long)
  328.      */
  329.     public long getSizeMax() {
  330.         return sizeMax;
  331.     }

  332.     /**
  333.      * Creates a new instance of {@link FileItemHeaders}.
  334.      *
  335.      * @return The new instance.
  336.      */
  337.     protected FileItemHeaders newFileItemHeaders() {
  338.         return AbstractFileItemBuilder.newFileItemHeaders();
  339.     }

  340.     /**
  341.      * Skips bytes until the end of the current line.
  342.      *
  343.      * @param headerPart The headers, which are being parsed.
  344.      * @param end        Index of the last byte, which has yet been processed.
  345.      * @return Index of the \r\n sequence, which indicates end of line.
  346.      */
  347.     private int parseEndOfLine(final String headerPart, final int end) {
  348.         var index = end;
  349.         for (;;) {
  350.             final var offset = headerPart.indexOf('\r', index);
  351.             if (offset == -1 || offset + 1 >= headerPart.length()) {
  352.                 throw new IllegalStateException("Expected headers to be terminated by an empty line.");
  353.             }
  354.             if (headerPart.charAt(offset + 1) == '\n') {
  355.                 return offset;
  356.             }
  357.             index = offset + 1;
  358.         }
  359.     }

  360.     /**
  361.      * Parses the next header line.
  362.      *
  363.      * @param headers String with all headers.
  364.      * @param header  Map where to store the current header.
  365.      */
  366.     private void parseHeaderLine(final FileItemHeaders headers, final String header) {
  367.         final var colonOffset = header.indexOf(':');
  368.         if (colonOffset == -1) {
  369.             // This header line is malformed, skip it.
  370.             return;
  371.         }
  372.         final var headerName = header.substring(0, colonOffset).trim();
  373.         final var headerValue = header.substring(colonOffset + 1).trim();
  374.         headers.addHeader(headerName, headerValue);
  375.     }

  376.     /**
  377.      * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
  378.      *
  379.      * @param request The servlet request to be parsed.
  380.      * @return A map of {@code FileItem} instances parsed from the request.
  381.      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
  382.      */
  383.     public abstract Map<String, List<I>> parseParameterMap(R request) throws FileUploadException;

  384.     /**
  385.      * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
  386.      *
  387.      * @param ctx The context for the request to be parsed.
  388.      * @return A map of {@code FileItem} instances parsed from the request.
  389.      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
  390.      */
  391.     public Map<String, List<I>> parseParameterMap(final RequestContext ctx) throws FileUploadException {
  392.         final var items = parseRequest(ctx);
  393.         final Map<String, List<I>> itemsMap = new HashMap<>(items.size());

  394.         for (final I fileItem : items) {
  395.             final var fieldName = fileItem.getFieldName();
  396.             final var mappedItems = itemsMap.computeIfAbsent(fieldName, k -> new ArrayList<>());
  397.             mappedItems.add(fileItem);
  398.         }

  399.         return itemsMap;
  400.     }

  401.     /**
  402.      * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
  403.      *
  404.      * @param request The servlet request to be parsed.
  405.      * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
  406.      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
  407.      */
  408.     public abstract List<I> parseRequest(R request) throws FileUploadException;

  409.     /**
  410.      * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
  411.      *
  412.      * @param requestContext The context for the request to be parsed.
  413.      * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
  414.      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
  415.      */
  416.     public List<I> parseRequest(final RequestContext requestContext) throws FileUploadException {
  417.         final List<I> itemList = new ArrayList<>();
  418.         var successful = false;
  419.         try {
  420.             final var fileItemFactory = Objects.requireNonNull(getFileItemFactory(), "No FileItemFactory has been set.");
  421.             final var buffer = new byte[IOUtils.DEFAULT_BUFFER_SIZE];
  422.             getItemIterator(requestContext).forEachRemaining(fileItemInput -> {
  423.                 if (itemList.size() == fileCountMax) {
  424.                     // The next item will exceed the limit.
  425.                     throw new FileUploadFileCountLimitException(ATTACHMENT, getFileCountMax(), itemList.size());
  426.                 }
  427.                 // Don't use getName() here to prevent an InvalidFileNameException.
  428.                 // @formatter:off
  429.                 final var fileItem = fileItemFactory.fileItemBuilder()
  430.                     .setFieldName(fileItemInput.getFieldName())
  431.                     .setContentType(fileItemInput.getContentType())
  432.                     .setFormField(fileItemInput.isFormField())
  433.                     .setFileName(fileItemInput.getName())
  434.                     .setFileItemHeaders(fileItemInput.getHeaders())
  435.                     .get();
  436.                 // @formatter:on
  437.                 itemList.add(fileItem);
  438.                 try (var inputStream = fileItemInput.getInputStream();
  439.                         var outputStream = fileItem.getOutputStream()) {
  440.                     IOUtils.copyLarge(inputStream, outputStream, buffer);
  441.                 } catch (final FileUploadException e) {
  442.                     throw e;
  443.                 } catch (final IOException e) {
  444.                     throw new FileUploadException(String.format("Processing of %s request failed. %s", MULTIPART_FORM_DATA, e.getMessage()), e);
  445.                 }
  446.             });
  447.             successful = true;
  448.             return itemList;
  449.         } catch (final FileUploadException e) {
  450.             throw e;
  451.         } catch (final IOException e) {
  452.             throw new FileUploadException(e.getMessage(), e);
  453.         } finally {
  454.             if (!successful) {
  455.                 for (final I fileItem : itemList) {
  456.                     try {
  457.                         fileItem.delete();
  458.                     } catch (final Exception ignored) {
  459.                         // ignored TODO perhaps add to tracker delete failure list somehow?
  460.                     }
  461.                 }
  462.             }
  463.         }
  464.     }

  465.     /**
  466.      * Sets the maximum number of files allowed per request.
  467.      *
  468.      * @param fileCountMax The new limit. {@code -1} means no limit.
  469.      */
  470.     public void setFileCountMax(final long fileCountMax) {
  471.         this.fileCountMax = fileCountMax;
  472.     }

  473.     /**
  474.      * Sets the factory class to use when creating file items.
  475.      *
  476.      * @param factory The factory class for new file items.
  477.      */
  478.     public void setFileItemFactory(final F factory) {
  479.         this.fileItemFactory = factory;
  480.     }

  481.     /**
  482.      * Sets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
  483.      *
  484.      * @see #getFileSizeMax()
  485.      * @param fileSizeMax Maximum size of a single uploaded file.
  486.      */
  487.     public void setFileSizeMax(final long fileSizeMax) {
  488.         this.fileSizeMax = fileSizeMax;
  489.     }

  490.     /**
  491.      * Specifies the character encoding to be used when reading the headers of individual part. When not specified, or {@code null}, the request encoding is
  492.      * used. If that is also not specified, or {@code null}, the platform default encoding is used.
  493.      *
  494.      * @param headerCharset The encoding used to read part headers.
  495.      */
  496.     public void setHeaderCharset(final Charset headerCharset) {
  497.         this.headerCharset = headerCharset;
  498.     }

  499.     /**
  500.      * Sets the progress listener.
  501.      *
  502.      * @param progressListener The progress listener, if any. Defaults to null.
  503.      */
  504.     public void setProgressListener(final ProgressListener progressListener) {
  505.         this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
  506.     }

  507.     /**
  508.      * Sets the maximum allowed size of a complete request, as opposed to {@link #setFileSizeMax(long)}.
  509.      *
  510.      * @param sizeMax The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
  511.      * @see #getSizeMax()
  512.      */
  513.     public void setSizeMax(final long sizeMax) {
  514.         this.sizeMax = sizeMax;
  515.     }

  516. }