001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload2.core;
018
019import java.io.IOException;
020import java.nio.charset.Charset;
021import java.nio.charset.StandardCharsets;
022import java.util.ArrayList;
023import java.util.HashMap;
024import java.util.List;
025import java.util.Locale;
026import java.util.Map;
027import java.util.Objects;
028
029import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
030import org.apache.commons.io.IOUtils;
031
032/**
033 * High level API for processing file uploads.
034 * <p>
035 * This class handles multiple files per single HTML widget, sent using {@code multipart/mixed} encoding type, as specified by
036 * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Use {@link #parseRequest(RequestContext)} to acquire a list of {@link FileItem}s associated with
037 * a given HTML widget.
038 * </p>
039 * <p>
040 * How the data for individual parts is stored is determined by the factory used to create them; a given part may be in memory, on disk, or somewhere else.
041 * </p>
042 *
043 * @param <R> The request context type.
044 * @param <I> The FileItem type.
045 * @param <F> the FileItemFactory type.
046 */
047public abstract class AbstractFileUpload<R, I extends FileItem<I>, F extends FileItemFactory<I>> {
048
049    /**
050     * Boundary parameter key.
051     */
052    private static final String BOUNDARY_KEY = "boundary";
053
054    /**
055     * Name parameter key.
056     */
057    private static final String NAME_KEY = "name";
058
059    /**
060     * File name parameter key.
061     */
062    private static final String FILENAME_KEY = "filename";
063
064    /**
065     * HTTP content type header name.
066     */
067    public static final String CONTENT_TYPE = "Content-type";
068
069    /**
070     * HTTP content disposition header name.
071     */
072    public static final String CONTENT_DISPOSITION = "Content-disposition";
073
074    /**
075     * HTTP content length header name.
076     */
077    public static final String CONTENT_LENGTH = "Content-length";
078
079    /**
080     * Content-disposition value for form data.
081     */
082    public static final String FORM_DATA = "form-data";
083
084    /**
085     * Content-disposition value for file attachment.
086     */
087    public static final String ATTACHMENT = "attachment";
088
089    /**
090     * Part of HTTP content type header.
091     */
092    public static final String MULTIPART = "multipart/";
093
094    /**
095     * HTTP content type header for multipart forms.
096     */
097    public static final String MULTIPART_FORM_DATA = "multipart/form-data";
098
099    /**
100     * HTTP content type header for multiple uploads.
101     */
102    public static final String MULTIPART_MIXED = "multipart/mixed";
103
104    /**
105     * Utility method that determines whether the request contains multipart content.
106     * <p>
107     * <strong>NOTE:</strong> This method will be moved to the {@code ServletFileUpload} class after the FileUpload 1.1 release. Unfortunately, since this
108     * method is static, it is not possible to provide its replacement until this method is removed.
109     * </p>
110     *
111     * @param ctx The request context to be evaluated. Must be non-null.
112     * @return {@code true} if the request is multipart; {@code false} otherwise.
113     */
114    public static final boolean isMultipartContent(final RequestContext ctx) {
115        final var contentType = ctx.getContentType();
116        if (contentType == null) {
117            return false;
118        }
119        return contentType.toLowerCase(Locale.ENGLISH).startsWith(MULTIPART);
120    }
121
122    /**
123     * The maximum size permitted for the complete request, as opposed to {@link #fileSizeMax}. A value of -1 indicates no maximum.
124     */
125    private long sizeMax = -1;
126
127    /**
128     * The maximum size permitted for a single uploaded file, as opposed to {@link #sizeMax}. A value of -1 indicates no maximum.
129     */
130    private long fileSizeMax = -1;
131
132    /**
133     * The maximum permitted number of files that may be uploaded in a single request. A value of -1 indicates no maximum.
134     */
135    private long fileCountMax = -1;
136
137    /**
138     * The content encoding to use when reading part headers.
139     */
140    private Charset headerCharset;
141
142    /**
143     * The progress listener.
144     */
145    private ProgressListener progressListener = ProgressListener.NOP;
146
147    /**
148     * The factory to use to create new form items.
149     */
150    private F fileItemFactory;
151
152    /**
153     * Gets the boundary from the {@code Content-type} header.
154     *
155     * @param contentType The value of the content type header from which to extract the boundary value.
156     * @return The boundary, as a byte array.
157     */
158    public byte[] getBoundary(final String contentType) {
159        final var parser = new ParameterParser();
160        parser.setLowerCaseNames(true);
161        // Parameter parser can handle null input
162        final var params = parser.parse(contentType, new char[] { ';', ',' });
163        final var boundaryStr = params.get(BOUNDARY_KEY);
164        return boundaryStr != null ? boundaryStr.getBytes(StandardCharsets.ISO_8859_1) : null;
165    }
166
167    /**
168     * Gets the field name from the {@code Content-disposition} header.
169     *
170     * @param headers A {@code Map} containing the HTTP request headers.
171     * @return The field name for the current {@code encapsulation}.
172     */
173    public String getFieldName(final FileItemHeaders headers) {
174        return getFieldName(headers.getHeader(CONTENT_DISPOSITION));
175    }
176
177    /**
178     * Gets the field name, which is given by the content-disposition header.
179     *
180     * @param contentDisposition The content-dispositions header value.
181     * @return The field name.
182     */
183    private String getFieldName(final String contentDisposition) {
184        String fieldName = null;
185        if (contentDisposition != null && contentDisposition.toLowerCase(Locale.ENGLISH).startsWith(FORM_DATA)) {
186            final var parser = new ParameterParser();
187            parser.setLowerCaseNames(true);
188            // Parameter parser can handle null input
189            final var params = parser.parse(contentDisposition, ';');
190            fieldName = params.get(NAME_KEY);
191            if (fieldName != null) {
192                fieldName = fieldName.trim();
193            }
194        }
195        return fieldName;
196    }
197
198    /**
199     * Gets the maximum number of files allowed in a single request.
200     *
201     * @return The maximum number of files allowed in a single request.
202     */
203    public long getFileCountMax() {
204        return fileCountMax;
205    }
206
207    /**
208     * Gets the factory class used when creating file items.
209     *
210     * @return The factory class for new file items.
211     */
212    public F getFileItemFactory() {
213        return fileItemFactory;
214    }
215
216    /**
217     * Gets the file name from the {@code Content-disposition} header.
218     *
219     * @param headers The HTTP headers object.
220     *
221     * @return The file name for the current {@code encapsulation}.
222     */
223    public String getFileName(final FileItemHeaders headers) {
224        return getFileName(headers.getHeader(CONTENT_DISPOSITION));
225    }
226
227    /**
228     * Gets the given content-disposition headers file name.
229     *
230     * @param contentDisposition The content-disposition headers value.
231     * @return The file name
232     */
233    private String getFileName(final String contentDisposition) {
234        String fileName = null;
235        if (contentDisposition != null) {
236            final var cdl = contentDisposition.toLowerCase(Locale.ENGLISH);
237            if (cdl.startsWith(FORM_DATA) || cdl.startsWith(ATTACHMENT)) {
238                final var parser = new ParameterParser();
239                parser.setLowerCaseNames(true);
240                // Parameter parser can handle null input
241                final var params = parser.parse(contentDisposition, ';');
242                if (params.containsKey(FILENAME_KEY)) {
243                    fileName = params.get(FILENAME_KEY);
244                    if (fileName != null) {
245                        fileName = fileName.trim();
246                    } else {
247                        // Even if there is no value, the parameter is present,
248                        // so we return an empty file name rather than no file
249                        // name.
250                        fileName = "";
251                    }
252                }
253            }
254        }
255        return fileName;
256    }
257
258    /**
259     * Gets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
260     *
261     * @see #setFileSizeMax(long)
262     * @return Maximum size of a single uploaded file.
263     */
264    public long getFileSizeMax() {
265        return fileSizeMax;
266    }
267
268    /**
269     * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the request encoding is used. If
270     * that is also not specified, or {@code null}, the platform default encoding is used.
271     *
272     * @return The encoding used to read part headers.
273     */
274    public Charset getHeaderCharset() {
275        return headerCharset;
276    }
277
278    /**
279     * Gets a file item iterator.
280     *
281     * @param request The servlet request to be parsed.
282     * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
283     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
284     * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
285     *                             uploaded content.
286     */
287    public abstract FileItemInputIterator getItemIterator(R request) throws FileUploadException, IOException;
288
289    /**
290     * Gets an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
291     *
292     * @param requestContext The context for the request to be parsed.
293     * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
294     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
295     * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
296     *                             uploaded content.
297     */
298    public FileItemInputIterator getItemIterator(final RequestContext requestContext) throws FileUploadException, IOException {
299        return new FileItemInputIteratorImpl(this, requestContext);
300    }
301
302    /**
303     * Parses the {@code header-part} and returns as key/value pairs.
304     * <p>
305     * If there are multiple headers of the same names, the name will map to a comma-separated list containing the values.
306     * </p>
307     *
308     * @param headerPart The {@code header-part} of the current {@code encapsulation}.
309     * @return A {@code Map} containing the parsed HTTP request headers.
310     */
311    public FileItemHeaders getParsedHeaders(final String headerPart) {
312        final var len = headerPart.length();
313        final var headers = newFileItemHeaders();
314        var start = 0;
315        for (;;) {
316            var end = parseEndOfLine(headerPart, start);
317            if (start == end) {
318                break;
319            }
320            final var header = new StringBuilder(headerPart.substring(start, end));
321            start = end + 2;
322            while (start < len) {
323                var nonWs = start;
324                while (nonWs < len) {
325                    final var c = headerPart.charAt(nonWs);
326                    if (c != ' ' && c != '\t') {
327                        break;
328                    }
329                    ++nonWs;
330                }
331                if (nonWs == start) {
332                    break;
333                }
334                // Continuation line found
335                end = parseEndOfLine(headerPart, nonWs);
336                header.append(' ').append(headerPart, nonWs, end);
337                start = end + 2;
338            }
339            parseHeaderLine(headers, header.toString());
340        }
341        return headers;
342    }
343
344    /**
345     * Gets the progress listener.
346     *
347     * @return The progress listener, if any, or null.
348     */
349    public ProgressListener getProgressListener() {
350        return progressListener;
351    }
352
353    /**
354     * Gets the maximum allowed size of a complete request, as opposed to {@link #getFileSizeMax()}.
355     *
356     * @return The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
357     * @see #setSizeMax(long)
358     *
359     */
360    public long getSizeMax() {
361        return sizeMax;
362    }
363
364    /**
365     * Creates a new instance of {@link FileItemHeaders}.
366     *
367     * @return The new instance.
368     */
369    protected FileItemHeaders newFileItemHeaders() {
370        return AbstractFileItemBuilder.newFileItemHeaders();
371    }
372
373    /**
374     * Skips bytes until the end of the current line.
375     *
376     * @param headerPart The headers, which are being parsed.
377     * @param end        Index of the last byte, which has yet been processed.
378     * @return Index of the \r\n sequence, which indicates end of line.
379     */
380    private int parseEndOfLine(final String headerPart, final int end) {
381        var index = end;
382        for (;;) {
383            final var offset = headerPart.indexOf('\r', index);
384            if (offset == -1 || offset + 1 >= headerPart.length()) {
385                throw new IllegalStateException("Expected headers to be terminated by an empty line.");
386            }
387            if (headerPart.charAt(offset + 1) == '\n') {
388                return offset;
389            }
390            index = offset + 1;
391        }
392    }
393
394    /**
395     * Parses the next header line.
396     *
397     * @param headers String with all headers.
398     * @param header  Map where to store the current header.
399     */
400    private void parseHeaderLine(final FileItemHeaders headers, final String header) {
401        final var colonOffset = header.indexOf(':');
402        if (colonOffset == -1) {
403            // This header line is malformed, skip it.
404            return;
405        }
406        final var headerName = header.substring(0, colonOffset).trim();
407        final var headerValue = header.substring(colonOffset + 1).trim();
408        headers.addHeader(headerName, headerValue);
409    }
410
411    /**
412     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
413     *
414     * @param request The servlet request to be parsed.
415     * @return A map of {@code FileItem} instances parsed from the request.
416     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
417     */
418    public abstract Map<String, List<I>> parseParameterMap(R request) throws FileUploadException;
419
420    /**
421     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
422     *
423     * @param ctx The context for the request to be parsed.
424     * @return A map of {@code FileItem} instances parsed from the request.
425     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
426     */
427    public Map<String, List<I>> parseParameterMap(final RequestContext ctx) throws FileUploadException {
428        final var items = parseRequest(ctx);
429        final Map<String, List<I>> itemsMap = new HashMap<>(items.size());
430
431        for (final I fileItem : items) {
432            final var fieldName = fileItem.getFieldName();
433            final var mappedItems = itemsMap.computeIfAbsent(fieldName, k -> new ArrayList<>());
434            mappedItems.add(fileItem);
435        }
436
437        return itemsMap;
438    }
439
440    /**
441     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
442     *
443     * @param request The servlet request to be parsed.
444     * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
445     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
446     */
447    public abstract List<I> parseRequest(R request) throws FileUploadException;
448
449    /**
450     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
451     *
452     * @param requestContext The context for the request to be parsed.
453     * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
454     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
455     */
456    public List<I> parseRequest(final RequestContext requestContext) throws FileUploadException {
457        final List<I> itemList = new ArrayList<>();
458        var successful = false;
459        try {
460            final var fileItemFactory = Objects.requireNonNull(getFileItemFactory(), "No FileItemFactory has been set.");
461            final var buffer = new byte[IOUtils.DEFAULT_BUFFER_SIZE];
462            getItemIterator(requestContext).forEachRemaining(fileItemInput -> {
463                if (itemList.size() == fileCountMax) {
464                    // The next item will exceed the limit.
465                    throw new FileUploadFileCountLimitException(ATTACHMENT, getFileCountMax(), itemList.size());
466                }
467                // Don't use getName() here to prevent an InvalidFileNameException.
468                // @formatter:off
469                final var fileItem = fileItemFactory.fileItemBuilder()
470                    .setFieldName(fileItemInput.getFieldName())
471                    .setContentType(fileItemInput.getContentType())
472                    .setFormField(fileItemInput.isFormField())
473                    .setFileName(fileItemInput.getName())
474                    .setFileItemHeaders(fileItemInput.getHeaders())
475                    .get();
476                // @formatter:on
477                itemList.add(fileItem);
478                try (var inputStream = fileItemInput.getInputStream();
479                        var outputStream = fileItem.getOutputStream()) {
480                    IOUtils.copyLarge(inputStream, outputStream, buffer);
481                } catch (final FileUploadException e) {
482                    throw e;
483                } catch (final IOException e) {
484                    throw new FileUploadException(String.format("Processing of %s request failed. %s", MULTIPART_FORM_DATA, e.getMessage()), e);
485                }
486            });
487            successful = true;
488            return itemList;
489        } catch (final FileUploadException e) {
490            throw e;
491        } catch (final IOException e) {
492            throw new FileUploadException(e.getMessage(), e);
493        } finally {
494            if (!successful) {
495                for (final I fileItem : itemList) {
496                    try {
497                        fileItem.delete();
498                    } catch (final Exception ignored) {
499                        // ignored TODO perhaps add to tracker delete failure list somehow?
500                    }
501                }
502            }
503        }
504    }
505
506    /**
507     * Sets the maximum number of files allowed per request.
508     *
509     * @param fileCountMax The new limit. {@code -1} means no limit.
510     */
511    public void setFileCountMax(final long fileCountMax) {
512        this.fileCountMax = fileCountMax;
513    }
514
515    /**
516     * Sets the factory class to use when creating file items.
517     *
518     * @param factory The factory class for new file items.
519     */
520    public void setFileItemFactory(final F factory) {
521        this.fileItemFactory = factory;
522    }
523
524    /**
525     * Sets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
526     *
527     * @see #getFileSizeMax()
528     * @param fileSizeMax Maximum size of a single uploaded file.
529     */
530    public void setFileSizeMax(final long fileSizeMax) {
531        this.fileSizeMax = fileSizeMax;
532    }
533
534    /**
535     * Specifies the character encoding to be used when reading the headers of individual part. When not specified, or {@code null}, the request encoding is
536     * used. If that is also not specified, or {@code null}, the platform default encoding is used.
537     *
538     * @param headerCharset The encoding used to read part headers.
539     */
540    public void setHeaderCharset(final Charset headerCharset) {
541        this.headerCharset = headerCharset;
542    }
543
544    /**
545     * Sets the progress listener.
546     *
547     * @param progressListener The progress listener, if any. Defaults to null.
548     */
549    public void setProgressListener(final ProgressListener progressListener) {
550        this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
551    }
552
553    /**
554     * Sets the maximum allowed size of a complete request, as opposed to {@link #setFileSizeMax(long)}.
555     *
556     * @param sizeMax The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
557     * @see #getSizeMax()
558     */
559    public void setSizeMax(final long sizeMax) {
560        this.sizeMax = sizeMax;
561    }
562
563}