View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.fileupload2.core;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.io.OutputStream;
23  import java.io.UncheckedIOException;
24  import java.nio.charset.Charset;
25  import java.nio.charset.StandardCharsets;
26  import java.nio.file.CopyOption;
27  import java.nio.file.Files;
28  import java.nio.file.InvalidPathException;
29  import java.nio.file.Path;
30  import java.nio.file.Paths;
31  import java.nio.file.StandardCopyOption;
32  import java.util.UUID;
33  import java.util.concurrent.atomic.AtomicInteger;
34  
35  import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
36  import org.apache.commons.io.Charsets;
37  import org.apache.commons.io.build.AbstractOrigin;
38  import org.apache.commons.io.file.PathUtils;
39  import org.apache.commons.io.function.Uncheck;
40  import org.apache.commons.io.output.DeferredFileOutputStream;
41  
42  /**
43   * The default implementation of the {@link FileItem FileItem} interface.
44   * <p>
45   * After retrieving an instance of this class from a {@link DiskFileItemFactory} instance (see
46   * {@code org.apache.commons.fileupload2.core.servlet.ServletFileUpload
47   * #parseRequest(javax.servlet.http.HttpServletRequest)}), you may either request all contents of file at once using {@link #get()} or request an
48   * {@link java.io.InputStream InputStream} with {@link #getInputStream()} and process the file without attempting to load it into memory, which may come handy
49   * with large files.
50   * </p>
51   * <p>
52   * Temporary files, which are created for file items, should be deleted later on. The best way to do this is using a
53   * {@link org.apache.commons.io.FileCleaningTracker}, which you can set on the {@link DiskFileItemFactory}. However, if you do use such a tracker, then you must
54   * consider the following: Temporary files are automatically deleted as soon as they are no longer needed. (More precisely, when the corresponding instance of
55   * {@link java.io.File} is garbage collected.) This is done by the so-called reaper thread, which is started and stopped automatically by the
56   * {@link org.apache.commons.io.FileCleaningTracker} when there are files to be tracked. It might make sense to terminate that thread, for example, if your web
57   * application ends. See the section on "Resource cleanup" in the users guide of Commons FileUpload.
58   * </p>
59   */
60  public final class DiskFileItem implements FileItem<DiskFileItem> {
61  
62      /**
63       * Builds a new {@link DiskFileItem} instance.
64       * <p>
65       * For example:
66       * </p>
67       *
68       * <pre>{@code
69       * final FileItem fileItem = fileItemFactory.fileItemBuilder()
70       *   .setFieldName("FieldName")
71       *   .setContentType("ContentType")
72       *   .setFormField(true)
73       *   .setFileName("FileName")
74       *   .setFileItemHeaders(...)
75       *   .get();
76       * }
77       * </pre>
78       */
79      public static class Builder extends AbstractFileItemBuilder<DiskFileItem, Builder> {
80  
81          public Builder() {
82              setBufferSize(DiskFileItemFactory.DEFAULT_THRESHOLD);
83              setPath(PathUtils.getTempDirectory());
84              setCharset(DEFAULT_CHARSET);
85              setCharsetDefault(DEFAULT_CHARSET);
86          }
87  
88          /**
89           * Constructs a new instance.
90           * <p>
91           * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an
92           * {@link UnsupportedOperationException}.
93           * </p>
94           *
95           * @return a new instance.
96           * @throws UnsupportedOperationException if the origin cannot provide a Path.
97           * @see AbstractOrigin#getReader(Charset)
98           */
99          @Override
100         public DiskFileItem get() {
101             final var diskFileItem = new DiskFileItem(getFieldName(), getContentType(), isFormField(), getFileName(), getBufferSize(), getPath(),
102                     getFileItemHeaders(), getCharset());
103             final var tracker = getFileCleaningTracker();
104             if (tracker != null) {
105                 tracker.track(diskFileItem.getTempFile().toFile(), diskFileItem);
106             }
107             return diskFileItem;
108         }
109 
110     }
111 
112     /**
113      * Default content charset to be used when no explicit charset parameter is provided by the sender. Media subtypes of the "text" type are defined to have a
114      * default charset value of "ISO-8859-1" when received via HTTP.
115      */
116     public static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
117 
118     /**
119      * UID used in unique file name generation.
120      */
121     private static final String UID = UUID.randomUUID().toString().replace('-', '_');
122 
123     /**
124      * Counter used in unique identifier generation.
125      */
126     private static final AtomicInteger COUNTER = new AtomicInteger(0);
127 
128     /**
129      * Constructs a new {@link Builder}.
130      *
131      * @return a new {@link Builder}.
132      */
133     public static Builder builder() {
134         return new Builder();
135     }
136 
137     /**
138      * Tests if the file name is valid. For example, if it contains a NUL characters, it's invalid. If the file name is valid, it will be returned without any
139      * modifications. Otherwise, throw an {@link InvalidPathException}.
140      *
141      * @param fileName The file name to check
142      * @return Unmodified file name, if valid.
143      * @throws InvalidPathException The file name is invalid.
144      */
145     public static String checkFileName(final String fileName) {
146         if (fileName != null) {
147             // Specific NUL check to build a better exception message.
148             final var indexOf0 = fileName.indexOf(0);
149             if (indexOf0 != -1) {
150                 final var sb = new StringBuilder();
151                 for (var i = 0; i < fileName.length(); i++) {
152                     final var c = fileName.charAt(i);
153                     switch (c) {
154                     case 0:
155                         sb.append("\\0");
156                         break;
157                     default:
158                         sb.append(c);
159                         break;
160                     }
161                 }
162                 throw new InvalidPathException(fileName, sb.toString(), indexOf0);
163             }
164             // Throws InvalidPathException on invalid file names
165             Paths.get(fileName);
166         }
167         return fileName;
168     }
169 
170     /**
171      * Gets an identifier that is unique within the class loader used to load this class, but does not have random-like appearance.
172      *
173      * @return A String with the non-random looking instance identifier.
174      */
175     private static String getUniqueId() {
176         final var limit = 100_000_000;
177         final var current = COUNTER.getAndIncrement();
178         var id = Integer.toString(current);
179 
180         // If you manage to get more than 100 million of ids, you'll
181         // start getting ids longer than 8 characters.
182         if (current < limit) {
183             id = ("00000000" + id).substring(id.length());
184         }
185         return id;
186     }
187 
188     /**
189      * The name of the form field as provided by the browser.
190      */
191     private String fieldName;
192 
193     /**
194      * The content type passed by the browser, or {@code null} if not defined.
195      */
196     private final String contentType;
197 
198     /**
199      * Whether or not this item is a simple form field.
200      */
201     private boolean isFormField;
202 
203     /**
204      * The original file name in the user's file system.
205      */
206     private final String fileName;
207 
208     /**
209      * The size of the item, in bytes. This is used to cache the size when a file item is moved from its original location.
210      */
211     private long size = -1;
212 
213     /**
214      * The threshold above which uploads will be stored on disk.
215      */
216     private final int threshold;
217 
218     /**
219      * The directory in which uploaded files will be stored, if stored on disk.
220      */
221     private final Path repository;
222 
223     /**
224      * Cached contents of the file.
225      */
226     private byte[] cachedContent;
227 
228     /**
229      * Output stream for this item.
230      */
231     private DeferredFileOutputStream dfos;
232 
233     /**
234      * The temporary file to use.
235      */
236     private final Path tempFile;
237 
238     /**
239      * The file items headers.
240      */
241     private FileItemHeaders fileItemHeaders;
242 
243     /**
244      * Default content Charset to be used when no explicit Charset parameter is provided by the sender.
245      */
246     private Charset charsetDefault = DEFAULT_CHARSET;
247 
248     /**
249      * Constructs a new {@code DiskFileItem} instance.
250      *
251      * @param fieldName       The name of the form field.
252      * @param contentType     The content type passed by the browser or {@code null} if not specified.
253      * @param isFormField     Whether or not this item is a plain form field, as opposed to a file upload.
254      * @param fileName        The original file name in the user's file system, or {@code null} if not specified.
255      * @param threshold       The threshold, in bytes, below which items will be retained in memory and above which they will be stored as a file.
256      * @param repository      The data repository, which is the directory in which files will be created, should the item size exceed the threshold.
257      * @param fileItemHeaders The file item headers.
258      * @param defaultCharset  The default Charset.
259      */
260     private DiskFileItem(final String fieldName, final String contentType, final boolean isFormField, final String fileName, final int threshold,
261             final Path repository, final FileItemHeaders fileItemHeaders, final Charset defaultCharset) {
262         this.fieldName = fieldName;
263         this.contentType = contentType;
264         this.charsetDefault = defaultCharset;
265         this.isFormField = isFormField;
266         this.fileName = fileName;
267         this.fileItemHeaders = fileItemHeaders;
268         this.threshold = threshold;
269         this.repository = repository != null ? repository : PathUtils.getTempDirectory();
270         this.tempFile = this.repository.resolve(String.format("upload_%s_%s.tmp", UID, getUniqueId()));
271     }
272 
273     /**
274      * Deletes the underlying storage for a file item, including deleting any associated temporary disk file. This method can be used to ensure that this is
275      * done at an earlier time, thus preserving system resources.
276      *
277      * @throws IOException if an error occurs.
278      */
279     @Override
280     public DiskFileItem delete() throws IOException {
281         cachedContent = null;
282         final var outputFile = getPath();
283         if (outputFile != null && !isInMemory() && Files.exists(outputFile)) {
284             Files.delete(outputFile);
285         }
286         return this;
287     }
288 
289     /**
290      * Gets the contents of the file as an array of bytes. If the contents of the file were not yet cached in memory, they will be loaded from the disk storage
291      * and cached.
292      *
293      * @return The contents of the file as an array of bytes or {@code null} if the data cannot be read.
294      * @throws UncheckedIOException if an I/O error occurs.
295      * @throws OutOfMemoryError     See {@link Files#readAllBytes(Path)}: If an array of the required size cannot be allocated, for example the file is larger
296      *                              that {@code 2GB}
297      */
298     @Override
299     public byte[] get() throws UncheckedIOException {
300         if (isInMemory()) {
301             if (cachedContent == null && dfos != null) {
302                 cachedContent = dfos.getData();
303             }
304             return cachedContent != null ? cachedContent.clone() : new byte[0];
305         }
306         return Uncheck.get(() -> Files.readAllBytes(dfos.getFile().toPath()));
307     }
308 
309     /**
310      * Gets the content charset passed by the agent or {@code null} if not defined.
311      *
312      * @return The content charset passed by the agent or {@code null} if not defined.
313      */
314     public Charset getCharset() {
315         final var parser = new ParameterParser();
316         parser.setLowerCaseNames(true);
317         // Parameter parser can handle null input
318         final var params = parser.parse(getContentType(), ';');
319         return Charsets.toCharset(params.get("charset"), charsetDefault);
320     }
321 
322     /**
323      * Gets the default charset for use when no explicit charset parameter is provided by the sender.
324      *
325      * @return the default charset
326      */
327     public Charset getCharsetDefault() {
328         return charsetDefault;
329     }
330 
331     /**
332      * Gets the content type passed by the agent or {@code null} if not defined.
333      *
334      * @return The content type passed by the agent or {@code null} if not defined.
335      */
336     @Override
337     public String getContentType() {
338         return contentType;
339     }
340 
341     /**
342      * Gets the name of the field in the multipart form corresponding to this file item.
343      *
344      * @return The name of the form field.
345      * @see #setFieldName(String)
346      */
347     @Override
348     public String getFieldName() {
349         return fieldName;
350     }
351 
352     /**
353      * Gets the file item headers.
354      *
355      * @return The file items headers.
356      */
357     @Override
358     public FileItemHeaders getHeaders() {
359         return fileItemHeaders;
360     }
361 
362     /**
363      * Gets an {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
364      *
365      * @return An {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
366      * @throws IOException if an error occurs.
367      */
368     @Override
369     public InputStream getInputStream() throws IOException {
370         if (!isInMemory()) {
371             return Files.newInputStream(dfos.getFile().toPath());
372         }
373 
374         if (cachedContent == null) {
375             cachedContent = dfos.getData();
376         }
377         return new ByteArrayInputStream(cachedContent);
378     }
379 
380     /**
381      * Gets the original file name in the client's file system.
382      *
383      * @return The original file name in the client's file system.
384      * @throws InvalidPathException The file name contains a NUL character, which might be an indicator of a security attack. If you intend to use the file name
385      *                              anyways, catch the exception and use {@link InvalidPathException#getInput()}.
386      */
387     @Override
388     public String getName() {
389         return DiskFileItem.checkFileName(fileName);
390     }
391 
392     /**
393      * Gets an {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file.
394      *
395      * @return An {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file.
396      */
397     @Override
398     public OutputStream getOutputStream() {
399         if (dfos == null) {
400             dfos = DeferredFileOutputStream.builder().setThreshold(threshold).setOutputFile(getTempFile().toFile()).get();
401         }
402         return dfos;
403     }
404 
405     /**
406      * Gets the {@link Path} for the {@code FileItem}'s data's temporary location on the disk. Note that for {@code FileItem}s that have their data stored in
407      * memory, this method will return {@code null}. When handling large files, you can use {@link Files#move(Path,Path,CopyOption...)} to move the file to new
408      * location without copying the data, if the source and destination locations reside within the same logical volume.
409      *
410      * @return The data file, or {@code null} if the data is stored in memory.
411      */
412     public Path getPath() {
413         if (dfos == null) {
414             return null;
415         }
416         if (isInMemory()) {
417             return null;
418         }
419         return dfos.getFile().toPath();
420     }
421 
422     /**
423      * Gets the size of the file.
424      *
425      * @return The size of the file, in bytes.
426      */
427     @Override
428     public long getSize() {
429         if (size >= 0) {
430             return size;
431         }
432         if (cachedContent != null) {
433             return cachedContent.length;
434         }
435         return dfos != null ? dfos.getByteCount() : 0;
436     }
437 
438     /**
439      * Gets the contents of the file as a String, using the default character encoding. This method uses {@link #get()} to retrieve the contents of the file.
440      * <p>
441      * <b>TODO</b> Consider making this method throw UnsupportedEncodingException.
442      * </p>
443      *
444      * @return The contents of the file, as a string.
445      */
446     @Override
447     public String getString() {
448         return new String(get(), getCharset());
449     }
450 
451     /**
452      * Gets the contents of the file as a String, using the specified encoding. This method uses {@link #get()} to retrieve the contents of the file.
453      *
454      * @param charset The charset to use.
455      * @return The contents of the file, as a string.
456      */
457     @Override
458     public String getString(final Charset charset) throws IOException {
459         return new String(get(), Charsets.toCharset(charset, charsetDefault));
460     }
461 
462     /**
463      * Creates and returns a {@link java.io.File File} representing a uniquely named temporary file in the configured repository path. The lifetime of the file
464      * is tied to the lifetime of the {@code FileItem} instance; the file will be deleted when the instance is garbage collected.
465      * <p>
466      * <b>Note: Subclasses that override this method must ensure that they return the same File each time.</b>
467      * </p>
468      *
469      * @return The {@link java.io.File File} to be used for temporary storage.
470      */
471     protected Path getTempFile() {
472         return tempFile;
473     }
474 
475     /**
476      * Tests whether or not a {@code FileItem} instance represents a simple form field.
477      *
478      * @return {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file.
479      * @see #setFormField(boolean)
480      */
481     @Override
482     public boolean isFormField() {
483         return isFormField;
484     }
485 
486     /**
487      * Provides a hint as to whether or not the file contents will be read from memory.
488      *
489      * @return {@code true} if the file contents will be read from memory; {@code false} otherwise.
490      */
491     @Override
492     public boolean isInMemory() {
493         if (cachedContent != null) {
494             return true;
495         }
496         return dfos.isInMemory();
497     }
498 
499     /**
500      * Sets the default charset for use when no explicit charset parameter is provided by the sender.
501      *
502      * @param charset the default charset
503      * @return this
504      */
505     public DiskFileItem setCharsetDefault(final Charset charset) {
506         charsetDefault = charset;
507         return this;
508     }
509 
510     /**
511      * Sets the field name used to reference this file item.
512      *
513      * @param fieldName The name of the form field.
514      * @see #getFieldName()
515      */
516     @Override
517     public DiskFileItem setFieldName(final String fieldName) {
518         this.fieldName = fieldName;
519         return this;
520     }
521 
522     /**
523      * Specifies whether or not a {@code FileItem} instance represents a simple form field.
524      *
525      * @param state {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file.
526      * @see #isFormField()
527      */
528     @Override
529     public DiskFileItem setFormField(final boolean state) {
530         isFormField = state;
531         return this;
532     }
533 
534     /**
535      * Sets the file item headers.
536      *
537      * @param headers The file items headers.
538      */
539     @Override
540     public DiskFileItem setHeaders(final FileItemHeaders headers) {
541         this.fileItemHeaders = headers;
542         return this;
543     }
544 
545     /**
546      * Returns a string representation of this object.
547      *
548      * @return a string representation of this object.
549      */
550     @Override
551     public String toString() {
552         return String.format("name=%s, StoreLocation=%s, size=%s bytes, isFormField=%s, FieldName=%s", getName(), getPath(), getSize(), isFormField(),
553                 getFieldName());
554     }
555 
556     /**
557      * Writes an uploaded item to disk.
558      * <p>
559      * The client code is not concerned with whether or not the item is stored in memory, or on disk in a temporary location. They just want to write the
560      * uploaded item to a file.
561      * </p>
562      * <p>
563      * This implementation first attempts to rename the uploaded item to the specified destination file, if the item was originally written to disk. Otherwise,
564      * the data will be copied to the specified file.
565      * </p>
566      * <p>
567      * This method is only guaranteed to work <em>once</em>, the first time it is invoked for a particular item. This is because, in the event that the method
568      * renames a temporary file, that file will no longer be available to copy or rename again at a later time.
569      * </p>
570      *
571      * @param file The {@code File} into which the uploaded item should be stored.
572      * @throws IOException if an error occurs.
573      */
574     @Override
575     public DiskFileItem write(final Path file) throws IOException {
576         if (isInMemory()) {
577             try (var fout = Files.newOutputStream(file)) {
578                 fout.write(get());
579             } catch (final IOException e) {
580                 throw new IOException("Unexpected output data", e);
581             }
582         } else {
583             final var outputFile = getPath();
584             if (outputFile == null) {
585                 /*
586                  * For whatever reason we cannot write the file to disk.
587                  */
588                 throw new FileUploadException("Cannot write uploaded file to disk.");
589             }
590             // Save the length of the file
591             size = Files.size(outputFile);
592             //
593             // The uploaded file is being stored on disk in a temporary location so move it to the desired file.
594             //
595             Files.move(outputFile, file, StandardCopyOption.REPLACE_EXISTING);
596         }
597         return this;
598     }
599 }