/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.fileupload2.core;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UncheckedIOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.CopyOption;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
import org.apache.commons.io.Charsets;
import org.apache.commons.io.build.AbstractOrigin;
import org.apache.commons.io.file.PathUtils;
import org.apache.commons.io.function.Uncheck;
import org.apache.commons.io.output.DeferredFileOutputStream;

/**
 * The default implementation of the {@link FileItem FileItem} interface.
 * <p>
 * After retrieving an instance of this class from a {@link DiskFileItemFactory} instance (see
 * {@code org.apache.commons.fileupload2.core.servlet.ServletFileUpload
 * #parseRequest(javax.servlet.http.HttpServletRequest)}), you may either request all contents of file at once using {@link #get()} or request an
 * {@link java.io.InputStream InputStream} with {@link #getInputStream()} and process the file without attempting to load it into memory, which may come in
 * handy with large files.
 * </p>
 * <p>
 * Temporary files, which are created for file items, should be deleted later on. The best way to do this is using a
 * {@link org.apache.commons.io.FileCleaningTracker}, which you can set on the {@link DiskFileItemFactory}. However, if you do use such a tracker, then you must
 * consider the following: Temporary files are automatically deleted as soon as they are no longer needed. (More precisely, when the corresponding instance of
 * {@link java.io.File} is garbage collected.) This is done by the so-called reaper thread, which is started and stopped automatically by the
 * {@link org.apache.commons.io.FileCleaningTracker} when there are files to be tracked. It might make sense to terminate that thread, for example, if your web
 * application ends. See the section on "Resource cleanup" in the users guide of Commons FileUpload.
 * </p>
 */
public final class DiskFileItem implements FileItem<DiskFileItem> {

    /**
     * Builds a new {@link DiskFileItem} instance.
     * <p>
     * For example:
     * </p>
     *
     * <pre>{@code
     * final FileItem fileItem = fileItemFactory.fileItemBuilder()
     *   .setFieldName("FieldName")
     *   .setContentType("ContentType")
     *   .setFormField(true)
     *   .setFileName("FileName")
     *   .setFileItemHeaders(...)
     *   .get();
     * }
     * </pre>
     */
    public static class Builder extends AbstractFileItemBuilder<DiskFileItem, Builder> {

        /**
         * Constructs a new builder preset with {@link DiskFileItemFactory#DEFAULT_THRESHOLD} as buffer size, the temporary directory as path and
         * {@link DiskFileItem#DEFAULT_CHARSET} as both charset and default charset.
         */
        public Builder() {
            setBufferSize(DiskFileItemFactory.DEFAULT_THRESHOLD);
            setPath(PathUtils.getTempDirectory());
            setCharset(DEFAULT_CHARSET);
            setCharsetDefault(DEFAULT_CHARSET);
        }

        /**
         * Constructs a new {@link DiskFileItem} from the values configured on this builder.
         * <p>
         * If a file cleaning tracker is configured, the new item's temporary file is registered with it, using the item itself as the marker object.
         * </p>
         *
         * @return a new instance.
         * @throws UnsupportedOperationException if the origin cannot provide a Path.
         * @see AbstractOrigin#getPath()
         */
        @Override
        public DiskFileItem get() {
            final var diskFileItem = new DiskFileItem(getFieldName(), getContentType(), isFormField(), getFileName(), getBufferSize(), getPath(),
                    getFileItemHeaders(), getCharset());
            final var tracker = getFileCleaningTracker();
            if (tracker != null) {
                tracker.track(diskFileItem.getTempFile().toFile(), diskFileItem);
            }
            return diskFileItem;
        }

    }

    /**
     * Default content charset to be used when no explicit charset parameter is provided by the sender. Media subtypes of the "text" type are defined to have a
     * default charset value of "ISO-8859-1" when received via HTTP.
     */
    public static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;

    /**
     * UID used in unique file name generation.
     */
    private static final String UID = UUID.randomUUID().toString().replace('-', '_');

    /**
     * Counter used in unique identifier generation.
     */
    private static final AtomicInteger COUNTER = new AtomicInteger(0);

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Tests if the file name is valid. For example, if it contains a NUL character, it's invalid. If the file name is valid, it will be returned without any
     * modifications. Otherwise, throw an {@link InvalidPathException}.
     *
     * @param fileName The file name to check, may be {@code null}, in which case {@code null} is returned.
     * @return Unmodified file name, if valid.
     * @throws InvalidPathException The file name is invalid.
     */
    public static String checkFileName(final String fileName) {
        if (fileName != null) {
            // Specific NUL check to build a better exception message.
            final var indexOf0 = fileName.indexOf(0);
            if (indexOf0 != -1) {
                // The reason text is the file name with every NUL rendered as the two visible characters backslash and zero.
                throw new InvalidPathException(fileName, fileName.replace("\0", "\\0"), indexOf0);
            }
            // Throws InvalidPathException on invalid file names
            Paths.get(fileName);
        }
        return fileName;
    }

    /**
     * Gets an identifier that is unique within the class loader used to load this class, but does not have random-like appearance.
     *
     * @return A String with the non-random looking instance identifier.
     */
    private static String getUniqueId() {
        final var limit = 100_000_000;
        final var current = COUNTER.getAndIncrement();
        var id = Integer.toString(current);

        // If you manage to get more than 100 million of ids, you'll
        // start getting ids longer than 8 characters.
        if (current < limit) {
            // Left-pad with zeros to a width of eight characters.
            id = ("00000000" + id).substring(id.length());
        }
        return id;
    }

    /**
     * The name of the form field as provided by the browser.
     */
    private String fieldName;

    /**
     * The content type passed by the browser, or {@code null} if not defined.
     */
    private final String contentType;

    /**
     * Whether or not this item is a simple form field.
     */
    private boolean isFormField;

    /**
     * The original file name in the user's file system.
     */
    private final String fileName;

    /**
     * The size of the item, in bytes. This is used to cache the size when a file item is moved from its original location.
     */
    private long size = -1;

    /**
     * The threshold above which uploads will be stored on disk.
     */
    private final int threshold;

    /**
     * The directory in which uploaded files will be stored, if stored on disk.
     */
    private final Path repository;

    /**
     * Cached contents of the file.
     */
    private byte[] cachedContent;

    /**
     * Output stream for this item; {@code null} until {@link #getOutputStream()} is called for the first time.
     */
    private DeferredFileOutputStream dfos;

    /**
     * The temporary file to use.
     */
    private final Path tempFile;

    /**
     * The file items headers.
     */
    private FileItemHeaders fileItemHeaders;

    /**
     * Default content Charset to be used when no explicit Charset parameter is provided by the sender.
     */
    private Charset charsetDefault = DEFAULT_CHARSET;

    /**
     * Constructs a new {@code DiskFileItem} instance.
     *
     * @param fieldName       The name of the form field.
     * @param contentType     The content type passed by the browser or {@code null} if not specified.
     * @param isFormField     Whether or not this item is a plain form field, as opposed to a file upload.
     * @param fileName        The original file name in the user's file system, or {@code null} if not specified.
     * @param threshold       The threshold, in bytes, below which items will be retained in memory and above which they will be stored as a file.
     * @param repository      The data repository, which is the directory in which files will be created, should the item size exceed the threshold; if
     *                        {@code null}, the temporary directory is used.
     * @param fileItemHeaders The file item headers.
     * @param defaultCharset  The default Charset.
     */
    private DiskFileItem(final String fieldName, final String contentType, final boolean isFormField, final String fileName, final int threshold,
            final Path repository, final FileItemHeaders fileItemHeaders, final Charset defaultCharset) {
        this.fieldName = fieldName;
        this.contentType = contentType;
        this.charsetDefault = defaultCharset;
        this.isFormField = isFormField;
        this.fileName = fileName;
        this.fileItemHeaders = fileItemHeaders;
        this.threshold = threshold;
        this.repository = repository != null ? repository : PathUtils.getTempDirectory();
        // The temporary path is fixed here so that getTempFile() returns the same value for the lifetime of the item.
        this.tempFile = this.repository.resolve(String.format("upload_%s_%s.tmp", UID, getUniqueId()));
    }

    /**
     * Deletes the underlying storage for a file item, including deleting any associated temporary disk file. This method can be used to ensure that this is
     * done at an earlier time, thus preserving system resources.
     *
     * @return this
     * @throws IOException if an error occurs.
     */
    @Override
    public DiskFileItem delete() throws IOException {
        cachedContent = null;
        final var outputFile = getPath();
        if (outputFile != null && !isInMemory() && Files.exists(outputFile)) {
            Files.delete(outputFile);
        }
        return this;
    }

    /**
     * Gets the contents of the file as an array of bytes. Contents held in memory are cached and a copy of the cache is returned; contents stored on disk are
     * read from the file on each call.
     *
     * @return The contents of the file as an array of bytes; an empty array if the item holds no data.
     * @throws UncheckedIOException if an I/O error occurs.
     * @throws OutOfMemoryError     See {@link Files#readAllBytes(Path)}: If an array of the required size cannot be allocated, for example the file is larger
     *                              than {@code 2GB}
     */
    @Override
    public byte[] get() throws UncheckedIOException {
        if (isInMemory()) {
            if (cachedContent == null && dfos != null) {
                cachedContent = dfos.getData();
            }
            // Hand out a copy so that callers cannot modify the cached bytes.
            return cachedContent != null ? cachedContent.clone() : new byte[0];
        }
        return Uncheck.get(() -> Files.readAllBytes(dfos.getFile().toPath()));
    }

    /**
     * Gets the charset named by the {@code charset} parameter of the content type, or the default charset if the content type carries no such parameter.
     *
     * @return The content charset passed by the agent, or the default charset if not defined.
     * @see #getCharsetDefault()
     */
    public Charset getCharset() {
        final var parser = new ParameterParser();
        parser.setLowerCaseNames(true);
        // Parameter parser can handle null input
        final var params = parser.parse(getContentType(), ';');
        return Charsets.toCharset(params.get("charset"), charsetDefault);
    }

    /**
     * Gets the default charset for use when no explicit charset parameter is provided by the sender.
     *
     * @return the default charset
     */
    public Charset getCharsetDefault() {
        return charsetDefault;
    }

    /**
     * Gets the content type passed by the agent or {@code null} if not defined.
     *
     * @return The content type passed by the agent or {@code null} if not defined.
     */
    @Override
    public String getContentType() {
        return contentType;
    }

    /**
     * Gets the name of the field in the multipart form corresponding to this file item.
     *
     * @return The name of the form field.
     * @see #setFieldName(String)
     */
    @Override
    public String getFieldName() {
        return fieldName;
    }

    /**
     * Gets the file item headers.
     *
     * @return The file items headers.
     */
    @Override
    public FileItemHeaders getHeaders() {
        return fileItemHeaders;
    }

    /**
     * Gets an {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
     * <p>
     * An item that holds no data, because no output stream was ever opened for it, yields an empty stream.
     * </p>
     *
     * @return An {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
     * @throws IOException if an error occurs.
     */
    @Override
    public InputStream getInputStream() throws IOException {
        if (!isInMemory()) {
            return Files.newInputStream(dfos.getFile().toPath());
        }
        // Same guard as get(): dfos is null until getOutputStream() has been called.
        if (cachedContent == null && dfos != null) {
            cachedContent = dfos.getData();
        }
        return new ByteArrayInputStream(cachedContent != null ? cachedContent : new byte[0]);
    }

    /**
     * Gets the original file name in the client's file system.
     *
     * @return The original file name in the client's file system.
     * @throws InvalidPathException The file name contains a NUL character, which might be an indicator of a security attack. If you intend to use the file name
     *                              anyways, catch the exception and use {@link InvalidPathException#getInput()}.
     */
    @Override
    public String getName() {
        return DiskFileItem.checkFileName(fileName);
    }

    /**
     * Gets an {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file. The stream is created on the first call and
     * the same instance is returned afterwards.
     *
     * @return An {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file.
     */
    @Override
    public OutputStream getOutputStream() {
        if (dfos == null) {
            dfos = DeferredFileOutputStream.builder().setThreshold(threshold).setOutputFile(getTempFile().toFile()).get();
        }
        return dfos;
    }

    /**
     * Gets the {@link Path} for the {@code FileItem}'s data's temporary location on the disk. Note that for {@code FileItem}s that have their data stored in
     * memory, this method will return {@code null}. When handling large files, you can use {@link Files#move(Path,Path,CopyOption...)} to move the file to new
     * location without copying the data, if the source and destination locations reside within the same logical volume.
     *
     * @return The data file, or {@code null} if the data is stored in memory.
     */
    public Path getPath() {
        if (dfos == null || isInMemory()) {
            return null;
        }
        return dfos.getFile().toPath();
    }

    /**
     * Gets the size of the file.
     *
     * @return The size of the file, in bytes.
     */
    @Override
    public long getSize() {
        if (size >= 0) {
            // Size recorded by write(Path) before the temporary file was moved away.
            return size;
        }
        if (cachedContent != null) {
            return cachedContent.length;
        }
        return dfos != null ? dfos.getByteCount() : 0;
    }

    /**
     * Gets the contents of the file as a String, using the charset returned by {@link #getCharset()}. This method uses {@link #get()} to retrieve the contents
     * of the file.
     * <p>
     * <b>TODO</b> Consider making this method throw UnsupportedEncodingException.
     * </p>
     *
     * @return The contents of the file, as a string.
     */
    @Override
    public String getString() {
        return new String(get(), getCharset());
    }

    /**
     * Gets the contents of the file as a String, using the specified encoding. This method uses {@link #get()} to retrieve the contents of the file.
     *
     * @param charset The charset to use; if {@code null}, the default charset of this item is used.
     * @return The contents of the file, as a string.
     * @throws IOException if an error occurs.
     */
    @Override
    public String getString(final Charset charset) throws IOException {
        return new String(get(), Charsets.toCharset(charset, charsetDefault));
    }

    /**
     * Gets the {@link Path} of the uniquely named temporary file in the configured repository path. The path is computed once, when this item is constructed,
     * so the same value is returned on every call.
     *
     * @return The {@link Path} to be used for temporary storage.
     */
    protected Path getTempFile() {
        return tempFile;
    }

    /**
     * Tests whether or not a {@code FileItem} instance represents a simple form field.
     *
     * @return {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file.
     * @see #setFormField(boolean)
     */
    @Override
    public boolean isFormField() {
        return isFormField;
    }

    /**
     * Provides a hint as to whether or not the file contents will be read from memory.
     * <p>
     * An item for which no output stream has been opened yet holds no data on disk and is therefore reported as being in memory.
     * </p>
     *
     * @return {@code true} if the file contents will be read from memory; {@code false} otherwise.
     */
    @Override
    public boolean isInMemory() {
        if (cachedContent != null) {
            return true;
        }
        // dfos stays null until getOutputStream() is called; nothing can have been written to disk before that.
        return dfos == null || dfos.isInMemory();
    }

    /**
     * Sets the default charset for use when no explicit charset parameter is provided by the sender.
     *
     * @param charset the default charset
     * @return this
     */
    public DiskFileItem setCharsetDefault(final Charset charset) {
        charsetDefault = charset;
        return this;
    }

    /**
     * Sets the field name used to reference this file item.
     *
     * @param fieldName The name of the form field.
     * @return this
     * @see #getFieldName()
     */
    @Override
    public DiskFileItem setFieldName(final String fieldName) {
        this.fieldName = fieldName;
        return this;
    }

    /**
     * Specifies whether or not a {@code FileItem} instance represents a simple form field.
     *
     * @param state {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file.
     * @return this
     * @see #isFormField()
     */
    @Override
    public DiskFileItem setFormField(final boolean state) {
        isFormField = state;
        return this;
    }

    /**
     * Sets the file item headers.
     *
     * @param headers The file items headers.
     * @return this
     */
    @Override
    public DiskFileItem setHeaders(final FileItemHeaders headers) {
        this.fileItemHeaders = headers;
        return this;
    }

    /**
     * Returns a string representation of this object.
     * <p>
     * The raw file name is printed without validation so that this method does not throw for names that {@link #getName()} rejects.
     * </p>
     *
     * @return a string representation of this object.
     */
    @Override
    public String toString() {
        return String.format("name=%s, StoreLocation=%s, size=%s bytes, isFormField=%s, FieldName=%s", fileName, getPath(), getSize(), isFormField(),
                getFieldName());
    }

    /**
     * Writes an uploaded item to disk.
     * <p>
     * The client code is not concerned with whether or not the item is stored in memory, or on disk in a temporary location. They just want to write the
     * uploaded item to a file.
     * </p>
     * <p>
     * This implementation moves the temporary file to the specified destination, replacing an existing file, if the item was originally written to disk.
     * Otherwise, the data will be copied to the specified file.
     * </p>
     * <p>
     * This method is only guaranteed to work <em>once</em>, the first time it is invoked for a particular item. This is because, in the event that the method
     * moves a temporary file, that file will no longer be available to copy or move again at a later time.
     * </p>
     *
     * @param file The {@link Path} into which the uploaded item should be stored.
     * @return this
     * @throws IOException if an error occurs.
     */
    @Override
    public DiskFileItem write(final Path file) throws IOException {
        if (isInMemory()) {
            try (var fout = Files.newOutputStream(file)) {
                fout.write(get());
            } catch (final IOException e) {
                throw new IOException("Unexpected output data", e);
            }
        } else {
            final var outputFile = getPath();
            if (outputFile == null) {
                /*
                 * For whatever reason we cannot write the file to disk.
                 */
                throw new FileUploadException("Cannot write uploaded file to disk.");
            }
            // Save the length of the file
            size = Files.size(outputFile);
            //
            // The uploaded file is being stored on disk in a temporary location so move it to the desired file.
            //
            Files.move(outputFile, file, StandardCopyOption.REPLACE_EXISTING);
        }
        return this;
    }
}