1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.fileupload2.core;
18
19 import java.io.ByteArrayOutputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.io.OutputStream;
23 import java.io.UnsupportedEncodingException;
24 import java.nio.charset.Charset;
25
26 import org.apache.commons.fileupload2.core.FileItemInput.ItemSkippedException;
27 import org.apache.commons.io.Charsets;
28 import org.apache.commons.io.IOUtils;
29 import org.apache.commons.io.build.AbstractOrigin;
30 import org.apache.commons.io.build.AbstractStreamBuilder;
31 import org.apache.commons.io.output.NullOutputStream;
32
33 /**
34 * Low-level API for processing file uploads.
35 *
36 * <p>
37 * This class can be used to process data streams conforming to MIME 'multipart' format as defined in <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC
38 * 1867</a>. Arbitrarily large amounts of data in the stream can be processed under constant memory usage.
39 * </p>
40 * <p>
41 * The format of the stream is defined in the following way:
42 * </p>
43 * <pre>
44 * multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
45 * encapsulation := delimiter body CRLF<br>
46 * delimiter := "--" boundary CRLF<br>
47 * close-delimiter := "--" boundary "--"<br>
48 * preamble := <ignore><br>
49 * epilogue := <ignore><br>
50 * body := header-part CRLF body-part<br>
51 * header-part := 1*header CRLF<br>
52 * header := header-name ":" header-value<br>
53 * header-name := <printable ASCII characters except ":"><br>
54 * header-value := <any ASCII characters except CR & LF><br>
55 * body-data := <arbitrary data><br>
56 * </pre>
57 *
58 * <p>
 * Note that body-data can contain another multipart entity. There is limited support for single pass processing of such nested streams. The nested stream is
60 * <strong>required</strong> to have a boundary token of the same length as the parent stream (see {@link #setBoundary(byte[])}).
61 * </p>
62 * <p>
63 * Here is an example of usage of this class:
64 * </p>
65 *
66 * <pre>
67 * try {
68 * MultipartInput multipartStream = MultipartInput.builder()
69 * .setBoundary(boundary)
70 * .setInputStream(input)
71 * .get();
72 * boolean nextPart = multipartStream.skipPreamble();
73 * OutputStream output;
74 * while (nextPart) {
75 * String header = multipartStream.readHeaders();
76 * // process headers
77 * // create some output stream
78 * multipartStream.readBodyData(output);
79 * nextPart = multipartStream.readBoundary();
80 * }
81 * } catch (MultipartInput.MalformedStreamException e) {
82 * // the stream failed to follow required syntax
83 * } catch (IOException e) {
84 * // a read or write error occurred
85 * }
86 * </pre>
87 */
88 public final class MultipartInput {
89
/**
 * Builds a new {@link MultipartInput} instance.
 * <p>
 * For example:
 * </p>
 *
 * <pre>{@code
 * MultipartInput multipartInput = MultipartInput.builder()
 *     .setBoundary(boundary)
 *     .setInputStream(input)
 *     .get();
 * }
 * </pre>
 * <p>
 * Setting a boundary is mandatory: {@link #get()} fails with an {@link IllegalArgumentException} when the boundary is {@code null}.
 * </p>
 */
public static class Builder extends AbstractStreamBuilder<MultipartInput, Builder> {

    /**
     * The boundary token, without the leading {@code CRLF--}.
     */
    private byte[] boundary;

    /**
     * Progress notifier, may be {@code null}.
     */
    private ProgressNotifier progressNotifier;

    /**
     * The per part size limit for headers, initially {@link MultipartInput#DEFAULT_PART_HEADER_SIZE_MAX}.
     */
    private int maxPartHeaderSize = DEFAULT_PART_HEADER_SIZE_MAX;

    /**
     * Constructs a new instance and registers {@link MultipartInput#DEFAULT_BUFSIZE} as the default buffer size.
     */
    public Builder() {
        setBufferSizeDefault(DEFAULT_BUFSIZE);
    }

    /**
     * Constructs a new {@link MultipartInput} from the current state of this builder.
     * <p>
     * This builder uses the input stream, buffer size, boundary, progress notifier and maximum part header size aspects.
     * </p>
     *
     * @return a new instance.
     * @throws IOException                   if an I/O error occurs.
     * @throws IllegalArgumentException      if no boundary was set, or if the buffer size is too small for the boundary.
     * @throws UnsupportedOperationException presumably if the configured origin cannot be converted to an {@link InputStream}; confirm against
     *                                       {@link AbstractOrigin}.
     * @see AbstractOrigin
     */
    @Override
    public MultipartInput get() throws IOException {
        return new MultipartInput(this);
    }

    /**
     * Gets the per part size limit for headers.
     *
     * @return The maximum size of the headers in bytes.
     * @since 2.0.0-M5
     */
    public int getMaxPartHeaderSize() {
        return maxPartHeaderSize;
    }

    /**
     * Sets the boundary.
     * <p>
     * The array is stored as given; its content is copied when {@link #get()} constructs the {@link MultipartInput}.
     * </p>
     *
     * @param boundary the boundary token, without the leading {@code CRLF--}.
     * @return {@code this} instance.
     */
    public Builder setBoundary(final byte[] boundary) {
        this.boundary = boundary;
        return this;
    }

    /**
     * Sets the per part size limit for headers.
     * <p>
     * {@link MultipartInput#readHeaders()} treats the value {@code -1} as "no limit".
     * </p>
     *
     * @param partHeaderSizeMax The maximum size of the headers in bytes.
     * @return {@code this} instance.
     * @since 2.0.0-M5
     */
    public Builder setMaxPartHeaderSize(final int partHeaderSizeMax) {
        this.maxPartHeaderSize = partHeaderSizeMax;
        return this;
    }

    /**
     * Sets the progress notifier.
     *
     * @param progressNotifier progress notifier, may be {@code null} to disable notifications.
     * @return {@code this} instance.
     */
    public Builder setProgressNotifier(final ProgressNotifier progressNotifier) {
        this.progressNotifier = progressNotifier;
        return this;
    }
}
188
/**
 * Signals an attempt to set an invalid boundary token.
 * <p>
 * Thrown by {@link MultipartInput#setBoundary(byte[])} when the new token does not have the same length as the current one.
 * </p>
 */
public static class FileUploadBoundaryException extends FileUploadException {

    /**
     * The UID to use when serializing this instance.
     */
    private static final long serialVersionUID = 2;

    /**
     * Constructs an instance with the specified detail message.
     *
     * @param message The detail message (which is saved for later retrieval by the {@link #getMessage()} method)
     */
    public FileUploadBoundaryException(final String message) {
        super(message);
    }

}
209
210 /**
211 * An {@link InputStream} for reading an items contents.
212 */
213 public class ItemInputStream extends InputStream {
214
215 /**
216 * Offset when converting negative bytes to integers.
217 */
218 private static final int BYTE_POSITIVE_OFFSET = 256;
219
220 /**
221 * The number of bytes, which have been read so far.
222 */
223 private long total;
224
225 /**
226 * The number of bytes, which must be hold, because they might be a part of the boundary.
227 */
228 private int pad;
229
230 /**
231 * The current offset in the buffer.
232 */
233 private int pos;
234
235 /**
236 * Whether the stream is already closed.
237 */
238 private boolean closed;
239
240 /**
241 * Creates a new instance.
242 */
243 ItemInputStream() {
244 findSeparator();
245 }
246
247 /**
248 * Returns the number of bytes, which are currently available, without blocking.
249 *
250 * @throws IOException An I/O error occurs.
251 * @return Number of bytes in the buffer.
252 */
253 @Override
254 public int available() throws IOException {
255 if (pos == -1) {
256 return tail - head - pad;
257 }
258 return pos - head;
259 }
260
261 private void checkOpen() throws ItemSkippedException {
262 if (closed) {
263 throw new FileItemInput.ItemSkippedException("checkOpen()");
264 }
265 }
266
267 /**
268 * Closes the input stream.
269 *
270 * @throws IOException An I/O error occurred.
271 */
272 @Override
273 public void close() throws IOException {
274 close(false);
275 }
276
277 /**
278 * Closes the input stream.
279 *
280 * @param closeUnderlying Whether to close the underlying stream (hard close)
281 * @throws IOException An I/O error occurred.
282 */
283 public void close(final boolean closeUnderlying) throws IOException {
284 if (closed) {
285 return;
286 }
287 if (closeUnderlying) {
288 closed = true;
289 input.close();
290 } else {
291 for (;;) {
292 var avail = available();
293 if (avail == 0) {
294 avail = makeAvailable();
295 if (avail == 0) {
296 break;
297 }
298 }
299 if (skip(avail) != avail) {
300 // TODO What to do?
301 }
302 }
303 }
304 closed = true;
305 }
306
307 /**
308 * Called for finding the separator.
309 */
310 private void findSeparator() {
311 pos = MultipartInput.this.findSeparator();
312 if (pos == -1) {
313 if (tail - head > keepRegion) {
314 pad = keepRegion;
315 } else {
316 pad = tail - head;
317 }
318 }
319 }
320
321 /**
322 * Gets the number of bytes, which have been read by the stream.
323 *
324 * @return Number of bytes, which have been read so far.
325 */
326 public long getBytesRead() {
327 return total;
328 }
329
330 /**
331 * Tests whether this instance is closed.
332 *
333 * @return whether this instance is closed.
334 */
335 public boolean isClosed() {
336 return closed;
337 }
338
339 /**
340 * Attempts to read more data.
341 *
342 * @return Number of available bytes
343 * @throws IOException An I/O error occurred.
344 */
345 private int makeAvailable() throws IOException {
346 if (pos != -1) {
347 return 0;
348 }
349
350 // Move the data to the beginning of the buffer.
351 total += tail - head - pad;
352 System.arraycopy(buffer, tail - pad, buffer, 0, pad);
353
354 // Refill buffer with new data.
355 head = 0;
356 tail = pad;
357
358 for (;;) {
359 final var bytesRead = input.read(buffer, tail, bufSize - tail);
360 if (bytesRead == -1) {
361 // The last pad amount is left in the buffer.
362 // Boundary can't be in there so signal an error
363 // condition.
364 final var msg = "Stream ended unexpectedly";
365 throw new MalformedStreamException(msg);
366 }
367 if (notifier != null) {
368 notifier.noteBytesRead(bytesRead);
369 }
370 tail += bytesRead;
371
372 findSeparator();
373 final var av = available();
374
375 if (av > 0 || pos != -1) {
376 return av;
377 }
378 }
379 }
380
381 /**
382 * Reads the next byte in the stream.
383 *
384 * @return The next byte in the stream, as a non-negative integer, or -1 for EOF.
385 * @throws IOException An I/O error occurred.
386 */
387 @Override
388 public int read() throws IOException {
389 checkOpen();
390 if (available() == 0 && makeAvailable() == 0) {
391 return -1;
392 }
393 ++total;
394 final int b = buffer[head++];
395 if (b >= 0) {
396 return b;
397 }
398 return b + BYTE_POSITIVE_OFFSET;
399 }
400
401 /**
402 * Reads bytes into the given buffer.
403 *
404 * @param b The destination buffer, where to write to.
405 * @param off Offset of the first byte in the buffer.
406 * @param len Maximum number of bytes to read.
407 * @return Number of bytes, which have been actually read, or -1 for EOF.
408 * @throws IOException An I/O error occurred.
409 */
410 @Override
411 public int read(final byte[] b, final int off, final int len) throws IOException {
412 checkOpen();
413 if (len == 0) {
414 return 0;
415 }
416 var res = available();
417 if (res == 0) {
418 res = makeAvailable();
419 if (res == 0) {
420 return -1;
421 }
422 }
423 res = Math.min(res, len);
424 System.arraycopy(buffer, head, b, off, res);
425 head += res;
426 total += res;
427 return res;
428 }
429
430 /**
431 * Skips the given number of bytes.
432 *
433 * @param bytes Number of bytes to skip.
434 * @return The number of bytes, which have actually been skipped.
435 * @throws IOException An I/O error occurred.
436 */
437 @Override
438 public long skip(final long bytes) throws IOException {
439 checkOpen();
440 var available = available();
441 if (available == 0) {
442 available = makeAvailable();
443 if (available == 0) {
444 return 0;
445 }
446 }
447 // Fix "Implicit narrowing conversion in compound assignment"
448 // https://github.com/apache/commons-fileupload/security/code-scanning/118
449 // Math.min always returns an int because available is an int.
450 final var res = Math.toIntExact(Math.min(available, bytes));
451 head += res;
452 return res;
453 }
454
455 }
456
/**
 * Signals that the input stream fails to follow the required syntax, for example because it ends before a boundary was found.
 */
public static class MalformedStreamException extends FileUploadException {

    /**
     * The UID to use when serializing this instance.
     */
    private static final long serialVersionUID = 2;

    /**
     * Constructs a {@code MalformedStreamException} with the specified detail message.
     *
     * @param message The detail message.
     */
    public MalformedStreamException(final String message) {
        super(message);
    }

    /**
     * Constructs a {@code MalformedStreamException} with the specified detail message and cause.
     *
     * @param message The detail message.
     * @param cause   The cause (which is saved for later retrieval by the {@link #getCause()} method). (A null value is permitted, and indicates that the
     *                cause is nonexistent or unknown.)
     */
    public MalformedStreamException(final String message, final Throwable cause) {
        super(message, cause);
    }

}
488
489 /**
490 * Internal class, which is used to invoke the {@link ProgressListener}.
491 */
492 public static class ProgressNotifier {
493
494 /**
495 * The listener to invoke.
496 */
497 private final ProgressListener progressListener;
498
499 /**
500 * Number of expected bytes, if known, or -1.
501 */
502 private final long contentLength;
503
504 /**
505 * Number of bytes, which have been read so far.
506 */
507 private long bytesRead;
508
509 /**
510 * Number of items, which have been read so far.
511 */
512 private int items;
513
514 /**
515 * Creates a new instance with the given listener and content length.
516 *
517 * @param progressListener The listener to invoke.
518 * @param contentLength The expected content length.
519 */
520 public ProgressNotifier(final ProgressListener progressListener, final long contentLength) {
521 this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
522 this.contentLength = contentLength;
523 }
524
525 /**
526 * Called to indicate that bytes have been read.
527 *
528 * @param byteCount Number of bytes, which have been read.
529 */
530 void noteBytesRead(final int byteCount) {
531 //
532 // Indicates, that the given number of bytes have been read from the input stream.
533 //
534 bytesRead += byteCount;
535 notifyListener();
536 }
537
538 /**
539 * Called to indicate, that a new file item has been detected.
540 */
541 public void noteItem() {
542 ++items;
543 notifyListener();
544 }
545
546 /**
547 * Called for notifying the listener.
548 */
549 private void notifyListener() {
550 progressListener.update(bytesRead, contentLength, items);
551 }
552
553 }
554
/**
 * The Carriage Return ASCII character value.
 */
public static final byte CR = 0x0D;

/**
 * The Line Feed ASCII character value.
 */
public static final byte LF = 0x0A;

/**
 * The dash (-) ASCII character value.
 */
public static final byte DASH = 0x2D;

/**
 * The default length of the buffer used for processing a request.
 */
static final int DEFAULT_BUFSIZE = 4096;

/**
 * Default per part header size limit in bytes.
 *
 * @since 2.0.0-M4
 */
public static final int DEFAULT_PART_HEADER_SIZE_MAX = 512;

/**
 * A byte sequence that marks the end of {@code header-part} ({@code CRLFCRLF}).
 */
static final byte[] HEADER_SEPARATOR = { CR, LF, CR, LF };

/**
 * A byte sequence that follows a delimiter that will be followed by an encapsulation ({@code CRLF}).
 */
static final byte[] FIELD_SEPARATOR = { CR, LF };

/**
 * A byte sequence that follows a delimiter of the last encapsulation in the stream ({@code --}).
 */
static final byte[] STREAM_TERMINATOR = { DASH, DASH };

/**
 * A byte sequence that precedes a boundary ({@code CRLF--}).
 */
static final byte[] BOUNDARY_PREFIX = { CR, LF, DASH, DASH };
600
601 /**
602 * Compares {@code count} first bytes in the arrays {@code a} and {@code b}.
603 *
604 * @param a The first array to compare.
605 * @param b The second array to compare.
606 * @param count How many bytes should be compared.
607 * @return {@code true} if {@code count} first bytes in arrays {@code a} and {@code b} are equal.
608 */
609 static boolean arrayEquals(final byte[] a, final byte[] b, final int count) {
610 for (var i = 0; i < count; i++) {
611 if (a[i] != b[i]) {
612 return false;
613 }
614 }
615 return true;
616 }
617
/**
 * Constructs a new {@link Builder}.
 * <p>
 * The builder is the only way to create a {@link MultipartInput}; the constructor is private.
 * </p>
 *
 * @return a new {@link Builder}.
 */
public static Builder builder() {
    return new Builder();
}
626
/**
 * The input stream from which data is read.
 */
private final InputStream input;

/**
 * The length of the boundary token plus the leading {@code CRLF--}.
 * <p>
 * Not final: {@link #skipPreamble()} temporarily shortens it by two while searching for the first delimiter, which may lack the leading {@code CRLF}.
 * </p>
 */
private int boundaryLength;

/**
 * The amount of data, in bytes, that must be kept in the buffer in order to detect delimiters reliably.
 */
private final int keepRegion;

/**
 * The byte sequence that partitions the stream: {@code CRLF--} followed by the boundary token.
 */
private final byte[] boundary;

/**
 * The table for Knuth-Morris-Pratt search algorithm.
 */
private final int[] boundaryTable;

/**
 * The length of the buffer used for processing the request.
 */
private final int bufSize;

/**
 * The buffer used for processing the request.
 */
private final byte[] buffer;

/**
 * The index of first valid character in the buffer. <br>
 * {@code 0 <= head < bufSize}
 */
private int head;

/**
 * The index of last valid character in the buffer + 1. <br>
 * {@code 0 <= tail <= bufSize}
 */
private int tail;

/**
 * The content encoding to use when reading headers, or {@code null} for the platform default.
 */
private Charset headerCharset;

/**
 * The progress notifier, if any, or null.
 */
private final ProgressNotifier notifier;

/**
 * The maximum size of the headers in bytes.
 */
private final int maxPartHeaderSize;
688
689 /**
690 * Constructs a {@code MultipartInput} with a custom size buffer.
691 * <p>
692 * Note that the buffer must be at least big enough to contain the boundary string, plus 4 characters for CR/LF and double dash, plus at least one byte of
693 * data. Too small a buffer size setting will degrade performance.
694 * </p>
695 *
696 * @param input The {@code InputStream} to serve as a data source.
697 * @param boundary The token used for dividing the stream into {@code encapsulations}.
698 * @param bufferSize The size of the buffer to be used, in bytes.
699 * @param notifier The notifier, which is used for calling the progress listener, if any.
700 * @throws IOException Thrown if an I/O error occurs.
701 * @throws IllegalArgumentException If the buffer size is too small.
702 */
703 private MultipartInput(final Builder builder) throws IOException {
704 if (builder.boundary == null) {
705 throw new IllegalArgumentException("boundary may not be null");
706 }
707 // We prepend CR/LF to the boundary to chop trailing CR/LF from
708 // body-data tokens.
709 this.boundaryLength = builder.boundary.length + BOUNDARY_PREFIX.length;
710 if (builder.getBufferSize() < this.boundaryLength + 1) {
711 throw new IllegalArgumentException("The buffer size specified for the MultipartInput is too small");
712 }
713 this.input = builder.getInputStream();
714 this.bufSize = Math.max(builder.getBufferSize(), boundaryLength * 2);
715 this.buffer = new byte[this.bufSize];
716 this.notifier = builder.progressNotifier;
717 this.maxPartHeaderSize = builder.getMaxPartHeaderSize();
718 this.boundary = new byte[this.boundaryLength];
719 this.boundaryTable = new int[this.boundaryLength + 1];
720 this.keepRegion = this.boundary.length;
721 System.arraycopy(BOUNDARY_PREFIX, 0, this.boundary, 0, BOUNDARY_PREFIX.length);
722 System.arraycopy(builder.boundary, 0, this.boundary, BOUNDARY_PREFIX.length, builder.boundary.length);
723 computeBoundaryTable();
724 head = 0;
725 tail = 0;
726 }
727
728 /**
729 * Computes the table used for Knuth-Morris-Pratt search algorithm.
730 */
731 private void computeBoundaryTable() {
732 var position = 2;
733 var candidate = 0;
734
735 boundaryTable[0] = -1;
736 boundaryTable[1] = 0;
737
738 while (position <= boundaryLength) {
739 if (boundary[position - 1] == boundary[candidate]) {
740 boundaryTable[position] = candidate + 1;
741 candidate++;
742 position++;
743 } else if (candidate > 0) {
744 candidate = boundaryTable[candidate];
745 } else {
746 boundaryTable[position] = 0;
747 position++;
748 }
749 }
750 }
751
/**
 * Reads {@code body-data} from the current {@code encapsulation} and discards it.
 * <p>
 * Use this method to skip encapsulations you don't need or don't understand. The data is passed to {@link #readBodyData(OutputStream)} with a
 * {@link NullOutputStream} as the target.
 * </p>
 *
 * @return The amount of data discarded.
 * @throws MalformedStreamException if the stream ends unexpectedly.
 * @throws IOException              if an i/o error occurs.
 */
public long discardBodyData() throws MalformedStreamException, IOException {
    return readBodyData(NullOutputStream.INSTANCE);
}
765
766 /**
767 * Searches for a byte of specified value in the {@code buffer}, starting at the specified {@code position}.
768 *
769 * @param value The value to find.
770 * @param pos The starting position for searching.
771 * @return The position of byte found, counting from beginning of the {@code buffer}, or {@code -1} if not found.
772 */
773 protected int findByte(final byte value, final int pos) {
774 for (var i = pos; i < tail; i++) {
775 if (buffer[i] == value) {
776 return i;
777 }
778 }
779
780 return -1;
781 }
782
783 /**
784 * Searches for the {@code boundary} in the {@code buffer} region delimited by {@code head} and {@code tail}.
785 *
786 * @return The position of the boundary found, counting from the beginning of the {@code buffer}, or {@code -1} if not found.
787 */
788 protected int findSeparator() {
789 var bufferPos = this.head;
790 var tablePos = 0;
791 while (bufferPos < this.tail) {
792 while (tablePos >= 0 && buffer[bufferPos] != boundary[tablePos]) {
793 tablePos = boundaryTable[tablePos];
794 }
795 bufferPos++;
796 tablePos++;
797 if (tablePos == boundaryLength) {
798 return bufferPos - boundaryLength;
799 }
800 }
801 return -1;
802 }
803
/**
 * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the platform default encoding is
 * used.
 *
 * @return The encoding used to read part headers, or {@code null} if none was set.
 */
public Charset getHeaderCharset() {
    return headerCharset;
}
813
/**
 * Returns the per part size limit for headers.
 * <p>
 * {@link #readHeaders()} treats the value {@code -1} as "no limit".
 * </p>
 *
 * @return The maximum size of the headers in bytes.
 * @since 2.0.0-M5
 */
public int getMaxPartHeaderSize() {
    return maxPartHeaderSize;
}
823
/**
 * Creates a new {@link ItemInputStream}.
 * <p>
 * The returned stream reads from the buffer of this instance, beginning at the current position.
 * </p>
 *
 * @return A new instance of {@link ItemInputStream}.
 */
public ItemInputStream newInputStream() {
    return new ItemInputStream();
}
832
833 /**
834 * Reads {@code body-data} from the current {@code encapsulation} and writes its contents into the output {@code Stream}.
835 * <p>
836 * Arbitrary large amounts of data can be processed by this method using a constant size buffer. (see {@link MultipartInput#builder()}).
837 * </p>
838 *
839 * @param output The {@code Stream} to write data into. May be null, in which case this method is equivalent to {@link #discardBodyData()}.
840 * @return the amount of data written.
841 * @throws MalformedStreamException if the stream ends unexpectedly.
842 * @throws IOException if an i/o error occurs.
843 */
844 public long readBodyData(final OutputStream output) throws MalformedStreamException, IOException {
845 try (var inputStream = newInputStream()) {
846 return IOUtils.copyLarge(inputStream, output);
847 }
848 }
849
850 /**
851 * Skips a {@code boundary} token, and checks whether more {@code encapsulations} are contained in the stream.
852 *
853 * @return {@code true} if there are more encapsulations in this stream; {@code false} otherwise.
854 * @throws FileUploadSizeException if the bytes read from the stream exceeded the size limits
855 * @throws MalformedStreamException if the stream ends unexpectedly or fails to follow required syntax.
856 */
857 public boolean readBoundary() throws FileUploadSizeException, MalformedStreamException {
858 final var marker = new byte[2];
859 final boolean nextChunk;
860 head += boundaryLength;
861 try {
862 marker[0] = readByte();
863 if (marker[0] == LF) {
864 // Work around IE5 Mac bug with input type=image.
865 // Because the boundary delimiter, not including the trailing
866 // CRLF, must not appear within any file (RFC 2046, section
867 // 5.1.1), we know the missing CR is due to a buggy browser
868 // rather than a file containing something similar to a
869 // boundary.
870 return true;
871 }
872
873 marker[1] = readByte();
874 if (arrayEquals(marker, STREAM_TERMINATOR, 2)) {
875 nextChunk = false;
876 } else if (arrayEquals(marker, FIELD_SEPARATOR, 2)) {
877 nextChunk = true;
878 } else {
879 throw new MalformedStreamException("Unexpected characters follow a boundary");
880 }
881 } catch (final FileUploadSizeException e) {
882 throw e;
883 } catch (final IOException e) {
884 throw new MalformedStreamException("Stream ended unexpectedly", e);
885 }
886 return nextChunk;
887 }
888
889 /**
890 * Reads a byte from the {@code buffer}, and refills it as necessary.
891 *
892 * @return The next byte from the input stream.
893 * @throws IOException if there is no more data available.
894 */
895 public byte readByte() throws IOException {
896 // Buffer depleted ?
897 if (head == tail) {
898 head = 0;
899 // Refill.
900 tail = input.read(buffer, head, bufSize);
901 if (tail == -1) {
902 // No more data available.
903 throw new IOException("No more data is available");
904 }
905 if (notifier != null) {
906 notifier.noteBytesRead(tail);
907 }
908 }
909 return buffer[head++];
910 }
911
912 /**
913 * Reads the {@code header-part} of the current {@code encapsulation}.
914 * <p>
915 * Headers are returned verbatim to the input stream, including the trailing {@code CRLF} marker. Parsing is left to the application.
916 * </p>
917 * <p>
918 * <strong>TODO</strong> allow limiting maximum header size to protect against abuse.
919 * </p>
920 *
921 * @return The {@code header-part} of the current encapsulation.
922 * @throws FileUploadSizeException if the bytes read from the stream exceeded the size limits.
923 * @throws MalformedStreamException if the stream ends unexpectedly.
924 */
925 public String readHeaders() throws FileUploadSizeException, MalformedStreamException {
926 var i = 0;
927 byte b;
928 // to support multi-byte characters
929 final var baos = new ByteArrayOutputStream();
930 var size = 0;
931 while (i < HEADER_SEPARATOR.length) {
932 try {
933 b = readByte();
934 } catch (final FileUploadSizeException e) {
935 // wraps a FileUploadSizeException, re-throw as it will be unwrapped later
936 throw e;
937 } catch (final IOException e) {
938 throw new MalformedStreamException("Stream ended unexpectedly", e);
939 }
940 final int phsm = getMaxPartHeaderSize();
941 if (phsm != -1 && ++size > phsm) {
942 throw new FileUploadSizeException(
943 String.format("Header section has more than %s bytes (maybe it is not properly terminated)", Integer.valueOf(phsm)), phsm, size);
944 }
945 if (b == HEADER_SEPARATOR[i]) {
946 i++;
947 } else {
948 i = 0;
949 }
950 baos.write(b);
951 }
952 try {
953 return baos.toString(Charsets.toCharset(headerCharset, Charset.defaultCharset()).name());
954 } catch (final UnsupportedEncodingException e) {
955 // not possible
956 throw new IllegalStateException(e);
957 }
958 }
959
960 /**
961 * Changes the boundary token used for partitioning the stream.
962 * <p>
963 * This method allows single pass processing of nested multipart streams.
964 * </p>
965 * <p>
966 * The boundary token of the nested stream is {@code required} to be of the same length as the boundary token in parent stream.
967 * </p>
968 * <p>
969 * Restoring the parent stream boundary token after processing of a nested stream is left to the application.
970 * </p>
971 *
972 * @param boundary The boundary to be used for parsing of the nested stream.
973 * @throws FileUploadBoundaryException if the {@code boundary} has a different length than the one being currently parsed.
974 */
975 public void setBoundary(final byte[] boundary) throws FileUploadBoundaryException {
976 if (boundary.length != boundaryLength - BOUNDARY_PREFIX.length) {
977 throw new FileUploadBoundaryException("The length of a boundary token cannot be changed");
978 }
979 System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length);
980 computeBoundaryTable();
981 }
982
/**
 * Sets the character encoding to be used when reading the headers of individual parts. When not specified, or {@code null}, the platform default encoding
 * is used.
 *
 * @param headerCharset The encoding used to read part headers, may be {@code null}.
 */
public void setHeaderCharset(final Charset headerCharset) {
    this.headerCharset = headerCharset;
}
992
/**
 * Finds the beginning of the first {@code encapsulation}.
 * <p>
 * While searching, the leading {@code CRLF} is removed from the delimiter, because the first delimiter may appear at the very start of the stream. The full
 * delimiter is restored before this method returns, whether or not it completes normally.
 * </p>
 *
 * @return {@code true} if an {@code encapsulation} was found in the stream; {@code false} if the stream turned out to be malformed.
 * @throws IOException if an i/o error occurs.
 */
public boolean skipPreamble() throws IOException {
    // First delimiter may be not preceded with a CRLF.
    // Shift the delimiter two bytes to the left, dropping CRLF, and search for the shortened form.
    System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
    boundaryLength = boundary.length - 2;
    computeBoundaryTable();
    try {
        // Discard all data up to the delimiter.
        discardBodyData();

        // Read boundary - if succeeded, the stream contains an
        // encapsulation.
        return readBoundary();
    } catch (final MalformedStreamException e) {
        // No usable delimiter: report "no encapsulation" instead of failing.
        return false;
    } finally {
        // Restore delimiter: shift it back to the right and put CRLF in front again.
        System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
        boundaryLength = boundary.length;
        boundary[0] = CR;
        boundary[1] = LF;
        computeBoundaryTable();
    }
}
1022
1023 }