View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.fileupload2.core;
18  
19  import java.io.ByteArrayOutputStream;
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.io.OutputStream;
23  import java.io.UnsupportedEncodingException;
24  import java.nio.charset.Charset;
25  
26  import org.apache.commons.fileupload2.core.FileItemInput.ItemSkippedException;
27  import org.apache.commons.io.Charsets;
28  import org.apache.commons.io.IOUtils;
29  import org.apache.commons.io.build.AbstractOrigin;
30  import org.apache.commons.io.build.AbstractStreamBuilder;
31  import org.apache.commons.io.output.NullOutputStream;
32  
33  /**
34   * Low-level API for processing file uploads.
35   *
36   * <p>
37   * This class can be used to process data streams conforming to MIME 'multipart' format as defined in <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC
38   * 1867</a>. Arbitrarily large amounts of data in the stream can be processed under constant memory usage.
39   * </p>
40   * <p>
41   * The format of the stream is defined in the following way:
42   * </p>
43   * <pre>
44   *   multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
45   *   encapsulation := delimiter body CRLF<br>
46   *   delimiter := "--" boundary CRLF<br>
47   *   close-delimiter := "--" boundary "--"<br>
48   *   preamble := &lt;ignore&gt;<br>
49   *   epilogue := &lt;ignore&gt;<br>
50   *   body := header-part CRLF body-part<br>
51   *   header-part := 1*header CRLF<br>
52   *   header := header-name ":" header-value<br>
53   *   header-name := &lt;printable ASCII characters except ":"&gt;<br>
54   *   header-value := &lt;any ASCII characters except CR &amp; LF&gt;<br>
55   *   body-data := &lt;arbitrary data&gt;<br>
56   * </pre>
57   *
58   * <p>
59   * Note that body-data can contain another multipart entity. There is limited support for single pass processing of such nested streams. The nested stream is
60   * <strong>required</strong> to have a boundary token of the same length as the parent stream (see {@link #setBoundary(byte[])}).
61   * </p>
62   * <p>
63   * Here is an example of usage of this class:
64   * </p>
65   *
66   * <pre>
67   * try {
68   *     MultipartInput multipartStream = MultipartInput.builder()
69   *             .setBoundary(boundary)
70   *             .setInputStream(input)
71   *             .get();
72   *     boolean nextPart = multipartStream.skipPreamble();
73   *     OutputStream output;
74   *     while (nextPart) {
75   *         String header = multipartStream.readHeaders();
76   *         // process headers
77   *         // create some output stream
78   *         multipartStream.readBodyData(output);
79   *         nextPart = multipartStream.readBoundary();
80   *     }
81   * } catch (MultipartInput.MalformedStreamException e) {
82   *     // the stream failed to follow required syntax
83   * } catch (IOException e) {
84   *     // a read or write error occurred
85   * }
86   * </pre>
87   */
88  public final class MultipartInput {
89  
90      /**
91       * Builds a new {@link MultipartInput} instance.
92       * <p>
93       * For example:
94       * </p>
95       *
96       * <pre>{@code
97       * MultipartInput factory = MultipartInput.builder().setPath(path).setBufferSize(DEFAULT_THRESHOLD).get();
98       * }
99       * </pre>
100      */
101     public static class Builder extends AbstractStreamBuilder<MultipartInput, Builder> {
102 
103         /**
104          * Boundary.
105          */
106         private byte[] boundary;
107 
108         /**
109          * Progress notifier.
110          */
111         private ProgressNotifier progressNotifier;
112 
113         /**
114          * The  per part size limit for headers.
115          */
116         private int maxPartHeaderSize = DEFAULT_PART_HEADER_SIZE_MAX;
117 
118         /**
119          * Constructs a new instance.
120          */
121         public Builder() {
122             setBufferSizeDefault(DEFAULT_BUFSIZE);
123         }
124 
125         /**
126          * Constructs a new instance.
127          * <p>
128          * This builder uses the InputStream, buffer size, boundary and progress notifier aspects.
129          * </p>
130          * <p>
131          * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an
132          * {@link UnsupportedOperationException}.
133          * </p>
134          *
135          * @return a new instance.
136          * @throws IOException                   if an I/O error occurs.
137          * @throws UnsupportedOperationException if the origin cannot provide a Path.
138          * @see AbstractOrigin#getReader(Charset)
139          */
140         @Override
141         public MultipartInput get() throws IOException {
142             return new MultipartInput(this);
143         }
144 
145         /**
146          * Gets the per part size limit for headers.
147          *
148          * @return The maximum size of the headers in bytes.
149          * @since 2.0.0-M5
150          */
151         public int getMaxPartHeaderSize() {
152             return maxPartHeaderSize;
153         }
154 
155         /**
156          * Sets the boundary.
157          *
158          * @param boundary the boundary.
159          * @return {@code this} instance.
160          */
161         public Builder setBoundary(final byte[] boundary) {
162             this.boundary = boundary;
163             return this;
164         }
165 
166         /**
167          * Sets the per part size limit for headers.
168          * @param partHeaderSizeMax The maximum size of the headers in bytes.
169          * @return This builder.
170          * @since 2.0.0-M5
171          */
172         public Builder setMaxPartHeaderSize(final int partHeaderSizeMax) {
173             this.maxPartHeaderSize = partHeaderSizeMax;
174             return this;
175         }
176 
177         /**
178          * Sets the progress notifier.
179          *
180          * @param progressNotifier progress notifier.
181          * @return {@code this} instance.
182          */
183         public Builder setProgressNotifier(final ProgressNotifier progressNotifier) {
184             this.progressNotifier = progressNotifier;
185             return this;
186         }
187     }
188 
189     /**
190      * Signals an attempt to set an invalid boundary token.
191      */
192     public static class FileUploadBoundaryException extends FileUploadException {
193 
194         /**
195          * The UID to use when serializing this instance.
196          */
197         private static final long serialVersionUID = 2;
198 
199         /**
200          * Constructs an instance with the specified detail message.
201          *
202          * @param message The detail message (which is saved for later retrieval by the {@link #getMessage()} method)
203          */
204         public FileUploadBoundaryException(final String message) {
205             super(message);
206         }
207 
208     }
209 
210     /**
211      * An {@link InputStream} for reading an items contents.
212      */
213     public class ItemInputStream extends InputStream {
214 
215         /**
216          * Offset when converting negative bytes to integers.
217          */
218         private static final int BYTE_POSITIVE_OFFSET = 256;
219 
220         /**
221          * The number of bytes, which have been read so far.
222          */
223         private long total;
224 
225         /**
226          * The number of bytes, which must be hold, because they might be a part of the boundary.
227          */
228         private int pad;
229 
230         /**
231          * The current offset in the buffer.
232          */
233         private int pos;
234 
235         /**
236          * Whether the stream is already closed.
237          */
238         private boolean closed;
239 
240         /**
241          * Creates a new instance.
242          */
243         ItemInputStream() {
244             findSeparator();
245         }
246 
247         /**
248          * Returns the number of bytes, which are currently available, without blocking.
249          *
250          * @throws IOException An I/O error occurs.
251          * @return Number of bytes in the buffer.
252          */
253         @Override
254         public int available() throws IOException {
255             if (pos == -1) {
256                 return tail - head - pad;
257             }
258             return pos - head;
259         }
260 
261         private void checkOpen() throws ItemSkippedException {
262             if (closed) {
263                 throw new FileItemInput.ItemSkippedException("checkOpen()");
264             }
265         }
266 
267         /**
268          * Closes the input stream.
269          *
270          * @throws IOException An I/O error occurred.
271          */
272         @Override
273         public void close() throws IOException {
274             close(false);
275         }
276 
277         /**
278          * Closes the input stream.
279          *
280          * @param closeUnderlying Whether to close the underlying stream (hard close)
281          * @throws IOException An I/O error occurred.
282          */
283         public void close(final boolean closeUnderlying) throws IOException {
284             if (closed) {
285                 return;
286             }
287             if (closeUnderlying) {
288                 closed = true;
289                 input.close();
290             } else {
291                 for (;;) {
292                     var avail = available();
293                     if (avail == 0) {
294                         avail = makeAvailable();
295                         if (avail == 0) {
296                             break;
297                         }
298                     }
299                     if (skip(avail) != avail) {
300                         // TODO What to do?
301                     }
302                 }
303             }
304             closed = true;
305         }
306 
307         /**
308          * Called for finding the separator.
309          */
310         private void findSeparator() {
311             pos = MultipartInput.this.findSeparator();
312             if (pos == -1) {
313                 if (tail - head > keepRegion) {
314                     pad = keepRegion;
315                 } else {
316                     pad = tail - head;
317                 }
318             }
319         }
320 
321         /**
322          * Gets the number of bytes, which have been read by the stream.
323          *
324          * @return Number of bytes, which have been read so far.
325          */
326         public long getBytesRead() {
327             return total;
328         }
329 
330         /**
331          * Tests whether this instance is closed.
332          *
333          * @return whether this instance is closed.
334          */
335         public boolean isClosed() {
336             return closed;
337         }
338 
339         /**
340          * Attempts to read more data.
341          *
342          * @return Number of available bytes
343          * @throws IOException An I/O error occurred.
344          */
345         private int makeAvailable() throws IOException {
346             if (pos != -1) {
347                 return 0;
348             }
349 
350             // Move the data to the beginning of the buffer.
351             total += tail - head - pad;
352             System.arraycopy(buffer, tail - pad, buffer, 0, pad);
353 
354             // Refill buffer with new data.
355             head = 0;
356             tail = pad;
357 
358             for (;;) {
359                 final var bytesRead = input.read(buffer, tail, bufSize - tail);
360                 if (bytesRead == -1) {
361                     // The last pad amount is left in the buffer.
362                     // Boundary can't be in there so signal an error
363                     // condition.
364                     final var msg = "Stream ended unexpectedly";
365                     throw new MalformedStreamException(msg);
366                 }
367                 if (notifier != null) {
368                     notifier.noteBytesRead(bytesRead);
369                 }
370                 tail += bytesRead;
371 
372                 findSeparator();
373                 final var av = available();
374 
375                 if (av > 0 || pos != -1) {
376                     return av;
377                 }
378             }
379         }
380 
381         /**
382          * Reads the next byte in the stream.
383          *
384          * @return The next byte in the stream, as a non-negative integer, or -1 for EOF.
385          * @throws IOException An I/O error occurred.
386          */
387         @Override
388         public int read() throws IOException {
389             checkOpen();
390             if (available() == 0 && makeAvailable() == 0) {
391                 return -1;
392             }
393             ++total;
394             final int b = buffer[head++];
395             if (b >= 0) {
396                 return b;
397             }
398             return b + BYTE_POSITIVE_OFFSET;
399         }
400 
401         /**
402          * Reads bytes into the given buffer.
403          *
404          * @param b   The destination buffer, where to write to.
405          * @param off Offset of the first byte in the buffer.
406          * @param len Maximum number of bytes to read.
407          * @return Number of bytes, which have been actually read, or -1 for EOF.
408          * @throws IOException An I/O error occurred.
409          */
410         @Override
411         public int read(final byte[] b, final int off, final int len) throws IOException {
412             checkOpen();
413             if (len == 0) {
414                 return 0;
415             }
416             var res = available();
417             if (res == 0) {
418                 res = makeAvailable();
419                 if (res == 0) {
420                     return -1;
421                 }
422             }
423             res = Math.min(res, len);
424             System.arraycopy(buffer, head, b, off, res);
425             head += res;
426             total += res;
427             return res;
428         }
429 
430         /**
431          * Skips the given number of bytes.
432          *
433          * @param bytes Number of bytes to skip.
434          * @return The number of bytes, which have actually been skipped.
435          * @throws IOException An I/O error occurred.
436          */
437         @Override
438         public long skip(final long bytes) throws IOException {
439             checkOpen();
440             var available = available();
441             if (available == 0) {
442                 available = makeAvailable();
443                 if (available == 0) {
444                     return 0;
445                 }
446             }
447             // Fix "Implicit narrowing conversion in compound assignment"
448             // https://github.com/apache/commons-fileupload/security/code-scanning/118
449             // Math.min always returns an int because available is an int.
450             final var res = Math.toIntExact(Math.min(available, bytes));
451             head += res;
452             return res;
453         }
454 
455     }
456 
457     /**
458      * Signals that the input stream fails to follow the required syntax.
459      */
460     public static class MalformedStreamException extends FileUploadException {
461 
462         /**
463          * The UID to use when serializing this instance.
464          */
465         private static final long serialVersionUID = 2;
466 
467         /**
468          * Constructs an {@code MalformedStreamException} with the specified detail message.
469          *
470          * @param message The detail message.
471          */
472         public MalformedStreamException(final String message) {
473             super(message);
474         }
475 
476         /**
477          * Constructs an {@code MalformedStreamException} with the specified detail message.
478          *
479          * @param message The detail message.
480          * @param cause   The cause (which is saved for later retrieval by the {@link #getCause()} method). (A null value is permitted, and indicates that the
481          *                cause is nonexistent or unknown.)
482          */
483         public MalformedStreamException(final String message, final Throwable cause) {
484             super(message, cause);
485         }
486 
487     }
488 
489     /**
490      * Internal class, which is used to invoke the {@link ProgressListener}.
491      */
492     public static class ProgressNotifier {
493 
494         /**
495          * The listener to invoke.
496          */
497         private final ProgressListener progressListener;
498 
499         /**
500          * Number of expected bytes, if known, or -1.
501          */
502         private final long contentLength;
503 
504         /**
505          * Number of bytes, which have been read so far.
506          */
507         private long bytesRead;
508 
509         /**
510          * Number of items, which have been read so far.
511          */
512         private int items;
513 
514         /**
515          * Creates a new instance with the given listener and content length.
516          *
517          * @param progressListener The listener to invoke.
518          * @param contentLength    The expected content length.
519          */
520         public ProgressNotifier(final ProgressListener progressListener, final long contentLength) {
521             this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
522             this.contentLength = contentLength;
523         }
524 
525         /**
526          * Called to indicate that bytes have been read.
527          *
528          * @param byteCount Number of bytes, which have been read.
529          */
530         void noteBytesRead(final int byteCount) {
531             //
532             // Indicates, that the given number of bytes have been read from the input stream.
533             //
534             bytesRead += byteCount;
535             notifyListener();
536         }
537 
538         /**
539          * Called to indicate, that a new file item has been detected.
540          */
541         public void noteItem() {
542             ++items;
543             notifyListener();
544         }
545 
546         /**
547          * Called for notifying the listener.
548          */
549         private void notifyListener() {
550             progressListener.update(bytesRead, contentLength, items);
551         }
552 
553     }
554 
555     /**
556      * The Carriage Return ASCII character value.
557      */
558     public static final byte CR = 0x0D;
559 
560     /**
561      * The Line Feed ASCII character value.
562      */
563     public static final byte LF = 0x0A;
564 
565     /**
566      * The dash (-) ASCII character value.
567      */
568     public static final byte DASH = 0x2D;
569 
570     /**
571      * The default length of the buffer used for processing a request.
572      */
573     static final int DEFAULT_BUFSIZE = 4096;
574 
575     /**
576      * Default per part header size limit in bytes.
577      * @since 2.0.0-M4
578      */
579     public static final int DEFAULT_PART_HEADER_SIZE_MAX = 512;
580 
581     /**
582      * A byte sequence that marks the end of {@code header-part} ({@code CRLFCRLF}).
583      */
584     static final byte[] HEADER_SEPARATOR = { CR, LF, CR, LF };
585 
586     /**
587      * A byte sequence that follows a delimiter that will be followed by an encapsulation ({@code CRLF}).
588      */
589     static final byte[] FIELD_SEPARATOR = { CR, LF };
590 
591     /**
592      * A byte sequence that follows a delimiter of the last encapsulation in the stream ({@code --}).
593      */
594     static final byte[] STREAM_TERMINATOR = { DASH, DASH };
595 
596     /**
597      * A byte sequence that precedes a boundary ({@code CRLF--}).
598      */
599     static final byte[] BOUNDARY_PREFIX = { CR, LF, DASH, DASH };
600 
601     /**
602      * Compares {@code count} first bytes in the arrays {@code a} and {@code b}.
603      *
604      * @param a     The first array to compare.
605      * @param b     The second array to compare.
606      * @param count How many bytes should be compared.
607      * @return {@code true} if {@code count} first bytes in arrays {@code a} and {@code b} are equal.
608      */
609     static boolean arrayEquals(final byte[] a, final byte[] b, final int count) {
610         for (var i = 0; i < count; i++) {
611             if (a[i] != b[i]) {
612                 return false;
613             }
614         }
615         return true;
616     }
617 
618     /**
619      * Constructs a new {@link Builder}.
620      *
621      * @return a new {@link Builder}.
622      */
623     public static Builder builder() {
624         return new Builder();
625     }
626 
627     /**
628      * The input stream from which data is read.
629      */
630     private final InputStream input;
631 
632     /**
633      * The length of the boundary token plus the leading {@code CRLF--}.
634      */
635     private int boundaryLength;
636 
637     /**
638      * The amount of data, in bytes, that must be kept in the buffer in order to detect delimiters reliably.
639      */
640     private final int keepRegion;
641 
642     /**
643      * The byte sequence that partitions the stream.
644      */
645     private final byte[] boundary;
646 
647     /**
648      * The table for Knuth-Morris-Pratt search algorithm.
649      */
650     private final int[] boundaryTable;
651 
652     /**
653      * The length of the buffer used for processing the request.
654      */
655     private final int bufSize;
656 
657     /**
658      * The buffer used for processing the request.
659      */
660     private final byte[] buffer;
661 
662     /**
663      * The index of first valid character in the buffer. <br>
664      * 0 <= head < bufSize
665      */
666     private int head;
667 
668     /**
669      * The index of last valid character in the buffer + 1. <br>
670      * 0 <= tail <= bufSize
671      */
672     private int tail;
673 
674     /**
675      * The content encoding to use when reading headers.
676      */
677     private Charset headerCharset;
678 
679     /**
680      * The progress notifier, if any, or null.
681      */
682     private final ProgressNotifier notifier;
683 
684     /**
685      * The maximum size of the headers in bytes.
686      */
687     private final int maxPartHeaderSize;
688 
689     /**
690      * Constructs a {@code MultipartInput} with a custom size buffer.
691      * <p>
692      * Note that the buffer must be at least big enough to contain the boundary string, plus 4 characters for CR/LF and double dash, plus at least one byte of
693      * data. Too small a buffer size setting will degrade performance.
694      * </p>
695      *
696      * @param input      The {@code InputStream} to serve as a data source.
697      * @param boundary   The token used for dividing the stream into {@code encapsulations}.
698      * @param bufferSize The size of the buffer to be used, in bytes.
699      * @param notifier   The notifier, which is used for calling the progress listener, if any.
700      * @throws IOException Thrown if an I/O error occurs.
701      * @throws IllegalArgumentException If the buffer size is too small.
702      */
703     private MultipartInput(final Builder builder) throws IOException {
704         if (builder.boundary == null) {
705             throw new IllegalArgumentException("boundary may not be null");
706         }
707         // We prepend CR/LF to the boundary to chop trailing CR/LF from
708         // body-data tokens.
709         this.boundaryLength = builder.boundary.length + BOUNDARY_PREFIX.length;
710         if (builder.getBufferSize() < this.boundaryLength + 1) {
711             throw new IllegalArgumentException("The buffer size specified for the MultipartInput is too small");
712         }
713         this.input = builder.getInputStream();
714         this.bufSize = Math.max(builder.getBufferSize(), boundaryLength * 2);
715         this.buffer = new byte[this.bufSize];
716         this.notifier = builder.progressNotifier;
717         this.maxPartHeaderSize = builder.getMaxPartHeaderSize();
718         this.boundary = new byte[this.boundaryLength];
719         this.boundaryTable = new int[this.boundaryLength + 1];
720         this.keepRegion = this.boundary.length;
721         System.arraycopy(BOUNDARY_PREFIX, 0, this.boundary, 0, BOUNDARY_PREFIX.length);
722         System.arraycopy(builder.boundary, 0, this.boundary, BOUNDARY_PREFIX.length, builder.boundary.length);
723         computeBoundaryTable();
724         head = 0;
725         tail = 0;
726     }
727 
728     /**
729      * Computes the table used for Knuth-Morris-Pratt search algorithm.
730      */
731     private void computeBoundaryTable() {
732         var position = 2;
733         var candidate = 0;
734 
735         boundaryTable[0] = -1;
736         boundaryTable[1] = 0;
737 
738         while (position <= boundaryLength) {
739             if (boundary[position - 1] == boundary[candidate]) {
740                 boundaryTable[position] = candidate + 1;
741                 candidate++;
742                 position++;
743             } else if (candidate > 0) {
744                 candidate = boundaryTable[candidate];
745             } else {
746                 boundaryTable[position] = 0;
747                 position++;
748             }
749         }
750     }
751 
752     /**
753      * Reads {@code body-data} from the current {@code encapsulation} and discards it.
754      * <p>
755      * Use this method to skip encapsulations you don't need or don't understand.
756      * </p>
757      *
758      * @return The amount of data discarded.
759      * @throws MalformedStreamException if the stream ends unexpectedly.
760      * @throws IOException              if an i/o error occurs.
761      */
762     public long discardBodyData() throws MalformedStreamException, IOException {
763         return readBodyData(NullOutputStream.INSTANCE);
764     }
765 
766     /**
767      * Searches for a byte of specified value in the {@code buffer}, starting at the specified {@code position}.
768      *
769      * @param value The value to find.
770      * @param pos   The starting position for searching.
771      * @return The position of byte found, counting from beginning of the {@code buffer}, or {@code -1} if not found.
772      */
773     protected int findByte(final byte value, final int pos) {
774         for (var i = pos; i < tail; i++) {
775             if (buffer[i] == value) {
776                 return i;
777             }
778         }
779 
780         return -1;
781     }
782 
783     /**
784      * Searches for the {@code boundary} in the {@code buffer} region delimited by {@code head} and {@code tail}.
785      *
786      * @return The position of the boundary found, counting from the beginning of the {@code buffer}, or {@code -1} if not found.
787      */
788     protected int findSeparator() {
789         var bufferPos = this.head;
790         var tablePos = 0;
791         while (bufferPos < this.tail) {
792             while (tablePos >= 0 && buffer[bufferPos] != boundary[tablePos]) {
793                 tablePos = boundaryTable[tablePos];
794             }
795             bufferPos++;
796             tablePos++;
797             if (tablePos == boundaryLength) {
798                 return bufferPos - boundaryLength;
799             }
800         }
801         return -1;
802     }
803 
804     /**
805      * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the platform default encoding is
806      * used.
807      *
808      * @return The encoding used to read part headers.
809      */
810     public Charset getHeaderCharset() {
811         return headerCharset;
812     }
813 
814     /**
815      * Returns the per part size limit for headers.
816      *
817      * @return The maximum size of the headers in bytes.
818      * @since 2.0.0-M5
819      */
820     public int getMaxPartHeaderSize() {
821         return maxPartHeaderSize;
822     }
823 
824     /**
825      * Creates a new {@link ItemInputStream}.
826      *
827      * @return A new instance of {@link ItemInputStream}.
828      */
829     public ItemInputStream newInputStream() {
830         return new ItemInputStream();
831     }
832 
833     /**
834      * Reads {@code body-data} from the current {@code encapsulation} and writes its contents into the output {@code Stream}.
835      * <p>
836      * Arbitrary large amounts of data can be processed by this method using a constant size buffer. (see {@link MultipartInput#builder()}).
837      * </p>
838      *
839      * @param output The {@code Stream} to write data into. May be null, in which case this method is equivalent to {@link #discardBodyData()}.
840      * @return the amount of data written.
841      * @throws MalformedStreamException if the stream ends unexpectedly.
842      * @throws IOException              if an i/o error occurs.
843      */
844     public long readBodyData(final OutputStream output) throws MalformedStreamException, IOException {
845         try (var inputStream = newInputStream()) {
846             return IOUtils.copyLarge(inputStream, output);
847         }
848     }
849 
850     /**
851      * Skips a {@code boundary} token, and checks whether more {@code encapsulations} are contained in the stream.
852      *
853      * @return {@code true} if there are more encapsulations in this stream; {@code false} otherwise.
854      * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits
855      * @throws MalformedStreamException if the stream ends unexpectedly or fails to follow required syntax.
856      */
857     public boolean readBoundary() throws FileUploadSizeException, MalformedStreamException {
858         final var marker = new byte[2];
859         final boolean nextChunk;
860         head += boundaryLength;
861         try {
862             marker[0] = readByte();
863             if (marker[0] == LF) {
864                 // Work around IE5 Mac bug with input type=image.
865                 // Because the boundary delimiter, not including the trailing
866                 // CRLF, must not appear within any file (RFC 2046, section
867                 // 5.1.1), we know the missing CR is due to a buggy browser
868                 // rather than a file containing something similar to a
869                 // boundary.
870                 return true;
871             }
872 
873             marker[1] = readByte();
874             if (arrayEquals(marker, STREAM_TERMINATOR, 2)) {
875                 nextChunk = false;
876             } else if (arrayEquals(marker, FIELD_SEPARATOR, 2)) {
877                 nextChunk = true;
878             } else {
879                 throw new MalformedStreamException("Unexpected characters follow a boundary");
880             }
881         } catch (final FileUploadSizeException e) {
882             throw e;
883         } catch (final IOException e) {
884             throw new MalformedStreamException("Stream ended unexpectedly", e);
885         }
886         return nextChunk;
887     }
888 
889     /**
890      * Reads a byte from the {@code buffer}, and refills it as necessary.
891      *
892      * @return The next byte from the input stream.
893      * @throws IOException if there is no more data available.
894      */
895     public byte readByte() throws IOException {
896         // Buffer depleted ?
897         if (head == tail) {
898             head = 0;
899             // Refill.
900             tail = input.read(buffer, head, bufSize);
901             if (tail == -1) {
902                 // No more data available.
903                 throw new IOException("No more data is available");
904             }
905             if (notifier != null) {
906                 notifier.noteBytesRead(tail);
907             }
908         }
909         return buffer[head++];
910     }
911 
912     /**
913      * Reads the {@code header-part} of the current {@code encapsulation}.
914      * <p>
915      * Headers are returned verbatim to the input stream, including the trailing {@code CRLF} marker. Parsing is left to the application.
916      * </p>
917      * <p>
918      * <strong>TODO</strong> allow limiting maximum header size to protect against abuse.
919      * </p>
920      *
921      * @return The {@code header-part} of the current encapsulation.
922      * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits.
923      * @throws MalformedStreamException if the stream ends unexpectedly.
924      */
925     public String readHeaders() throws FileUploadSizeException, MalformedStreamException {
926         var i = 0;
927         byte b;
928         // to support multi-byte characters
929         final var baos = new ByteArrayOutputStream();
930         var size = 0;
931         while (i < HEADER_SEPARATOR.length) {
932             try {
933                 b = readByte();
934             } catch (final FileUploadSizeException e) {
935                 // wraps a FileUploadSizeException, re-throw as it will be unwrapped later
936                 throw e;
937             } catch (final IOException e) {
938                 throw new MalformedStreamException("Stream ended unexpectedly", e);
939             }
940             final int phsm = getMaxPartHeaderSize();
941             if (phsm != -1 && ++size > phsm) {
942                 throw new FileUploadSizeException(
943                         String.format("Header section has more than %s bytes (maybe it is not properly terminated)", Integer.valueOf(phsm)), phsm, size);
944             }
945             if (b == HEADER_SEPARATOR[i]) {
946                 i++;
947             } else {
948                 i = 0;
949             }
950             baos.write(b);
951         }
952         try {
953             return baos.toString(Charsets.toCharset(headerCharset, Charset.defaultCharset()).name());
954         } catch (final UnsupportedEncodingException e) {
955             // not possible
956             throw new IllegalStateException(e);
957         }
958     }
959 
960     /**
961      * Changes the boundary token used for partitioning the stream.
962      * <p>
963      * This method allows single pass processing of nested multipart streams.
964      * </p>
965      * <p>
966      * The boundary token of the nested stream is {@code required} to be of the same length as the boundary token in parent stream.
967      * </p>
968      * <p>
969      * Restoring the parent stream boundary token after processing of a nested stream is left to the application.
970      * </p>
971      *
972      * @param boundary The boundary to be used for parsing of the nested stream.
973      * @throws FileUploadBoundaryException if the {@code boundary} has a different length than the one being currently parsed.
974      */
975     public void setBoundary(final byte[] boundary) throws FileUploadBoundaryException {
976         if (boundary.length != boundaryLength - BOUNDARY_PREFIX.length) {
977             throw new FileUploadBoundaryException("The length of a boundary token cannot be changed");
978         }
979         System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length);
980         computeBoundaryTable();
981     }
982 
983     /**
984      * Sets the character encoding to be used when reading the headers of individual parts. When not specified, or {@code null}, the platform default encoding
985      * is used.
986      *
987      * @param headerCharset The encoding used to read part headers.
988      */
989     public void setHeaderCharset(final Charset headerCharset) {
990         this.headerCharset = headerCharset;
991     }
992 
993     /**
994      * Finds the beginning of the first {@code encapsulation}.
995      *
996      * @return {@code true} if an {@code encapsulation} was found in the stream.
997      * @throws IOException if an i/o error occurs.
998      */
999     public boolean skipPreamble() throws IOException {
1000         // First delimiter may be not preceded with a CRLF.
1001         System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
1002         boundaryLength = boundary.length - 2;
1003         computeBoundaryTable();
1004         try {
1005             // Discard all data up to the delimiter.
1006             discardBodyData();
1007 
1008             // Read boundary - if succeeded, the stream contains an
1009             // encapsulation.
1010             return readBoundary();
1011         } catch (final MalformedStreamException e) {
1012             return false;
1013         } finally {
1014             // Restore delimiter.
1015             System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
1016             boundaryLength = boundary.length;
1017             boundary[0] = CR;
1018             boundary[1] = LF;
1019             computeBoundaryTable();
1020         }
1021     }
1022 
1023 }