View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.fileupload2.core;
18  
19  import java.io.ByteArrayOutputStream;
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.io.OutputStream;
23  import java.io.UnsupportedEncodingException;
24  import java.nio.charset.Charset;
25  
26  import org.apache.commons.fileupload2.core.FileItemInput.ItemSkippedException;
27  import org.apache.commons.io.Charsets;
28  import org.apache.commons.io.IOUtils;
29  import org.apache.commons.io.build.AbstractOrigin;
30  import org.apache.commons.io.build.AbstractStreamBuilder;
31  import org.apache.commons.io.output.NullOutputStream;
32  
33  /**
34   * Low-level API for processing file uploads.
35   *
36   * <p>
37   * This class can be used to process data streams conforming to MIME 'multipart' format as defined in <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC
38   * 1867</a>. Arbitrarily large amounts of data in the stream can be processed under constant memory usage.
39   * </p>
40   * <p>
41   * The format of the stream is defined in the following way:
42   * </p>
43   * <code>
44   *   multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
45   *   encapsulation := delimiter body CRLF<br>
46   *   delimiter := "--" boundary CRLF<br>
47   *   close-delimiter := "--" boundary "--"<br>
48   *   preamble := &lt;ignore&gt;<br>
49   *   epilogue := &lt;ignore&gt;<br>
50   *   body := header-part CRLF body-data<br>
51   *   header-part := 1*header CRLF<br>
52   *   header := header-name ":" header-value<br>
53   *   header-name := &lt;printable ASCII characters except ":"&gt;<br>
54   *   header-value := &lt;any ASCII characters except CR &amp; LF&gt;<br>
55   *   body-data := &lt;arbitrary data&gt;<br>
56   * </code>
57   *
58   * <p>
59   * Note that body-data can contain another multipart entity. There is limited support for single pass processing of such nested streams. The nested stream is
60   * <strong>required</strong> to have a boundary token of the same length as the parent stream (see {@link #setBoundary(byte[])}).
61   * </p>
62   * <p>
63   * Here is an example of usage of this class:
64   * </p>
65   *
66   * <pre>
67   * try {
68   *     MultipartInput multipartStream = MultipartInput.builder().setInputStream(input).setBoundary(boundary).get();
69   *     boolean nextPart = multipartStream.skipPreamble();
70   *     OutputStream output;
71   *     while (nextPart) {
72   *         String header = multipartStream.readHeaders();
73   *         // process headers
74   *         // create some output stream
75   *         multipartStream.readBodyData(output);
76   *         nextPart = multipartStream.readBoundary();
77   *     }
78   * } catch (MultipartInput.MalformedStreamException e) {
79   *     // the stream failed to follow required syntax
80   * } catch (IOException e) {
81   *     // a read or write error occurred
82   * }
83   * </pre>
84   */
85  public final class MultipartInput {
86  
87      /**
88       * Builds a new {@link MultipartInput} instance.
89       * <p>
90       * For example:
91       * </p>
92       *
93       * <pre>{@code
94       * MultipartInput multipartInput = MultipartInput.builder().setPath(path).setBoundary(boundary).setBufferSize(DEFAULT_BUFSIZE).get();
95       * }
96       * </pre>
97       */
98      public static class Builder extends AbstractStreamBuilder<MultipartInput, Builder> {
99  
100         /**
101          * Boundary.
102          */
103         private byte[] boundary;
104 
105         /**
106          * Progress notifier.
107          */
108         private ProgressNotifier progressNotifier;
109 
110         public Builder() {
111             setBufferSizeDefault(DEFAULT_BUFSIZE);
112         }
113 
114         /**
115          * Constructs a new instance.
116          * <p>
117          * This builder uses the InputStream, buffer size, boundary and progress notifier aspects.
118          * </p>
119          * <p>
120          * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an
121          * {@link UnsupportedOperationException}.
122          * </p>
123          *
124          * @return a new instance.
125          * @throws IOException                   if an I/O error occurs.
126          * @throws UnsupportedOperationException if the origin cannot provide a Path.
127          * @see AbstractOrigin#getReader(Charset)
128          */
129         @Override
130         public MultipartInput get() throws IOException {
131             return new MultipartInput(getInputStream(), boundary, getBufferSize(), progressNotifier);
132         }
133 
134         /**
135          * Sets the boundary.
136          *
137          * @param boundary the boundary.
138          * @return this
139          */
140         public Builder setBoundary(final byte[] boundary) {
141             this.boundary = boundary;
142             return this;
143         }
144 
145         /**
146          * Sets the progress notifier.
147          *
148          * @param progressNotifier progress notifier..
149          * @return this
150          */
151         public Builder setProgressNotifier(final ProgressNotifier progressNotifier) {
152             this.progressNotifier = progressNotifier;
153             return this;
154         }
155 
156     }
157 
158     /**
159      * Signals an attempt to set an invalid boundary token.
160      */
161     public static class FileUploadBoundaryException extends FileUploadException {
162 
163         /**
164          * The UID to use when serializing this instance.
165          */
166         private static final long serialVersionUID = 2;
167 
168         /**
169          * Constructs an instance with the specified detail message.
170          *
171          * @param message The detail message (which is saved for later retrieval by the {@link #getMessage()} method)
172          */
173         public FileUploadBoundaryException(final String message) {
174             super(message);
175         }
176 
177     }
178 
179     /**
180      * An {@link InputStream} for reading an items contents.
181      */
182     public class ItemInputStream extends InputStream {
183 
184         /**
185          * Offset when converting negative bytes to integers.
186          */
187         private static final int BYTE_POSITIVE_OFFSET = 256;
188 
189         /**
190          * The number of bytes, which have been read so far.
191          */
192         private long total;
193 
194         /**
195          * The number of bytes, which must be hold, because they might be a part of the boundary.
196          */
197         private int pad;
198 
199         /**
200          * The current offset in the buffer.
201          */
202         private int pos;
203 
204         /**
205          * Whether the stream is already closed.
206          */
207         private boolean closed;
208 
209         /**
210          * Creates a new instance.
211          */
212         ItemInputStream() {
213             findSeparator();
214         }
215 
216         /**
217          * Returns the number of bytes, which are currently available, without blocking.
218          *
219          * @throws IOException An I/O error occurs.
220          * @return Number of bytes in the buffer.
221          */
222         @Override
223         public int available() throws IOException {
224             if (pos == -1) {
225                 return tail - head - pad;
226             }
227             return pos - head;
228         }
229 
230         private void checkOpen() throws ItemSkippedException {
231             if (closed) {
232                 throw new FileItemInput.ItemSkippedException("checkOpen()");
233             }
234         }
235 
236         /**
237          * Closes the input stream.
238          *
239          * @throws IOException An I/O error occurred.
240          */
241         @Override
242         public void close() throws IOException {
243             close(false);
244         }
245 
246         /**
247          * Closes the input stream.
248          *
249          * @param closeUnderlying Whether to close the underlying stream (hard close)
250          * @throws IOException An I/O error occurred.
251          */
252         public void close(final boolean closeUnderlying) throws IOException {
253             if (closed) {
254                 return;
255             }
256             if (closeUnderlying) {
257                 closed = true;
258                 input.close();
259             } else {
260                 for (;;) {
261                     var av = available();
262                     if (av == 0) {
263                         av = makeAvailable();
264                         if (av == 0) {
265                             break;
266                         }
267                     }
268                     skip(av);
269                 }
270             }
271             closed = true;
272         }
273 
274         /**
275          * Called for finding the separator.
276          */
277         private void findSeparator() {
278             pos = MultipartInput.this.findSeparator();
279             if (pos == -1) {
280                 if (tail - head > keepRegion) {
281                     pad = keepRegion;
282                 } else {
283                     pad = tail - head;
284                 }
285             }
286         }
287 
288         /**
289          * Gets the number of bytes, which have been read by the stream.
290          *
291          * @return Number of bytes, which have been read so far.
292          */
293         public long getBytesRead() {
294             return total;
295         }
296 
297         public boolean isClosed() {
298             return closed;
299         }
300 
301         /**
302          * Attempts to read more data.
303          *
304          * @return Number of available bytes
305          * @throws IOException An I/O error occurred.
306          */
307         private int makeAvailable() throws IOException {
308             if (pos != -1) {
309                 return 0;
310             }
311 
312             // Move the data to the beginning of the buffer.
313             total += tail - head - pad;
314             System.arraycopy(buffer, tail - pad, buffer, 0, pad);
315 
316             // Refill buffer with new data.
317             head = 0;
318             tail = pad;
319 
320             for (;;) {
321                 final var bytesRead = input.read(buffer, tail, bufSize - tail);
322                 if (bytesRead == -1) {
323                     // The last pad amount is left in the buffer.
324                     // Boundary can't be in there so signal an error
325                     // condition.
326                     final var msg = "Stream ended unexpectedly";
327                     throw new MalformedStreamException(msg);
328                 }
329                 if (notifier != null) {
330                     notifier.noteBytesRead(bytesRead);
331                 }
332                 tail += bytesRead;
333 
334                 findSeparator();
335                 final var av = available();
336 
337                 if (av > 0 || pos != -1) {
338                     return av;
339                 }
340             }
341         }
342 
343         /**
344          * Reads the next byte in the stream.
345          *
346          * @return The next byte in the stream, as a non-negative integer, or -1 for EOF.
347          * @throws IOException An I/O error occurred.
348          */
349         @Override
350         public int read() throws IOException {
351             checkOpen();
352             if (available() == 0 && makeAvailable() == 0) {
353                 return -1;
354             }
355             ++total;
356             final int b = buffer[head++];
357             if (b >= 0) {
358                 return b;
359             }
360             return b + BYTE_POSITIVE_OFFSET;
361         }
362 
363         /**
364          * Reads bytes into the given buffer.
365          *
366          * @param b   The destination buffer, where to write to.
367          * @param off Offset of the first byte in the buffer.
368          * @param len Maximum number of bytes to read.
369          * @return Number of bytes, which have been actually read, or -1 for EOF.
370          * @throws IOException An I/O error occurred.
371          */
372         @Override
373         public int read(final byte[] b, final int off, final int len) throws IOException {
374             checkOpen();
375             if (len == 0) {
376                 return 0;
377             }
378             var res = available();
379             if (res == 0) {
380                 res = makeAvailable();
381                 if (res == 0) {
382                     return -1;
383                 }
384             }
385             res = Math.min(res, len);
386             System.arraycopy(buffer, head, b, off, res);
387             head += res;
388             total += res;
389             return res;
390         }
391 
392         /**
393          * Skips the given number of bytes.
394          *
395          * @param bytes Number of bytes to skip.
396          * @return The number of bytes, which have actually been skipped.
397          * @throws IOException An I/O error occurred.
398          */
399         @Override
400         public long skip(final long bytes) throws IOException {
401             checkOpen();
402             var available = available();
403             if (available == 0) {
404                 available = makeAvailable();
405                 if (available == 0) {
406                     return 0;
407                 }
408             }
409             // Fix "Implicit narrowing conversion in compound assignment"
410             // https://github.com/apache/commons-fileupload/security/code-scanning/118
411             // Math.min always returns an int because available is an int.
412             final var res = Math.toIntExact(Math.min(available, bytes));
413             head += res;
414             return res;
415         }
416 
417     }
418 
419     /**
420      * Signals that the input stream fails to follow the required syntax.
421      */
422     public static class MalformedStreamException extends FileUploadException {
423 
424         /**
425          * The UID to use when serializing this instance.
426          */
427         private static final long serialVersionUID = 2;
428 
429         /**
430          * Constructs an {@code MalformedStreamException} with the specified detail message.
431          *
432          * @param message The detail message.
433          */
434         public MalformedStreamException(final String message) {
435             super(message);
436         }
437 
438         /**
439          * Constructs an {@code MalformedStreamException} with the specified detail message.
440          *
441          * @param message The detail message.
442          * @param cause   The cause (which is saved for later retrieval by the {@link #getCause()} method). (A null value is permitted, and indicates that the
443          *                cause is nonexistent or unknown.)
444          */
445         public MalformedStreamException(final String message, final Throwable cause) {
446             super(message, cause);
447         }
448 
449     }
450 
451     /**
452      * Internal class, which is used to invoke the {@link ProgressListener}.
453      */
454     public static class ProgressNotifier {
455 
456         /**
457          * The listener to invoke.
458          */
459         private final ProgressListener progressListener;
460 
461         /**
462          * Number of expected bytes, if known, or -1.
463          */
464         private final long contentLength;
465 
466         /**
467          * Number of bytes, which have been read so far.
468          */
469         private long bytesRead;
470 
471         /**
472          * Number of items, which have been read so far.
473          */
474         private int items;
475 
476         /**
477          * Creates a new instance with the given listener and content length.
478          *
479          * @param progressListener The listener to invoke.
480          * @param contentLength    The expected content length.
481          */
482         public ProgressNotifier(final ProgressListener progressListener, final long contentLength) {
483             this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
484             this.contentLength = contentLength;
485         }
486 
487         /**
488          * Called to indicate that bytes have been read.
489          *
490          * @param byteCount Number of bytes, which have been read.
491          */
492         void noteBytesRead(final int byteCount) {
493             //
494             // Indicates, that the given number of bytes have been read from the input stream.
495             //
496             bytesRead += byteCount;
497             notifyListener();
498         }
499 
500         /**
501          * Called to indicate, that a new file item has been detected.
502          */
503         public void noteItem() {
504             ++items;
505             notifyListener();
506         }
507 
508         /**
509          * Called for notifying the listener.
510          */
511         private void notifyListener() {
512             progressListener.update(bytesRead, contentLength, items);
513         }
514 
515     }
516 
517     /**
518      * The Carriage Return ASCII character value.
519      */
520     public static final byte CR = 0x0D;
521 
522     /**
523      * The Line Feed ASCII character value.
524      */
525     public static final byte LF = 0x0A;
526 
527     /**
528      * The dash (-) ASCII character value.
529      */
530     public static final byte DASH = 0x2D;
531 
532     /**
533      * The maximum length of {@code header-part} that will be processed (10 kilobytes = 10240 bytes.).
534      */
535     public static final int HEADER_PART_SIZE_MAX = 10_240;
536 
537     /**
538      * The default length of the buffer used for processing a request.
539      */
540     static final int DEFAULT_BUFSIZE = 4096;
541 
542     /**
543      * A byte sequence that marks the end of {@code header-part} ({@code CRLFCRLF}).
544      */
545     static final byte[] HEADER_SEPARATOR = { CR, LF, CR, LF };
546 
547     /**
548      * A byte sequence that that follows a delimiter that will be followed by an encapsulation ({@code CRLF}).
549      */
550     static final byte[] FIELD_SEPARATOR = { CR, LF };
551 
552     /**
553      * A byte sequence that that follows a delimiter of the last encapsulation in the stream ({@code --}).
554      */
555     static final byte[] STREAM_TERMINATOR = { DASH, DASH };
556 
557     /**
558      * A byte sequence that precedes a boundary ({@code CRLF--}).
559      */
560     static final byte[] BOUNDARY_PREFIX = { CR, LF, DASH, DASH };
561 
562     /**
563      * Compares {@code count} first bytes in the arrays {@code a} and {@code b}.
564      *
565      * @param a     The first array to compare.
566      * @param b     The second array to compare.
567      * @param count How many bytes should be compared.
568      * @return {@code true} if {@code count} first bytes in arrays {@code a} and {@code b} are equal.
569      */
570     static boolean arrayEquals(final byte[] a, final byte[] b, final int count) {
571         for (var i = 0; i < count; i++) {
572             if (a[i] != b[i]) {
573                 return false;
574             }
575         }
576         return true;
577     }
578 
579     /**
580      * Constructs a new {@link Builder}.
581      *
582      * @return a new {@link Builder}.
583      */
584     public static Builder builder() {
585         return new Builder();
586     }
587 
588     /**
589      * The input stream from which data is read.
590      */
591     private final InputStream input;
592 
593     /**
594      * The length of the boundary token plus the leading {@code CRLF--}, i.e. the length of {@link #boundary}.
595      */
596     private int boundaryLength;
597 
598     /**
599      * The amount of data, in bytes, that must be kept in the buffer in order to detect delimiters reliably (set to the length of the prefixed boundary).
600      */
601     private final int keepRegion;
602 
603     /**
604      * The byte sequence that partitions the stream, stored with the leading {@code CRLF--} prefix.
605      */
606     private final byte[] boundary;
607 
608     /**
609      * The table for the Knuth-Morris-Pratt search of {@link #boundary}; it holds {@code boundaryLength + 1} entries.
610      */
611     private final int[] boundaryTable;
612 
613     /**
614      * The length of the buffer used for processing the request (at least twice {@link #boundaryLength}).
615      */
616     private final int bufSize;
617 
618     /**
619      * The buffer used for processing the request; it is {@link #bufSize} bytes long.
620      */
621     private final byte[] buffer;
622 
623     /**
624      * The index of the first valid character in the buffer. <br>
625      * {@code 0 <= head < bufSize}
626      */
627     private int head;
628 
629     /**
630      * The index of the last valid character in the buffer + 1. <br>
631      * {@code 0 <= tail <= bufSize}
632      */
633     private int tail;
634 
635     /**
636      * The content encoding to use when reading headers; when {@code null}, the platform default charset is used.
637      */
638     private Charset headerCharset;
639 
640     /**
641      * The progress notifier, if any, or null.
642      */
643     private final ProgressNotifier notifier;
644 
645     /**
646      * Constructs a {@code MultipartInput} with a custom size buffer.
647      * <p>
648      * Note that the buffer must be at least big enough to contain the boundary string, plus 4 characters for CR/LF and double dash, plus at least one byte of
649      * data. Too small a buffer size setting will degrade performance.
650      * </p>
651      *
652      * @param input      The {@code InputStream} to serve as a data source.
653      * @param boundary   The token used for dividing the stream into {@code encapsulations}.
654      * @param bufferSize The size of the buffer to be used, in bytes.
655      * @param notifier   The notifier, which is used for calling the progress listener, if any.
656      * @throws IllegalArgumentException If the buffer size is too small.
657      */
658     private MultipartInput(final InputStream input, final byte[] boundary, final int bufferSize, final ProgressNotifier notifier) {
659         if (boundary == null) {
660             throw new IllegalArgumentException("boundary may not be null");
661         }
662         // We prepend CR/LF to the boundary to chop trailing CR/LF from
663         // body-data tokens.
664         this.boundaryLength = boundary.length + BOUNDARY_PREFIX.length;
665         if (bufferSize < this.boundaryLength + 1) {
666             throw new IllegalArgumentException("The buffer size specified for the MultipartInput is too small");
667         }
668 
669         this.input = input;
670         this.bufSize = Math.max(bufferSize, boundaryLength * 2);
671         this.buffer = new byte[this.bufSize];
672         this.notifier = notifier;
673 
674         this.boundary = new byte[this.boundaryLength];
675         this.boundaryTable = new int[this.boundaryLength + 1];
676         this.keepRegion = this.boundary.length;
677 
678         System.arraycopy(BOUNDARY_PREFIX, 0, this.boundary, 0, BOUNDARY_PREFIX.length);
679         System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length);
680         computeBoundaryTable();
681 
682         head = 0;
683         tail = 0;
684     }
685 
686     /**
687      * Computes the table used for Knuth-Morris-Pratt search algorithm.
688      */
689     private void computeBoundaryTable() {
690         var position = 2;
691         var candidate = 0;
692 
693         boundaryTable[0] = -1;
694         boundaryTable[1] = 0;
695 
696         while (position <= boundaryLength) {
697             if (boundary[position - 1] == boundary[candidate]) {
698                 boundaryTable[position] = candidate + 1;
699                 candidate++;
700                 position++;
701             } else if (candidate > 0) {
702                 candidate = boundaryTable[candidate];
703             } else {
704                 boundaryTable[position] = 0;
705                 position++;
706             }
707         }
708     }
709 
710     /**
711      * Reads {@code body-data} from the current {@code encapsulation} and discards it.
712      * <p>
713      * Use this method to skip encapsulations you don't need or don't understand.
714      * </p>
715      *
716      * @return The amount of data discarded.
717      * @throws MalformedStreamException if the stream ends unexpectedly.
718      * @throws IOException              if an i/o error occurs.
719      */
720     public long discardBodyData() throws MalformedStreamException, IOException {
721         return readBodyData(NullOutputStream.INSTANCE);
722     }
723 
724     /**
725      * Searches for a byte of specified value in the {@code buffer}, starting at the specified {@code position}.
726      *
727      * @param value The value to find.
728      * @param pos   The starting position for searching.
729      *
730      * @return The position of byte found, counting from beginning of the {@code buffer}, or {@code -1} if not found.
731      */
732     protected int findByte(final byte value, final int pos) {
733         for (var i = pos; i < tail; i++) {
734             if (buffer[i] == value) {
735                 return i;
736             }
737         }
738 
739         return -1;
740     }
741 
742     /**
743      * Searches for the {@code boundary} in the {@code buffer} region delimited by {@code head} and {@code tail}.
744      *
745      * @return The position of the boundary found, counting from the beginning of the {@code buffer}, or {@code -1} if not found.
746      */
747     protected int findSeparator() {
748         var bufferPos = this.head;
749         var tablePos = 0;
750         while (bufferPos < this.tail) {
751             while (tablePos >= 0 && buffer[bufferPos] != boundary[tablePos]) {
752                 tablePos = boundaryTable[tablePos];
753             }
754             bufferPos++;
755             tablePos++;
756             if (tablePos == boundaryLength) {
757                 return bufferPos - boundaryLength;
758             }
759         }
760         return -1;
761     }
762 
763     /**
764      * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the platform default encoding is
765      * used.
766      *
767      * @return The encoding used to read part headers.
768      */
769     public Charset getHeaderCharset() {
770         return headerCharset;
771     }
772 
773     /**
774      * Creates a new {@link ItemInputStream}.
775      *
776      * @return A new instance of {@link ItemInputStream}.
777      */
778     public ItemInputStream newInputStream() {
779         return new ItemInputStream();
780     }
781 
782     /**
783      * Reads {@code body-data} from the current {@code encapsulation} and writes its contents into the output {@code Stream}.
784      * <p>
785      * Arbitrarily large amounts of data can be processed by this method using a constant size buffer. (see {@link MultipartInput#builder()}).
786      * </p>
787      *
788      * @param output The {@code Stream} to write data into. May be null, in which case this method is equivalent to {@link #discardBodyData()}.
789      * @return the amount of data written.
790      * @throws MalformedStreamException if the stream ends unexpectedly.
791      * @throws IOException              if an i/o error occurs.
792      */
793     public long readBodyData(final OutputStream output) throws MalformedStreamException, IOException {
794         try (var inputStream = newInputStream()) {
795             return IOUtils.copyLarge(inputStream, output);
796         }
797     }
798 
799     /**
800      * Skips a {@code boundary} token, and checks whether more {@code encapsulations} are contained in the stream.
801      *
802      * @return {@code true} if there are more encapsulations in this stream; {@code false} otherwise.
803      * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits
804      * @throws MalformedStreamException if the stream ends unexpectedly or fails to follow required syntax.
805      */
806     public boolean readBoundary() throws FileUploadSizeException, MalformedStreamException {
807         final var marker = new byte[2];
808         final boolean nextChunk;
809         head += boundaryLength;
810         try {
811             marker[0] = readByte();
812             if (marker[0] == LF) {
813                 // Work around IE5 Mac bug with input type=image.
814                 // Because the boundary delimiter, not including the trailing
815                 // CRLF, must not appear within any file (RFC 2046, section
816                 // 5.1.1), we know the missing CR is due to a buggy browser
817                 // rather than a file containing something similar to a
818                 // boundary.
819                 return true;
820             }
821 
822             marker[1] = readByte();
823             if (arrayEquals(marker, STREAM_TERMINATOR, 2)) {
824                 nextChunk = false;
825             } else if (arrayEquals(marker, FIELD_SEPARATOR, 2)) {
826                 nextChunk = true;
827             } else {
828                 throw new MalformedStreamException("Unexpected characters follow a boundary");
829             }
830         } catch (final FileUploadSizeException e) {
831             throw e;
832         } catch (final IOException e) {
833             throw new MalformedStreamException("Stream ended unexpectedly", e);
834         }
835         return nextChunk;
836     }
837 
838     /**
839      * Reads a byte from the {@code buffer}, and refills it as necessary.
840      *
841      * @return The next byte from the input stream.
842      * @throws IOException if there is no more data available.
843      */
844     public byte readByte() throws IOException {
845         // Buffer depleted ?
846         if (head == tail) {
847             head = 0;
848             // Refill.
849             tail = input.read(buffer, head, bufSize);
850             if (tail == -1) {
851                 // No more data available.
852                 throw new IOException("No more data is available");
853             }
854             if (notifier != null) {
855                 notifier.noteBytesRead(tail);
856             }
857         }
858         return buffer[head++];
859     }
860 
861     /**
862      * Reads the {@code header-part} of the current {@code encapsulation}.
863      * <p>
864      * Headers are returned verbatim to the input stream, including the trailing {@code CRLF} marker. Parsing is left to the application.
865      * </p>
866      *
867      * @return The {@code header-part} of the current encapsulation.
868      * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits.
869      * @throws MalformedStreamException if the stream ends unexpectedly.
870      */
871     public String readHeaders() throws FileUploadSizeException, MalformedStreamException {
872         var i = 0;
873         byte b;
874         // to support multi-byte characters
875         final var baos = new ByteArrayOutputStream();
876         var size = 0;
877         while (i < HEADER_SEPARATOR.length) {
878             try {
879                 b = readByte();
880             } catch (final FileUploadSizeException e) {
881                 // wraps a FileUploadSizeException, re-throw as it will be unwrapped later
882                 throw e;
883             } catch (final IOException e) {
884                 throw new MalformedStreamException("Stream ended unexpectedly", e);
885             }
886             if (++size > HEADER_PART_SIZE_MAX) {
887                 throw new MalformedStreamException(
888                         String.format("Header section has more than %s bytes (maybe it is not properly terminated)", HEADER_PART_SIZE_MAX));
889             }
890             if (b == HEADER_SEPARATOR[i]) {
891                 i++;
892             } else {
893                 i = 0;
894             }
895             baos.write(b);
896         }
897 
898         try {
899             return baos.toString(Charsets.toCharset(headerCharset, Charset.defaultCharset()).name());
900         } catch (final UnsupportedEncodingException e) {
901             // not possible
902             throw new IllegalStateException(e);
903         }
904     }
905 
906     /**
907      * Changes the boundary token used for partitioning the stream.
908      * <p>
909      * This method allows single pass processing of nested multipart streams.
910      * </p>
911      * <p>
912      * The boundary token of the nested stream is {@code required} to be of the same length as the boundary token in parent stream.
913      * </p>
914      * <p>
915      * Restoring the parent stream boundary token after processing of a nested stream is left to the application.
916      * </p>
917      *
918      * @param boundary The boundary to be used for parsing of the nested stream.
919      * @throws FileUploadBoundaryException if the {@code boundary} has a different length than the one being currently parsed.
920      */
921     public void setBoundary(final byte[] boundary) throws FileUploadBoundaryException {
922         if (boundary.length != boundaryLength - BOUNDARY_PREFIX.length) {
923             throw new FileUploadBoundaryException("The length of a boundary token cannot be changed");
924         }
925         System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length);
926         computeBoundaryTable();
927     }
928 
929     /**
930      * Sets the character encoding to be used when reading the headers of individual parts. When not specified, or {@code null}, the platform default encoding
931      * is used.
932      *
933      * @param headerCharset The encoding used to read part headers.
934      */
935     public void setHeaderCharset(final Charset headerCharset) {
936         this.headerCharset = headerCharset;
937     }
938 
939     /**
940      * Finds the beginning of the first {@code encapsulation}.
941      *
942      * @return {@code true} if an {@code encapsulation} was found in the stream.
943      * @throws IOException if an i/o error occurs.
944      */
945     public boolean skipPreamble() throws IOException {
946         // First delimiter may be not preceded with a CRLF.
947         System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
948         boundaryLength = boundary.length - 2;
949         computeBoundaryTable();
950         try {
951             // Discard all data up to the delimiter.
952             discardBodyData();
953 
954             // Read boundary - if succeeded, the stream contains an
955             // encapsulation.
956             return readBoundary();
957         } catch (final MalformedStreamException e) {
958             return false;
959         } finally {
960             // Restore delimiter.
961             System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
962             boundaryLength = boundary.length;
963             boundary[0] = CR;
964             boundary[1] = LF;
965             computeBoundaryTable();
966         }
967     }
968 
969 }