MultipartInput.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.fileupload2.core;

  18. import java.io.ByteArrayOutputStream;
  19. import java.io.IOException;
  20. import java.io.InputStream;
  21. import java.io.OutputStream;
  22. import java.io.UnsupportedEncodingException;
  23. import java.nio.charset.Charset;

  24. import org.apache.commons.fileupload2.core.FileItemInput.ItemSkippedException;
  25. import org.apache.commons.io.Charsets;
  26. import org.apache.commons.io.IOUtils;
  27. import org.apache.commons.io.build.AbstractOrigin;
  28. import org.apache.commons.io.build.AbstractStreamBuilder;
  29. import org.apache.commons.io.output.NullOutputStream;

  30. /**
  31.  * Low-level API for processing file uploads.
  32.  *
  33.  * <p>
  34.  * This class can be used to process data streams conforming to MIME 'multipart' format as defined in <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC
  35.  * 1867</a>. Arbitrarily large amounts of data in the stream can be processed under constant memory usage.
  36.  * </p>
  37.  * <p>
  38.  * The format of the stream is defined in the following way:
  39.  * </p>
  40.  * <pre>
  41.  *   multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
  42.  *   encapsulation := delimiter body CRLF<br>
  43.  *   delimiter := "--" boundary CRLF<br>
  44.  *   close-delimiter := "--" boundary "--"<br>
  45.  *   preamble := &lt;ignore&gt;<br>
  46.  *   epilogue := &lt;ignore&gt;<br>
  47.  *   body := header-part CRLF body-part<br>
  48.  *   header-part := 1*header CRLF<br>
  49.  *   header := header-name ":" header-value<br>
  50.  *   header-name := &lt;printable ASCII characters except ":"&gt;<br>
  51.  *   header-value := &lt;any ASCII characters except CR &amp; LF&gt;<br>
  52.  *   body-data := &lt;arbitrary data&gt;<br>
  53.  * </pre>
  54.  *
  55.  * <p>
 * Note that body-data can contain another multipart entity. There is limited support for single pass processing of such nested streams. The nested stream is
  57.  * <strong>required</strong> to have a boundary token of the same length as the parent stream (see {@link #setBoundary(byte[])}).
  58.  * </p>
  59.  * <p>
  60.  * Here is an example of usage of this class:
  61.  * </p>
  62.  *
  63.  * <pre>
  64.  * try {
  65.  *     MultipartInput multipartStream = MultipartInput.builder()
  66.  *             .setBoundary(boundary)
  67.  *             .setInputStream(input)
  68.  *             .get();
  69.  *     boolean nextPart = multipartStream.skipPreamble();
  70.  *     OutputStream output;
  71.  *     while (nextPart) {
  72.  *         String header = multipartStream.readHeaders();
  73.  *         // process headers
  74.  *         // create some output stream
  75.  *         multipartStream.readBodyData(output);
  76.  *         nextPart = multipartStream.readBoundary();
  77.  *     }
  78.  * } catch (MultipartInput.MalformedStreamException e) {
  79.  *     // the stream failed to follow required syntax
  80.  * } catch (IOException e) {
  81.  *     // a read or write error occurred
  82.  * }
  83.  * </pre>
  84.  */
  85. public final class MultipartInput {

  86.     /**
  87.      * Builds a new {@link MultipartInput} instance.
  88.      * <p>
  89.      * For example:
  90.      * </p>
  91.      *
  92.      * <pre>{@code
  93.      * MultipartInput factory = MultipartInput.builder().setPath(path).setBufferSize(DEFAULT_THRESHOLD).get();
  94.      * }
  95.      * </pre>
  96.      */
  97.     public static class Builder extends AbstractStreamBuilder<MultipartInput, Builder> {

  98.         /**
  99.          * Boundary.
  100.          */
  101.         private byte[] boundary;

  102.         /**
  103.          * Progress notifier.
  104.          */
  105.         private ProgressNotifier progressNotifier;

  106.         /** The  per part size limit for headers.
  107.          */
  108.         private int partHeaderSizeMax = DEFAULT_PART_HEADER_SIZE_MAX;

  109.         /**
  110.          * Constructs a new instance.
  111.          */
  112.         public Builder() {
  113.             setBufferSizeDefault(DEFAULT_BUFSIZE);
  114.         }

  115.         /**
  116.          * Constructs a new instance.
  117.          * <p>
  118.          * This builder uses the InputStream, buffer size, boundary and progress notifier aspects.
  119.          * </p>
  120.          * <p>
  121.          * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an
  122.          * {@link UnsupportedOperationException}.
  123.          * </p>
  124.          *
  125.          * @return a new instance.
  126.          * @throws IOException                   if an I/O error occurs.
  127.          * @throws UnsupportedOperationException if the origin cannot provide a Path.
  128.          * @see AbstractOrigin#getReader(Charset)
  129.          */
  130.         @Override
  131.         public MultipartInput get() throws IOException {
  132.             return new MultipartInput(getInputStream(), boundary, getBufferSize(), getPartHeaderSizeMax(), progressNotifier);
  133.         }

  134.         /** Returns the per part size limit for headers.
  135.          * @return The maximum size of the headers in bytes.
  136.          * @since 2.0.0-M4
  137.          */
  138.         public int getPartHeaderSizeMax() {
  139.             return partHeaderSizeMax;
  140.         }

  141.         /**
  142.          * Sets the boundary.
  143.          *
  144.          * @param boundary the boundary.
  145.          * @return {@code this} instance.
  146.          */
  147.         public Builder setBoundary(final byte[] boundary) {
  148.             this.boundary = boundary;
  149.             return this;
  150.         }

  151.        /** Sets the per part size limit for headers.
  152.      * @param partHeaderSizeMax The maximum size of the headers in bytes.
  153.      * @return This builder.
  154.      * @since 2.0.0-M4
  155.      */
  156.     public Builder setPartHeaderSizeMax(final int partHeaderSizeMax) {
  157.         this.partHeaderSizeMax = partHeaderSizeMax;
  158.         return this;
  159.     }

  160.         /**
  161.              * Sets the progress notifier.
  162.              *
  163.              * @param progressNotifier progress notifier.
  164.              * @return {@code this} instance.
  165.              */
  166.             public Builder setProgressNotifier(final ProgressNotifier progressNotifier) {
  167.                 this.progressNotifier = progressNotifier;
  168.                 return this;
  169.             }

  170.     }

  171.     /**
  172.      * Signals an attempt to set an invalid boundary token.
  173.      */
  174.     public static class FileUploadBoundaryException extends FileUploadException {

  175.         /**
  176.          * The UID to use when serializing this instance.
  177.          */
  178.         private static final long serialVersionUID = 2;

  179.         /**
  180.          * Constructs an instance with the specified detail message.
  181.          *
  182.          * @param message The detail message (which is saved for later retrieval by the {@link #getMessage()} method)
  183.          */
  184.         public FileUploadBoundaryException(final String message) {
  185.             super(message);
  186.         }

  187.     }

  188.     /**
  189.      * An {@link InputStream} for reading an items contents.
  190.      */
  191.     public class ItemInputStream extends InputStream {

  192.         /**
  193.          * Offset when converting negative bytes to integers.
  194.          */
  195.         private static final int BYTE_POSITIVE_OFFSET = 256;

  196.         /**
  197.          * The number of bytes, which have been read so far.
  198.          */
  199.         private long total;

  200.         /**
  201.          * The number of bytes, which must be hold, because they might be a part of the boundary.
  202.          */
  203.         private int pad;

  204.         /**
  205.          * The current offset in the buffer.
  206.          */
  207.         private int pos;

  208.         /**
  209.          * Whether the stream is already closed.
  210.          */
  211.         private boolean closed;

  212.         /**
  213.          * Creates a new instance.
  214.          */
  215.         ItemInputStream() {
  216.             findSeparator();
  217.         }

  218.         /**
  219.          * Returns the number of bytes, which are currently available, without blocking.
  220.          *
  221.          * @throws IOException An I/O error occurs.
  222.          * @return Number of bytes in the buffer.
  223.          */
  224.         @Override
  225.         public int available() throws IOException {
  226.             if (pos == -1) {
  227.                 return tail - head - pad;
  228.             }
  229.             return pos - head;
  230.         }

  231.         private void checkOpen() throws ItemSkippedException {
  232.             if (closed) {
  233.                 throw new FileItemInput.ItemSkippedException("checkOpen()");
  234.             }
  235.         }

  236.         /**
  237.          * Closes the input stream.
  238.          *
  239.          * @throws IOException An I/O error occurred.
  240.          */
  241.         @Override
  242.         public void close() throws IOException {
  243.             close(false);
  244.         }

  245.         /**
  246.          * Closes the input stream.
  247.          *
  248.          * @param closeUnderlying Whether to close the underlying stream (hard close)
  249.          * @throws IOException An I/O error occurred.
  250.          */
  251.         public void close(final boolean closeUnderlying) throws IOException {
  252.             if (closed) {
  253.                 return;
  254.             }
  255.             if (closeUnderlying) {
  256.                 closed = true;
  257.                 input.close();
  258.             } else {
  259.                 for (;;) {
  260.                     var avail = available();
  261.                     if (avail == 0) {
  262.                         avail = makeAvailable();
  263.                         if (avail == 0) {
  264.                             break;
  265.                         }
  266.                     }
  267.                     if (skip(avail) != avail) {
  268.                         // TODO What to do?
  269.                     }
  270.                 }
  271.             }
  272.             closed = true;
  273.         }

  274.         /**
  275.          * Called for finding the separator.
  276.          */
  277.         private void findSeparator() {
  278.             pos = MultipartInput.this.findSeparator();
  279.             if (pos == -1) {
  280.                 if (tail - head > keepRegion) {
  281.                     pad = keepRegion;
  282.                 } else {
  283.                     pad = tail - head;
  284.                 }
  285.             }
  286.         }

  287.         /**
  288.          * Gets the number of bytes, which have been read by the stream.
  289.          *
  290.          * @return Number of bytes, which have been read so far.
  291.          */
  292.         public long getBytesRead() {
  293.             return total;
  294.         }

  295.         /**
  296.          * Tests whether this instance is closed.
  297.          *
  298.          * @return whether this instance is closed.
  299.          */
  300.         public boolean isClosed() {
  301.             return closed;
  302.         }

  303.         /**
  304.          * Attempts to read more data.
  305.          *
  306.          * @return Number of available bytes
  307.          * @throws IOException An I/O error occurred.
  308.          */
  309.         private int makeAvailable() throws IOException {
  310.             if (pos != -1) {
  311.                 return 0;
  312.             }

  313.             // Move the data to the beginning of the buffer.
  314.             total += tail - head - pad;
  315.             System.arraycopy(buffer, tail - pad, buffer, 0, pad);

  316.             // Refill buffer with new data.
  317.             head = 0;
  318.             tail = pad;

  319.             for (;;) {
  320.                 final var bytesRead = input.read(buffer, tail, bufSize - tail);
  321.                 if (bytesRead == -1) {
  322.                     // The last pad amount is left in the buffer.
  323.                     // Boundary can't be in there so signal an error
  324.                     // condition.
  325.                     final var msg = "Stream ended unexpectedly";
  326.                     throw new MalformedStreamException(msg);
  327.                 }
  328.                 if (notifier != null) {
  329.                     notifier.noteBytesRead(bytesRead);
  330.                 }
  331.                 tail += bytesRead;

  332.                 findSeparator();
  333.                 final var av = available();

  334.                 if (av > 0 || pos != -1) {
  335.                     return av;
  336.                 }
  337.             }
  338.         }

  339.         /**
  340.          * Reads the next byte in the stream.
  341.          *
  342.          * @return The next byte in the stream, as a non-negative integer, or -1 for EOF.
  343.          * @throws IOException An I/O error occurred.
  344.          */
  345.         @Override
  346.         public int read() throws IOException {
  347.             checkOpen();
  348.             if (available() == 0 && makeAvailable() == 0) {
  349.                 return -1;
  350.             }
  351.             ++total;
  352.             final int b = buffer[head++];
  353.             if (b >= 0) {
  354.                 return b;
  355.             }
  356.             return b + BYTE_POSITIVE_OFFSET;
  357.         }

  358.         /**
  359.          * Reads bytes into the given buffer.
  360.          *
  361.          * @param b   The destination buffer, where to write to.
  362.          * @param off Offset of the first byte in the buffer.
  363.          * @param len Maximum number of bytes to read.
  364.          * @return Number of bytes, which have been actually read, or -1 for EOF.
  365.          * @throws IOException An I/O error occurred.
  366.          */
  367.         @Override
  368.         public int read(final byte[] b, final int off, final int len) throws IOException {
  369.             checkOpen();
  370.             if (len == 0) {
  371.                 return 0;
  372.             }
  373.             var res = available();
  374.             if (res == 0) {
  375.                 res = makeAvailable();
  376.                 if (res == 0) {
  377.                     return -1;
  378.                 }
  379.             }
  380.             res = Math.min(res, len);
  381.             System.arraycopy(buffer, head, b, off, res);
  382.             head += res;
  383.             total += res;
  384.             return res;
  385.         }

  386.         /**
  387.          * Skips the given number of bytes.
  388.          *
  389.          * @param bytes Number of bytes to skip.
  390.          * @return The number of bytes, which have actually been skipped.
  391.          * @throws IOException An I/O error occurred.
  392.          */
  393.         @Override
  394.         public long skip(final long bytes) throws IOException {
  395.             checkOpen();
  396.             var available = available();
  397.             if (available == 0) {
  398.                 available = makeAvailable();
  399.                 if (available == 0) {
  400.                     return 0;
  401.                 }
  402.             }
  403.             // Fix "Implicit narrowing conversion in compound assignment"
  404.             // https://github.com/apache/commons-fileupload/security/code-scanning/118
  405.             // Math.min always returns an int because available is an int.
  406.             final var res = Math.toIntExact(Math.min(available, bytes));
  407.             head += res;
  408.             return res;
  409.         }

  410.     }

  411.     /**
  412.      * Signals that the input stream fails to follow the required syntax.
  413.      */
  414.     public static class MalformedStreamException extends FileUploadException {

  415.         /**
  416.          * The UID to use when serializing this instance.
  417.          */
  418.         private static final long serialVersionUID = 2;

  419.         /**
  420.          * Constructs an {@code MalformedStreamException} with the specified detail message.
  421.          *
  422.          * @param message The detail message.
  423.          */
  424.         public MalformedStreamException(final String message) {
  425.             super(message);
  426.         }

  427.         /**
  428.          * Constructs an {@code MalformedStreamException} with the specified detail message.
  429.          *
  430.          * @param message The detail message.
  431.          * @param cause   The cause (which is saved for later retrieval by the {@link #getCause()} method). (A null value is permitted, and indicates that the
  432.          *                cause is nonexistent or unknown.)
  433.          */
  434.         public MalformedStreamException(final String message, final Throwable cause) {
  435.             super(message, cause);
  436.         }

  437.     }

  438.     /**
  439.      * Internal class, which is used to invoke the {@link ProgressListener}.
  440.      */
  441.     public static class ProgressNotifier {

  442.         /**
  443.          * The listener to invoke.
  444.          */
  445.         private final ProgressListener progressListener;

  446.         /**
  447.          * Number of expected bytes, if known, or -1.
  448.          */
  449.         private final long contentLength;

  450.         /**
  451.          * Number of bytes, which have been read so far.
  452.          */
  453.         private long bytesRead;

  454.         /**
  455.          * Number of items, which have been read so far.
  456.          */
  457.         private int items;

  458.         /**
  459.          * Creates a new instance with the given listener and content length.
  460.          *
  461.          * @param progressListener The listener to invoke.
  462.          * @param contentLength    The expected content length.
  463.          */
  464.         public ProgressNotifier(final ProgressListener progressListener, final long contentLength) {
  465.             this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
  466.             this.contentLength = contentLength;
  467.         }

  468.         /**
  469.          * Called to indicate that bytes have been read.
  470.          *
  471.          * @param byteCount Number of bytes, which have been read.
  472.          */
  473.         void noteBytesRead(final int byteCount) {
  474.             //
  475.             // Indicates, that the given number of bytes have been read from the input stream.
  476.             //
  477.             bytesRead += byteCount;
  478.             notifyListener();
  479.         }

  480.         /**
  481.          * Called to indicate, that a new file item has been detected.
  482.          */
  483.         public void noteItem() {
  484.             ++items;
  485.             notifyListener();
  486.         }

  487.         /**
  488.          * Called for notifying the listener.
  489.          */
  490.         private void notifyListener() {
  491.             progressListener.update(bytesRead, contentLength, items);
  492.         }

  493.     }

    /**
     * The Carriage Return ASCII character value.
     */
    public static final byte CR = 0x0D;

    /**
     * The Line Feed ASCII character value.
     */
    public static final byte LF = 0x0A;

    /**
     * The dash (-) ASCII character value.
     */
    public static final byte DASH = 0x2D;

    /**
     * The default length, in bytes, of the buffer used for processing a request.
     */
    static final int DEFAULT_BUFSIZE = 4096;

    /**
     * Default per part header size limit in bytes.
     *
     * @since 2.0.0-M4
     */
    public static final int DEFAULT_PART_HEADER_SIZE_MAX = 512;

    /**
     * A byte sequence that marks the end of {@code header-part} ({@code CRLFCRLF}).
     */
    static final byte[] HEADER_SEPARATOR = { CR, LF, CR, LF };

    /**
     * A byte sequence that follows a delimiter that will be followed by an encapsulation ({@code CRLF}).
     */
    static final byte[] FIELD_SEPARATOR = { CR, LF };

    /**
     * A byte sequence that follows a delimiter of the last encapsulation in the stream ({@code --}).
     */
    static final byte[] STREAM_TERMINATOR = { DASH, DASH };

    /**
     * A byte sequence that precedes a boundary ({@code CRLF--}).
     */
    static final byte[] BOUNDARY_PREFIX = { CR, LF, DASH, DASH };

  531.     /**
  532.      * Compares {@code count} first bytes in the arrays {@code a} and {@code b}.
  533.      *
  534.      * @param a     The first array to compare.
  535.      * @param b     The second array to compare.
  536.      * @param count How many bytes should be compared.
  537.      * @return {@code true} if {@code count} first bytes in arrays {@code a} and {@code b} are equal.
  538.      */
  539.     static boolean arrayEquals(final byte[] a, final byte[] b, final int count) {
  540.         for (var i = 0; i < count; i++) {
  541.             if (a[i] != b[i]) {
  542.                 return false;
  543.             }
  544.         }
  545.         return true;
  546.     }

  547.     /**
  548.      * Constructs a new {@link Builder}.
  549.      *
  550.      * @return a new {@link Builder}.
  551.      */
  552.     public static Builder builder() {
  553.         return new Builder();
  554.     }

    /**
     * The input stream from which data is read.
     */
    private final InputStream input;

    /**
     * The length of the boundary token plus the leading {@code CRLF--}.
     */
    private int boundaryLength;

    /**
     * The amount of data, in bytes, that must be kept in the buffer in order to detect delimiters reliably.
     */
    private final int keepRegion;

    /**
     * The byte sequence that partitions the stream, including the leading {@code CRLF--}.
     */
    private final byte[] boundary;

    /**
     * The table for Knuth-Morris-Pratt search algorithm.
     */
    private final int[] boundaryTable;

    /**
     * The length of the buffer used for processing the request.
     */
    private final int bufSize;

    /**
     * The buffer used for processing the request.
     */
    private final byte[] buffer;

    /**
     * The index of first valid character in the buffer. <br>
     * {@code 0 <= head < bufSize}
     */
    private int head;

    /**
     * The index of last valid character in the buffer + 1. <br>
     * {@code 0 <= tail <= bufSize}
     */
    private int tail;

    /**
     * The content encoding to use when reading headers.
     */
    private Charset headerCharset;

    /**
     * The progress notifier, if any, or null.
     */
    private final ProgressNotifier notifier;

    /**
     * The maximum size of the headers in bytes.
     */
    private final int partHeaderSizeMax;

  605.     /**
  606.      * Constructs a {@code MultipartInput} with a custom size buffer.
  607.      * <p>
  608.      * Note that the buffer must be at least big enough to contain the boundary string, plus 4 characters for CR/LF and double dash, plus at least one byte of
  609.      * data. Too small a buffer size setting will degrade performance.
  610.      * </p>
  611.      *
  612.      * @param input      The {@code InputStream} to serve as a data source.
  613.      * @param boundary   The token used for dividing the stream into {@code encapsulations}.
  614.      * @param bufferSize The size of the buffer to be used, in bytes.
  615.      * @param notifier   The notifier, which is used for calling the progress listener, if any.
  616.      * @throws IllegalArgumentException If the buffer size is too small.
  617.      */
  618.     private MultipartInput(final InputStream input, final byte[] boundary, final int bufferSize, final int partHeaderSizeMax, final ProgressNotifier notifier) {
  619.         if (boundary == null) {
  620.             throw new IllegalArgumentException("boundary may not be null");
  621.         }
  622.         // We prepend CR/LF to the boundary to chop trailing CR/LF from
  623.         // body-data tokens.
  624.         this.boundaryLength = boundary.length + BOUNDARY_PREFIX.length;
  625.         if (bufferSize < this.boundaryLength + 1) {
  626.             throw new IllegalArgumentException("The buffer size specified for the MultipartInput is too small");
  627.         }

  628.         this.input = input;
  629.         this.bufSize = Math.max(bufferSize, boundaryLength * 2);
  630.         this.buffer = new byte[this.bufSize];
  631.         this.notifier = notifier;
  632.         this.partHeaderSizeMax = partHeaderSizeMax;

  633.         this.boundary = new byte[this.boundaryLength];
  634.         this.boundaryTable = new int[this.boundaryLength + 1];
  635.         this.keepRegion = this.boundary.length;

  636.         System.arraycopy(BOUNDARY_PREFIX, 0, this.boundary, 0, BOUNDARY_PREFIX.length);
  637.         System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length);
  638.         computeBoundaryTable();

  639.         head = 0;
  640.         tail = 0;
  641.     }

  642.     /**
  643.      * Computes the table used for Knuth-Morris-Pratt search algorithm.
  644.      */
  645.     private void computeBoundaryTable() {
  646.         var position = 2;
  647.         var candidate = 0;

  648.         boundaryTable[0] = -1;
  649.         boundaryTable[1] = 0;

  650.         while (position <= boundaryLength) {
  651.             if (boundary[position - 1] == boundary[candidate]) {
  652.                 boundaryTable[position] = candidate + 1;
  653.                 candidate++;
  654.                 position++;
  655.             } else if (candidate > 0) {
  656.                 candidate = boundaryTable[candidate];
  657.             } else {
  658.                 boundaryTable[position] = 0;
  659.                 position++;
  660.             }
  661.         }
  662.     }

  663.     /**
  664.      * Reads {@code body-data} from the current {@code encapsulation} and discards it.
  665.      * <p>
  666.      * Use this method to skip encapsulations you don't need or don't understand.
  667.      * </p>
  668.      *
  669.      * @return The amount of data discarded.
  670.      * @throws MalformedStreamException if the stream ends unexpectedly.
  671.      * @throws IOException              if an i/o error occurs.
  672.      */
  673.     public long discardBodyData() throws MalformedStreamException, IOException {
  674.         return readBodyData(NullOutputStream.INSTANCE);
  675.     }

  676.     /**
  677.      * Searches for a byte of specified value in the {@code buffer}, starting at the specified {@code position}.
  678.      *
  679.      * @param value The value to find.
  680.      * @param pos   The starting position for searching.
  681.      * @return The position of byte found, counting from beginning of the {@code buffer}, or {@code -1} if not found.
  682.      */
  683.     protected int findByte(final byte value, final int pos) {
  684.         for (var i = pos; i < tail; i++) {
  685.             if (buffer[i] == value) {
  686.                 return i;
  687.             }
  688.         }

  689.         return -1;
  690.     }

  691.     /**
  692.      * Searches for the {@code boundary} in the {@code buffer} region delimited by {@code head} and {@code tail}.
  693.      *
  694.      * @return The position of the boundary found, counting from the beginning of the {@code buffer}, or {@code -1} if not found.
  695.      */
  696.     protected int findSeparator() {
  697.         var bufferPos = this.head;
  698.         var tablePos = 0;
  699.         while (bufferPos < this.tail) {
  700.             while (tablePos >= 0 && buffer[bufferPos] != boundary[tablePos]) {
  701.                 tablePos = boundaryTable[tablePos];
  702.             }
  703.             bufferPos++;
  704.             tablePos++;
  705.             if (tablePos == boundaryLength) {
  706.                 return bufferPos - boundaryLength;
  707.             }
  708.         }
  709.         return -1;
  710.     }

  711.     /**
  712.      * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the platform default encoding is
  713.      * used.
  714.      *
  715.      * @return The encoding used to read part headers.
  716.      */
  717.     public Charset getHeaderCharset() {
  718.         return headerCharset;
  719.     }

  720.     /** Returns the per part size limit for headers.
  721.      *
  722.      * @return The maximum size of the headers in bytes.
  723.      * @since 2.0.0-M4
  724.      */
  725.     public int getPartHeaderSizeMax() {
  726.         return partHeaderSizeMax;
  727.     }

  728.     /**
  729.      * Creates a new {@link ItemInputStream}.
  730.      *
  731.      * @return A new instance of {@link ItemInputStream}.
  732.      */
  733.     public ItemInputStream newInputStream() {
  734.         return new ItemInputStream();
  735.     }

  736.     /**
  737.      * Reads {@code body-data} from the current {@code encapsulation} and writes its contents into the output {@code Stream}.
  738.      * <p>
  739.      * Arbitrary large amounts of data can be processed by this method using a constant size buffer. (see {@link MultipartInput#builder()}).
  740.      * </p>
  741.      *
  742.      * @param output The {@code Stream} to write data into. May be null, in which case this method is equivalent to {@link #discardBodyData()}.
  743.      * @return the amount of data written.
  744.      * @throws MalformedStreamException if the stream ends unexpectedly.
  745.      * @throws IOException              if an i/o error occurs.
  746.      */
  747.     public long readBodyData(final OutputStream output) throws MalformedStreamException, IOException {
  748.         try (var inputStream = newInputStream()) {
  749.             return IOUtils.copyLarge(inputStream, output);
  750.         }
  751.     }

  752.     /**
  753.      * Skips a {@code boundary} token, and checks whether more {@code encapsulations} are contained in the stream.
  754.      *
  755.      * @return {@code true} if there are more encapsulations in this stream; {@code false} otherwise.
  756.      * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits
  757.      * @throws MalformedStreamException if the stream ends unexpectedly or fails to follow required syntax.
  758.      */
  759.     public boolean readBoundary() throws FileUploadSizeException, MalformedStreamException {
  760.         final var marker = new byte[2];
  761.         final boolean nextChunk;
  762.         head += boundaryLength;
  763.         try {
  764.             marker[0] = readByte();
  765.             if (marker[0] == LF) {
  766.                 // Work around IE5 Mac bug with input type=image.
  767.                 // Because the boundary delimiter, not including the trailing
  768.                 // CRLF, must not appear within any file (RFC 2046, section
  769.                 // 5.1.1), we know the missing CR is due to a buggy browser
  770.                 // rather than a file containing something similar to a
  771.                 // boundary.
  772.                 return true;
  773.             }

  774.             marker[1] = readByte();
  775.             if (arrayEquals(marker, STREAM_TERMINATOR, 2)) {
  776.                 nextChunk = false;
  777.             } else if (arrayEquals(marker, FIELD_SEPARATOR, 2)) {
  778.                 nextChunk = true;
  779.             } else {
  780.                 throw new MalformedStreamException("Unexpected characters follow a boundary");
  781.             }
  782.         } catch (final FileUploadSizeException e) {
  783.             throw e;
  784.         } catch (final IOException e) {
  785.             throw new MalformedStreamException("Stream ended unexpectedly", e);
  786.         }
  787.         return nextChunk;
  788.     }

  789.     /**
  790.      * Reads a byte from the {@code buffer}, and refills it as necessary.
  791.      *
  792.      * @return The next byte from the input stream.
  793.      * @throws IOException if there is no more data available.
  794.      */
  795.     public byte readByte() throws IOException {
  796.         // Buffer depleted ?
  797.         if (head == tail) {
  798.             head = 0;
  799.             // Refill.
  800.             tail = input.read(buffer, head, bufSize);
  801.             if (tail == -1) {
  802.                 // No more data available.
  803.                 throw new IOException("No more data is available");
  804.             }
  805.             if (notifier != null) {
  806.                 notifier.noteBytesRead(tail);
  807.             }
  808.         }
  809.         return buffer[head++];
  810.     }

  811.     /**
  812.      * Reads the {@code header-part} of the current {@code encapsulation}.
  813.      * <p>
  814.      * Headers are returned verbatim to the input stream, including the trailing {@code CRLF} marker. Parsing is left to the application.
  815.      * </p>
  816.      * <p>
  817.      * <strong>TODO</strong> allow limiting maximum header size to protect against abuse.
  818.      * </p>
  819.      *
  820.      * @return The {@code header-part} of the current encapsulation.
  821.      * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits.
  822.      * @throws MalformedStreamException if the stream ends unexpectedly.
  823.      */
  824.     public String readHeaders() throws FileUploadSizeException, MalformedStreamException {
  825.         var i = 0;
  826.         byte b;
  827.         // to support multi-byte characters
  828.         final var baos = new ByteArrayOutputStream();
  829.         var size = 0;
  830.         while (i < HEADER_SEPARATOR.length) {
  831.             try {
  832.                 b = readByte();
  833.             } catch (final FileUploadSizeException e) {
  834.                 // wraps a FileUploadSizeException, re-throw as it will be unwrapped later
  835.                 throw e;
  836.             } catch (final IOException e) {
  837.                 throw new MalformedStreamException("Stream ended unexpectedly", e);
  838.             }
  839.             final int phsm = getPartHeaderSizeMax();
  840.             if (phsm != -1 && ++size > phsm) {
  841.                 throw new FileUploadSizeException(
  842.                         String.format("Header section has more than %s bytes (maybe it is not properly terminated)", Integer.valueOf(phsm)), phsm, size);
  843.             }
  844.             if (b == HEADER_SEPARATOR[i]) {
  845.                 i++;
  846.             } else {
  847.                 i = 0;
  848.             }
  849.             baos.write(b);
  850.         }
  851.         try {
  852.             return baos.toString(Charsets.toCharset(headerCharset, Charset.defaultCharset()).name());
  853.         } catch (final UnsupportedEncodingException e) {
  854.             // not possible
  855.             throw new IllegalStateException(e);
  856.         }
  857.     }

  858.     /**
  859.      * Changes the boundary token used for partitioning the stream.
  860.      * <p>
  861.      * This method allows single pass processing of nested multipart streams.
  862.      * </p>
  863.      * <p>
  864.      * The boundary token of the nested stream is {@code required} to be of the same length as the boundary token in parent stream.
  865.      * </p>
  866.      * <p>
  867.      * Restoring the parent stream boundary token after processing of a nested stream is left to the application.
  868.      * </p>
  869.      *
  870.      * @param boundary The boundary to be used for parsing of the nested stream.
  871.      * @throws FileUploadBoundaryException if the {@code boundary} has a different length than the one being currently parsed.
  872.      */
  873.     public void setBoundary(final byte[] boundary) throws FileUploadBoundaryException {
  874.         if (boundary.length != boundaryLength - BOUNDARY_PREFIX.length) {
  875.             throw new FileUploadBoundaryException("The length of a boundary token cannot be changed");
  876.         }
  877.         System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length);
  878.         computeBoundaryTable();
  879.     }

    /**
     * Sets the charset that is applied when the header bytes of individual parts are decoded into text.
     * <p>
     * The value is stored as given. If it is {@code null}, the platform default charset is used when headers are read.
     * </p>
     *
     * @param headerCharset The charset used to read part headers; may be {@code null}.
     */
    public void setHeaderCharset(final Charset headerCharset) {
        this.headerCharset = headerCharset;
    }

  889.     /**
  890.      * Finds the beginning of the first {@code encapsulation}.
  891.      *
  892.      * @return {@code true} if an {@code encapsulation} was found in the stream.
  893.      * @throws IOException if an i/o error occurs.
  894.      */
  895.     public boolean skipPreamble() throws IOException {
  896.         // First delimiter may be not preceded with a CRLF.
  897.         System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
  898.         boundaryLength = boundary.length - 2;
  899.         computeBoundaryTable();
  900.         try {
  901.             // Discard all data up to the delimiter.
  902.             discardBodyData();

  903.             // Read boundary - if succeeded, the stream contains an
  904.             // encapsulation.
  905.             return readBoundary();
  906.         } catch (final MalformedStreamException e) {
  907.             return false;
  908.         } finally {
  909.             // Restore delimiter.
  910.             System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
  911.             boundaryLength = boundary.length;
  912.             boundary[0] = CR;
  913.             boundary[1] = LF;
  914.             computeBoundaryTable();
  915.         }
  916.     }

  917. }