UnsynchronizedBufferedReader.java

  1. /*
  2.  *  Licensed to the Apache Software Foundation (ASF) under one or more
  3.  *  contributor license agreements.  See the NOTICE file distributed with
  4.  *  this work for additional information regarding copyright ownership.
  5.  *  The ASF licenses this file to You under the Apache License, Version 2.0
  6.  *  (the "License"); you may not use this file except in compliance with
  7.  *  the License.  You may obtain a copy of the License at
  8.  *
  9.  *     http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  *  Unless required by applicable law or agreed to in writing, software
  12.  *  distributed under the License is distributed on an "AS IS" BASIS,
  13.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  *  See the License for the specific language governing permissions and
  15.  *  limitations under the License.
  16.  */

  17. package org.apache.commons.io.input;

  18. import static org.apache.commons.io.IOUtils.CR;
  19. import static org.apache.commons.io.IOUtils.EOF;
  20. import static org.apache.commons.io.IOUtils.LF;

  21. import java.io.BufferedReader;
  22. import java.io.BufferedWriter;
  23. import java.io.IOException;
  24. import java.io.Reader;

  25. import org.apache.commons.io.IOUtils;

  26. /**
  27.  * Wraps an existing {@link Reader} and buffers the input <em>without any synchronization</em>. Expensive interaction with the underlying reader is minimized,
  28.  * since most (smaller) requests can be satisfied by accessing the buffer alone. The drawback is that some extra space is required to hold the buffer and that
  29.  * copying takes place when filling that buffer, but this is usually outweighed by the performance benefits.
  30.  * <p>
  31.  * A typical application pattern for the class looks like this:
  32.  * </p>
  33.  *
  34.  * <pre>{@code
  35.  * UnsynchronizedBufferedReader buf = new UnsynchronizedBufferedReader(new FileReader("file"));
  36.  * }</pre>
  37.  * <p>
  38.  * Provenance: Apache Harmony's java.io.BufferedReader, renamed, and modified.
  39.  * </p>
  40.  *
  41.  * @see BufferedReader
  42.  * @see BufferedWriter
  43.  * @since 2.17.0
  44.  */
  45. public class UnsynchronizedBufferedReader extends UnsynchronizedReader {

  46.     private static final char NUL = '\0';

  47.     private final Reader in;

  48.     /**
  49.      * The characters that can be read and refilled in bulk. We maintain three indices into this buffer:
  50.      *
  51.      * <pre>
  52.      *     { X X X X X X X X X X X X - - }
  53.      *           ^     ^             ^
  54.      *           |     |             |
  55.      *         mark   pos           end
  56.      * </pre>
  57.      * <p>
  58.      * Pos points to the next readable character. End is one greater than the last readable character. When {@code pos == end}, the buffer is empty and must be
  59.      * {@link #fillBuf() filled} before characters can be read.
  60.      * </p>
  61.      * <p>
  62.      * Mark is the value pos will be set to on calls to {@link #reset()}. Its value is in the range {@code [0...pos]}. If the mark is {@code -1}, the buffer
  63.      * cannot be reset.
  64.      * </p>
  65.      * <p>
  66.      * MarkLimit limits the distance between the mark and the pos. When this limit is exceeded, {@link #reset()} is permitted (but not required) to throw an
  67.      * exception. For shorter distances, {@link #reset()} shall not throw (unless the reader is closed).
  68.      * </p>
  69.      */
  70.     private char[] buf;

  71.     private int pos;

  72.     private int end;

  73.     private int mark = -1;

  74.     private int markLimit = -1;

  75.     /**
  76.      * Constructs a new BufferedReader on the Reader {@code in}. The buffer gets the default size (8 KB).
  77.      *
  78.      * @param in the Reader that is buffered.
  79.      */
  80.     public UnsynchronizedBufferedReader(final Reader in) {
  81.         this(in, IOUtils.DEFAULT_BUFFER_SIZE);
  82.     }

  83.     /**
  84.      * Constructs a new BufferedReader on the Reader {@code in}. The buffer size is specified by the parameter {@code size}.
  85.      *
  86.      * @param in   the Reader that is buffered.
  87.      * @param size the size of the buffer to allocate.
  88.      * @throws IllegalArgumentException if {@code size <= 0}.
  89.      */
  90.     public UnsynchronizedBufferedReader(final Reader in, final int size) {
  91.         if (size <= 0) {
  92.             throw new IllegalArgumentException("size <= 0");
  93.         }
  94.         this.in = in;
  95.         buf = new char[size];
  96.     }

  97.     /**
  98.      * Peeks at the next input character, refilling the buffer if necessary. If this character is a newline character ("\n"), it is discarded.
  99.      */
  100.     final void chompNewline() throws IOException {
  101.         if ((pos != end || fillBuf() != EOF) && buf[pos] == LF) {
  102.             pos++;
  103.         }
  104.     }

  105.     /**
  106.      * Closes this reader. This implementation closes the buffered source reader and releases the buffer. Nothing is done if this reader has already been
  107.      * closed.
  108.      *
  109.      * @throws IOException if an error occurs while closing this reader.
  110.      */
  111.     @Override
  112.     public void close() throws IOException {
  113.         if (!isClosed()) {
  114.             in.close();
  115.             buf = null;
  116.             super.close();
  117.         }
  118.     }

  119.     /**
  120.      * Populates the buffer with data. It is an error to call this method when the buffer still contains data; ie. if {@code pos < end}.
  121.      *
  122.      * @return the number of bytes read into the buffer, or -1 if the end of the source stream has been reached.
  123.      */
  124.     private int fillBuf() throws IOException {
  125.         // assert(pos == end);

  126.         if (mark == EOF || pos - mark >= markLimit) {
  127.             /* mark isn't set or has exceeded its limit. use the whole buffer */
  128.             final int result = in.read(buf, 0, buf.length);
  129.             if (result > 0) {
  130.                 mark = -1;
  131.                 pos = 0;
  132.                 end = result;
  133.             }
  134.             return result;
  135.         }

  136.         if (mark == 0 && markLimit > buf.length) {
  137.             /* the only way to make room when mark=0 is by growing the buffer */
  138.             int newLength = buf.length * 2;
  139.             if (newLength > markLimit) {
  140.                 newLength = markLimit;
  141.             }
  142.             final char[] newbuf = new char[newLength];
  143.             System.arraycopy(buf, 0, newbuf, 0, buf.length);
  144.             buf = newbuf;
  145.         } else if (mark > 0) {
  146.             /* make room by shifting the buffered data to left mark positions */
  147.             System.arraycopy(buf, mark, buf, 0, buf.length - mark);
  148.             pos -= mark;
  149.             end -= mark;
  150.             mark = 0;
  151.         }

  152.         /* Set the new position and mark position */
  153.         final int count = in.read(buf, pos, buf.length - pos);
  154.         if (count != EOF) {
  155.             end += count;
  156.         }
  157.         return count;
  158.     }

  159.     /**
  160.      * Sets a mark position in this reader. The parameter {@code markLimit} indicates how many characters can be read before the mark is invalidated. Calling
  161.      * {@link #reset()} will reposition the reader back to the marked position if {@code markLimit} has not been surpassed.
  162.      *
  163.      * @param markLimit the number of characters that can be read before the mark is invalidated.
  164.      * @throws IllegalArgumentException if {@code markLimit < 0}.
  165.      * @throws IOException              if an error occurs while setting a mark in this reader.
  166.      * @see #markSupported()
  167.      * @see #reset()
  168.      */
  169.     @Override
  170.     public void mark(final int markLimit) throws IOException {
  171.         if (markLimit < 0) {
  172.             throw new IllegalArgumentException();
  173.         }
  174.         checkOpen();
  175.         this.markLimit = markLimit;
  176.         mark = pos;
  177.     }

  178.     /**
  179.      * Tests whether this reader supports the {@link #mark(int)} and {@link #reset()} methods. This implementation returns {@code true}.
  180.      *
  181.      * @return {@code true} for {@code BufferedReader}.
  182.      * @see #mark(int)
  183.      * @see #reset()
  184.      */
  185.     @Override
  186.     public boolean markSupported() {
  187.         return true;
  188.     }

  189.     /**
  190.      * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will still return this value.
  191.      *
  192.      * @return the next character
  193.      * @throws IOException If an I/O error occurs
  194.      */
  195.     public int peek() throws IOException {
  196.         mark(1);
  197.         final int c = read();
  198.         reset();
  199.         return c;
  200.     }

  201.     /**
  202.      * Populates the buffer with the next {@code buf.length} characters in the current reader without consuming them. The next call to {@link #read()} will
  203.      * still return the next value.
  204.      *
  205.      * @param buf the buffer to fill for the look ahead.
  206.      * @return the buffer itself
  207.      * @throws IOException If an I/O error occurs
  208.      */
  209.     public int peek(final char[] buf) throws IOException {
  210.         final int n = buf.length;
  211.         mark(n);
  212.         final int c = read(buf, 0, n);
  213.         reset();
  214.         return c;
  215.     }

  216.     /**
  217.      * Reads a single character from this reader and returns it with the two higher-order bytes set to 0. If possible, BufferedReader returns a character from
  218.      * the buffer. If there are no characters available in the buffer, it fills the buffer and then returns a character. It returns -1 if there are no more
  219.      * characters in the source reader.
  220.      *
  221.      * @return the character read or -1 if the end of the source reader has been reached.
  222.      * @throws IOException if this reader is closed or some other I/O error occurs.
  223.      */
  224.     @Override
  225.     public int read() throws IOException {
  226.         checkOpen();
  227.         /* Are there buffered characters available? */
  228.         if (pos < end || fillBuf() != EOF) {
  229.             return buf[pos++];
  230.         }
  231.         return EOF;
  232.     }

  233.     /**
  234.      * Reads at most {@code length} characters from this reader and stores them at {@code offset} in the character array {@code buffer}. Returns the number of
  235.      * characters actually read or -1 if the end of the source reader has been reached. If all the buffered characters have been used, a mark has not been set
  236.      * and the requested number of characters is larger than this readers buffer size, BufferedReader bypasses the buffer and simply places the results directly
  237.      * into {@code buffer}.
  238.      *
  239.      * @param buffer the character array to store the characters read.
  240.      * @param offset the initial position in {@code buffer} to store the bytes read from this reader.
  241.      * @param length the maximum number of characters to read, must be non-negative.
  242.      * @return number of characters read or -1 if the end of the source reader has been reached.
  243.      * @throws IndexOutOfBoundsException if {@code offset < 0} or {@code length < 0}, or if {@code offset + length} is greater than the size of {@code buffer}.
  244.      * @throws IOException               if this reader is closed or some other I/O error occurs.
  245.      */
  246.     @Override
  247.     public int read(final char[] buffer, int offset, final int length) throws IOException {
  248.         checkOpen();
  249.         if (offset < 0 || offset > buffer.length - length || length < 0) {
  250.             throw new IndexOutOfBoundsException();
  251.         }
  252.         int outstanding = length;
  253.         while (outstanding > 0) {

  254.             /*
  255.              * If there are bytes in the buffer, grab those first.
  256.              */
  257.             final int available = end - pos;
  258.             if (available > 0) {
  259.                 final int count = available >= outstanding ? outstanding : available;
  260.                 System.arraycopy(buf, pos, buffer, offset, count);
  261.                 pos += count;
  262.                 offset += count;
  263.                 outstanding -= count;
  264.             }

  265.             /*
  266.              * Before attempting to read from the underlying stream, make sure we really, really want to. We won't bother if we're done, or if we've already got
  267.              * some bytes and reading from the underlying stream would block.
  268.              */
  269.             if (outstanding == 0 || outstanding < length && !in.ready()) {
  270.                 break;
  271.             }

  272.             // assert(pos == end);

  273.             /*
  274.              * If we're unmarked and the requested size is greater than our buffer, read the bytes directly into the caller's buffer. We don't read into smaller
  275.              * buffers because that could result in a many reads.
  276.              */
  277.             if ((mark == -1 || pos - mark >= markLimit) && outstanding >= buf.length) {
  278.                 final int count = in.read(buffer, offset, outstanding);
  279.                 if (count > 0) {
  280.                     outstanding -= count;
  281.                     mark = -1;
  282.                 }

  283.                 break; // assume the source stream gave us all that it could
  284.             }

  285.             if (fillBuf() == EOF) {
  286.                 break; // source is exhausted
  287.             }
  288.         }

  289.         final int count = length - outstanding;
  290.         return count > 0 || count == length ? count : EOF;
  291.     }

  292.     /**
  293.      * Returns the next line of text available from this reader. A line is represented by zero or more characters followed by {@code LF}, {@code CR},
  294.      * {@code "\r\n"} or the end of the reader. The string does not include the newline sequence.
  295.      *
  296.      * @return the contents of the line or {@code null} if no characters were read before the end of the reader has been reached.
  297.      * @throws IOException if this reader is closed or some other I/O error occurs.
  298.      */
  299.     public String readLine() throws IOException {
  300.         checkOpen();
  301.         /* has the underlying stream been exhausted? */
  302.         if (pos == end && fillBuf() == EOF) {
  303.             return null;
  304.         }
  305.         for (int charPos = pos; charPos < end; charPos++) {
  306.             final char ch = buf[charPos];
  307.             if (ch > CR) {
  308.                 continue;
  309.             }
  310.             if (ch == LF) {
  311.                 final String res = new String(buf, pos, charPos - pos);
  312.                 pos = charPos + 1;
  313.                 return res;
  314.             }
  315.             if (ch == CR) {
  316.                 final String res = new String(buf, pos, charPos - pos);
  317.                 pos = charPos + 1;
  318.                 if ((pos < end || fillBuf() != EOF) && buf[pos] == LF) {
  319.                     pos++;
  320.                 }
  321.                 return res;
  322.             }
  323.         }

  324.         char eol = NUL;
  325.         final StringBuilder result = new StringBuilder(80);
  326.         /* Typical Line Length */

  327.         result.append(buf, pos, end - pos);
  328.         while (true) {
  329.             pos = end;

  330.             /* Are there buffered characters available? */
  331.             if (eol == LF) {
  332.                 return result.toString();
  333.             }
  334.             // attempt to fill buffer
  335.             if (fillBuf() == EOF) {
  336.                 // characters or null.
  337.                 return result.length() > 0 || eol != NUL ? result.toString() : null;
  338.             }
  339.             for (int charPos = pos; charPos < end; charPos++) {
  340.                 final char c = buf[charPos];
  341.                 if (eol != NUL) {
  342.                     if (eol == CR && c == LF) {
  343.                         if (charPos > pos) {
  344.                             result.append(buf, pos, charPos - pos - 1);
  345.                         }
  346.                         pos = charPos + 1;
  347.                     } else {
  348.                         if (charPos > pos) {
  349.                             result.append(buf, pos, charPos - pos - 1);
  350.                         }
  351.                         pos = charPos;
  352.                     }
  353.                     return result.toString();
  354.                 }
  355.                 if (c == LF || c == CR) {
  356.                     eol = c;
  357.                 }
  358.             }
  359.             if (eol == NUL) {
  360.                 result.append(buf, pos, end - pos);
  361.             } else {
  362.                 result.append(buf, pos, end - pos - 1);
  363.             }
  364.         }
  365.     }

  366.     /**
  367.      * Tests whether this reader is ready to be read without blocking.
  368.      *
  369.      * @return {@code true} if this reader will not block when {@code read} is called, {@code false} if unknown or blocking will occur.
  370.      * @throws IOException if this reader is closed or some other I/O error occurs.
  371.      * @see #read()
  372.      * @see #read(char[], int, int)
  373.      * @see #readLine()
  374.      */
  375.     @Override
  376.     public boolean ready() throws IOException {
  377.         checkOpen();
  378.         return end - pos > 0 || in.ready();
  379.     }

  380.     /**
  381.      * Resets this reader's position to the last {@code mark()} location. Invocations of {@code read()} and {@code skip()} will occur from this new location.
  382.      *
  383.      * @throws IOException if this reader is closed or no mark has been set.
  384.      * @see #mark(int)
  385.      * @see #markSupported()
  386.      */
  387.     @Override
  388.     public void reset() throws IOException {
  389.         checkOpen();
  390.         if (mark == -1) {
  391.             throw new IOException("mark == -1");
  392.         }
  393.         pos = mark;
  394.     }

  395.     /**
  396.      * Skips {@code amount} characters in this reader. Subsequent {@code read()}s will not return these characters unless {@code reset()} is used. Skipping
  397.      * characters may invalidate a mark if {@code markLimit} is surpassed.
  398.      *
  399.      * @param amount the maximum number of characters to skip.
  400.      * @return the number of characters actually skipped.
  401.      * @throws IllegalArgumentException if {@code amount < 0}.
  402.      * @throws IOException              if this reader is closed or some other I/O error occurs.
  403.      * @see #mark(int)
  404.      * @see #markSupported()
  405.      * @see #reset()
  406.      */
  407.     @Override
  408.     public long skip(final long amount) throws IOException {
  409.         if (amount < 0) {
  410.             throw new IllegalArgumentException();
  411.         }
  412.         checkOpen();
  413.         if (amount < 1) {
  414.             return 0;
  415.         }
  416.         if (end - pos >= amount) {
  417.             pos += Math.toIntExact(amount);
  418.             return amount;
  419.         }

  420.         long read = end - pos;
  421.         pos = end;
  422.         while (read < amount) {
  423.             if (fillBuf() == EOF) {
  424.                 return read;
  425.             }
  426.             if (end - pos >= amount - read) {
  427.                 pos += Math.toIntExact(amount - read);
  428.                 return amount;
  429.             }
  430.             // Couldn't get all the characters, skip what we read
  431.             read += end - pos;
  432.             pos = end;
  433.         }
  434.         return amount;
  435.     }

  436. }