/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
17  
18  package org.apache.commons.io.input;
19  
20  import static org.apache.commons.io.IOUtils.CR;
21  import static org.apache.commons.io.IOUtils.EOF;
22  import static org.apache.commons.io.IOUtils.LF;
23  
24  import java.io.BufferedReader;
25  import java.io.BufferedWriter;
26  import java.io.IOException;
27  import java.io.Reader;
28  
29  import org.apache.commons.io.IOUtils;
30  
31  /**
32   * Wraps an existing {@link Reader} and buffers the input <em>without any synchronization</em>. Expensive interaction with the underlying reader is minimized,
33   * since most (smaller) requests can be satisfied by accessing the buffer alone. The drawback is that some extra space is required to hold the buffer and that
34   * copying takes place when filling that buffer, but this is usually outweighed by the performance benefits.
35   * <p>
36   * A typical application pattern for the class looks like this:
37   * </p>
38   *
39   * <pre>{@code
40   * UnsynchronizedBufferedReader buf = new UnsynchronizedBufferedReader(new FileReader("file"));
41   * }</pre>
42   * <p>
43   * Provenance: Apache Harmony's java.io.BufferedReader, renamed, and modified.
44   * </p>
45   *
46   * @see BufferedReader
47   * @see BufferedWriter
48   * @since 2.17.0
49   */
50  public class UnsynchronizedBufferedReader extends UnsynchronizedReader {
51  
52      private static final char NUL = '\0';
53  
54      private final Reader in;
55  
56      /**
57       * The characters that can be read and refilled in bulk. We maintain three indices into this buffer:
58       *
59       * <pre>
60       *     { X X X X X X X X X X X X - - }
61       *           ^     ^             ^
62       *           |     |             |
63       *         mark   pos           end
64       * </pre>
65       * <p>
66       * Pos points to the next readable character. End is one greater than the last readable character. When {@code pos == end}, the buffer is empty and must be
67       * {@link #fillBuf() filled} before characters can be read.
68       * </p>
69       * <p>
70       * Mark is the value pos will be set to on calls to {@link #reset()}. Its value is in the range {@code [0...pos]}. If the mark is {@code -1}, the buffer
71       * cannot be reset.
72       * </p>
73       * <p>
74       * MarkLimit limits the distance between the mark and the pos. When this limit is exceeded, {@link #reset()} is permitted (but not required) to throw an
75       * exception. For shorter distances, {@link #reset()} shall not throw (unless the reader is closed).
76       * </p>
77       */
78      private char[] buf;
79  
80      private int pos;
81  
82      private int end;
83  
84      private int mark = -1;
85  
86      private int markLimit = -1;
87  
88      /**
89       * Constructs a new BufferedReader on the Reader {@code in}. The buffer gets the default size (8 KB).
90       *
91       * @param in the Reader that is buffered.
92       */
93      public UnsynchronizedBufferedReader(final Reader in) {
94          this(in, IOUtils.DEFAULT_BUFFER_SIZE);
95      }
96  
97      /**
98       * Constructs a new BufferedReader on the Reader {@code in}. The buffer size is specified by the parameter {@code size}.
99       *
100      * @param in   the Reader that is buffered.
101      * @param size the size of the buffer to allocate.
102      * @throws IllegalArgumentException if {@code size <= 0}.
103      */
104     public UnsynchronizedBufferedReader(final Reader in, final int size) {
105         if (size <= 0) {
106             throw new IllegalArgumentException("size <= 0");
107         }
108         this.in = in;
109         buf = new char[size];
110     }
111 
112     /**
113      * Peeks at the next input character, refilling the buffer if necessary. If this character is a newline character ("\n"), it is discarded.
114      */
115     final void chompNewline() throws IOException {
116         if ((pos != end || fillBuf() != EOF) && buf[pos] == LF) {
117             pos++;
118         }
119     }
120 
121     /**
122      * Closes this reader. This implementation closes the buffered source reader and releases the buffer. Nothing is done if this reader has already been
123      * closed.
124      *
125      * @throws IOException if an error occurs while closing this reader.
126      */
127     @Override
128     public void close() throws IOException {
129         if (!isClosed()) {
130             in.close();
131             buf = null;
132             super.close();
133         }
134     }
135 
136     /**
137      * Populates the buffer with data. It is an error to call this method when the buffer still contains data; ie. if {@code pos < end}.
138      *
139      * @return the number of bytes read into the buffer, or -1 if the end of the source stream has been reached.
140      */
141     private int fillBuf() throws IOException {
142         // assert(pos == end);
143 
144         if (mark == EOF || pos - mark >= markLimit) {
145             /* mark isn't set or has exceeded its limit. use the whole buffer */
146             final int result = in.read(buf, 0, buf.length);
147             if (result > 0) {
148                 mark = -1;
149                 pos = 0;
150                 end = result;
151             }
152             return result;
153         }
154 
155         if (mark == 0 && markLimit > buf.length) {
156             /* the only way to make room when mark=0 is by growing the buffer */
157             int newLength = buf.length * 2;
158             if (newLength > markLimit) {
159                 newLength = markLimit;
160             }
161             final char[] newbuf = new char[newLength];
162             System.arraycopy(buf, 0, newbuf, 0, buf.length);
163             buf = newbuf;
164         } else if (mark > 0) {
165             /* make room by shifting the buffered data to left mark positions */
166             System.arraycopy(buf, mark, buf, 0, buf.length - mark);
167             pos -= mark;
168             end -= mark;
169             mark = 0;
170         }
171 
172         /* Set the new position and mark position */
173         final int count = in.read(buf, pos, buf.length - pos);
174         if (count != EOF) {
175             end += count;
176         }
177         return count;
178     }
179 
180     /**
181      * Sets a mark position in this reader. The parameter {@code markLimit} indicates how many characters can be read before the mark is invalidated. Calling
182      * {@link #reset()} will reposition the reader back to the marked position if {@code markLimit} has not been surpassed.
183      *
184      * @param markLimit the number of characters that can be read before the mark is invalidated.
185      * @throws IllegalArgumentException if {@code markLimit < 0}.
186      * @throws IOException              if an error occurs while setting a mark in this reader.
187      * @see #markSupported()
188      * @see #reset()
189      */
190     @Override
191     public void mark(final int markLimit) throws IOException {
192         if (markLimit < 0) {
193             throw new IllegalArgumentException();
194         }
195         checkOpen();
196         this.markLimit = markLimit;
197         mark = pos;
198     }
199 
200     /**
201      * Tests whether this reader supports the {@link #mark(int)} and {@link #reset()} methods. This implementation returns {@code true}.
202      *
203      * @return {@code true} for {@code BufferedReader}.
204      * @see #mark(int)
205      * @see #reset()
206      */
207     @Override
208     public boolean markSupported() {
209         return true;
210     }
211 
212     /**
213      * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will still return this value.
214      *
215      * @return the next character
216      * @throws IOException If an I/O error occurs
217      */
218     public int peek() throws IOException {
219         mark(1);
220         final int c = read();
221         reset();
222         return c;
223     }
224 
225     /**
226      * Populates the buffer with the next {@code buf.length} characters in the current reader without consuming them. The next call to {@link #read()} will
227      * still return the next value.
228      *
229      * @param buf the buffer to fill for the look ahead.
230      * @return the buffer itself
231      * @throws IOException If an I/O error occurs
232      */
233     public int peek(final char[] buf) throws IOException {
234         final int n = buf.length;
235         mark(n);
236         final int c = read(buf, 0, n);
237         reset();
238         return c;
239     }
240 
241     /**
242      * Reads a single character from this reader and returns it with the two higher-order bytes set to 0. If possible, BufferedReader returns a character from
243      * the buffer. If there are no characters available in the buffer, it fills the buffer and then returns a character. It returns -1 if there are no more
244      * characters in the source reader.
245      *
246      * @return the character read or -1 if the end of the source reader has been reached.
247      * @throws IOException if this reader is closed or some other I/O error occurs.
248      */
249     @Override
250     public int read() throws IOException {
251         checkOpen();
252         /* Are there buffered characters available? */
253         if (pos < end || fillBuf() != EOF) {
254             return buf[pos++];
255         }
256         return EOF;
257     }
258 
259     /**
260      * Reads at most {@code length} characters from this reader and stores them at {@code offset} in the character array {@code buffer}. Returns the number of
261      * characters actually read or -1 if the end of the source reader has been reached. If all the buffered characters have been used, a mark has not been set
262      * and the requested number of characters is larger than this readers buffer size, BufferedReader bypasses the buffer and simply places the results directly
263      * into {@code buffer}.
264      *
265      * @param buffer the character array to store the characters read.
266      * @param offset the initial position in {@code buffer} to store the bytes read from this reader.
267      * @param length the maximum number of characters to read, must be non-negative.
268      * @return number of characters read or -1 if the end of the source reader has been reached.
269      * @throws IndexOutOfBoundsException if {@code offset < 0} or {@code length < 0}, or if {@code offset + length} is greater than the size of {@code buffer}.
270      * @throws IOException               if this reader is closed or some other I/O error occurs.
271      */
272     @Override
273     public int read(final char[] buffer, int offset, final int length) throws IOException {
274         /*
275          * First throw on a closed reader, then check the parameters.
276          *
277          * This behavior is not specified in the Javadoc, but is followed by most readers in java.io.
278          */
279         checkOpen();
280         IOUtils.checkFromIndexSize(buffer, offset, length);
281         if (length == 0) {
282             return 0;
283         }
284 
285         int outstanding = length;
286         while (outstanding > 0) {
287 
288             /*
289              * If there are bytes in the buffer, grab those first.
290              */
291             final int available = end - pos;
292             if (available > 0) {
293                 final int count = available >= outstanding ? outstanding : available;
294                 System.arraycopy(buf, pos, buffer, offset, count);
295                 pos += count;
296                 offset += count;
297                 outstanding -= count;
298             }
299 
300             /*
301              * Before attempting to read from the underlying stream, make sure we really, really want to. We won't bother if we're done, or if we've already got
302              * some bytes and reading from the underlying stream would block.
303              */
304             if (outstanding == 0 || outstanding < length && !in.ready()) {
305                 break;
306             }
307 
308             // assert(pos == end);
309 
310             /*
311              * If we're unmarked and the requested size is greater than our buffer, read the bytes directly into the caller's buffer. We don't read into smaller
312              * buffers because that could result in a many reads.
313              */
314             if ((mark == -1 || pos - mark >= markLimit) && outstanding >= buf.length) {
315                 final int count = in.read(buffer, offset, outstanding);
316                 if (count > 0) {
317                     outstanding -= count;
318                     mark = -1;
319                 }
320 
321                 break; // assume the source stream gave us all that it could
322             }
323 
324             if (fillBuf() == EOF) {
325                 break; // source is exhausted
326             }
327         }
328 
329         final int count = length - outstanding;
330         return count > 0 || count == length ? count : EOF;
331     }
332 
333     /**
334      * Returns the next line of text available from this reader. A line is represented by zero or more characters followed by {@code LF}, {@code CR},
335      * {@code "\r\n"} or the end of the reader. The string does not include the newline sequence.
336      *
337      * @return the contents of the line or {@code null} if no characters were read before the end of the reader has been reached.
338      * @throws IOException if this reader is closed or some other I/O error occurs.
339      */
340     public String readLine() throws IOException {
341         checkOpen();
342         /* has the underlying stream been exhausted? */
343         if (pos == end && fillBuf() == EOF) {
344             return null;
345         }
346         for (int charPos = pos; charPos < end; charPos++) {
347             final char ch = buf[charPos];
348             if (ch > CR) {
349                 continue;
350             }
351             if (ch == LF) {
352                 final String res = new String(buf, pos, charPos - pos);
353                 pos = charPos + 1;
354                 return res;
355             }
356             if (ch == CR) {
357                 final String res = new String(buf, pos, charPos - pos);
358                 pos = charPos + 1;
359                 if ((pos < end || fillBuf() != EOF) && buf[pos] == LF) {
360                     pos++;
361                 }
362                 return res;
363             }
364         }
365 
366         char eol = NUL;
367         final StringBuilder result = new StringBuilder(80);
368         /* Typical Line Length */
369 
370         result.append(buf, pos, end - pos);
371         while (true) {
372             pos = end;
373 
374             /* Are there buffered characters available? */
375             if (eol == LF) {
376                 return result.toString();
377             }
378             // attempt to fill buffer
379             if (fillBuf() == EOF) {
380                 // characters or null.
381                 return result.length() > 0 || eol != NUL ? result.toString() : null;
382             }
383             for (int charPos = pos; charPos < end; charPos++) {
384                 final char c = buf[charPos];
385                 if (eol != NUL) {
386                     if (eol == CR && c == LF) {
387                         if (charPos > pos) {
388                             result.append(buf, pos, charPos - pos - 1);
389                         }
390                         pos = charPos + 1;
391                     } else {
392                         if (charPos > pos) {
393                             result.append(buf, pos, charPos - pos - 1);
394                         }
395                         pos = charPos;
396                     }
397                     return result.toString();
398                 }
399                 if (c == LF || c == CR) {
400                     eol = c;
401                 }
402             }
403             if (eol == NUL) {
404                 result.append(buf, pos, end - pos);
405             } else {
406                 result.append(buf, pos, end - pos - 1);
407             }
408         }
409     }
410 
411     /**
412      * Tests whether this reader is ready to be read without blocking.
413      *
414      * @return {@code true} if this reader will not block when {@code read} is called, {@code false} if unknown or blocking will occur.
415      * @throws IOException if this reader is closed or some other I/O error occurs.
416      * @see #read()
417      * @see #read(char[], int, int)
418      * @see #readLine()
419      */
420     @Override
421     public boolean ready() throws IOException {
422         checkOpen();
423         return end - pos > 0 || in.ready();
424     }
425 
426     /**
427      * Resets this reader's position to the last {@code mark()} location. Invocations of {@code read()} and {@code skip()} will occur from this new location.
428      *
429      * @throws IOException if this reader is closed or no mark has been set.
430      * @see #mark(int)
431      * @see #markSupported()
432      */
433     @Override
434     public void reset() throws IOException {
435         checkOpen();
436         if (mark == -1) {
437             throw new IOException("mark == -1");
438         }
439         pos = mark;
440     }
441 
442     /**
443      * Skips {@code amount} characters in this reader. Subsequent {@code read()}s will not return these characters unless {@code reset()} is used. Skipping
444      * characters may invalidate a mark if {@code markLimit} is surpassed.
445      *
446      * @param amount the maximum number of characters to skip.
447      * @return the number of characters actually skipped.
448      * @throws IllegalArgumentException if {@code amount < 0}.
449      * @throws IOException              if this reader is closed or some other I/O error occurs.
450      * @see #mark(int)
451      * @see #markSupported()
452      * @see #reset()
453      */
454     @Override
455     public long skip(final long amount) throws IOException {
456         if (amount < 0) {
457             throw new IllegalArgumentException();
458         }
459         checkOpen();
460         if (amount < 1) {
461             return 0;
462         }
463         if (end - pos >= amount) {
464             pos += Math.toIntExact(amount);
465             return amount;
466         }
467 
468         long read = end - pos;
469         pos = end;
470         while (read < amount) {
471             if (fillBuf() == EOF) {
472                 return read;
473             }
474             if (end - pos >= amount - read) {
475                 pos += Math.toIntExact(amount - read);
476                 return amount;
477             }
478             // Couldn't get all the characters, skip what we read
479             read += end - pos;
480             pos = end;
481         }
482         return amount;
483     }
484 
485 }