View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  
18  package org.apache.commons.io.input;
19  
20  import static org.apache.commons.io.IOUtils.CR;
21  import static org.apache.commons.io.IOUtils.EOF;
22  import static org.apache.commons.io.IOUtils.LF;
23  
24  import java.io.BufferedReader;
25  import java.io.BufferedWriter;
26  import java.io.IOException;
27  import java.io.Reader;
28  
29  import org.apache.commons.io.IOUtils;
30  
31  /**
32   * Wraps an existing {@link Reader} and buffers the input <em>without any synchronization</em>. Expensive interaction with the underlying reader is minimized,
33   * since most (smaller) requests can be satisfied by accessing the buffer alone. The drawback is that some extra space is required to hold the buffer and that
34   * copying takes place when filling that buffer, but this is usually outweighed by the performance benefits.
35   * <p>
36   * A typical application pattern for the class looks like this:
37   * </p>
38   *
39   * <pre>{@code
40   * UnsynchronizedBufferedReader buf = new UnsynchronizedBufferedReader(new FileReader("file"));
41   * }</pre>
42   * <p>
43   * Provenance: Apache Harmony's java.io.BufferedReader, renamed, and modified.
44   * </p>
45   *
46   * @see BufferedReader
47   * @see BufferedWriter
48   * @since 2.17.0
49   */
50  public class UnsynchronizedBufferedReader extends UnsynchronizedReader {
51  
52      private static final char NUL = '\0';
53  
54      private final Reader in;
55  
56      /**
57       * The characters that can be read and refilled in bulk. We maintain three indices into this buffer:
58       *
59       * <pre>
60       *     { X X X X X X X X X X X X - - }
61       *           ^     ^             ^
62       *           |     |             |
63       *         mark   pos           end
64       * </pre>
65       * <p>
66       * Pos points to the next readable character. End is one greater than the last readable character. When {@code pos == end}, the buffer is empty and must be
67       * {@link #fillBuf() filled} before characters can be read.
68       * </p>
69       * <p>
70       * Mark is the value pos will be set to on calls to {@link #reset()}. Its value is in the range {@code [0...pos]}. If the mark is {@code -1}, the buffer
71       * cannot be reset.
72       * </p>
73       * <p>
74       * MarkLimit limits the distance between the mark and the pos. When this limit is exceeded, {@link #reset()} is permitted (but not required) to throw an
75       * exception. For shorter distances, {@link #reset()} shall not throw (unless the reader is closed).
76       * </p>
77       */
78      private char[] buf;
79  
80      private int pos;
81  
82      private int end;
83  
84      private int mark = -1;
85  
86      private int markLimit = -1;
87  
88      /**
89       * Constructs a new BufferedReader on the Reader {@code in}. The buffer gets the default size (8 KB).
90       *
91       * @param in the Reader that is buffered.
92       */
93      public UnsynchronizedBufferedReader(final Reader in) {
94          this(in, IOUtils.DEFAULT_BUFFER_SIZE);
95      }
96  
97      /**
98       * Constructs a new BufferedReader on the Reader {@code in}. The buffer size is specified by the parameter {@code size}.
99       *
100      * @param in   the Reader that is buffered.
101      * @param size the size of the buffer to allocate.
102      * @throws IllegalArgumentException if {@code size <= 0}.
103      */
104     public UnsynchronizedBufferedReader(final Reader in, final int size) {
105         if (size <= 0) {
106             throw new IllegalArgumentException("size <= 0");
107         }
108         this.in = in;
109         buf = new char[size];
110     }
111 
112     /**
113      * Peeks at the next input character, refilling the buffer if necessary. If this character is a newline character ("\n"), it is discarded.
114      */
115     final void chompNewline() throws IOException {
116         if ((pos != end || fillBuf() != EOF) && buf[pos] == LF) {
117             pos++;
118         }
119     }
120 
121     /**
122      * Closes this reader. This implementation closes the buffered source reader and releases the buffer. Nothing is done if this reader has already been
123      * closed.
124      *
125      * @throws IOException if an error occurs while closing this reader.
126      */
127     @Override
128     public void close() throws IOException {
129         if (!isClosed()) {
130             in.close();
131             buf = null;
132             super.close();
133         }
134     }
135 
136     /**
137      * Populates the buffer with data. It is an error to call this method when the buffer still contains data; ie. if {@code pos < end}.
138      *
139      * @return the number of bytes read into the buffer, or -1 if the end of the source stream has been reached.
140      */
141     private int fillBuf() throws IOException {
142         // assert(pos == end);
143 
144         if (mark == EOF || pos - mark >= markLimit) {
145             /* mark isn't set or has exceeded its limit. use the whole buffer */
146             final int result = in.read(buf, 0, buf.length);
147             if (result > 0) {
148                 mark = -1;
149                 pos = 0;
150                 end = result;
151             }
152             return result;
153         }
154 
155         if (mark == 0 && markLimit > buf.length) {
156             /* the only way to make room when mark=0 is by growing the buffer */
157             int newLength = buf.length * 2;
158             if (newLength > markLimit) {
159                 newLength = markLimit;
160             }
161             final char[] newbuf = new char[newLength];
162             System.arraycopy(buf, 0, newbuf, 0, buf.length);
163             buf = newbuf;
164         } else if (mark > 0) {
165             /* make room by shifting the buffered data to left mark positions */
166             System.arraycopy(buf, mark, buf, 0, buf.length - mark);
167             pos -= mark;
168             end -= mark;
169             mark = 0;
170         }
171 
172         /* Set the new position and mark position */
173         final int count = in.read(buf, pos, buf.length - pos);
174         if (count != EOF) {
175             end += count;
176         }
177         return count;
178     }
179 
180     /**
181      * Sets a mark position in this reader. The parameter {@code markLimit} indicates how many characters can be read before the mark is invalidated. Calling
182      * {@link #reset()} will reposition the reader back to the marked position if {@code markLimit} has not been surpassed.
183      *
184      * @param markLimit the number of characters that can be read before the mark is invalidated.
185      * @throws IllegalArgumentException if {@code markLimit < 0}.
186      * @throws IOException              if an error occurs while setting a mark in this reader.
187      * @see #markSupported()
188      * @see #reset()
189      */
190     @Override
191     public void mark(final int markLimit) throws IOException {
192         if (markLimit < 0) {
193             throw new IllegalArgumentException();
194         }
195         checkOpen();
196         this.markLimit = markLimit;
197         mark = pos;
198     }
199 
200     /**
201      * Tests whether this reader supports the {@link #mark(int)} and {@link #reset()} methods. This implementation returns {@code true}.
202      *
203      * @return {@code true} for {@code BufferedReader}.
204      * @see #mark(int)
205      * @see #reset()
206      */
207     @Override
208     public boolean markSupported() {
209         return true;
210     }
211 
212     /**
213      * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will still return this value.
214      *
215      * @return the next character
216      * @throws IOException If an I/O error occurs
217      */
218     public int peek() throws IOException {
219         mark(1);
220         final int c = read();
221         reset();
222         return c;
223     }
224 
225     /**
226      * Populates the buffer with the next {@code buf.length} characters in the current reader without consuming them. The next call to {@link #read()} will
227      * still return the next value.
228      *
229      * @param buf the buffer to fill for the look ahead.
230      * @return the buffer itself
231      * @throws IOException If an I/O error occurs
232      */
233     public int peek(final char[] buf) throws IOException {
234         final int n = buf.length;
235         mark(n);
236         final int c = read(buf, 0, n);
237         reset();
238         return c;
239     }
240 
241     /**
242      * Reads a single character from this reader and returns it with the two higher-order bytes set to 0. If possible, BufferedReader returns a character from
243      * the buffer. If there are no characters available in the buffer, it fills the buffer and then returns a character. It returns -1 if there are no more
244      * characters in the source reader.
245      *
246      * @return the character read or -1 if the end of the source reader has been reached.
247      * @throws IOException if this reader is closed or some other I/O error occurs.
248      */
249     @Override
250     public int read() throws IOException {
251         checkOpen();
252         /* Are there buffered characters available? */
253         if (pos < end || fillBuf() != EOF) {
254             return buf[pos++];
255         }
256         return EOF;
257     }
258 
259     /**
260      * Reads at most {@code length} characters from this reader and stores them at {@code offset} in the character array {@code buffer}. Returns the number of
261      * characters actually read or -1 if the end of the source reader has been reached. If all the buffered characters have been used, a mark has not been set
262      * and the requested number of characters is larger than this readers buffer size, BufferedReader bypasses the buffer and simply places the results directly
263      * into {@code buffer}.
264      *
265      * @param buffer the character array to store the characters read.
266      * @param offset the initial position in {@code buffer} to store the bytes read from this reader.
267      * @param length the maximum number of characters to read, must be non-negative.
268      * @return number of characters read or -1 if the end of the source reader has been reached.
269      * @throws IndexOutOfBoundsException if {@code offset < 0} or {@code length < 0}, or if {@code offset + length} is greater than the size of {@code buffer}.
270      * @throws IOException               if this reader is closed or some other I/O error occurs.
271      */
272     @Override
273     public int read(final char[] buffer, int offset, final int length) throws IOException {
274         checkOpen();
275         if (offset < 0 || offset > buffer.length - length || length < 0) {
276             throw new IndexOutOfBoundsException();
277         }
278         int outstanding = length;
279         while (outstanding > 0) {
280 
281             /*
282              * If there are bytes in the buffer, grab those first.
283              */
284             final int available = end - pos;
285             if (available > 0) {
286                 final int count = available >= outstanding ? outstanding : available;
287                 System.arraycopy(buf, pos, buffer, offset, count);
288                 pos += count;
289                 offset += count;
290                 outstanding -= count;
291             }
292 
293             /*
294              * Before attempting to read from the underlying stream, make sure we really, really want to. We won't bother if we're done, or if we've already got
295              * some bytes and reading from the underlying stream would block.
296              */
297             if (outstanding == 0 || outstanding < length && !in.ready()) {
298                 break;
299             }
300 
301             // assert(pos == end);
302 
303             /*
304              * If we're unmarked and the requested size is greater than our buffer, read the bytes directly into the caller's buffer. We don't read into smaller
305              * buffers because that could result in a many reads.
306              */
307             if ((mark == -1 || pos - mark >= markLimit) && outstanding >= buf.length) {
308                 final int count = in.read(buffer, offset, outstanding);
309                 if (count > 0) {
310                     outstanding -= count;
311                     mark = -1;
312                 }
313 
314                 break; // assume the source stream gave us all that it could
315             }
316 
317             if (fillBuf() == EOF) {
318                 break; // source is exhausted
319             }
320         }
321 
322         final int count = length - outstanding;
323         return count > 0 || count == length ? count : EOF;
324     }
325 
326     /**
327      * Returns the next line of text available from this reader. A line is represented by zero or more characters followed by {@code LF}, {@code CR},
328      * {@code "\r\n"} or the end of the reader. The string does not include the newline sequence.
329      *
330      * @return the contents of the line or {@code null} if no characters were read before the end of the reader has been reached.
331      * @throws IOException if this reader is closed or some other I/O error occurs.
332      */
333     public String readLine() throws IOException {
334         checkOpen();
335         /* has the underlying stream been exhausted? */
336         if (pos == end && fillBuf() == EOF) {
337             return null;
338         }
339         for (int charPos = pos; charPos < end; charPos++) {
340             final char ch = buf[charPos];
341             if (ch > CR) {
342                 continue;
343             }
344             if (ch == LF) {
345                 final String res = new String(buf, pos, charPos - pos);
346                 pos = charPos + 1;
347                 return res;
348             }
349             if (ch == CR) {
350                 final String res = new String(buf, pos, charPos - pos);
351                 pos = charPos + 1;
352                 if ((pos < end || fillBuf() != EOF) && buf[pos] == LF) {
353                     pos++;
354                 }
355                 return res;
356             }
357         }
358 
359         char eol = NUL;
360         final StringBuilder result = new StringBuilder(80);
361         /* Typical Line Length */
362 
363         result.append(buf, pos, end - pos);
364         while (true) {
365             pos = end;
366 
367             /* Are there buffered characters available? */
368             if (eol == LF) {
369                 return result.toString();
370             }
371             // attempt to fill buffer
372             if (fillBuf() == EOF) {
373                 // characters or null.
374                 return result.length() > 0 || eol != NUL ? result.toString() : null;
375             }
376             for (int charPos = pos; charPos < end; charPos++) {
377                 final char c = buf[charPos];
378                 if (eol != NUL) {
379                     if (eol == CR && c == LF) {
380                         if (charPos > pos) {
381                             result.append(buf, pos, charPos - pos - 1);
382                         }
383                         pos = charPos + 1;
384                     } else {
385                         if (charPos > pos) {
386                             result.append(buf, pos, charPos - pos - 1);
387                         }
388                         pos = charPos;
389                     }
390                     return result.toString();
391                 }
392                 if (c == LF || c == CR) {
393                     eol = c;
394                 }
395             }
396             if (eol == NUL) {
397                 result.append(buf, pos, end - pos);
398             } else {
399                 result.append(buf, pos, end - pos - 1);
400             }
401         }
402     }
403 
404     /**
405      * Tests whether this reader is ready to be read without blocking.
406      *
407      * @return {@code true} if this reader will not block when {@code read} is called, {@code false} if unknown or blocking will occur.
408      * @throws IOException if this reader is closed or some other I/O error occurs.
409      * @see #read()
410      * @see #read(char[], int, int)
411      * @see #readLine()
412      */
413     @Override
414     public boolean ready() throws IOException {
415         checkOpen();
416         return end - pos > 0 || in.ready();
417     }
418 
419     /**
420      * Resets this reader's position to the last {@code mark()} location. Invocations of {@code read()} and {@code skip()} will occur from this new location.
421      *
422      * @throws IOException if this reader is closed or no mark has been set.
423      * @see #mark(int)
424      * @see #markSupported()
425      */
426     @Override
427     public void reset() throws IOException {
428         checkOpen();
429         if (mark == -1) {
430             throw new IOException("mark == -1");
431         }
432         pos = mark;
433     }
434 
435     /**
436      * Skips {@code amount} characters in this reader. Subsequent {@code read()}s will not return these characters unless {@code reset()} is used. Skipping
437      * characters may invalidate a mark if {@code markLimit} is surpassed.
438      *
439      * @param amount the maximum number of characters to skip.
440      * @return the number of characters actually skipped.
441      * @throws IllegalArgumentException if {@code amount < 0}.
442      * @throws IOException              if this reader is closed or some other I/O error occurs.
443      * @see #mark(int)
444      * @see #markSupported()
445      * @see #reset()
446      */
447     @Override
448     public long skip(final long amount) throws IOException {
449         if (amount < 0) {
450             throw new IllegalArgumentException();
451         }
452         checkOpen();
453         if (amount < 1) {
454             return 0;
455         }
456         if (end - pos >= amount) {
457             pos += Math.toIntExact(amount);
458             return amount;
459         }
460 
461         long read = end - pos;
462         pos = end;
463         while (read < amount) {
464             if (fillBuf() == EOF) {
465                 return read;
466             }
467             if (end - pos >= amount - read) {
468                 pos += Math.toIntExact(amount - read);
469                 return amount;
470             }
471             // Couldn't get all the characters, skip what we read
472             read += end - pos;
473             pos = end;
474         }
475         return amount;
476     }
477 
478 }