1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.io.input;
19
20 import static org.apache.commons.io.IOUtils.CR;
21 import static org.apache.commons.io.IOUtils.EOF;
22 import static org.apache.commons.io.IOUtils.LF;
23
24 import java.io.BufferedReader;
25 import java.io.BufferedWriter;
26 import java.io.IOException;
27 import java.io.Reader;
28
29 import org.apache.commons.io.IOUtils;
30
31 /**
32 * Wraps an existing {@link Reader} and buffers the input <em>without any synchronization</em>. Expensive interaction with the underlying reader is minimized,
33 * since most (smaller) requests can be satisfied by accessing the buffer alone. The drawback is that some extra space is required to hold the buffer and that
34 * copying takes place when filling that buffer, but this is usually outweighed by the performance benefits.
35 * <p>
36 * A typical application pattern for the class looks like this:
37 * </p>
38 *
39 * <pre>{@code
40 * UnsynchronizedBufferedReader buf = new UnsynchronizedBufferedReader(new FileReader("file"));
41 * }</pre>
42 * <p>
43 * Provenance: Apache Harmony's java.io.BufferedReader, renamed, and modified.
44 * </p>
45 *
46 * @see BufferedReader
47 * @see BufferedWriter
48 * @since 2.17.0
49 */
50 public class UnsynchronizedBufferedReader extends UnsynchronizedReader {
51
52 private static final char NUL = '\0';
53
54 private final Reader in;
55
56 /**
57 * The characters that can be read and refilled in bulk. We maintain three indices into this buffer:
58 *
59 * <pre>
60 * { X X X X X X X X X X X X - - }
61 * ^ ^ ^
62 * | | |
63 * mark pos end
64 * </pre>
65 * <p>
66 * Pos points to the next readable character. End is one greater than the last readable character. When {@code pos == end}, the buffer is empty and must be
67 * {@link #fillBuf() filled} before characters can be read.
68 * </p>
69 * <p>
70 * Mark is the value pos will be set to on calls to {@link #reset()}. Its value is in the range {@code [0...pos]}. If the mark is {@code -1}, the buffer
71 * cannot be reset.
72 * </p>
73 * <p>
74 * MarkLimit limits the distance between the mark and the pos. When this limit is exceeded, {@link #reset()} is permitted (but not required) to throw an
75 * exception. For shorter distances, {@link #reset()} shall not throw (unless the reader is closed).
76 * </p>
77 */
78 private char[] buf;
79
80 private int pos;
81
82 private int end;
83
84 private int mark = -1;
85
86 private int markLimit = -1;
87
88 /**
89 * Constructs a new BufferedReader on the Reader {@code in}. The buffer gets the default size (8 KB).
90 *
91 * @param in the Reader that is buffered.
92 */
93 public UnsynchronizedBufferedReader(final Reader in) {
94 this(in, IOUtils.DEFAULT_BUFFER_SIZE);
95 }
96
97 /**
98 * Constructs a new BufferedReader on the Reader {@code in}. The buffer size is specified by the parameter {@code size}.
99 *
100 * @param in the Reader that is buffered.
101 * @param size the size of the buffer to allocate.
102 * @throws IllegalArgumentException if {@code size <= 0}.
103 */
104 public UnsynchronizedBufferedReader(final Reader in, final int size) {
105 if (size <= 0) {
106 throw new IllegalArgumentException("size <= 0");
107 }
108 this.in = in;
109 buf = new char[size];
110 }
111
112 /**
113 * Peeks at the next input character, refilling the buffer if necessary. If this character is a newline character ("\n"), it is discarded.
114 */
115 final void chompNewline() throws IOException {
116 if ((pos != end || fillBuf() != EOF) && buf[pos] == LF) {
117 pos++;
118 }
119 }
120
121 /**
122 * Closes this reader. This implementation closes the buffered source reader and releases the buffer. Nothing is done if this reader has already been
123 * closed.
124 *
125 * @throws IOException if an error occurs while closing this reader.
126 */
127 @Override
128 public void close() throws IOException {
129 if (!isClosed()) {
130 in.close();
131 buf = null;
132 super.close();
133 }
134 }
135
136 /**
137 * Populates the buffer with data. It is an error to call this method when the buffer still contains data; ie. if {@code pos < end}.
138 *
139 * @return the number of bytes read into the buffer, or -1 if the end of the source stream has been reached.
140 */
141 private int fillBuf() throws IOException {
142 // assert(pos == end);
143
144 if (mark == EOF || pos - mark >= markLimit) {
145 /* mark isn't set or has exceeded its limit. use the whole buffer */
146 final int result = in.read(buf, 0, buf.length);
147 if (result > 0) {
148 mark = -1;
149 pos = 0;
150 end = result;
151 }
152 return result;
153 }
154
155 if (mark == 0 && markLimit > buf.length) {
156 /* the only way to make room when mark=0 is by growing the buffer */
157 int newLength = buf.length * 2;
158 if (newLength > markLimit) {
159 newLength = markLimit;
160 }
161 final char[] newbuf = new char[newLength];
162 System.arraycopy(buf, 0, newbuf, 0, buf.length);
163 buf = newbuf;
164 } else if (mark > 0) {
165 /* make room by shifting the buffered data to left mark positions */
166 System.arraycopy(buf, mark, buf, 0, buf.length - mark);
167 pos -= mark;
168 end -= mark;
169 mark = 0;
170 }
171
172 /* Set the new position and mark position */
173 final int count = in.read(buf, pos, buf.length - pos);
174 if (count != EOF) {
175 end += count;
176 }
177 return count;
178 }
179
180 /**
181 * Sets a mark position in this reader. The parameter {@code markLimit} indicates how many characters can be read before the mark is invalidated. Calling
182 * {@link #reset()} will reposition the reader back to the marked position if {@code markLimit} has not been surpassed.
183 *
184 * @param markLimit the number of characters that can be read before the mark is invalidated.
185 * @throws IllegalArgumentException if {@code markLimit < 0}.
186 * @throws IOException if an error occurs while setting a mark in this reader.
187 * @see #markSupported()
188 * @see #reset()
189 */
190 @Override
191 public void mark(final int markLimit) throws IOException {
192 if (markLimit < 0) {
193 throw new IllegalArgumentException();
194 }
195 checkOpen();
196 this.markLimit = markLimit;
197 mark = pos;
198 }
199
200 /**
201 * Tests whether this reader supports the {@link #mark(int)} and {@link #reset()} methods. This implementation returns {@code true}.
202 *
203 * @return {@code true} for {@code BufferedReader}.
204 * @see #mark(int)
205 * @see #reset()
206 */
207 @Override
208 public boolean markSupported() {
209 return true;
210 }
211
212 /**
213 * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will still return this value.
214 *
215 * @return the next character
216 * @throws IOException If an I/O error occurs
217 */
218 public int peek() throws IOException {
219 mark(1);
220 final int c = read();
221 reset();
222 return c;
223 }
224
225 /**
226 * Populates the buffer with the next {@code buf.length} characters in the current reader without consuming them. The next call to {@link #read()} will
227 * still return the next value.
228 *
229 * @param buf the buffer to fill for the look ahead.
230 * @return the buffer itself
231 * @throws IOException If an I/O error occurs
232 */
233 public int peek(final char[] buf) throws IOException {
234 final int n = buf.length;
235 mark(n);
236 final int c = read(buf, 0, n);
237 reset();
238 return c;
239 }
240
241 /**
242 * Reads a single character from this reader and returns it with the two higher-order bytes set to 0. If possible, BufferedReader returns a character from
243 * the buffer. If there are no characters available in the buffer, it fills the buffer and then returns a character. It returns -1 if there are no more
244 * characters in the source reader.
245 *
246 * @return the character read or -1 if the end of the source reader has been reached.
247 * @throws IOException if this reader is closed or some other I/O error occurs.
248 */
249 @Override
250 public int read() throws IOException {
251 checkOpen();
252 /* Are there buffered characters available? */
253 if (pos < end || fillBuf() != EOF) {
254 return buf[pos++];
255 }
256 return EOF;
257 }
258
259 /**
260 * Reads at most {@code length} characters from this reader and stores them at {@code offset} in the character array {@code buffer}. Returns the number of
261 * characters actually read or -1 if the end of the source reader has been reached. If all the buffered characters have been used, a mark has not been set
262 * and the requested number of characters is larger than this readers buffer size, BufferedReader bypasses the buffer and simply places the results directly
263 * into {@code buffer}.
264 *
265 * @param buffer the character array to store the characters read.
266 * @param offset the initial position in {@code buffer} to store the bytes read from this reader.
267 * @param length the maximum number of characters to read, must be non-negative.
268 * @return number of characters read or -1 if the end of the source reader has been reached.
269 * @throws IndexOutOfBoundsException if {@code offset < 0} or {@code length < 0}, or if {@code offset + length} is greater than the size of {@code buffer}.
270 * @throws IOException if this reader is closed or some other I/O error occurs.
271 */
272 @Override
273 public int read(final char[] buffer, int offset, final int length) throws IOException {
274 /*
275 * First throw on a closed reader, then check the parameters.
276 *
277 * This behavior is not specified in the Javadoc, but is followed by most readers in java.io.
278 */
279 checkOpen();
280 IOUtils.checkFromIndexSize(buffer, offset, length);
281 if (length == 0) {
282 return 0;
283 }
284
285 int outstanding = length;
286 while (outstanding > 0) {
287
288 /*
289 * If there are bytes in the buffer, grab those first.
290 */
291 final int available = end - pos;
292 if (available > 0) {
293 final int count = available >= outstanding ? outstanding : available;
294 System.arraycopy(buf, pos, buffer, offset, count);
295 pos += count;
296 offset += count;
297 outstanding -= count;
298 }
299
300 /*
301 * Before attempting to read from the underlying stream, make sure we really, really want to. We won't bother if we're done, or if we've already got
302 * some bytes and reading from the underlying stream would block.
303 */
304 if (outstanding == 0 || outstanding < length && !in.ready()) {
305 break;
306 }
307
308 // assert(pos == end);
309
310 /*
311 * If we're unmarked and the requested size is greater than our buffer, read the bytes directly into the caller's buffer. We don't read into smaller
312 * buffers because that could result in a many reads.
313 */
314 if ((mark == -1 || pos - mark >= markLimit) && outstanding >= buf.length) {
315 final int count = in.read(buffer, offset, outstanding);
316 if (count > 0) {
317 outstanding -= count;
318 mark = -1;
319 }
320
321 break; // assume the source stream gave us all that it could
322 }
323
324 if (fillBuf() == EOF) {
325 break; // source is exhausted
326 }
327 }
328
329 final int count = length - outstanding;
330 return count > 0 || count == length ? count : EOF;
331 }
332
333 /**
334 * Returns the next line of text available from this reader. A line is represented by zero or more characters followed by {@code LF}, {@code CR},
335 * {@code "\r\n"} or the end of the reader. The string does not include the newline sequence.
336 *
337 * @return the contents of the line or {@code null} if no characters were read before the end of the reader has been reached.
338 * @throws IOException if this reader is closed or some other I/O error occurs.
339 */
340 public String readLine() throws IOException {
341 checkOpen();
342 /* has the underlying stream been exhausted? */
343 if (pos == end && fillBuf() == EOF) {
344 return null;
345 }
346 for (int charPos = pos; charPos < end; charPos++) {
347 final char ch = buf[charPos];
348 if (ch > CR) {
349 continue;
350 }
351 if (ch == LF) {
352 final String res = new String(buf, pos, charPos - pos);
353 pos = charPos + 1;
354 return res;
355 }
356 if (ch == CR) {
357 final String res = new String(buf, pos, charPos - pos);
358 pos = charPos + 1;
359 if ((pos < end || fillBuf() != EOF) && buf[pos] == LF) {
360 pos++;
361 }
362 return res;
363 }
364 }
365
366 char eol = NUL;
367 final StringBuilder result = new StringBuilder(80);
368 /* Typical Line Length */
369
370 result.append(buf, pos, end - pos);
371 while (true) {
372 pos = end;
373
374 /* Are there buffered characters available? */
375 if (eol == LF) {
376 return result.toString();
377 }
378 // attempt to fill buffer
379 if (fillBuf() == EOF) {
380 // characters or null.
381 return result.length() > 0 || eol != NUL ? result.toString() : null;
382 }
383 for (int charPos = pos; charPos < end; charPos++) {
384 final char c = buf[charPos];
385 if (eol != NUL) {
386 if (eol == CR && c == LF) {
387 if (charPos > pos) {
388 result.append(buf, pos, charPos - pos - 1);
389 }
390 pos = charPos + 1;
391 } else {
392 if (charPos > pos) {
393 result.append(buf, pos, charPos - pos - 1);
394 }
395 pos = charPos;
396 }
397 return result.toString();
398 }
399 if (c == LF || c == CR) {
400 eol = c;
401 }
402 }
403 if (eol == NUL) {
404 result.append(buf, pos, end - pos);
405 } else {
406 result.append(buf, pos, end - pos - 1);
407 }
408 }
409 }
410
411 /**
412 * Tests whether this reader is ready to be read without blocking.
413 *
414 * @return {@code true} if this reader will not block when {@code read} is called, {@code false} if unknown or blocking will occur.
415 * @throws IOException if this reader is closed or some other I/O error occurs.
416 * @see #read()
417 * @see #read(char[], int, int)
418 * @see #readLine()
419 */
420 @Override
421 public boolean ready() throws IOException {
422 checkOpen();
423 return end - pos > 0 || in.ready();
424 }
425
426 /**
427 * Resets this reader's position to the last {@code mark()} location. Invocations of {@code read()} and {@code skip()} will occur from this new location.
428 *
429 * @throws IOException if this reader is closed or no mark has been set.
430 * @see #mark(int)
431 * @see #markSupported()
432 */
433 @Override
434 public void reset() throws IOException {
435 checkOpen();
436 if (mark == -1) {
437 throw new IOException("mark == -1");
438 }
439 pos = mark;
440 }
441
442 /**
443 * Skips {@code amount} characters in this reader. Subsequent {@code read()}s will not return these characters unless {@code reset()} is used. Skipping
444 * characters may invalidate a mark if {@code markLimit} is surpassed.
445 *
446 * @param amount the maximum number of characters to skip.
447 * @return the number of characters actually skipped.
448 * @throws IllegalArgumentException if {@code amount < 0}.
449 * @throws IOException if this reader is closed or some other I/O error occurs.
450 * @see #mark(int)
451 * @see #markSupported()
452 * @see #reset()
453 */
454 @Override
455 public long skip(final long amount) throws IOException {
456 if (amount < 0) {
457 throw new IllegalArgumentException();
458 }
459 checkOpen();
460 if (amount < 1) {
461 return 0;
462 }
463 if (end - pos >= amount) {
464 pos += Math.toIntExact(amount);
465 return amount;
466 }
467
468 long read = end - pos;
469 pos = end;
470 while (read < amount) {
471 if (fillBuf() == EOF) {
472 return read;
473 }
474 if (end - pos >= amount - read) {
475 pos += Math.toIntExact(amount - read);
476 return amount;
477 }
478 // Couldn't get all the characters, skip what we read
479 read += end - pos;
480 pos = end;
481 }
482 return amount;
483 }
484
485 }