1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.io.input;
18
19 import java.io.Closeable;
20 import java.io.File;
21 import java.io.IOException;
22 import java.io.UnsupportedEncodingException;
23 import java.nio.ByteBuffer;
24 import java.nio.channels.SeekableByteChannel;
25 import java.nio.charset.Charset;
26 import java.nio.charset.CharsetEncoder;
27 import java.nio.charset.StandardCharsets;
28 import java.nio.file.Path;
29 import java.nio.file.StandardOpenOption;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.Collections;
33 import java.util.Iterator;
34 import java.util.List;
35
36 import org.apache.commons.io.Charsets;
37 import org.apache.commons.io.FileSystem;
38 import org.apache.commons.io.StandardLineSeparator;
39 import org.apache.commons.io.build.AbstractStreamBuilder;
40 import org.apache.commons.io.function.IOIterable;
41 import org.apache.commons.io.function.IOIterator;
42
43 /**
44 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
45 * <p>
46 * To build an instance, use {@link Builder}.
47 * </p>
48 * <p>
49 * For example:
50 * </p>
51 * <pre>
52 * <code>
53 * try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
54 * .setPath(path)
55 * .setBufferSize(4096)
56 * .setCharset(StandardCharsets.UTF_8)
57 * .get()) {
58 * reader.forEach(line -> System.out.println(line));
59 * }
60 * </code>
61 * </pre>
62 *
63 * @see Builder
64 * @since 2.2
65 */
66 public class ReversedLinesFileReader implements Closeable, IOIterable<String> {
67
68 // @formatter:off
69 /**
70 * Builds a new {@link ReversedLinesFileReader}.
71 *
72 * <p>
73 * For example:
74 * </p>
75 * <pre>{@code
76 * ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
77 * .setPath(path)
78 * .setBufferSize(4096)
79 * .setCharset(StandardCharsets.UTF_8)
80 * .get());}
81 * </pre>
82 *
83 * @see #get()
84 * @since 2.12.0
85 */
86 // @formatter:on
87 public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
88
89 /**
90 * Constructs a new builder of {@link ReversedLinesFileReader}.
91 */
92 public Builder() {
93 setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
94 setBufferSize(DEFAULT_BLOCK_SIZE);
95 setOpenOptions(StandardOpenOption.READ);
96 }
97
98 /**
99 * Builds a new {@link ReversedLinesFileReader}.
100 * <p>
101 * You must set an aspect that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception.
102 * </p>
103 * <p>
104 * This builder uses the following aspects:
105 * </p>
106 * <ul>
107 * <li>{@link #getPath()} gets the target aspect.</li>
108 * <li>{@link #getBufferSize()}</li>
109 * <li>{@link #getCharset()}</li>
110 * </ul>
111 *
112 * @return a new instance.
113 * @throws IllegalStateException if the {@code origin} is {@code null}.
114 * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
115 * @throws IOException if an I/O error occurs converting to a {@link Path} using {@link #getPath()}.
116 * @see #getPath()
117 * @see #getBufferSize()
118 * @see #getCharset()
119 * @see #getUnchecked()
120 */
121 @Override
122 public ReversedLinesFileReader get() throws IOException {
123 return new ReversedLinesFileReader(this);
124 }
125
126 }
127
128 private final class FilePart {
129 private final long partNumber;
130
131 private final byte[] data;
132
133 private byte[] leftOver;
134
135 private int currentLastBytePos;
136
137 /**
138 * Constructs a new instance.
139 *
140 * @param partNumber the part number
141 * @param length its length
142 * @param leftOverOfLastFilePart remainder
143 * @throws IOException if there is a problem reading the file
144 */
145 private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
146 this.partNumber = partNumber;
147 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
148 this.data = new byte[dataLength];
149 final long off = (partNumber - 1) * blockSize;
150
151 // read data
152 if (partNumber > 0 /* file not empty */) {
153 channel.position(off);
154 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
155 if (countRead != length) {
156 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
157 }
158 }
159 // copy left over part into data arr
160 if (leftOverOfLastFilePart != null) {
161 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
162 }
163 this.currentLastBytePos = data.length - 1;
164 this.leftOver = null;
165 }
166
167 /**
168 * Constructs the buffer containing any leftover bytes.
169 */
170 private void createLeftOver() {
171 final int lineLengthBytes = currentLastBytePos + 1;
172 if (lineLengthBytes > 0) {
173 // create left over for next block
174 leftOver = Arrays.copyOf(data, lineLengthBytes);
175 } else {
176 leftOver = null;
177 }
178 currentLastBytePos = -1;
179 }
180
181 /**
182 * Finds the new-line sequence and return its length.
183 *
184 * @param data buffer to scan
185 * @param i start offset in buffer
186 * @return length of newline sequence or 0 if none found
187 */
188 private int getNewLineMatchByteCount(final byte[] data, final int i) {
189 for (final byte[] newLineSequence : newLineSequences) {
190 boolean match = true;
191 for (int j = newLineSequence.length - 1; j >= 0; j--) {
192 final int k = i + j - (newLineSequence.length - 1);
193 match &= k >= 0 && data[k] == newLineSequence[j];
194 }
195 if (match) {
196 return newLineSequence.length;
197 }
198 }
199 return 0;
200 }
201
202 /**
203 * Reads a line.
204 *
205 * @return the line or null
206 */
207 private String readLine() { //NOPMD Bug in PMD
208
209 String line = null;
210 int newLineMatchByteCount;
211
212 final boolean isLastFilePart = partNumber == 1;
213
214 int i = currentLastBytePos;
215 while (i > -1) {
216
217 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
218 // avoidNewlineSplitBuffer: for all except the last file part we
219 // take a few bytes to the next file part to avoid splitting of newlines
220 createLeftOver();
221 break; // skip last few bytes and leave it to the next file part
222 }
223
224 // check for newline
225 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
226 final int lineStart = i + 1;
227 final int lineLengthBytes = currentLastBytePos - lineStart + 1;
228
229 if (lineLengthBytes < 0) {
230 throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
231 }
232 final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
233
234 line = new String(lineData, charset);
235
236 currentLastBytePos = i - newLineMatchByteCount;
237 break; // found line
238 }
239
240 // move cursor
241 i -= byteDecrement;
242
243 // end of file part handling
244 if (i < 0) {
245 createLeftOver();
246 break; // end of file part
247 }
248 }
249
250 // last file part handling
251 if (isLastFilePart && leftOver != null) {
252 // there will be partNumber line break anymore, this is the first line of the file
253 line = new String(leftOver, charset);
254 leftOver = null;
255 }
256
257 return line;
258 }
259
260 /**
261 * Handles block rollover
262 *
263 * @return the new FilePart or null
264 * @throws IOException if there was a problem reading the file
265 */
266 private FilePart rollOver() throws IOException {
267
268 if (currentLastBytePos > -1) {
269 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
270 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
271 }
272
273 if (partNumber > 1) {
274 return new FilePart(partNumber - 1, blockSize, leftOver);
275 }
276 // NO 1 was the last FilePart, we're finished
277 if (leftOver != null) {
278 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
279 + new String(leftOver, charset));
280 }
281 return null;
282 }
283 }
284
285 private static final String EMPTY_STRING = "";
286
287 private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
288
289 /**
290 * Constructs a new {@link Builder}.
291 *
292 * @return a new {@link Builder}.
293 * @since 2.12.0
294 */
295 public static Builder builder() {
296 return new Builder();
297 }
298
299 private final int blockSize;
300 private final Charset charset;
301 private final SeekableByteChannel channel;
302 private final long totalByteLength;
303 private final long totalBlockCount;
304 private final byte[][] newLineSequences;
305 private final int avoidNewlineSplitBufferSize;
306 private final int byteDecrement;
307 private FilePart currentFilePart;
308 private boolean trailingNewlineOfFileSkipped;
309
/**
 * Constructs a new instance from the given builder.
 * <p>
 * The charset is validated before the channel is opened. If anything fails after the channel has been opened, the channel is closed again before
 * the exception is propagated.
 * </p>
 *
 * @param builder provides the buffer size (used as block size), the charset and the channel
 * @throws UnsupportedEncodingException if the charset is not supported by this reader
 * @throws IOException                  if an I/O error occurs
 */
private ReversedLinesFileReader(final Builder builder) throws IOException {
    this.blockSize = builder.getBufferSize();
    this.charset = Charsets.toCharset(builder.getCharset());
    // check & prepare encoding
    final CharsetEncoder charsetEncoder = charset.newEncoder();
    final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
    if (maxBytesPerChar == 1f || StandardCharsets.UTF_8.equals(charset)) {
        // all one byte encodings are no problem
        byteDecrement = 1;
    } else if (isCharset(charset, "Shift_JIS") || // Same as for UTF-8
    // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
            isCharset(charset, "windows-31j") || // Windows code page 932 (Japanese)
            isCharset(charset, "x-windows-949") || // Windows code page 949 (Korean)
            isCharset(charset, "gbk") || // Windows code page 936 (Simplified Chinese)
            isCharset(charset, "x-windows-950")) { // Windows code page 950 (Traditional Chinese)
        byteDecrement = 1;
    } else if (StandardCharsets.UTF_16BE.equals(charset) || StandardCharsets.UTF_16LE.equals(charset)) {
        // UTF-16 new line sequences are not allowed as second tuple of four byte
        // sequences,
        // however byte order has to be specified
        byteDecrement = 2;
    } else if (StandardCharsets.UTF_16.equals(charset)) {
        throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
    } else {
        throw new UnsupportedEncodingException("Encoding " + charset + " is not supported yet (feel free to submit a patch)");
    }
    // NOTE: The new line sequences are matched in the order given, so it is
    // important that \r\n is BEFORE \n
    this.newLineSequences = new byte[][] { StandardLineSeparator.CRLF.getBytes(charset), StandardLineSeparator.LF.getBytes(charset),
            StandardLineSeparator.CR.getBytes(charset) };
    this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
    // Open file
    this.channel = builder.getChannel(SeekableByteChannel.class);
    try {
        this.totalByteLength = channel.size();
        int lastBlockLength = (int) (totalByteLength % blockSize);
        if (lastBlockLength > 0) {
            this.totalBlockCount = totalByteLength / blockSize + 1;
        } else {
            this.totalBlockCount = totalByteLength / blockSize;
            if (totalByteLength > 0) {
                lastBlockLength = blockSize;
            }
        }
        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
    } catch (final IOException | RuntimeException e) {
        // No instance is handed to the caller, so nobody else could close the channel.
        try {
            channel.close();
        } catch (final IOException closeException) {
            e.addSuppressed(closeException);
        }
        throw e;
    }
}

/**
 * Tests whether the given charset is the one known under the given name, without failing if this Java runtime does not provide a charset of
 * that name.
 *
 * @param charset the charset to test
 * @param name    a legal charset name or alias
 * @return {@code true} if {@code name} is supported and denotes a charset equal to {@code charset}
 */
private static boolean isCharset(final Charset charset, final String name) {
    return Charset.isSupported(name) && charset.equals(Charset.forName(name));
}
355
/**
 * Constructs a ReversedLinesFileReader for the given file, using the default block size (the block size reported by the current file system) and
 * the virtual machine's {@linkplain Charset#defaultCharset() default charset}.
 *
 * @param file the file to be read
 * @throws IOException if an I/O error occurs.
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file) throws IOException {
    this(file.toPath(), DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
}
367
/**
 * Constructs a ReversedLinesFileReader for the given file, using the default block size (the block size reported by the current file system) and
 * the specified encoding.
 *
 * @param file    the file to be read
 * @param charset the charset to use, null uses the default Charset.
 * @throws IOException if an I/O error occurs.
 * @since 2.5
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
    this(file.toPath(), DEFAULT_BLOCK_SIZE, charset);
}
382
/**
 * Constructs a ReversedLinesFileReader for the given file with an explicit block size and encoding.
 *
 * @param file      the file to be read
 * @param blockSize size of the internal buffer (for ideal performance this should match with the block size of the underlying file system).
 * @param charset   the encoding of the file, null uses the default Charset.
 * @throws IOException if an I/O error occurs.
 * @since 2.3
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
    this(builder().setPath(file.toPath()).setBufferSize(blockSize).setCharset(charset));
}
399
/**
 * Constructs a ReversedLinesFileReader for the given file with an explicit block size and an encoding given by name.
 *
 * @param file        the file to be read
 * @param blockSize   size of the internal buffer (for ideal performance this should match with the block size of the underlying file system).
 * @param charsetName the encoding of the file, null uses the default Charset.
 * @throws IOException                                  if an I/O error occurs
 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
    this(file.toPath(), blockSize, Charsets.toCharset(charsetName));
}
416
/**
 * Constructs a ReversedLinesFileReader for the given path, using the default block size (the block size reported by the current file system) and
 * the specified encoding.
 *
 * @param file    the file to be read
 * @param charset the charset to use, null uses the default Charset.
 * @throws IOException if an I/O error occurs.
 * @since 2.7
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
    this(builder().setPath(file).setBufferSize(DEFAULT_BLOCK_SIZE).setCharset(charset));
}
431
/**
 * Constructs a ReversedLinesFileReader for the given path with an explicit block size and encoding.
 * <p>
 * The arguments are put into a new {@link Builder}, from which the reader is then constructed.
 * </p>
 *
 * @param file      the file to be read
 * @param blockSize size of the internal buffer (for ideal performance this should match with the block size of the underlying file system).
 * @param charset   the encoding of the file, null uses the default Charset.
 * @throws IOException if an I/O error occurs.
 * @since 2.7
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
    this(new Builder().setPath(file).setBufferSize(blockSize).setCharset(charset));
}
448
/**
 * Constructs a ReversedLinesFileReader for the given path with an explicit block size and an encoding given by name.
 * <p>
 * The name is resolved to a {@link Charset} first; everything else is delegated to the constructor taking a {@link Charset}.
 * </p>
 *
 * @param file        the file to be read
 * @param blockSize   size of the internal buffer (for ideal performance this should match with the block size of the underlying file system).
 * @param charsetName the encoding of the file, null uses the default Charset.
 * @throws IOException                                  if an I/O error occurs
 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
 * @since 2.7
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
    this(file, blockSize, Charsets.toCharset(charsetName));
}
466
467 /**
468 * Closes underlying resources.
469 *
470 * @throws IOException if an I/O error occurs.
471 */
472 @Override
473 public void close() throws IOException {
474 channel.close();
475 }
476
477 @Override
478 public IOIterator<String> iterator() {
479 return new IOIterator<String>() {
480
481 private String next;
482
483 @Override
484 public boolean hasNext() throws IOException {
485 if (next == null) {
486 next = readLine();
487 }
488 return next != null;
489 }
490
491 @Override
492 public String next() throws IOException {
493 if (next == null) {
494 next = readLine();
495 }
496 final String tmp = next;
497 next = null;
498 return tmp;
499 }
500
501 @Override
502 public Iterator<String> unwrap() {
503 return null;
504 }
505
506 };
507 }
508
509 /**
510 * Returns the lines of the file from bottom to top.
511 *
512 * @return the next line or null if the start of the file is reached
513 * @throws IOException if an I/O error occurs.
514 */
515 public String readLine() throws IOException {
516 String line = currentFilePart.readLine();
517 while (line == null) {
518 currentFilePart = currentFilePart.rollOver();
519 if (currentFilePart == null) {
520 // partNumber more FileParts: we're done, leave line set to null
521 break;
522 }
523 line = currentFilePart.readLine();
524 }
525 // aligned behavior with BufferedReader that doesn't return a last, empty line
526 if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
527 trailingNewlineOfFileSkipped = true;
528 line = readLine();
529 }
530 return line;
531 }
532
533 /**
534 * Returns {@code lineCount} lines of the file from bottom to top.
535 * <p>
536 * If there are less than {@code lineCount} lines in the file, then that's what
537 * you get.
538 * </p>
539 * <p>
540 * Note: You can easily flip the result with {@link Collections#reverse(List)}.
541 * </p>
542 *
543 * @param lineCount How many lines to read.
544 * @return A new list
545 * @throws IOException if an I/O error occurs.
546 * @since 2.8.0
547 */
548 public List<String> readLines(final int lineCount) throws IOException {
549 if (lineCount < 0) {
550 throw new IllegalArgumentException("lineCount < 0");
551 }
552 final ArrayList<String> arrayList = new ArrayList<>(lineCount);
553 for (int i = 0; i < lineCount; i++) {
554 final String line = readLine();
555 if (line == null) {
556 return arrayList;
557 }
558 arrayList.add(line);
559 }
560 return arrayList;
561 }
562
563 /**
564 * Returns the last {@code lineCount} lines of the file.
565 * <p>
566 * If there are less than {@code lineCount} lines in the file, then that's what
567 * you get.
568 * </p>
569 *
570 * @param lineCount How many lines to read.
571 * @return A String.
572 * @throws IOException if an I/O error occurs.
573 * @since 2.8.0
574 */
575 public String toString(final int lineCount) throws IOException {
576 final List<String> lines = readLines(lineCount);
577 Collections.reverse(lines);
578 return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
579 }
580
581 @Override
582 public Iterable<String> unwrap() {
583 return null;
584 }
585
586 }