1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.io.input;
18
19 import java.io.Closeable;
20 import java.io.File;
21 import java.io.IOException;
22 import java.io.UnsupportedEncodingException;
23 import java.nio.ByteBuffer;
24 import java.nio.channels.SeekableByteChannel;
25 import java.nio.charset.Charset;
26 import java.nio.charset.CharsetEncoder;
27 import java.nio.charset.StandardCharsets;
28 import java.nio.file.Path;
29 import java.nio.file.StandardOpenOption;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.Collections;
33 import java.util.Iterator;
34 import java.util.List;
35
36 import org.apache.commons.io.Charsets;
37 import org.apache.commons.io.FileSystem;
38 import org.apache.commons.io.StandardLineSeparator;
39 import org.apache.commons.io.build.AbstractStreamBuilder;
40 import org.apache.commons.io.function.IOIterable;
41 import org.apache.commons.io.function.IOIterator;
42
43 /**
44 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
45 * <p>
46 * To build an instance, use {@link Builder}.
47 * </p>
48 * <p>
49 * For example:
50 * </p>
51 * <pre>
52 * <code>
53 * try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
54 * .setPath(path)
55 * .setBufferSize(4096)
56 * .setCharset(StandardCharsets.UTF_8)
57 * .get()) {
58 * reader.forEach(line -> System.out.println(line));
59 * }
60 * </code>
61 * </pre>
62 *
63 * @see Builder
64 * @since 2.2
65 */
66 public class ReversedLinesFileReader implements Closeable, IOIterable<String> {
67
68 // @formatter:off
/**
 * Builds a new {@link ReversedLinesFileReader}.
 *
 * <p>
 * For example:
 * </p>
 * <pre>{@code
 * ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
 *   .setPath(path)
 *   .setBufferSize(4096)
 *   .setCharset(StandardCharsets.UTF_8)
 *   .get();}
 * </pre>
 *
 * @see #get()
 * @since 2.12.0
 */
// @formatter:on
public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {

    /**
     * Constructs a new builder of {@link ReversedLinesFileReader}.
     * <p>
     * Presets both the buffer size and the default buffer size to {@code DEFAULT_BLOCK_SIZE}, and the open options to {@link StandardOpenOption#READ}.
     * </p>
     */
    public Builder() {
        setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
        setBufferSize(DEFAULT_BLOCK_SIZE);
        setOpenOptions(StandardOpenOption.READ);
    }

    /**
     * Builds a new {@link ReversedLinesFileReader}.
     * <p>
     * You must set an aspect from which a {@link SeekableByteChannel} can be obtained on this builder (the reader requests one from this builder when it is
     * constructed), otherwise, this method throws an exception.
     * </p>
     * <p>
     * This builder uses the following aspects:
     * </p>
     * <ul>
     * <li>{@link #getPath()} gets the target aspect.</li>
     * <li>{@link #getBufferSize()}</li>
     * <li>{@link #getCharset()}</li>
     * </ul>
     *
     * @return a new instance.
     * @throws IllegalStateException         if the {@code origin} is {@code null}.
     * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
     * @throws IOException                   if an I/O error occurs converting to a {@link Path} using {@link #getPath()}.
     * @see #getPath()
     * @see #getBufferSize()
     * @see #getCharset()
     * @see #getUnchecked()
     */
    @Override
    public ReversedLinesFileReader get() throws IOException {
        return new ReversedLinesFileReader(this);
    }

}
127
128 private final class FilePart {
/**
 * 1-based number of this block within the file; block {@code n} starts at byte offset {@code (n - 1) * blockSize}. The reader passes 0 for an empty file.
 */
private final long partNumber;

/** The bytes read for this block, followed by any leftover bytes handed over from the block that was processed before it. */
private final byte[] data;

/**
 * Bytes at the start of this block that have not been returned as a line and are handed to the preceding block on roll over; {@code null} if there are none.
 * In the block with part number 1 an empty array is also used to remember an empty first line.
 */
private byte[] leftOver;

/** Index in {@code data} of the last byte that has not been returned yet; -1 once the block has been consumed. */
private int currentLastBytePos;
136
/**
 * Constructs a new instance by reading one block from the channel and appending the leftover bytes of the block processed before it.
 *
 * @param partNumber             the 1-based number of the block to read; 0 means there is nothing to read (empty file).
 * @param length                 the number of bytes to read from the file for this block.
 * @param leftOverOfLastFilePart bytes carried over from the previously processed block, appended after the bytes read here; may be {@code null}.
 * @throws IOException           if there is a problem reading the file.
 * @throws IllegalStateException if fewer than {@code length} bytes could be read.
 */
private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
    this.partNumber = partNumber;
    final int leftOverLength = leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0;
    this.data = new byte[length + leftOverLength];

    // read data
    if (partNumber > 0 /* file not empty */) {
        final long off = (partNumber - 1) * blockSize;
        channel.position(off);
        final ByteBuffer buffer = ByteBuffer.wrap(data, 0, length);
        // A single read() may deliver fewer bytes than requested, so keep reading until the
        // buffer is full. Stop on end-of-stream (-1) or on a read that makes no progress (0)
        // so that a misbehaving channel cannot cause an endless loop.
        while (buffer.hasRemaining()) {
            if (channel.read(buffer) <= 0) {
                break;
            }
        }
        if (buffer.hasRemaining()) {
            throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
        }
    }
    // copy left over part into data arr
    if (leftOverOfLastFilePart != null) {
        System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverLength);
    }
    this.currentLastBytePos = data.length - 1;
    this.leftOver = null;
}
166
167 /**
168 * Constructs the buffer containing any leftover bytes.
169 */
170 private void createLeftOver() {
171 final int lineLengthBytes = currentLastBytePos + 1;
172 if (lineLengthBytes > 0) {
173 // create left over for next block
174 leftOver = Arrays.copyOf(data, lineLengthBytes);
175 } else {
176 leftOver = null;
177 }
178 currentLastBytePos = -1;
179 }
180
181 /**
182 * Finds the new-line sequence and return its length.
183 *
184 * @param data buffer to scan.
185 * @param i start offset in buffer.
186 * @return length of newline sequence or 0 if none found.
187 */
188 private int getNewLineMatchByteCount(final byte[] data, final int i) {
189 for (final byte[] newLineSequence : newLineSequences) {
190 boolean match = true;
191 for (int j = newLineSequence.length - 1; j >= 0; j--) {
192 final int k = i + j - (newLineSequence.length - 1);
193 match &= k >= 0 && data[k] == newLineSequence[j];
194 }
195 if (match) {
196 return newLineSequence.length;
197 }
198 }
199 return 0;
200 }
201
202 /**
203 * Reads a line.
204 *
205 * @return the line or null.
206 */
207 private String readLine() { //NOPMD Bug in PMD
208
209 String line = null;
210 int newLineMatchByteCount;
211
212 final boolean isLastFilePart = partNumber == 1;
213
214 int i = currentLastBytePos;
215
216 if (i == -1 && isLastFilePart && leftOver != null) {
217 line = new String(leftOver, charset);
218 leftOver = null;
219 return line;
220 }
221
222 while (i > -1) {
223
224 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
225 // avoidNewlineSplitBuffer: for all except the last file part we
226 // take a few bytes to the next file part to avoid splitting of newlines
227 createLeftOver();
228 break; // skip last few bytes and leave it to the next file part
229 }
230
231 // check for newline
232 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
233 final int lineStart = i + 1;
234 final int lineLengthBytes = currentLastBytePos - lineStart + 1;
235
236 if (lineLengthBytes < 0) {
237 throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
238 }
239 final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
240
241 line = new String(lineData, charset);
242
243 currentLastBytePos = i - newLineMatchByteCount;
244
245 if (isLastFilePart && currentLastBytePos == -1 && i == 0) {
246 leftOver = new byte[0];
247 }
248 break; // found line
249 }
250
251 // move cursor
252 i -= byteDecrement;
253
254 // end of file part handling
255 if (i < 0) {
256 if (isLastFilePart) {
257 final int lineLengthBytes = currentLastBytePos + 1;
258 if (lineLengthBytes > 0) {
259 final byte[] lineData = Arrays.copyOf(data, lineLengthBytes);
260 line = new String(lineData, charset);
261 }
262 currentLastBytePos = -1;
263 } else {
264 createLeftOver();
265 }
266 break; // end of file part
267 }
268 }
269
270 // there will be partNumber line break anymore, this is the first line of the file
271 if (line == null && isLastFilePart && leftOver != null) {
272 line = new String(leftOver, charset);
273 leftOver = null;
274 }
275
276 return line;
277 }
278
279 /**
280 * Handles block rollover
281 *
282 * @return the new FilePart or null.
283 * @throws IOException if there was a problem reading the file.
284 */
285 private FilePart rollOver() throws IOException {
286
287 if (currentLastBytePos > -1) {
288 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
289 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
290 }
291
292 if (partNumber > 1) {
293 return new FilePart(partNumber - 1, blockSize, leftOver);
294 }
295 // NO 1 was the last FilePart, we're finished
296 if (leftOver != null) {
297 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
298 + new String(leftOver, charset));
299 }
300 return null;
301 }
302 }
303
/** The empty string; used to recognize empty lines and returned by {@link #toString(int)} when no lines were read. */
private static final String EMPTY_STRING = "";

/** The default block size: the block size reported by the current {@link FileSystem}. */
private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
307
/**
 * Constructs a new {@link Builder}.
 * <p>
 * Configure the returned builder and call {@link Builder#get()} to create the reader.
 * </p>
 *
 * @return a new {@link Builder}.
 * @since 2.12.0
 */
public static Builder builder() {
    return new Builder();
}
317
/** Number of bytes read from the file per block; taken from the builder's buffer size. */
private final int blockSize;
/** Charset used to decode lines and to encode the new-line sequences. */
private final Charset charset;
/** Channel the blocks are read from; closed by {@link #close()}. */
private final SeekableByteChannel channel;
/** Size of the channel in bytes, as measured at construction time. */
private final long totalByteLength;
/** Number of blocks needed to cover {@code totalByteLength} bytes; 0 for an empty file. */
private final long totalBlockCount;
/** CRLF, LF and CR encoded in {@code charset}, in the order in which they are matched. */
private final byte[][] newLineSequences;
/** Length in bytes of the encoded CRLF sequence; that many bytes at the start of a block are left to the preceding block. */
private final int avoidNewlineSplitBufferSize;
/** Number of bytes to step back per iteration when scanning a block backwards (1 or 2, depending on the charset). */
private final int byteDecrement;
/** The block lines are currently read from; set to {@code null} once the start of the file has been reached. */
private FilePart currentFilePart;
/** Tracks the one-time skipping of the empty line that a trailing line separator produces at the end of the file. */
private boolean trailingNewlineOfFileSkipped;
328
/**
 * Constructs a reader from the builder's buffer size, charset and channel, and loads the last block of the file.
 * <p>
 * The charset is validated before the channel is requested from the builder. If anything fails after the channel has been obtained, the channel is closed
 * before the exception is propagated.
 * </p>
 *
 * @param builder supplies the buffer size, the charset and the {@link SeekableByteChannel}.
 * @throws UnsupportedEncodingException if the charset is UTF-16 without an explicit byte order, or is otherwise not supported.
 * @throws IOException                  if an I/O error occurs.
 */
private ReversedLinesFileReader(final Builder builder) throws IOException {
    this.blockSize = builder.getBufferSize();
    this.charset = Charsets.toCharset(builder.getCharset());
    // check & prepare encoding
    final CharsetEncoder charsetEncoder = charset.newEncoder();
    final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
    if (maxBytesPerChar == 1f || StandardCharsets.UTF_8.equals(charset)) {
        // all one byte encodings are no problem
        byteDecrement = 1;
    } else if (isSameCharset(charset, "Shift_JIS") || // Same as for UTF-8
    // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
            isSameCharset(charset, "windows-31j") || // Windows code page 932 (Japanese)
            isSameCharset(charset, "x-windows-949") || // Windows code page 949 (Korean)
            isSameCharset(charset, "gbk") || // Windows code page 936 (Simplified Chinese)
            isSameCharset(charset, "x-windows-950")) { // Windows code page 950 (Traditional Chinese)
        byteDecrement = 1;
    } else if (StandardCharsets.UTF_16BE.equals(charset) || StandardCharsets.UTF_16LE.equals(charset)) {
        // UTF-16 new line sequences are not allowed as second tuple of four byte
        // sequences,
        // however byte order has to be specified
        byteDecrement = 2;
    } else if (StandardCharsets.UTF_16.equals(charset)) {
        throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
    } else {
        throw new UnsupportedEncodingException("Encoding " + charset + " is not supported yet (feel free to submit a patch)");
    }
    // NOTE: The new line sequences are matched in the order given, so it is
    // important that \r\n is BEFORE \n
    this.newLineSequences = new byte[][] { StandardLineSeparator.CRLF.getBytes(charset), StandardLineSeparator.LF.getBytes(charset),
            StandardLineSeparator.CR.getBytes(charset) };
    this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
    // Open file
    final SeekableByteChannel openedChannel = builder.getChannel(SeekableByteChannel.class);
    this.channel = openedChannel;
    try {
        final long size = openedChannel.size();
        long blockCount = size / blockSize;
        int lastBlockLength = (int) (size % blockSize);
        if (lastBlockLength > 0) {
            // the last block is only partially filled
            blockCount++;
        } else if (size > 0) {
            // the file length is an exact multiple of the block size
            lastBlockLength = blockSize;
        }
        this.totalByteLength = size;
        this.totalBlockCount = blockCount;
        this.currentFilePart = new FilePart(blockCount, lastBlockLength, null);
    } catch (final IOException | RuntimeException e) {
        // The caller never receives the instance and so cannot close it: release the channel here.
        try {
            openedChannel.close();
        } catch (final IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}

/**
 * Tests whether the given charset is the one registered under the given name, without failing when the running Java runtime does not provide that charset.
 *
 * @param charset the charset to test.
 * @param name    a charset name or alias.
 * @return {@code true} if the named charset is supported by this runtime and equals {@code charset}.
 */
private static boolean isSameCharset(final Charset charset, final String name) {
    return Charset.isSupported(name) && Charset.forName(name).equals(charset);
}
374
/**
 * Constructs a ReversedLinesFileReader with the default block size ({@code DEFAULT_BLOCK_SIZE}, the block size reported by the current {@link FileSystem})
 * and the virtual machine's {@linkplain Charset#defaultCharset() default charset}.
 *
 * @param file the file to be read.
 * @throws IOException if an I/O error occurs.
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file) throws IOException {
    this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
}
386
/**
 * Constructs a ReversedLinesFileReader with the default block size ({@code DEFAULT_BLOCK_SIZE}, the block size reported by the current {@link FileSystem})
 * and the specified encoding.
 * <p>
 * Converts the file with {@link File#toPath()} and delegates to {@link #ReversedLinesFileReader(Path, Charset)}.
 * </p>
 *
 * @param file    the file to be read.
 * @param charset the charset to use, null uses the default Charset.
 * @throws IOException if an I/O error occurs.
 * @since 2.5
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
    this(file.toPath(), charset);
}
401
/**
 * Constructs a ReversedLinesFileReader with the given block size and encoding.
 * <p>
 * Converts the file with {@link File#toPath()} and delegates to {@link #ReversedLinesFileReader(Path, int, Charset)}.
 * </p>
 *
 * @param file      the file to be read.
 * @param blockSize size of the internal buffer (for ideal performance this
 *                  should match with the block size of the underlying file
 *                  system).
 * @param charset   the encoding of the file, null uses the default Charset.
 * @throws IOException if an I/O error occurs.
 * @since 2.3
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
    this(file.toPath(), blockSize, charset);
}
418
/**
 * Constructs a ReversedLinesFileReader with the given block size and encoding.
 * <p>
 * Converts the file with {@link File#toPath()} and delegates to {@link #ReversedLinesFileReader(Path, int, String)}.
 * </p>
 *
 * @param file        the file to be read.
 * @param blockSize   size of the internal buffer (for ideal performance this
 *                    should match with the block size of the underlying file
 *                    system).
 * @param charsetName the encoding of the file, null uses the default Charset.
 * @throws IOException                                  if an I/O error occurs.
 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported.
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
    this(file.toPath(), blockSize, charsetName);
}
435
/**
 * Constructs a ReversedLinesFileReader with the default block size ({@code DEFAULT_BLOCK_SIZE}, the block size reported by the current {@link FileSystem})
 * and the specified encoding.
 *
 * @param file    the file to be read.
 * @param charset the charset to use, null uses the default Charset.
 * @throws IOException if an I/O error occurs.
 * @since 2.7
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
    this(file, DEFAULT_BLOCK_SIZE, charset);
}
450
/**
 * Constructs a ReversedLinesFileReader with the given block size and encoding.
 * <p>
 * Configures a new {@link Builder} with the given path, buffer size and charset and constructs the reader from it.
 * </p>
 *
 * @param file      the file to be read.
 * @param blockSize size of the internal buffer (for ideal performance this
 *                  should match with the block size of the underlying file
 *                  system).
 * @param charset   the encoding of the file, null uses the default Charset.
 * @throws IOException if an I/O error occurs.
 * @since 2.7
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
    this(builder().setPath(file).setBufferSize(blockSize).setCharset(charset));
}
467
/**
 * Constructs a ReversedLinesFileReader with the given block size and encoding.
 * <p>
 * Resolves the charset name with {@code Charsets.toCharset(String)} and delegates to {@link #ReversedLinesFileReader(Path, int, Charset)}.
 * </p>
 *
 * @param file        the file to be read.
 * @param blockSize   size of the internal buffer (for ideal performance this
 *                    should match with the block size of the underlying file
 *                    system).
 * @param charsetName the encoding of the file, null uses the default Charset.
 * @throws IOException                                  if an I/O error occurs.
 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported.
 * @since 2.7
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
    this(file, blockSize, Charsets.toCharset(charsetName));
}
485
/**
 * Closes underlying resources, that is, the channel the file is read from.
 *
 * @throws IOException if an I/O error occurs.
 */
@Override
public void close() throws IOException {
    channel.close();
}
495
496 @Override
497 public IOIterator<String> iterator() {
498 return new IOIterator<String>() {
499
500 private String next;
501
502 @Override
503 public boolean hasNext() throws IOException {
504 if (next == null) {
505 next = readLine();
506 }
507 return next != null;
508 }
509
510 @Override
511 public String next() throws IOException {
512 if (next == null) {
513 next = readLine();
514 }
515 final String tmp = next;
516 next = null;
517 return tmp;
518 }
519
520 @Override
521 public Iterator<String> unwrap() {
522 return null;
523 }
524
525 };
526 }
527
528 /**
529 * Returns the lines of the file from bottom to top.
530 *
531 * @return the next line or null if the start of the file is reached.
532 * @throws IOException if an I/O error occurs.
533 */
534 public String readLine() throws IOException {
535 String line = currentFilePart.readLine();
536 while (line == null) {
537 currentFilePart = currentFilePart.rollOver();
538 if (currentFilePart == null) {
539 // partNumber more FileParts: we're done, leave line set to null
540 break;
541 }
542 line = currentFilePart.readLine();
543 }
544 // aligned behavior with BufferedReader that doesn't return a last, empty line
545 if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
546 trailingNewlineOfFileSkipped = true;
547 line = readLine();
548 }
549 return line;
550 }
551
552 /**
553 * Returns {@code lineCount} lines of the file from bottom to top.
554 * <p>
555 * If there are less than {@code lineCount} lines in the file, then that's what
556 * you get.
557 * </p>
558 * <p>
559 * Note: You can easily flip the result with {@link Collections#reverse(List)}.
560 * </p>
561 *
562 * @param lineCount How many lines to read.
563 * @return A new list.
564 * @throws IOException if an I/O error occurs.
565 * @since 2.8.0
566 */
567 public List<String> readLines(final int lineCount) throws IOException {
568 if (lineCount < 0) {
569 throw new IllegalArgumentException("lineCount < 0");
570 }
571 final ArrayList<String> arrayList = new ArrayList<>(lineCount);
572 for (int i = 0; i < lineCount; i++) {
573 final String line = readLine();
574 if (line == null) {
575 return arrayList;
576 }
577 arrayList.add(line);
578 }
579 return arrayList;
580 }
581
582 /**
583 * Returns the last {@code lineCount} lines of the file.
584 * <p>
585 * If there are less than {@code lineCount} lines in the file, then that's what
586 * you get.
587 * </p>
588 *
589 * @param lineCount How many lines to read.
590 * @return A String.
591 * @throws IOException if an I/O error occurs.
592 * @since 2.8.0
593 */
594 public String toString(final int lineCount) throws IOException {
595 final List<String> lines = readLines(lineCount);
596 Collections.reverse(lines);
597 return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
598 }
599
/**
 * Always returns {@code null}: this implementation has no underlying {@link Iterable} to expose.
 *
 * @return {@code null}.
 */
@Override
public Iterable<String> unwrap() {
    return null;
}
604
605 }