001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.input; 018 019import java.io.Closeable; 020import java.io.File; 021import java.io.IOException; 022import java.io.UnsupportedEncodingException; 023import java.nio.ByteBuffer; 024import java.nio.channels.SeekableByteChannel; 025import java.nio.charset.Charset; 026import java.nio.charset.CharsetEncoder; 027import java.nio.charset.StandardCharsets; 028import java.nio.file.Files; 029import java.nio.file.Path; 030import java.nio.file.StandardOpenOption; 031import java.util.ArrayList; 032import java.util.Arrays; 033import java.util.Collections; 034import java.util.Iterator; 035import java.util.List; 036 037import org.apache.commons.io.Charsets; 038import org.apache.commons.io.FileSystem; 039import org.apache.commons.io.StandardLineSeparator; 040import org.apache.commons.io.build.AbstractStreamBuilder; 041import org.apache.commons.io.function.IOIterable; 042import org.apache.commons.io.function.IOIterator; 043 044/** 045 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files. 046 * <p> 047 * To build an instance, use {@link Builder}. 048 * </p> 049 * <p> 050 * For example: 051 * </p> 052 * <pre> 053 * <code> 054 * try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder() 055 * .setPath(path) 056 * .setBufferSize(4096) 057 * .setCharset(StandardCharsets.UTF_8) 058 * .get()) { 059 * reader.forEach(line -> System.out.println(line)); 060 * } 061 * </code> 062 * </pre> 063 * 064 * @see Builder 065 * @since 2.2 066 */ 067public class ReversedLinesFileReader implements Closeable, IOIterable<String> { 068 069 // @formatter:off 070 /** 071 * Builds a new {@link ReversedLinesFileReader}. 072 * 073 * <p> 074 * For example: 075 * </p> 076 * <pre>{@code 077 * ReversedLinesFileReader reader = ReversedLinesFileReader.builder() 078 * .setPath(path) 079 * .setBufferSize(4096) 080 * .setCharset(StandardCharsets.UTF_8) 081 * .get());} 082 * </pre> 083 * 084 * @see #get() 085 * @since 2.12.0 086 */ 087 // @formatter:on 088 public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> { 089 090 /** 091 * Constructs a new builder of {@link ReversedLinesFileReader}. 092 */ 093 public Builder() { 094 setBufferSizeDefault(DEFAULT_BLOCK_SIZE); 095 setBufferSize(DEFAULT_BLOCK_SIZE); 096 } 097 098 /** 099 * Builds a new {@link ReversedLinesFileReader}. 100 * <p> 101 * You must set an aspect that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception. 102 * </p> 103 * <p> 104 * This builder uses the following aspects: 105 * </p> 106 * <ul> 107 * <li>{@link #getPath()} gets the target aspect.</li> 108 * <li>{@link #getBufferSize()}</li> 109 * <li>{@link #getCharset()}</li> 110 * </ul> 111 * 112 * @return a new instance. 113 * @throws IllegalStateException if the {@code origin} is {@code null}. 114 * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}. 115 * @throws IOException if an I/O error occurs converting to a {@link Path} using {@link #getPath()}. 116 * @see #getPath() 117 * @see #getBufferSize() 118 * @see #getCharset() 119 * @see #getUnchecked() 120 */ 121 @Override 122 public ReversedLinesFileReader get() throws IOException { 123 return new ReversedLinesFileReader(this); 124 } 125 126 } 127 128 private final class FilePart { 129 private final long partNumber; 130 131 private final byte[] data; 132 133 private byte[] leftOver; 134 135 private int currentLastBytePos; 136 137 /** 138 * Constructs a new instance. 139 * 140 * @param partNumber the part number 141 * @param length its length 142 * @param leftOverOfLastFilePart remainder 143 * @throws IOException if there is a problem reading the file 144 */ 145 private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException { 146 this.partNumber = partNumber; 147 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0); 148 this.data = new byte[dataLength]; 149 final long off = (partNumber - 1) * blockSize; 150 151 // read data 152 if (partNumber > 0 /* file not empty */) { 153 channel.position(off); 154 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length)); 155 if (countRead != length) { 156 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match"); 157 } 158 } 159 // copy left over part into data arr 160 if (leftOverOfLastFilePart != null) { 161 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length); 162 } 163 this.currentLastBytePos = data.length - 1; 164 this.leftOver = null; 165 } 166 167 /** 168 * Constructs the buffer containing any leftover bytes. 169 */ 170 private void createLeftOver() { 171 final int lineLengthBytes = currentLastBytePos + 1; 172 if (lineLengthBytes > 0) { 173 // create left over for next block 174 leftOver = Arrays.copyOf(data, lineLengthBytes); 175 } else { 176 leftOver = null; 177 } 178 currentLastBytePos = -1; 179 } 180 181 /** 182 * Finds the new-line sequence and return its length. 183 * 184 * @param data buffer to scan 185 * @param i start offset in buffer 186 * @return length of newline sequence or 0 if none found 187 */ 188 private int getNewLineMatchByteCount(final byte[] data, final int i) { 189 for (final byte[] newLineSequence : newLineSequences) { 190 boolean match = true; 191 for (int j = newLineSequence.length - 1; j >= 0; j--) { 192 final int k = i + j - (newLineSequence.length - 1); 193 match &= k >= 0 && data[k] == newLineSequence[j]; 194 } 195 if (match) { 196 return newLineSequence.length; 197 } 198 } 199 return 0; 200 } 201 202 /** 203 * Reads a line. 204 * 205 * @return the line or null 206 */ 207 private String readLine() { //NOPMD Bug in PMD 208 209 String line = null; 210 int newLineMatchByteCount; 211 212 final boolean isLastFilePart = partNumber == 1; 213 214 int i = currentLastBytePos; 215 while (i > -1) { 216 217 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) { 218 // avoidNewlineSplitBuffer: for all except the last file part we 219 // take a few bytes to the next file part to avoid splitting of newlines 220 createLeftOver(); 221 break; // skip last few bytes and leave it to the next file part 222 } 223 224 // check for newline 225 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) { 226 final int lineStart = i + 1; 227 final int lineLengthBytes = currentLastBytePos - lineStart + 1; 228 229 if (lineLengthBytes < 0) { 230 throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes); 231 } 232 final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes); 233 234 line = new String(lineData, charset); 235 236 currentLastBytePos = i - newLineMatchByteCount; 237 break; // found line 238 } 239 240 // move cursor 241 i -= byteDecrement; 242 243 // end of file part handling 244 if (i < 0) { 245 createLeftOver(); 246 break; // end of file part 247 } 248 } 249 250 // last file part handling 251 if (isLastFilePart && leftOver != null) { 252 // there will be partNumber line break anymore, this is the first line of the file 253 line = new String(leftOver, charset); 254 leftOver = null; 255 } 256 257 return line; 258 } 259 260 /** 261 * Handles block rollover 262 * 263 * @return the new FilePart or null 264 * @throws IOException if there was a problem reading the file 265 */ 266 private FilePart rollOver() throws IOException { 267 268 if (currentLastBytePos > -1) { 269 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... " 270 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos); 271 } 272 273 if (partNumber > 1) { 274 return new FilePart(partNumber - 1, blockSize, leftOver); 275 } 276 // NO 1 was the last FilePart, we're finished 277 if (leftOver != null) { 278 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart=" 279 + new String(leftOver, charset)); 280 } 281 return null; 282 } 283 } 284 285 private static final String EMPTY_STRING = ""; 286 287 private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize(); 288 289 /** 290 * Constructs a new {@link Builder}. 291 * 292 * @return a new {@link Builder}. 293 * @since 2.12.0 294 */ 295 public static Builder builder() { 296 return new Builder(); 297 } 298 299 private final int blockSize; 300 private final Charset charset; 301 private final SeekableByteChannel channel; 302 private final long totalByteLength; 303 private final long totalBlockCount; 304 private final byte[][] newLineSequences; 305 private final int avoidNewlineSplitBufferSize; 306 private final int byteDecrement; 307 private FilePart currentFilePart; 308 private boolean trailingNewlineOfFileSkipped; 309 310 private ReversedLinesFileReader(final Builder builder) throws IOException { 311 this.blockSize = builder.getBufferSize(); 312 this.charset = Charsets.toCharset(builder.getCharset()); 313 // check & prepare encoding 314 final CharsetEncoder charsetEncoder = this.charset.newEncoder(); 315 final float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); 316 if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) { 317 // all one byte encodings are partNumber problem 318 byteDecrement = 1; 319 } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 320 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html 321 this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese) 322 this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean) 323 this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese) 324 this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese) 325 byteDecrement = 1; 326 } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) { 327 // UTF-16 new line sequences are not allowed as second tuple of four byte 328 // sequences, 329 // however byte order has to be specified 330 byteDecrement = 2; 331 } else if (this.charset == StandardCharsets.UTF_16) { 332 throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)"); 333 } else { 334 throw new UnsupportedEncodingException("Encoding " + charset + " is not supported yet (feel free to submit a patch)"); 335 } 336 // NOTE: The new line sequences are matched in the order given, so it is 337 // important that \r\n is BEFORE \n 338 this.newLineSequences = new byte[][] { StandardLineSeparator.CRLF.getBytes(this.charset), StandardLineSeparator.LF.getBytes(this.charset), 339 StandardLineSeparator.CR.getBytes(this.charset) }; 340 this.avoidNewlineSplitBufferSize = newLineSequences[0].length; 341 // Open file 342 this.channel = Files.newByteChannel(builder.getPath(), StandardOpenOption.READ); 343 this.totalByteLength = channel.size(); 344 int lastBlockLength = (int) (this.totalByteLength % blockSize); 345 if (lastBlockLength > 0) { 346 this.totalBlockCount = this.totalByteLength / blockSize + 1; 347 } else { 348 this.totalBlockCount = this.totalByteLength / blockSize; 349 if (this.totalByteLength > 0) { 350 lastBlockLength = blockSize; 351 } 352 } 353 this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); 354 } 355 356 /** 357 * Constructs a ReversedLinesFileReader with default block size of 4KB and the virtual machine's {@link Charset#defaultCharset() default charset}. 358 * 359 * @param file the file to be read 360 * @throws IOException if an I/O error occurs. 361 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 362 */ 363 @Deprecated 364 public ReversedLinesFileReader(final File file) throws IOException { 365 this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset()); 366 } 367 368 /** 369 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 370 * specified encoding. 371 * 372 * @param file the file to be read 373 * @param charset the charset to use, null uses the default Charset. 374 * @throws IOException if an I/O error occurs. 375 * @since 2.5 376 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 377 */ 378 @Deprecated 379 public ReversedLinesFileReader(final File file, final Charset charset) throws IOException { 380 this(file.toPath(), charset); 381 } 382 383 /** 384 * Constructs a ReversedLinesFileReader with the given block size and encoding. 385 * 386 * @param file the file to be read 387 * @param blockSize size of the internal buffer (for ideal performance this 388 * should match with the block size of the underlying file 389 * system). 390 * @param charset the encoding of the file, null uses the default Charset. 391 * @throws IOException if an I/O error occurs. 392 * @since 2.3 393 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 394 */ 395 @Deprecated 396 public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException { 397 this(file.toPath(), blockSize, charset); 398 } 399 400 /** 401 * Constructs a ReversedLinesFileReader with the given block size and encoding. 402 * 403 * @param file the file to be read 404 * @param blockSize size of the internal buffer (for ideal performance this 405 * should match with the block size of the underlying file 406 * system). 407 * @param charsetName the encoding of the file, null uses the default Charset. 408 * @throws IOException if an I/O error occurs 409 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported 410 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 411 */ 412 @Deprecated 413 public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException { 414 this(file.toPath(), blockSize, charsetName); 415 } 416 417 /** 418 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 419 * specified encoding. 420 * 421 * @param file the file to be read 422 * @param charset the charset to use, null uses the default Charset. 423 * @throws IOException if an I/O error occurs. 424 * @since 2.7 425 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 426 */ 427 @Deprecated 428 public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException { 429 this(file, DEFAULT_BLOCK_SIZE, charset); 430 } 431 432 /** 433 * Constructs a ReversedLinesFileReader with the given block size and encoding. 434 * 435 * @param file the file to be read 436 * @param blockSize size of the internal buffer (for ideal performance this 437 * should match with the block size of the underlying file 438 * system). 439 * @param charset the encoding of the file, null uses the default Charset. 440 * @throws IOException if an I/O error occurs. 441 * @since 2.7 442 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 443 */ 444 @Deprecated 445 public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException { 446 this(builder().setPath(file).setBufferSize(blockSize).setCharset(charset)); 447 } 448 449 /** 450 * Constructs a ReversedLinesFileReader with the given block size and encoding. 451 * 452 * @param file the file to be read 453 * @param blockSize size of the internal buffer (for ideal performance this 454 * should match with the block size of the underlying file 455 * system). 456 * @param charsetName the encoding of the file, null uses the default Charset. 457 * @throws IOException if an I/O error occurs 458 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported 459 * @since 2.7 460 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 461 */ 462 @Deprecated 463 public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException { 464 this(file, blockSize, Charsets.toCharset(charsetName)); 465 } 466 467 /** 468 * Closes underlying resources. 469 * 470 * @throws IOException if an I/O error occurs. 471 */ 472 @Override 473 public void close() throws IOException { 474 channel.close(); 475 } 476 477 @Override 478 public IOIterator<String> iterator() { 479 return new IOIterator<String>() { 480 481 private String next; 482 483 @Override 484 public boolean hasNext() throws IOException { 485 if (next == null) { 486 next = readLine(); 487 } 488 return next != null; 489 } 490 491 @Override 492 public String next() throws IOException { 493 if (next == null) { 494 next = readLine(); 495 } 496 final String tmp = next; 497 next = null; 498 return tmp; 499 } 500 501 @Override 502 public Iterator<String> unwrap() { 503 return null; 504 } 505 506 }; 507 } 508 509 /** 510 * Returns the lines of the file from bottom to top. 511 * 512 * @return the next line or null if the start of the file is reached 513 * @throws IOException if an I/O error occurs. 514 */ 515 public String readLine() throws IOException { 516 String line = currentFilePart.readLine(); 517 while (line == null) { 518 currentFilePart = currentFilePart.rollOver(); 519 if (currentFilePart == null) { 520 // partNumber more FileParts: we're done, leave line set to null 521 break; 522 } 523 line = currentFilePart.readLine(); 524 } 525 // aligned behavior with BufferedReader that doesn't return a last, empty line 526 if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) { 527 trailingNewlineOfFileSkipped = true; 528 line = readLine(); 529 } 530 return line; 531 } 532 533 /** 534 * Returns {@code lineCount} lines of the file from bottom to top. 535 * <p> 536 * If there are less than {@code lineCount} lines in the file, then that's what 537 * you get. 538 * </p> 539 * <p> 540 * Note: You can easily flip the result with {@link Collections#reverse(List)}. 541 * </p> 542 * 543 * @param lineCount How many lines to read. 544 * @return A new list 545 * @throws IOException if an I/O error occurs. 546 * @since 2.8.0 547 */ 548 public List<String> readLines(final int lineCount) throws IOException { 549 if (lineCount < 0) { 550 throw new IllegalArgumentException("lineCount < 0"); 551 } 552 final ArrayList<String> arrayList = new ArrayList<>(lineCount); 553 for (int i = 0; i < lineCount; i++) { 554 final String line = readLine(); 555 if (line == null) { 556 return arrayList; 557 } 558 arrayList.add(line); 559 } 560 return arrayList; 561 } 562 563 /** 564 * Returns the last {@code lineCount} lines of the file. 565 * <p> 566 * If there are less than {@code lineCount} lines in the file, then that's what 567 * you get. 568 * </p> 569 * 570 * @param lineCount How many lines to read. 571 * @return A String. 572 * @throws IOException if an I/O error occurs. 573 * @since 2.8.0 574 */ 575 public String toString(final int lineCount) throws IOException { 576 final List<String> lines = readLines(lineCount); 577 Collections.reverse(lines); 578 return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator(); 579 } 580 581 @Override 582 public Iterable<String> unwrap() { 583 return null; 584 } 585 586}