/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.input;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.FileSystem;
import org.apache.commons.io.StandardLineSeparator;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.function.IOIterable;
import org.apache.commons.io.function.IOIterator;

/**
 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
 * <p>
 * To build an instance, use {@link Builder}.
 * </p>
 * <p>
 * For example:
 * </p>
 * <pre>
 * <code>
 * try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
 *   .setPath(path)
 *   .setBufferSize(4096)
 *   .setCharset(StandardCharsets.UTF_8)
 *   .get()) {
 *      reader.forEach(line -> System.out.println(line));
 * }
 * </code>
 * </pre>
 *
 * @see Builder
 * @since 2.2
 */
public class ReversedLinesFileReader implements Closeable, IOIterable<String> {

    // @formatter:off
    /**
     * Builds a new {@link ReversedLinesFileReader}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
     *   .setPath(path)
     *   .setBufferSize(4096)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.12.0
     */
    // @formatter:on
    public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {

        /**
         * Constructs a new builder of {@link ReversedLinesFileReader}.
         */
        public Builder() {
            setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
            setBufferSize(DEFAULT_BLOCK_SIZE);
        }

        /**
         * Builds a new {@link ReversedLinesFileReader}.
         * <p>
         * You must set an aspect that supports {@link #getPath()} on this builder, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getPath()} gets the target aspect.</li>
         * <li>{@link #getBufferSize()}</li>
         * <li>{@link #getCharset()}</li>
         * </ul>
         *
         * @return a new instance.
         * @throws IllegalStateException         if the {@code origin} is {@code null}.
         * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
         * @throws IOException                   if an I/O error occurs converting to a {@link Path} using {@link #getPath()}.
         * @see #getPath()
         * @see #getBufferSize()
         * @see #getCharset()
         * @see #getUnchecked()
         */
        @Override
        public ReversedLinesFileReader get() throws IOException {
            return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
        }

    }

    /**
     * One block of the file, scanned backwards for lines. Bytes at the start of a block that belong to a line continuing in the preceding block are kept as
     * "left over" and appended to that preceding block when rolling over.
     */
    private final class FilePart {

        /** Number of this block; the block starts at file offset {@code (partNumber - 1) * blockSize}. Zero is used for an empty file. */
        private final long partNumber;

        /** The bytes read from the file for this block, followed by the left over bytes handed in by the block processed before. */
        private final byte[] data;

        /** Unconsumed leading bytes of this block, to be handed to the next (preceding in file order) block; may be null. */
        private byte[] leftOver;

        /** Index in {@link #data} of the last byte that has not been returned as part of a line yet; -1 when the block is exhausted. */
        private int currentLastBytePos;

        /**
         * Constructs a new instance.
         *
         * @param partNumber             the part number
         * @param length                 its length
         * @param leftOverOfLastFilePart remainder
         * @throws IOException if there is a problem reading the file
         * @throws IllegalStateException if fewer than {@code length} bytes could be read from the channel.
         */
        private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
            this.partNumber = partNumber;
            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
            this.data = new byte[dataLength];
            final long off = (partNumber - 1) * blockSize;

            // read data
            if (partNumber > 0 /* file not empty */) {
                channel.position(off);
                final ByteBuffer buffer = ByteBuffer.wrap(data, 0, length);
                // A single read may legally deliver fewer bytes than requested, so keep reading until the
                // block is full. Stop on end-of-stream (or a read that makes no progress) to avoid spinning.
                while (buffer.hasRemaining()) {
                    if (channel.read(buffer) <= 0) {
                        break;
                    }
                }
                final int countRead = buffer.position();
                if (countRead != length) {
                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                }
            }
            // copy left over part into data arr
            if (leftOverOfLastFilePart != null) {
                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
            }
            this.currentLastBytePos = data.length - 1;
            this.leftOver = null;
        }

        /**
         * Constructs the buffer containing any leftover bytes, that is, all bytes from the start of this block up to and including the current position, and
         * marks this block as exhausted.
         */
        private void createLeftOver() {
            final int lineLengthBytes = currentLastBytePos + 1;
            if (lineLengthBytes > 0) {
                // create left over for next block
                leftOver = Arrays.copyOf(data, lineLengthBytes);
            } else {
                leftOver = null;
            }
            currentLastBytePos = -1;
        }

        /**
         * Finds the new-line sequence ending at the given position and returns its length.
         *
         * @param data buffer to scan
         * @param i    offset in the buffer of the last byte of the candidate new-line sequence
         * @return length of newline sequence or 0 if none found
         */
        private int getNewLineMatchByteCount(final byte[] data, final int i) {
            for (final byte[] newLineSequence : newLineSequences) {
                boolean match = true;
                // compare backwards so that the sequence is aligned to end at index i
                for (int j = newLineSequence.length - 1; j >= 0; j--) {
                    final int k = i + j - (newLineSequence.length - 1);
                    match &= k >= 0 && data[k] == newLineSequence[j];
                }
                if (match) {
                    return newLineSequence.length;
                }
            }
            return 0;
        }

        /**
         * Reads a line, scanning backwards from the current position.
         *
         * @return the line or null if this block holds no further complete line
         */
        private String readLine() { //NOPMD Bug in PMD

            String line = null;
            int newLineMatchByteCount;

            // part number 1 is the block at the start of the file, i.e. the last one to be processed
            final boolean isLastFilePart = partNumber == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // check for newline
                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                    }
                    final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);

                    line = new String(lineData, charset);

                    currentLastBytePos = i - newLineMatchByteCount;
                    break; // found line
                }

                // move cursor
                i -= byteDecrement;

                // end of file part handling
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // last file part handling
            if (isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, charset);
                leftOver = null;
            }

            return line;
        }

        /**
         * Handles block rollover
         *
         * @return the new FilePart or null
         * @throws IOException if there was a problem reading the file
         * @throws IllegalStateException if this block has not been fully consumed, or if the first block of the file still has left over bytes.
         */
        private FilePart rollOver() throws IOException {

            if (currentLastBytePos > -1) {
                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
            }

            if (partNumber > 1) {
                return new FilePart(partNumber - 1, blockSize, leftOver);
            }
            // NO 1 was the last FilePart, we're finished
            if (leftOver != null) {
                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                        + new String(leftOver, charset));
            }
            return null;
        }
    }

    private static final String EMPTY_STRING = "";

    /** Default block size, taken from the current {@link FileSystem}. */
    private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();

    /**
     * Upper bound for the initial capacity of the list built by {@link #readLines(int)}, so that a very large requested line count does not trigger a huge
     * up-front allocation.
     */
    private static final int MAX_INITIAL_LINES_CAPACITY = 1024;

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    private final int blockSize;
    private final Charset charset;
    private final SeekableByteChannel channel;
    private final long totalByteLength;
    private final long totalBlockCount;
    /** New-line sequences encoded in {@link #charset}, in matching order (CRLF first). */
    private final byte[][] newLineSequences;
    /** Byte length of the longest (CRLF) new-line sequence; that many leading bytes of a block are deferred to the preceding block. */
    private final int avoidNewlineSplitBufferSize;
    /** Step, in bytes, by which the backwards scan moves. */
    private final int byteDecrement;
    private FilePart currentFilePart;
    private boolean trailingNewlineOfFileSkipped;

    /**
     * Constructs a ReversedLinesFileReader with the default block size (the block size of the current {@link FileSystem}) and the virtual machine's
     * {@link Charset#defaultCharset() default charset}.
     *
     * @param file the file to be read
     * @throws IOException if an I/O error occurs.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
    }

    /**
     * Constructs a ReversedLinesFileReader with the default block size (the block size of the current {@link FileSystem}) and the
     * specified encoding.
     *
     * @param file    the file to be read
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.5
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
        this(file.toPath(), charset);
    }

    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset  the encoding of the file, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.3
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
        this(file.toPath(), blockSize, charset);
    }

    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file        the file to be read
     * @param blockSize   size of the internal buffer (for ideal performance this
     *                    should match with the block size of the underlying file
     *                    system).
     * @param charsetName the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs
     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
        this(file.toPath(), blockSize, charsetName);
    }

    /**
     * Constructs a ReversedLinesFileReader with the default block size (the block size of the current {@link FileSystem}) and the
     * specified encoding.
     *
     * @param file    the file to be read
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, charset);
    }

    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     * <p>
     * If opening succeeds but reading the file size or the first block fails, the channel is closed again before the exception propagates.
     * </p>
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset  the encoding of the file, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @throws UnsupportedEncodingException if the charset is not one of the supported encodings.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
        this.blockSize = blockSize;
        this.charset = Charsets.toCharset(charset);

        // --- check & prepare encoding ---
        final CharsetEncoder charsetEncoder = this.charset.newEncoder();
        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
        // Charsets are compared with equals() (canonical-name equality) rather than by reference.
        if (maxBytesPerChar == 1f || StandardCharsets.UTF_8.equals(this.charset)) {
            // all one byte encodings are no problem
            byteDecrement = 1;
        } else if (Charset.forName("Shift_JIS").equals(this.charset) || // Same as for UTF-8
        // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
                Charset.forName("windows-31j").equals(this.charset) || // Windows code page 932 (Japanese)
                Charset.forName("x-windows-949").equals(this.charset) || // Windows code page 949 (Korean)
                Charset.forName("gbk").equals(this.charset) || // Windows code page 936 (Simplified Chinese)
                Charset.forName("x-windows-950").equals(this.charset)) { // Windows code page 950 (Traditional Chinese)
            byteDecrement = 1;
        } else if (StandardCharsets.UTF_16BE.equals(this.charset) || StandardCharsets.UTF_16LE.equals(this.charset)) {
            // UTF-16 new line sequences are not allowed as second tuple of four byte
            // sequences,
            // however byte order has to be specified
            byteDecrement = 2;
        } else if (StandardCharsets.UTF_16.equals(this.charset)) {
            throw new UnsupportedEncodingException(
                    "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
        } else {
            // report the resolved charset; the raw parameter may be null
            throw new UnsupportedEncodingException(
                    "Encoding " + this.charset + " is not supported yet (feel free to " + "submit a patch)");
        }

        // NOTE: The new line sequences are matched in the order given, so it is
        // important that \r\n is BEFORE \n
        this.newLineSequences = new byte[][] {
            StandardLineSeparator.CRLF.getBytes(this.charset),
            StandardLineSeparator.LF.getBytes(this.charset),
            StandardLineSeparator.CR.getBytes(this.charset)
        };

        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;

        // Open file
        this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
        try {
            this.totalByteLength = channel.size();
            int lastBlockLength = (int) (this.totalByteLength % blockSize);
            if (lastBlockLength > 0) {
                this.totalBlockCount = this.totalByteLength / blockSize + 1;
            } else {
                this.totalBlockCount = this.totalByteLength / blockSize;
                if (this.totalByteLength > 0) {
                    lastBlockLength = blockSize;
                }
            }
            this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
        } catch (final IOException | RuntimeException e) {
            // The caller never receives this instance, so nobody else can close the channel: release it here.
            try {
                channel.close();
            } catch (final IOException closeException) {
                e.addSuppressed(closeException);
            }
            throw e;
        }

    }

    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file        the file to be read
     * @param blockSize   size of the internal buffer (for ideal performance this
     *                    should match with the block size of the underlying file
     *                    system).
     * @param charsetName the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs
     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
        this(file, blockSize, Charsets.toCharset(charsetName));
    }

    /**
     * Closes underlying resources.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        channel.close();
    }

    /**
     * Returns an iterator over the remaining lines, from bottom to top. The iterator reads through {@link #readLine()}, so it shares its position with this
     * reader.
     *
     * @return a new iterator; its {@code next()} returns {@code null} once the start of the file is reached, and its {@code unwrap()} returns {@code null}.
     */
    @Override
    public IOIterator<String> iterator() {
        return new IOIterator<String>() {

            /** Line fetched by {@link #hasNext()} but not yet handed out; null if none is buffered. */
            private String next;

            @Override
            public boolean hasNext() throws IOException {
                if (next == null) {
                    next = readLine();
                }
                return next != null;
            }

            @Override
            public String next() throws IOException {
                if (next == null) {
                    next = readLine();
                }
                final String tmp = next;
                next = null;
                return tmp;
            }

            @Override
            public Iterator<String> unwrap() {
                return null;
            }

        };
    }

    /**
     * Returns the lines of the file from bottom to top.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException if an I/O error occurs.
     */
    public String readLine() throws IOException {
        String line = currentFilePart.readLine();
        while (line == null) {
            currentFilePart = currentFilePart.rollOver();
            if (currentFilePart == null) {
                // no more FileParts: we're done, leave line set to null
                break;
            }
            line = currentFilePart.readLine();
        }
        // aligned behavior with BufferedReader that doesn't return a last, empty line
        if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
            trailingNewlineOfFileSkipped = true;
            line = readLine();
        }
        return line;
    }

    /**
     * Returns {@code lineCount} lines of the file from bottom to top.
     * <p>
     * If there are less than {@code lineCount} lines in the file, then that's what
     * you get.
     * </p>
     * <p>
     * Note: You can easily flip the result with {@link Collections#reverse(List)}.
     * </p>
     *
     * @param lineCount How many lines to read.
     * @return A new list
     * @throws IOException if an I/O error occurs.
     * @throws IllegalArgumentException if {@code lineCount} is negative.
     * @since 2.8.0
     */
    public List<String> readLines(final int lineCount) throws IOException {
        if (lineCount < 0) {
            throw new IllegalArgumentException("lineCount < 0");
        }
        // Cap the initial capacity: the file may hold far fewer lines than requested, and a huge
        // lineCount must not cause a huge up-front allocation.
        final List<String> lines = new ArrayList<>(Math.min(lineCount, MAX_INITIAL_LINES_CAPACITY));
        for (int i = 0; i < lineCount; i++) {
            final String line = readLine();
            if (line == null) {
                break;
            }
            lines.add(line);
        }
        return lines;
    }

    /**
     * Returns the last {@code lineCount} lines of the file.
     * <p>
     * If there are less than {@code lineCount} lines in the file, then that's what
     * you get.
     * </p>
     *
     * @param lineCount How many lines to read.
     * @return A String with the lines in file order, each terminated by {@link System#lineSeparator()}; empty if no line was read.
     * @throws IOException if an I/O error occurs.
     * @since 2.8.0
     */
    public String toString(final int lineCount) throws IOException {
        final List<String> lines = readLines(lineCount);
        Collections.reverse(lines);
        return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
    }

    @Override
    public Iterable<String> unwrap() {
        return null;
    }

}