ReversedLinesFileReader.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.io.input;

  18. import java.io.Closeable;
  19. import java.io.File;
  20. import java.io.IOException;
  21. import java.io.UnsupportedEncodingException;
  22. import java.nio.ByteBuffer;
  23. import java.nio.channels.SeekableByteChannel;
  24. import java.nio.charset.Charset;
  25. import java.nio.charset.CharsetEncoder;
  26. import java.nio.charset.StandardCharsets;
  27. import java.nio.file.Files;
  28. import java.nio.file.Path;
  29. import java.nio.file.StandardOpenOption;
  30. import java.util.ArrayList;
  31. import java.util.Arrays;
  32. import java.util.Collections;
  33. import java.util.List;

  34. import org.apache.commons.io.Charsets;
  35. import org.apache.commons.io.FileSystem;
  36. import org.apache.commons.io.StandardLineSeparator;
  37. import org.apache.commons.io.build.AbstractStreamBuilder;

  38. /**
  39.  * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
  40.  * <p>
  41.  * To build an instance, use {@link Builder}.
  42.  * </p>
  43.  *
  44.  * @see Builder
  45.  * @since 2.2
  46.  */
  47. public class ReversedLinesFileReader implements Closeable {

  48.     // @formatter:off
  49.     /**
  50.      * Builds a new {@link ReversedLinesFileReader}.
  51.      *
  52.      * <p>
  53.      * For example:
  54.      * </p>
  55.      * <pre>{@code
  56.      * ReversedLinesFileReader r = ReversedLinesFileReader.builder()
  57.      *   .setPath(path)
  58.      *   .setBufferSize(4096)
  59.      *   .setCharset(StandardCharsets.UTF_8)
  60.      *   .get();}
  61.      * </pre>
  62.      *
  63.      * @see #get()
  64.      * @since 2.12.0
  65.      */
  66.     // @formatter:on
  67.     public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {

  68.         /**
  69.          * Constructs a new {@link Builder}.
  70.          */
  71.         public Builder() {
  72.             setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
  73.             setBufferSize(DEFAULT_BLOCK_SIZE);
  74.         }

  75.         /**
  76.          * Builds a new {@link ReversedLinesFileReader}.
  77.          * <p>
  78.          * You must set input that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception.
  79.          * </p>
  80.          * <p>
  81.          * This builder use the following aspects:
  82.          * </p>
  83.          * <ul>
  84.          * <li>{@link #getInputStream()}</li>
  85.          * <li>{@link #getBufferSize()}</li>
  86.          * <li>{@link #getCharset()}</li>
  87.          * </ul>
  88.          *
  89.          * @return a new instance.
  90.          * @throws IllegalStateException         if the {@code origin} is {@code null}.
  91.          * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
  92.          * @throws IOException                   if an I/O error occurs.
  93.          * @see #getPath()
  94.          * @see #getBufferSize()
  95.          * @see #getCharset()
  96.          */
  97.         @Override
  98.         public ReversedLinesFileReader get() throws IOException {
  99.             return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
  100.         }

  101.     }

  102.     private final class FilePart {
  103.         private final long no;

  104.         private final byte[] data;

  105.         private byte[] leftOver;

  106.         private int currentLastBytePos;

  107.         /**
  108.          * Constructs a new instance.
  109.          *
  110.          * @param no                     the part number
  111.          * @param length                 its length
  112.          * @param leftOverOfLastFilePart remainder
  113.          * @throws IOException if there is a problem reading the file
  114.          */
  115.         private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
  116.             this.no = no;
  117.             final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
  118.             this.data = new byte[dataLength];
  119.             final long off = (no - 1) * blockSize;

  120.             // read data
  121.             if (no > 0 /* file not empty */) {
  122.                 channel.position(off);
  123.                 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
  124.                 if (countRead != length) {
  125.                     throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
  126.                 }
  127.             }
  128.             // copy left over part into data arr
  129.             if (leftOverOfLastFilePart != null) {
  130.                 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
  131.             }
  132.             this.currentLastBytePos = data.length - 1;
  133.             this.leftOver = null;
  134.         }

  135.         /**
  136.          * Constructs the buffer containing any leftover bytes.
  137.          */
  138.         private void createLeftOver() {
  139.             final int lineLengthBytes = currentLastBytePos + 1;
  140.             if (lineLengthBytes > 0) {
  141.                 // create left over for next block
  142.                 leftOver = Arrays.copyOf(data, lineLengthBytes);
  143.             } else {
  144.                 leftOver = null;
  145.             }
  146.             currentLastBytePos = -1;
  147.         }

  148.         /**
  149.          * Finds the new-line sequence and return its length.
  150.          *
  151.          * @param data buffer to scan
  152.          * @param i    start offset in buffer
  153.          * @return length of newline sequence or 0 if none found
  154.          */
  155.         private int getNewLineMatchByteCount(final byte[] data, final int i) {
  156.             for (final byte[] newLineSequence : newLineSequences) {
  157.                 boolean match = true;
  158.                 for (int j = newLineSequence.length - 1; j >= 0; j--) {
  159.                     final int k = i + j - (newLineSequence.length - 1);
  160.                     match &= k >= 0 && data[k] == newLineSequence[j];
  161.                 }
  162.                 if (match) {
  163.                     return newLineSequence.length;
  164.                 }
  165.             }
  166.             return 0;
  167.         }

  168.         /**
  169.          * Reads a line.
  170.          *
  171.          * @return the line or null
  172.          */
  173.         private String readLine() { //NOPMD Bug in PMD

  174.             String line = null;
  175.             int newLineMatchByteCount;

  176.             final boolean isLastFilePart = no == 1;

  177.             int i = currentLastBytePos;
  178.             while (i > -1) {

  179.                 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
  180.                     // avoidNewlineSplitBuffer: for all except the last file part we
  181.                     // take a few bytes to the next file part to avoid splitting of newlines
  182.                     createLeftOver();
  183.                     break; // skip last few bytes and leave it to the next file part
  184.                 }

  185.                 // check for newline
  186.                 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
  187.                     final int lineStart = i + 1;
  188.                     final int lineLengthBytes = currentLastBytePos - lineStart + 1;

  189.                     if (lineLengthBytes < 0) {
  190.                         throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
  191.                     }
  192.                     final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);

  193.                     line = new String(lineData, charset);

  194.                     currentLastBytePos = i - newLineMatchByteCount;
  195.                     break; // found line
  196.                 }

  197.                 // move cursor
  198.                 i -= byteDecrement;

  199.                 // end of file part handling
  200.                 if (i < 0) {
  201.                     createLeftOver();
  202.                     break; // end of file part
  203.                 }
  204.             }

  205.             // last file part handling
  206.             if (isLastFilePart && leftOver != null) {
  207.                 // there will be no line break anymore, this is the first line of the file
  208.                 line = new String(leftOver, charset);
  209.                 leftOver = null;
  210.             }

  211.             return line;
  212.         }

  213.         /**
  214.          * Handles block rollover
  215.          *
  216.          * @return the new FilePart or null
  217.          * @throws IOException if there was a problem reading the file
  218.          */
  219.         private FilePart rollOver() throws IOException {

  220.             if (currentLastBytePos > -1) {
  221.                 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
  222.                         + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
  223.             }

  224.             if (no > 1) {
  225.                 return new FilePart(no - 1, blockSize, leftOver);
  226.             }
  227.             // NO 1 was the last FilePart, we're finished
  228.             if (leftOver != null) {
  229.                 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
  230.                         + new String(leftOver, charset));
  231.             }
  232.             return null;
  233.         }
  234.     }

  235.     private static final String EMPTY_STRING = ""; // compared against in readLine(); returned by toString(int) when no lines were read

  236.     private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize(); // block size of the current FileSystem; used when no block size is given

  237.     /**
  238.      * Creates a new {@link Builder} for configuring and building a {@link ReversedLinesFileReader}.
  239.      *
  240.      * @return a new {@link Builder}.
  241.      * @since 2.12.0
  242.      */
  243.     public static Builder builder() {
  244.         return new Builder();
  245.     }

  246.     private final int blockSize; // size in bytes of the blocks in which the file is read
  247.     private final Charset charset; // charset used to decode the bytes of a line into a String
  248.     private final SeekableByteChannel channel; // channel opened for reading on the file; closed by close()
  249.     private final long totalByteLength; // size of the channel in bytes, queried in the constructor
  250.     private final long totalBlockCount; // number of blocks covering the file; 0 for an empty file
  251.     private final byte[][] newLineSequences; // CRLF, LF and CR encoded in charset, in the order they are matched
  252.     private final int avoidNewlineSplitBufferSize; // byte length of the encoded CRLF sequence (newLineSequences[0])
  253.     private final int byteDecrement; // step in bytes of the backward scan: 2 for UTF-16BE/LE, otherwise 1
  254.     private FilePart currentFilePart; // part currently being scanned; null once rollOver() finds no earlier part
  255.     private boolean trailingNewlineOfFileSkipped; // true once readLine() has skipped one empty line

  256.     /**
  257.      * Constructs a ReversedLinesFileReader with the default block size (the block size
  258.      * reported by the current {@code FileSystem}) and the platform's default charset.
  259.      *
  260.      * @param file the file to be read
  261.      * @throws IOException if an I/O error occurs.
  262.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  263.      */
  264.     @Deprecated
  265.     public ReversedLinesFileReader(final File file) throws IOException {
  266.         this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
  267.     }

  268.     /**
  269.      * Constructs a ReversedLinesFileReader with the default block size (the block size
  270.      * reported by the current {@code FileSystem}) and the specified encoding.
  271.      *
  272.      * @param file    the file to be read, converted with {@link File#toPath()}; must not be null
  273.      * @param charset the charset to use, null uses the default Charset.
  274.      * @throws IOException if an I/O error occurs.
  275.      * @since 2.5
  276.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  277.      */
  278.     @Deprecated
  279.     public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
  280.         this(file.toPath(), charset);
  281.     }

  282.     /**
  283.      * Constructs a ReversedLinesFileReader for a {@link File} with the given block size and encoding.
  284.      *
  285.      * @param file      the file to be read, converted with {@link File#toPath()}; must not be null
  286.      * @param blockSize size of the internal buffer (for ideal performance this
  287.      *                  should match with the block size of the underlying file
  288.      *                  system).
  289.      * @param charset  the encoding of the file, null uses the default Charset.
  290.      * @throws IOException if an I/O error occurs.
  291.      * @since 2.3
  292.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  293.      */
  294.     @Deprecated
  295.     public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
  296.         this(file.toPath(), blockSize, charset);
  297.     }

  298.     /**
  299.      * Constructs a ReversedLinesFileReader for a {@link File} with the given block size and the named encoding.
  300.      *
  301.      * @param file      the file to be read, converted with {@link File#toPath()}; must not be null
  302.      * @param blockSize size of the internal buffer (for ideal performance this
  303.      *                  should match with the block size of the underlying file
  304.      *                  system).
  305.      * @param charsetName  the name of the encoding of the file, null uses the default Charset.
  306.      * @throws IOException                                  if an I/O error occurs
  307.      * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
  308.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  309.      */
  310.     @Deprecated
  311.     public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
  312.         this(file.toPath(), blockSize, charsetName);
  313.     }

  314.     /**
  315.      * Constructs a ReversedLinesFileReader with the default block size (the block size
  316.      * reported by the current {@code FileSystem}) and the specified encoding.
  317.      *
  318.      * @param file    the file to be read
  319.      * @param charset the charset to use, null uses the default Charset.
  320.      * @throws IOException if an I/O error occurs.
  321.      * @since 2.7
  322.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  323.      */
  324.     @Deprecated
  325.     public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
  326.         this(file, DEFAULT_BLOCK_SIZE, charset);
  327.     }

  328.     /**
  329.      * Constructs a ReversedLinesFileReader with the given block size and encoding.
  330.      *
  331.      * @param file      the file to be read
  332.      * @param blockSize size of the internal buffer (for ideal performance this
  333.      *                  should match with the block size of the underlying file
  334.      *                  system).
  335.      * @param charset  the encoding of the file, null uses the default Charset.
  336.      * @throws IOException if an I/O error occurs.
  337.      * @since 2.7
  338.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  339.      */
  340.     @Deprecated
  341.     public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
  342.         this.blockSize = blockSize;
  343.         this.charset = Charsets.toCharset(charset);

  344.         // --- check & prepare encoding ---
  345.         final CharsetEncoder charsetEncoder = this.charset.newEncoder();
  346.         final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
  347.         if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
  348.             // all one byte encodings are no problem
  349.             byteDecrement = 1;
  350.         } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
  351.         // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
  352.                 this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
  353.                 this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
  354.                 this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
  355.                 this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
  356.             byteDecrement = 1;
  357.         } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
  358.             // UTF-16 new line sequences are not allowed as second tuple of four byte
  359.             // sequences,
  360.             // however byte order has to be specified
  361.             byteDecrement = 2;
  362.         } else if (this.charset == StandardCharsets.UTF_16) {
  363.             throw new UnsupportedEncodingException(
  364.                     "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
  365.         } else {
  366.             throw new UnsupportedEncodingException(
  367.                     "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
  368.         }

  369.         // NOTE: The new line sequences are matched in the order given, so it is
  370.         // important that \r\n is BEFORE \n
  371.         this.newLineSequences = new byte[][] {
  372.             StandardLineSeparator.CRLF.getBytes(this.charset),
  373.             StandardLineSeparator.LF.getBytes(this.charset),
  374.             StandardLineSeparator.CR.getBytes(this.charset)
  375.         };

  376.         this.avoidNewlineSplitBufferSize = newLineSequences[0].length;

  377.         // Open file
  378.         this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
  379.         this.totalByteLength = channel.size();
  380.         int lastBlockLength = (int) (this.totalByteLength % blockSize);
  381.         if (lastBlockLength > 0) {
  382.             this.totalBlockCount = this.totalByteLength / blockSize + 1;
  383.         } else {
  384.             this.totalBlockCount = this.totalByteLength / blockSize;
  385.             if (this.totalByteLength > 0) {
  386.                 lastBlockLength = blockSize;
  387.             }
  388.         }
  389.         this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);

  390.     }

  391.     /**
  392.      * Constructs a ReversedLinesFileReader with the given block size and the named encoding.
  393.      *
  394.      * @param file        the file to be read
  395.      * @param blockSize   size of the internal buffer (for ideal performance this
  396.      *                    should match with the block size of the underlying file
  397.      *                    system).
  398.      * @param charsetName the name of the encoding of the file, null uses the default Charset.
  399.      * @throws IOException                                  if an I/O error occurs
  400.      * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
  401.      * @since 2.7
  402.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  403.      */
  404.     @Deprecated
  405.     public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
  406.         this(file, blockSize, Charsets.toCharset(charsetName));
  407.     }

  408.     /**
  409.      * Closes the channel that was opened on the file by the constructor.
  410.      *
  411.      * @throws IOException if an I/O error occurs.
  412.      */
  413.     @Override
  414.     public void close() throws IOException {
  415.         channel.close();
  416.     }

  417.     /**
  418.      * Returns the lines of the file from bottom to top.
  419.      *
  420.      * @return the next line or null if the start of the file is reached
  421.      * @throws IOException if an I/O error occurs.
  422.      */
  423.     public String readLine() throws IOException {

  424.         String line = currentFilePart.readLine();
  425.         while (line == null) {
  426.             currentFilePart = currentFilePart.rollOver();
  427.             if (currentFilePart == null) {
  428.                 // no more FileParts: we're done, leave line set to null
  429.                 break;
  430.             }
  431.             line = currentFilePart.readLine();
  432.         }

  433.         // aligned behavior with BufferedReader that doesn't return a last, empty line
  434.         if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
  435.             trailingNewlineOfFileSkipped = true;
  436.             line = readLine();
  437.         }

  438.         return line;
  439.     }

  440.     /**
  441.      * Returns {@code lineCount} lines of the file from bottom to top.
  442.      * <p>
  443.      * If there are less than {@code lineCount} lines in the file, then that's what
  444.      * you get.
  445.      * </p>
  446.      * <p>
  447.      * Note: You can easily flip the result with {@link Collections#reverse(List)}.
  448.      * </p>
  449.      *
  450.      * @param lineCount How many lines to read.
  451.      * @return A new list
  452.      * @throws IOException if an I/O error occurs.
  453.      * @since 2.8.0
  454.      */
  455.     public List<String> readLines(final int lineCount) throws IOException {
  456.         if (lineCount < 0) {
  457.             throw new IllegalArgumentException("lineCount < 0");
  458.         }
  459.         final ArrayList<String> arrayList = new ArrayList<>(lineCount);
  460.         for (int i = 0; i < lineCount; i++) {
  461.             final String line = readLine();
  462.             if (line == null) {
  463.                 return arrayList;
  464.             }
  465.             arrayList.add(line);
  466.         }
  467.         return arrayList;
  468.     }

  469.     /**
  470.      * Returns the last {@code lineCount} lines of the file.
  471.      * <p>
  472.      * If there are less than {@code lineCount} lines in the file, then that's what
  473.      * you get.
  474.      * </p>
  475.      *
  476.      * @param lineCount How many lines to read.
  477.      * @return A String.
  478.      * @throws IOException if an I/O error occurs.
  479.      * @since 2.8.0
  480.      */
  481.     public String toString(final int lineCount) throws IOException {
  482.         final List<String> lines = readLines(lineCount);
  483.         Collections.reverse(lines);
  484.         return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
  485.     }

  486. }