ReversedLinesFileReader.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.io.input;

  18. import java.io.Closeable;
  19. import java.io.File;
  20. import java.io.IOException;
  21. import java.io.UnsupportedEncodingException;
  22. import java.nio.ByteBuffer;
  23. import java.nio.channels.SeekableByteChannel;
  24. import java.nio.charset.Charset;
  25. import java.nio.charset.CharsetEncoder;
  26. import java.nio.charset.StandardCharsets;
  27. import java.nio.file.Files;
  28. import java.nio.file.Path;
  29. import java.nio.file.StandardOpenOption;
  30. import java.util.ArrayList;
  31. import java.util.Arrays;
  32. import java.util.Collections;
  33. import java.util.Iterator;
  34. import java.util.List;

  35. import org.apache.commons.io.Charsets;
  36. import org.apache.commons.io.FileSystem;
  37. import org.apache.commons.io.StandardLineSeparator;
  38. import org.apache.commons.io.build.AbstractStreamBuilder;
  39. import org.apache.commons.io.function.IOIterable;
  40. import org.apache.commons.io.function.IOIterator;

  41. /**
  42.  * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
  43.  * <p>
  44.  * To build an instance, use {@link Builder}.
  45.  * </p>
  46.  * <p>
  47.  * For example:
  48.  * </p>
  49.  * <pre>
  50.  * <code>
  51.  * try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
  52.  *   .setPath(path)
  53.  *   .setBufferSize(4096)
  54.  *   .setCharset(StandardCharsets.UTF_8)
  55.  *   .get()) {
  56.  *      reader.forEach(line -&gt; System.out.println(line));
  57.  * }
  58.  * </code>
  59.  * </pre>
  60.  *
  61.  * @see Builder
  62.  * @since 2.2
  63.  */
  64. public class ReversedLinesFileReader implements Closeable, IOIterable<String> {

  65.     // @formatter:off
  66.     /**
  67.      * Builds a new {@link ReversedLinesFileReader}.
  68.      *
  69.      * <p>
  70.      * For example:
  71.      * </p>
  72.      * <pre>{@code
  73.      * ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
  74.      *   .setPath(path)
  75.      *   .setBufferSize(4096)
  76.      *   .setCharset(StandardCharsets.UTF_8)
  77.      *   .get());}
  78.      * </pre>
  79.      *
  80.      * @see #get()
  81.      * @since 2.12.0
  82.      */
  83.     // @formatter:on
  84.     public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {

  85.         /**
  86.          * Constructs a new builder of {@link ReversedLinesFileReader}.
  87.          */
  88.         public Builder() {
  89.             setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
  90.             setBufferSize(DEFAULT_BLOCK_SIZE);
  91.         }

  92.         /**
  93.          * Builds a new {@link ReversedLinesFileReader}.
  94.          * <p>
  95.          * You must set an aspect that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception.
  96.          * </p>
  97.          * <p>
  98.          * This builder uses the following aspects:
  99.          * </p>
  100.          * <ul>
  101.          * <li>{@link #getPath()} gets the target aspect.</li>
  102.          * <li>{@link #getBufferSize()}</li>
  103.          * <li>{@link #getCharset()}</li>
  104.          * </ul>
  105.          *
  106.          * @return a new instance.
  107.          * @throws IllegalStateException         if the {@code origin} is {@code null}.
  108.          * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
  109.          * @throws IOException                   if an I/O error occurs converting to a {@link Path} using {@link #getPath()}.
  110.          * @see #getPath()
  111.          * @see #getBufferSize()
  112.          * @see #getCharset()
  113.          * @see #getUnchecked()
  114.          */
  115.         @Override
  116.         public ReversedLinesFileReader get() throws IOException {
  117.             return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
  118.         }

  119.     }

  120.     private final class FilePart {
  121.         private final long partNumber;

  122.         private final byte[] data;

  123.         private byte[] leftOver;

  124.         private int currentLastBytePos;

  125.         /**
  126.          * Constructs a new instance.
  127.          *
  128.          * @param partNumber             the part number
  129.          * @param length                 its length
  130.          * @param leftOverOfLastFilePart remainder
  131.          * @throws IOException if there is a problem reading the file
  132.          */
  133.         private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
  134.             this.partNumber = partNumber;
  135.             final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
  136.             this.data = new byte[dataLength];
  137.             final long off = (partNumber - 1) * blockSize;

  138.             // read data
  139.             if (partNumber > 0 /* file not empty */) {
  140.                 channel.position(off);
  141.                 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
  142.                 if (countRead != length) {
  143.                     throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
  144.                 }
  145.             }
  146.             // copy left over part into data arr
  147.             if (leftOverOfLastFilePart != null) {
  148.                 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
  149.             }
  150.             this.currentLastBytePos = data.length - 1;
  151.             this.leftOver = null;
  152.         }

  153.         /**
  154.          * Constructs the buffer containing any leftover bytes.
  155.          */
  156.         private void createLeftOver() {
  157.             final int lineLengthBytes = currentLastBytePos + 1;
  158.             if (lineLengthBytes > 0) {
  159.                 // create left over for next block
  160.                 leftOver = Arrays.copyOf(data, lineLengthBytes);
  161.             } else {
  162.                 leftOver = null;
  163.             }
  164.             currentLastBytePos = -1;
  165.         }

  166.         /**
  167.          * Finds the new-line sequence and return its length.
  168.          *
  169.          * @param data buffer to scan
  170.          * @param i    start offset in buffer
  171.          * @return length of newline sequence or 0 if none found
  172.          */
  173.         private int getNewLineMatchByteCount(final byte[] data, final int i) {
  174.             for (final byte[] newLineSequence : newLineSequences) {
  175.                 boolean match = true;
  176.                 for (int j = newLineSequence.length - 1; j >= 0; j--) {
  177.                     final int k = i + j - (newLineSequence.length - 1);
  178.                     match &= k >= 0 && data[k] == newLineSequence[j];
  179.                 }
  180.                 if (match) {
  181.                     return newLineSequence.length;
  182.                 }
  183.             }
  184.             return 0;
  185.         }

  186.         /**
  187.          * Reads a line.
  188.          *
  189.          * @return the line or null
  190.          */
  191.         private String readLine() { //NOPMD Bug in PMD

  192.             String line = null;
  193.             int newLineMatchByteCount;

  194.             final boolean isLastFilePart = partNumber == 1;

  195.             int i = currentLastBytePos;
  196.             while (i > -1) {

  197.                 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
  198.                     // avoidNewlineSplitBuffer: for all except the last file part we
  199.                     // take a few bytes to the next file part to avoid splitting of newlines
  200.                     createLeftOver();
  201.                     break; // skip last few bytes and leave it to the next file part
  202.                 }

  203.                 // check for newline
  204.                 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
  205.                     final int lineStart = i + 1;
  206.                     final int lineLengthBytes = currentLastBytePos - lineStart + 1;

  207.                     if (lineLengthBytes < 0) {
  208.                         throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
  209.                     }
  210.                     final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);

  211.                     line = new String(lineData, charset);

  212.                     currentLastBytePos = i - newLineMatchByteCount;
  213.                     break; // found line
  214.                 }

  215.                 // move cursor
  216.                 i -= byteDecrement;

  217.                 // end of file part handling
  218.                 if (i < 0) {
  219.                     createLeftOver();
  220.                     break; // end of file part
  221.                 }
  222.             }

  223.             // last file part handling
  224.             if (isLastFilePart && leftOver != null) {
  225.                 // there will be partNumber line break anymore, this is the first line of the file
  226.                 line = new String(leftOver, charset);
  227.                 leftOver = null;
  228.             }

  229.             return line;
  230.         }

  231.         /**
  232.          * Handles block rollover
  233.          *
  234.          * @return the new FilePart or null
  235.          * @throws IOException if there was a problem reading the file
  236.          */
  237.         private FilePart rollOver() throws IOException {

  238.             if (currentLastBytePos > -1) {
  239.                 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
  240.                         + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
  241.             }

  242.             if (partNumber > 1) {
  243.                 return new FilePart(partNumber - 1, blockSize, leftOver);
  244.             }
  245.             // NO 1 was the last FilePart, we're finished
  246.             if (leftOver != null) {
  247.                 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
  248.                         + new String(leftOver, charset));
  249.             }
  250.             return null;
  251.         }
  252.     }

  253.     private static final String EMPTY_STRING = ""; // used to recognize an empty line and as the result for "no lines"

  254.     private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize(); // block size used when the caller gives none

  255.     /**
  256.      * Constructs a new {@link Builder}.
  257.      *
  258.      * @return a new {@link Builder}, initially configured with the default block size as its buffer size.
  259.      * @since 2.12.0
  260.      */
  261.     public static Builder builder() {
  262.         return new Builder();
  263.     }

  264.     private final int blockSize; // number of bytes per block; block offsets in the file are multiples of this
  265.     private final Charset charset; // used to decode line bytes and to encode the newline sequences
  266.     private final SeekableByteChannel channel; // channel opened for reading on the file; closed by close()
  267.     private final long totalByteLength; // size of the channel in bytes, taken when the reader is constructed
  268.     private final long totalBlockCount; // number of blocks needed to cover totalByteLength (0 for an empty file)
  269.     private final byte[][] newLineSequences; // CRLF, LF and CR encoded with charset, matched in that order
  270.     private final int avoidNewlineSplitBufferSize; // byte length of the encoded CRLF sequence
  271.     private final int byteDecrement; // bytes to step back per iteration while scanning for newlines (1 or 2)
  272.     private FilePart currentFilePart; // block currently being scanned; replaced as reading moves towards the file start
  273.     private boolean trailingNewlineOfFileSkipped; // one-shot guard for dropping the empty line caused by a trailing newline

  274.     /**
  275.      * Constructs a ReversedLinesFileReader with the default block size (the block size reported by {@code FileSystem.getCurrent()}) and the virtual machine's {@link Charset#defaultCharset() default charset}.
  276.      *
  277.      * @param file the file to be read
  278.      * @throws IOException if an I/O error occurs.
  279.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  280.      */
  281.     @Deprecated
  282.     public ReversedLinesFileReader(final File file) throws IOException {
  283.         this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
  284.     }

  285.     /**
  286.      * Constructs a ReversedLinesFileReader with the default block size (the block size reported by
  287.      * {@code FileSystem.getCurrent()}) and the specified encoding; the file is converted with {@link File#toPath()}.
  288.      *
  289.      * @param file    the file to be read
  290.      * @param charset the charset to use, null uses the default Charset.
  291.      * @throws IOException if an I/O error occurs.
  292.      * @since 2.5
  293.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  294.      */
  295.     @Deprecated
  296.     public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
  297.         this(file.toPath(), charset);
  298.     }

  299.     /**
  300.      * Constructs a ReversedLinesFileReader with the given block size and encoding; the file is converted with {@link File#toPath()}.
  301.      *
  302.      * @param file      the file to be read
  303.      * @param blockSize size of the internal buffer (for ideal performance this
  304.      *                  should match with the block size of the underlying file
  305.      *                  system).
  306.      * @param charset  the encoding of the file, null uses the default Charset.
  307.      * @throws IOException if an I/O error occurs.
  308.      * @since 2.3
  309.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  310.      */
  311.     @Deprecated
  312.     public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
  313.         this(file.toPath(), blockSize, charset);
  314.     }

  315.     /**
  316.      * Constructs a ReversedLinesFileReader with the given block size and named encoding; the file is converted with {@link File#toPath()}.
  317.      *
  318.      * @param file      the file to be read
  319.      * @param blockSize size of the internal buffer (for ideal performance this
  320.      *                  should match with the block size of the underlying file
  321.      *                  system).
  322.      * @param charsetName  the name of the encoding of the file, null uses the default Charset.
  323.      * @throws IOException                                  if an I/O error occurs
  324.      * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
  325.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  326.      */
  327.     @Deprecated
  328.     public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
  329.         this(file.toPath(), blockSize, charsetName);
  330.     }

  331.     /**
  332.      * Constructs a ReversedLinesFileReader with the default block size (the block size reported by
  333.      * {@code FileSystem.getCurrent()}) and the specified encoding.
  334.      *
  335.      * @param file    the file to be read
  336.      * @param charset the charset to use, null uses the default Charset.
  337.      * @throws IOException if an I/O error occurs.
  338.      * @since 2.7
  339.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  340.      */
  341.     @Deprecated
  342.     public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
  343.         this(file, DEFAULT_BLOCK_SIZE, charset);
  344.     }

  345.     /**
  346.      * Constructs a ReversedLinesFileReader with the given block size and encoding.
  347.      *
  348.      * @param file      the file to be read
  349.      * @param blockSize size of the internal buffer (for ideal performance this
  350.      *                  should match with the block size of the underlying file
  351.      *                  system).
  352.      * @param charset  the encoding of the file, null uses the default Charset.
  353.      * @throws IOException if an I/O error occurs.
  354.      * @since 2.7
  355.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  356.      */
  357.     @Deprecated
  358.     public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
  359.         this.blockSize = blockSize;
  360.         this.charset = Charsets.toCharset(charset);

  361.         // --- check & prepare encoding ---
  362.         final CharsetEncoder charsetEncoder = this.charset.newEncoder();
  363.         final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
  364.         if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
  365.             // all one byte encodings are partNumber problem
  366.             byteDecrement = 1;
  367.         } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
  368.         // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
  369.                 this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
  370.                 this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
  371.                 this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
  372.                 this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
  373.             byteDecrement = 1;
  374.         } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
  375.             // UTF-16 new line sequences are not allowed as second tuple of four byte
  376.             // sequences,
  377.             // however byte order has to be specified
  378.             byteDecrement = 2;
  379.         } else if (this.charset == StandardCharsets.UTF_16) {
  380.             throw new UnsupportedEncodingException(
  381.                     "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
  382.         } else {
  383.             throw new UnsupportedEncodingException(
  384.                     "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
  385.         }

  386.         // NOTE: The new line sequences are matched in the order given, so it is
  387.         // important that \r\n is BEFORE \n
  388.         this.newLineSequences = new byte[][] {
  389.             StandardLineSeparator.CRLF.getBytes(this.charset),
  390.             StandardLineSeparator.LF.getBytes(this.charset),
  391.             StandardLineSeparator.CR.getBytes(this.charset)
  392.         };

  393.         this.avoidNewlineSplitBufferSize = newLineSequences[0].length;

  394.         // Open file
  395.         this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
  396.         this.totalByteLength = channel.size();
  397.         int lastBlockLength = (int) (this.totalByteLength % blockSize);
  398.         if (lastBlockLength > 0) {
  399.             this.totalBlockCount = this.totalByteLength / blockSize + 1;
  400.         } else {
  401.             this.totalBlockCount = this.totalByteLength / blockSize;
  402.             if (this.totalByteLength > 0) {
  403.                 lastBlockLength = blockSize;
  404.             }
  405.         }
  406.         this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);

  407.     }

  408.     /**
  409.      * Constructs a ReversedLinesFileReader with the given block size and named encoding; the name is resolved with {@code Charsets.toCharset(String)}.
  410.      *
  411.      * @param file        the file to be read
  412.      * @param blockSize   size of the internal buffer (for ideal performance this
  413.      *                    should match with the block size of the underlying file
  414.      *                    system).
  415.      * @param charsetName the name of the encoding of the file, null uses the default Charset.
  416.      * @throws IOException                                  if an I/O error occurs
  417.      * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
  418.      * @since 2.7
  419.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  420.      */
  421.     @Deprecated
  422.     public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
  423.         this(file, blockSize, Charsets.toCharset(charsetName));
  424.     }

  425.     /**
  426.      * Closes the underlying file channel.
  427.      *
  428.      * @throws IOException if an I/O error occurs.
  429.      */
  430.     @Override
  431.     public void close() throws IOException {
  432.         channel.close();
  433.     }

  434.     @Override
  435.     public IOIterator<String> iterator() {
  436.         return new IOIterator<String>() {

  437.             private String next;

  438.             @Override
  439.             public boolean hasNext() throws IOException {
  440.                 if (next == null) {
  441.                     next = readLine();
  442.                 }
  443.                 return next != null;
  444.             }

  445.             @Override
  446.             public String next() throws IOException {
  447.                 if (next == null) {
  448.                     next = readLine();
  449.                 }
  450.                 final String tmp = next;
  451.                 next = null;
  452.                 return tmp;
  453.             }

  454.             @Override
  455.             public Iterator<String> unwrap() {
  456.                 return null;
  457.             }

  458.         };
  459.     }

  460.     /**
  461.      * Returns the lines of the file from bottom to top.
  462.      *
  463.      * @return the next line or null if the start of the file is reached
  464.      * @throws IOException if an I/O error occurs.
  465.      */
  466.     public String readLine() throws IOException {
  467.         String line = currentFilePart.readLine();
  468.         while (line == null) {
  469.             currentFilePart = currentFilePart.rollOver();
  470.             if (currentFilePart == null) {
  471.                 // partNumber more FileParts: we're done, leave line set to null
  472.                 break;
  473.             }
  474.             line = currentFilePart.readLine();
  475.         }
  476.         // aligned behavior with BufferedReader that doesn't return a last, empty line
  477.         if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
  478.             trailingNewlineOfFileSkipped = true;
  479.             line = readLine();
  480.         }
  481.         return line;
  482.     }

  483.     /**
  484.      * Returns {@code lineCount} lines of the file from bottom to top.
  485.      * <p>
  486.      * If there are less than {@code lineCount} lines in the file, then that's what
  487.      * you get.
  488.      * </p>
  489.      * <p>
  490.      * Note: You can easily flip the result with {@link Collections#reverse(List)}.
  491.      * </p>
  492.      *
  493.      * @param lineCount How many lines to read.
  494.      * @return A new list
  495.      * @throws IOException if an I/O error occurs.
  496.      * @since 2.8.0
  497.      */
  498.     public List<String> readLines(final int lineCount) throws IOException {
  499.         if (lineCount < 0) {
  500.             throw new IllegalArgumentException("lineCount < 0");
  501.         }
  502.         final ArrayList<String> arrayList = new ArrayList<>(lineCount);
  503.         for (int i = 0; i < lineCount; i++) {
  504.             final String line = readLine();
  505.             if (line == null) {
  506.                 return arrayList;
  507.             }
  508.             arrayList.add(line);
  509.         }
  510.         return arrayList;
  511.     }

  512.     /**
  513.      * Returns the last {@code lineCount} lines of the file.
  514.      * <p>
  515.      * If there are less than {@code lineCount} lines in the file, then that's what
  516.      * you get.
  517.      * </p>
  518.      *
  519.      * @param lineCount How many lines to read.
  520.      * @return A String.
  521.      * @throws IOException if an I/O error occurs.
  522.      * @since 2.8.0
  523.      */
  524.     public String toString(final int lineCount) throws IOException {
  525.         final List<String> lines = readLines(lineCount);
  526.         Collections.reverse(lines);
  527.         return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
  528.     }

  529.     @Override
  530.     public Iterable<String> unwrap() {
  531.         return null; // always null: this reader holds no java.lang.Iterable it could expose
  532.     }

  533. }