View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import java.io.Closeable;
20  import java.io.File;
21  import java.io.IOException;
22  import java.io.UnsupportedEncodingException;
23  import java.nio.ByteBuffer;
24  import java.nio.channels.SeekableByteChannel;
25  import java.nio.charset.Charset;
26  import java.nio.charset.CharsetEncoder;
27  import java.nio.charset.StandardCharsets;
28  import java.nio.file.Files;
29  import java.nio.file.Path;
30  import java.nio.file.StandardOpenOption;
31  import java.util.ArrayList;
32  import java.util.Arrays;
33  import java.util.Collections;
34  import java.util.List;
35  
36  import org.apache.commons.io.Charsets;
37  import org.apache.commons.io.FileSystem;
38  import org.apache.commons.io.StandardLineSeparator;
39  import org.apache.commons.io.build.AbstractStreamBuilder;
40  
41  /**
42   * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
43   * <p>
44   * To build an instance, use {@link Builder}.
45   * </p>
46   *
47   * @see Builder
48   * @since 2.2
49   */
50  public class ReversedLinesFileReader implements Closeable {
51  
52      // @formatter:off
53      /**
54       * Builds a new {@link ReversedLinesFileReader}.
55       *
56       * <p>
57       * For example:
58       * </p>
59       * <pre>{@code
60       * ReversedLinesFileReader r = ReversedLinesFileReader.builder()
61       *   .setPath(path)
62       *   .setBufferSize(4096)
63       *   .setCharset(StandardCharsets.UTF_8)
64       *   .get();}
65       * </pre>
66       *
67       * @see #get()
68       * @since 2.12.0
69       */
70      // @formatter:on
71      public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
72  
73          /**
74           * Constructs a new {@link Builder}.
75           */
76          public Builder() {
77              setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
78              setBufferSize(DEFAULT_BLOCK_SIZE);
79          }
80  
81          /**
82           * Builds a new {@link ReversedLinesFileReader}.
83           * <p>
84           * You must set input that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception.
85           * </p>
86           * <p>
87           * This builder use the following aspects:
88           * </p>
89           * <ul>
90           * <li>{@link #getInputStream()}</li>
91           * <li>{@link #getBufferSize()}</li>
92           * <li>{@link #getCharset()}</li>
93           * </ul>
94           *
95           * @return a new instance.
96           * @throws IllegalStateException         if the {@code origin} is {@code null}.
97           * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
98           * @throws IOException                   if an I/O error occurs.
99           * @see #getPath()
100          * @see #getBufferSize()
101          * @see #getCharset()
102          */
103         @Override
104         public ReversedLinesFileReader get() throws IOException {
105             return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
106         }
107 
108     }
109 
110     private final class FilePart {
111         private final long no;
112 
113         private final byte[] data;
114 
115         private byte[] leftOver;
116 
117         private int currentLastBytePos;
118 
119         /**
120          * Constructs a new instance.
121          *
122          * @param no                     the part number
123          * @param length                 its length
124          * @param leftOverOfLastFilePart remainder
125          * @throws IOException if there is a problem reading the file
126          */
127         private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
128             this.no = no;
129             final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
130             this.data = new byte[dataLength];
131             final long off = (no - 1) * blockSize;
132 
133             // read data
134             if (no > 0 /* file not empty */) {
135                 channel.position(off);
136                 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
137                 if (countRead != length) {
138                     throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
139                 }
140             }
141             // copy left over part into data arr
142             if (leftOverOfLastFilePart != null) {
143                 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
144             }
145             this.currentLastBytePos = data.length - 1;
146             this.leftOver = null;
147         }
148 
149         /**
150          * Constructs the buffer containing any leftover bytes.
151          */
152         private void createLeftOver() {
153             final int lineLengthBytes = currentLastBytePos + 1;
154             if (lineLengthBytes > 0) {
155                 // create left over for next block
156                 leftOver = Arrays.copyOf(data, lineLengthBytes);
157             } else {
158                 leftOver = null;
159             }
160             currentLastBytePos = -1;
161         }
162 
163         /**
164          * Finds the new-line sequence and return its length.
165          *
166          * @param data buffer to scan
167          * @param i    start offset in buffer
168          * @return length of newline sequence or 0 if none found
169          */
170         private int getNewLineMatchByteCount(final byte[] data, final int i) {
171             for (final byte[] newLineSequence : newLineSequences) {
172                 boolean match = true;
173                 for (int j = newLineSequence.length - 1; j >= 0; j--) {
174                     final int k = i + j - (newLineSequence.length - 1);
175                     match &= k >= 0 && data[k] == newLineSequence[j];
176                 }
177                 if (match) {
178                     return newLineSequence.length;
179                 }
180             }
181             return 0;
182         }
183 
184         /**
185          * Reads a line.
186          *
187          * @return the line or null
188          */
189         private String readLine() { //NOPMD Bug in PMD
190 
191             String line = null;
192             int newLineMatchByteCount;
193 
194             final boolean isLastFilePart = no == 1;
195 
196             int i = currentLastBytePos;
197             while (i > -1) {
198 
199                 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
200                     // avoidNewlineSplitBuffer: for all except the last file part we
201                     // take a few bytes to the next file part to avoid splitting of newlines
202                     createLeftOver();
203                     break; // skip last few bytes and leave it to the next file part
204                 }
205 
206                 // --- check for newline ---
207                 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
208                     final int lineStart = i + 1;
209                     final int lineLengthBytes = currentLastBytePos - lineStart + 1;
210 
211                     if (lineLengthBytes < 0) {
212                         throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
213                     }
214                     final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
215 
216                     line = new String(lineData, charset);
217 
218                     currentLastBytePos = i - newLineMatchByteCount;
219                     break; // found line
220                 }
221 
222                 // --- move cursor ---
223                 i -= byteDecrement;
224 
225                 // --- end of file part handling ---
226                 if (i < 0) {
227                     createLeftOver();
228                     break; // end of file part
229                 }
230             }
231 
232             // --- last file part handling ---
233             if (isLastFilePart && leftOver != null) {
234                 // there will be no line break anymore, this is the first line of the file
235                 line = new String(leftOver, charset);
236                 leftOver = null;
237             }
238 
239             return line;
240         }
241 
242         /**
243          * Handles block rollover
244          *
245          * @return the new FilePart or null
246          * @throws IOException if there was a problem reading the file
247          */
248         private FilePart rollOver() throws IOException {
249 
250             if (currentLastBytePos > -1) {
251                 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
252                         + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
253             }
254 
255             if (no > 1) {
256                 return new FilePart(no - 1, blockSize, leftOver);
257             }
258             // NO 1 was the last FilePart, we're finished
259             if (leftOver != null) {
260                 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
261                         + new String(leftOver, charset));
262             }
263             return null;
264         }
265     }
266 
267     private static final String EMPTY_STRING = "";
268 
269     private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
270 
271     /**
272      * Constructs a new {@link Builder}.
273      *
274      * @return a new {@link Builder}.
275      * @since 2.12.0
276      */
277     public static Builder builder() {
278         return new Builder();
279     }
280 
281     private final int blockSize;
282     private final Charset charset;
283     private final SeekableByteChannel channel;
284     private final long totalByteLength;
285     private final long totalBlockCount;
286     private final byte[][] newLineSequences;
287     private final int avoidNewlineSplitBufferSize;
288     private final int byteDecrement;
289     private FilePart currentFilePart;
290     private boolean trailingNewlineOfFileSkipped;
291 
292     /**
293      * Constructs a ReversedLinesFileReader with default block size of 4KB and the
294      * platform's default encoding.
295      *
296      * @param file the file to be read
297      * @throws IOException if an I/O error occurs.
298      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
299      */
300     @Deprecated
301     public ReversedLinesFileReader(final File file) throws IOException {
302         this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
303     }
304 
305     /**
306      * Constructs a ReversedLinesFileReader with default block size of 4KB and the
307      * specified encoding.
308      *
309      * @param file    the file to be read
310      * @param charset the charset to use, null uses the default Charset.
311      * @throws IOException if an I/O error occurs.
312      * @since 2.5
313      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
314      */
315     @Deprecated
316     public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
317         this(file.toPath(), charset);
318     }
319 
320     /**
321      * Constructs a ReversedLinesFileReader with the given block size and encoding.
322      *
323      * @param file      the file to be read
324      * @param blockSize size of the internal buffer (for ideal performance this
325      *                  should match with the block size of the underlying file
326      *                  system).
327      * @param charset  the encoding of the file, null uses the default Charset.
328      * @throws IOException if an I/O error occurs.
329      * @since 2.3
330      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
331      */
332     @Deprecated
333     public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
334         this(file.toPath(), blockSize, charset);
335     }
336 
337     /**
338      * Constructs a ReversedLinesFileReader with the given block size and encoding.
339      *
340      * @param file      the file to be read
341      * @param blockSize size of the internal buffer (for ideal performance this
342      *                  should match with the block size of the underlying file
343      *                  system).
344      * @param charsetName  the encoding of the file, null uses the default Charset.
345      * @throws IOException                                  if an I/O error occurs
346      * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
347      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
348      */
349     @Deprecated
350     public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
351         this(file.toPath(), blockSize, charsetName);
352     }
353 
354     /**
355      * Constructs a ReversedLinesFileReader with default block size of 4KB and the
356      * specified encoding.
357      *
358      * @param file    the file to be read
359      * @param charset the charset to use, null uses the default Charset.
360      * @throws IOException if an I/O error occurs.
361      * @since 2.7
362      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
363      */
364     @Deprecated
365     public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
366         this(file, DEFAULT_BLOCK_SIZE, charset);
367     }
368 
369     /**
370      * Constructs a ReversedLinesFileReader with the given block size and encoding.
371      *
372      * @param file      the file to be read
373      * @param blockSize size of the internal buffer (for ideal performance this
374      *                  should match with the block size of the underlying file
375      *                  system).
376      * @param charset  the encoding of the file, null uses the default Charset.
377      * @throws IOException if an I/O error occurs.
378      * @since 2.7
379      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
380      */
381     @Deprecated
382     public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
383         this.blockSize = blockSize;
384         this.charset = Charsets.toCharset(charset);
385 
386         // --- check & prepare encoding ---
387         final CharsetEncoder charsetEncoder = this.charset.newEncoder();
388         final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
389         if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
390             // all one byte encodings are no problem
391             byteDecrement = 1;
392         } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
393         // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
394                 this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
395                 this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
396                 this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
397                 this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
398             byteDecrement = 1;
399         } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
400             // UTF-16 new line sequences are not allowed as second tuple of four byte
401             // sequences,
402             // however byte order has to be specified
403             byteDecrement = 2;
404         } else if (this.charset == StandardCharsets.UTF_16) {
405             throw new UnsupportedEncodingException(
406                     "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
407         } else {
408             throw new UnsupportedEncodingException(
409                     "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
410         }
411 
412         // NOTE: The new line sequences are matched in the order given, so it is
413         // important that \r\n is BEFORE \n
414         this.newLineSequences = new byte[][] {
415             StandardLineSeparator.CRLF.getBytes(this.charset),
416             StandardLineSeparator.LF.getBytes(this.charset),
417             StandardLineSeparator.CR.getBytes(this.charset)
418         };
419 
420         this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
421 
422         // Open file
423         this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
424         this.totalByteLength = channel.size();
425         int lastBlockLength = (int) (this.totalByteLength % blockSize);
426         if (lastBlockLength > 0) {
427             this.totalBlockCount = this.totalByteLength / blockSize + 1;
428         } else {
429             this.totalBlockCount = this.totalByteLength / blockSize;
430             if (this.totalByteLength > 0) {
431                 lastBlockLength = blockSize;
432             }
433         }
434         this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
435 
436     }
437 
438     /**
439      * Constructs a ReversedLinesFileReader with the given block size and encoding.
440      *
441      * @param file        the file to be read
442      * @param blockSize   size of the internal buffer (for ideal performance this
443      *                    should match with the block size of the underlying file
444      *                    system).
445      * @param charsetName the encoding of the file, null uses the default Charset.
446      * @throws IOException                                  if an I/O error occurs
447      * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
448      * @since 2.7
449      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
450      */
451     @Deprecated
452     public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
453         this(file, blockSize, Charsets.toCharset(charsetName));
454     }
455 
456     /**
457      * Closes underlying resources.
458      *
459      * @throws IOException if an I/O error occurs.
460      */
461     @Override
462     public void close() throws IOException {
463         channel.close();
464     }
465 
466     /**
467      * Returns the lines of the file from bottom to top.
468      *
469      * @return the next line or null if the start of the file is reached
470      * @throws IOException if an I/O error occurs.
471      */
472     public String readLine() throws IOException {
473 
474         String line = currentFilePart.readLine();
475         while (line == null) {
476             currentFilePart = currentFilePart.rollOver();
477             if (currentFilePart == null) {
478                 // no more FileParts: we're done, leave line set to null
479                 break;
480             }
481             line = currentFilePart.readLine();
482         }
483 
484         // aligned behavior with BufferedReader that doesn't return a last, empty line
485         if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
486             trailingNewlineOfFileSkipped = true;
487             line = readLine();
488         }
489 
490         return line;
491     }
492 
493     /**
494      * Returns {@code lineCount} lines of the file from bottom to top.
495      * <p>
496      * If there are less than {@code lineCount} lines in the file, then that's what
497      * you get.
498      * </p>
499      * <p>
500      * Note: You can easily flip the result with {@link Collections#reverse(List)}.
501      * </p>
502      *
503      * @param lineCount How many lines to read.
504      * @return A new list
505      * @throws IOException if an I/O error occurs.
506      * @since 2.8.0
507      */
508     public List<String> readLines(final int lineCount) throws IOException {
509         if (lineCount < 0) {
510             throw new IllegalArgumentException("lineCount < 0");
511         }
512         final ArrayList<String> arrayList = new ArrayList<>(lineCount);
513         for (int i = 0; i < lineCount; i++) {
514             final String line = readLine();
515             if (line == null) {
516                 return arrayList;
517             }
518             arrayList.add(line);
519         }
520         return arrayList;
521     }
522 
523     /**
524      * Returns the last {@code lineCount} lines of the file.
525      * <p>
526      * If there are less than {@code lineCount} lines in the file, then that's what
527      * you get.
528      * </p>
529      *
530      * @param lineCount How many lines to read.
531      * @return A String.
532      * @throws IOException if an I/O error occurs.
533      * @since 2.8.0
534      */
535     public String toString(final int lineCount) throws IOException {
536         final List<String> lines = readLines(lineCount);
537         Collections.reverse(lines);
538         return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
539     }
540 
541 }