View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import java.io.Closeable;
20  import java.io.File;
21  import java.io.IOException;
22  import java.io.UnsupportedEncodingException;
23  import java.nio.ByteBuffer;
24  import java.nio.channels.SeekableByteChannel;
25  import java.nio.charset.Charset;
26  import java.nio.charset.CharsetEncoder;
27  import java.nio.charset.StandardCharsets;
28  import java.nio.file.Files;
29  import java.nio.file.Path;
30  import java.nio.file.StandardOpenOption;
31  import java.util.ArrayList;
32  import java.util.Arrays;
33  import java.util.Collections;
34  import java.util.Iterator;
35  import java.util.List;
36  
37  import org.apache.commons.io.Charsets;
38  import org.apache.commons.io.FileSystem;
39  import org.apache.commons.io.StandardLineSeparator;
40  import org.apache.commons.io.build.AbstractStreamBuilder;
41  import org.apache.commons.io.function.IOIterable;
42  import org.apache.commons.io.function.IOIterator;
43  
44  /**
45   * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
46   * <p>
47   * To build an instance, use {@link Builder}.
48   * </p>
49   * <p>
50   * For example:
51   * </p>
52   * <pre>
53   * <code>
54   * try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
55   *   .setPath(path)
56   *   .setBufferSize(4096)
57   *   .setCharset(StandardCharsets.UTF_8)
58   *   .get()) {
59   *      reader.forEach(line -&gt; System.out.println(line));
60   * }
61   * </code>
62   * </pre>
63   *
64   * @see Builder
65   * @since 2.2
66   */
67  public class ReversedLinesFileReader implements Closeable, IOIterable<String> {
68  
    // @formatter:off
    /**
     * Builds a new {@link ReversedLinesFileReader}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
     *   .setPath(path)
     *   .setBufferSize(4096)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.12.0
     */
    // @formatter:on
    public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {

        /**
         * Constructs a new builder of {@link ReversedLinesFileReader}.
         * <p>
         * Both the default buffer size and the current buffer size are initialized to the reader's default block size.
         * </p>
         */
        public Builder() {
            setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
            setBufferSize(DEFAULT_BLOCK_SIZE);
        }

        /**
         * Builds a new {@link ReversedLinesFileReader}.
         * <p>
         * You must set an aspect that supports {@link #getPath()} on this builder, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getPath()} gets the target aspect (the file to read).</li>
         * <li>{@link #getBufferSize()} is used as the reader's block size.</li>
         * <li>{@link #getCharset()} is used to decode the lines.</li>
         * </ul>
         *
         * @return a new instance.
         * @throws IllegalStateException         if the {@code origin} is {@code null}.
         * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
         * @throws IOException                   if an I/O error occurs converting to a {@link Path} using {@link #getPath()}.
         * @see #getPath()
         * @see #getBufferSize()
         * @see #getCharset()
         * @see #getUnchecked()
         */
        @Override
        public ReversedLinesFileReader get() throws IOException {
            return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
        }

    }
127 
128     private final class FilePart {
        /** Number of this block counted from the start of the file, starting at 1; 0 is only used for an empty file. */
        private final long partNumber;

        /** The bytes read for this block, followed by the left-over bytes handed over from the previously processed part. */
        private final byte[] data;

        /** Bytes at the start of {@link #data} that do not yet form a complete line; handed to the next part on roll-over. */
        private byte[] leftOver;

        /** Index in {@link #data} of the last byte that has not been consumed yet; -1 once the part is used up. */
        private int currentLastBytePos;
136 
137         /**
138          * Constructs a new instance.
139          *
140          * @param partNumber             the part number
141          * @param length                 its length
142          * @param leftOverOfLastFilePart remainder
143          * @throws IOException if there is a problem reading the file
144          */
145         private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
146             this.partNumber = partNumber;
147             final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
148             this.data = new byte[dataLength];
149             final long off = (partNumber - 1) * blockSize;
150 
151             // read data
152             if (partNumber > 0 /* file not empty */) {
153                 channel.position(off);
154                 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
155                 if (countRead != length) {
156                     throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
157                 }
158             }
159             // copy left over part into data arr
160             if (leftOverOfLastFilePart != null) {
161                 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
162             }
163             this.currentLastBytePos = data.length - 1;
164             this.leftOver = null;
165         }
166 
167         /**
168          * Constructs the buffer containing any leftover bytes.
169          */
170         private void createLeftOver() {
171             final int lineLengthBytes = currentLastBytePos + 1;
172             if (lineLengthBytes > 0) {
173                 // create left over for next block
174                 leftOver = Arrays.copyOf(data, lineLengthBytes);
175             } else {
176                 leftOver = null;
177             }
178             currentLastBytePos = -1;
179         }
180 
181         /**
182          * Finds the new-line sequence and return its length.
183          *
184          * @param data buffer to scan
185          * @param i    start offset in buffer
186          * @return length of newline sequence or 0 if none found
187          */
188         private int getNewLineMatchByteCount(final byte[] data, final int i) {
189             for (final byte[] newLineSequence : newLineSequences) {
190                 boolean match = true;
191                 for (int j = newLineSequence.length - 1; j >= 0; j--) {
192                     final int k = i + j - (newLineSequence.length - 1);
193                     match &= k >= 0 && data[k] == newLineSequence[j];
194                 }
195                 if (match) {
196                     return newLineSequence.length;
197                 }
198             }
199             return 0;
200         }
201 
        /**
         * Reads the next line of this part, scanning backwards from the last unconsumed byte.
         * <p>
         * When no further new-line sequence is found in this part, the unconsumed bytes are stored as left-over.
         * For the part with number 1 (the start of the file) that left-over is returned as the final line;
         * for any other part {@code null} is returned and the left-over is kept for the roll-over.
         * </p>
         *
         * @return the line without its new-line sequence, or {@code null} if this part cannot supply another line
         * @throws IllegalStateException if a negative line length is computed
         */
        private String readLine() { //NOPMD Bug in PMD

            String line = null;
            int newLineMatchByteCount;

            // part number 1 is the first block of the file and therefore the last one to be processed
            final boolean isLastFilePart = partNumber == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // check for newline
                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                    // the line consists of the bytes after the newline up to the last unconsumed byte
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                    }
                    final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);

                    line = new String(lineData, charset);

                    // continue in front of the newline sequence on the next call
                    currentLastBytePos = i - newLineMatchByteCount;
                    break; // found line
                }

                // move cursor
                i -= byteDecrement;

                // end of file part handling
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // last file part handling
            if (isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, charset);
                leftOver = null;
            }

            return line;
        }
259 
260         /**
261          * Handles block rollover
262          *
263          * @return the new FilePart or null
264          * @throws IOException if there was a problem reading the file
265          */
266         private FilePart rollOver() throws IOException {
267 
268             if (currentLastBytePos > -1) {
269                 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
270                         + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
271             }
272 
273             if (partNumber > 1) {
274                 return new FilePart(partNumber - 1, blockSize, leftOver);
275             }
276             // NO 1 was the last FilePart, we're finished
277             if (leftOver != null) {
278                 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
279                         + new String(leftOver, charset));
280             }
281             return null;
282         }
283     }
284 
    /** The empty string, used to detect empty lines and as the result for "no lines". */
    private static final String EMPTY_STRING = "";

    /** Block size used when the caller does not specify one: the block size reported by the current {@link FileSystem}. */
    private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
288 
    /**
     * Constructs a new {@link Builder}.
     * <p>
     * Configure the returned builder and call {@link Builder#get()} to create the reader.
     * </p>
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }
298 
    /** Number of bytes read from the file per block. */
    private final int blockSize;
    /** Charset used to decode the bytes of a line. */
    private final Charset charset;
    /** Read-only channel on the file; closed by {@link #close()}. */
    private final SeekableByteChannel channel;
    /** Size of the file in bytes at the time the reader was constructed. */
    private final long totalByteLength;
    /** Number of blocks the file is divided into; the last block may be shorter than {@link #blockSize}. */
    private final long totalBlockCount;
    /** The new-line sequences CRLF, LF and CR encoded in {@link #charset}, in matching order. */
    private final byte[][] newLineSequences;
    /** Number of bytes at the start of a block that are deferred to the next part so that a new-line sequence is not split. */
    private final int avoidNewlineSplitBufferSize;
    /** Step in bytes used when scanning backwards for a new-line sequence (1 or 2, depending on the charset). */
    private final int byteDecrement;
    /** The part currently being read; set to {@code null} by {@link #readLine()} once the start of the file has been reached. */
    private FilePart currentFilePart;
    /** Whether {@link #readLine()} has already skipped an empty line. */
    private boolean trailingNewlineOfFileSkipped;
309 
    /**
     * Constructs a ReversedLinesFileReader with the default block size (the block size reported by the current
     * {@link FileSystem}) and the virtual machine's {@link Charset#defaultCharset() default charset}.
     *
     * @param file the file to be read
     * @throws IOException if an I/O error occurs.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
    }
321 
    /**
     * Constructs a ReversedLinesFileReader with the default block size (the block size reported by the current
     * {@link FileSystem}) and the specified encoding.
     * <p>
     * The file is converted with {@link File#toPath()}.
     * </p>
     *
     * @param file    the file to be read
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.5
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
        this(file.toPath(), charset);
    }
336 
    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     * <p>
     * The file is converted with {@link File#toPath()}.
     * </p>
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset  the encoding of the file, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.3
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
        this(file.toPath(), blockSize, charset);
    }
353 
    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding name.
     * <p>
     * The file is converted with {@link File#toPath()}.
     * </p>
     *
     * @param file      the file to be read
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charsetName  the name of the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs
     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
        this(file.toPath(), blockSize, charsetName);
    }
370 
    /**
     * Constructs a ReversedLinesFileReader with the default block size (the block size reported by the current
     * {@link FileSystem}) and the specified encoding.
     *
     * @param file    the file to be read
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, charset);
    }
385 
386     /**
387      * Constructs a ReversedLinesFileReader with the given block size and encoding.
388      *
389      * @param file      the file to be read
390      * @param blockSize size of the internal buffer (for ideal performance this
391      *                  should match with the block size of the underlying file
392      *                  system).
393      * @param charset  the encoding of the file, null uses the default Charset.
394      * @throws IOException if an I/O error occurs.
395      * @since 2.7
396      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
397      */
398     @Deprecated
399     public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
400         this.blockSize = blockSize;
401         this.charset = Charsets.toCharset(charset);
402 
403         // --- check & prepare encoding ---
404         final CharsetEncoder charsetEncoder = this.charset.newEncoder();
405         final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
406         if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
407             // all one byte encodings are partNumber problem
408             byteDecrement = 1;
409         } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
410         // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
411                 this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
412                 this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
413                 this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
414                 this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
415             byteDecrement = 1;
416         } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
417             // UTF-16 new line sequences are not allowed as second tuple of four byte
418             // sequences,
419             // however byte order has to be specified
420             byteDecrement = 2;
421         } else if (this.charset == StandardCharsets.UTF_16) {
422             throw new UnsupportedEncodingException(
423                     "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
424         } else {
425             throw new UnsupportedEncodingException(
426                     "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
427         }
428 
429         // NOTE: The new line sequences are matched in the order given, so it is
430         // important that \r\n is BEFORE \n
431         this.newLineSequences = new byte[][] {
432             StandardLineSeparator.CRLF.getBytes(this.charset),
433             StandardLineSeparator.LF.getBytes(this.charset),
434             StandardLineSeparator.CR.getBytes(this.charset)
435         };
436 
437         this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
438 
439         // Open file
440         this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
441         this.totalByteLength = channel.size();
442         int lastBlockLength = (int) (this.totalByteLength % blockSize);
443         if (lastBlockLength > 0) {
444             this.totalBlockCount = this.totalByteLength / blockSize + 1;
445         } else {
446             this.totalBlockCount = this.totalByteLength / blockSize;
447             if (this.totalByteLength > 0) {
448                 lastBlockLength = blockSize;
449             }
450         }
451         this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
452 
453     }
454 
    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding name.
     * <p>
     * The name is resolved to a {@link Charset} with {@code Charsets.toCharset(String)}.
     * </p>
     *
     * @param file        the file to be read
     * @param blockSize   size of the internal buffer (for ideal performance this
     *                    should match with the block size of the underlying file
     *                    system).
     * @param charsetName the name of the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs
     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
        this(file, blockSize, Charsets.toCharset(charsetName));
    }
472 
    /**
     * Closes underlying resources, that is, the channel that was opened on the file.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        channel.close();
    }
482 
483     @Override
484     public IOIterator<String> iterator() {
485         return new IOIterator<String>() {
486 
487             private String next;
488 
489             @Override
490             public boolean hasNext() throws IOException {
491                 if (next == null) {
492                     next = readLine();
493                 }
494                 return next != null;
495             }
496 
497             @Override
498             public String next() throws IOException {
499                 if (next == null) {
500                     next = readLine();
501                 }
502                 final String tmp = next;
503                 next = null;
504                 return tmp;
505             }
506 
507             @Override
508             public Iterator<String> unwrap() {
509                 return null;
510             }
511 
512         };
513     }
514 
515     /**
516      * Returns the lines of the file from bottom to top.
517      *
518      * @return the next line or null if the start of the file is reached
519      * @throws IOException if an I/O error occurs.
520      */
521     public String readLine() throws IOException {
522         String line = currentFilePart.readLine();
523         while (line == null) {
524             currentFilePart = currentFilePart.rollOver();
525             if (currentFilePart == null) {
526                 // partNumber more FileParts: we're done, leave line set to null
527                 break;
528             }
529             line = currentFilePart.readLine();
530         }
531         // aligned behavior with BufferedReader that doesn't return a last, empty line
532         if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
533             trailingNewlineOfFileSkipped = true;
534             line = readLine();
535         }
536         return line;
537     }
538 
539     /**
540      * Returns {@code lineCount} lines of the file from bottom to top.
541      * <p>
542      * If there are less than {@code lineCount} lines in the file, then that's what
543      * you get.
544      * </p>
545      * <p>
546      * Note: You can easily flip the result with {@link Collections#reverse(List)}.
547      * </p>
548      *
549      * @param lineCount How many lines to read.
550      * @return A new list
551      * @throws IOException if an I/O error occurs.
552      * @since 2.8.0
553      */
554     public List<String> readLines(final int lineCount) throws IOException {
555         if (lineCount < 0) {
556             throw new IllegalArgumentException("lineCount < 0");
557         }
558         final ArrayList<String> arrayList = new ArrayList<>(lineCount);
559         for (int i = 0; i < lineCount; i++) {
560             final String line = readLine();
561             if (line == null) {
562                 return arrayList;
563             }
564             arrayList.add(line);
565         }
566         return arrayList;
567     }
568 
569     /**
570      * Returns the last {@code lineCount} lines of the file.
571      * <p>
572      * If there are less than {@code lineCount} lines in the file, then that's what
573      * you get.
574      * </p>
575      *
576      * @param lineCount How many lines to read.
577      * @return A String.
578      * @throws IOException if an I/O error occurs.
579      * @since 2.8.0
580      */
581     public String toString(final int lineCount) throws IOException {
582         final List<String> lines = readLines(lineCount);
583         Collections.reverse(lines);
584         return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
585     }
586 
    /**
     * Returns {@code null}; this implementation does not supply an {@link Iterable} to unwrap to.
     *
     * @return always {@code null}
     */
    @Override
    public Iterable<String> unwrap() {
        return null;
    }
591 
592 }