View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import java.io.Closeable;
20  import java.io.File;
21  import java.io.IOException;
22  import java.io.UnsupportedEncodingException;
23  import java.nio.ByteBuffer;
24  import java.nio.channels.SeekableByteChannel;
25  import java.nio.charset.Charset;
26  import java.nio.charset.CharsetEncoder;
27  import java.nio.charset.StandardCharsets;
28  import java.nio.file.Files;
29  import java.nio.file.Path;
30  import java.nio.file.StandardOpenOption;
31  import java.util.ArrayList;
32  import java.util.Arrays;
33  import java.util.Collections;
34  import java.util.Iterator;
35  import java.util.List;
36  
37  import org.apache.commons.io.Charsets;
38  import org.apache.commons.io.FileSystem;
39  import org.apache.commons.io.StandardLineSeparator;
40  import org.apache.commons.io.build.AbstractStreamBuilder;
41  import org.apache.commons.io.function.IOIterable;
42  import org.apache.commons.io.function.IOIterator;
43  
44  /**
45   * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
46   * <p>
47   * To build an instance, use {@link Builder}.
48   * </p>
49   * <p>
50   * For example:
51   * </p>
52   * <pre>
53   * <code>
54   * try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
55   *   .setPath(path)
56   *   .setBufferSize(4096)
57   *   .setCharset(StandardCharsets.UTF_8)
58   *   .get()) {
59   *      reader.forEach(line -&gt; System.out.println(line));
60   * }
61   * </code>
62   * </pre>
63   *
64   * @see Builder
65   * @since 2.2
66   */
67  public class ReversedLinesFileReader implements Closeable, IOIterable<String> {
68  
69      // @formatter:off
70      /**
71       * Builds a new {@link ReversedLinesFileReader}.
72       *
73       * <p>
74       * For example:
75       * </p>
76       * <pre>{@code
77       * ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
78       *   .setPath(path)
79       *   .setBufferSize(4096)
80       *   .setCharset(StandardCharsets.UTF_8)
81       *   .get());}
82       * </pre>
83       *
84       * @see #get()
85       * @since 2.12.0
86       */
87      // @formatter:on
88      public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
89  
90          /**
91           * Constructs a new builder of {@link ReversedLinesFileReader}.
92           */
93          public Builder() {
94              setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
95              setBufferSize(DEFAULT_BLOCK_SIZE);
96          }
97  
98          /**
99           * Builds a new {@link ReversedLinesFileReader}.
100          * <p>
101          * You must set an aspect that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception.
102          * </p>
103          * <p>
104          * This builder uses the following aspects:
105          * </p>
106          * <ul>
107          * <li>{@link #getPath()} gets the target aspect.</li>
108          * <li>{@link #getBufferSize()}</li>
109          * <li>{@link #getCharset()}</li>
110          * </ul>
111          *
112          * @return a new instance.
113          * @throws IllegalStateException         if the {@code origin} is {@code null}.
114          * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
115          * @throws IOException                   if an I/O error occurs converting to a {@link Path} using {@link #getPath()}.
116          * @see #getPath()
117          * @see #getBufferSize()
118          * @see #getCharset()
119          * @see #getUnchecked()
120          */
121         @Override
122         public ReversedLinesFileReader get() throws IOException {
123             return new ReversedLinesFileReader(this);
124         }
125 
126     }
127 
128     private final class FilePart {
129         private final long partNumber;
130 
131         private final byte[] data;
132 
133         private byte[] leftOver;
134 
135         private int currentLastBytePos;
136 
137         /**
138          * Constructs a new instance.
139          *
140          * @param partNumber             the part number
141          * @param length                 its length
142          * @param leftOverOfLastFilePart remainder
143          * @throws IOException if there is a problem reading the file
144          */
145         private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
146             this.partNumber = partNumber;
147             final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
148             this.data = new byte[dataLength];
149             final long off = (partNumber - 1) * blockSize;
150 
151             // read data
152             if (partNumber > 0 /* file not empty */) {
153                 channel.position(off);
154                 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
155                 if (countRead != length) {
156                     throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
157                 }
158             }
159             // copy left over part into data arr
160             if (leftOverOfLastFilePart != null) {
161                 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
162             }
163             this.currentLastBytePos = data.length - 1;
164             this.leftOver = null;
165         }
166 
167         /**
168          * Constructs the buffer containing any leftover bytes.
169          */
170         private void createLeftOver() {
171             final int lineLengthBytes = currentLastBytePos + 1;
172             if (lineLengthBytes > 0) {
173                 // create left over for next block
174                 leftOver = Arrays.copyOf(data, lineLengthBytes);
175             } else {
176                 leftOver = null;
177             }
178             currentLastBytePos = -1;
179         }
180 
181         /**
182          * Finds the new-line sequence and return its length.
183          *
184          * @param data buffer to scan
185          * @param i    start offset in buffer
186          * @return length of newline sequence or 0 if none found
187          */
188         private int getNewLineMatchByteCount(final byte[] data, final int i) {
189             for (final byte[] newLineSequence : newLineSequences) {
190                 boolean match = true;
191                 for (int j = newLineSequence.length - 1; j >= 0; j--) {
192                     final int k = i + j - (newLineSequence.length - 1);
193                     match &= k >= 0 && data[k] == newLineSequence[j];
194                 }
195                 if (match) {
196                     return newLineSequence.length;
197                 }
198             }
199             return 0;
200         }
201 
202         /**
203          * Reads a line.
204          *
205          * @return the line or null
206          */
207         private String readLine() { //NOPMD Bug in PMD
208 
209             String line = null;
210             int newLineMatchByteCount;
211 
212             final boolean isLastFilePart = partNumber == 1;
213 
214             int i = currentLastBytePos;
215             while (i > -1) {
216 
217                 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
218                     // avoidNewlineSplitBuffer: for all except the last file part we
219                     // take a few bytes to the next file part to avoid splitting of newlines
220                     createLeftOver();
221                     break; // skip last few bytes and leave it to the next file part
222                 }
223 
224                 // check for newline
225                 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
226                     final int lineStart = i + 1;
227                     final int lineLengthBytes = currentLastBytePos - lineStart + 1;
228 
229                     if (lineLengthBytes < 0) {
230                         throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
231                     }
232                     final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
233 
234                     line = new String(lineData, charset);
235 
236                     currentLastBytePos = i - newLineMatchByteCount;
237                     break; // found line
238                 }
239 
240                 // move cursor
241                 i -= byteDecrement;
242 
243                 // end of file part handling
244                 if (i < 0) {
245                     createLeftOver();
246                     break; // end of file part
247                 }
248             }
249 
250             // last file part handling
251             if (isLastFilePart && leftOver != null) {
252                 // there will be partNumber line break anymore, this is the first line of the file
253                 line = new String(leftOver, charset);
254                 leftOver = null;
255             }
256 
257             return line;
258         }
259 
260         /**
261          * Handles block rollover
262          *
263          * @return the new FilePart or null
264          * @throws IOException if there was a problem reading the file
265          */
266         private FilePart rollOver() throws IOException {
267 
268             if (currentLastBytePos > -1) {
269                 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
270                         + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
271             }
272 
273             if (partNumber > 1) {
274                 return new FilePart(partNumber - 1, blockSize, leftOver);
275             }
276             // NO 1 was the last FilePart, we're finished
277             if (leftOver != null) {
278                 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
279                         + new String(leftOver, charset));
280             }
281             return null;
282         }
283     }
284 
285     private static final String EMPTY_STRING = "";
286 
287     private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
288 
289     /**
290      * Constructs a new {@link Builder}.
291      *
292      * @return a new {@link Builder}.
293      * @since 2.12.0
294      */
295     public static Builder builder() {
296         return new Builder();
297     }
298 
299     private final int blockSize;
300     private final Charset charset;
301     private final SeekableByteChannel channel;
302     private final long totalByteLength;
303     private final long totalBlockCount;
304     private final byte[][] newLineSequences;
305     private final int avoidNewlineSplitBufferSize;
306     private final int byteDecrement;
307     private FilePart currentFilePart;
308     private boolean trailingNewlineOfFileSkipped;
309 
310     private ReversedLinesFileReader(final Builder builder) throws IOException {
311         this.blockSize = builder.getBufferSize();
312         this.charset = Charsets.toCharset(builder.getCharset());
313         // check & prepare encoding
314         final CharsetEncoder charsetEncoder = this.charset.newEncoder();
315         final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
316         if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
317             // all one byte encodings are partNumber problem
318             byteDecrement = 1;
319         } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
320                 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
321                 this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
322                 this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
323                 this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
324                 this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
325             byteDecrement = 1;
326         } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
327             // UTF-16 new line sequences are not allowed as second tuple of four byte
328             // sequences,
329             // however byte order has to be specified
330             byteDecrement = 2;
331         } else if (this.charset == StandardCharsets.UTF_16) {
332             throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
333         } else {
334             throw new UnsupportedEncodingException("Encoding " + charset + " is not supported yet (feel free to submit a patch)");
335         }
336         // NOTE: The new line sequences are matched in the order given, so it is
337         // important that \r\n is BEFORE \n
338         this.newLineSequences = new byte[][] { StandardLineSeparator.CRLF.getBytes(this.charset), StandardLineSeparator.LF.getBytes(this.charset),
339                 StandardLineSeparator.CR.getBytes(this.charset) };
340         this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
341         // Open file
342         this.channel = Files.newByteChannel(builder.getPath(), StandardOpenOption.READ);
343         this.totalByteLength = channel.size();
344         int lastBlockLength = (int) (this.totalByteLength % blockSize);
345         if (lastBlockLength > 0) {
346             this.totalBlockCount = this.totalByteLength / blockSize + 1;
347         } else {
348             this.totalBlockCount = this.totalByteLength / blockSize;
349             if (this.totalByteLength > 0) {
350                 lastBlockLength = blockSize;
351             }
352         }
353         this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
354     }
355 
356     /**
357      * Constructs a ReversedLinesFileReader with default block size of 4KB and the virtual machine's {@link Charset#defaultCharset() default charset}.
358      *
359      * @param file the file to be read
360      * @throws IOException if an I/O error occurs.
361      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
362      */
363     @Deprecated
364     public ReversedLinesFileReader(final File file) throws IOException {
365         this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
366     }
367 
368     /**
369      * Constructs a ReversedLinesFileReader with default block size of 4KB and the
370      * specified encoding.
371      *
372      * @param file    the file to be read
373      * @param charset the charset to use, null uses the default Charset.
374      * @throws IOException if an I/O error occurs.
375      * @since 2.5
376      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
377      */
378     @Deprecated
379     public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
380         this(file.toPath(), charset);
381     }
382 
383     /**
384      * Constructs a ReversedLinesFileReader with the given block size and encoding.
385      *
386      * @param file      the file to be read
387      * @param blockSize size of the internal buffer (for ideal performance this
388      *                  should match with the block size of the underlying file
389      *                  system).
390      * @param charset  the encoding of the file, null uses the default Charset.
391      * @throws IOException if an I/O error occurs.
392      * @since 2.3
393      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
394      */
395     @Deprecated
396     public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
397         this(file.toPath(), blockSize, charset);
398     }
399 
400     /**
401      * Constructs a ReversedLinesFileReader with the given block size and encoding.
402      *
403      * @param file      the file to be read
404      * @param blockSize size of the internal buffer (for ideal performance this
405      *                  should match with the block size of the underlying file
406      *                  system).
407      * @param charsetName  the encoding of the file, null uses the default Charset.
408      * @throws IOException                                  if an I/O error occurs
409      * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
410      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
411      */
412     @Deprecated
413     public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
414         this(file.toPath(), blockSize, charsetName);
415     }
416 
417     /**
418      * Constructs a ReversedLinesFileReader with default block size of 4KB and the
419      * specified encoding.
420      *
421      * @param file    the file to be read
422      * @param charset the charset to use, null uses the default Charset.
423      * @throws IOException if an I/O error occurs.
424      * @since 2.7
425      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
426      */
427     @Deprecated
428     public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
429         this(file, DEFAULT_BLOCK_SIZE, charset);
430     }
431 
432     /**
433      * Constructs a ReversedLinesFileReader with the given block size and encoding.
434      *
435      * @param file      the file to be read
436      * @param blockSize size of the internal buffer (for ideal performance this
437      *                  should match with the block size of the underlying file
438      *                  system).
439      * @param charset  the encoding of the file, null uses the default Charset.
440      * @throws IOException if an I/O error occurs.
441      * @since 2.7
442      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
443      */
444     @Deprecated
445     public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
446         this(builder().setPath(file).setBufferSize(blockSize).setCharset(charset));
447     }
448 
449     /**
450      * Constructs a ReversedLinesFileReader with the given block size and encoding.
451      *
452      * @param file        the file to be read
453      * @param blockSize   size of the internal buffer (for ideal performance this
454      *                    should match with the block size of the underlying file
455      *                    system).
456      * @param charsetName the encoding of the file, null uses the default Charset.
457      * @throws IOException                                  if an I/O error occurs
458      * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
459      * @since 2.7
460      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
461      */
462     @Deprecated
463     public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
464         this(file, blockSize, Charsets.toCharset(charsetName));
465     }
466 
467     /**
468      * Closes underlying resources.
469      *
470      * @throws IOException if an I/O error occurs.
471      */
472     @Override
473     public void close() throws IOException {
474         channel.close();
475     }
476 
477     @Override
478     public IOIterator<String> iterator() {
479         return new IOIterator<String>() {
480 
481             private String next;
482 
483             @Override
484             public boolean hasNext() throws IOException {
485                 if (next == null) {
486                     next = readLine();
487                 }
488                 return next != null;
489             }
490 
491             @Override
492             public String next() throws IOException {
493                 if (next == null) {
494                     next = readLine();
495                 }
496                 final String tmp = next;
497                 next = null;
498                 return tmp;
499             }
500 
501             @Override
502             public Iterator<String> unwrap() {
503                 return null;
504             }
505 
506         };
507     }
508 
509     /**
510      * Returns the lines of the file from bottom to top.
511      *
512      * @return the next line or null if the start of the file is reached
513      * @throws IOException if an I/O error occurs.
514      */
515     public String readLine() throws IOException {
516         String line = currentFilePart.readLine();
517         while (line == null) {
518             currentFilePart = currentFilePart.rollOver();
519             if (currentFilePart == null) {
520                 // partNumber more FileParts: we're done, leave line set to null
521                 break;
522             }
523             line = currentFilePart.readLine();
524         }
525         // aligned behavior with BufferedReader that doesn't return a last, empty line
526         if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
527             trailingNewlineOfFileSkipped = true;
528             line = readLine();
529         }
530         return line;
531     }
532 
533     /**
534      * Returns {@code lineCount} lines of the file from bottom to top.
535      * <p>
536      * If there are less than {@code lineCount} lines in the file, then that's what
537      * you get.
538      * </p>
539      * <p>
540      * Note: You can easily flip the result with {@link Collections#reverse(List)}.
541      * </p>
542      *
543      * @param lineCount How many lines to read.
544      * @return A new list
545      * @throws IOException if an I/O error occurs.
546      * @since 2.8.0
547      */
548     public List<String> readLines(final int lineCount) throws IOException {
549         if (lineCount < 0) {
550             throw new IllegalArgumentException("lineCount < 0");
551         }
552         final ArrayList<String> arrayList = new ArrayList<>(lineCount);
553         for (int i = 0; i < lineCount; i++) {
554             final String line = readLine();
555             if (line == null) {
556                 return arrayList;
557             }
558             arrayList.add(line);
559         }
560         return arrayList;
561     }
562 
563     /**
564      * Returns the last {@code lineCount} lines of the file.
565      * <p>
566      * If there are less than {@code lineCount} lines in the file, then that's what
567      * you get.
568      * </p>
569      *
570      * @param lineCount How many lines to read.
571      * @return A String.
572      * @throws IOException if an I/O error occurs.
573      * @since 2.8.0
574      */
575     public String toString(final int lineCount) throws IOException {
576         final List<String> lines = readLines(lineCount);
577         Collections.reverse(lines);
578         return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
579     }
580 
581     @Override
582     public Iterable<String> unwrap() {
583         return null;
584     }
585 
586 }