View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import java.io.Closeable;
20  import java.io.File;
21  import java.io.IOException;
22  import java.io.UnsupportedEncodingException;
23  import java.nio.ByteBuffer;
24  import java.nio.channels.SeekableByteChannel;
25  import java.nio.charset.Charset;
26  import java.nio.charset.CharsetEncoder;
27  import java.nio.charset.StandardCharsets;
28  import java.nio.file.Path;
29  import java.nio.file.StandardOpenOption;
30  import java.util.ArrayList;
31  import java.util.Arrays;
32  import java.util.Collections;
33  import java.util.Iterator;
34  import java.util.List;
35  
36  import org.apache.commons.io.Charsets;
37  import org.apache.commons.io.FileSystem;
38  import org.apache.commons.io.StandardLineSeparator;
39  import org.apache.commons.io.build.AbstractStreamBuilder;
40  import org.apache.commons.io.function.IOIterable;
41  import org.apache.commons.io.function.IOIterator;
42  
43  /**
44   * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
45   * <p>
46   * To build an instance, use {@link Builder}.
47   * </p>
48   * <p>
49   * For example:
50   * </p>
51   * <pre>
52   * <code>
53   * try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
54   *   .setPath(path)
55   *   .setBufferSize(4096)
56   *   .setCharset(StandardCharsets.UTF_8)
57   *   .get()) {
58   *      reader.forEach(line -&gt; System.out.println(line));
59   * }
60   * </code>
61   * </pre>
62   *
63   * @see Builder
64   * @since 2.2
65   */
66  public class ReversedLinesFileReader implements Closeable, IOIterable<String> {
67  
68      // @formatter:off
69      /**
70       * Builds a new {@link ReversedLinesFileReader}.
71       *
72       * <p>
73       * For example:
74       * </p>
75       * <pre>{@code
76       * ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
77       *   .setPath(path)
78       *   .setBufferSize(4096)
79       *   .setCharset(StandardCharsets.UTF_8)
80       *   .get();}
81       * </pre>
82       *
83       * @see #get()
84       * @since 2.12.0
85       */
86      // @formatter:on
87      public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
88  
89          /**
90           * Constructs a new builder of {@link ReversedLinesFileReader}.
             * <p>
             * The buffer size and the default buffer size are both initialized to {@code DEFAULT_BLOCK_SIZE}, and the open options to
             * {@link StandardOpenOption#READ}.
             * </p>
91           */
92          public Builder() {
93              setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
94              setBufferSize(DEFAULT_BLOCK_SIZE);
95              setOpenOptions(StandardOpenOption.READ);
96          }
97  
98          /**
99           * Builds a new {@link ReversedLinesFileReader}.
100          * <p>
101          * You must set an aspect that can provide a {@link SeekableByteChannel} on this builder (the reader requests one through
             * {@code getChannel(SeekableByteChannel.class)}), otherwise, this method throws an exception.
102          * </p>
103          * <p>
104          * This builder uses the following aspects:
105          * </p>
106          * <ul>
107          * <li>{@link #getPath()} gets the target aspect.</li>
108          * <li>{@link #getBufferSize()}</li>
109          * <li>{@link #getCharset()}</li>
110          * </ul>
111          *
112          * @return a new instance.
113          * @throws IllegalStateException         if the {@code origin} is {@code null}.
114          * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
115          * @throws IOException                   if an I/O error occurs converting to a {@link Path} using {@link #getPath()}.
116          * @see #getPath()
117          * @see #getBufferSize()
118          * @see #getCharset()
119          * @see #getUnchecked()
120          */
121         @Override
122         public ReversedLinesFileReader get() throws IOException {
123             return new ReversedLinesFileReader(this);
124         }
125 
126     }
127 
128     private final class FilePart {
129         private final long partNumber; // 1-based number of the block this part covers (file offset is (partNumber - 1) * blockSize); 0 when the file is empty
130 
131         private final byte[] data; // the bytes of this block, followed by the bytes carried over from the part processed before it
132 
133         private byte[] leftOver; // bytes at the start of data that could not be turned into a line here; handed to the next part by rollOver()
134 
135         private int currentLastBytePos; // index in data of the last byte not yet returned as part of a line; -1 once this part is used up
136 
/**
 * Constructs a new instance holding one block of the file followed by the bytes carried over from the part processed before it.
 *
 * @param partNumber             the 1-based number of the block to load; {@code 0} means there is nothing to read (empty file).
 * @param length                 the number of bytes to read from the file for this block.
 * @param leftOverOfLastFilePart the bytes carried over from the previously processed part, appended after this block's bytes; may be {@code null}.
 * @throws IOException           if there is a problem reading the file.
 * @throws IllegalStateException if fewer than {@code length} bytes could be read from the channel.
 */
private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
    this.partNumber = partNumber;
    final int leftOverLength = leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0;
    this.data = new byte[length + leftOverLength];

    // read data
    if (partNumber > 0 /* file not empty */) {
        channel.position((partNumber - 1) * blockSize);
        final ByteBuffer buffer = ByteBuffer.wrap(data, 0, length);
        // A single read() is allowed to deliver fewer bytes than requested, so keep reading until the block is complete.
        // Stop on end-of-stream (-1) or when no progress is made (0) so that a misbehaving channel cannot cause an endless loop.
        while (buffer.hasRemaining()) {
            if (channel.read(buffer) <= 0) {
                break;
            }
        }
        if (buffer.hasRemaining()) {
            throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
        }
    }
    // copy left over part into data arr, directly behind the bytes of this block
    if (leftOverLength > 0) {
        System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverLength);
    }
    this.currentLastBytePos = data.length - 1;
    this.leftOver = null;
}
166 
167         /**
168          * Constructs the buffer containing any leftover bytes.
169          */
170         private void createLeftOver() {
171             final int lineLengthBytes = currentLastBytePos + 1;
172             if (lineLengthBytes > 0) {
173                 // create left over for next block
174                 leftOver = Arrays.copyOf(data, lineLengthBytes);
175             } else {
176                 leftOver = null;
177             }
178             currentLastBytePos = -1;
179         }
180 
181         /**
182          * Finds the new-line sequence and return its length.
183          *
184          * @param data buffer to scan.
185          * @param i    start offset in buffer.
186          * @return length of newline sequence or 0 if none found.
187          */
188         private int getNewLineMatchByteCount(final byte[] data, final int i) {
189             for (final byte[] newLineSequence : newLineSequences) {
190                 boolean match = true;
191                 for (int j = newLineSequence.length - 1; j >= 0; j--) {
192                     final int k = i + j - (newLineSequence.length - 1);
193                     match &= k >= 0 && data[k] == newLineSequence[j];
194                 }
195                 if (match) {
196                     return newLineSequence.length;
197                 }
198             }
199             return 0;
200         }
201 
202         /**
203          * Reads a line, scanning {@code data} backwards from {@code currentLastBytePos} for a new-line sequence.
             * <p>
             * When this part is not the part with number 1 and no further new-line is found, the remaining bytes are stored in {@code leftOver} and
             * {@code null} is returned.
             * </p>
204          *
205          * @return the line or null.
206          */
207         private String readLine() { //NOPMD Bug in PMD
208 
209             String line = null;
210             int newLineMatchByteCount;
211 
                // part number 1 starts at file offset 0, so it is the last part to be processed when reading backwards
212             final boolean isLastFilePart = partNumber == 1;
213 
214             int i = currentLastBytePos;
215 
                // all bytes are consumed but a leftover is pending; see below where an empty leftover is set for a new-line at index 0
216             if (i == -1 && isLastFilePart && leftOver != null) {
217                 line = new String(leftOver, charset);
218                 leftOver = null;
219                 return line;
220             }
221 
222             while (i > -1) {
223 
224                 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
225                     // avoidNewlineSplitBuffer: for all except the last file part we
226                     // take a few bytes to the next file part to avoid splitting of newlines
227                     createLeftOver();
228                     break; // skip last few bytes and leave it to the next file part
229                 }
230 
231                 // check for newline
232                 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                        // the line consists of the bytes after the new-line up to and including currentLastBytePos
233                     final int lineStart = i + 1;
234                     final int lineLengthBytes = currentLastBytePos - lineStart + 1;
235 
236                     if (lineLengthBytes < 0) {
237                         throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
238                     }
239                     final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
240 
241                     line = new String(lineData, charset);
242 
                        // continue in front of the new-line sequence on the next call
243                     currentLastBytePos = i - newLineMatchByteCount;
244 
                        // the new-line sits at index 0 of part 1: the empty leftover makes the next call return one more, empty, line
245                     if (isLastFilePart && currentLastBytePos == -1 && i == 0) {
246                         leftOver = new byte[0];
247                     }
248                     break; // found line
249                 }
250 
251                 // move cursor
252                 i -= byteDecrement;
253 
254                 // end of file part handling
255                 if (i < 0) {
256                     if (isLastFilePart) {
                            // no new-line in front of the remaining bytes: they form the first line of the file
257                         final int lineLengthBytes = currentLastBytePos + 1;
258                         if (lineLengthBytes > 0) {
259                             final byte[] lineData = Arrays.copyOf(data, lineLengthBytes);
260                             line = new String(lineData, charset);
261                         }
262                         currentLastBytePos = -1;
263                     } else {
264                         createLeftOver();
265                     }
266                     break; // end of file part
267                 }
268             }
269 
270             // there will be no line break anymore, this is the first line of the file
271             if (line == null && isLastFilePart && leftOver != null) {
272                 line = new String(leftOver, charset);
273                 leftOver = null;
274             }
275 
276             return line;
277         }
278 
279         /**
280          * Handles block rollover
281          *
282          * @return the new FilePart or null.
283          * @throws IOException if there was a problem reading the file.
284          */
285         private FilePart rollOver() throws IOException {
286 
287             if (currentLastBytePos > -1) {
288                 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
289                         + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
290             }
291 
292             if (partNumber > 1) {
293                 return new FilePart(partNumber - 1, blockSize, leftOver);
294             }
295             // NO 1 was the last FilePart, we're finished
296             if (leftOver != null) {
297                 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
298                         + new String(leftOver, charset));
299             }
300             return null;
301         }
302     }
303 
304     private static final String EMPTY_STRING = ""; // compared against a read line to detect an empty line; also the result of toString(int) when no line was read
305 
306     private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize(); // block size reported for the current file system; used when no buffer size is given
307 
308     /**
309      * Constructs a new {@link Builder}.
         * <p>
         * The returned builder starts with the buffer size set to {@code DEFAULT_BLOCK_SIZE} and the open option {@link StandardOpenOption#READ}.
         * </p>
310      *
311      * @return a new {@link Builder}.
312      * @since 2.12.0
313      */
314     public static Builder builder() {
315         return new Builder();
316     }
317 
318     private final int blockSize; // number of bytes read from the channel per FilePart (the builder's buffer size)
319     private final Charset charset; // charset used to decode line bytes and to encode the new-line sequences
320     private final SeekableByteChannel channel; // source of the file bytes; closed by close()
321     private final long totalByteLength; // size of the channel in bytes at construction time
322     private final long totalBlockCount; // number of blocks of blockSize bytes needed to cover totalByteLength (a partial trailing block counts)
323     private final byte[][] newLineSequences; // CRLF, LF and CR encoded in charset, in matching order
324     private final int avoidNewlineSplitBufferSize; // length in bytes of the encoded CRLF; that many leading bytes of a block are left to the preceding block
325     private final int byteDecrement; // step in bytes by which the backward scan moves (1 or 2, depending on the charset)
326     private FilePart currentFilePart; // the part lines are currently read from
327     private boolean trailingNewlineOfFileSkipped; // set once readLine() has skipped an empty line
328 
/**
 * Constructs a new instance from the settings of the given builder and loads the last block of the file.
 * <p>
 * If determining the size of the channel or reading the last block fails, the channel obtained from the builder is closed again before the failure is
 * propagated.
 * </p>
 *
 * @param builder provides the buffer size, the charset and the channel to read from.
 * @throws UnsupportedEncodingException if the charset is not one of the supported encodings.
 * @throws IOException                  if an I/O error occurs.
 */
private ReversedLinesFileReader(final Builder builder) throws IOException {
    this.blockSize = builder.getBufferSize();
    this.charset = Charsets.toCharset(builder.getCharset());
    // check & prepare encoding
    final float maxBytesPerChar = charset.newEncoder().maxBytesPerChar();
    if (maxBytesPerChar == 1f || StandardCharsets.UTF_8.equals(charset)) {
        // all one byte encodings are no problem
        byteDecrement = 1;
    } else if (isCharset(charset, "Shift_JIS") // Same as for UTF-8
            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
            || isCharset(charset, "windows-31j") // Windows code page 932 (Japanese)
            || isCharset(charset, "x-windows-949") // Windows code page 949 (Korean)
            || isCharset(charset, "gbk") // Windows code page 936 (Simplified Chinese)
            || isCharset(charset, "x-windows-950")) { // Windows code page 950 (Traditional Chinese)
        byteDecrement = 1;
    } else if (StandardCharsets.UTF_16BE.equals(charset) || StandardCharsets.UTF_16LE.equals(charset)) {
        // UTF-16 new line sequences are not allowed as second tuple of four byte
        // sequences,
        // however byte order has to be specified
        byteDecrement = 2;
    } else if (StandardCharsets.UTF_16.equals(charset)) {
        throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
    } else {
        throw new UnsupportedEncodingException("Encoding " + charset + " is not supported yet (feel free to submit a patch)");
    }
    // NOTE: The new line sequences are matched in the order given, so it is
    // important that \r\n is BEFORE \n
    this.newLineSequences = new byte[][] { StandardLineSeparator.CRLF.getBytes(charset), StandardLineSeparator.LF.getBytes(charset),
            StandardLineSeparator.CR.getBytes(charset) };
    this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
    // Open file
    this.channel = builder.getChannel(SeekableByteChannel.class);
    try {
        this.totalByteLength = channel.size();
        final int remainder = (int) (totalByteLength % blockSize);
        final long fullBlocks = totalByteLength / blockSize;
        // a partial trailing block counts as a block of its own
        this.totalBlockCount = remainder > 0 ? fullBlocks + 1 : fullBlocks;
        // without a partial trailing block the last block is a full one, or there is no block at all for an empty file
        final int lastBlockLength;
        if (remainder > 0) {
            lastBlockLength = remainder;
        } else {
            lastBlockLength = totalByteLength > 0 ? blockSize : 0;
        }
        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
    } catch (final IOException | RuntimeException e) {
        // the caller never receives the instance, so nobody else could close the channel
        try {
            channel.close();
        } catch (final IOException closeException) {
            e.addSuppressed(closeException);
        }
        throw e;
    }
}

/**
 * Tests whether the given charset is the one registered under the given name, without failing when this Java runtime does not provide that charset.
 *
 * @param charset the charset to test.
 * @param name    the name or alias of the charset to compare with.
 * @return {@code true} if the runtime supports {@code name} and it denotes {@code charset}.
 */
private static boolean isCharset(final Charset charset, final String name) {
    return Charset.isSupported(name) && Charset.forName(name).equals(charset);
}
374 
375     /**
376      * Constructs a ReversedLinesFileReader with the default block size ({@code DEFAULT_BLOCK_SIZE}, the block size reported for the current file system)
         * and the virtual machine's {@linkplain Charset#defaultCharset() default charset}.
377      *
378      * @param file the file to be read.
379      * @throws IOException if an I/O error occurs.
380      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
381      */
382     @Deprecated
383     public ReversedLinesFileReader(final File file) throws IOException {
384         this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
385     }
386 
387     /**
388      * Constructs a ReversedLinesFileReader with the default block size ({@code DEFAULT_BLOCK_SIZE}, the block size reported for the current file system)
389      * and the specified encoding.
390      *
391      * @param file    the file to be read.
392      * @param charset the charset to use, null uses the default Charset.
393      * @throws IOException if an I/O error occurs.
394      * @since 2.5
395      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
396      */
397     @Deprecated
398     public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
            // delegates to the Path based constructor, which supplies DEFAULT_BLOCK_SIZE
399         this(file.toPath(), charset);
400     }
401 
402     /**
403      * Constructs a ReversedLinesFileReader with the given block size and encoding.
         * <p>
         * The file is converted with {@link File#toPath()} and handed to the {@link Path} based constructor.
         * </p>
404      *
405      * @param file      the file to be read.
406      * @param blockSize size of the internal buffer (for ideal performance this
407      *                  should match with the block size of the underlying file
408      *                  system).
409      * @param charset  the encoding of the file, null uses the default Charset.
410      * @throws IOException if an I/O error occurs.
411      * @since 2.3
412      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
413      */
414     @Deprecated
415     public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
416         this(file.toPath(), blockSize, charset);
417     }
418 
419     /**
420      * Constructs a ReversedLinesFileReader with the given block size and encoding.
         * <p>
         * The file is converted with {@link File#toPath()} and handed to the {@link Path} based constructor taking a charset name.
         * </p>
421      *
422      * @param file      the file to be read.
423      * @param blockSize size of the internal buffer (for ideal performance this
424      *                  should match with the block size of the underlying file
425      *                  system).
426      * @param charsetName  the encoding of the file, null uses the default Charset.
427      * @throws IOException                                  if an I/O error occurs.
428      * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported.
429      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
430      */
431     @Deprecated
432     public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
433         this(file.toPath(), blockSize, charsetName);
434     }
435 
436     /**
437      * Constructs a ReversedLinesFileReader with the default block size ({@code DEFAULT_BLOCK_SIZE}, the block size reported for the current file system)
438      * and the specified encoding.
439      *
440      * @param file    the file to be read.
441      * @param charset the charset to use, null uses the default Charset.
442      * @throws IOException if an I/O error occurs.
443      * @since 2.7
444      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
445      */
446     @Deprecated
447     public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
448         this(file, DEFAULT_BLOCK_SIZE, charset);
449     }
450 
451     /**
452      * Constructs a ReversedLinesFileReader with the given block size and encoding.
         * <p>
         * The arguments are applied to a new {@link Builder} as its path, buffer size and charset.
         * </p>
453      *
454      * @param file      the file to be read.
455      * @param blockSize size of the internal buffer (for ideal performance this
456      *                  should match with the block size of the underlying file
457      *                  system).
458      * @param charset  the encoding of the file, null uses the default Charset.
459      * @throws IOException if an I/O error occurs.
460      * @since 2.7
461      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
462      */
463     @Deprecated
464     public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
465         this(builder().setPath(file).setBufferSize(blockSize).setCharset(charset));
466     }
467 
468     /**
469      * Constructs a ReversedLinesFileReader with the given block size and encoding.
         * <p>
         * The charset name is resolved with {@code Charsets.toCharset(String)} before delegating to the constructor taking a {@link Charset}.
         * </p>
470      *
471      * @param file        the file to be read.
472      * @param blockSize   size of the internal buffer (for ideal performance this
473      *                    should match with the block size of the underlying file
474      *                    system).
475      * @param charsetName the encoding of the file, null uses the default Charset.
476      * @throws IOException                                  if an I/O error occurs.
477      * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported.
478      * @since 2.7
479      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
480      */
481     @Deprecated
482     public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
483         this(file, blockSize, Charsets.toCharset(charsetName));
484     }
485 
486     /**
487      * Closes underlying resources.
         * <p>
         * This closes the {@link SeekableByteChannel} the lines are read from.
         * </p>
488      *
489      * @throws IOException if an I/O error occurs.
490      */
491     @Override
492     public void close() throws IOException {
493         channel.close();
494     }
495 
496     @Override
497     public IOIterator<String> iterator() {
498         return new IOIterator<String>() {
499 
500             private String next;
501 
502             @Override
503             public boolean hasNext() throws IOException {
504                 if (next == null) {
505                     next = readLine();
506                 }
507                 return next != null;
508             }
509 
510             @Override
511             public String next() throws IOException {
512                 if (next == null) {
513                     next = readLine();
514                 }
515                 final String tmp = next;
516                 next = null;
517                 return tmp;
518             }
519 
520             @Override
521             public Iterator<String> unwrap() {
522                 return null;
523             }
524 
525         };
526     }
527 
528     /**
529      * Returns the lines of the file from bottom to top.
530      *
531      * @return the next line or null if the start of the file is reached.
532      * @throws IOException if an I/O error occurs.
533      */
534     public String readLine() throws IOException {
535         String line = currentFilePart.readLine();
536         while (line == null) {
537             currentFilePart = currentFilePart.rollOver();
538             if (currentFilePart == null) {
539                 // partNumber more FileParts: we're done, leave line set to null
540                 break;
541             }
542             line = currentFilePart.readLine();
543         }
544         // aligned behavior with BufferedReader that doesn't return a last, empty line
545         if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
546             trailingNewlineOfFileSkipped = true;
547             line = readLine();
548         }
549         return line;
550     }
551 
552     /**
553      * Returns {@code lineCount} lines of the file from bottom to top.
554      * <p>
555      * If there are less than {@code lineCount} lines in the file, then that's what
556      * you get.
557      * </p>
558      * <p>
559      * Note: You can easily flip the result with {@link Collections#reverse(List)}.
560      * </p>
561      *
562      * @param lineCount How many lines to read.
563      * @return A new list.
564      * @throws IOException if an I/O error occurs.
565      * @since 2.8.0
566      */
567     public List<String> readLines(final int lineCount) throws IOException {
568         if (lineCount < 0) {
569             throw new IllegalArgumentException("lineCount < 0");
570         }
571         final ArrayList<String> arrayList = new ArrayList<>(lineCount);
572         for (int i = 0; i < lineCount; i++) {
573             final String line = readLine();
574             if (line == null) {
575                 return arrayList;
576             }
577             arrayList.add(line);
578         }
579         return arrayList;
580     }
581 
582     /**
583      * Returns the last {@code lineCount} lines of the file.
584      * <p>
585      * If there are less than {@code lineCount} lines in the file, then that's what
586      * you get.
587      * </p>
588      *
589      * @param lineCount How many lines to read.
590      * @return A String.
591      * @throws IOException if an I/O error occurs.
592      * @since 2.8.0
593      */
594     public String toString(final int lineCount) throws IOException {
595         final List<String> lines = readLines(lineCount);
596         Collections.reverse(lines);
597         return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
598     }
599 
600     @Override
601     public Iterable<String> unwrap() {
            // this implementation has no underlying Iterable to hand out and always returns null
602         return null;
603     }
604 
605 }