/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.io.input;
18  
19  import java.io.Closeable;
20  import java.io.File;
21  import java.io.IOException;
22  import java.io.UnsupportedEncodingException;
23  import java.nio.ByteBuffer;
24  import java.nio.channels.SeekableByteChannel;
25  import java.nio.charset.Charset;
26  import java.nio.charset.CharsetEncoder;
27  import java.nio.charset.StandardCharsets;
28  import java.nio.file.Files;
29  import java.nio.file.Path;
30  import java.nio.file.StandardOpenOption;
31  import java.util.ArrayList;
32  import java.util.Arrays;
33  import java.util.Collections;
34  import java.util.List;
35  
36  import org.apache.commons.io.Charsets;
37  import org.apache.commons.io.FileSystem;
38  import org.apache.commons.io.StandardLineSeparator;
39  import org.apache.commons.io.build.AbstractOrigin;
40  import org.apache.commons.io.build.AbstractStreamBuilder;
41  
42  /**
43   * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
44   * <p>
45   * To build an instance, see {@link Builder}.
46   * </p>
47   *
48   * @since 2.2
49   */
50  public class ReversedLinesFileReader implements Closeable {
51  
52      /**
53       * Builds a new {@link ReversedLinesFileReader} instance.
54       * <p>
55       * For example:
56       * </p>
57       * <pre>{@code
58       * ReversedLinesFileReader r = ReversedLinesFileReader.builder()
59       *   .setPath(path)
60       *   .setBufferSize(4096)
61       *   .setCharset(StandardCharsets.UTF_8)
62       *   .get();}
63       * </pre>
64       *
65       * @since 2.12.0
66       */
67      public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
68  
69          /**
70           * Constructs a new Builder.
71           */
72          public Builder() {
73              setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
74              setBufferSize(DEFAULT_BLOCK_SIZE);
75          }
76  
77          /**
78           * Constructs a new instance.
79           * <p>
80           * This builder use the aspects Path, Charset, buffer size.
81           * </p>
82           * <p>
83           * You must provide an origin that can be converted to a Path by this builder, otherwise, this call will throw an
84           * {@link UnsupportedOperationException}.
85           * </p>
86           *
87           * @return a new instance.
88           * @throws UnsupportedOperationException if the origin cannot provide a Path.
89           * @see AbstractOrigin#getPath()
90           */
91          @Override
92          public ReversedLinesFileReader get() throws IOException {
93              return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
94          }
95  
96      }
97  
98      private final class FilePart {
99          private final long no;
100 
101         private final byte[] data;
102 
103         private byte[] leftOver;
104 
105         private int currentLastBytePos;
106 
107         /**
108          * Constructs a new instance.
109          *
110          * @param no                     the part number
111          * @param length                 its length
112          * @param leftOverOfLastFilePart remainder
113          * @throws IOException if there is a problem reading the file
114          */
115         private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
116             this.no = no;
117             final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
118             this.data = new byte[dataLength];
119             final long off = (no - 1) * blockSize;
120 
121             // read data
122             if (no > 0 /* file not empty */) {
123                 channel.position(off);
124                 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
125                 if (countRead != length) {
126                     throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
127                 }
128             }
129             // copy left over part into data arr
130             if (leftOverOfLastFilePart != null) {
131                 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
132             }
133             this.currentLastBytePos = data.length - 1;
134             this.leftOver = null;
135         }
136 
137         /**
138          * Constructs the buffer containing any leftover bytes.
139          */
140         private void createLeftOver() {
141             final int lineLengthBytes = currentLastBytePos + 1;
142             if (lineLengthBytes > 0) {
143                 // create left over for next block
144                 leftOver = Arrays.copyOf(data, lineLengthBytes);
145             } else {
146                 leftOver = null;
147             }
148             currentLastBytePos = -1;
149         }
150 
151         /**
152          * Finds the new-line sequence and return its length.
153          *
154          * @param data buffer to scan
155          * @param i    start offset in buffer
156          * @return length of newline sequence or 0 if none found
157          */
158         private int getNewLineMatchByteCount(final byte[] data, final int i) {
159             for (final byte[] newLineSequence : newLineSequences) {
160                 boolean match = true;
161                 for (int j = newLineSequence.length - 1; j >= 0; j--) {
162                     final int k = i + j - (newLineSequence.length - 1);
163                     match &= k >= 0 && data[k] == newLineSequence[j];
164                 }
165                 if (match) {
166                     return newLineSequence.length;
167                 }
168             }
169             return 0;
170         }
171 
172         /**
173          * Reads a line.
174          *
175          * @return the line or null
176          */
177         private String readLine() { //NOPMD Bug in PMD
178 
179             String line = null;
180             int newLineMatchByteCount;
181 
182             final boolean isLastFilePart = no == 1;
183 
184             int i = currentLastBytePos;
185             while (i > -1) {
186 
187                 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
188                     // avoidNewlineSplitBuffer: for all except the last file part we
189                     // take a few bytes to the next file part to avoid splitting of newlines
190                     createLeftOver();
191                     break; // skip last few bytes and leave it to the next file part
192                 }
193 
194                 // --- check for newline ---
195                 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
196                     final int lineStart = i + 1;
197                     final int lineLengthBytes = currentLastBytePos - lineStart + 1;
198 
199                     if (lineLengthBytes < 0) {
200                         throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
201                     }
202                     final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
203 
204                     line = new String(lineData, charset);
205 
206                     currentLastBytePos = i - newLineMatchByteCount;
207                     break; // found line
208                 }
209 
210                 // --- move cursor ---
211                 i -= byteDecrement;
212 
213                 // --- end of file part handling ---
214                 if (i < 0) {
215                     createLeftOver();
216                     break; // end of file part
217                 }
218             }
219 
220             // --- last file part handling ---
221             if (isLastFilePart && leftOver != null) {
222                 // there will be no line break anymore, this is the first line of the file
223                 line = new String(leftOver, charset);
224                 leftOver = null;
225             }
226 
227             return line;
228         }
229 
230         /**
231          * Handles block rollover
232          *
233          * @return the new FilePart or null
234          * @throws IOException if there was a problem reading the file
235          */
236         private FilePart rollOver() throws IOException {
237 
238             if (currentLastBytePos > -1) {
239                 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
240                         + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
241             }
242 
243             if (no > 1) {
244                 return new FilePart(no - 1, blockSize, leftOver);
245             }
246             // NO 1 was the last FilePart, we're finished
247             if (leftOver != null) {
248                 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
249                         + new String(leftOver, charset));
250             }
251             return null;
252         }
253     }
254 
255     private static final String EMPTY_STRING = "";
256 
257     private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
258 
259     /**
260      * Constructs a new {@link Builder}.
261      *
262      * @return a new {@link Builder}.
263      * @since 2.12.0
264      */
265     public static Builder builder() {
266         return new Builder();
267     }
268 
269     private final int blockSize;
270     private final Charset charset;
271     private final SeekableByteChannel channel;
272     private final long totalByteLength;
273     private final long totalBlockCount;
274     private final byte[][] newLineSequences;
275     private final int avoidNewlineSplitBufferSize;
276     private final int byteDecrement;
277     private FilePart currentFilePart;
278     private boolean trailingNewlineOfFileSkipped;
279 
280     /**
281      * Constructs a ReversedLinesFileReader with default block size of 4KB and the
282      * platform's default encoding.
283      *
284      * @param file the file to be read
285      * @throws IOException if an I/O error occurs.
286      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
287      */
288     @Deprecated
289     public ReversedLinesFileReader(final File file) throws IOException {
290         this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
291     }
292 
293     /**
294      * Constructs a ReversedLinesFileReader with default block size of 4KB and the
295      * specified encoding.
296      *
297      * @param file    the file to be read
298      * @param charset the charset to use, null uses the default Charset.
299      * @throws IOException if an I/O error occurs.
300      * @since 2.5
301      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
302      */
303     @Deprecated
304     public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
305         this(file.toPath(), charset);
306     }
307 
308     /**
309      * Constructs a ReversedLinesFileReader with the given block size and encoding.
310      *
311      * @param file      the file to be read
312      * @param blockSize size of the internal buffer (for ideal performance this
313      *                  should match with the block size of the underlying file
314      *                  system).
315      * @param charset  the encoding of the file, null uses the default Charset.
316      * @throws IOException if an I/O error occurs.
317      * @since 2.3
318      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
319      */
320     @Deprecated
321     public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
322         this(file.toPath(), blockSize, charset);
323     }
324 
325     /**
326      * Constructs a ReversedLinesFileReader with the given block size and encoding.
327      *
328      * @param file      the file to be read
329      * @param blockSize size of the internal buffer (for ideal performance this
330      *                  should match with the block size of the underlying file
331      *                  system).
332      * @param charsetName  the encoding of the file, null uses the default Charset.
333      * @throws IOException                                  if an I/O error occurs
334      * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
335      *                                                      {@link UnsupportedEncodingException}
336      *                                                      in version 2.2 if the
337      *                                                      encoding is not
338      *                                                      supported.
339      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
340      */
341     @Deprecated
342     public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
343         this(file.toPath(), blockSize, charsetName);
344     }
345 
346     /**
347      * Constructs a ReversedLinesFileReader with default block size of 4KB and the
348      * specified encoding.
349      *
350      * @param file    the file to be read
351      * @param charset the charset to use, null uses the default Charset.
352      * @throws IOException if an I/O error occurs.
353      * @since 2.7
354      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
355      */
356     @Deprecated
357     public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
358         this(file, DEFAULT_BLOCK_SIZE, charset);
359     }
360 
361     /**
362      * Constructs a ReversedLinesFileReader with the given block size and encoding.
363      *
364      * @param file      the file to be read
365      * @param blockSize size of the internal buffer (for ideal performance this
366      *                  should match with the block size of the underlying file
367      *                  system).
368      * @param charset  the encoding of the file, null uses the default Charset.
369      * @throws IOException if an I/O error occurs.
370      * @since 2.7
371      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
372      */
373     @Deprecated
374     public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
375         this.blockSize = blockSize;
376         this.charset = Charsets.toCharset(charset);
377 
378         // --- check & prepare encoding ---
379         final CharsetEncoder charsetEncoder = this.charset.newEncoder();
380         final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
381         if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
382             // all one byte encodings are no problem
383             byteDecrement = 1;
384         } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
385         // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
386                 this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
387                 this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
388                 this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
389                 this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
390             byteDecrement = 1;
391         } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
392             // UTF-16 new line sequences are not allowed as second tuple of four byte
393             // sequences,
394             // however byte order has to be specified
395             byteDecrement = 2;
396         } else if (this.charset == StandardCharsets.UTF_16) {
397             throw new UnsupportedEncodingException(
398                     "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
399         } else {
400             throw new UnsupportedEncodingException(
401                     "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
402         }
403 
404         // NOTE: The new line sequences are matched in the order given, so it is
405         // important that \r\n is BEFORE \n
406         this.newLineSequences = new byte[][] {
407             StandardLineSeparator.CRLF.getBytes(this.charset),
408             StandardLineSeparator.LF.getBytes(this.charset),
409             StandardLineSeparator.CR.getBytes(this.charset)
410         };
411 
412         this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
413 
414         // Open file
415         this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
416         this.totalByteLength = channel.size();
417         int lastBlockLength = (int) (this.totalByteLength % blockSize);
418         if (lastBlockLength > 0) {
419             this.totalBlockCount = this.totalByteLength / blockSize + 1;
420         } else {
421             this.totalBlockCount = this.totalByteLength / blockSize;
422             if (this.totalByteLength > 0) {
423                 lastBlockLength = blockSize;
424             }
425         }
426         this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
427 
428     }
429 
430     /**
431      * Constructs a ReversedLinesFileReader with the given block size and encoding.
432      *
433      * @param file        the file to be read
434      * @param blockSize   size of the internal buffer (for ideal performance this
435      *                    should match with the block size of the underlying file
436      *                    system).
437      * @param charsetName the encoding of the file, null uses the default Charset.
438      * @throws IOException                                  if an I/O error occurs
439      * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
440      *                                                      {@link UnsupportedEncodingException}
441      *                                                      in version 2.2 if the
442      *                                                      encoding is not
443      *                                                      supported.
444      * @since 2.7
445      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
446      */
447     @Deprecated
448     public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
449         this(file, blockSize, Charsets.toCharset(charsetName));
450     }
451 
452     /**
453      * Closes underlying resources.
454      *
455      * @throws IOException if an I/O error occurs.
456      */
457     @Override
458     public void close() throws IOException {
459         channel.close();
460     }
461 
462     /**
463      * Returns the lines of the file from bottom to top.
464      *
465      * @return the next line or null if the start of the file is reached
466      * @throws IOException if an I/O error occurs.
467      */
468     public String readLine() throws IOException {
469 
470         String line = currentFilePart.readLine();
471         while (line == null) {
472             currentFilePart = currentFilePart.rollOver();
473             if (currentFilePart == null) {
474                 // no more FileParts: we're done, leave line set to null
475                 break;
476             }
477             line = currentFilePart.readLine();
478         }
479 
480         // aligned behavior with BufferedReader that doesn't return a last, empty line
481         if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
482             trailingNewlineOfFileSkipped = true;
483             line = readLine();
484         }
485 
486         return line;
487     }
488 
489     /**
490      * Returns {@code lineCount} lines of the file from bottom to top.
491      * <p>
492      * If there are less than {@code lineCount} lines in the file, then that's what
493      * you get.
494      * </p>
495      * <p>
496      * Note: You can easily flip the result with {@link Collections#reverse(List)}.
497      * </p>
498      *
499      * @param lineCount How many lines to read.
500      * @return A new list
501      * @throws IOException if an I/O error occurs.
502      * @since 2.8.0
503      */
504     public List<String> readLines(final int lineCount) throws IOException {
505         if (lineCount < 0) {
506             throw new IllegalArgumentException("lineCount < 0");
507         }
508         final ArrayList<String> arrayList = new ArrayList<>(lineCount);
509         for (int i = 0; i < lineCount; i++) {
510             final String line = readLine();
511             if (line == null) {
512                 return arrayList;
513             }
514             arrayList.add(line);
515         }
516         return arrayList;
517     }
518 
519     /**
520      * Returns the last {@code lineCount} lines of the file.
521      * <p>
522      * If there are less than {@code lineCount} lines in the file, then that's what
523      * you get.
524      * </p>
525      *
526      * @param lineCount How many lines to read.
527      * @return A String.
528      * @throws IOException if an I/O error occurs.
529      * @since 2.8.0
530      */
531     public String toString(final int lineCount) throws IOException {
532         final List<String> lines = readLines(lineCount);
533         Collections.reverse(lines);
534         return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
535     }
536 
537 }