001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.IOException;
022import java.io.UnsupportedEncodingException;
023import java.nio.ByteBuffer;
024import java.nio.channels.SeekableByteChannel;
025import java.nio.charset.Charset;
026import java.nio.charset.CharsetEncoder;
027import java.nio.charset.StandardCharsets;
028import java.nio.file.Files;
029import java.nio.file.Path;
030import java.nio.file.StandardOpenOption;
031import java.util.ArrayList;
032import java.util.Arrays;
033import java.util.Collections;
034import java.util.Iterator;
035import java.util.List;
036
037import org.apache.commons.io.Charsets;
038import org.apache.commons.io.FileSystem;
039import org.apache.commons.io.StandardLineSeparator;
040import org.apache.commons.io.build.AbstractStreamBuilder;
041import org.apache.commons.io.function.IOIterable;
042import org.apache.commons.io.function.IOIterator;
043
044/**
045 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
046 * <p>
047 * To build an instance, use {@link Builder}.
048 * </p>
049 * <p>
050 * For example:
051 * </p>
052 * <pre>
053 * <code>
054 * try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
055 *   .setPath(path)
056 *   .setBufferSize(4096)
057 *   .setCharset(StandardCharsets.UTF_8)
058 *   .get()) {
059 *      reader.forEach(line -&gt; System.out.println(line));
060 * }
061 * </code>
062 * </pre>
063 *
064 * @see Builder
065 * @since 2.2
066 */
067public class ReversedLinesFileReader implements Closeable, IOIterable<String> {
068
069    // @formatter:off
070    /**
071     * Builds a new {@link ReversedLinesFileReader}.
072     *
073     * <p>
074     * For example:
075     * </p>
076     * <pre>{@code
077     * ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
078     *   .setPath(path)
079     *   .setBufferSize(4096)
080     *   .setCharset(StandardCharsets.UTF_8)
081     *   .get());}
082     * </pre>
083     *
084     * @see #get()
085     * @since 2.12.0
086     */
087    // @formatter:on
088    public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
089
090        /**
091         * Constructs a new builder of {@link ReversedLinesFileReader}.
092         */
093        public Builder() {
094            setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
095            setBufferSize(DEFAULT_BLOCK_SIZE);
096        }
097
098        /**
099         * Builds a new {@link ReversedLinesFileReader}.
100         * <p>
101         * You must set an aspect that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception.
102         * </p>
103         * <p>
104         * This builder uses the following aspects:
105         * </p>
106         * <ul>
107         * <li>{@link #getPath()} gets the target aspect.</li>
108         * <li>{@link #getBufferSize()}</li>
109         * <li>{@link #getCharset()}</li>
110         * </ul>
111         *
112         * @return a new instance.
113         * @throws IllegalStateException         if the {@code origin} is {@code null}.
114         * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
115         * @throws IOException                   if an I/O error occurs converting to a {@link Path} using {@link #getPath()}.
116         * @see #getPath()
117         * @see #getBufferSize()
118         * @see #getCharset()
119         * @see #getUnchecked()
120         */
121        @Override
122        public ReversedLinesFileReader get() throws IOException {
123            return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
124        }
125
126    }
127
128    private final class FilePart {
129        private final long partNumber;
130
131        private final byte[] data;
132
133        private byte[] leftOver;
134
135        private int currentLastBytePos;
136
137        /**
138         * Constructs a new instance.
139         *
140         * @param partNumber             the part number
141         * @param length                 its length
142         * @param leftOverOfLastFilePart remainder
143         * @throws IOException if there is a problem reading the file
144         */
145        private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
146            this.partNumber = partNumber;
147            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
148            this.data = new byte[dataLength];
149            final long off = (partNumber - 1) * blockSize;
150
151            // read data
152            if (partNumber > 0 /* file not empty */) {
153                channel.position(off);
154                final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
155                if (countRead != length) {
156                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
157                }
158            }
159            // copy left over part into data arr
160            if (leftOverOfLastFilePart != null) {
161                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
162            }
163            this.currentLastBytePos = data.length - 1;
164            this.leftOver = null;
165        }
166
167        /**
168         * Constructs the buffer containing any leftover bytes.
169         */
170        private void createLeftOver() {
171            final int lineLengthBytes = currentLastBytePos + 1;
172            if (lineLengthBytes > 0) {
173                // create left over for next block
174                leftOver = Arrays.copyOf(data, lineLengthBytes);
175            } else {
176                leftOver = null;
177            }
178            currentLastBytePos = -1;
179        }
180
181        /**
182         * Finds the new-line sequence and return its length.
183         *
184         * @param data buffer to scan
185         * @param i    start offset in buffer
186         * @return length of newline sequence or 0 if none found
187         */
188        private int getNewLineMatchByteCount(final byte[] data, final int i) {
189            for (final byte[] newLineSequence : newLineSequences) {
190                boolean match = true;
191                for (int j = newLineSequence.length - 1; j >= 0; j--) {
192                    final int k = i + j - (newLineSequence.length - 1);
193                    match &= k >= 0 && data[k] == newLineSequence[j];
194                }
195                if (match) {
196                    return newLineSequence.length;
197                }
198            }
199            return 0;
200        }
201
202        /**
203         * Reads a line.
204         *
205         * @return the line or null
206         */
207        private String readLine() { //NOPMD Bug in PMD
208
209            String line = null;
210            int newLineMatchByteCount;
211
212            final boolean isLastFilePart = partNumber == 1;
213
214            int i = currentLastBytePos;
215            while (i > -1) {
216
217                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
218                    // avoidNewlineSplitBuffer: for all except the last file part we
219                    // take a few bytes to the next file part to avoid splitting of newlines
220                    createLeftOver();
221                    break; // skip last few bytes and leave it to the next file part
222                }
223
224                // check for newline
225                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
226                    final int lineStart = i + 1;
227                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;
228
229                    if (lineLengthBytes < 0) {
230                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
231                    }
232                    final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
233
234                    line = new String(lineData, charset);
235
236                    currentLastBytePos = i - newLineMatchByteCount;
237                    break; // found line
238                }
239
240                // move cursor
241                i -= byteDecrement;
242
243                // end of file part handling
244                if (i < 0) {
245                    createLeftOver();
246                    break; // end of file part
247                }
248            }
249
250            // last file part handling
251            if (isLastFilePart && leftOver != null) {
252                // there will be partNumber line break anymore, this is the first line of the file
253                line = new String(leftOver, charset);
254                leftOver = null;
255            }
256
257            return line;
258        }
259
260        /**
261         * Handles block rollover
262         *
263         * @return the new FilePart or null
264         * @throws IOException if there was a problem reading the file
265         */
266        private FilePart rollOver() throws IOException {
267
268            if (currentLastBytePos > -1) {
269                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
270                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
271            }
272
273            if (partNumber > 1) {
274                return new FilePart(partNumber - 1, blockSize, leftOver);
275            }
276            // NO 1 was the last FilePart, we're finished
277            if (leftOver != null) {
278                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
279                        + new String(leftOver, charset));
280            }
281            return null;
282        }
283    }
284
285    private static final String EMPTY_STRING = "";
286
287    private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
288
289    /**
290     * Constructs a new {@link Builder}.
291     *
292     * @return a new {@link Builder}.
293     * @since 2.12.0
294     */
295    public static Builder builder() {
296        return new Builder();
297    }
298
299    private final int blockSize;
300    private final Charset charset;
301    private final SeekableByteChannel channel;
302    private final long totalByteLength;
303    private final long totalBlockCount;
304    private final byte[][] newLineSequences;
305    private final int avoidNewlineSplitBufferSize;
306    private final int byteDecrement;
307    private FilePart currentFilePart;
308    private boolean trailingNewlineOfFileSkipped;
309
310    /**
311     * Constructs a ReversedLinesFileReader with default block size of 4KB and the virtual machine's {@link Charset#defaultCharset() default charset}.
312     *
313     * @param file the file to be read
314     * @throws IOException if an I/O error occurs.
315     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
316     */
317    @Deprecated
318    public ReversedLinesFileReader(final File file) throws IOException {
319        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
320    }
321
322    /**
323     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
324     * specified encoding.
325     *
326     * @param file    the file to be read
327     * @param charset the charset to use, null uses the default Charset.
328     * @throws IOException if an I/O error occurs.
329     * @since 2.5
330     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
331     */
332    @Deprecated
333    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
334        this(file.toPath(), charset);
335    }
336
337    /**
338     * Constructs a ReversedLinesFileReader with the given block size and encoding.
339     *
340     * @param file      the file to be read
341     * @param blockSize size of the internal buffer (for ideal performance this
342     *                  should match with the block size of the underlying file
343     *                  system).
344     * @param charset  the encoding of the file, null uses the default Charset.
345     * @throws IOException if an I/O error occurs.
346     * @since 2.3
347     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
348     */
349    @Deprecated
350    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
351        this(file.toPath(), blockSize, charset);
352    }
353
354    /**
355     * Constructs a ReversedLinesFileReader with the given block size and encoding.
356     *
357     * @param file      the file to be read
358     * @param blockSize size of the internal buffer (for ideal performance this
359     *                  should match with the block size of the underlying file
360     *                  system).
361     * @param charsetName  the encoding of the file, null uses the default Charset.
362     * @throws IOException                                  if an I/O error occurs
363     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
364     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
365     */
366    @Deprecated
367    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
368        this(file.toPath(), blockSize, charsetName);
369    }
370
371    /**
372     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
373     * specified encoding.
374     *
375     * @param file    the file to be read
376     * @param charset the charset to use, null uses the default Charset.
377     * @throws IOException if an I/O error occurs.
378     * @since 2.7
379     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
380     */
381    @Deprecated
382    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
383        this(file, DEFAULT_BLOCK_SIZE, charset);
384    }
385
386    /**
387     * Constructs a ReversedLinesFileReader with the given block size and encoding.
388     *
389     * @param file      the file to be read
390     * @param blockSize size of the internal buffer (for ideal performance this
391     *                  should match with the block size of the underlying file
392     *                  system).
393     * @param charset  the encoding of the file, null uses the default Charset.
394     * @throws IOException if an I/O error occurs.
395     * @since 2.7
396     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
397     */
398    @Deprecated
399    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
400        this.blockSize = blockSize;
401        this.charset = Charsets.toCharset(charset);
402
403        // --- check & prepare encoding ---
404        final CharsetEncoder charsetEncoder = this.charset.newEncoder();
405        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
406        if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
407            // all one byte encodings are partNumber problem
408            byteDecrement = 1;
409        } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
410        // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
411                this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
412                this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
413                this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
414                this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
415            byteDecrement = 1;
416        } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
417            // UTF-16 new line sequences are not allowed as second tuple of four byte
418            // sequences,
419            // however byte order has to be specified
420            byteDecrement = 2;
421        } else if (this.charset == StandardCharsets.UTF_16) {
422            throw new UnsupportedEncodingException(
423                    "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
424        } else {
425            throw new UnsupportedEncodingException(
426                    "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
427        }
428
429        // NOTE: The new line sequences are matched in the order given, so it is
430        // important that \r\n is BEFORE \n
431        this.newLineSequences = new byte[][] {
432            StandardLineSeparator.CRLF.getBytes(this.charset),
433            StandardLineSeparator.LF.getBytes(this.charset),
434            StandardLineSeparator.CR.getBytes(this.charset)
435        };
436
437        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
438
439        // Open file
440        this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
441        this.totalByteLength = channel.size();
442        int lastBlockLength = (int) (this.totalByteLength % blockSize);
443        if (lastBlockLength > 0) {
444            this.totalBlockCount = this.totalByteLength / blockSize + 1;
445        } else {
446            this.totalBlockCount = this.totalByteLength / blockSize;
447            if (this.totalByteLength > 0) {
448                lastBlockLength = blockSize;
449            }
450        }
451        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
452
453    }
454
455    /**
456     * Constructs a ReversedLinesFileReader with the given block size and encoding.
457     *
458     * @param file        the file to be read
459     * @param blockSize   size of the internal buffer (for ideal performance this
460     *                    should match with the block size of the underlying file
461     *                    system).
462     * @param charsetName the encoding of the file, null uses the default Charset.
463     * @throws IOException                                  if an I/O error occurs
464     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported
465     * @since 2.7
466     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
467     */
468    @Deprecated
469    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
470        this(file, blockSize, Charsets.toCharset(charsetName));
471    }
472
473    /**
474     * Closes underlying resources.
475     *
476     * @throws IOException if an I/O error occurs.
477     */
478    @Override
479    public void close() throws IOException {
480        channel.close();
481    }
482
483    @Override
484    public IOIterator<String> iterator() {
485        return new IOIterator<String>() {
486
487            private String next;
488
489            @Override
490            public boolean hasNext() throws IOException {
491                if (next == null) {
492                    next = readLine();
493                }
494                return next != null;
495            }
496
497            @Override
498            public String next() throws IOException {
499                if (next == null) {
500                    next = readLine();
501                }
502                final String tmp = next;
503                next = null;
504                return tmp;
505            }
506
507            @Override
508            public Iterator<String> unwrap() {
509                return null;
510            }
511
512        };
513    }
514
515    /**
516     * Returns the lines of the file from bottom to top.
517     *
518     * @return the next line or null if the start of the file is reached
519     * @throws IOException if an I/O error occurs.
520     */
521    public String readLine() throws IOException {
522        String line = currentFilePart.readLine();
523        while (line == null) {
524            currentFilePart = currentFilePart.rollOver();
525            if (currentFilePart == null) {
526                // partNumber more FileParts: we're done, leave line set to null
527                break;
528            }
529            line = currentFilePart.readLine();
530        }
531        // aligned behavior with BufferedReader that doesn't return a last, empty line
532        if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
533            trailingNewlineOfFileSkipped = true;
534            line = readLine();
535        }
536        return line;
537    }
538
539    /**
540     * Returns {@code lineCount} lines of the file from bottom to top.
541     * <p>
542     * If there are less than {@code lineCount} lines in the file, then that's what
543     * you get.
544     * </p>
545     * <p>
546     * Note: You can easily flip the result with {@link Collections#reverse(List)}.
547     * </p>
548     *
549     * @param lineCount How many lines to read.
550     * @return A new list
551     * @throws IOException if an I/O error occurs.
552     * @since 2.8.0
553     */
554    public List<String> readLines(final int lineCount) throws IOException {
555        if (lineCount < 0) {
556            throw new IllegalArgumentException("lineCount < 0");
557        }
558        final ArrayList<String> arrayList = new ArrayList<>(lineCount);
559        for (int i = 0; i < lineCount; i++) {
560            final String line = readLine();
561            if (line == null) {
562                return arrayList;
563            }
564            arrayList.add(line);
565        }
566        return arrayList;
567    }
568
569    /**
570     * Returns the last {@code lineCount} lines of the file.
571     * <p>
572     * If there are less than {@code lineCount} lines in the file, then that's what
573     * you get.
574     * </p>
575     *
576     * @param lineCount How many lines to read.
577     * @return A String.
578     * @throws IOException if an I/O error occurs.
579     * @since 2.8.0
580     */
581    public String toString(final int lineCount) throws IOException {
582        final List<String> lines = readLines(lineCount);
583        Collections.reverse(lines);
584        return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
585    }
586
587    @Override
588    public Iterable<String> unwrap() {
589        return null;
590    }
591
592}