001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.IOException;
022import java.io.UnsupportedEncodingException;
023import java.nio.ByteBuffer;
024import java.nio.channels.SeekableByteChannel;
025import java.nio.charset.Charset;
026import java.nio.charset.CharsetEncoder;
027import java.nio.charset.StandardCharsets;
028import java.nio.file.Files;
029import java.nio.file.Path;
030import java.nio.file.StandardOpenOption;
031
032import org.apache.commons.io.Charsets;
033import org.apache.commons.io.IOUtils;
034
035/**
036 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
037 * the last line). Useful for e.g. searching in log files.
038 *
039 * @since 2.2
040 */
041public class ReversedLinesFileReader implements Closeable {
042
043    private static final String EMPTY_STRING = "";
044    private static final int DEFAULT_BLOCK_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
045
046    private final int blockSize;
047    private final Charset encoding;
048
049    private final SeekableByteChannel channel;
050
051    private final long totalByteLength;
052    private final long totalBlockCount;
053
054    private final byte[][] newLineSequences;
055    private final int avoidNewlineSplitBufferSize;
056    private final int byteDecrement;
057
058    private FilePart currentFilePart;
059
060    private boolean trailingNewlineOfFileSkipped = false;
061
062    /**
063     * Creates a ReversedLinesFileReader with default block size of 4KB and the
064     * platform's default encoding.
065     *
066     * @param file
067     *            the file to be read
068     * @throws IOException  if an I/O error occurs
069     * @deprecated 2.5 use {@link #ReversedLinesFileReader(File, Charset)} instead
070     */
071    @Deprecated
072    public ReversedLinesFileReader(final File file) throws IOException {
073        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
074    }
075
076    /**
077     * Creates a ReversedLinesFileReader with default block size of 4KB and the
078     * specified encoding.
079     *
080     * @param file
081     *            the file to be read
082     * @param charset the charset to use
083     * @throws IOException  if an I/O error occurs
084     * @since 2.5
085     */
086    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
087        this(file.toPath(), charset);
088    }
089
090    /**
091     * Creates a ReversedLinesFileReader with default block size of 4KB and the
092     * specified encoding.
093     *
094     * @param file
095     *            the file to be read
096     * @param charset the charset to use
097     * @throws IOException  if an I/O error occurs
098     * @since 2.7
099     */
100    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
101        this(file, DEFAULT_BLOCK_SIZE, charset);
102    }
103
104    /**
105     * Creates a ReversedLinesFileReader with the given block size and encoding.
106     *
107     * @param file
108     *            the file to be read
109     * @param blockSize
110     *            size of the internal buffer (for ideal performance this should
111     *            match with the block size of the underlying file system).
112     * @param encoding
113     *            the encoding of the file
114     * @throws IOException  if an I/O error occurs
115     * @since 2.3
116     */
117    public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException {
118        this(file.toPath(), blockSize, encoding);
119    }
120
121    /**
122     * Creates a ReversedLinesFileReader with the given block size and encoding.
123     *
124     * @param file
125     *            the file to be read
126     * @param blockSize
127     *            size of the internal buffer (for ideal performance this should
128     *            match with the block size of the underlying file system).
129     * @param encoding
130     *            the encoding of the file
131     * @throws IOException  if an I/O error occurs
132     * @since 2.7
133     */
134    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset encoding) throws IOException {
135        this.blockSize = blockSize;
136        this.encoding = encoding;
137
138        // --- check & prepare encoding ---
139        final Charset charset = Charsets.toCharset(encoding);
140        final CharsetEncoder charsetEncoder = charset.newEncoder();
141        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
142        if (maxBytesPerChar == 1f) {
143            // all one byte encodings are no problem
144            byteDecrement = 1;
145        } else if (charset == StandardCharsets.UTF_8) {
146            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
147            // http://en.wikipedia.org/wiki/UTF-8
148            byteDecrement = 1;
149        } else if(charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
150                // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
151                charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
152                charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
153                charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
154                charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
155            byteDecrement = 1;
156        } else if (charset == StandardCharsets.UTF_16BE || charset == StandardCharsets.UTF_16LE) {
157            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
158            // however byte order has to be specified
159            byteDecrement = 2;
160        } else if (charset == StandardCharsets.UTF_16) {
161            throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or " +
162                    "UTF-16LE)");
163        } else {
164            throw new UnsupportedEncodingException("Encoding " + encoding + " is not supported yet (feel free to " +
165                    "submit a patch)");
166        }
167
168        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
169        newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) };
170
171        avoidNewlineSplitBufferSize = newLineSequences[0].length;
172
173        // Open file
174        channel = Files.newByteChannel(file, StandardOpenOption.READ);
175        totalByteLength = channel.size();
176        int lastBlockLength = (int) (totalByteLength % blockSize);
177        if (lastBlockLength > 0) {
178            totalBlockCount = totalByteLength / blockSize + 1;
179        } else {
180            totalBlockCount = totalByteLength / blockSize;
181            if (totalByteLength > 0) {
182                lastBlockLength = blockSize;
183            }
184        }
185        currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
186
187    }
188
189    /**
190     * Creates a ReversedLinesFileReader with the given block size and encoding.
191     *
192     * @param file
193     *            the file to be read
194     * @param blockSize
195     *            size of the internal buffer (for ideal performance this should
196     *            match with the block size of the underlying file system).
197     * @param encoding
198     *            the encoding of the file
199     * @throws IOException  if an I/O error occurs
200     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of {@link UnsupportedEncodingException} in
201     * version 2.2 if the encoding is not supported.
202     */
203    public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
204        this(file.toPath(), blockSize, encoding);
205    }
206
207    /**
208     * Creates a ReversedLinesFileReader with the given block size and encoding.
209     *
210     * @param file
211     *            the file to be read
212     * @param blockSize
213     *            size of the internal buffer (for ideal performance this should
214     *            match with the block size of the underlying file system).
215     * @param charsetName
216     *            the encoding of the file
217     * @throws IOException  if an I/O error occurs
218     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of {@link UnsupportedEncodingException} in
219     * version 2.2 if the encoding is not supported.
220     * @since 2.7
221     */
222    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
223        this(file, blockSize, Charsets.toCharset(charsetName));
224    }
225
226    /**
227     * Returns the lines of the file from bottom to top.
228     *
229     * @return the next line or null if the start of the file is reached
230     * @throws IOException  if an I/O error occurs
231     */
232    public String readLine() throws IOException {
233
234        String line = currentFilePart.readLine();
235        while (line == null) {
236            currentFilePart = currentFilePart.rollOver();
237            if (currentFilePart != null) {
238                line = currentFilePart.readLine();
239            } else {
240                // no more fileparts: we're done, leave line set to null
241                break;
242            }
243        }
244
245        // aligned behavior with BufferedReader that doesn't return a last, empty line
246        if(EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
247            trailingNewlineOfFileSkipped = true;
248            line = readLine();
249        }
250
251        return line;
252    }
253
254    /**
255     * Closes underlying resources.
256     *
257     * @throws IOException  if an I/O error occurs
258     */
259    @Override
260    public void close() throws IOException {
261        channel.close();
262    }
263
264    private class FilePart {
265        private final long no;
266
267        private final byte[] data;
268
269        private byte[] leftOver;
270
271        private int currentLastBytePos;
272
273        /**
274         * ctor
275         * @param no the part number
276         * @param length its length
277         * @param leftOverOfLastFilePart remainder
278         * @throws IOException if there is a problem reading the file
279         */
280        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
281            this.no = no;
282            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
283            this.data = new byte[dataLength];
284            final long off = (no - 1) * blockSize;
285
286            // read data
287            if (no > 0 /* file not empty */) {
288                channel.position(off);
289                final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
290                if (countRead != length) {
291                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
292                }
293            }
294            // copy left over part into data arr
295            if (leftOverOfLastFilePart != null) {
296                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
297            }
298            this.currentLastBytePos = data.length - 1;
299            this.leftOver = null;
300        }
301
302        /**
303         * Handles block rollover
304         *
305         * @return the new FilePart or null
306         * @throws IOException if there was a problem reading the file
307         */
308        private FilePart rollOver() throws IOException {
309
310            if (currentLastBytePos > -1) {
311                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
312                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
313            }
314
315            if (no > 1) {
316                return new FilePart(no - 1, blockSize, leftOver);
317            }
318            // NO 1 was the last FilePart, we're finished
319            if (leftOver != null) {
320                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
321                        + new String(leftOver, encoding));
322            }
323            return null;
324        }
325
326        /**
327         * Reads a line.
328         *
329         * @return the line or null
330         * @throws IOException if there is an error reading from the file
331         */
332        private String readLine() throws IOException {
333
334            String line = null;
335            int newLineMatchByteCount;
336
337            final boolean isLastFilePart = no == 1;
338
339            int i = currentLastBytePos;
340            while (i > -1) {
341
342                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
343                    // avoidNewlineSplitBuffer: for all except the last file part we
344                    // take a few bytes to the next file part to avoid splitting of newlines
345                    createLeftOver();
346                    break; // skip last few bytes and leave it to the next file part
347                }
348
349                // --- check for newline ---
350                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
351                    final int lineStart = i + 1;
352                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;
353
354                    if (lineLengthBytes < 0) {
355                        throw new IllegalStateException("Unexpected negative line length="+lineLengthBytes);
356                    }
357                    final byte[] lineData = new byte[lineLengthBytes];
358                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);
359
360                    line = new String(lineData, encoding);
361
362                    currentLastBytePos = i - newLineMatchByteCount;
363                    break; // found line
364                }
365
366                // --- move cursor ---
367                i -= byteDecrement;
368
369                // --- end of file part handling ---
370                if (i < 0) {
371                    createLeftOver();
372                    break; // end of file part
373                }
374            }
375
376            // --- last file part handling ---
377            if (isLastFilePart && leftOver != null) {
378                // there will be no line break anymore, this is the first line of the file
379                line = new String(leftOver, encoding);
380                leftOver = null;
381            }
382
383            return line;
384        }
385
386        /**
387         * Creates the buffer containing any left over bytes.
388         */
389        private void createLeftOver() {
390            final int lineLengthBytes = currentLastBytePos + 1;
391            if (lineLengthBytes > 0) {
392                // create left over for next block
393                leftOver = new byte[lineLengthBytes];
394                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
395            } else {
396                leftOver = null;
397            }
398            currentLastBytePos = -1;
399        }
400
401        /**
402         * Finds the new-line sequence and return its length.
403         *
404         * @param data buffer to scan
405         * @param i start offset in buffer
406         * @return length of newline sequence or 0 if none found
407         */
408        private int getNewLineMatchByteCount(final byte[] data, final int i) {
409            for (final byte[] newLineSequence : newLineSequences) {
410                boolean match = true;
411                for (int j = newLineSequence.length - 1; j >= 0; j--) {
412                    final int k = i + j - (newLineSequence.length - 1);
413                    match &= k >= 0 && data[k] == newLineSequence[j];
414                }
415                if (match) {
416                    return newLineSequence.length;
417                }
418            }
419            return 0;
420        }
421    }
422
423}