001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.IOException;
022import java.io.RandomAccessFile;
023import java.io.UnsupportedEncodingException;
024import java.nio.charset.Charset;
025import java.nio.charset.CharsetEncoder;
026
027import org.apache.commons.io.Charsets;
028
029/**
030 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
031 * the last line). Useful for e.g. searching in log files.
032 *
033 * @since 2.2
034 */
035public class ReversedLinesFileReader implements Closeable {
036
037    private final int blockSize;
038    private final Charset encoding;
039
040    private final RandomAccessFile randomAccessFile;
041
042    private final long totalByteLength;
043    private final long totalBlockCount;
044
045    private final byte[][] newLineSequences;
046    private final int avoidNewlineSplitBufferSize;
047    private final int byteDecrement;
048
049    private FilePart currentFilePart;
050
051    private boolean trailingNewlineOfFileSkipped = false;
052
053    /**
054     * Creates a ReversedLinesFileReader with default block size of 4KB and the
055     * platform's default encoding.
056     *
057     * @param file
058     *            the file to be read
059     * @throws IOException  if an I/O error occurs
060     * @deprecated 2.5 use {@link #ReversedLinesFileReader(File, Charset)} instead
061     */
062    @Deprecated
063    public ReversedLinesFileReader(final File file) throws IOException {
064        this(file, 4096, Charset.defaultCharset());
065    }
066
067    /**
068     * Creates a ReversedLinesFileReader with default block size of 4KB and the
069     * specified encoding.
070     *
071     * @param file
072     *            the file to be read
073     * @param charset the encoding to use
074     * @throws IOException  if an I/O error occurs
075     * @since 2.5
076     */
077    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
078        this(file, 4096, charset);
079    }
080
081    /**
082     * Creates a ReversedLinesFileReader with the given block size and encoding.
083     *
084     * @param file
085     *            the file to be read
086     * @param blockSize
087     *            size of the internal buffer (for ideal performance this should
088     *            match with the block size of the underlying file system).
089     * @param encoding
090     *            the encoding of the file
091     * @throws IOException  if an I/O error occurs
092     * @since 2.3
093     */
094    @SuppressWarnings("deprecation") // unavoidable until Java 7
095    public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException {
096        this.blockSize = blockSize;
097        this.encoding = encoding;
098
099        // --- check & prepare encoding ---
100        final Charset charset = Charsets.toCharset(encoding);
101        final CharsetEncoder charsetEncoder = charset.newEncoder();
102        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
103        if (maxBytesPerChar == 1f) {
104            // all one byte encodings are no problem
105            byteDecrement = 1;
106        } else if (charset == Charsets.UTF_8) {
107            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
108            // http://en.wikipedia.org/wiki/UTF-8
109            byteDecrement = 1;
110        } else if(charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
111                // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
112                charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
113                charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
114                charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
115                charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
116            byteDecrement = 1;
117        } else if (charset == Charsets.UTF_16BE || charset == Charsets.UTF_16LE) {
118            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
119            // however byte order has to be specified
120            byteDecrement = 2;
121        } else if (charset == Charsets.UTF_16) {
122            throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or " +
123                    "UTF-16LE)");
124        } else {
125            throw new UnsupportedEncodingException("Encoding " + encoding + " is not supported yet (feel free to " +
126                    "submit a patch)");
127        }
128
129        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
130        newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) };
131
132        avoidNewlineSplitBufferSize = newLineSequences[0].length;
133
134        // Open file
135        randomAccessFile = new RandomAccessFile(file, "r");
136        totalByteLength = randomAccessFile.length();
137        int lastBlockLength = (int) (totalByteLength % blockSize);
138        if (lastBlockLength > 0) {
139            totalBlockCount = totalByteLength / blockSize + 1;
140        } else {
141            totalBlockCount = totalByteLength / blockSize;
142            if (totalByteLength > 0) {
143                lastBlockLength = blockSize;
144            }
145        }
146        currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
147
148    }
149
150    /**
151     * Creates a ReversedLinesFileReader with the given block size and encoding.
152     *
153     * @param file
154     *            the file to be read
155     * @param blockSize
156     *            size of the internal buffer (for ideal performance this should
157     *            match with the block size of the underlying file system).
158     * @param encoding
159     *            the encoding of the file
160     * @throws IOException  if an I/O error occurs
161     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of {@link UnsupportedEncodingException} in
162     * version 2.2 if the encoding is not supported.
163     */
164    public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
165        this(file, blockSize, Charsets.toCharset(encoding));
166    }
167
168    /**
169     * Returns the lines of the file from bottom to top.
170     *
171     * @return the next line or null if the start of the file is reached
172     * @throws IOException  if an I/O error occurs
173     */
174    public String readLine() throws IOException {
175
176        String line = currentFilePart.readLine();
177        while (line == null) {
178            currentFilePart = currentFilePart.rollOver();
179            if (currentFilePart != null) {
180                line = currentFilePart.readLine();
181            } else {
182                // no more fileparts: we're done, leave line set to null
183                break;
184            }
185        }
186
187        // aligned behaviour with BufferedReader that doesn't return a last, empty line
188        if("".equals(line) && !trailingNewlineOfFileSkipped) {
189            trailingNewlineOfFileSkipped = true;
190            line = readLine();
191        }
192
193        return line;
194    }
195
196    /**
197     * Closes underlying resources.
198     *
199     * @throws IOException  if an I/O error occurs
200     */
201    public void close() throws IOException {
202        randomAccessFile.close();
203    }
204
205    private class FilePart {
206        private final long no;
207
208        private final byte[] data;
209
210        private byte[] leftOver;
211
212        private int currentLastBytePos;
213
214        /**
215         * ctor
216         * @param no the part number
217         * @param length its length
218         * @param leftOverOfLastFilePart remainder
219         * @throws IOException if there is a problem reading the file
220         */
221        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
222            this.no = no;
223            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
224            this.data = new byte[dataLength];
225            final long off = (no - 1) * blockSize;
226
227            // read data
228            if (no > 0 /* file not empty */) {
229                randomAccessFile.seek(off);
230                final int countRead = randomAccessFile.read(data, 0, length);
231                if (countRead != length) {
232                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
233                }
234            }
235            // copy left over part into data arr
236            if (leftOverOfLastFilePart != null) {
237                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
238            }
239            this.currentLastBytePos = data.length - 1;
240            this.leftOver = null;
241        }
242
243        /**
244         * Handles block rollover
245         * 
246         * @return the new FilePart or null
247         * @throws IOException if there was a problem reading the file
248         */
249        private FilePart rollOver() throws IOException {
250
251            if (currentLastBytePos > -1) {
252                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
253                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
254            }
255
256            if (no > 1) {
257                return new FilePart(no - 1, blockSize, leftOver);
258            } else {
259                // NO 1 was the last FilePart, we're finished
260                if (leftOver != null) {
261                    throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
262                            + new String(leftOver, encoding));
263                }
264                return null;
265            }
266        }
267
268        /**
269         * Reads a line.
270         * 
271         * @return the line or null
272         * @throws IOException if there is an error reading from the file
273         */
274        private String readLine() throws IOException {
275
276            String line = null;
277            int newLineMatchByteCount;
278
279            final boolean isLastFilePart = no == 1;
280
281            int i = currentLastBytePos;
282            while (i > -1) {
283
284                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
285                    // avoidNewlineSplitBuffer: for all except the last file part we
286                    // take a few bytes to the next file part to avoid splitting of newlines
287                    createLeftOver();
288                    break; // skip last few bytes and leave it to the next file part
289                }
290
291                // --- check for newline ---
292                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
293                    final int lineStart = i + 1;
294                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;
295
296                    if (lineLengthBytes < 0) {
297                        throw new IllegalStateException("Unexpected negative line length="+lineLengthBytes);
298                    }
299                    final byte[] lineData = new byte[lineLengthBytes];
300                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);
301
302                    line = new String(lineData, encoding);
303
304                    currentLastBytePos = i - newLineMatchByteCount;
305                    break; // found line
306                }
307
308                // --- move cursor ---
309                i -= byteDecrement;
310
311                // --- end of file part handling ---
312                if (i < 0) {
313                    createLeftOver();
314                    break; // end of file part
315                }
316            }
317
318            // --- last file part handling ---
319            if (isLastFilePart && leftOver != null) {
320                // there will be no line break anymore, this is the first line of the file
321                line = new String(leftOver, encoding);
322                leftOver = null;
323            }
324
325            return line;
326        }
327
328        /**
329         * Creates the buffer containing any left over bytes.
330         */
331        private void createLeftOver() {
332            final int lineLengthBytes = currentLastBytePos + 1;
333            if (lineLengthBytes > 0) {
334                // create left over for next block
335                leftOver = new byte[lineLengthBytes];
336                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
337            } else {
338                leftOver = null;
339            }
340            currentLastBytePos = -1;
341        }
342
343        /**
344         * Finds the new-line sequence and return its length.
345         * 
346         * @param data buffer to scan
347         * @param i start offset in buffer
348         * @return length of newline sequence or 0 if none found
349         */
350        private int getNewLineMatchByteCount(final byte[] data, final int i) {
351            for (final byte[] newLineSequence : newLineSequences) {
352                boolean match = true;
353                for (int j = newLineSequence.length - 1; j >= 0; j--) {
354                    final int k = i + j - (newLineSequence.length - 1);
355                    match &= k >= 0 && data[k] == newLineSequence[j];
356                }
357                if (match) {
358                    return newLineSequence.length;
359                }
360            }
361            return 0;
362        }
363    }
364
365}