001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.io.input;
018    
019    import java.io.Closeable;
020    import java.io.File;
021    import java.io.IOException;
022    import java.io.RandomAccessFile;
023    import java.io.UnsupportedEncodingException;
024    import java.nio.charset.Charset;
025    import java.nio.charset.CharsetEncoder;
026    
/**
 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
 * the last line). Useful for e.g. searching in log files.
 * <p>
 * The file is read backwards in blocks of a configurable size. Supported
 * encodings are all single-byte encodings, UTF-8, Shift_JIS, UTF-16BE and
 * UTF-16LE. Instances are not safe for use by multiple threads.
 *
 * @since 2.2
 */
public class ReversedLinesFileReader implements Closeable {

    private final int blockSize;
    private final String encoding;

    private final RandomAccessFile randomAccessFile;

    private final long totalByteLength;
    private final long totalBlockCount;

    private final byte[][] newLineSequences;
    private final int avoidNewlineSplitBufferSize;
    private final int byteDecrement;

    /** The block currently being scanned; {@code null} once the start of the file has been passed. */
    private FilePart currentFilePart;

    /** Set as soon as the first (bottom-most) line has been examined. */
    private boolean trailingNewlineOfFileSkipped = false;

    /**
     * Creates a ReversedLinesFileReader with default block size of 4KB and the
     * platform's default encoding.
     *
     * @param file
     *            the file to be read
     * @throws IOException  if an I/O error occurs
     */
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, 4096, Charset.defaultCharset().toString());
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file
     *            the file to be read
     * @param blockSize
     *            size of the internal buffer (for ideal performance this should
     *            match with the block size of the underlying file system);
     *            must be positive
     * @param encoding
     *            the encoding of the file
     * @throws IOException  if an I/O error occurs
     * @throws UnsupportedEncodingException if the encoding cannot be read reversely by this class
     * @throws IllegalArgumentException if {@code blockSize} is not positive
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
        if (blockSize <= 0) {
            throw new IllegalArgumentException("blockSize must be positive, but was " + blockSize);
        }
        this.blockSize = blockSize;
        this.encoding = encoding;

        // --- check & prepare encoding ---
        // Done BEFORE the file is opened so that a rejected encoding cannot leak a file handle.
        byteDecrement = determineByteDecrement(Charset.forName(encoding), encoding);
        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
        newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) };
        avoidNewlineSplitBufferSize = newLineSequences[0].length;

        // --- open the file and load the last block ---
        randomAccessFile = new RandomAccessFile(file, "r");
        long byteLength = 0;
        long blockCount = 0;
        FilePart lastPart = null;
        boolean initialised = false;
        try {
            byteLength = randomAccessFile.length();
            int lastBlockLength = (int) (byteLength % blockSize);
            blockCount = byteLength / blockSize;
            if (lastBlockLength > 0) {
                // the file ends with a partially filled block
                blockCount++;
            } else if (byteLength > 0) {
                // the file ends exactly on a block boundary
                lastBlockLength = blockSize;
            }
            lastPart = new FilePart(blockCount, lastBlockLength, null);
            initialised = true;
        } finally {
            if (!initialised) {
                // do not leak the file handle when initialisation fails
                try {
                    randomAccessFile.close();
                } catch (final IOException ignored) {
                    // the failure that brought us here is the one worth reporting
                }
            }
        }
        totalByteLength = byteLength;
        totalBlockCount = blockCount;
        currentFilePart = lastPart;
    }

    /**
     * Determines by how many bytes the scan cursor has to move backwards per step
     * for the given charset.
     *
     * @param charset the charset of the file
     * @param encoding the encoding name as given by the caller (used in messages only)
     * @return the cursor decrement in bytes
     * @throws UnsupportedEncodingException if the charset is not supported by this reader
     */
    private static int determineByteDecrement(final Charset charset, final String encoding)
            throws UnsupportedEncodingException {
        final CharsetEncoder charsetEncoder = charset.newEncoder();
        if (charsetEncoder.maxBytesPerChar() == 1f) {
            // all one byte encodings are no problem
            return 1;
        }
        if (charset.equals(Charset.forName("UTF-8"))) {
            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
            // http://en.wikipedia.org/wiki/UTF-8
            return 1;
        }
        // Shift_JIS is an optional charset: only look it up when the runtime provides it,
        // otherwise the lookup itself would fail for every remaining encoding.
        if (Charset.isSupported("Shift_JIS") && charset.equals(Charset.forName("Shift_JIS"))) {
            // Same as for UTF-8
            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
            return 1;
        }
        if (charset.equals(Charset.forName("UTF-16BE")) || charset.equals(Charset.forName("UTF-16LE"))) {
            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
            // however byte order has to be specified
            return 2;
        }
        if (charset.equals(Charset.forName("UTF-16"))) {
            throw new UnsupportedEncodingException(
                    "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
        }
        throw new UnsupportedEncodingException(
                "Encoding "+encoding+" is not supported yet (feel free to submit a patch)");
    }

    /**
     * Returns the lines of the file from bottom to top.
     * <p>
     * Once the start of the file has been reached every further call returns
     * {@code null}.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException  if an I/O error occurs
     */
    public String readLine() throws IOException {
        String line = readNextLine();

        // aligned behaviour with BufferedReader that doesn't return a last, empty line:
        // only the very first line read (the bottom of the file) may be dropped,
        // empty lines further up belong to the content and must be returned
        if (!trailingNewlineOfFileSkipped) {
            trailingNewlineOfFileSkipped = true;
            if ("".equals(line)) {
                line = readNextLine();
            }
        }

        return line;
    }

    /**
     * Fetches the next line, moving on to preceding blocks as required.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException if an I/O error occurs
     */
    private String readNextLine() throws IOException {
        if (currentFilePart == null) {
            // start of file already reached by an earlier call
            return null;
        }
        String line = currentFilePart.readLine();
        while (line == null) {
            currentFilePart = currentFilePart.rollOver();
            if (currentFilePart == null) {
                // no more fileparts: we're done
                return null;
            }
            line = currentFilePart.readLine();
        }
        return line;
    }

    /**
     * Closes underlying resources.
     *
     * @throws IOException  if an I/O error occurs
     */
    public void close() throws IOException {
        randomAccessFile.close();
    }

    /**
     * One block of the file plus the bytes handed over from the block that
     * follows it in the file (which was scanned before it).
     */
    private class FilePart {
        /** 1-based number of this block; 0 denotes the single part of an empty file. */
        private final long no;

        /** Bytes of this block followed by the left over bytes of the previously scanned block. */
        private final byte[] data;

        /** Bytes at the start of {@link #data} that have to be handed to the next (preceding) block. */
        private byte[] leftOver;

        /** Index of the last byte in {@link #data} that has not been consumed yet, -1 if none. */
        private int currentLastBytePos;

        /**
         * ctor
         * @param no the part number
         * @param length its length
         * @param leftOverOfLastFilePart remainder
         * @throws IOException if there is a problem reading the file
         */
        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
            this.no = no;
            final int leftOverLength = leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0;
            this.data = new byte[length + leftOverLength];
            final long off = (no - 1) * blockSize;

            // read data
            if (no > 0 /* file not empty */) {
                randomAccessFile.seek(off);
                // a single read() may legitimately deliver fewer bytes than requested, so keep reading
                int totalRead = 0;
                while (totalRead < length) {
                    final int countRead = randomAccessFile.read(data, totalRead, length - totalRead);
                    if (countRead < 0) {
                        throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                    }
                    totalRead += countRead;
                }
            }
            // copy left over part into data arr
            if (leftOverOfLastFilePart != null) {
                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverLength);
            }
            this.currentLastBytePos = data.length - 1;
            this.leftOver = null;
        }

        /**
         * Handles block rollover
         *
         * @return the new FilePart or null
         * @throws IOException if there was a problem reading the file
         */
        private FilePart rollOver() throws IOException {

            if (currentLastBytePos > -1) {
                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
            }

            if (no > 1) {
                return new FilePart(no - 1, blockSize, leftOver);
            }
            // NO 1 was the last FilePart, we're finished
            if (leftOver != null) {
                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                        + new String(leftOver, encoding));
            }
            return null;
        }

        /**
         * Reads a line.
         * <p>
         * Scans backwards from the last unconsumed byte until a newline sequence
         * is found. Note that an empty first line of a file that starts with a
         * newline sequence is not reported.
         *
         * @return the line or null
         * @throws IOException if there is an error reading from the file
         */
        private String readLine() throws IOException {

            String line = null;

            final boolean isLastFilePart = no == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // --- check for newline ---
                final int newLineMatchByteCount = getNewLineMatchByteCount(data, i);
                if (newLineMatchByteCount > 0 /* found newline */) {
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length="+lineLengthBytes);
                    }
                    final byte[] lineData = new byte[lineLengthBytes];
                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);

                    line = new String(lineData, encoding);

                    // continue in front of the newline sequence on the next call
                    currentLastBytePos = i - newLineMatchByteCount;
                    break; // found line
                }

                // --- move cursor ---
                i -= byteDecrement;

                // --- end of file part handling ---
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // --- last file part handling ---
            if (isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, encoding);
                leftOver = null;
            }

            return line;
        }

        /**
         * Creates the buffer containing any left over bytes.
         */
        private void createLeftOver() {
            final int lineLengthBytes = currentLastBytePos + 1;
            if (lineLengthBytes > 0) {
                // create left over for next block
                leftOver = new byte[lineLengthBytes];
                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
            } else {
                leftOver = null;
            }
            // everything in this part is consumed now
            currentLastBytePos = -1;
        }

        /**
         * Finds the new-line sequence and return its length.
         *
         * @param data buffer to scan
         * @param i start offset in buffer
         * @return length of newline sequence or 0 if none found
         */
        private int getNewLineMatchByteCount(final byte[] data, final int i) {
            for (final byte[] newLineSequence : newLineSequences) {
                // the candidate sequence ends at index i, so it starts here
                final int start = i - (newLineSequence.length - 1);
                boolean match = start >= 0;
                for (int j = newLineSequence.length - 1; match && j >= 0; j--) {
                    match = data[start + j] == newLineSequence[j];
                }
                if (match) {
                    return newLineSequence.length;
                }
            }
            return 0;
        }
    }

}