001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.io.input;
018    
019    import java.io.Closeable;
020    import java.io.File;
021    import java.io.IOException;
022    import java.io.RandomAccessFile;
023    import java.io.UnsupportedEncodingException;
024    import java.nio.charset.Charset;
025    import java.nio.charset.CharsetEncoder;
026    import java.nio.charset.UnsupportedCharsetException;
027    
028    import org.apache.commons.io.Charsets;
029    
030    /**
031     * Reads lines in a file reversely (similar to a BufferedReader, but starting at
032     * the last line). Useful for e.g. searching in log files.
033     *
034     * @since 2.2
035     */
036    public class ReversedLinesFileReader implements Closeable {
037    
038        private final int blockSize;
039        private final Charset encoding;
040    
041        private final RandomAccessFile randomAccessFile;
042    
043        private final long totalByteLength;
044        private final long totalBlockCount;
045    
046        private final byte[][] newLineSequences;
047        private final int avoidNewlineSplitBufferSize;
048        private final int byteDecrement;
049    
050        private FilePart currentFilePart;
051    
052        private boolean trailingNewlineOfFileSkipped = false;
053    
054        /**
055         * Creates a ReversedLinesFileReader with default block size of 4KB and the
056         * platform's default encoding.
057         *
058         * @param file
059         *            the file to be read
060         * @throws IOException  if an I/O error occurs
061         */
062        public ReversedLinesFileReader(final File file) throws IOException {
063            this(file, 4096, Charset.defaultCharset().toString());
064        }
065    
066        /**
067         * Creates a ReversedLinesFileReader with the given block size and encoding.
068         *
069         * @param file
070         *            the file to be read
071         * @param blockSize
072         *            size of the internal buffer (for ideal performance this should
073         *            match with the block size of the underlying file system).
074         * @param encoding
075         *            the encoding of the file
076         * @throws IOException  if an I/O error occurs
077         * @since 2.3
078         */
079        public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException {
080            this.blockSize = blockSize;
081            this.encoding = encoding;
082    
083            randomAccessFile = new RandomAccessFile(file, "r");
084            totalByteLength = randomAccessFile.length();
085            int lastBlockLength = (int) (totalByteLength % blockSize);
086            if (lastBlockLength > 0) {
087                totalBlockCount = totalByteLength / blockSize + 1;
088            } else {
089                totalBlockCount = totalByteLength / blockSize;
090                if (totalByteLength > 0) {
091                    lastBlockLength = blockSize;
092                }
093            }
094            currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
095    
096            // --- check & prepare encoding ---
097            Charset charset = Charsets.toCharset(encoding);
098            CharsetEncoder charsetEncoder = charset.newEncoder();
099            float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
100            if(maxBytesPerChar==1f) {
101                // all one byte encodings are no problem
102                byteDecrement = 1;
103            } else if(charset == Charset.forName("UTF-8")) {
104                // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
105                // http://en.wikipedia.org/wiki/UTF-8
106                byteDecrement = 1;
107            } else if(charset == Charset.forName("Shift_JIS")) {
108                // Same as for UTF-8
109                // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
110                byteDecrement = 1;
111            } else if(charset == Charset.forName("UTF-16BE") || charset == Charset.forName("UTF-16LE")) {
112                // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
113                // however byte order has to be specified
114                byteDecrement = 2;
115            } else if(charset == Charset.forName("UTF-16")) {
116                throw new UnsupportedEncodingException(
117                        "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
118            } else {
119                throw new UnsupportedEncodingException(
120                        "Encoding "+encoding+" is not supported yet (feel free to submit a patch)");
121            }
122            // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
123            newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) };
124    
125            avoidNewlineSplitBufferSize = newLineSequences[0].length;
126        }
127    
128        /**
129         * Creates a ReversedLinesFileReader with the given block size and encoding.
130         *
131         * @param file
132         *            the file to be read
133         * @param blockSize
134         *            size of the internal buffer (for ideal performance this should
135         *            match with the block size of the underlying file system).
136         * @param encoding
137         *            the encoding of the file
138         * @throws IOException  if an I/O error occurs
139         * @throws UnsupportedCharsetException
140         *             thrown instead of {@link UnsupportedEncodingException} in version 2.2 if the encoding is not
141         *             supported.
142         */
143        public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
144            this(file, blockSize, Charsets.toCharset(encoding));
145        }
146    
147        /**
148         * Returns the lines of the file from bottom to top.
149         *
150         * @return the next line or null if the start of the file is reached
151         * @throws IOException  if an I/O error occurs
152         */
153        public String readLine() throws IOException {
154    
155            String line = currentFilePart.readLine();
156            while (line == null) {
157                currentFilePart = currentFilePart.rollOver();
158                if (currentFilePart != null) {
159                    line = currentFilePart.readLine();
160                } else {
161                    // no more fileparts: we're done, leave line set to null
162                    break;
163                }
164            }
165    
166            // aligned behaviour wiht BufferedReader that doesn't return a last, emtpy line
167            if("".equals(line) && !trailingNewlineOfFileSkipped) {
168                trailingNewlineOfFileSkipped = true;
169                line = readLine();
170            }
171    
172            return line;
173        }
174    
175        /**
176         * Closes underlying resources.
177         *
178         * @throws IOException  if an I/O error occurs
179         */
180        public void close() throws IOException {
181            randomAccessFile.close();
182        }
183    
184        private class FilePart {
185            private final long no;
186    
187            private final byte[] data;
188    
189            private byte[] leftOver;
190    
191            private int currentLastBytePos;
192    
193            /**
194             * ctor
195             * @param no the part number
196             * @param length its length
197             * @param leftOverOfLastFilePart remainder
198             * @throws IOException if there is a problem reading the file
199             */
200            private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
201                this.no = no;
202                int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
203                this.data = new byte[dataLength];
204                final long off = (no - 1) * blockSize;
205    
206                // read data
207                if (no > 0 /* file not empty */) {
208                    randomAccessFile.seek(off);
209                    final int countRead = randomAccessFile.read(data, 0, length);
210                    if (countRead != length) {
211                        throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
212                    }
213                }
214                // copy left over part into data arr
215                if (leftOverOfLastFilePart != null) {
216                    System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
217                }
218                this.currentLastBytePos = data.length - 1;
219                this.leftOver = null;
220            }
221    
222            /**
223             * Handles block rollover
224             * 
225             * @return the new FilePart or null
226             * @throws IOException if there was a problem reading the file
227             */
228            private FilePart rollOver() throws IOException {
229    
230                if (currentLastBytePos > -1) {
231                    throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
232                            + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
233                }
234    
235                if (no > 1) {
236                    return new FilePart(no - 1, blockSize, leftOver);
237                } else {
238                    // NO 1 was the last FilePart, we're finished
239                    if (leftOver != null) {
240                        throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
241                                + new String(leftOver, encoding));
242                    }
243                    return null;
244                }
245            }
246    
247            /**
248             * Reads a line.
249             * 
250             * @return the line or null
251             * @throws IOException if there is an error reading from the file
252             */
253            private String readLine() throws IOException {
254    
255                String line = null;
256                int newLineMatchByteCount;
257    
258                boolean isLastFilePart = no == 1;
259    
260                int i = currentLastBytePos;
261                while (i > -1) {
262    
263                    if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
264                        // avoidNewlineSplitBuffer: for all except the last file part we
265                        // take a few bytes to the next file part to avoid splitting of newlines
266                        createLeftOver();
267                        break; // skip last few bytes and leave it to the next file part
268                    }
269    
270                    // --- check for newline ---
271                    if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
272                        final int lineStart = i + 1;
273                        int lineLengthBytes = currentLastBytePos - lineStart + 1;
274    
275                        if (lineLengthBytes < 0) {
276                            throw new IllegalStateException("Unexpected negative line length="+lineLengthBytes);
277                        }
278                        byte[] lineData = new byte[lineLengthBytes];
279                        System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);
280    
281                        line = new String(lineData, encoding);
282    
283                        currentLastBytePos = i - newLineMatchByteCount;
284                        break; // found line
285                    }
286    
287                    // --- move cursor ---
288                    i -= byteDecrement;
289    
290                    // --- end of file part handling ---
291                    if (i < 0) {
292                        createLeftOver();
293                        break; // end of file part
294                    }
295                }
296    
297                // --- last file part handling ---
298                if (isLastFilePart && leftOver != null) {
299                    // there will be no line break anymore, this is the first line of the file
300                    line = new String(leftOver, encoding);
301                    leftOver = null;
302                }
303    
304                return line;
305            }
306    
307            /**
308             * Creates the buffer containing any left over bytes.
309             */
310            private void createLeftOver() {
311                int lineLengthBytes = currentLastBytePos + 1;
312                if (lineLengthBytes > 0) {
313                    // create left over for next block
314                    leftOver = new byte[lineLengthBytes];
315                    System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
316                } else {
317                    leftOver = null;
318                }
319                currentLastBytePos = -1;
320            }
321    
322            /**
323             * Finds the new-line sequence and return its length.
324             * 
325             * @param data buffer to scan
326             * @param i start offset in buffer
327             * @return length of newline sequence or 0 if none found
328             */
329            private int getNewLineMatchByteCount(byte[] data, int i) {
330                for (byte[] newLineSequence : newLineSequences) {
331                    boolean match = true;
332                    for (int j = newLineSequence.length - 1; j >= 0; j--) {
333                        int k = i + j - (newLineSequence.length - 1);
334                        match &= k >= 0 && data[k] == newLineSequence[j];
335                    }
336                    if (match) {
337                        return newLineSequence.length;
338                    }
339                }
340                return 0;
341            }
342        }
343    
344    }