View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import java.io.Closeable;
20  import java.io.File;
21  import java.io.IOException;
22  import java.io.RandomAccessFile;
23  import java.io.UnsupportedEncodingException;
24  import java.nio.charset.Charset;
25  import java.nio.charset.CharsetEncoder;
26  import java.nio.charset.UnsupportedCharsetException;
27  
28  import org.apache.commons.io.Charsets;
29  
30  /**
31   * Reads lines in a file reversely (similar to a BufferedReader, but starting at
32   * the last line). Useful for e.g. searching in log files.
33   *
34   * @since 2.2
35   */
public class ReversedLinesFileReader implements Closeable {

    private final int blockSize;
    private final Charset encoding;

    private final RandomAccessFile randomAccessFile;

    private final long totalByteLength;
    private final long totalBlockCount;

    private final byte[][] newLineSequences;
    private final int avoidNewlineSplitBufferSize;
    private final int byteDecrement;

    /** The part currently being scanned; {@code null} once the start of the file has been reached. */
    private FilePart currentFilePart;

    /** Set as soon as the first line (seen from the end of the file) has been examined. */
    private boolean trailingNewlineOfFileSkipped = false;

    /**
     * Creates a ReversedLinesFileReader with default block size of 4KB and the
     * platform's default encoding.
     *
     * @param file
     *            the file to be read
     * @throws IOException  if an I/O error occurs
     * @deprecated 2.5 use {@link #ReversedLinesFileReader(File, Charset)} instead
     */
    @Deprecated
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, 4096, Charset.defaultCharset());
    }

    /**
     * Creates a ReversedLinesFileReader with default block size of 4KB and the
     * specified encoding.
     *
     * @param file
     *            the file to be read
     * @param charset the encoding to use, {@code null} selects the platform default
     * @throws IOException  if an I/O error occurs
     * @since 2.5
     */
    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
        this(file, 4096, charset);
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file
     *            the file to be read
     * @param blockSize
     *            size of the internal buffer (for ideal performance this should
     *            match with the block size of the underlying file system).
     * @param encoding
     *            the encoding of the file, {@code null} selects the platform default
     * @throws IOException  if an I/O error occurs
     * @throws UnsupportedEncodingException if the encoding cannot be read backwards by this class
     * @throws IllegalArgumentException if {@code blockSize} is not positive
     * @since 2.3
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException {
        if (blockSize <= 0) {
            throw new IllegalArgumentException("Block size must be positive: " + blockSize);
        }
        this.blockSize = blockSize;
        // Resolve null once, so that every later use (newline encoding, line decoding) sees a real charset.
        this.encoding = encoding == null ? Charset.defaultCharset() : encoding;

        // --- check & prepare encoding ---
        // Done before the file is opened: a rejected encoding must not leave an open file handle behind.
        byteDecrement = byteDecrementFor(this.encoding);
        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
        newLineSequences = new byte[][] {
            "\r\n".getBytes(this.encoding), "\n".getBytes(this.encoding), "\r".getBytes(this.encoding) };
        avoidNewlineSplitBufferSize = newLineSequences[0].length;

        // --- open the file and load the last block ---
        randomAccessFile = new RandomAccessFile(file, "r");
        boolean initialised = false;
        try {
            totalByteLength = randomAccessFile.length();
            final long fullBlocks = totalByteLength / blockSize;
            int lastBlockLength = (int) (totalByteLength % blockSize);
            if (lastBlockLength > 0) {
                // the file ends with a partially filled block
                totalBlockCount = fullBlocks + 1;
            } else {
                totalBlockCount = fullBlocks;
                if (totalByteLength > 0) {
                    lastBlockLength = blockSize;
                }
            }
            currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
            initialised = true;
        } finally {
            if (!initialised) {
                try {
                    randomAccessFile.close();
                } catch (final IOException ignored) {
                    // the failure that aborted construction is the one worth reporting
                }
            }
        }
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file
     *            the file to be read
     * @param blockSize
     *            size of the internal buffer (for ideal performance this should
     *            match with the block size of the underlying file system).
     * @param encoding
     *            the encoding of the file, {@code null} selects the platform default
     * @throws IOException  if an I/O error occurs
     * @throws UnsupportedCharsetException
     *             thrown instead of {@link UnsupportedEncodingException} in version 2.2 if the encoding is not
     *             supported.
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
        this(file, blockSize, encoding == null ? Charset.defaultCharset() : Charset.forName(encoding));
    }

    /**
     * Determines by how many bytes the backwards scan has to step for the given charset.
     * <p>
     * Charsets are identified by their canonical name, so no additional charset has to be
     * looked up (and be available in the runtime) just to perform the comparison.
     *
     * @param charset the charset of the file, not {@code null}
     * @return the number of bytes to step back per iteration
     * @throws UnsupportedEncodingException if the charset cannot be scanned backwards by this class
     */
    private static int byteDecrementFor(final Charset charset) throws UnsupportedEncodingException {
        final CharsetEncoder charsetEncoder = charset.newEncoder();
        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
        final String name = charset.name();
        if (maxBytesPerChar == 1f) {
            // all one byte encodings are no problem
            return 1;
        }
        if ("UTF-8".equals(name)) {
            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
            // http://en.wikipedia.org/wiki/UTF-8
            return 1;
        }
        if ("Shift_JIS".equals(name)) {
            // Same as for UTF-8
            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
            return 1;
        }
        if ("UTF-16BE".equals(name) || "UTF-16LE".equals(name)) {
            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
            // however byte order has to be specified
            return 2;
        }
        if ("UTF-16".equals(name)) {
            throw new UnsupportedEncodingException(
                    "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
        }
        throw new UnsupportedEncodingException(
                "Encoding " + charset + " is not supported yet (feel free to submit a patch)");
    }

    /**
     * Returns the lines of the file from bottom to top.
     * <p>
     * Once the start of the file has been reached, every further call returns {@code null}.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException  if an I/O error occurs
     */
    public String readLine() throws IOException {

        if (currentFilePart == null) {
            // the start of the file was reached by an earlier call
            return null;
        }

        String line = currentFilePart.readLine();
        while (line == null) {
            currentFilePart = currentFilePart.rollOver();
            if (currentFilePart == null) {
                // no more fileparts: we're done, leave line set to null
                break;
            }
            line = currentFilePart.readLine();
        }

        // aligned behaviour with BufferedReader that doesn't return a last, empty line
        if (!trailingNewlineOfFileSkipped) {
            // Only the very first line seen from the end can stem from a trailing newline, so the
            // flag is cleared here unconditionally; empty lines further up are real content.
            trailingNewlineOfFileSkipped = true;
            if ("".equals(line)) {
                line = readLine();
            }
        }

        return line;
    }

    /**
     * Closes underlying resources.
     *
     * @throws IOException  if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        randomAccessFile.close();
    }

    /**
     * One block of the file plus the bytes the following block (the one nearer to the end of
     * the file) could not yet turn into a complete line.
     */
    private class FilePart {
        /** 1-based block number; 0 is only used for an empty file. */
        private final long no;

        /** Bytes of this block followed by the left over bytes handed in by the previous part. */
        private final byte[] data;

        /** Bytes at the start of {@link #data} that still wait for the beginning of their line. */
        private byte[] leftOver;

        /** Index of the last byte in {@link #data} that has not been returned yet, -1 if none. */
        private int currentLastBytePos;

        /**
         * ctor
         * @param no the part number
         * @param length its length
         * @param leftOverOfLastFilePart remainder
         * @throws IOException if there is a problem reading the file
         */
        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
            this.no = no;
            final int leftOverLength = leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0;
            this.data = new byte[length + leftOverLength];

            // read data
            if (no > 0 /* file not empty */) {
                randomAccessFile.seek((no - 1) * blockSize);
                // A single read() may deliver fewer bytes than requested, so keep reading until the
                // block is complete or the file ends prematurely.
                int countRead = 0;
                while (countRead < length) {
                    final int n = randomAccessFile.read(data, countRead, length - countRead);
                    if (n < 0) {
                        break;
                    }
                    countRead += n;
                }
                if (countRead != length) {
                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                }
            }
            // copy left over part into data arr
            if (leftOverOfLastFilePart != null) {
                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverLength);
            }
            this.currentLastBytePos = data.length - 1;
            this.leftOver = null;
        }

        /**
         * Handles block rollover
         *
         * @return the new FilePart or null
         * @throws IOException if there was a problem reading the file
         */
        private FilePart rollOver() throws IOException {

            if (currentLastBytePos > -1) {
                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
            }

            if (no > 1) {
                return new FilePart(no - 1, blockSize, leftOver);
            }
            // NO 1 was the last FilePart, we're finished
            if (leftOver != null) {
                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                        + new String(leftOver, encoding));
            }
            return null;
        }

        /**
         * Reads a line.
         *
         * @return the line or null
         * @throws IOException if there is an error reading from the file
         */
        private String readLine() throws IOException {

            String line = null;
            int newLineMatchByteCount;

            // the part that holds the beginning of the file is the last one to be processed
            final boolean isLastFilePart = no == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // --- check for newline ---
                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length="+lineLengthBytes);
                    }
                    final byte[] lineData = new byte[lineLengthBytes];
                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);

                    line = new String(lineData, encoding);

                    currentLastBytePos = i - newLineMatchByteCount;
                    if (isLastFilePart && currentLastBytePos < 0) {
                        // The newline is the very first thing in the file, so an empty first line
                        // precedes it; remember it as an empty left over for the next call.
                        leftOver = new byte[0];
                    }
                    break; // found line
                }

                // --- move cursor ---
                i -= byteDecrement;

                // --- end of file part handling ---
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // --- last file part handling ---
            if (line == null && isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, encoding);
                leftOver = null;
            }

            return line;
        }

        /**
         * Creates the buffer containing any left over bytes.
         */
        private void createLeftOver() {
            final int lineLengthBytes = currentLastBytePos + 1;
            if (lineLengthBytes > 0) {
                // create left over for next block
                leftOver = new byte[lineLengthBytes];
                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
            } else {
                leftOver = null;
            }
            currentLastBytePos = -1;
        }

        /**
         * Finds the new-line sequence and return its length.
         *
         * @param data buffer to scan
         * @param i offset in the buffer of the last byte of the candidate sequence
         * @return length of newline sequence or 0 if none found
         */
        private int getNewLineMatchByteCount(final byte[] data, final int i) {
            for (final byte[] newLineSequence : newLineSequences) {
                boolean match = true;
                for (int j = newLineSequence.length - 1; match && j >= 0; j--) {
                    // k walks backwards through data in step with j through the sequence
                    final int k = i + j - (newLineSequence.length - 1);
                    match = k >= 0 && data[k] == newLineSequence[j];
                }
                if (match) {
                    return newLineSequence.length;
                }
            }
            return 0;
        }
    }

}
360 }