1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.io.input;
18
19 import java.io.Closeable;
20 import java.io.File;
21 import java.io.IOException;
22 import java.io.RandomAccessFile;
23 import java.io.UnsupportedEncodingException;
24 import java.nio.charset.Charset;
25 import java.nio.charset.CharsetEncoder;
26 import java.nio.charset.UnsupportedCharsetException;
27
28 import org.apache.commons.io.Charsets;
29
30 /**
31 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
32 * the last line). Useful for e.g. searching in log files.
33 *
34 * @since 2.2
35 */
public class ReversedLinesFileReader implements Closeable {

    /** Block size (in bytes) used by the constructors that do not take one. */
    private static final int DEFAULT_BLOCK_SIZE = 4096;

    private final int blockSize;
    private final Charset encoding;

    private final RandomAccessFile randomAccessFile;

    private final long totalByteLength;
    private final long totalBlockCount;

    /** Encoded forms of "\r\n", "\n" and "\r", in matching priority order. */
    private final byte[][] newLineSequences;
    /** Number of leading bytes of a block that are handed to the preceding block. */
    private final int avoidNewlineSplitBufferSize;
    /** Step width (in bytes) used when scanning a block backwards. */
    private final int byteDecrement;

    /** The block currently being scanned; never null once construction succeeded. */
    private FilePart currentFilePart;

    private boolean trailingNewlineOfFileSkipped = false;

    /**
     * Creates a ReversedLinesFileReader with default block size of 4KB and the
     * platform's default encoding.
     *
     * @param file
     *            the file to be read
     * @throws IOException if an I/O error occurs
     * @deprecated 2.5 use {@link #ReversedLinesFileReader(File, Charset)} instead
     */
    @Deprecated
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
    }

    /**
     * Creates a ReversedLinesFileReader with default block size of 4KB and the
     * specified encoding.
     *
     * @param file
     *            the file to be read
     * @param charset the encoding to use, null selects the platform default
     * @throws IOException if an I/O error occurs
     * @since 2.5
     */
    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, charset);
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     * <p>
     * The encoding is validated before the file is opened, and the file is closed
     * again if reading the last block fails, so a failed construction does not
     * leave an open file handle behind.
     *
     * @param file
     *            the file to be read
     * @param blockSize
     *            size of the internal buffer (for ideal performance this should
     *            match with the block size of the underlying file system),
     *            must be positive
     * @param encoding
     *            the encoding of the file, null selects the platform default
     * @throws IOException if an I/O error occurs
     * @throws UnsupportedEncodingException if the encoding is not one this reader can scan backwards
     * @throws IllegalArgumentException if {@code blockSize} is not positive
     * @since 2.3
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException {
        if (blockSize <= 0) {
            throw new IllegalArgumentException("blockSize must be positive, but was " + blockSize);
        }
        this.blockSize = blockSize;
        // resolve null once so that every later getBytes/new String call sees a real charset
        this.encoding = encoding != null ? encoding : Charset.defaultCharset();

        // --- check & prepare encoding (done before the file is opened) ---
        // Canonical names are compared instead of instances: this is what Charset.equals does
        // and it avoids looking up charsets that the running JVM may not provide.
        final String charsetName = this.encoding.name();
        final CharsetEncoder charsetEncoder = this.encoding.newEncoder();
        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
        if (maxBytesPerChar == 1f) {
            // all one byte encodings are no problem
            byteDecrement = 1;
        } else if ("UTF-8".equals(charsetName)) {
            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
            // http://en.wikipedia.org/wiki/UTF-8
            byteDecrement = 1;
        } else if ("Shift_JIS".equals(charsetName)) {
            // Same as for UTF-8
            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
            byteDecrement = 1;
        } else if ("UTF-16BE".equals(charsetName) || "UTF-16LE".equals(charsetName)) {
            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
            // however byte order has to be specified
            byteDecrement = 2;
        } else if ("UTF-16".equals(charsetName)) {
            throw new UnsupportedEncodingException(
                    "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
        } else {
            throw new UnsupportedEncodingException(
                    "Encoding " + this.encoding + " is not supported yet (feel free to submit a patch)");
        }
        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
        newLineSequences = new byte[][] {
                "\r\n".getBytes(this.encoding), "\n".getBytes(this.encoding), "\r".getBytes(this.encoding) };

        avoidNewlineSplitBufferSize = newLineSequences[0].length;

        // --- open the file and load the last block ---
        randomAccessFile = new RandomAccessFile(file, "r");
        boolean initialised = false;
        try {
            totalByteLength = randomAccessFile.length();
            int lastBlockLength = (int) (totalByteLength % blockSize);
            if (lastBlockLength > 0) {
                totalBlockCount = totalByteLength / blockSize + 1;
            } else {
                totalBlockCount = totalByteLength / blockSize;
                if (totalByteLength > 0) {
                    // file length is an exact multiple of the block size: the last block is a full one
                    lastBlockLength = blockSize;
                }
            }
            currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
            initialised = true;
        } finally {
            if (!initialised) {
                try {
                    randomAccessFile.close();
                } catch (final IOException ignored) {
                    // the failure that aborted construction is the one worth reporting
                }
            }
        }
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file
     *            the file to be read
     * @param blockSize
     *            size of the internal buffer (for ideal performance this should
     *            match with the block size of the underlying file system).
     * @param encoding
     *            the name of the encoding of the file, null selects the platform default
     * @throws IOException if an I/O error occurs
     * @throws UnsupportedCharsetException
     *             thrown instead of {@link UnsupportedEncodingException} in version 2.2 if the encoding is not
     *             supported.
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
        this(file, blockSize, encoding == null ? Charset.defaultCharset() : Charset.forName(encoding));
    }

    /**
     * Returns the lines of the file from bottom to top.
     * <p>
     * Once the start of the file has been reached, every further call returns null.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException if an I/O error occurs
     */
    public String readLine() throws IOException {

        String line = currentFilePart.readLine();
        while (line == null) {
            final FilePart precedingPart = currentFilePart.rollOver();
            if (precedingPart == null) {
                // no more fileparts: we're done, leave line set to null.
                // The exhausted part is kept so that calling readLine() again yields null
                // instead of failing on a null currentFilePart.
                break;
            }
            currentFilePart = precedingPart;
            line = currentFilePart.readLine();
        }

        // aligned behaviour with BufferedReader that doesn't return a last, empty line
        if ("".equals(line) && !trailingNewlineOfFileSkipped) {
            trailingNewlineOfFileSkipped = true;
            line = readLine();
        }

        return line;
    }

    /**
     * Closes underlying resources.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        randomAccessFile.close();
    }

    /**
     * One block of the file plus the unfinished line carried over from the block after it.
     * Parts are numbered from 1 (start of the file); number 0 is only used for an empty file.
     */
    private class FilePart {
        /** Part number; part 1 is the block at the start of the file. */
        private final long no;

        /** Bytes of this block followed by the left over bytes of the following block. */
        private final byte[] data;

        /** Bytes at the start of this block that belong to a line continuing in the preceding block. */
        private byte[] leftOver;

        /** Index in {@code data} of the last byte not yet returned, -1 once the part is exhausted. */
        private int currentLastBytePos;

        /**
         * ctor
         * @param no the part number
         * @param length its length
         * @param leftOverOfLastFilePart remainder
         * @throws IOException if there is a problem reading the file
         */
        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
            this.no = no;
            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
            this.data = new byte[dataLength];
            final long off = (no - 1) * blockSize;

            // read data
            if (no > 0 /* file not empty */) {
                randomAccessFile.seek(off);
                final int countRead = randomAccessFile.read(data, 0, length);
                if (countRead != length) {
                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                }
            }
            // copy left over part into data arr
            if (leftOverOfLastFilePart != null) {
                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
            }
            this.currentLastBytePos = data.length - 1;
            this.leftOver = null;
        }

        /**
         * Handles block rollover
         *
         * @return the new FilePart or null
         * @throws IOException if there was a problem reading the file
         */
        private FilePart rollOver() throws IOException {

            if (currentLastBytePos > -1) {
                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
            }

            if (no > 1) {
                return new FilePart(no - 1, blockSize, leftOver);
            } else {
                // NO 1 was the last FilePart, we're finished
                if (leftOver != null) {
                    throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                            + new String(leftOver, encoding));
                }
                return null;
            }
        }

        /**
         * Reads a line.
         *
         * @return the line or null
         * @throws IOException if there is an error reading from the file
         */
        private String readLine() throws IOException {

            String line = null;
            int newLineMatchByteCount;

            final boolean isLastFilePart = no == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // --- check for newline ---
                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                    }
                    final byte[] lineData = new byte[lineLengthBytes];
                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);

                    line = new String(lineData, encoding);

                    // continue in front of the newline sequence on the next call
                    currentLastBytePos = i - newLineMatchByteCount;
                    break; // found line
                }

                // --- move cursor ---
                i -= byteDecrement;

                // --- end of file part handling ---
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // --- last file part handling ---
            if (isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, encoding);
                leftOver = null;
            }

            return line;
        }

        /**
         * Creates the buffer containing any left over bytes.
         */
        private void createLeftOver() {
            final int lineLengthBytes = currentLastBytePos + 1;
            if (lineLengthBytes > 0) {
                // create left over for next block
                leftOver = new byte[lineLengthBytes];
                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
            } else {
                leftOver = null;
            }
            currentLastBytePos = -1;
        }

        /**
         * Finds the new-line sequence and return its length.
         *
         * @param data buffer to scan
         * @param i offset in the buffer of the last byte of the candidate sequence
         * @return length of newline sequence or 0 if none found
         */
        private int getNewLineMatchByteCount(final byte[] data, final int i) {
            for (final byte[] newLineSequence : newLineSequences) {
                boolean match = true;
                // compare backwards so that the sequence ends exactly at position i
                for (int j = newLineSequence.length - 1; j >= 0; j--) {
                    final int k = i + j - (newLineSequence.length - 1);
                    match &= k >= 0 && data[k] == newLineSequence[j];
                }
                if (match) {
                    return newLineSequence.length;
                }
            }
            return 0;
        }
    }

}