001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.io.input;
018
019 import java.io.Closeable;
020 import java.io.File;
021 import java.io.IOException;
022 import java.io.RandomAccessFile;
023 import java.io.UnsupportedEncodingException;
024 import java.nio.charset.Charset;
025 import java.nio.charset.CharsetEncoder;
026
/**
 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
 * the last line). Useful for e.g. searching in log files.
 * <p>
 * The file is consumed backwards in blocks of {@code blockSize} bytes. Bytes at
 * the beginning of a block that cannot yet be attributed to a complete line are
 * carried over ("left over") and appended to the preceding block.
 * <p>
 * Supported encodings are all single-byte encodings, UTF-8, Shift_JIS, UTF-16BE
 * and UTF-16LE. Instances hold mutable reading state and an open file handle;
 * call {@link #close()} when done.
 *
 * @since 2.2
 */
public class ReversedLinesFileReader implements Closeable {

    /** Number of bytes read from the file per block. */
    private final int blockSize;
    /** Name of the charset used to decode lines and to encode the line break sequences. */
    private final String encoding;

    private final RandomAccessFile randomAccessFile;

    private final long totalByteLength;
    private final long totalBlockCount;

    /** Encoded line break sequences, matched in array order. */
    private final byte[][] newLineSequences;
    /** Number of leading bytes of a non-first block that are handed to the preceding block. */
    private final int avoidNewlineSplitBufferSize;
    /** Step width (in bytes) used when scanning backwards for a line break. */
    private final int byteDecrement;

    /** The block currently being scanned; {@code null} once the start of the file was reached. */
    private FilePart currentFilePart;

    /** Set once the decision about skipping the empty "line" after a trailing line break was made. */
    private boolean trailingNewlineOfFileSkipped = false;

    /**
     * Creates a ReversedLinesFileReader with default block size of 4KB and the
     * platform's default encoding.
     *
     * @param file
     *            the file to be read
     * @throws IOException if an I/O error occurs
     */
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, 4096, Charset.defaultCharset().toString());
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file
     *            the file to be read
     * @param blockSize
     *            size of the internal buffer (for ideal performance this should
     *            match with the block size of the underlying file system);
     *            must be positive
     * @param encoding
     *            the encoding of the file
     * @throws IllegalArgumentException if {@code blockSize} is not positive
     * @throws UnsupportedEncodingException if the encoding is not supported by this reader
     * @throws IOException if an I/O error occurs
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
        if (blockSize <= 0) {
            throw new IllegalArgumentException("blockSize must be positive, but was " + blockSize);
        }
        this.blockSize = blockSize;
        this.encoding = encoding;

        // --- check & prepare encoding ---
        // Done BEFORE the file is opened so that a rejected encoding cannot leak a file handle.
        final Charset charset = Charset.forName(encoding);
        final CharsetEncoder charsetEncoder = charset.newEncoder();
        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
        if (maxBytesPerChar == 1f) {
            // all one byte encodings are no problem
            byteDecrement = 1;
        } else if (isCharset(charset, "UTF-8")) {
            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
            // http://en.wikipedia.org/wiki/UTF-8
            byteDecrement = 1;
        } else if (isCharset(charset, "Shift_JIS")) {
            // Same as for UTF-8
            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
            byteDecrement = 1;
        } else if (isCharset(charset, "UTF-16BE") || isCharset(charset, "UTF-16LE")) {
            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
            // however byte order has to be specified
            byteDecrement = 2;
        } else if (isCharset(charset, "UTF-16")) {
            throw new UnsupportedEncodingException(
                    "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
        } else {
            throw new UnsupportedEncodingException(
                    "Encoding " + encoding + " is not supported yet (feel free to submit a patch)");
        }
        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
        newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) };

        avoidNewlineSplitBufferSize = newLineSequences[0].length;

        // --- open the file and position on its last block ---
        randomAccessFile = new RandomAccessFile(file, "r");
        boolean initialized = false;
        try {
            totalByteLength = randomAccessFile.length();
            int lastBlockLength = (int) (totalByteLength % blockSize);
            if (lastBlockLength > 0) {
                totalBlockCount = totalByteLength / blockSize + 1;
            } else {
                totalBlockCount = totalByteLength / blockSize;
                if (totalByteLength > 0) {
                    // file length is an exact multiple of the block size: the last block is a full one
                    lastBlockLength = blockSize;
                }
            }
            currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
            initialized = true;
        } finally {
            if (!initialized) {
                // construction failed: release the handle, nobody else can close it
                try {
                    randomAccessFile.close();
                } catch (final IOException ignored) {
                    // keep the original failure, it is the more relevant one
                }
            }
        }
    }

    /**
     * Tells whether the given charset is the one registered under the given name.
     * Charsets that are not available in the running JVM never match (instead of
     * raising an exception during the lookup).
     *
     * @param charset the charset to test
     * @param name the charset name to compare with
     * @return {@code true} if {@code name} is supported and denotes {@code charset}
     */
    private static boolean isCharset(final Charset charset, final String name) {
        return Charset.isSupported(name) && Charset.forName(name).equals(charset);
    }

    /**
     * Returns the lines of the file from bottom to top.
     * <p>
     * Aligned with BufferedReader, a line break at the very end of the file does
     * not produce an additional empty line. Once {@code null} has been returned,
     * every further call returns {@code null} as well.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException if an I/O error occurs
     */
    public String readLine() throws IOException {

        if (currentFilePart == null) {
            // start of file was already reached by an earlier call
            return null;
        }

        String line = currentFilePart.readLine();
        while (line == null) {
            currentFilePart = currentFilePart.rollOver();
            if (currentFilePart == null) {
                // no more fileparts: we're done, leave line set to null
                break;
            }
            line = currentFilePart.readLine();
        }

        // aligned behaviour with BufferedReader that doesn't return a last, empty line.
        // Only the very first line delivered can be that artifact; the decision is taken
        // exactly once so that real empty lines further up in the file are preserved.
        if (!trailingNewlineOfFileSkipped) {
            trailingNewlineOfFileSkipped = true;
            if ("".equals(line)) {
                line = readLine();
            }
        }

        return line;
    }

    /**
     * Closes underlying resources.
     *
     * @throws IOException if an I/O error occurs
     */
    public void close() throws IOException {
        randomAccessFile.close();
    }

    /**
     * One block of the file plus the bytes left over from the block that follows
     * it in the file (which was processed before this one).
     */
    private class FilePart {
        /** 1-based number of this block; 0 is used for an empty file. */
        private final long no;

        /** Bytes of this block followed by the left over bytes of the previously processed block. */
        private final byte[] data;

        /** Leading bytes of {@link #data} that must be handed to the preceding block, or {@code null}. */
        private byte[] leftOver;

        /** Index of the last byte in {@link #data} not yet returned as part of a line; -1 if exhausted. */
        private int currentLastBytePos;

        /**
         * ctor
         * @param no the part number
         * @param length its length
         * @param leftOverOfLastFilePart remainder
         * @throws IOException if there is a problem reading the file
         */
        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
            this.no = no;
            final int leftOverLength = leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0;
            this.data = new byte[length + leftOverLength];
            final long off = (no - 1) * blockSize;

            // read data
            if (no > 0 /* file not empty */) {
                randomAccessFile.seek(off);
                // a single read() may deliver fewer bytes than requested, so keep reading
                int countRead = 0;
                while (countRead < length) {
                    final int n = randomAccessFile.read(data, countRead, length - countRead);
                    if (n < 0) {
                        break; // premature end of file
                    }
                    countRead += n;
                }
                if (countRead != length) {
                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                }
            }
            // copy left over part into data arr
            if (leftOverOfLastFilePart != null) {
                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
            }
            this.currentLastBytePos = data.length - 1;
            this.leftOver = null;
        }

        /**
         * Handles block rollover
         *
         * @return the new FilePart or null
         * @throws IOException if there was a problem reading the file
         */
        private FilePart rollOver() throws IOException {

            if (currentLastBytePos > -1) {
                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
            }

            if (no > 1) {
                return new FilePart(no - 1, blockSize, leftOver);
            }

            // NO 1 was the last FilePart, we're finished
            if (leftOver != null) {
                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                        + new String(leftOver, encoding));
            }
            return null;
        }

        /**
         * Reads a line.
         * <p>
         * Scans {@link #data} backwards from the current position for a line break
         * and returns the bytes behind it. If the beginning of the block is reached
         * without finding one, the remaining bytes become the left over; for block
         * number 1 they are returned as the first line of the file instead.
         * <p>
         * Note: a line break located at the very first byte of the file does not
         * yield an additional empty line.
         *
         * @return the line or null
         * @throws IOException if there is an error reading from the file
         */
        private String readLine() throws IOException {

            String line = null;
            int newLineMatchByteCount;

            final boolean isLastFilePart = no == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // --- check for newline ---
                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                    }
                    final byte[] lineData = new byte[lineLengthBytes];
                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);

                    line = new String(lineData, encoding);

                    // continue in front of the line break on the next call
                    currentLastBytePos = i - newLineMatchByteCount;
                    break; // found line
                }

                // --- move cursor ---
                i -= byteDecrement;

                // --- end of file part handling ---
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // --- last file part handling ---
            if (isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, encoding);
                leftOver = null;
            }

            return line;
        }

        /**
         * Creates the buffer containing any left over bytes, i.e. everything from
         * the start of {@link #data} up to the current position, and marks this
         * part as exhausted.
         */
        private void createLeftOver() {
            final int lineLengthBytes = currentLastBytePos + 1;
            if (lineLengthBytes > 0) {
                // create left over for next block
                leftOver = new byte[lineLengthBytes];
                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
            } else {
                leftOver = null;
            }
            currentLastBytePos = -1;
        }

        /**
         * Finds the new-line sequence and return its length.
         * The sequence is expected to END at offset {@code i}.
         *
         * @param data buffer to scan
         * @param i start offset in buffer
         * @return length of newline sequence or 0 if none found
         */
        private int getNewLineMatchByteCount(final byte[] data, final int i) {
            for (final byte[] newLineSequence : newLineSequences) {
                boolean match = true;
                // compare back to front; a sequence reaching before the buffer start cannot match
                for (int j = newLineSequence.length - 1; j >= 0 && match; j--) {
                    final int k = i + j - (newLineSequence.length - 1);
                    match = k >= 0 && data[k] == newLineSequence[j];
                }
                if (match) {
                    return newLineSequence.length;
                }
            }
            return 0;
        }
    }

}