001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.io.input;
018
019 import java.io.Closeable;
020 import java.io.File;
021 import java.io.IOException;
022 import java.io.RandomAccessFile;
023 import java.io.UnsupportedEncodingException;
024 import java.nio.charset.Charset;
025 import java.nio.charset.CharsetEncoder;
026 import java.nio.charset.UnsupportedCharsetException;
027
028 import org.apache.commons.io.Charsets;
029
/**
 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
 * the last line). Useful for e.g. searching in log files.
 * <p>
 * The file is consumed from its end towards its beginning in blocks of
 * {@code blockSize} bytes. Supported encodings are all single-byte encodings,
 * UTF-8, Shift_JIS, UTF-16BE and UTF-16LE.
 *
 * @since 2.2
 */
public class ReversedLinesFileReader implements Closeable {

    /** Block size used by {@link #ReversedLinesFileReader(File)}. */
    private static final int DEFAULT_BLOCK_SIZE = 4096;

    private final int blockSize;
    private final Charset encoding;

    private final RandomAccessFile randomAccessFile;

    private final long totalByteLength;
    private final long totalBlockCount;

    /** Encoded forms of "\r\n", "\n" and "\r", in matching priority order. */
    private final byte[][] newLineSequences;
    /** Number of leading bytes of a non-first block that are deferred to the preceding block. */
    private final int avoidNewlineSplitBufferSize;
    /** Step width, in bytes, used when scanning backwards for a newline. */
    private final int byteDecrement;

    /** The block currently being scanned; {@code null} once the start of the file was reached. */
    private FilePart currentFilePart;

    /** Becomes {@code true} once the first line (seen from the end of the file) has been examined. */
    private boolean trailingNewlineChecked = false;

    /**
     * Creates a ReversedLinesFileReader with default block size of 4KB and the
     * platform's default encoding.
     *
     * @param file
     *            the file to be read
     * @throws IOException if an I/O error occurs
     */
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file
     *            the file to be read
     * @param blockSize
     *            size of the internal buffer (for ideal performance this should
     *            match with the block size of the underlying file system).
     *            Must be positive.
     * @param encoding
     *            the encoding of the file, {@code null} selects the platform's
     *            default encoding
     * @throws IOException if an I/O error occurs
     * @throws UnsupportedEncodingException if the encoding is not one of the supported ones
     * @throws IllegalArgumentException if {@code blockSize} is not positive
     * @since 2.3
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException {
        if (blockSize <= 0) {
            throw new IllegalArgumentException("blockSize must be positive, but was " + blockSize);
        }
        this.blockSize = blockSize;

        // Normalise once and use the normalised value everywhere, so a null argument
        // never reaches String.getBytes(Charset) / new String(byte[], Charset).
        final Charset charset = encoding == null ? Charset.defaultCharset() : encoding;
        this.encoding = charset;

        // --- check & prepare encoding (done BEFORE the file is opened, so that an
        // unsupported encoding cannot leak a file handle) ---
        this.byteDecrement = determineByteDecrement(charset);
        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
        this.newLineSequences = new byte[][] {
            "\r\n".getBytes(charset), "\n".getBytes(charset), "\r".getBytes(charset) };
        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;

        this.randomAccessFile = new RandomAccessFile(file, "r");
        boolean initialized = false;
        try {
            final long length = randomAccessFile.length();
            int lastBlockLength = (int) (length % blockSize);
            long blockCount = length / blockSize;
            if (lastBlockLength > 0) {
                // the file ends with a partially filled block
                blockCount++;
            } else if (length > 0) {
                // the file ends exactly on a block boundary: the last block is a full one
                lastBlockLength = blockSize;
            }
            this.totalByteLength = length;
            this.totalBlockCount = blockCount;
            this.currentFilePart = new FilePart(blockCount, lastBlockLength, null);
            initialized = true;
        } finally {
            if (!initialized) {
                // construction failed: nobody will ever be able to call close(), so do it here
                try {
                    randomAccessFile.close();
                } catch (final IOException ignored) {
                    // the failure that brought us here is the one worth reporting
                }
            }
        }
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file
     *            the file to be read
     * @param blockSize
     *            size of the internal buffer (for ideal performance this should
     *            match with the block size of the underlying file system).
     *            Must be positive.
     * @param encoding
     *            the name of the encoding of the file, {@code null} selects the
     *            platform's default encoding
     * @throws IOException if an I/O error occurs
     * @throws UnsupportedCharsetException
     *             thrown instead of {@link UnsupportedEncodingException} in version 2.2 if the encoding is not
     *             supported.
     * @throws IllegalArgumentException if {@code blockSize} is not positive
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
        this(file, blockSize, encoding == null ? Charset.defaultCharset() : Charset.forName(encoding));
    }

    /**
     * Determines by how many bytes the backwards scan may step for the given charset.
     * <p>
     * Charsets are compared by canonical name rather than by looking each candidate up
     * with {@link Charset#forName(String)}: a lookup of an optional charset such as
     * Shift_JIS may fail on runtimes that do not ship it, which would mask the branches
     * that follow.
     *
     * @param charset the (non-null) charset of the file
     * @return the scan step width in bytes
     * @throws UnsupportedEncodingException if the charset is not supported by this reader
     */
    private static int determineByteDecrement(final Charset charset) throws UnsupportedEncodingException {
        if (charset.newEncoder().maxBytesPerChar() == 1f) {
            // all one byte encodings are no problem
            return 1;
        }
        final String name = charset.name();
        if ("UTF-8".equalsIgnoreCase(name)) {
            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
            // http://en.wikipedia.org/wiki/UTF-8
            return 1;
        }
        if ("Shift_JIS".equalsIgnoreCase(name)) {
            // Same as for UTF-8
            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
            return 1;
        }
        if ("UTF-16BE".equalsIgnoreCase(name) || "UTF-16LE".equalsIgnoreCase(name)) {
            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
            // however byte order has to be specified
            return 2;
        }
        if ("UTF-16".equalsIgnoreCase(name)) {
            throw new UnsupportedEncodingException(
                    "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
        }
        throw new UnsupportedEncodingException(
                "Encoding " + charset + " is not supported yet (feel free to submit a patch)");
    }

    /**
     * Returns the lines of the file from bottom to top.
     * <p>
     * Aligned with BufferedReader, a line terminator at the very end of the file does
     * not produce an additional, empty last line. Empty lines elsewhere in the file are
     * returned as empty strings. Once the start of the file has been reached, every
     * further call returns {@code null}.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException if an I/O error occurs
     */
    public String readLine() throws IOException {
        String line = readNextLine();

        // aligned behaviour with BufferedReader that doesn't return a last, empty line;
        // only the very first line seen from the end of the file can be that line
        if (!trailingNewlineChecked) {
            trailingNewlineChecked = true;
            if ("".equals(line)) {
                line = readNextLine();
            }
        }

        return line;
    }

    /**
     * Reads the next line, rolling over to preceding blocks as needed.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException if an I/O error occurs
     */
    private String readNextLine() throws IOException {
        if (currentFilePart == null) {
            // the start of the file was reached by an earlier call
            return null;
        }
        String line = currentFilePart.readLine();
        while (line == null) {
            currentFilePart = currentFilePart.rollOver();
            if (currentFilePart == null) {
                // no more fileparts: we're done, leave line set to null
                break;
            }
            line = currentFilePart.readLine();
        }
        return line;
    }

    /**
     * Closes underlying resources.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        randomAccessFile.close();
    }

    /**
     * One block of the file plus the bytes handed over from the block behind it.
     * Part numbers are 1-based; part 1 is the beginning of the file, part 0 only
     * exists for an empty file.
     */
    private class FilePart {
        private final long no;

        /** The bytes of this block followed by the left over bytes of the following block. */
        private final byte[] data;

        /** Bytes at the start of {@link #data} that belong to a line continued in the preceding block. */
        private byte[] leftOver;

        /** Index in {@link #data} of the last byte not yet returned; -1 when the part is exhausted. */
        private int currentLastBytePos;

        /**
         * ctor
         * @param no the part number
         * @param length its length
         * @param leftOverOfLastFilePart remainder
         * @throws IOException if there is a problem reading the file
         */
        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
            this.no = no;
            final int leftOverLength = leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0;
            this.data = new byte[length + leftOverLength];
            final long off = (no - 1) * blockSize;

            // read data
            if (no > 0 /* file not empty */) {
                randomAccessFile.seek(off);
                // a single read() may legally deliver fewer bytes than requested,
                // so keep reading until the block is complete or the file ends
                int countRead = 0;
                while (countRead < length) {
                    final int n = randomAccessFile.read(data, countRead, length - countRead);
                    if (n < 0) {
                        break;
                    }
                    countRead += n;
                }
                if (countRead != length) {
                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                }
            }
            // copy left over part into data arr
            if (leftOverOfLastFilePart != null) {
                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverLength);
            }
            this.currentLastBytePos = data.length - 1;
            this.leftOver = null;
        }

        /**
         * Handles block rollover
         *
         * @return the new FilePart or null
         * @throws IOException if there was a problem reading the file
         */
        private FilePart rollOver() throws IOException {

            if (currentLastBytePos > -1) {
                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
            }

            if (no > 1) {
                return new FilePart(no - 1, blockSize, leftOver);
            }
            // NO 1 was the last FilePart, we're finished
            if (leftOver != null) {
                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                        + new String(leftOver, encoding));
            }
            return null;
        }

        /**
         * Reads a line.
         *
         * @return the line or null
         * @throws IOException if there is an error reading from the file
         */
        private String readLine() throws IOException {

            String line = null;
            final boolean isLastFilePart = no == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // --- check for newline ---
                final int newLineMatchByteCount = getNewLineMatchByteCount(data, i);
                if (newLineMatchByteCount > 0 /* found newline */) {
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                    }
                    line = new String(data, lineStart, lineLengthBytes, encoding);

                    currentLastBytePos = i - newLineMatchByteCount;
                    break; // found line
                }

                // --- move cursor ---
                i -= byteDecrement;

                // --- end of file part handling ---
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // --- last file part handling ---
            if (isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, encoding);
                leftOver = null;
            }

            return line;
        }

        /**
         * Creates the buffer containing any left over bytes.
         */
        private void createLeftOver() {
            final int lineLengthBytes = currentLastBytePos + 1;
            if (lineLengthBytes > 0) {
                // create left over for next block
                leftOver = new byte[lineLengthBytes];
                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
            } else {
                leftOver = null;
            }
            currentLastBytePos = -1;
        }

        /**
         * Finds the new-line sequence and return its length.
         *
         * @param data buffer to scan
         * @param i offset in the buffer of the last byte of the candidate sequence
         * @return length of newline sequence or 0 if none found
         */
        private int getNewLineMatchByteCount(final byte[] data, final int i) {
            for (final byte[] newLineSequence : newLineSequences) {
                boolean match = true;
                for (int j = newLineSequence.length - 1; j >= 0; j--) {
                    // k walks backwards through data, aligned so that the sequence ends at i
                    final int k = i + j - (newLineSequence.length - 1);
                    match &= k >= 0 && data[k] == newLineSequence[j];
                }
                if (match) {
                    return newLineSequence.length;
                }
            }
            return 0;
        }
    }

}