001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.input; 018 019import java.io.Closeable; 020import java.io.File; 021import java.io.IOException; 022import java.io.RandomAccessFile; 023import java.io.UnsupportedEncodingException; 024import java.nio.charset.Charset; 025import java.nio.charset.CharsetEncoder; 026 027import org.apache.commons.io.Charsets; 028 029/** 030 * Reads lines in a file reversely (similar to a BufferedReader, but starting at 031 * the last line). Useful for e.g. searching in log files. 032 * 033 * @since 2.2 034 */ 035public class ReversedLinesFileReader implements Closeable { 036 037 private final int blockSize; 038 private final Charset encoding; 039 040 private final RandomAccessFile randomAccessFile; 041 042 private final long totalByteLength; 043 private final long totalBlockCount; 044 045 private final byte[][] newLineSequences; 046 private final int avoidNewlineSplitBufferSize; 047 private final int byteDecrement; 048 049 private FilePart currentFilePart; 050 051 private boolean trailingNewlineOfFileSkipped = false; 052 053 /** 054 * Creates a ReversedLinesFileReader with default block size of 4KB and the 055 * platform's default encoding. 056 * 057 * @param file 058 * the file to be read 059 * @throws IOException if an I/O error occurs 060 * @deprecated 2.5 use {@link #ReversedLinesFileReader(File, Charset)} instead 061 */ 062 @Deprecated 063 public ReversedLinesFileReader(final File file) throws IOException { 064 this(file, 4096, Charset.defaultCharset()); 065 } 066 067 /** 068 * Creates a ReversedLinesFileReader with default block size of 4KB and the 069 * specified encoding. 070 * 071 * @param file 072 * the file to be read 073 * @param charset the encoding to use 074 * @throws IOException if an I/O error occurs 075 * @since 2.5 076 */ 077 public ReversedLinesFileReader(final File file, final Charset charset) throws IOException { 078 this(file, 4096, charset); 079 } 080 081 /** 082 * Creates a ReversedLinesFileReader with the given block size and encoding. 083 * 084 * @param file 085 * the file to be read 086 * @param blockSize 087 * size of the internal buffer (for ideal performance this should 088 * match with the block size of the underlying file system). 089 * @param encoding 090 * the encoding of the file 091 * @throws IOException if an I/O error occurs 092 * @since 2.3 093 */ 094 @SuppressWarnings("deprecation") // unavoidable until Java 7 095 public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException { 096 this.blockSize = blockSize; 097 this.encoding = encoding; 098 099 // --- check & prepare encoding --- 100 final Charset charset = Charsets.toCharset(encoding); 101 final CharsetEncoder charsetEncoder = charset.newEncoder(); 102 final float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); 103 if (maxBytesPerChar == 1f) { 104 // all one byte encodings are no problem 105 byteDecrement = 1; 106 } else if (charset == Charsets.UTF_8) { 107 // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte 108 // http://en.wikipedia.org/wiki/UTF-8 109 byteDecrement = 1; 110 } else if(charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 111 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html 112 charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese) 113 charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean) 114 charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese) 115 charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese) 116 byteDecrement = 1; 117 } else if (charset == Charsets.UTF_16BE || charset == Charsets.UTF_16LE) { 118 // UTF-16 new line sequences are not allowed as second tuple of four byte sequences, 119 // however byte order has to be specified 120 byteDecrement = 2; 121 } else if (charset == Charsets.UTF_16) { 122 throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or " + 123 "UTF-16LE)"); 124 } else { 125 throw new UnsupportedEncodingException("Encoding " + encoding + " is not supported yet (feel free to " + 126 "submit a patch)"); 127 } 128 129 // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n 130 newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) }; 131 132 avoidNewlineSplitBufferSize = newLineSequences[0].length; 133 134 // Open file 135 randomAccessFile = new RandomAccessFile(file, "r"); 136 totalByteLength = randomAccessFile.length(); 137 int lastBlockLength = (int) (totalByteLength % blockSize); 138 if (lastBlockLength > 0) { 139 totalBlockCount = totalByteLength / blockSize + 1; 140 } else { 141 totalBlockCount = totalByteLength / blockSize; 142 if (totalByteLength > 0) { 143 lastBlockLength = blockSize; 144 } 145 } 146 currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); 147 148 } 149 150 /** 151 * Creates a ReversedLinesFileReader with the given block size and encoding. 152 * 153 * @param file 154 * the file to be read 155 * @param blockSize 156 * size of the internal buffer (for ideal performance this should 157 * match with the block size of the underlying file system). 158 * @param encoding 159 * the encoding of the file 160 * @throws IOException if an I/O error occurs 161 * @throws java.nio.charset.UnsupportedCharsetException thrown instead of {@link UnsupportedEncodingException} in 162 * version 2.2 if the encoding is not supported. 163 */ 164 public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException { 165 this(file, blockSize, Charsets.toCharset(encoding)); 166 } 167 168 /** 169 * Returns the lines of the file from bottom to top. 170 * 171 * @return the next line or null if the start of the file is reached 172 * @throws IOException if an I/O error occurs 173 */ 174 public String readLine() throws IOException { 175 176 String line = currentFilePart.readLine(); 177 while (line == null) { 178 currentFilePart = currentFilePart.rollOver(); 179 if (currentFilePart != null) { 180 line = currentFilePart.readLine(); 181 } else { 182 // no more fileparts: we're done, leave line set to null 183 break; 184 } 185 } 186 187 // aligned behaviour with BufferedReader that doesn't return a last, empty line 188 if("".equals(line) && !trailingNewlineOfFileSkipped) { 189 trailingNewlineOfFileSkipped = true; 190 line = readLine(); 191 } 192 193 return line; 194 } 195 196 /** 197 * Closes underlying resources. 198 * 199 * @throws IOException if an I/O error occurs 200 */ 201 public void close() throws IOException { 202 randomAccessFile.close(); 203 } 204 205 private class FilePart { 206 private final long no; 207 208 private final byte[] data; 209 210 private byte[] leftOver; 211 212 private int currentLastBytePos; 213 214 /** 215 * ctor 216 * @param no the part number 217 * @param length its length 218 * @param leftOverOfLastFilePart remainder 219 * @throws IOException if there is a problem reading the file 220 */ 221 private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException { 222 this.no = no; 223 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0); 224 this.data = new byte[dataLength]; 225 final long off = (no - 1) * blockSize; 226 227 // read data 228 if (no > 0 /* file not empty */) { 229 randomAccessFile.seek(off); 230 final int countRead = randomAccessFile.read(data, 0, length); 231 if (countRead != length) { 232 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match"); 233 } 234 } 235 // copy left over part into data arr 236 if (leftOverOfLastFilePart != null) { 237 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length); 238 } 239 this.currentLastBytePos = data.length - 1; 240 this.leftOver = null; 241 } 242 243 /** 244 * Handles block rollover 245 * 246 * @return the new FilePart or null 247 * @throws IOException if there was a problem reading the file 248 */ 249 private FilePart rollOver() throws IOException { 250 251 if (currentLastBytePos > -1) { 252 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... " 253 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos); 254 } 255 256 if (no > 1) { 257 return new FilePart(no - 1, blockSize, leftOver); 258 } else { 259 // NO 1 was the last FilePart, we're finished 260 if (leftOver != null) { 261 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart=" 262 + new String(leftOver, encoding)); 263 } 264 return null; 265 } 266 } 267 268 /** 269 * Reads a line. 270 * 271 * @return the line or null 272 * @throws IOException if there is an error reading from the file 273 */ 274 private String readLine() throws IOException { 275 276 String line = null; 277 int newLineMatchByteCount; 278 279 final boolean isLastFilePart = no == 1; 280 281 int i = currentLastBytePos; 282 while (i > -1) { 283 284 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) { 285 // avoidNewlineSplitBuffer: for all except the last file part we 286 // take a few bytes to the next file part to avoid splitting of newlines 287 createLeftOver(); 288 break; // skip last few bytes and leave it to the next file part 289 } 290 291 // --- check for newline --- 292 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) { 293 final int lineStart = i + 1; 294 final int lineLengthBytes = currentLastBytePos - lineStart + 1; 295 296 if (lineLengthBytes < 0) { 297 throw new IllegalStateException("Unexpected negative line length="+lineLengthBytes); 298 } 299 final byte[] lineData = new byte[lineLengthBytes]; 300 System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes); 301 302 line = new String(lineData, encoding); 303 304 currentLastBytePos = i - newLineMatchByteCount; 305 break; // found line 306 } 307 308 // --- move cursor --- 309 i -= byteDecrement; 310 311 // --- end of file part handling --- 312 if (i < 0) { 313 createLeftOver(); 314 break; // end of file part 315 } 316 } 317 318 // --- last file part handling --- 319 if (isLastFilePart && leftOver != null) { 320 // there will be no line break anymore, this is the first line of the file 321 line = new String(leftOver, encoding); 322 leftOver = null; 323 } 324 325 return line; 326 } 327 328 /** 329 * Creates the buffer containing any left over bytes. 330 */ 331 private void createLeftOver() { 332 final int lineLengthBytes = currentLastBytePos + 1; 333 if (lineLengthBytes > 0) { 334 // create left over for next block 335 leftOver = new byte[lineLengthBytes]; 336 System.arraycopy(data, 0, leftOver, 0, lineLengthBytes); 337 } else { 338 leftOver = null; 339 } 340 currentLastBytePos = -1; 341 } 342 343 /** 344 * Finds the new-line sequence and return its length. 345 * 346 * @param data buffer to scan 347 * @param i start offset in buffer 348 * @return length of newline sequence or 0 if none found 349 */ 350 private int getNewLineMatchByteCount(final byte[] data, final int i) { 351 for (final byte[] newLineSequence : newLineSequences) { 352 boolean match = true; 353 for (int j = newLineSequence.length - 1; j >= 0; j--) { 354 final int k = i + j - (newLineSequence.length - 1); 355 match &= k >= 0 && data[k] == newLineSequence[j]; 356 } 357 if (match) { 358 return newLineSequence.length; 359 } 360 } 361 return 0; 362 } 363 } 364 365}