001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.input; 018 019import java.io.Closeable; 020import java.io.File; 021import java.io.IOException; 022import java.io.UnsupportedEncodingException; 023import java.nio.ByteBuffer; 024import java.nio.channels.SeekableByteChannel; 025import java.nio.charset.Charset; 026import java.nio.charset.CharsetEncoder; 027import java.nio.charset.StandardCharsets; 028import java.nio.file.Files; 029import java.nio.file.Path; 030import java.nio.file.StandardOpenOption; 031 032import org.apache.commons.io.Charsets; 033import org.apache.commons.io.IOUtils; 034 035/** 036 * Reads lines in a file reversely (similar to a BufferedReader, but starting at 037 * the last line). Useful for e.g. searching in log files. 038 * 039 * @since 2.2 040 */ 041public class ReversedLinesFileReader implements Closeable { 042 043 private static final String EMPTY_STRING = ""; 044 private static final int DEFAULT_BLOCK_SIZE = IOUtils.DEFAULT_BUFFER_SIZE; 045 046 private final int blockSize; 047 private final Charset encoding; 048 049 private final SeekableByteChannel channel; 050 051 private final long totalByteLength; 052 private final long totalBlockCount; 053 054 private final byte[][] newLineSequences; 055 private final int avoidNewlineSplitBufferSize; 056 private final int byteDecrement; 057 058 private FilePart currentFilePart; 059 060 private boolean trailingNewlineOfFileSkipped = false; 061 062 /** 063 * Creates a ReversedLinesFileReader with default block size of 4KB and the 064 * platform's default encoding. 065 * 066 * @param file 067 * the file to be read 068 * @throws IOException if an I/O error occurs 069 * @deprecated 2.5 use {@link #ReversedLinesFileReader(File, Charset)} instead 070 */ 071 @Deprecated 072 public ReversedLinesFileReader(final File file) throws IOException { 073 this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset()); 074 } 075 076 /** 077 * Creates a ReversedLinesFileReader with default block size of 4KB and the 078 * specified encoding. 079 * 080 * @param file 081 * the file to be read 082 * @param charset the charset to use 083 * @throws IOException if an I/O error occurs 084 * @since 2.5 085 */ 086 public ReversedLinesFileReader(final File file, final Charset charset) throws IOException { 087 this(file.toPath(), charset); 088 } 089 090 /** 091 * Creates a ReversedLinesFileReader with default block size of 4KB and the 092 * specified encoding. 093 * 094 * @param file 095 * the file to be read 096 * @param charset the charset to use 097 * @throws IOException if an I/O error occurs 098 * @since 2.7 099 */ 100 public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException { 101 this(file, DEFAULT_BLOCK_SIZE, charset); 102 } 103 104 /** 105 * Creates a ReversedLinesFileReader with the given block size and encoding. 106 * 107 * @param file 108 * the file to be read 109 * @param blockSize 110 * size of the internal buffer (for ideal performance this should 111 * match with the block size of the underlying file system). 112 * @param encoding 113 * the encoding of the file 114 * @throws IOException if an I/O error occurs 115 * @since 2.3 116 */ 117 public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException { 118 this(file.toPath(), blockSize, encoding); 119 } 120 121 /** 122 * Creates a ReversedLinesFileReader with the given block size and encoding. 123 * 124 * @param file 125 * the file to be read 126 * @param blockSize 127 * size of the internal buffer (for ideal performance this should 128 * match with the block size of the underlying file system). 129 * @param encoding 130 * the encoding of the file 131 * @throws IOException if an I/O error occurs 132 * @since 2.7 133 */ 134 public ReversedLinesFileReader(final Path file, final int blockSize, final Charset encoding) throws IOException { 135 this.blockSize = blockSize; 136 this.encoding = encoding; 137 138 // --- check & prepare encoding --- 139 final Charset charset = Charsets.toCharset(encoding); 140 final CharsetEncoder charsetEncoder = charset.newEncoder(); 141 final float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); 142 if (maxBytesPerChar == 1f) { 143 // all one byte encodings are no problem 144 byteDecrement = 1; 145 } else if (charset == StandardCharsets.UTF_8) { 146 // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte 147 // http://en.wikipedia.org/wiki/UTF-8 148 byteDecrement = 1; 149 } else if(charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 150 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html 151 charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese) 152 charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean) 153 charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese) 154 charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese) 155 byteDecrement = 1; 156 } else if (charset == StandardCharsets.UTF_16BE || charset == StandardCharsets.UTF_16LE) { 157 // UTF-16 new line sequences are not allowed as second tuple of four byte sequences, 158 // however byte order has to be specified 159 byteDecrement = 2; 160 } else if (charset == StandardCharsets.UTF_16) { 161 throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or " + 162 "UTF-16LE)"); 163 } else { 164 throw new UnsupportedEncodingException("Encoding " + encoding + " is not supported yet (feel free to " + 165 "submit a patch)"); 166 } 167 168 // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n 169 newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) }; 170 171 avoidNewlineSplitBufferSize = newLineSequences[0].length; 172 173 // Open file 174 channel = Files.newByteChannel(file, StandardOpenOption.READ); 175 totalByteLength = channel.size(); 176 int lastBlockLength = (int) (totalByteLength % blockSize); 177 if (lastBlockLength > 0) { 178 totalBlockCount = totalByteLength / blockSize + 1; 179 } else { 180 totalBlockCount = totalByteLength / blockSize; 181 if (totalByteLength > 0) { 182 lastBlockLength = blockSize; 183 } 184 } 185 currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); 186 187 } 188 189 /** 190 * Creates a ReversedLinesFileReader with the given block size and encoding. 191 * 192 * @param file 193 * the file to be read 194 * @param blockSize 195 * size of the internal buffer (for ideal performance this should 196 * match with the block size of the underlying file system). 197 * @param encoding 198 * the encoding of the file 199 * @throws IOException if an I/O error occurs 200 * @throws java.nio.charset.UnsupportedCharsetException thrown instead of {@link UnsupportedEncodingException} in 201 * version 2.2 if the encoding is not supported. 202 */ 203 public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException { 204 this(file.toPath(), blockSize, encoding); 205 } 206 207 /** 208 * Creates a ReversedLinesFileReader with the given block size and encoding. 209 * 210 * @param file 211 * the file to be read 212 * @param blockSize 213 * size of the internal buffer (for ideal performance this should 214 * match with the block size of the underlying file system). 215 * @param charsetName 216 * the encoding of the file 217 * @throws IOException if an I/O error occurs 218 * @throws java.nio.charset.UnsupportedCharsetException thrown instead of {@link UnsupportedEncodingException} in 219 * version 2.2 if the encoding is not supported. 220 * @since 2.7 221 */ 222 public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException { 223 this(file, blockSize, Charsets.toCharset(charsetName)); 224 } 225 226 /** 227 * Returns the lines of the file from bottom to top. 228 * 229 * @return the next line or null if the start of the file is reached 230 * @throws IOException if an I/O error occurs 231 */ 232 public String readLine() throws IOException { 233 234 String line = currentFilePart.readLine(); 235 while (line == null) { 236 currentFilePart = currentFilePart.rollOver(); 237 if (currentFilePart != null) { 238 line = currentFilePart.readLine(); 239 } else { 240 // no more fileparts: we're done, leave line set to null 241 break; 242 } 243 } 244 245 // aligned behavior with BufferedReader that doesn't return a last, empty line 246 if(EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) { 247 trailingNewlineOfFileSkipped = true; 248 line = readLine(); 249 } 250 251 return line; 252 } 253 254 /** 255 * Closes underlying resources. 256 * 257 * @throws IOException if an I/O error occurs 258 */ 259 @Override 260 public void close() throws IOException { 261 channel.close(); 262 } 263 264 private class FilePart { 265 private final long no; 266 267 private final byte[] data; 268 269 private byte[] leftOver; 270 271 private int currentLastBytePos; 272 273 /** 274 * ctor 275 * @param no the part number 276 * @param length its length 277 * @param leftOverOfLastFilePart remainder 278 * @throws IOException if there is a problem reading the file 279 */ 280 private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException { 281 this.no = no; 282 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0); 283 this.data = new byte[dataLength]; 284 final long off = (no - 1) * blockSize; 285 286 // read data 287 if (no > 0 /* file not empty */) { 288 channel.position(off); 289 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length)); 290 if (countRead != length) { 291 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match"); 292 } 293 } 294 // copy left over part into data arr 295 if (leftOverOfLastFilePart != null) { 296 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length); 297 } 298 this.currentLastBytePos = data.length - 1; 299 this.leftOver = null; 300 } 301 302 /** 303 * Handles block rollover 304 * 305 * @return the new FilePart or null 306 * @throws IOException if there was a problem reading the file 307 */ 308 private FilePart rollOver() throws IOException { 309 310 if (currentLastBytePos > -1) { 311 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... " 312 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos); 313 } 314 315 if (no > 1) { 316 return new FilePart(no - 1, blockSize, leftOver); 317 } 318 // NO 1 was the last FilePart, we're finished 319 if (leftOver != null) { 320 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart=" 321 + new String(leftOver, encoding)); 322 } 323 return null; 324 } 325 326 /** 327 * Reads a line. 328 * 329 * @return the line or null 330 * @throws IOException if there is an error reading from the file 331 */ 332 private String readLine() throws IOException { 333 334 String line = null; 335 int newLineMatchByteCount; 336 337 final boolean isLastFilePart = no == 1; 338 339 int i = currentLastBytePos; 340 while (i > -1) { 341 342 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) { 343 // avoidNewlineSplitBuffer: for all except the last file part we 344 // take a few bytes to the next file part to avoid splitting of newlines 345 createLeftOver(); 346 break; // skip last few bytes and leave it to the next file part 347 } 348 349 // --- check for newline --- 350 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) { 351 final int lineStart = i + 1; 352 final int lineLengthBytes = currentLastBytePos - lineStart + 1; 353 354 if (lineLengthBytes < 0) { 355 throw new IllegalStateException("Unexpected negative line length="+lineLengthBytes); 356 } 357 final byte[] lineData = new byte[lineLengthBytes]; 358 System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes); 359 360 line = new String(lineData, encoding); 361 362 currentLastBytePos = i - newLineMatchByteCount; 363 break; // found line 364 } 365 366 // --- move cursor --- 367 i -= byteDecrement; 368 369 // --- end of file part handling --- 370 if (i < 0) { 371 createLeftOver(); 372 break; // end of file part 373 } 374 } 375 376 // --- last file part handling --- 377 if (isLastFilePart && leftOver != null) { 378 // there will be no line break anymore, this is the first line of the file 379 line = new String(leftOver, encoding); 380 leftOver = null; 381 } 382 383 return line; 384 } 385 386 /** 387 * Creates the buffer containing any left over bytes. 388 */ 389 private void createLeftOver() { 390 final int lineLengthBytes = currentLastBytePos + 1; 391 if (lineLengthBytes > 0) { 392 // create left over for next block 393 leftOver = new byte[lineLengthBytes]; 394 System.arraycopy(data, 0, leftOver, 0, lineLengthBytes); 395 } else { 396 leftOver = null; 397 } 398 currentLastBytePos = -1; 399 } 400 401 /** 402 * Finds the new-line sequence and return its length. 403 * 404 * @param data buffer to scan 405 * @param i start offset in buffer 406 * @return length of newline sequence or 0 if none found 407 */ 408 private int getNewLineMatchByteCount(final byte[] data, final int i) { 409 for (final byte[] newLineSequence : newLineSequences) { 410 boolean match = true; 411 for (int j = newLineSequence.length - 1; j >= 0; j--) { 412 final int k = i + j - (newLineSequence.length - 1); 413 match &= k >= 0 && data[k] == newLineSequence[j]; 414 } 415 if (match) { 416 return newLineSequence.length; 417 } 418 } 419 return 0; 420 } 421 } 422 423}