001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.io.input; 018 019 import java.io.Closeable; 020 import java.io.File; 021 import java.io.IOException; 022 import java.io.RandomAccessFile; 023 import java.io.UnsupportedEncodingException; 024 import java.nio.charset.Charset; 025 import java.nio.charset.CharsetEncoder; 026 import java.nio.charset.UnsupportedCharsetException; 027 028 import org.apache.commons.io.Charsets; 029 030 /** 031 * Reads lines in a file reversely (similar to a BufferedReader, but starting at 032 * the last line). Useful for e.g. searching in log files. 033 * 034 * @since 2.2 035 */ 036 public class ReversedLinesFileReader implements Closeable { 037 038 private final int blockSize; 039 private final Charset encoding; 040 041 private final RandomAccessFile randomAccessFile; 042 043 private final long totalByteLength; 044 private final long totalBlockCount; 045 046 private final byte[][] newLineSequences; 047 private final int avoidNewlineSplitBufferSize; 048 private final int byteDecrement; 049 050 private FilePart currentFilePart; 051 052 private boolean trailingNewlineOfFileSkipped = false; 053 054 /** 055 * Creates a ReversedLinesFileReader with default block size of 4KB and the 056 * platform's default encoding. 057 * 058 * @param file 059 * the file to be read 060 * @throws IOException if an I/O error occurs 061 */ 062 public ReversedLinesFileReader(final File file) throws IOException { 063 this(file, 4096, Charset.defaultCharset().toString()); 064 } 065 066 /** 067 * Creates a ReversedLinesFileReader with the given block size and encoding. 068 * 069 * @param file 070 * the file to be read 071 * @param blockSize 072 * size of the internal buffer (for ideal performance this should 073 * match with the block size of the underlying file system). 074 * @param encoding 075 * the encoding of the file 076 * @throws IOException if an I/O error occurs 077 * @since 2.3 078 */ 079 public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException { 080 this.blockSize = blockSize; 081 this.encoding = encoding; 082 083 randomAccessFile = new RandomAccessFile(file, "r"); 084 totalByteLength = randomAccessFile.length(); 085 int lastBlockLength = (int) (totalByteLength % blockSize); 086 if (lastBlockLength > 0) { 087 totalBlockCount = totalByteLength / blockSize + 1; 088 } else { 089 totalBlockCount = totalByteLength / blockSize; 090 if (totalByteLength > 0) { 091 lastBlockLength = blockSize; 092 } 093 } 094 currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); 095 096 // --- check & prepare encoding --- 097 Charset charset = Charsets.toCharset(encoding); 098 CharsetEncoder charsetEncoder = charset.newEncoder(); 099 float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); 100 if(maxBytesPerChar==1f) { 101 // all one byte encodings are no problem 102 byteDecrement = 1; 103 } else if(charset == Charset.forName("UTF-8")) { 104 // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte 105 // http://en.wikipedia.org/wiki/UTF-8 106 byteDecrement = 1; 107 } else if(charset == Charset.forName("Shift_JIS")) { 108 // Same as for UTF-8 109 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html 110 byteDecrement = 1; 111 } else if(charset == Charset.forName("UTF-16BE") || charset == Charset.forName("UTF-16LE")) { 112 // UTF-16 new line sequences are not allowed as second tuple of four byte sequences, 113 // however byte order has to be specified 114 byteDecrement = 2; 115 } else if(charset == Charset.forName("UTF-16")) { 116 throw new UnsupportedEncodingException( 117 "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)"); 118 } else { 119 throw new UnsupportedEncodingException( 120 "Encoding "+encoding+" is not supported yet (feel free to submit a patch)"); 121 } 122 // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n 123 newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) }; 124 125 avoidNewlineSplitBufferSize = newLineSequences[0].length; 126 } 127 128 /** 129 * Creates a ReversedLinesFileReader with the given block size and encoding. 130 * 131 * @param file 132 * the file to be read 133 * @param blockSize 134 * size of the internal buffer (for ideal performance this should 135 * match with the block size of the underlying file system). 136 * @param encoding 137 * the encoding of the file 138 * @throws IOException if an I/O error occurs 139 * @throws UnsupportedCharsetException 140 * thrown instead of {@link UnsupportedEncodingException} in version 2.2 if the encoding is not 141 * supported. 142 */ 143 public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException { 144 this(file, blockSize, Charsets.toCharset(encoding)); 145 } 146 147 /** 148 * Returns the lines of the file from bottom to top. 149 * 150 * @return the next line or null if the start of the file is reached 151 * @throws IOException if an I/O error occurs 152 */ 153 public String readLine() throws IOException { 154 155 String line = currentFilePart.readLine(); 156 while (line == null) { 157 currentFilePart = currentFilePart.rollOver(); 158 if (currentFilePart != null) { 159 line = currentFilePart.readLine(); 160 } else { 161 // no more fileparts: we're done, leave line set to null 162 break; 163 } 164 } 165 166 // aligned behaviour wiht BufferedReader that doesn't return a last, emtpy line 167 if("".equals(line) && !trailingNewlineOfFileSkipped) { 168 trailingNewlineOfFileSkipped = true; 169 line = readLine(); 170 } 171 172 return line; 173 } 174 175 /** 176 * Closes underlying resources. 177 * 178 * @throws IOException if an I/O error occurs 179 */ 180 public void close() throws IOException { 181 randomAccessFile.close(); 182 } 183 184 private class FilePart { 185 private final long no; 186 187 private final byte[] data; 188 189 private byte[] leftOver; 190 191 private int currentLastBytePos; 192 193 /** 194 * ctor 195 * @param no the part number 196 * @param length its length 197 * @param leftOverOfLastFilePart remainder 198 * @throws IOException if there is a problem reading the file 199 */ 200 private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException { 201 this.no = no; 202 int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0); 203 this.data = new byte[dataLength]; 204 final long off = (no - 1) * blockSize; 205 206 // read data 207 if (no > 0 /* file not empty */) { 208 randomAccessFile.seek(off); 209 final int countRead = randomAccessFile.read(data, 0, length); 210 if (countRead != length) { 211 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match"); 212 } 213 } 214 // copy left over part into data arr 215 if (leftOverOfLastFilePart != null) { 216 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length); 217 } 218 this.currentLastBytePos = data.length - 1; 219 this.leftOver = null; 220 } 221 222 /** 223 * Handles block rollover 224 * 225 * @return the new FilePart or null 226 * @throws IOException if there was a problem reading the file 227 */ 228 private FilePart rollOver() throws IOException { 229 230 if (currentLastBytePos > -1) { 231 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... " 232 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos); 233 } 234 235 if (no > 1) { 236 return new FilePart(no - 1, blockSize, leftOver); 237 } else { 238 // NO 1 was the last FilePart, we're finished 239 if (leftOver != null) { 240 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart=" 241 + new String(leftOver, encoding)); 242 } 243 return null; 244 } 245 } 246 247 /** 248 * Reads a line. 249 * 250 * @return the line or null 251 * @throws IOException if there is an error reading from the file 252 */ 253 private String readLine() throws IOException { 254 255 String line = null; 256 int newLineMatchByteCount; 257 258 boolean isLastFilePart = no == 1; 259 260 int i = currentLastBytePos; 261 while (i > -1) { 262 263 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) { 264 // avoidNewlineSplitBuffer: for all except the last file part we 265 // take a few bytes to the next file part to avoid splitting of newlines 266 createLeftOver(); 267 break; // skip last few bytes and leave it to the next file part 268 } 269 270 // --- check for newline --- 271 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) { 272 final int lineStart = i + 1; 273 int lineLengthBytes = currentLastBytePos - lineStart + 1; 274 275 if (lineLengthBytes < 0) { 276 throw new IllegalStateException("Unexpected negative line length="+lineLengthBytes); 277 } 278 byte[] lineData = new byte[lineLengthBytes]; 279 System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes); 280 281 line = new String(lineData, encoding); 282 283 currentLastBytePos = i - newLineMatchByteCount; 284 break; // found line 285 } 286 287 // --- move cursor --- 288 i -= byteDecrement; 289 290 // --- end of file part handling --- 291 if (i < 0) { 292 createLeftOver(); 293 break; // end of file part 294 } 295 } 296 297 // --- last file part handling --- 298 if (isLastFilePart && leftOver != null) { 299 // there will be no line break anymore, this is the first line of the file 300 line = new String(leftOver, encoding); 301 leftOver = null; 302 } 303 304 return line; 305 } 306 307 /** 308 * Creates the buffer containing any left over bytes. 309 */ 310 private void createLeftOver() { 311 int lineLengthBytes = currentLastBytePos + 1; 312 if (lineLengthBytes > 0) { 313 // create left over for next block 314 leftOver = new byte[lineLengthBytes]; 315 System.arraycopy(data, 0, leftOver, 0, lineLengthBytes); 316 } else { 317 leftOver = null; 318 } 319 currentLastBytePos = -1; 320 } 321 322 /** 323 * Finds the new-line sequence and return its length. 324 * 325 * @param data buffer to scan 326 * @param i start offset in buffer 327 * @return length of newline sequence or 0 if none found 328 */ 329 private int getNewLineMatchByteCount(byte[] data, int i) { 330 for (byte[] newLineSequence : newLineSequences) { 331 boolean match = true; 332 for (int j = newLineSequence.length - 1; j >= 0; j--) { 333 int k = i + j - (newLineSequence.length - 1); 334 match &= k >= 0 && data[k] == newLineSequence[j]; 335 } 336 if (match) { 337 return newLineSequence.length; 338 } 339 } 340 return 0; 341 } 342 } 343 344 }