/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.input;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;

/**
 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
 * the last line). Useful for e.g. searching in log files.
 * <p>
 * The file is read backwards in blocks of {@code blockSize} bytes. Bytes at the
 * start of a block that do not yet form a complete line are carried over to the
 * preceding block as "left over".
 *
 * @since 2.2
 */
public class ReversedLinesFileReader implements Closeable {

    private final int blockSize;
    private final String encoding;

    private final RandomAccessFile randomAccessFile;

    private final long totalByteLength;
    private final long totalBlockCount;

    private final byte[][] newLineSequences;
    private final int avoidNewlineSplitBufferSize;
    private final int byteDecrement;

    private FilePart currentFilePart;

    /**
     * Becomes true once the very first line (seen from the end of the file) has
     * been examined, i.e. once the decision whether to drop a trailing empty
     * line has been taken.
     */
    private boolean trailingNewlineOfFileSkipped = false;

    /**
     * Creates a ReversedLinesFileReader with default block size of 4KB and the
     * platform's default encoding.
     *
     * @param file
     *            the file to be read
     * @throws IOException  if an I/O error occurs
     */
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, 4096, Charset.defaultCharset().toString());
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file
     *            the file to be read
     * @param blockSize
     *            size of the internal buffer (for ideal performance this should
     *            match with the block size of the underlying file system).
     * @param encoding
     *            the encoding of the file
     * @throws IOException  if an I/O error occurs
     * @throws UnsupportedEncodingException if the encoding is a multi-byte encoding
     *             that this reader cannot scan backwards
     * @throws IllegalArgumentException if {@code blockSize} is not positive
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
        if (blockSize <= 0) {
            throw new IllegalArgumentException("Block size must be positive, but was " + blockSize);
        }
        this.blockSize = blockSize;
        this.encoding = encoding;

        // --- check & prepare encoding ---
        // Done BEFORE the file is opened so that a rejected encoding cannot leak a file handle.
        final Charset charset = Charset.forName(encoding);
        final CharsetEncoder charsetEncoder = charset.newEncoder();
        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
        if (maxBytesPerChar == 1f) {
            // all one byte encodings are no problem
            byteDecrement = 1;
        } else if (isCharset(charset, "UTF-8")) {
            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
            // http://en.wikipedia.org/wiki/UTF-8
            byteDecrement = 1;
        } else if (isCharset(charset, "Shift_JIS")) {
            // Same as for UTF-8
            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
            byteDecrement = 1;
        } else if (isCharset(charset, "UTF-16BE") || isCharset(charset, "UTF-16LE")) {
            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
            // however byte order has to be specified
            byteDecrement = 2;
        } else if (isCharset(charset, "UTF-16")) {
            throw new UnsupportedEncodingException(
                    "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
        } else {
            throw new UnsupportedEncodingException(
                    "Encoding " + encoding + " is not supported yet (feel free to submit a patch)");
        }
        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
        newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) };

        avoidNewlineSplitBufferSize = newLineSequences[0].length;

        // --- open the file and load the last block ---
        randomAccessFile = new RandomAccessFile(file, "r");
        boolean initialized = false;
        try {
            totalByteLength = randomAccessFile.length();
            int lastBlockLength = (int) (totalByteLength % blockSize);
            if (lastBlockLength > 0) {
                totalBlockCount = totalByteLength / blockSize + 1;
            } else {
                totalBlockCount = totalByteLength / blockSize;
                if (totalByteLength > 0) {
                    lastBlockLength = blockSize;
                }
            }
            currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
            initialized = true;
        } finally {
            if (!initialized) {
                // construction failed: the caller never gets a reference it could close
                try {
                    randomAccessFile.close();
                } catch (final IOException ignored) {
                    // keep the original failure, a secondary close() problem is of no interest
                }
            }
        }
    }

    /**
     * Tells whether the given charset is the one registered under the given name.
     * Unlike a plain {@code Charset.forName(name)} comparison this does not fail
     * when the named charset is not available in the running JVM.
     *
     * @param charset the charset to test
     * @param name the charset name to compare with
     * @return true if {@code name} is supported and denotes {@code charset}
     */
    private static boolean isCharset(final Charset charset, final String name) {
        return Charset.isSupported(name) && charset.equals(Charset.forName(name));
    }

    /**
     * Returns the lines of the file from bottom to top.
     * <p>
     * Once the start of the file has been reached every further call returns
     * {@code null} again.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException  if an I/O error occurs
     */
    public String readLine() throws IOException {

        String line = currentFilePart.readLine();
        while (line == null) {
            final FilePart precedingPart = currentFilePart.rollOver();
            if (precedingPart == null) {
                // no more fileparts: we're done, leave line set to null.
                // currentFilePart keeps pointing at the exhausted part so that
                // calling readLine() again simply yields null once more.
                break;
            }
            currentFilePart = precedingPart;
            line = currentFilePart.readLine();
        }

        // aligned behaviour with BufferedReader that doesn't return a last, empty line.
        // Only the very first line seen from the end can be that artefact of a trailing
        // newline; empty lines found later are real content and must be returned.
        if (!trailingNewlineOfFileSkipped) {
            trailingNewlineOfFileSkipped = true;
            if ("".equals(line)) {
                line = readLine();
            }
        }

        return line;
    }

    /**
     * Closes underlying resources.
     *
     * @throws IOException  if an I/O error occurs
     */
    public void close() throws IOException {
        randomAccessFile.close();
    }

    /**
     * One block of the file together with the bytes handed over from the block
     * that follows it in the file (which was processed before it).
     */
    private class FilePart {
        /** 1-based number of this block within the file; 0 for an empty file. */
        private final long no;

        /** Bytes of this block followed by the left over bytes of the following block. */
        private final byte[] data;

        /** Bytes at the start of {@link #data} that are handed over to the preceding block. */
        private byte[] leftOver;

        /** Index of the last byte in {@link #data} that has not been returned yet; -1 if none. */
        private int currentLastBytePos;

        /**
         * ctor
         * @param no the part number
         * @param length its length
         * @param leftOverOfLastFilePart remainder
         * @throws IOException if there is a problem reading the file
         */
        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
            this.no = no;
            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
            this.data = new byte[dataLength];
            final long off = (no - 1) * blockSize;

            // read data
            if (no > 0 /* file not empty */) {
                randomAccessFile.seek(off);
                // read() may legally deliver fewer bytes than requested, so keep going until
                // the block is complete or the end of the file is hit
                int countRead = 0;
                while (countRead < length) {
                    final int count = randomAccessFile.read(data, countRead, length - countRead);
                    if (count < 0) {
                        break;
                    }
                    countRead += count;
                }
                if (countRead != length) {
                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                }
            }
            // copy left over part into data arr
            if (leftOverOfLastFilePart != null) {
                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
            }
            this.currentLastBytePos = data.length - 1;
            this.leftOver = null;
        }

        /**
         * Handles block rollover
         *
         * @return the new FilePart or null
         * @throws IOException if there was a problem reading the file
         */
        private FilePart rollOver() throws IOException {

            if (currentLastBytePos > -1) {
                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
            }

            if (no > 1) {
                return new FilePart(no - 1, blockSize, leftOver);
            } else {
                // NO 1 was the last FilePart, we're finished
                if (leftOver != null) {
                    throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                            + new String(leftOver, encoding));
                }
                return null;
            }
        }

        /**
         * Reads a line.
         *
         * @return the line or null
         * @throws IOException if there is an error reading from the file
         */
        private String readLine() throws IOException {

            String line = null;
            int newLineMatchByteCount;

            final boolean isLastFilePart = no == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // --- check for newline ---
                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                    }
                    final byte[] lineData = new byte[lineLengthBytes];
                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);

                    line = new String(lineData, encoding);

                    currentLastBytePos = i - newLineMatchByteCount;
                    break; // found line
                }

                // --- move cursor ---
                i -= byteDecrement;

                // --- end of file part handling ---
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // --- last file part handling ---
            if (isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, encoding);
                leftOver = null;
            }

            return line;
        }

        /**
         * Creates the buffer containing any left over bytes.
         */
        private void createLeftOver() {
            final int lineLengthBytes = currentLastBytePos + 1;
            if (lineLengthBytes > 0) {
                // create left over for next block
                leftOver = new byte[lineLengthBytes];
                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
            } else {
                leftOver = null;
            }
            currentLastBytePos = -1;
        }

        /**
         * Finds the new-line sequence and return its length.
         *
         * @param data buffer to scan
         * @param i start offset in buffer (the candidate sequence must END at this index)
         * @return length of newline sequence or 0 if none found
         */
        private int getNewLineMatchByteCount(final byte[] data, final int i) {
            for (final byte[] newLineSequence : newLineSequences) {
                boolean match = true;
                for (int j = newLineSequence.length - 1; j >= 0; j--) {
                    final int k = i + j - (newLineSequence.length - 1);
                    match &= k >= 0 && data[k] == newLineSequence[j];
                }
                if (match) {
                    return newLineSequence.length;
                }
            }
            return 0;
        }
    }

}