1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.io.input;
18
19 import java.io.Closeable;
20 import java.io.File;
21 import java.io.IOException;
22 import java.io.UnsupportedEncodingException;
23 import java.nio.ByteBuffer;
24 import java.nio.channels.SeekableByteChannel;
25 import java.nio.charset.Charset;
26 import java.nio.charset.CharsetEncoder;
27 import java.nio.charset.StandardCharsets;
28 import java.nio.file.Files;
29 import java.nio.file.Path;
30 import java.nio.file.StandardOpenOption;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.List;
35
36 import org.apache.commons.io.Charsets;
37 import org.apache.commons.io.FileSystem;
38 import org.apache.commons.io.StandardLineSeparator;
39 import org.apache.commons.io.build.AbstractOrigin;
40 import org.apache.commons.io.build.AbstractStreamBuilder;
41
42
43
44
45
46
47
48
49
50 public class ReversedLinesFileReader implements Closeable {
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67 public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
68
69
70
71
72 public Builder() {
73 setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
74 setBufferSize(DEFAULT_BLOCK_SIZE);
75 }
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91 @Override
92 public ReversedLinesFileReader get() throws IOException {
93 return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
94 }
95
96 }
97
98 private final class FilePart {
99 private final long no;
100
101 private final byte[] data;
102
103 private byte[] leftOver;
104
105 private int currentLastBytePos;
106
107
108
109
110
111
112
113
114
115 private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
116 this.no = no;
117 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
118 this.data = new byte[dataLength];
119 final long off = (no - 1) * blockSize;
120
121
122 if (no > 0 ) {
123 channel.position(off);
124 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
125 if (countRead != length) {
126 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
127 }
128 }
129
130 if (leftOverOfLastFilePart != null) {
131 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
132 }
133 this.currentLastBytePos = data.length - 1;
134 this.leftOver = null;
135 }
136
137
138
139
140 private void createLeftOver() {
141 final int lineLengthBytes = currentLastBytePos + 1;
142 if (lineLengthBytes > 0) {
143
144 leftOver = Arrays.copyOf(data, lineLengthBytes);
145 } else {
146 leftOver = null;
147 }
148 currentLastBytePos = -1;
149 }
150
151
152
153
154
155
156
157
158 private int getNewLineMatchByteCount(final byte[] data, final int i) {
159 for (final byte[] newLineSequence : newLineSequences) {
160 boolean match = true;
161 for (int j = newLineSequence.length - 1; j >= 0; j--) {
162 final int k = i + j - (newLineSequence.length - 1);
163 match &= k >= 0 && data[k] == newLineSequence[j];
164 }
165 if (match) {
166 return newLineSequence.length;
167 }
168 }
169 return 0;
170 }
171
172
173
174
175
176
177 private String readLine() {
178
179 String line = null;
180 int newLineMatchByteCount;
181
182 final boolean isLastFilePart = no == 1;
183
184 int i = currentLastBytePos;
185 while (i > -1) {
186
187 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
188
189
190 createLeftOver();
191 break;
192 }
193
194
195 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 ) {
196 final int lineStart = i + 1;
197 final int lineLengthBytes = currentLastBytePos - lineStart + 1;
198
199 if (lineLengthBytes < 0) {
200 throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
201 }
202 final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
203
204 line = new String(lineData, charset);
205
206 currentLastBytePos = i - newLineMatchByteCount;
207 break;
208 }
209
210
211 i -= byteDecrement;
212
213
214 if (i < 0) {
215 createLeftOver();
216 break;
217 }
218 }
219
220
221 if (isLastFilePart && leftOver != null) {
222
223 line = new String(leftOver, charset);
224 leftOver = null;
225 }
226
227 return line;
228 }
229
230
231
232
233
234
235
236 private FilePart rollOver() throws IOException {
237
238 if (currentLastBytePos > -1) {
239 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
240 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
241 }
242
243 if (no > 1) {
244 return new FilePart(no - 1, blockSize, leftOver);
245 }
246
247 if (leftOver != null) {
248 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
249 + new String(leftOver, charset));
250 }
251 return null;
252 }
253 }
254
/** The empty string; compared against lines in {@code readLine()} and returned by {@code toString(int)} when no lines were read. */
255 private static final String EMPTY_STRING = "";
256
/** Block size used when the caller does not pass one; obtained from {@code FileSystem.getCurrent().getBlockSize()}. */
257 private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
258
259
260
261
262
263
264
/**
 * Creates a new {@link Builder}.
 *
 * @return a new {@link Builder}
 */
265 public static Builder builder() {
266 return new Builder();
267 }
268
// Number of bytes per file part; also the multiplier used to compute a part's offset in the file.
269 private final int blockSize;
// Charset used to encode the newline sequences and to decode line bytes into strings.
270 private final Charset charset;
// Channel the file parts are read from; closed by close().
271 private final SeekableByteChannel channel;
// Size of the channel as reported when the reader was constructed.
272 private final long totalByteLength;
// Number of blocks needed to cover totalByteLength; 0 for an empty file.
273 private final long totalBlockCount;
// CRLF, LF and CR encoded in the charset, tried in that order when searching for a line end.
274 private final byte[][] newLineSequences;
// Length in bytes of the first (CRLF) newline sequence; bytes closer than this to a block start are left to the next part.
275 private final int avoidNewlineSplitBufferSize;
// Step width in bytes when scanning backwards (1 or 2, chosen by the constructor from the charset).
276 private final int byteDecrement;
// The part currently being scanned; set to null once the start of the file has been passed.
277 private FilePart currentFilePart;
// Set once the first empty line (the terminator at the end of the file) has been swallowed by readLine().
278 private boolean trailingNewlineOfFileSkipped;
279
280
281
282
283
284
285
286
287
/**
 * Creates a reader for the given file using {@code DEFAULT_BLOCK_SIZE} and the platform's default charset.
 *
 * @param file the file to be read
 * @throws IOException if the file cannot be opened or read
 */
@Deprecated
public ReversedLinesFileReader(final File file) throws IOException {
    // Goes directly to the Path-based constructor that the File overloads end up in.
    this(file.toPath(), DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
}
292
293
294
295
296
297
298
299
300
301
302
/**
 * Creates a reader for the given file using {@code DEFAULT_BLOCK_SIZE} and the given charset.
 *
 * @param file    the file to be read
 * @param charset the charset of the file
 * @throws IOException if the file cannot be opened or read
 */
@Deprecated
public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
    // Supplies the default block size here rather than in an intermediate overload.
    this(file.toPath(), DEFAULT_BLOCK_SIZE, charset);
}
307
308
309
310
311
312
313
314
315
316
317
318
319
/**
 * Creates a reader for the given file by converting it with {@link File#toPath()} and delegating to the
 * {@code (Path, int, Charset)} constructor.
 *
 * @param file      the file to be read
 * @param blockSize the block size, passed on unchanged
 * @param charset   the charset, passed on unchanged
 * @throws IOException if the delegate constructor throws it
 */
320 @Deprecated
321 public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
322 this(file.toPath(), blockSize, charset);
323 }
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
/**
 * Creates a reader for the given file with the given block size and a charset given by name.
 *
 * @param file        the file to be read
 * @param blockSize   the size of the blocks the file is read in
 * @param charsetName the name of the file's charset, resolved with {@code Charsets.toCharset(String)}
 * @throws IOException if the file cannot be opened or read
 */
@Deprecated
public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
    // The path is converted first and the charset name resolved second, as in the chained overloads.
    this(file.toPath(), blockSize, Charsets.toCharset(charsetName));
}
345
346
347
348
349
350
351
352
353
354
355
/**
 * Creates a reader for the given path using {@code DEFAULT_BLOCK_SIZE} and the given charset by delegating to the
 * {@code (Path, int, Charset)} constructor.
 *
 * @param file    the file to be read
 * @param charset the charset, passed on unchanged
 * @throws IOException if the delegate constructor throws it
 */
356 @Deprecated
357 public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
358 this(file, DEFAULT_BLOCK_SIZE, charset);
359 }
360
361
362
363
364
365
366
367
368
369
370
371
372
/**
 * Creates a reader that returns the lines of the given file from the last line to the first.
 * <p>
 * The charset decides how far the backwards scan steps at a time: one byte for single-byte charsets, UTF-8, Shift_JIS, windows-31j,
 * x-windows-949, GBK and x-windows-950; two bytes for UTF-16BE and UTF-16LE. Every other charset is rejected.
 * </p>
 *
 * @param file      the file to be read
 * @param blockSize the size in bytes of the blocks the file is read in; must be positive
 * @param charset   the charset of the file; resolved through {@code Charsets.toCharset(Charset)}
 *                  (presumably {@code null} selects the default charset - confirm against {@code Charsets})
 * @throws IllegalArgumentException     if {@code blockSize} is not positive
 * @throws UnsupportedEncodingException if the charset is UTF-16 without a byte order, or is not one of the supported charsets
 * @throws IOException                  if the file cannot be opened or its last block cannot be read
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
    // Reject a useless block size before any resource is opened; 0 would otherwise fail with a division by zero.
    if (blockSize <= 0) {
        throw new IllegalArgumentException("blockSize must be positive: " + blockSize);
    }
    this.blockSize = blockSize;
    this.charset = Charsets.toCharset(charset);

    // Charsets are compared with equals(): instance identity is not part of the Charset contract.
    final CharsetEncoder charsetEncoder = this.charset.newEncoder();
    final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
    if (maxBytesPerChar == 1f || StandardCharsets.UTF_8.equals(this.charset)) {
        byteDecrement = 1;
    } else if (isCharsetNamed(this.charset, "Shift_JIS")
            || isCharsetNamed(this.charset, "windows-31j")
            || isCharsetNamed(this.charset, "x-windows-949")
            || isCharsetNamed(this.charset, "gbk")
            || isCharsetNamed(this.charset, "x-windows-950")) {
        byteDecrement = 1;
    } else if (StandardCharsets.UTF_16BE.equals(this.charset) || StandardCharsets.UTF_16LE.equals(this.charset)) {
        byteDecrement = 2;
    } else if (StandardCharsets.UTF_16.equals(this.charset)) {
        throw new UnsupportedEncodingException(
                "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
    } else {
        // Report the charset actually in use, not the (possibly null) argument.
        throw new UnsupportedEncodingException(
                "Encoding " + this.charset + " is not supported yet (feel free to " + "submit a patch)");
    }

    // The longest sequence (CRLF) comes first so that it wins over a bare LF or CR.
    this.newLineSequences = new byte[][] {
        StandardLineSeparator.CRLF.getBytes(this.charset),
        StandardLineSeparator.LF.getBytes(this.charset),
        StandardLineSeparator.CR.getBytes(this.charset)
    };

    this.avoidNewlineSplitBufferSize = newLineSequences[0].length;

    final SeekableByteChannel openedChannel = Files.newByteChannel(file, StandardOpenOption.READ);
    this.channel = openedChannel;
    try {
        final long size = openedChannel.size();
        long blockCount = size / blockSize;
        int lastBlockLength = (int) (size % blockSize);
        if (lastBlockLength > 0) {
            // The file ends with a partial block.
            blockCount++;
        } else if (size > 0) {
            // The file ends exactly on a block boundary, so the last block is a full one.
            lastBlockLength = blockSize;
        }
        this.totalByteLength = size;
        this.totalBlockCount = blockCount;
        this.currentFilePart = new FilePart(blockCount, lastBlockLength, null);
    } catch (final IOException | RuntimeException e) {
        // No caller will ever get a reference to close, so release the channel here.
        try {
            openedChannel.close();
        } catch (final IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}

/**
 * Tests whether {@code charset} is the charset registered under {@code charsetName}, without failing when this JVM does not provide
 * that charset.
 */
private static boolean isCharsetNamed(final Charset charset, final String charsetName) {
    return Charset.isSupported(charsetName) && Charset.forName(charsetName).equals(charset);
}
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
/**
 * Creates a reader for the given path by resolving the charset name with {@code Charsets.toCharset(String)} and delegating to the
 * {@code (Path, int, Charset)} constructor.
 *
 * @param file        the file to be read
 * @param blockSize   the block size, passed on unchanged
 * @param charsetName the name of the charset to resolve
 * @throws IOException if the delegate constructor throws it
 */
447 @Deprecated
448 public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
449 this(file, blockSize, Charsets.toCharset(charsetName));
450 }
451
452
453
454
455
456
/**
 * Closes the channel this reader reads from.
 *
 * @throws IOException if closing the channel fails
 */
457 @Override
458 public void close() throws IOException {
459 channel.close();
460 }
461
462
463
464
465
466
467
468 public String readLine() throws IOException {
469
470 String line = currentFilePart.readLine();
471 while (line == null) {
472 currentFilePart = currentFilePart.rollOver();
473 if (currentFilePart == null) {
474
475 break;
476 }
477 line = currentFilePart.readLine();
478 }
479
480
481 if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
482 trailingNewlineOfFileSkipped = true;
483 line = readLine();
484 }
485
486 return line;
487 }
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504 public List<String> readLines(final int lineCount) throws IOException {
505 if (lineCount < 0) {
506 throw new IllegalArgumentException("lineCount < 0");
507 }
508 final ArrayList<String> arrayList = new ArrayList<>(lineCount);
509 for (int i = 0; i < lineCount; i++) {
510 final String line = readLine();
511 if (line == null) {
512 return arrayList;
513 }
514 arrayList.add(line);
515 }
516 return arrayList;
517 }
518
519
520
521
522
523
524
525
526
527
528
529
530
531 public String toString(final int lineCount) throws IOException {
532 final List<String> lines = readLines(lineCount);
533 Collections.reverse(lines);
534 return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
535 }
536
537 }