1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.io.input;
18
19 import java.io.Closeable;
20 import java.io.File;
21 import java.io.IOException;
22 import java.io.UnsupportedEncodingException;
23 import java.nio.ByteBuffer;
24 import java.nio.channels.SeekableByteChannel;
25 import java.nio.charset.Charset;
26 import java.nio.charset.CharsetEncoder;
27 import java.nio.charset.StandardCharsets;
28 import java.nio.file.Files;
29 import java.nio.file.Path;
30 import java.nio.file.StandardOpenOption;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.Iterator;
35 import java.util.List;
36
37 import org.apache.commons.io.Charsets;
38 import org.apache.commons.io.FileSystem;
39 import org.apache.commons.io.StandardLineSeparator;
40 import org.apache.commons.io.build.AbstractStreamBuilder;
41 import org.apache.commons.io.function.IOIterable;
42 import org.apache.commons.io.function.IOIterator;
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67 public class ReversedLinesFileReader implements Closeable, IOIterable<String> {
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88 public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
89
90
91
92
93 public Builder() {
94 setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
95 setBufferSize(DEFAULT_BLOCK_SIZE);
96 }
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121 @Override
122 public ReversedLinesFileReader get() throws IOException {
123 return new ReversedLinesFileReader(this);
124 }
125
126 }
127
128 private final class FilePart {
129 private final long partNumber;
130
131 private final byte[] data;
132
133 private byte[] leftOver;
134
135 private int currentLastBytePos;
136
137
138
139
140
141
142
143
144
145 private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
146 this.partNumber = partNumber;
147 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
148 this.data = new byte[dataLength];
149 final long off = (partNumber - 1) * blockSize;
150
151
152 if (partNumber > 0 ) {
153 channel.position(off);
154 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
155 if (countRead != length) {
156 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
157 }
158 }
159
160 if (leftOverOfLastFilePart != null) {
161 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
162 }
163 this.currentLastBytePos = data.length - 1;
164 this.leftOver = null;
165 }
166
167
168
169
170 private void createLeftOver() {
171 final int lineLengthBytes = currentLastBytePos + 1;
172 if (lineLengthBytes > 0) {
173
174 leftOver = Arrays.copyOf(data, lineLengthBytes);
175 } else {
176 leftOver = null;
177 }
178 currentLastBytePos = -1;
179 }
180
181
182
183
184
185
186
187
188 private int getNewLineMatchByteCount(final byte[] data, final int i) {
189 for (final byte[] newLineSequence : newLineSequences) {
190 boolean match = true;
191 for (int j = newLineSequence.length - 1; j >= 0; j--) {
192 final int k = i + j - (newLineSequence.length - 1);
193 match &= k >= 0 && data[k] == newLineSequence[j];
194 }
195 if (match) {
196 return newLineSequence.length;
197 }
198 }
199 return 0;
200 }
201
202
203
204
205
206
207 private String readLine() {
208
209 String line = null;
210 int newLineMatchByteCount;
211
212 final boolean isLastFilePart = partNumber == 1;
213
214 int i = currentLastBytePos;
215 while (i > -1) {
216
217 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
218
219
220 createLeftOver();
221 break;
222 }
223
224
225 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 ) {
226 final int lineStart = i + 1;
227 final int lineLengthBytes = currentLastBytePos - lineStart + 1;
228
229 if (lineLengthBytes < 0) {
230 throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
231 }
232 final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
233
234 line = new String(lineData, charset);
235
236 currentLastBytePos = i - newLineMatchByteCount;
237 break;
238 }
239
240
241 i -= byteDecrement;
242
243
244 if (i < 0) {
245 createLeftOver();
246 break;
247 }
248 }
249
250
251 if (isLastFilePart && leftOver != null) {
252
253 line = new String(leftOver, charset);
254 leftOver = null;
255 }
256
257 return line;
258 }
259
260
261
262
263
264
265
266 private FilePart rollOver() throws IOException {
267
268 if (currentLastBytePos > -1) {
269 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
270 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
271 }
272
273 if (partNumber > 1) {
274 return new FilePart(partNumber - 1, blockSize, leftOver);
275 }
276
277 if (leftOver != null) {
278 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
279 + new String(leftOver, charset));
280 }
281 return null;
282 }
283 }
284
285 private static final String EMPTY_STRING = "";
286
287 private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
288
289
290
291
292
293
294
295 public static Builder builder() {
296 return new Builder();
297 }
298
299 private final int blockSize;
300 private final Charset charset;
301 private final SeekableByteChannel channel;
302 private final long totalByteLength;
303 private final long totalBlockCount;
304 private final byte[][] newLineSequences;
305 private final int avoidNewlineSplitBufferSize;
306 private final int byteDecrement;
307 private FilePart currentFilePart;
308 private boolean trailingNewlineOfFileSkipped;
309
/**
 * Constructs a reader from the given builder: validates the charset, opens the file and loads its last block.
 *
 * @param builder supplies the path, the block size (buffer size) and the charset.
 * @throws UnsupportedEncodingException if the charset is UTF-16 without explicit byte order, or is not
 *                                      one of the encodings this reader can scan backwards.
 * @throws IOException                  if the file cannot be opened or read.
 */
private ReversedLinesFileReader(final Builder builder) throws IOException {
    this.blockSize = builder.getBufferSize();
    this.charset = Charsets.toCharset(builder.getCharset());

    // Decide how many bytes to step back per iteration when searching for newlines.
    final CharsetEncoder charsetEncoder = this.charset.newEncoder();
    final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
    if (maxBytesPerChar == 1f || StandardCharsets.UTF_8.equals(this.charset)) {
        // Single-byte encodings and UTF-8 can be scanned byte by byte.
        byteDecrement = 1;
    } else if (isCharset(this.charset, "Shift_JIS")
            || isCharset(this.charset, "windows-31j")
            || isCharset(this.charset, "x-windows-949")
            || isCharset(this.charset, "gbk")
            || isCharset(this.charset, "x-windows-950")) {
        // These multi-byte encodings are scanned byte by byte as well.
        byteDecrement = 1;
    } else if (StandardCharsets.UTF_16BE.equals(this.charset) || StandardCharsets.UTF_16LE.equals(this.charset)) {
        // Fixed two-byte code units.
        byteDecrement = 2;
    } else if (StandardCharsets.UTF_16.equals(this.charset)) {
        throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
    } else {
        throw new UnsupportedEncodingException("Encoding " + charset + " is not supported yet (feel free to submit a patch)");
    }

    // Sequences are matched in array order, so CRLF must come before LF and CR.
    this.newLineSequences = new byte[][] { StandardLineSeparator.CRLF.getBytes(this.charset), StandardLineSeparator.LF.getBytes(this.charset),
            StandardLineSeparator.CR.getBytes(this.charset) };
    this.avoidNewlineSplitBufferSize = newLineSequences[0].length;

    this.channel = Files.newByteChannel(builder.getPath(), StandardOpenOption.READ);
    try {
        this.totalByteLength = channel.size();
        int lastBlockLength = (int) (this.totalByteLength % blockSize);
        if (lastBlockLength > 0) {
            // The final block is only partially filled.
            this.totalBlockCount = this.totalByteLength / blockSize + 1;
        } else {
            this.totalBlockCount = this.totalByteLength / blockSize;
            if (this.totalByteLength > 0) {
                // The file length is an exact multiple of the block size: the final block is full.
                lastBlockLength = blockSize;
            }
        }
        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
    } catch (final IOException | RuntimeException e) {
        // The caller never receives this instance, so nobody else could close the channel.
        try {
            channel.close();
        } catch (final IOException closeException) {
            e.addSuppressed(closeException);
        }
        throw e;
    }
}

/**
 * Tests whether {@code charset} is the charset registered under {@code name}, without failing
 * when the running JVM does not provide that charset.
 *
 * @param charset the charset to test.
 * @param name    a charset name or alias.
 * @return {@code true} if the named charset is supported and equal to {@code charset}.
 */
private static boolean isCharset(final Charset charset, final String name) {
    return Charset.isSupported(name) && Charset.forName(name).equals(charset);
}
355
356
357
358
359
360
361
362
/**
 * Constructs a reader for the given file using {@code DEFAULT_BLOCK_SIZE} and the platform default charset.
 *
 * @param file the file to be read.
 * @throws IOException if the reader cannot be created.
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
 */
@Deprecated
public ReversedLinesFileReader(final File file) throws IOException {
    this(file.toPath(), DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
}
367
368
369
370
371
372
373
374
375
376
377
/**
 * Constructs a reader for the given file and charset using {@code DEFAULT_BLOCK_SIZE}.
 *
 * @param file    the file to be read.
 * @param charset the charset of the file.
 * @throws IOException if the reader cannot be created.
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
 */
@Deprecated
public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
    this(file.toPath(), DEFAULT_BLOCK_SIZE, charset);
}
382
383
384
385
386
387
388
389
390
391
392
393
394
/**
 * Constructs a reader for the given file, block size and charset.
 *
 * @param file      the file to be read.
 * @param blockSize the number of bytes read per block.
 * @param charset   the charset of the file.
 * @throws IOException if the reader cannot be created.
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
 */
@Deprecated
public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
    this(builder()
            .setPath(file.toPath())
            .setBufferSize(blockSize)
            .setCharset(charset));
}
399
400
401
402
403
404
405
406
407
408
409
410
411
/**
 * Constructs a reader for the given file, block size and charset name.
 *
 * @param file        the file to be read.
 * @param blockSize   the number of bytes read per block.
 * @param charsetName the name of the charset of the file; resolved with {@code Charsets.toCharset(String)}.
 * @throws IOException if the reader cannot be created.
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
 */
@Deprecated
public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
    this(file.toPath(), blockSize, Charsets.toCharset(charsetName));
}
416
417
418
419
420
421
422
423
424
425
426
/**
 * Constructs a reader for the given path and charset using {@code DEFAULT_BLOCK_SIZE}.
 *
 * @param file    the file to be read.
 * @param charset the charset of the file.
 * @throws IOException if the reader cannot be created.
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
    this(builder()
            .setPath(file)
            .setBufferSize(DEFAULT_BLOCK_SIZE)
            .setCharset(charset));
}
431
432
433
434
435
436
437
438
439
440
441
442
443
/**
 * Constructs a reader for the given path, block size and charset by configuring a {@link Builder}.
 *
 * @param file      the file to be read.
 * @param blockSize the number of bytes read per block.
 * @param charset   the charset of the file.
 * @throws IOException if the reader cannot be created.
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
    this(builder()
            .setPath(file)
            .setBufferSize(blockSize)
            .setCharset(charset));
}
448
449
450
451
452
453
454
455
456
457
458
459
460
461
/**
 * Constructs a reader for the given path, block size and charset name.
 *
 * @param file        the file to be read.
 * @param blockSize   the number of bytes read per block.
 * @param charsetName the name of the charset of the file; resolved with {@code Charsets.toCharset(String)}
 *                    before anything else is done.
 * @throws IOException if the reader cannot be created.
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
    this(
            file,
            blockSize,
            Charsets.toCharset(charsetName));
}
466
467
468
469
470
471
472 @Override
473 public void close() throws IOException {
474 channel.close();
475 }
476
477 @Override
478 public IOIterator<String> iterator() {
479 return new IOIterator<String>() {
480
481 private String next;
482
483 @Override
484 public boolean hasNext() throws IOException {
485 if (next == null) {
486 next = readLine();
487 }
488 return next != null;
489 }
490
491 @Override
492 public String next() throws IOException {
493 if (next == null) {
494 next = readLine();
495 }
496 final String tmp = next;
497 next = null;
498 return tmp;
499 }
500
501 @Override
502 public Iterator<String> unwrap() {
503 return null;
504 }
505
506 };
507 }
508
509
510
511
512
513
514
515 public String readLine() throws IOException {
516 String line = currentFilePart.readLine();
517 while (line == null) {
518 currentFilePart = currentFilePart.rollOver();
519 if (currentFilePart == null) {
520
521 break;
522 }
523 line = currentFilePart.readLine();
524 }
525
526 if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
527 trailingNewlineOfFileSkipped = true;
528 line = readLine();
529 }
530 return line;
531 }
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548 public List<String> readLines(final int lineCount) throws IOException {
549 if (lineCount < 0) {
550 throw new IllegalArgumentException("lineCount < 0");
551 }
552 final ArrayList<String> arrayList = new ArrayList<>(lineCount);
553 for (int i = 0; i < lineCount; i++) {
554 final String line = readLine();
555 if (line == null) {
556 return arrayList;
557 }
558 arrayList.add(line);
559 }
560 return arrayList;
561 }
562
563
564
565
566
567
568
569
570
571
572
573
574
575 public String toString(final int lineCount) throws IOException {
576 final List<String> lines = readLines(lineCount);
577 Collections.reverse(lines);
578 return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
579 }
580
581 @Override
582 public Iterable<String> unwrap() {
583 return null;
584 }
585
586 }