1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.io.input;
18
19 import java.io.Closeable;
20 import java.io.File;
21 import java.io.IOException;
22 import java.io.UnsupportedEncodingException;
23 import java.nio.ByteBuffer;
24 import java.nio.channels.SeekableByteChannel;
25 import java.nio.charset.Charset;
26 import java.nio.charset.CharsetEncoder;
27 import java.nio.charset.StandardCharsets;
28 import java.nio.file.Files;
29 import java.nio.file.Path;
30 import java.nio.file.StandardOpenOption;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.Iterator;
35 import java.util.List;
36
37 import org.apache.commons.io.Charsets;
38 import org.apache.commons.io.FileSystem;
39 import org.apache.commons.io.StandardLineSeparator;
40 import org.apache.commons.io.build.AbstractStreamBuilder;
41 import org.apache.commons.io.function.IOIterable;
42 import org.apache.commons.io.function.IOIterator;
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67 public class ReversedLinesFileReader implements Closeable, IOIterable<String> {
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88 public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
89
90
91
92
93 public Builder() {
94 setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
95 setBufferSize(DEFAULT_BLOCK_SIZE);
96 }
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121 @Override
122 public ReversedLinesFileReader get() throws IOException {
123 return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
124 }
125
126 }
127
128 private final class FilePart {
129 private final long partNumber;
130
131 private final byte[] data;
132
133 private byte[] leftOver;
134
135 private int currentLastBytePos;
136
137
138
139
140
141
142
143
144
145 private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
146 this.partNumber = partNumber;
147 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
148 this.data = new byte[dataLength];
149 final long off = (partNumber - 1) * blockSize;
150
151
152 if (partNumber > 0 ) {
153 channel.position(off);
154 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
155 if (countRead != length) {
156 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
157 }
158 }
159
160 if (leftOverOfLastFilePart != null) {
161 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
162 }
163 this.currentLastBytePos = data.length - 1;
164 this.leftOver = null;
165 }
166
167
168
169
170 private void createLeftOver() {
171 final int lineLengthBytes = currentLastBytePos + 1;
172 if (lineLengthBytes > 0) {
173
174 leftOver = Arrays.copyOf(data, lineLengthBytes);
175 } else {
176 leftOver = null;
177 }
178 currentLastBytePos = -1;
179 }
180
181
182
183
184
185
186
187
188 private int getNewLineMatchByteCount(final byte[] data, final int i) {
189 for (final byte[] newLineSequence : newLineSequences) {
190 boolean match = true;
191 for (int j = newLineSequence.length - 1; j >= 0; j--) {
192 final int k = i + j - (newLineSequence.length - 1);
193 match &= k >= 0 && data[k] == newLineSequence[j];
194 }
195 if (match) {
196 return newLineSequence.length;
197 }
198 }
199 return 0;
200 }
201
202
203
204
205
206
207 private String readLine() {
208
209 String line = null;
210 int newLineMatchByteCount;
211
212 final boolean isLastFilePart = partNumber == 1;
213
214 int i = currentLastBytePos;
215 while (i > -1) {
216
217 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
218
219
220 createLeftOver();
221 break;
222 }
223
224
225 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 ) {
226 final int lineStart = i + 1;
227 final int lineLengthBytes = currentLastBytePos - lineStart + 1;
228
229 if (lineLengthBytes < 0) {
230 throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
231 }
232 final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
233
234 line = new String(lineData, charset);
235
236 currentLastBytePos = i - newLineMatchByteCount;
237 break;
238 }
239
240
241 i -= byteDecrement;
242
243
244 if (i < 0) {
245 createLeftOver();
246 break;
247 }
248 }
249
250
251 if (isLastFilePart && leftOver != null) {
252
253 line = new String(leftOver, charset);
254 leftOver = null;
255 }
256
257 return line;
258 }
259
260
261
262
263
264
265
266 private FilePart rollOver() throws IOException {
267
268 if (currentLastBytePos > -1) {
269 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
270 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
271 }
272
273 if (partNumber > 1) {
274 return new FilePart(partNumber - 1, blockSize, leftOver);
275 }
276
277 if (leftOver != null) {
278 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
279 + new String(leftOver, charset));
280 }
281 return null;
282 }
283 }
284
285 private static final String EMPTY_STRING = "";
286
287 private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
288
289
290
291
292
293
294
295 public static Builder builder() {
296 return new Builder();
297 }
298
299 private final int blockSize;
300 private final Charset charset;
301 private final SeekableByteChannel channel;
302 private final long totalByteLength;
303 private final long totalBlockCount;
304 private final byte[][] newLineSequences;
305 private final int avoidNewlineSplitBufferSize;
306 private final int byteDecrement;
307 private FilePart currentFilePart;
308 private boolean trailingNewlineOfFileSkipped;
309
310
311
312
313
314
315
316
/**
 * Creates a reader for the given file using {@code DEFAULT_BLOCK_SIZE} and the platform's default charset.
 *
 * @param file the file to be read.
 * @throws IOException if an I/O error occurs.
 * @deprecated Use {@link #builder()}.
 */
@Deprecated
public ReversedLinesFileReader(final File file) throws IOException {
    this(file.toPath(), DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
}
321
322
323
324
325
326
327
328
329
330
331
/**
 * Creates a reader for the given file and charset using {@code DEFAULT_BLOCK_SIZE}.
 *
 * @param file the file to be read.
 * @param charset the charset of the file.
 * @throws IOException if an I/O error occurs.
 * @deprecated Use {@link #builder()}.
 */
@Deprecated
public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
    this(file.toPath(), DEFAULT_BLOCK_SIZE, charset);
}
336
337
338
339
340
341
342
343
344
345
346
347
348
/**
 * Creates a reader for the given file, block size and charset.
 *
 * @param file the file to be read.
 * @param blockSize the size in bytes of the blocks the file is read in.
 * @param charset the charset of the file.
 * @throws IOException if an I/O error occurs.
 * @deprecated Use {@link #builder()}.
 */
@Deprecated
public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
    this(file.toPath(), blockSize, charset);
}
353
354
355
356
357
358
359
360
361
362
363
364
365
/**
 * Creates a reader for the given file and block size, with the charset given by name.
 *
 * @param file the file to be read.
 * @param blockSize the size in bytes of the blocks the file is read in.
 * @param charsetName the name of the charset of the file; resolved with {@code Charsets.toCharset(String)}.
 * @throws IOException if an I/O error occurs.
 * @deprecated Use {@link #builder()}.
 */
@Deprecated
public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
    this(file.toPath(), blockSize, Charsets.toCharset(charsetName));
}
370
371
372
373
374
375
376
377
378
379
380
/**
 * Creates a reader for the given path and charset using {@code DEFAULT_BLOCK_SIZE}.
 *
 * @param file the file to be read.
 * @param charset the charset of the file.
 * @throws IOException if an I/O error occurs.
 * @deprecated Use {@link #builder()}.
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
    this(file, DEFAULT_BLOCK_SIZE, charset);
}
385
386
387
388
389
390
391
392
393
394
395
396
397
/**
 * Creates a reader for the given path, block size and charset, and reads the last block of the file.
 *
 * Supported charsets are those whose encoder reports one byte per char at most, UTF-8, UTF-16BE, UTF-16LE,
 * Shift_JIS, windows-31j, x-windows-949, gbk and x-windows-950.
 *
 * @param file the file to be read.
 * @param blockSize the size in bytes of the blocks the file is read in; must be positive.
 * @param charset the charset of the file; passed through {@code Charsets.toCharset(Charset)}.
 * @throws IllegalArgumentException if {@code blockSize} is not positive.
 * @throws UnsupportedEncodingException if the charset is UTF-16 without byte order, or is not supported.
 * @throws IOException if opening or reading the file fails; the channel is closed again in that case.
 * @deprecated Use {@link #builder()}.
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
    // Reject this before anything is opened: 0 would divide by zero below, a negative size corrupts the block math.
    if (blockSize <= 0) {
        throw new IllegalArgumentException("blockSize must be positive: " + blockSize);
    }
    this.blockSize = blockSize;
    this.charset = Charsets.toCharset(charset);

    // --- check & prepare encoding ---
    final float maxBytesPerChar = this.charset.newEncoder().maxBytesPerChar();
    if (maxBytesPerChar == 1f || StandardCharsets.UTF_8.equals(this.charset)) {
        // Single-byte encodings can be scanned byte by byte. So can UTF-8: the bytes of a multi-byte sequence
        // are never in the ASCII range, so they cannot be mistaken for CR or LF.
        byteDecrement = 1;
    } else if (isCharsetNamed(this.charset, "Shift_JIS")
            || isCharsetNamed(this.charset, "windows-31j")
            || isCharsetNamed(this.charset, "x-windows-949")
            || isCharsetNamed(this.charset, "gbk")
            || isCharsetNamed(this.charset, "x-windows-950")) {
        // NOTE(review): treated like UTF-8, presumably because a trail byte in these double-byte charsets can
        // never equal CR or LF -- confirm against the charset specifications.
        byteDecrement = 1;
    } else if (StandardCharsets.UTF_16BE.equals(this.charset) || StandardCharsets.UTF_16LE.equals(this.charset)) {
        // Scanned two bytes at a time.
        // NOTE(review): surrogate pairs are not handled specially here -- presumably safe because no code unit of
        // a pair can equal an encoded CR or LF; confirm.
        byteDecrement = 2;
    } else if (StandardCharsets.UTF_16.equals(this.charset)) {
        throw new UnsupportedEncodingException(
                "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
    } else {
        throw new UnsupportedEncodingException(
                "Encoding " + this.charset + " is not supported yet (feel free to " + "submit a patch)");
    }

    // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n.
    this.newLineSequences = new byte[][] {
        StandardLineSeparator.CRLF.getBytes(this.charset),
        StandardLineSeparator.LF.getBytes(this.charset),
        StandardLineSeparator.CR.getBytes(this.charset)
    };

    this.avoidNewlineSplitBufferSize = newLineSequences[0].length;

    // --- open the file and read its last block ---
    this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
    try {
        final long size = channel.size();
        final long fullBlockCount = size / blockSize;
        int lastBlockLength = (int) (size % blockSize);
        this.totalByteLength = size;
        if (lastBlockLength > 0) {
            // The file ends with a partial block.
            this.totalBlockCount = fullBlockCount + 1;
        } else {
            this.totalBlockCount = fullBlockCount;
            if (size > 0) {
                // The file ends exactly on a block boundary: the last block is a full one.
                lastBlockLength = blockSize;
            }
        }
        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
    } catch (final IOException | RuntimeException e) {
        // The caller never receives this instance, so nobody else could close the channel.
        try {
            channel.close();
        } catch (final IOException closeException) {
            e.addSuppressed(closeException);
        }
        throw e;
    }
}

/**
 * Tests whether {@code charset} is the charset registered under {@code charsetName}, without failing when the
 * running JVM does not provide that charset.
 *
 * @param charset the charset to test.
 * @param charsetName a legal charset name.
 * @return {@code true} if the named charset is available and equal to {@code charset}.
 */
private static boolean isCharsetNamed(final Charset charset, final String charsetName) {
    // isSupported() guards forName(), which throws UnsupportedCharsetException for unavailable charsets.
    return Charset.isSupported(charsetName) && charset.equals(Charset.forName(charsetName));
}
454
455
456
457
458
459
460
461
462
463
464
465
466
467
/**
 * Creates a reader for the given path and block size, with the charset given by name.
 *
 * @param file the file to be read.
 * @param blockSize the size in bytes of the blocks the file is read in.
 * @param charsetName the name of the charset of the file; resolved with {@code Charsets.toCharset(String)}.
 * @throws IOException if an I/O error occurs.
 * @deprecated Use {@link #builder()}.
 */
@Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
    this(file, blockSize, Charsets.toCharset(charsetName));
}
472
473
474
475
476
477
478 @Override
479 public void close() throws IOException {
480 channel.close();
481 }
482
483 @Override
484 public IOIterator<String> iterator() {
485 return new IOIterator<String>() {
486
487 private String next;
488
489 @Override
490 public boolean hasNext() throws IOException {
491 if (next == null) {
492 next = readLine();
493 }
494 return next != null;
495 }
496
497 @Override
498 public String next() throws IOException {
499 if (next == null) {
500 next = readLine();
501 }
502 final String tmp = next;
503 next = null;
504 return tmp;
505 }
506
507 @Override
508 public Iterator<String> unwrap() {
509 return null;
510 }
511
512 };
513 }
514
515
516
517
518
519
520
521 public String readLine() throws IOException {
522 String line = currentFilePart.readLine();
523 while (line == null) {
524 currentFilePart = currentFilePart.rollOver();
525 if (currentFilePart == null) {
526
527 break;
528 }
529 line = currentFilePart.readLine();
530 }
531
532 if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
533 trailingNewlineOfFileSkipped = true;
534 line = readLine();
535 }
536 return line;
537 }
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554 public List<String> readLines(final int lineCount) throws IOException {
555 if (lineCount < 0) {
556 throw new IllegalArgumentException("lineCount < 0");
557 }
558 final ArrayList<String> arrayList = new ArrayList<>(lineCount);
559 for (int i = 0; i < lineCount; i++) {
560 final String line = readLine();
561 if (line == null) {
562 return arrayList;
563 }
564 arrayList.add(line);
565 }
566 return arrayList;
567 }
568
569
570
571
572
573
574
575
576
577
578
579
580
581 public String toString(final int lineCount) throws IOException {
582 final List<String> lines = readLines(lineCount);
583 Collections.reverse(lines);
584 return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
585 }
586
587 @Override
588 public Iterable<String> unwrap() {
589 return null;
590 }
591
592 }