1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.commons.csv;
21
22 import static org.apache.commons.csv.Constants.CR;
23 import static org.apache.commons.csv.Constants.LF;
24 import static org.apache.commons.csv.Constants.UNDEFINED;
25 import static org.apache.commons.io.IOUtils.EOF;
26
27 import java.io.IOException;
28 import java.io.Reader;
29 import java.nio.CharBuffer;
30 import java.nio.charset.CharacterCodingException;
31 import java.nio.charset.Charset;
32 import java.nio.charset.CharsetEncoder;
33
34 import org.apache.commons.io.IOUtils;
35 import org.apache.commons.io.input.UnsynchronizedBufferedReader;
36
37
38
39
40
41
42
43
44 final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
45
46
47 private int lastChar = UNDEFINED;
48 private int lastCharMark = UNDEFINED;
49
50
51 private long lineNumber;
52 private long lineNumberMark;
53
54
55 private long position;
56 private long positionMark;
57
58
59 private long bytesRead;
60 private long bytesReadMark;
61
62
63 private final CharsetEncoder encoder;
64
65
66
67
68 ExtendedBufferedReader(final Reader reader) {
69 this(reader, null, false);
70 }
71
72
73
74
75
76
77
78
79
80
81 ExtendedBufferedReader(final Reader reader, final Charset charset, final boolean trackBytes) {
82 super(reader);
83 encoder = charset != null && trackBytes ? charset.newEncoder() : null;
84 }
85
86
87
88
89
90
91
92 @Override
93 public void close() throws IOException {
94
95 lastChar = EOF;
96 super.close();
97 }
98
99
100
101
102
103
104 long getBytesRead() {
105 return this.bytesRead;
106 }
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134 private int getEncodedCharLength(final int current) throws CharacterCodingException {
135 final char cChar = (char) current;
136 final char lChar = (char) lastChar;
137 if (!Character.isSurrogate(cChar)) {
138 return encoder.encode(CharBuffer.wrap(new char[] { cChar })).limit();
139 }
140 if (Character.isHighSurrogate(cChar)) {
141
142 return 0;
143 } else if (Character.isSurrogatePair(lChar, cChar)) {
144 return encoder.encode(CharBuffer.wrap(new char[] { lChar, cChar })).limit();
145 } else {
146 throw new CharacterCodingException();
147 }
148 }
149
150
151
152
153
154
155
156
157
158 int getLastChar() {
159 return lastChar;
160 }
161
162
163
164
165
166
167 long getLineNumber() {
168
169 if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == EOF) {
170 return lineNumber;
171 }
172 return lineNumber + 1;
173 }
174
175
176
177
178
179
180 long getPosition() {
181 return this.position;
182 }
183
184 @Override
185 public void mark(final int readAheadLimit) throws IOException {
186 lineNumberMark = lineNumber;
187 lastCharMark = lastChar;
188 positionMark = position;
189 bytesReadMark = bytesRead;
190 super.mark(readAheadLimit);
191 }
192
193 @Override
194 public int read() throws IOException {
195 final int current = super.read();
196 if (current == CR || current == LF && lastChar != CR ||
197 current == EOF && lastChar != CR && lastChar != LF && lastChar != EOF) {
198 lineNumber++;
199 }
200 if (encoder != null) {
201 this.bytesRead += getEncodedCharLength(current);
202 }
203 lastChar = current;
204 position++;
205 return lastChar;
206 }
207
208 @Override
209 public int read(final char[] buf, final int offset, final int length) throws IOException {
210 if (length == 0) {
211 return 0;
212 }
213 final int len = super.read(buf, offset, length);
214 if (len > 0) {
215 for (int i = offset; i < offset + len; i++) {
216 final char ch = buf[i];
217 if (ch == LF) {
218 if (CR != (i > offset ? buf[i - 1] : lastChar)) {
219 lineNumber++;
220 }
221 } else if (ch == CR) {
222 lineNumber++;
223 }
224 }
225 lastChar = buf[offset + len - 1];
226 } else if (len == EOF) {
227 lastChar = EOF;
228 }
229 position += len;
230 return len;
231 }
232
233
234
235
236
237
238
239
240
241
242
243
244
245 @Override
246 public String readLine() throws IOException {
247 if (peek() == EOF) {
248 return null;
249 }
250 final StringBuilder buffer = new StringBuilder();
251 while (true) {
252 final int current = read();
253 if (current == CR) {
254 final int next = peek();
255 if (next == LF) {
256 read();
257 }
258 }
259 if (current == EOF || current == LF || current == CR) {
260 break;
261 }
262 buffer.append((char) current);
263 }
264 return buffer.toString();
265 }
266
267 @Override
268 public void reset() throws IOException {
269 lineNumber = lineNumberMark;
270 lastChar = lastCharMark;
271 position = positionMark;
272 bytesRead = bytesReadMark;
273 super.reset();
274 }
275
276 }