/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  
18  package org.apache.commons.csv;
19  
20  import static org.apache.commons.csv.Constants.CR;
21  import static org.apache.commons.csv.Constants.END_OF_STREAM;
22  import static org.apache.commons.csv.Constants.LF;
23  import static org.apache.commons.csv.Constants.UNDEFINED;
24  
25  import java.io.BufferedReader;
26  import java.io.IOException;
27  import java.io.Reader;
28  
29  /**
30   * A special buffered reader which supports sophisticated read access.
31   * <p>
32   * In particular the reader supports a look-ahead option, which allows you to see the next char returned by
33   * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
34   * </p>
35   */
36  final class ExtendedBufferedReader extends BufferedReader {
37  
38      /** The last char returned */
39      private int lastChar = UNDEFINED;
40  
41      /** The count of EOLs (CR/LF/CRLF) seen so far */
42      private long eolCounter;
43  
44      /** The position, which is number of characters read so far */
45      private long position;
46  
47      private boolean closed;
48  
49      /**
50       * Created extended buffered reader using default buffer-size
51       */
52      ExtendedBufferedReader(final Reader reader) {
53          super(reader);
54      }
55  
56      /**
57       * Closes the stream.
58       *
59       * @throws IOException
60       *             If an I/O error occurs
61       */
62      @Override
63      public void close() throws IOException {
64          // Set ivars before calling super close() in case close() throws an IOException.
65          closed = true;
66          lastChar = END_OF_STREAM;
67          super.close();
68      }
69  
70      /**
71       * Returns the current line number
72       *
73       * @return the current line number
74       */
75      long getCurrentLineNumber() {
76          // Check if we are at EOL or EOF or just starting
77          if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
78              return eolCounter; // counter is accurate
79          }
80          return eolCounter + 1; // Allow for counter being incremented only at EOL
81      }
82  
83      /**
84       * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
85       * any of the read methods. This will not include a character read using the {@link #lookAhead()} method. If no
86       * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached
87       * on the last read then this will return {@link Constants#END_OF_STREAM}.
88       *
89       * @return the last character that was read
90       */
91      int getLastChar() {
92          return lastChar;
93      }
94  
95      /**
96       * Gets the character position in the reader.
97       *
98       * @return the current position in the reader (counting characters, not bytes since this is a Reader)
99       */
100     long getPosition() {
101         return this.position;
102     }
103 
104     public boolean isClosed() {
105         return closed;
106     }
107 
108     /**
109      * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
110      * still return this value. Does not affect line number or last character.
111      *
112      * @return the next character
113      *
114      * @throws IOException
115      *             If an I/O error occurs
116      */
117     int lookAhead() throws IOException {
118         super.mark(1);
119         final int c = super.read();
120         super.reset();
121 
122         return c;
123     }
124 
125     /**
126      * Populates the buffer with the next {@code buf.length} characters in the
127      * current reader without consuming them. The next call to {@link #read()} will
128      * still return the next value. This doesn't affect line number or last
129      * character.
130      *
131      * @param buf the buffer to fill for the look ahead.
132      * @return the buffer itself
133      * @throws IOException If an I/O error occurs
134      */
135     char[] lookAhead(final char[] buf) throws IOException {
136         final int n = buf.length;
137         super.mark(n);
138         super.read(buf, 0, n);
139         super.reset();
140 
141         return buf;
142     }
143 
144     /**
145      * Returns the next n characters in the current reader without consuming them. The next call to {@link #read()} will still return the next value. This
146      * doesn't affect line number or last character.
147      *
148      * @param n the number characters look ahead.
149      * @return the next n characters.
150      * @throws IOException If an I/O error occurs
151      */
152     char[] lookAhead(final int n) throws IOException {
153         final char[] buf = new char[n];
154         return lookAhead(buf);
155     }
156 
157     @Override
158     public int read() throws IOException {
159         final int current = super.read();
160         if (current == CR || current == LF && lastChar != CR ||
161             current == END_OF_STREAM && lastChar != CR && lastChar != LF && lastChar != END_OF_STREAM) {
162             eolCounter++;
163         }
164         lastChar = current;
165         position++;
166         return lastChar;
167     }
168 
169     @Override
170     public int read(final char[] buf, final int offset, final int length) throws IOException {
171         if (length == 0) {
172             return 0;
173         }
174 
175         final int len = super.read(buf, offset, length);
176 
177         if (len > 0) {
178 
179             for (int i = offset; i < offset + len; i++) {
180                 final char ch = buf[i];
181                 if (ch == LF) {
182                     if (CR != (i > offset ? buf[i - 1] : lastChar)) {
183                         eolCounter++;
184                     }
185                 } else if (ch == CR) {
186                     eolCounter++;
187                 }
188             }
189 
190             lastChar = buf[offset + len - 1];
191 
192         } else if (len == -1) {
193             lastChar = END_OF_STREAM;
194         }
195 
196         position += len;
197         return len;
198     }
199 
200     /**
201      * Gets the next line, dropping the line terminator(s). This method should only be called when processing a
202      * comment, otherwise information can be lost.
203      * <p>
204      * Increments {@link #eolCounter} and updates {@link #position}.
205      * </p>
206      * <p>
207      * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise the last EOL character.
208      * </p>
209      *
210      * @return the line that was read, or null if reached EOF.
211      */
212     @Override
213     public String readLine() throws IOException {
214         if (lookAhead() == END_OF_STREAM) {
215             return null;
216         }
217         final StringBuilder buffer = new StringBuilder();
218         while (true) {
219             final int current = read();
220             if (current == CR) {
221                 final int next = lookAhead();
222                 if (next == LF) {
223                     read();
224                 }
225             }
226             if (current == END_OF_STREAM || current == LF || current == CR) {
227                 break;
228             }
229             buffer.append((char) current);
230         }
231         return buffer.toString();
232     }
233 
234 }