View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import static org.apache.commons.csv.Constants.CR;
21  import static org.apache.commons.csv.Constants.END_OF_STREAM;
22  import static org.apache.commons.csv.Constants.LF;
23  import static org.apache.commons.csv.Constants.UNDEFINED;
24  
25  import java.io.BufferedReader;
26  import java.io.IOException;
27  import java.io.Reader;
28  
29  /**
30   * A special buffered reader which supports sophisticated read access.
31   * <p>
32   * In particular the reader supports a look-ahead option, which allows you to see the next char returned by
33   * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
34   * </p>
35   */
36  final class ExtendedBufferedReader extends BufferedReader {
37  
38      /** The last char returned */
39      private int lastChar = UNDEFINED;
40  
41      /** The count of EOLs (CR/LF/CRLF) seen so far */
42      private long eolCounter;
43  
44      /** The position, which is number of characters read so far */
45      private long position;
46  
47      private boolean closed;
48  
49      /**
50       * Created extended buffered reader using default buffer-size
51       */
52      ExtendedBufferedReader(final Reader reader) {
53          super(reader);
54      }
55  
56      @Override
57      public int read() throws IOException {
58          final int current = super.read();
59          if (current == CR || current == LF && lastChar != CR) {
60              eolCounter++;
61          }
62          lastChar = current;
63          this.position++;
64          return lastChar;
65      }
66  
67      /**
68       * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
69       * any of the read methods. This will not include a character read using the {@link #lookAhead()} method. If no
70       * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached
71       * on the last read then this will return {@link Constants#END_OF_STREAM}.
72       *
73       * @return the last character that was read
74       */
75      int getLastChar() {
76          return lastChar;
77      }
78  
79      @Override
80      public int read(final char[] buf, final int offset, final int length) throws IOException {
81          if (length == 0) {
82              return 0;
83          }
84  
85          final int len = super.read(buf, offset, length);
86  
87          if (len > 0) {
88  
89              for (int i = offset; i < offset + len; i++) {
90                  final char ch = buf[i];
91                  if (ch == LF) {
92                      if (CR != (i > 0 ? buf[i - 1] : lastChar)) {
93                          eolCounter++;
94                      }
95                  } else if (ch == CR) {
96                      eolCounter++;
97                  }
98              }
99  
100             lastChar = buf[offset + len - 1];
101 
102         } else if (len == -1) {
103             lastChar = END_OF_STREAM;
104         }
105 
106         position += len;
107         return len;
108     }
109 
110     /**
111      * Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
112      * when processing a comment, otherwise information can be lost.
113      * <p>
114      * Increments {@link #eolCounter}
115      * <p>
116      * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF
117      *
118      * @return the line that was read, or null if reached EOF.
119      */
120     @Override
121     public String readLine() throws IOException {
122         final String line = super.readLine();
123 
124         if (line != null) {
125             lastChar = LF; // needed for detecting start of line
126             eolCounter++;
127         } else {
128             lastChar = END_OF_STREAM;
129         }
130 
131         return line;
132     }
133 
134     /**
135      * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
136      * still return this value. Does not affect line number or last character.
137      *
138      * @return the next character
139      *
140      * @throws IOException
141      *             if there is an error in reading
142      */
143     int lookAhead() throws IOException {
144         super.mark(1);
145         final int c = super.read();
146         super.reset();
147 
148         return c;
149     }
150 
151     /**
152      * Returns the current line number
153      *
154      * @return the current line number
155      */
156     long getCurrentLineNumber() {
157         // Check if we are at EOL or EOF or just starting
158         if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
159             return eolCounter; // counter is accurate
160         }
161         return eolCounter + 1; // Allow for counter being incremented only at EOL
162     }
163 
164     /**
165      * Gets the character position in the reader.
166      *
167      * @return the current position in the reader (counting characters, not bytes since this is a Reader)
168      */
169     long getPosition() {
170         return this.position;
171     }
172 
173     public boolean isClosed() {
174         return closed;
175     }
176 
177     /**
178      * Closes the stream.
179      *
180      * @throws IOException
181      *             If an I/O error occurs
182      */
183     @Override
184     public void close() throws IOException {
185         // Set ivars before calling super close() in case close() throws an IOException.
186         closed = true;
187         lastChar = END_OF_STREAM;
188         super.close();
189     }
190 
191 }