View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import static org.apache.commons.csv.Constants.CR;
21  import static org.apache.commons.csv.Constants.END_OF_STREAM;
22  import static org.apache.commons.csv.Constants.LF;
23  import static org.apache.commons.csv.Constants.UNDEFINED;
24  
25  import java.io.BufferedReader;
26  import java.io.IOException;
27  import java.io.Reader;
28  
29  /**
30   * A special buffered reader which supports sophisticated read access.
31   * <p>
32   * In particular the reader supports a look-ahead option, which allows you to see the next char returned by
33   * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
34   * </p>
35   *
36   * @version $Id: ExtendedBufferedReader.java 1635146 2014-10-29 14:31:07Z ggregory $
37   */
38  final class ExtendedBufferedReader extends BufferedReader {
39  
40      /** The last char returned */
41      private int lastChar = UNDEFINED;
42  
43      /** The count of EOLs (CR/LF/CRLF) seen so far */
44      private long eolCounter;
45  
46      /** The position, which is number of characters read so far */
47      private long position;
48  
49      private boolean closed;
50  
51      /**
52       * Created extended buffered reader using default buffer-size
53       */
54      ExtendedBufferedReader(final Reader reader) {
55          super(reader);
56      }
57  
58      @Override
59      public int read() throws IOException {
60          final int current = super.read();
61          if (current == CR || (current == LF && lastChar != CR)) {
62              eolCounter++;
63          }
64          lastChar = current;
65          this.position++;
66          return lastChar;
67      }
68  
69      /**
70       * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
71       * any of the read methods. This will not include a character read using the {@link #lookAhead()} method. If no
72       * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached
73       * on the last read then this will return {@link Constants#END_OF_STREAM}.
74       *
75       * @return the last character that was read
76       */
77      int getLastChar() {
78          return lastChar;
79      }
80  
81      @Override
82      public int read(final char[] buf, final int offset, final int length) throws IOException {
83          if (length == 0) {
84              return 0;
85          }
86  
87          final int len = super.read(buf, offset, length);
88  
89          if (len > 0) {
90  
91              for (int i = offset; i < offset + len; i++) {
92                  final char ch = buf[i];
93                  if (ch == LF) {
94                      if (CR != (i > 0 ? buf[i - 1] : lastChar)) {
95                          eolCounter++;
96                      }
97                  } else if (ch == CR) {
98                      eolCounter++;
99                  }
100             }
101 
102             lastChar = buf[offset + len - 1];
103 
104         } else if (len == -1) {
105             lastChar = END_OF_STREAM;
106         }
107 
108         position += len;
109         return len;
110     }
111 
112     /**
113      * Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
114      * when processing a comment, otherwise information can be lost.
115      * <p>
116      * Increments {@link #eolCounter}
117      * <p>
118      * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF
119      *
120      * @return the line that was read, or null if reached EOF.
121      */
122     @Override
123     public String readLine() throws IOException {
124         final String line = super.readLine();
125 
126         if (line != null) {
127             lastChar = LF; // needed for detecting start of line
128             eolCounter++;
129         } else {
130             lastChar = END_OF_STREAM;
131         }
132 
133         return line;
134     }
135 
136     /**
137      * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
138      * still return this value. Does not affect line number or last character.
139      *
140      * @return the next character
141      *
142      * @throws IOException
143      *             if there is an error in reading
144      */
145     int lookAhead() throws IOException {
146         super.mark(1);
147         final int c = super.read();
148         super.reset();
149 
150         return c;
151     }
152 
153     /**
154      * Returns the current line number
155      *
156      * @return the current line number
157      */
158     long getCurrentLineNumber() {
159         // Check if we are at EOL or EOF or just starting
160         if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
161             return eolCounter; // counter is accurate
162         }
163         return eolCounter + 1; // Allow for counter being incremented only at EOL
164     }
165 
166     /**
167      * Gets the character position in the reader.
168      *
169      * @return the current position in the reader (counting characters, not bytes since this is a Reader)
170      */
171     long getPosition() {
172         return this.position;
173     }
174 
175     public boolean isClosed() {
176         return closed;
177     }
178 
179     /**
180      * Closes the stream.
181      *
182      * @throws IOException
183      *             If an I/O error occurs
184      */
185     @Override
186     public void close() throws IOException {
187         // Set ivars before calling super close() in case close() throws an IOException.
188         closed = true;
189         lastChar = END_OF_STREAM;
190         super.close();
191     }
192 
193 }