1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.csv; 19 20 import static org.apache.commons.csv.Constants.CR; 21 import static org.apache.commons.csv.Constants.END_OF_STREAM; 22 import static org.apache.commons.csv.Constants.LF; 23 import static org.apache.commons.csv.Constants.UNDEFINED; 24 25 import java.io.BufferedReader; 26 import java.io.IOException; 27 import java.io.Reader; 28 29 /** 30 * A special buffered reader which supports sophisticated read access. 31 * <p> 32 * In particular the reader supports a look-ahead option, which allows you to see the next char returned by 33 * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}. 34 * </p> 35 */ 36 final class ExtendedBufferedReader extends BufferedReader { 37 38 /** The last char returned */ 39 private int lastChar = UNDEFINED; 40 41 /** The count of EOLs (CR/LF/CRLF) seen so far */ 42 private long eolCounter; 43 44 /** The position, which is number of characters read so far */ 45 private long position; 46 47 private boolean closed; 48 49 /** 50 * Created extended buffered reader using default buffer-size 51 */ 52 ExtendedBufferedReader(final Reader reader) { 53 super(reader); 54 } 55 56 /** 57 * Closes the stream. 58 * 59 * @throws IOException 60 * If an I/O error occurs 61 */ 62 @Override 63 public void close() throws IOException { 64 // Set ivars before calling super close() in case close() throws an IOException. 65 closed = true; 66 lastChar = END_OF_STREAM; 67 super.close(); 68 } 69 70 /** 71 * Returns the current line number 72 * 73 * @return the current line number 74 */ 75 long getCurrentLineNumber() { 76 // Check if we are at EOL or EOF or just starting 77 if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) { 78 return eolCounter; // counter is accurate 79 } 80 return eolCounter + 1; // Allow for counter being incremented only at EOL 81 } 82 83 /** 84 * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by 85 * any of the read methods. This will not include a character read using the {@link #lookAhead()} method. If no 86 * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached 87 * on the last read then this will return {@link Constants#END_OF_STREAM}. 88 * 89 * @return the last character that was read 90 */ 91 int getLastChar() { 92 return lastChar; 93 } 94 95 /** 96 * Gets the character position in the reader. 97 * 98 * @return the current position in the reader (counting characters, not bytes since this is a Reader) 99 */ 100 long getPosition() { 101 return this.position; 102 } 103 104 public boolean isClosed() { 105 return closed; 106 } 107 108 /** 109 * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will 110 * still return this value. Does not affect line number or last character. 111 * 112 * @return the next character 113 * 114 * @throws IOException 115 * If an I/O error occurs 116 */ 117 int lookAhead() throws IOException { 118 super.mark(1); 119 final int c = super.read(); 120 super.reset(); 121 122 return c; 123 } 124 125 /** 126 * Populates the buffer with the next {@code buf.length} characters in the 127 * current reader without consuming them. The next call to {@link #read()} will 128 * still return the next value. This doesn't affect line number or last 129 * character. 130 * 131 * @param buf the buffer to fill for the look ahead. 132 * @return the buffer itself 133 * @throws IOException If an I/O error occurs 134 */ 135 char[] lookAhead(final char[] buf) throws IOException { 136 final int n = buf.length; 137 super.mark(n); 138 super.read(buf, 0, n); 139 super.reset(); 140 141 return buf; 142 } 143 144 /** 145 * Returns the next n characters in the current reader without consuming them. The next call to {@link #read()} will still return the next value. This 146 * doesn't affect line number or last character. 147 * 148 * @param n the number characters look ahead. 149 * @return the next n characters. 150 * @throws IOException If an I/O error occurs 151 */ 152 char[] lookAhead(final int n) throws IOException { 153 final char[] buf = new char[n]; 154 return lookAhead(buf); 155 } 156 157 @Override 158 public int read() throws IOException { 159 final int current = super.read(); 160 if (current == CR || current == LF && lastChar != CR || 161 current == END_OF_STREAM && lastChar != CR && lastChar != LF && lastChar != END_OF_STREAM) { 162 eolCounter++; 163 } 164 lastChar = current; 165 position++; 166 return lastChar; 167 } 168 169 @Override 170 public int read(final char[] buf, final int offset, final int length) throws IOException { 171 if (length == 0) { 172 return 0; 173 } 174 175 final int len = super.read(buf, offset, length); 176 177 if (len > 0) { 178 179 for (int i = offset; i < offset + len; i++) { 180 final char ch = buf[i]; 181 if (ch == LF) { 182 if (CR != (i > offset ? buf[i - 1] : lastChar)) { 183 eolCounter++; 184 } 185 } else if (ch == CR) { 186 eolCounter++; 187 } 188 } 189 190 lastChar = buf[offset + len - 1]; 191 192 } else if (len == -1) { 193 lastChar = END_OF_STREAM; 194 } 195 196 position += len; 197 return len; 198 } 199 200 /** 201 * Gets the next line, dropping the line terminator(s). This method should only be called when processing a 202 * comment, otherwise information can be lost. 203 * <p> 204 * Increments {@link #eolCounter} and updates {@link #position}. 205 * </p> 206 * <p> 207 * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise the last EOL character. 208 * </p> 209 * 210 * @return the line that was read, or null if reached EOF. 211 */ 212 @Override 213 public String readLine() throws IOException { 214 if (lookAhead() == END_OF_STREAM) { 215 return null; 216 } 217 final StringBuilder buffer = new StringBuilder(); 218 while (true) { 219 final int current = read(); 220 if (current == CR) { 221 final int next = lookAhead(); 222 if (next == LF) { 223 read(); 224 } 225 } 226 if (current == END_OF_STREAM || current == LF || current == CR) { 227 break; 228 } 229 buffer.append((char) current); 230 } 231 return buffer.toString(); 232 } 233 234 }