1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * https://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20 package org.apache.commons.csv;
21
22 import java.io.Serializable;
23 import java.util.Arrays;
24 import java.util.Iterator;
25 import java.util.LinkedHashMap;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.stream.Collectors;
29 import java.util.stream.Stream;
30
31 /**
32 * A CSV record parsed from a CSV file.
33 *
34 * <p>
35 * Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
36 * In version 1.8 the mapping between the column header and the column index was
37 * removed from the serialized state. The class maintains serialization compatibility
38 * with versions pre-1.8 for the record values; these must be accessed by index
39 * following deserialization. There will be a loss of any functionally linked to the header
40 * mapping when transferring serialized forms pre-1.8 to 1.8 and vice versa.
41 * </p>
42 */
43 public final class CSVRecord implements Serializable, Iterable<String> {
44
45 private static final long serialVersionUID = 1L;
46
47 /**
48 * The start position of this record as a character position in the source stream. This may or may not correspond to the byte position depending on the
49 * character set.
50 */
51 private final long characterPosition;
52
53 /**
54 * The starting position of this record in the source stream, measured in bytes.
55 */
56 private final long bytePosition;
57
58 /** The accumulated comments (if any). */
59 private final String comment;
60
61 /** The record number. */
62 private final long recordNumber;
63
64 /** The values of the record. */
65 private final String[] values;
66
67 /** The parser that originates this record. This is not serialized. */
68 private final transient CSVParser parser;
69
70 CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber,
71 final long characterPosition, final long bytePosition) {
72 this.recordNumber = recordNumber;
73 this.values = values != null ? values : Constants.EMPTY_STRING_ARRAY;
74 this.parser = parser;
75 this.comment = comment;
76 this.characterPosition = characterPosition;
77 this.bytePosition = bytePosition;
78 }
79
80 /**
81 * Returns a value by {@link Enum}.
82 *
83 * @param e
84 * an enum
85 * @return the String at the given enum String
86 */
87 public String get(final Enum<?> e) {
88 return get(e == null ? null : e.name());
89 }
90
91 /**
92 * Returns a value by index.
93 *
94 * @param i
95 * a column index (0-based)
96 * @return the String at the given index
97 */
98 public String get(final int i) {
99 return values[i];
100 }
101
102 /**
103 * Returns a value by name. If multiple instances of the header name exists, only the last occurrence is returned.
104 *
105 * <p>
106 * Note: This requires a field mapping obtained from the original parser.
107 * A check using {@link #isMapped(String)} should be used to determine if a
108 * mapping exists from the provided {@code name} to a field index. In this case an
109 * exception will only be thrown if the record does not contain a field corresponding
110 * to the mapping, that is the record length is not consistent with the mapping size.
111 * </p>
112 *
113 * @param name
114 * the name of the column to be retrieved.
115 * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}.
116 * @throws IllegalStateException
117 * if no header mapping was provided.
118 * @throws IllegalArgumentException
119 * if {@code name} is not mapped or if the record is inconsistent.
120 * @see #isMapped(String)
121 * @see #isConsistent()
122 * @see #getParser()
123 * @see CSVFormat.Builder#setNullString(String)
124 */
125 public String get(final String name) {
126 final Map<String, Integer> headerMap = getHeaderMapRaw();
127 if (headerMap == null) {
128 throw new IllegalStateException("No header mapping was specified, the record values can't be accessed by name");
129 }
130 final Integer index = headerMap.get(name);
131 if (index == null) {
132 throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, headerMap.keySet()));
133 }
134 try {
135 return values[index.intValue()]; // Explicit (un)boxing is intentional
136 } catch (final ArrayIndexOutOfBoundsException e) {
137 throw new IllegalArgumentException(
138 String.format("Index for header '%s' is %d but CSVRecord only has %d values!", name, index, Integer.valueOf(values.length))); // Explicit
139 // (un)boxing
140 // is
141 // intentional
142 }
143 }
144
145 /**
146 * Returns the starting position of this record in the source stream, measured in bytes.
147 *
148 * @return the byte position of this record in the source stream.
149 * @since 1.13.0
150 */
151 public long getBytePosition() {
152 return bytePosition;
153 }
154
155 /**
156 * Returns the start position of this record as a character position in the source stream. This may or may not
157 * correspond to the byte position depending on the character set.
158 *
159 * @return the position of this record in the source stream.
160 */
161 public long getCharacterPosition() {
162 return characterPosition;
163 }
164
165 /**
166 * Returns the comment for this record, if any.
167 * Note that comments are attached to the following record.
168 * If there is no following record (that is, the comment is at EOF),
169 * then the comment will be ignored.
170 *
171 * @return the comment for this record, or null if no comment for this record is available.
172 */
173 public String getComment() {
174 return comment;
175 }
176
177 private Map<String, Integer> getHeaderMapRaw() {
178 return parser == null ? null : parser.getHeaderMapRaw();
179 }
180
181 /**
182 * Returns the parser.
183 *
184 * <p>
185 * Note: The parser is not part of the serialized state of the record. A null check
186 * should be used when the record may have originated from a serialized form.
187 * </p>
188 *
189 * @return the parser.
190 * @since 1.7
191 */
192 public CSVParser getParser() {
193 return parser;
194 }
195
196 /**
197 * Returns the number of this record in the parsed CSV file.
198 *
199 * <p>
200 * <strong>NOTE:</strong>If your CSV input has multi-line values, the returned number does not correspond to
201 * the current line number of the parser that created this record.
202 * </p>
203 *
204 * @return the number of this record.
205 * @see CSVParser#getCurrentLineNumber()
206 */
207 public long getRecordNumber() {
208 return recordNumber;
209 }
210
211 /**
212 * Checks whether this record has a comment, false otherwise.
213 * Note that comments are attached to the following record.
214 * If there is no following record (that is, the comment is at EOF),
215 * then the comment will be ignored.
216 *
217 * @return true if this record has a comment, false otherwise.
218 * @since 1.3
219 */
220 public boolean hasComment() {
221 return comment != null;
222 }
223
224 /**
225 * Tells whether the record size matches the header size.
226 *
227 * <p>
228 * Returns true if the sizes for this record match and false if not. Some programs can export files that fail this
229 * test but still produce parsable files.
230 * </p>
231 *
232 * @return true of this record is valid, false if not.
233 */
234 public boolean isConsistent() {
235 final Map<String, Integer> headerMap = getHeaderMapRaw();
236 return headerMap == null || headerMap.size() == values.length;
237 }
238
239 /**
240 * Checks whether a given column is mapped, that is, its name has been defined to the parser.
241 *
242 * @param name
243 * the name of the column to be retrieved.
244 * @return whether a given column is mapped.
245 */
246 public boolean isMapped(final String name) {
247 final Map<String, Integer> headerMap = getHeaderMapRaw();
248 return headerMap != null && headerMap.containsKey(name);
249 }
250
251 /**
252 * Checks whether a column with a given index has a value.
253 *
254 * @param index
255 * a column index (0-based).
256 * @return whether a column with a given index has a value.
257 */
258 public boolean isSet(final int index) {
259 return 0 <= index && index < values.length;
260 }
261
262 /**
263 * Checks whether a given column is mapped and has a value.
264 *
265 * @param name
266 * the name of the column to be retrieved.
267 * @return whether a given column is mapped and has a value.
268 */
269 public boolean isSet(final String name) {
270 return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; // Explicit (un)boxing is intentional
271 }
272
273 /**
274 * Returns an iterator over the values of this record.
275 *
276 * @return an iterator over the values of this record.
277 */
278 @Override
279 public Iterator<String> iterator() {
280 return toList().iterator();
281 }
282
283 /**
284 * Puts all values of this record into the given Map.
285 *
286 * @param <M> the map type.
287 * @param map The Map to populate.
288 * @return the given map.
289 * @since 1.9.0
290 */
291 public <M extends Map<String, String>> M putIn(final M map) {
292 if (getHeaderMapRaw() == null) {
293 return map;
294 }
295 getHeaderMapRaw().forEach((key, value) -> {
296 if (value < values.length) {
297 map.put(key, values[value]);
298 }
299 });
300 return map;
301 }
302
303 /**
304 * Returns the number of values in this record.
305 *
306 * @return the number of values.
307 */
308 public int size() {
309 return values.length;
310 }
311
312 /**
313 * Returns a sequential ordered stream whose elements are the values.
314 *
315 * @return the new stream.
316 * @since 1.9.0
317 */
318 public Stream<String> stream() {
319 return Stream.of(values);
320 }
321
322 /**
323 * Converts the values to a new List.
324 * <p>
325 * Editing the list does not update this instance.
326 * </p>
327 *
328 * @return a new List
329 * @since 1.9.0
330 */
331 public List<String> toList() {
332 return stream().collect(Collectors.toList());
333 }
334
335 /**
336 * Copies this record into a new Map of header name to record value. If multiple instances of a header name exist,
337 * then only the last occurrence is mapped.
338 *
339 * <p>
340 * Editing the map does not update this instance.
341 * </p>
342 *
343 * @return A new Map. The map is empty if the record has no headers.
344 */
345 public Map<String, String> toMap() {
346 return putIn(new LinkedHashMap<>(values.length));
347 }
348
349 /**
350 * Returns a string representation of the contents of this record. The result is constructed by comment, mapping,
351 * recordNumber and by passing the internal values array to {@link Arrays#toString(Object[])}.
352 *
353 * @return a String representation of this record.
354 */
355 @Override
356 public String toString() {
357 return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" + Arrays.toString(values) + "]";
358 }
359
360 /**
361 * Gets the values for this record. This is <strong>not</strong> a copy.
362 *
363 * @return the values for this record, never null.
364 * @since 1.10.0
365 */
366 public String[] values() {
367 return values;
368 }
369
370 }