View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.apache.commons.csv;
21  
22  import java.io.Serializable;
23  import java.util.Arrays;
24  import java.util.Iterator;
25  import java.util.LinkedHashMap;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.stream.Collectors;
29  import java.util.stream.Stream;
30  
31  /**
32   * A CSV record parsed from a CSV file.
33   *
34   * <p>
35   * Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
36   * In version 1.8 the mapping between the column header and the column index was
37   * removed from the serialized state. The class maintains serialization compatibility
38   * with versions pre-1.8 for the record values; these must be accessed by index
39   * following deserialization. There will be a loss of any functionally linked to the header
40   * mapping when transferring serialized forms pre-1.8 to 1.8 and vice versa.
41   * </p>
42   */
43  public final class CSVRecord implements Serializable, Iterable<String> {
44  
45      private static final long serialVersionUID = 1L;
46  
47      /**
48       * The start position of this record as a character position in the source stream. This may or may not correspond to the byte position depending on the
49       * character set.
50       */
51      private final long characterPosition;
52  
53      /**
54       * The starting position of this record in the source stream, measured in bytes.
55       */
56      private final long bytePosition;
57  
58      /** The accumulated comments (if any) */
59      private final String comment;
60  
61      /** The record number. */
62      private final long recordNumber;
63  
64      /** The values of the record */
65      private final String[] values;
66  
67      /** The parser that originates this record. This is not serialized. */
68      private final transient CSVParser parser;
69  
70      CSVRecord(final CSVParser parser, final String[] values,  final String comment, final long recordNumber,
71              final long characterPosition, final long bytePosition) {
72          this.recordNumber = recordNumber;
73          this.values = values != null ? values : Constants.EMPTY_STRING_ARRAY;
74          this.parser = parser;
75          this.comment = comment;
76          this.characterPosition = characterPosition;
77          this.bytePosition = bytePosition;
78      }
79  
80      /**
81       * Returns a value by {@link Enum}.
82       *
83       * @param e
84       *            an enum
85       * @return the String at the given enum String
86       */
87      public String get(final Enum<?> e) {
88          return get(e == null ? null : e.name());
89      }
90  
91      /**
92       * Returns a value by index.
93       *
94       * @param i
95       *            a column index (0-based)
96       * @return the String at the given index
97       */
98      public String get(final int i) {
99          return values[i];
100     }
101 
102     /**
103      * Returns a value by name. If multiple instances of the header name exists, only the last occurrence is returned.
104      *
105      * <p>
106      * Note: This requires a field mapping obtained from the original parser.
107      * A check using {@link #isMapped(String)} should be used to determine if a
108      * mapping exists from the provided {@code name} to a field index. In this case an
109      * exception will only be thrown if the record does not contain a field corresponding
110      * to the mapping, that is the record length is not consistent with the mapping size.
111      * </p>
112      *
113      * @param name
114      *            the name of the column to be retrieved.
115      * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}.
116      * @throws IllegalStateException
117      *             if no header mapping was provided
118      * @throws IllegalArgumentException
119      *             if {@code name} is not mapped or if the record is inconsistent
120      * @see #isMapped(String)
121      * @see #isConsistent()
122      * @see #getParser()
123      * @see CSVFormat.Builder#setNullString(String)
124      */
125     public String get(final String name) {
126         final Map<String, Integer> headerMap = getHeaderMapRaw();
127         if (headerMap == null) {
128             throw new IllegalStateException(
129                 "No header mapping was specified, the record values can't be accessed by name");
130         }
131         final Integer index = headerMap.get(name);
132         if (index == null) {
133             throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name,
134                 headerMap.keySet()));
135         }
136         try {
137             return values[index.intValue()];  // N.B. Explicit (un)boxing is intentional
138         } catch (final ArrayIndexOutOfBoundsException e) {
139             throw new IllegalArgumentException(String.format(
140                 "Index for header '%s' is %d but CSVRecord only has %d values!", name, index,
141                 Integer.valueOf(values.length)));  // N.B. Explicit (un)boxing is intentional
142         }
143     }
144 
145     /**
146      * Returns the starting position of this record in the source stream, measured in bytes.
147      *
148      * @return the byte position of this record in the source stream.
149      * @since 1.13.0
150      */
151     public long getBytePosition() {
152         return bytePosition;
153     }
154 
155     /**
156      * Returns the start position of this record as a character position in the source stream. This may or may not
157      * correspond to the byte position depending on the character set.
158      *
159      * @return the position of this record in the source stream.
160      */
161     public long getCharacterPosition() {
162         return characterPosition;
163     }
164 
165     /**
166      * Returns the comment for this record, if any.
167      * Note that comments are attached to the following record.
168      * If there is no following record (i.e. the comment is at EOF),
169      * then the comment will be ignored.
170      *
171      * @return the comment for this record, or null if no comment for this record is available.
172      */
173     public String getComment() {
174         return comment;
175     }
176 
177     private Map<String, Integer> getHeaderMapRaw() {
178         return parser == null ? null : parser.getHeaderMapRaw();
179     }
180 
181     /**
182      * Returns the parser.
183      *
184      * <p>
185      * Note: The parser is not part of the serialized state of the record. A null check
186      * should be used when the record may have originated from a serialized form.
187      * </p>
188      *
189      * @return the parser.
190      * @since 1.7
191      */
192     public CSVParser getParser() {
193         return parser;
194     }
195 
196     /**
197      * Returns the number of this record in the parsed CSV file.
198      *
199      * <p>
200      * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
201      * the current line number of the parser that created this record.
202      * </p>
203      *
204      * @return the number of this record.
205      * @see CSVParser#getCurrentLineNumber()
206      */
207     public long getRecordNumber() {
208         return recordNumber;
209     }
210 
211     /**
212      * Checks whether this record has a comment, false otherwise.
213      * Note that comments are attached to the following record.
214      * If there is no following record (i.e. the comment is at EOF),
215      * then the comment will be ignored.
216      *
217      * @return true if this record has a comment, false otherwise
218      * @since 1.3
219      */
220     public boolean hasComment() {
221         return comment != null;
222     }
223 
224     /**
225      * Tells whether the record size matches the header size.
226      *
227      * <p>
228      * Returns true if the sizes for this record match and false if not. Some programs can export files that fail this
229      * test but still produce parsable files.
230      * </p>
231      *
232      * @return true of this record is valid, false if not
233      */
234     public boolean isConsistent() {
235         final Map<String, Integer> headerMap = getHeaderMapRaw();
236         return headerMap == null || headerMap.size() == values.length;
237     }
238 
239     /**
240      * Checks whether a given column is mapped, i.e. its name has been defined to the parser.
241      *
242      * @param name
243      *            the name of the column to be retrieved.
244      * @return whether a given column is mapped.
245      */
246     public boolean isMapped(final String name) {
247         final Map<String, Integer> headerMap = getHeaderMapRaw();
248         return headerMap != null && headerMap.containsKey(name);
249     }
250 
251     /**
252      * Checks whether a column with a given index has a value.
253      *
254      * @param index
255      *         a column index (0-based)
256      * @return whether a column with a given index has a value
257      */
258     public boolean isSet(final int index) {
259         return 0 <= index && index < values.length;
260     }
261 
262     /**
263      * Checks whether a given column is mapped and has a value.
264      *
265      * @param name
266      *            the name of the column to be retrieved.
267      * @return whether a given column is mapped and has a value
268      */
269     public boolean isSet(final String name) {
270         return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; // N.B. Explicit (un)boxing is intentional
271     }
272 
273     /**
274      * Returns an iterator over the values of this record.
275      *
276      * @return an iterator over the values of this record.
277      */
278     @Override
279     public Iterator<String> iterator() {
280         return toList().iterator();
281     }
282 
283     /**
284      * Puts all values of this record into the given Map.
285      *
286      * @param <M> the map type
287      * @param map The Map to populate.
288      * @return the given map.
289      * @since 1.9.0
290      */
291     public <M extends Map<String, String>> M putIn(final M map) {
292         if (getHeaderMapRaw() == null) {
293             return map;
294         }
295         getHeaderMapRaw().forEach((key, value) -> {
296             if (value < values.length) {
297                 map.put(key, values[value]);
298             }
299         });
300         return map;
301     }
302 
303     /**
304      * Returns the number of values in this record.
305      *
306      * @return the number of values.
307      */
308     public int size() {
309         return values.length;
310     }
311 
312     /**
313      * Returns a sequential ordered stream whose elements are the values.
314      *
315      * @return the new stream.
316      * @since 1.9.0
317      */
318     public Stream<String> stream() {
319         return Stream.of(values);
320     }
321 
322     /**
323      * Converts the values to a new List.
324      * <p>
325      * Editing the list does not update this instance.
326      * </p>
327      *
328      * @return a new List
329      * @since 1.9.0
330      */
331     public List<String> toList() {
332         return stream().collect(Collectors.toList());
333     }
334 
335     /**
336      * Copies this record into a new Map of header name to record value. If multiple instances of a header name exist,
337      * then only the last occurrence is mapped.
338      *
339      * <p>
340      * Editing the map does not update this instance.
341      * </p>
342      *
343      * @return A new Map. The map is empty if the record has no headers.
344      */
345     public Map<String, String> toMap() {
346         return putIn(new LinkedHashMap<>(values.length));
347     }
348 
349     /**
350      * Returns a string representation of the contents of this record. The result is constructed by comment, mapping,
351      * recordNumber and by passing the internal values array to {@link Arrays#toString(Object[])}.
352      *
353      * @return a String representation of this record.
354      */
355     @Override
356     public String toString() {
357         return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" +
358             Arrays.toString(values) + "]";
359     }
360 
361     /**
362      * Gets the values for this record. This is not a copy.
363      *
364      * @return the values for this record.
365      * @since 1.10.0
366      */
367     public String[] values() {
368         return values;
369     }
370 
371 }