CSVRecord.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  *   https://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */

  19. package org.apache.commons.csv;

  20. import java.io.Serializable;
  21. import java.util.Arrays;
  22. import java.util.Iterator;
  23. import java.util.LinkedHashMap;
  24. import java.util.List;
  25. import java.util.Map;
  26. import java.util.stream.Collectors;
  27. import java.util.stream.Stream;

  28. /**
  29.  * A CSV record parsed from a CSV file.
  30.  *
  31.  * <p>
  32.  * Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
  33.  * In version 1.8 the mapping between the column header and the column index was
  34.  * removed from the serialized state. The class maintains serialization compatibility
  35.  * with versions pre-1.8 for the record values; these must be accessed by index
  36.  * following deserialization. There will be a loss of any functionally linked to the header
  37.  * mapping when transferring serialized forms pre-1.8 to 1.8 and vice versa.
  38.  * </p>
  39.  */
  40. public final class CSVRecord implements Serializable, Iterable<String> {

  41.     private static final long serialVersionUID = 1L;

  42.     /**
  43.      * The start position of this record as a character position in the source stream. This may or may not correspond to the byte position depending on the
  44.      * character set.
  45.      */
  46.     private final long characterPosition;

  47.     /**
  48.      * The starting position of this record in the source stream, measured in bytes.
  49.      */
  50.     private final long bytePosition;

  51.     /** The accumulated comments (if any) */
  52.     private final String comment;

  53.     /** The record number. */
  54.     private final long recordNumber;

  55.     /** The values of the record */
  56.     private final String[] values;

  57.     /** The parser that originates this record. This is not serialized. */
  58.     private final transient CSVParser parser;

  59.     CSVRecord(final CSVParser parser, final String[] values,  final String comment, final long recordNumber,
  60.             final long characterPosition, final long bytePosition) {
  61.         this.recordNumber = recordNumber;
  62.         this.values = values != null ? values : Constants.EMPTY_STRING_ARRAY;
  63.         this.parser = parser;
  64.         this.comment = comment;
  65.         this.characterPosition = characterPosition;
  66.         this.bytePosition = bytePosition;
  67.     }

  68.     /**
  69.      * Returns a value by {@link Enum}.
  70.      *
  71.      * @param e
  72.      *            an enum
  73.      * @return the String at the given enum String
  74.      */
  75.     public String get(final Enum<?> e) {
  76.         return get(e == null ? null : e.name());
  77.     }

  78.     /**
  79.      * Returns a value by index.
  80.      *
  81.      * @param i
  82.      *            a column index (0-based)
  83.      * @return the String at the given index
  84.      */
  85.     public String get(final int i) {
  86.         return values[i];
  87.     }

  88.     /**
  89.      * Returns a value by name. If multiple instances of the header name exists, only the last occurrence is returned.
  90.      *
  91.      * <p>
  92.      * Note: This requires a field mapping obtained from the original parser.
  93.      * A check using {@link #isMapped(String)} should be used to determine if a
  94.      * mapping exists from the provided {@code name} to a field index. In this case an
  95.      * exception will only be thrown if the record does not contain a field corresponding
  96.      * to the mapping, that is the record length is not consistent with the mapping size.
  97.      * </p>
  98.      *
  99.      * @param name
  100.      *            the name of the column to be retrieved.
  101.      * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}.
  102.      * @throws IllegalStateException
  103.      *             if no header mapping was provided
  104.      * @throws IllegalArgumentException
  105.      *             if {@code name} is not mapped or if the record is inconsistent
  106.      * @see #isMapped(String)
  107.      * @see #isConsistent()
  108.      * @see #getParser()
  109.      * @see CSVFormat.Builder#setNullString(String)
  110.      */
  111.     public String get(final String name) {
  112.         final Map<String, Integer> headerMap = getHeaderMapRaw();
  113.         if (headerMap == null) {
  114.             throw new IllegalStateException(
  115.                 "No header mapping was specified, the record values can't be accessed by name");
  116.         }
  117.         final Integer index = headerMap.get(name);
  118.         if (index == null) {
  119.             throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name,
  120.                 headerMap.keySet()));
  121.         }
  122.         try {
  123.             return values[index.intValue()];  // Explicit (un)boxing is intentional
  124.         } catch (final ArrayIndexOutOfBoundsException e) {
  125.             throw new IllegalArgumentException(String.format(
  126.                 "Index for header '%s' is %d but CSVRecord only has %d values!", name, index,
  127.                 Integer.valueOf(values.length)));  // Explicit (un)boxing is intentional
  128.         }
  129.     }

  130.     /**
  131.      * Returns the starting position of this record in the source stream, measured in bytes.
  132.      *
  133.      * @return the byte position of this record in the source stream.
  134.      * @since 1.13.0
  135.      */
  136.     public long getBytePosition() {
  137.         return bytePosition;
  138.     }

  139.     /**
  140.      * Returns the start position of this record as a character position in the source stream. This may or may not
  141.      * correspond to the byte position depending on the character set.
  142.      *
  143.      * @return the position of this record in the source stream.
  144.      */
  145.     public long getCharacterPosition() {
  146.         return characterPosition;
  147.     }

  148.     /**
  149.      * Returns the comment for this record, if any.
  150.      * Note that comments are attached to the following record.
  151.      * If there is no following record (that is, the comment is at EOF),
  152.      * then the comment will be ignored.
  153.      *
  154.      * @return the comment for this record, or null if no comment for this record is available.
  155.      */
  156.     public String getComment() {
  157.         return comment;
  158.     }

  159.     private Map<String, Integer> getHeaderMapRaw() {
  160.         return parser == null ? null : parser.getHeaderMapRaw();
  161.     }

  162.     /**
  163.      * Returns the parser.
  164.      *
  165.      * <p>
  166.      * Note: The parser is not part of the serialized state of the record. A null check
  167.      * should be used when the record may have originated from a serialized form.
  168.      * </p>
  169.      *
  170.      * @return the parser.
  171.      * @since 1.7
  172.      */
  173.     public CSVParser getParser() {
  174.         return parser;
  175.     }

  176.     /**
  177.      * Returns the number of this record in the parsed CSV file.
  178.      *
  179.      * <p>
  180.      * <strong>NOTE:</strong>If your CSV input has multi-line values, the returned number does not correspond to
  181.      * the current line number of the parser that created this record.
  182.      * </p>
  183.      *
  184.      * @return the number of this record.
  185.      * @see CSVParser#getCurrentLineNumber()
  186.      */
  187.     public long getRecordNumber() {
  188.         return recordNumber;
  189.     }

  190.     /**
  191.      * Checks whether this record has a comment, false otherwise.
  192.      * Note that comments are attached to the following record.
  193.      * If there is no following record (that is, the comment is at EOF),
  194.      * then the comment will be ignored.
  195.      *
  196.      * @return true if this record has a comment, false otherwise
  197.      * @since 1.3
  198.      */
  199.     public boolean hasComment() {
  200.         return comment != null;
  201.     }

  202.     /**
  203.      * Tells whether the record size matches the header size.
  204.      *
  205.      * <p>
  206.      * Returns true if the sizes for this record match and false if not. Some programs can export files that fail this
  207.      * test but still produce parsable files.
  208.      * </p>
  209.      *
  210.      * @return true of this record is valid, false if not
  211.      */
  212.     public boolean isConsistent() {
  213.         final Map<String, Integer> headerMap = getHeaderMapRaw();
  214.         return headerMap == null || headerMap.size() == values.length;
  215.     }

  216.     /**
  217.      * Checks whether a given column is mapped, that is, its name has been defined to the parser.
  218.      *
  219.      * @param name
  220.      *            the name of the column to be retrieved.
  221.      * @return whether a given column is mapped.
  222.      */
  223.     public boolean isMapped(final String name) {
  224.         final Map<String, Integer> headerMap = getHeaderMapRaw();
  225.         return headerMap != null && headerMap.containsKey(name);
  226.     }

  227.     /**
  228.      * Checks whether a column with a given index has a value.
  229.      *
  230.      * @param index
  231.      *         a column index (0-based)
  232.      * @return whether a column with a given index has a value
  233.      */
  234.     public boolean isSet(final int index) {
  235.         return 0 <= index && index < values.length;
  236.     }

  237.     /**
  238.      * Checks whether a given column is mapped and has a value.
  239.      *
  240.      * @param name
  241.      *            the name of the column to be retrieved.
  242.      * @return whether a given column is mapped and has a value
  243.      */
  244.     public boolean isSet(final String name) {
  245.         return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; // Explicit (un)boxing is intentional
  246.     }

  247.     /**
  248.      * Returns an iterator over the values of this record.
  249.      *
  250.      * @return an iterator over the values of this record.
  251.      */
  252.     @Override
  253.     public Iterator<String> iterator() {
  254.         return toList().iterator();
  255.     }

  256.     /**
  257.      * Puts all values of this record into the given Map.
  258.      *
  259.      * @param <M> the map type
  260.      * @param map The Map to populate.
  261.      * @return the given map.
  262.      * @since 1.9.0
  263.      */
  264.     public <M extends Map<String, String>> M putIn(final M map) {
  265.         if (getHeaderMapRaw() == null) {
  266.             return map;
  267.         }
  268.         getHeaderMapRaw().forEach((key, value) -> {
  269.             if (value < values.length) {
  270.                 map.put(key, values[value]);
  271.             }
  272.         });
  273.         return map;
  274.     }

  275.     /**
  276.      * Returns the number of values in this record.
  277.      *
  278.      * @return the number of values.
  279.      */
  280.     public int size() {
  281.         return values.length;
  282.     }

  283.     /**
  284.      * Returns a sequential ordered stream whose elements are the values.
  285.      *
  286.      * @return the new stream.
  287.      * @since 1.9.0
  288.      */
  289.     public Stream<String> stream() {
  290.         return Stream.of(values);
  291.     }

  292.     /**
  293.      * Converts the values to a new List.
  294.      * <p>
  295.      * Editing the list does not update this instance.
  296.      * </p>
  297.      *
  298.      * @return a new List
  299.      * @since 1.9.0
  300.      */
  301.     public List<String> toList() {
  302.         return stream().collect(Collectors.toList());
  303.     }

  304.     /**
  305.      * Copies this record into a new Map of header name to record value. If multiple instances of a header name exist,
  306.      * then only the last occurrence is mapped.
  307.      *
  308.      * <p>
  309.      * Editing the map does not update this instance.
  310.      * </p>
  311.      *
  312.      * @return A new Map. The map is empty if the record has no headers.
  313.      */
  314.     public Map<String, String> toMap() {
  315.         return putIn(new LinkedHashMap<>(values.length));
  316.     }

  317.     /**
  318.      * Returns a string representation of the contents of this record. The result is constructed by comment, mapping,
  319.      * recordNumber and by passing the internal values array to {@link Arrays#toString(Object[])}.
  320.      *
  321.      * @return a String representation of this record.
  322.      */
  323.     @Override
  324.     public String toString() {
  325.         return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" +
  326.             Arrays.toString(values) + "]";
  327.     }

  328.     /**
  329.      * Gets the values for this record. This is not a copy.
  330.      *
  331.      * @return the values for this record.
  332.      * @since 1.10.0
  333.      */
  334.     public String[] values() {
  335.         return values;
  336.     }

  337. }