View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.apache.commons.csv;
21  
22  import java.io.IOException;
23  import java.util.Arrays;
24  import java.util.List;
25  import java.util.stream.Collectors;
26  import java.util.stream.Stream;
27  
28  import org.junit.jupiter.api.Assertions;
29  import org.junit.jupiter.params.ParameterizedTest;
30  import org.junit.jupiter.params.provider.Arguments;
31  import org.junit.jupiter.params.provider.MethodSource;
32  
33  /**
34   * Tests parsing of duplicate column names in a CSV header.
35   * The test verifies that headers are consistently handled by CSVFormat and CSVParser.
36   */
37  public class CSVDuplicateHeaderTest {
38  
39      /**
40       * Return test cases for duplicate header data for use in CSVFormat.
41       * <p>
42       * This filters the parsing test data to all cases where the allow missing column
43       * names flag is true and ignore header case is false: these flags are exclusively for parsing.
44       * CSVFormat validation applies to both parsing and writing and thus validation
45       * is less strict and behaves as if the allow missing column names constraint and
46       * the ignore header case behavior are absent.
47       * The filtered data is then returned with the parser flags set to both true and false
48       * for each test case.
49       * </p>
50       *
51       * @return the stream of arguments
52       */
53      static Stream<Arguments> duplicateHeaderAllowsMissingColumnsNamesData() {
54          return duplicateHeaderData()
55              .filter(arg -> Boolean.TRUE.equals(arg.get()[1]) && Boolean.FALSE.equals(arg.get()[2]))
56              .flatMap(arg -> {
57                  // Return test case with flags as all true/false combinations
58                  final Object[][] data = new Object[4][];
59                  final Boolean[] flags = {Boolean.TRUE, Boolean.FALSE};
60                  int i = 0;
61                  for (final Boolean a : flags) {
62                      for (final Boolean b : flags) {
63                          data[i] = arg.get().clone();
64                          data[i][1] = a;
65                          data[i][2] = b;
66                          i++;
67                      }
68                  }
69                  return Arrays.stream(data).map(Arguments::of);
70              });
71      }
72  
73      /**
74       * Return test cases for duplicate header data for use in parsing (CSVParser). Uses the order:
75       * <pre>
76       * DuplicateHeaderMode duplicateHeaderMode
77       * boolean allowMissingColumnNames
78       * String[] headers
79       * boolean valid
80       * </pre>
81       *
82       * @return the stream of arguments
83       */
84      static Stream<Arguments> duplicateHeaderData() {
85          return Stream.of(
86              // Any combination with a valid header
87              Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "B"}, true),
88              Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "B"}, true),
89              Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "B"}, true),
90              Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "B"}, true),
91              Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "B"}, true),
92              Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "B"}, true),
93  
94              // Any combination with a valid header including empty
95              Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", ""}, false),
96              Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", ""}, false),
97              Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", ""}, false),
98              Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", ""}, true),
99              Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", ""}, true),
100             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", ""}, true),
101 
102             // Any combination with a valid header including blank (1 space)
103             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", " "}, false),
104             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", " "}, false),
105             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", " "}, false),
106             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", " "}, true),
107             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", " "}, true),
108             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", " "}, true),
109 
110             // Any combination with a valid header including null
111             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", null}, false),
112             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", null}, false),
113             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", null}, false),
114             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", null}, true),
115             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", null}, true),
116             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", null}, true),
117 
118             // Duplicate non-empty names
119             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A"}, false),
120             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A"}, false),
121             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A"}, true),
122             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A"}, false),
123             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A"}, false),
124             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A"}, true),
125 
126             // Duplicate empty names
127             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"", ""}, false),
128             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"", ""}, false),
129             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"", ""}, false),
130             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"", ""}, false),
131             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"", ""}, true),
132             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"", ""}, true),
133 
134             // Duplicate blank names (1 space)
135             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {" ", " "}, false),
136             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", " "}, false),
137             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {" ", " "}, false),
138             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {" ", " "}, false),
139             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {" ", " "}, true),
140             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {" ", " "}, true),
141 
142             // Duplicate blank names (3 spaces)
143             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"   ", "   "}, false),
144             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"   ", "   "}, false),
145             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"   ", "   "}, false),
146             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"   ", "   "}, false),
147             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"   ", "   "}, true),
148             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"   ", "   "}, true),
149 
150             // Duplicate null names
151             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {null, null}, false),
152             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {null, null}, false),
153             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {null, null}, false),
154             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {null, null}, false),
155             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {null, null}, true),
156             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {null, null}, true),
157 
158             // Duplicate blank names (1+3 spaces)
159             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {" ", "   "}, false),
160             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", "   "}, false),
161             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {" ", "   "}, false),
162             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {" ", "   "}, false),
163             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {" ", "   "}, true),
164             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {" ", "   "}, true),
165 
166             // Duplicate blank names and null names
167             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {" ", null}, false),
168             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", null}, false),
169             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {" ", null}, false),
170             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {" ", null}, false),
171             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {" ", null}, true),
172             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {" ", null}, true),
173 
174             // Duplicate non-empty and empty names
175             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A", "", ""}, false),
176             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", "", ""}, false),
177             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A", "", ""}, false),
178             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A", "", ""}, false),
179             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A", "", ""}, false),
180             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A", "", ""}, true),
181 
182             // Non-duplicate non-empty and duplicate empty names
183             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "B", "", ""}, false),
184             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "B", "", ""}, false),
185             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "B", "", ""}, false),
186             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "B", "", ""}, false),
187             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "B", "", ""}, true),
188             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "B", "", ""}, true),
189 
190             // Duplicate non-empty and blank names
191             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A", " ", " "}, false),
192             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", " ", " "}, false),
193             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A", " ", " "}, false),
194             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A", " ", " "}, false),
195             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A", " ", " "}, false),
196             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A", " ", " "}, true),
197 
198             // Duplicate non-empty and null names
199             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A", null, null}, false),
200             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", null, null}, false),
201             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A", null, null}, false),
202             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A", null, null}, false),
203             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A", null, null}, false),
204             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A", null, null}, true),
205 
206             // Duplicate blank names
207             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "", ""}, false),
208             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "", ""}, false),
209             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "", ""}, false),
210             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "", ""}, false),
211             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "", ""}, true),
212             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "", ""}, true),
213 
214             // Duplicate null names
215             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", null, null}, false),
216             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", null, null}, false),
217             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", null, null}, false),
218             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", null, null}, false),
219             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", null, null}, true),
220             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", null, null}, true),
221 
222             // Duplicate blank names (1+3 spaces)
223             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", " ", "   "}, false),
224             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", " ", "   "}, false),
225             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", " ", "   "}, false),
226             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", " ", "   "}, false),
227             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", " ", "   "}, true),
228             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", " ", "   "}, true),
229 
230             // Duplicate names (case insensitive)
231             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true , new String[] {"A", "a"}, false),
232             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true , new String[] {"A", "a"}, false),
233             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true , new String[] {"A", "a"}, true),
234             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true , new String[] {"A", "a"}, false),
235             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true , new String[] {"A", "a"}, false),
236             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true , new String[] {"A", "a"}, true),
237 
238             // Duplicate non-empty (case insensitive) and empty names
239             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true, new String[] {"A", "a", "", ""}, false),
240             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", "", ""}, false),
241             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true, new String[] {"A", "a", "", ""}, false),
242             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true, new String[] {"A", "a", "", ""}, false),
243             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true, new String[] {"A", "a", "", ""}, false),
244             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true, new String[] {"A", "a", "", ""}, true),
245 
246             // Duplicate non-empty (case insensitive) and blank names
247             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true, new String[] {"A", "a", " ", " "}, false),
248             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", " ", " "}, false),
249             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true, new String[] {"A", "a", " ", " "}, false),
250             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true, new String[] {"A", "a", " ", " "}, false),
251             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true, new String[] {"A", "a", " ", " "}, false),
252             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true, new String[] {"A", "a", " ", " "}, true),
253 
254             // Duplicate non-empty (case insensitive) and null names
255             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true, new String[] {"A", "a", null, null}, false),
256             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", null, null}, false),
257             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true, new String[] {"A", "a", null, null}, false),
258             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true, new String[] {"A", "a", null, null}, false),
259             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true, new String[] {"A", "a", null, null}, false),
260             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true, new String[] {"A", "a", null, null}, true)
261         );
262     }
263 
264     /**
265      * Tests duplicate headers with the CSVFormat.
266      *
267      * @param duplicateHeaderMode the duplicate header mode
268      * @param allowMissingColumnNames the allow missing column names flag (only used for parsing)
269      * @param ignoreHeaderCase the ignore header case flag (only used for parsing)
270      * @param headers the headers
271      * @param valid true if the settings are expected to be valid, otherwise expect a IllegalArgumentException
272      */
273     @ParameterizedTest
274     @MethodSource(value = {"duplicateHeaderAllowsMissingColumnsNamesData"})
275     public void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode,
276                               final boolean allowMissingColumnNames,
277                               final boolean ignoreHeaderCase,
278                               final String[] headers,
279                               final boolean valid) {
280         final CSVFormat.Builder builder =
281             CSVFormat.DEFAULT.builder()
282                              .setDuplicateHeaderMode(duplicateHeaderMode)
283                              .setAllowMissingColumnNames(allowMissingColumnNames)
284                              .setIgnoreHeaderCase(ignoreHeaderCase)
285                              .setHeader(headers);
286         if (valid) {
287             final CSVFormat format = builder.get();
288             Assertions.assertEquals(duplicateHeaderMode, format.getDuplicateHeaderMode(), "DuplicateHeaderMode");
289             Assertions.assertEquals(allowMissingColumnNames, format.getAllowMissingColumnNames(), "AllowMissingColumnNames");
290             Assertions.assertArrayEquals(headers, format.getHeader(), "Header");
291         } else {
292             Assertions.assertThrows(IllegalArgumentException.class, builder::get);
293         }
294     }
295 
296     /**
297      * Tests duplicate headers with the CSVParser.
298      *
299      * @param duplicateHeaderMode the duplicate header mode
300      * @param allowMissingColumnNames the allow missing column names flag (only used for parsing)
301      * @param ignoreHeaderCase the ignore header case flag (only used for parsing)
302      * @param headers the headers (joined with the CSVFormat delimiter to create a string input)
303      * @param valid true if the settings are expected to be valid, otherwise expect a IllegalArgumentException
304      * @throws IOException Signals that an I/O exception has occurred.
305      */
306     @ParameterizedTest
307     @MethodSource(value = {"duplicateHeaderData"})
308     public void testCSVParser(final DuplicateHeaderMode duplicateHeaderMode,
309                               final boolean allowMissingColumnNames,
310                               final boolean ignoreHeaderCase,
311                               final String[] headers,
312             final boolean valid) throws IOException {
313         // @formatter:off
314         final CSVFormat format = CSVFormat.DEFAULT.builder()
315                 .setDuplicateHeaderMode(duplicateHeaderMode)
316                 .setAllowMissingColumnNames(allowMissingColumnNames)
317                 .setIgnoreHeaderCase(ignoreHeaderCase)
318                 .setNullString("NULL")
319                 .setHeader()
320                 .get();
321         // @formatter:on
322         final String input = Arrays.stream(headers)
323                 .map(s -> s == null ? format.getNullString() : s)
324                 .collect(Collectors.joining(format.getDelimiterString()));
325         // @formatter:off
326         if (valid) {
327             try (CSVParser parser = CSVParser.parse(input, format)) {
328                 // Parser ignores null headers
329                 final List<String> expected = Arrays.stream(headers).filter(s -> s != null).collect(Collectors.toList());
330                 Assertions.assertEquals(expected, parser.getHeaderNames(), "HeaderNames");
331             }
332         } else {
333             Assertions.assertThrows(IllegalArgumentException.class, () -> CSVParser.parse(input, format));
334         }
335     }
336 }