View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import java.io.IOException;
21  import java.util.Arrays;
22  import java.util.List;
23  import java.util.stream.Collectors;
24  import java.util.stream.Stream;
25  
26  import org.junit.jupiter.api.Assertions;
27  import org.junit.jupiter.params.ParameterizedTest;
28  import org.junit.jupiter.params.provider.Arguments;
29  import org.junit.jupiter.params.provider.MethodSource;
30  
31  /**
32   * Tests parsing of duplicate column names in a CSV header.
33   * The test verifies that headers are consistently handled by CSVFormat and CSVParser.
34   */
35  public class CSVDuplicateHeaderTest {
36  
37      /**
38       * Return test cases for duplicate header data for use in CSVFormat.
39       * <p>
40       * This filters the parsing test data to all cases where the allow missing column
41       * names flag is true and ignore header case is false: these flags are exclusively for parsing.
42       * CSVFormat validation applies to both parsing and writing and thus validation
43       * is less strict and behaves as if the allow missing column names constraint and
44       * the ignore header case behavior are absent.
45       * The filtered data is then returned with the parser flags set to both true and false
46       * for each test case.
47       * </p>
48       *
49       * @return the stream of arguments
50       */
51      static Stream<Arguments> duplicateHeaderAllowsMissingColumnsNamesData() {
52          return duplicateHeaderData()
53              .filter(arg -> Boolean.TRUE.equals(arg.get()[1]) && Boolean.FALSE.equals(arg.get()[2]))
54              .flatMap(arg -> {
55                  // Return test case with flags as all true/false combinations
56                  final Object[][] data = new Object[4][];
57                  final Boolean[] flags = {Boolean.TRUE, Boolean.FALSE};
58                  int i = 0;
59                  for (final Boolean a : flags) {
60                      for (final Boolean b : flags) {
61                          data[i] = arg.get().clone();
62                          data[i][1] = a;
63                          data[i][2] = b;
64                          i++;
65                      }
66                  }
67                  return Arrays.stream(data).map(Arguments::of);
68              });
69      }
70  
71      /**
72       * Return test cases for duplicate header data for use in parsing (CSVParser). Uses the order:
73       * <pre>
74       * DuplicateHeaderMode duplicateHeaderMode
75       * boolean allowMissingColumnNames
76       * String[] headers
77       * boolean valid
78       * </pre>
79       *
80       * @return the stream of arguments
81       */
82      static Stream<Arguments> duplicateHeaderData() {
83          return Stream.of(
84              // Any combination with a valid header
85              Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "B"}, true),
86              Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "B"}, true),
87              Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "B"}, true),
88              Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "B"}, true),
89              Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "B"}, true),
90              Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "B"}, true),
91  
92              // Any combination with a valid header including empty
93              Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", ""}, false),
94              Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", ""}, false),
95              Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", ""}, false),
96              Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", ""}, true),
97              Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", ""}, true),
98              Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", ""}, true),
99  
100             // Any combination with a valid header including blank (1 space)
101             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", " "}, false),
102             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", " "}, false),
103             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", " "}, false),
104             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", " "}, true),
105             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", " "}, true),
106             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", " "}, true),
107 
108             // Any combination with a valid header including null
109             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", null}, false),
110             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", null}, false),
111             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", null}, false),
112             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", null}, true),
113             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", null}, true),
114             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", null}, true),
115 
116             // Duplicate non-empty names
117             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A"}, false),
118             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A"}, false),
119             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A"}, true),
120             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A"}, false),
121             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A"}, false),
122             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A"}, true),
123 
124             // Duplicate empty names
125             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"", ""}, false),
126             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"", ""}, false),
127             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"", ""}, false),
128             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"", ""}, false),
129             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"", ""}, true),
130             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"", ""}, true),
131 
132             // Duplicate blank names (1 space)
133             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {" ", " "}, false),
134             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", " "}, false),
135             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {" ", " "}, false),
136             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {" ", " "}, false),
137             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {" ", " "}, true),
138             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {" ", " "}, true),
139 
140             // Duplicate blank names (3 spaces)
141             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"   ", "   "}, false),
142             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"   ", "   "}, false),
143             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"   ", "   "}, false),
144             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"   ", "   "}, false),
145             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"   ", "   "}, true),
146             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"   ", "   "}, true),
147 
148             // Duplicate null names
149             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {null, null}, false),
150             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {null, null}, false),
151             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {null, null}, false),
152             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {null, null}, false),
153             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {null, null}, true),
154             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {null, null}, true),
155 
156             // Duplicate blank names (1+3 spaces)
157             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {" ", "   "}, false),
158             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", "   "}, false),
159             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {" ", "   "}, false),
160             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {" ", "   "}, false),
161             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {" ", "   "}, true),
162             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {" ", "   "}, true),
163 
164             // Duplicate blank names and null names
165             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {" ", null}, false),
166             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", null}, false),
167             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {" ", null}, false),
168             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {" ", null}, false),
169             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {" ", null}, true),
170             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {" ", null}, true),
171 
172             // Duplicate non-empty and empty names
173             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A", "", ""}, false),
174             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", "", ""}, false),
175             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A", "", ""}, false),
176             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A", "", ""}, false),
177             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A", "", ""}, false),
178             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A", "", ""}, true),
179 
180             // Non-duplicate non-empty and duplicate empty names
181             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "B", "", ""}, false),
182             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "B", "", ""}, false),
183             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "B", "", ""}, false),
184             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "B", "", ""}, false),
185             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "B", "", ""}, true),
186             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "B", "", ""}, true),
187 
188             // Duplicate non-empty and blank names
189             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A", " ", " "}, false),
190             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", " ", " "}, false),
191             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A", " ", " "}, false),
192             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A", " ", " "}, false),
193             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A", " ", " "}, false),
194             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A", " ", " "}, true),
195 
196             // Duplicate non-empty and null names
197             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "A", null, null}, false),
198             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", null, null}, false),
199             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "A", null, null}, false),
200             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "A", null, null}, false),
201             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "A", null, null}, false),
202             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "A", null, null}, true),
203 
204             // Duplicate blank names
205             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", "", ""}, false),
206             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "", ""}, false),
207             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", "", ""}, false),
208             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", "", ""}, false),
209             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", "", ""}, true),
210             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", "", ""}, true),
211 
212             // Duplicate null names
213             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", null, null}, false),
214             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", null, null}, false),
215             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", null, null}, false),
216             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", null, null}, false),
217             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", null, null}, true),
218             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", null, null}, true),
219 
220             // Duplicate blank names (1+3 spaces)
221             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, false, new String[] {"A", " ", "   "}, false),
222             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", " ", "   "}, false),
223             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, false, new String[] {"A", " ", "   "}, false),
224             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  false, new String[] {"A", " ", "   "}, false),
225             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  false, new String[] {"A", " ", "   "}, true),
226             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  false, new String[] {"A", " ", "   "}, true),
227 
228             // Duplicate names (case insensitive)
229             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true , new String[] {"A", "a"}, false),
230             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true , new String[] {"A", "a"}, false),
231             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true , new String[] {"A", "a"}, true),
232             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true , new String[] {"A", "a"}, false),
233             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true , new String[] {"A", "a"}, false),
234             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true , new String[] {"A", "a"}, true),
235 
236             // Duplicate non-empty (case insensitive) and empty names
237             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true, new String[] {"A", "a", "", ""}, false),
238             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", "", ""}, false),
239             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true, new String[] {"A", "a", "", ""}, false),
240             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true, new String[] {"A", "a", "", ""}, false),
241             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true, new String[] {"A", "a", "", ""}, false),
242             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true, new String[] {"A", "a", "", ""}, true),
243 
244             // Duplicate non-empty (case insensitive) and blank names
245             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true, new String[] {"A", "a", " ", " "}, false),
246             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", " ", " "}, false),
247             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true, new String[] {"A", "a", " ", " "}, false),
248             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true, new String[] {"A", "a", " ", " "}, false),
249             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true, new String[] {"A", "a", " ", " "}, false),
250             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true, new String[] {"A", "a", " ", " "}, true),
251 
252             // Duplicate non-empty (case insensitive) and null names
253             Arguments.of(DuplicateHeaderMode.DISALLOW,    false, true, new String[] {"A", "a", null, null}, false),
254             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", null, null}, false),
255             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   false, true, new String[] {"A", "a", null, null}, false),
256             Arguments.of(DuplicateHeaderMode.DISALLOW,    true,  true, new String[] {"A", "a", null, null}, false),
257             Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true,  true, new String[] {"A", "a", null, null}, false),
258             Arguments.of(DuplicateHeaderMode.ALLOW_ALL,   true,  true, new String[] {"A", "a", null, null}, true)
259         );
260     }
261 
262     /**
263      * Tests duplicate headers with the CSVFormat.
264      *
265      * @param duplicateHeaderMode the duplicate header mode
266      * @param allowMissingColumnNames the allow missing column names flag (only used for parsing)
267      * @param ignoreHeaderCase the ignore header case flag (only used for parsing)
268      * @param headers the headers
269      * @param valid true if the settings are expected to be valid, otherwise expect a IllegalArgumentException
270      */
271     @ParameterizedTest
272     @MethodSource(value = {"duplicateHeaderAllowsMissingColumnsNamesData"})
273     public void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode,
274                               final boolean allowMissingColumnNames,
275                               final boolean ignoreHeaderCase,
276                               final String[] headers,
277                               final boolean valid) {
278         final CSVFormat.Builder builder =
279             CSVFormat.DEFAULT.builder()
280                              .setDuplicateHeaderMode(duplicateHeaderMode)
281                              .setAllowMissingColumnNames(allowMissingColumnNames)
282                              .setIgnoreHeaderCase(ignoreHeaderCase)
283                              .setHeader(headers);
284         if (valid) {
285             final CSVFormat format = builder.build();
286             Assertions.assertEquals(duplicateHeaderMode, format.getDuplicateHeaderMode(), "DuplicateHeaderMode");
287             Assertions.assertEquals(allowMissingColumnNames, format.getAllowMissingColumnNames(), "AllowMissingColumnNames");
288             Assertions.assertArrayEquals(headers, format.getHeader(), "Header");
289         } else {
290             Assertions.assertThrows(IllegalArgumentException.class, builder::build);
291         }
292     }
293 
294     /**
295      * Tests duplicate headers with the CSVParser.
296      *
297      * @param duplicateHeaderMode the duplicate header mode
298      * @param allowMissingColumnNames the allow missing column names flag (only used for parsing)
299      * @param ignoreHeaderCase the ignore header case flag (only used for parsing)
300      * @param headers the headers (joined with the CSVFormat delimiter to create a string input)
301      * @param valid true if the settings are expected to be valid, otherwise expect a IllegalArgumentException
302      * @throws IOException Signals that an I/O exception has occurred.
303      */
304     @ParameterizedTest
305     @MethodSource(value = {"duplicateHeaderData"})
306     public void testCSVParser(final DuplicateHeaderMode duplicateHeaderMode,
307                               final boolean allowMissingColumnNames,
308                               final boolean ignoreHeaderCase,
309                               final String[] headers,
310                               final boolean valid) throws IOException {
311         final CSVFormat format =
312             CSVFormat.DEFAULT.builder()
313                              .setDuplicateHeaderMode(duplicateHeaderMode)
314                              .setAllowMissingColumnNames(allowMissingColumnNames)
315                              .setIgnoreHeaderCase(ignoreHeaderCase)
316                              .setNullString("NULL")
317                              .setHeader()
318                              .build();
319         final String input = Arrays.stream(headers)
320                 .map(s -> s == null ? format.getNullString() : s)
321                 .collect(Collectors.joining(format.getDelimiterString()));
322         if (valid) {
323             try(CSVParser parser = CSVParser.parse(input, format)) {
324                 // Parser ignores null headers
325                 final List<String> expected =
326                     Arrays.stream(headers)
327                           .filter(s -> s != null)
328                           .collect(Collectors.toList());
329                 Assertions.assertEquals(expected, parser.getHeaderNames(), "HeaderNames");
330             }
331         } else {
332             Assertions.assertThrows(IllegalArgumentException.class, () -> CSVParser.parse(input, format));
333         }
334     }
335 }