View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.apache.commons.csv;
21  
22  import static org.junit.jupiter.api.Assertions.assertEquals;
23  
24  import java.io.IOException;
25  import java.io.InputStreamReader;
26  import java.io.Reader;
27  import java.nio.charset.StandardCharsets;
28  
29  import org.junit.jupiter.api.Test;
30  
31  class JiraCsv196Test {
32  
33      private Reader getTestInput(final String path) {
34          return new InputStreamReader(ClassLoader.getSystemClassLoader().getResourceAsStream(path));
35      }
36  
37      @Test
38      void testParseFourBytes() throws IOException {
39          final CSVFormat format = CSVFormat.Builder.create().setDelimiter(',').setQuote('\'').get();
40          // @formatter:off
41          try (@SuppressWarnings("resource") // parser closes the reader.
42              CSVParser parser = new CSVParser.Builder()
43                  .setFormat(format)
44                  .setReader(getTestInput("org/apache/commons/csv/CSV-196/emoji.csv"))
45                  .setCharset(StandardCharsets.UTF_8)
46                  .setTrackBytes(true)
47                  .get()) {
48              // @formatter:on
49              final long[] charByteKey = { 0, 84, 701, 1318, 1935 };
50              int idx = 0;
51              for (final CSVRecord record : parser) {
52                  assertEquals(charByteKey[idx++], record.getBytePosition(), "At index " + idx);
53              }
54          }
55      }
56  
57      @Test
58      void testParseThreeBytes() throws IOException {
59          final CSVFormat format = CSVFormat.Builder.create().setDelimiter(',').setQuote('\'').get();
60          // @formatter:off
61          try (@SuppressWarnings("resource") // parser closes the reader.
62              CSVParser parser = new CSVParser.Builder()
63                  .setFormat(format)
64                  .setReader(getTestInput("org/apache/commons/csv/CSV-196/japanese.csv"))
65                  .setCharset(StandardCharsets.UTF_8)
66                  .setTrackBytes(true)
67                  .get()) {
68              // @formatter:on
69              final long[] charByteKey = { 0, 89, 242, 395 };
70              int idx = 0;
71              for (final CSVRecord record : parser) {
72                  assertEquals(charByteKey[idx++], record.getBytePosition(), "At index " + idx);
73              }
74          }
75      }
76  }