View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.apache.commons.csv;
21  
22  import java.io.BufferedReader;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.Reader;
26  import java.io.StringReader;
27  import java.nio.charset.StandardCharsets;
28  import java.util.Iterator;
29  import java.util.Scanner;
30  import java.util.concurrent.TimeUnit;
31  import java.util.zip.GZIPInputStream;
32  
33  import org.apache.commons.io.IOUtils;
34  import org.apache.commons.lang3.StringUtils;
35  import org.openjdk.jmh.annotations.Benchmark;
36  import org.openjdk.jmh.annotations.BenchmarkMode;
37  import org.openjdk.jmh.annotations.Fork;
38  import org.openjdk.jmh.annotations.Measurement;
39  import org.openjdk.jmh.annotations.Mode;
40  import org.openjdk.jmh.annotations.OutputTimeUnit;
41  import org.openjdk.jmh.annotations.Scope;
42  import org.openjdk.jmh.annotations.Setup;
43  import org.openjdk.jmh.annotations.State;
44  import org.openjdk.jmh.annotations.Threads;
45  import org.openjdk.jmh.annotations.Warmup;
46  import org.openjdk.jmh.infra.Blackhole;
47  import org.supercsv.io.CsvListReader;
48  import org.supercsv.prefs.CsvPreference;
49  
50  import com.generationjava.io.CsvReader;
51  import com.opencsv.CSVParserBuilder;
52  import com.opencsv.CSVReaderBuilder;
53  
54  @BenchmarkMode(Mode.AverageTime)
55  @Fork(value = 1, jvmArgs = {"-server", "-Xms1024M", "-Xmx1024M"})
56  @Threads(1)
57  @Warmup(iterations = 5)
58  @Measurement(iterations = 20)
59  @OutputTimeUnit(TimeUnit.MILLISECONDS)
60  @State(Scope.Benchmark)
61  public class CSVBenchmark {
62  
63      private static final class CountingReaderCallback implements org.skife.csv.ReaderCallback {
64          public int count;
65  
66          @Override
67          public void onRow(final String[] fields) {
68              count++;
69          }
70      }
71  
72      private String data;
73  
74      private Reader getReader() {
75          return new StringReader(data);
76      }
77  
78      /**
79       * Load the data in memory before running the benchmarks, this takes out IO from the results.
80       */
81      @Setup
82      public void init() throws IOException {
83          try (InputStream in = this.getClass().getClassLoader().getResourceAsStream("org/apache/commons/csv/perf/worldcitiespop.txt.gz");
84                  InputStream gzin = new GZIPInputStream(in, 8192)) {
85              this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1);
86          }
87      }
88  
89      @Benchmark
90      public int parseCommonsCSV(final Blackhole bh) throws Exception {
91          int count = 0;
92  
93          try (Reader in = getReader()) {
94              final CSVFormat format = CSVFormat.Builder.create().setSkipHeaderRecord(true).build();
95              final Iterator<CSVRecord> iter = format.parse(in).iterator();
96              while (iter.hasNext()) {
97                  count++;
98                  iter.next();
99              }
100         }
101 
102         bh.consume(count);
103         return count;
104     }
105 
106     @Benchmark
107     public int parseGenJavaCSV(final Blackhole bh) throws Exception {
108         int count = 0;
109 
110         try (Reader in = getReader()) {
111             final CsvReader reader = new CsvReader(in);
112             reader.setFieldDelimiter(',');
113             while (reader.readLine() != null) {
114                 count++;
115             }
116         }
117 
118         bh.consume(count);
119         return count;
120     }
121 
122     @Benchmark
123     public int parseJavaCSV(final Blackhole bh) throws Exception {
124         int count = 0;
125 
126         try (Reader in = getReader()) {
127             final com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ',');
128             reader.setRecordDelimiter('\n');
129             while (reader.readRecord()) {
130                 count++;
131             }
132         }
133 
134         bh.consume(count);
135         return count;
136     }
137 
138     @Benchmark
139     public int parseOpenCSV(final Blackhole bh) throws Exception {
140         int count = 0;
141 
142         final com.opencsv.CSVParser parser = new CSVParserBuilder()
143           .withSeparator(',').withIgnoreQuotations(true).build();
144 
145         try (Reader in = getReader()) {
146             final com.opencsv.CSVReader reader = new CSVReaderBuilder(in).withSkipLines(1).withCSVParser(parser).build();
147             while (reader.readNext() != null) {
148                 count++;
149             }
150         }
151 
152         bh.consume(count);
153         return count;
154     }
155 
156     @Benchmark
157     public int parseSkifeCSV(final Blackhole bh) throws Exception {
158         final org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader();
159         reader.setSeperator(',');
160         final CountingReaderCallback callback = new CountingReaderCallback();
161 
162         try (Reader in = getReader()) {
163           reader.parse(in, callback);
164         }
165 
166         bh.consume(callback);
167         return callback.count;
168     }
169 
170     @Benchmark
171     public int parseSuperCSV(final Blackhole bh) throws Exception {
172         int count = 0;
173 
174         try (CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) {
175             while (reader.read() != null) {
176                 count++;
177             }
178         }
179 
180         bh.consume(count);
181         return count;
182     }
183 
184     @Benchmark
185     public int read(final Blackhole bh) throws Exception {
186         int count = 0;
187 
188         try (BufferedReader reader = new BufferedReader(getReader())) {
189             while (reader.readLine() != null) {
190               count++;
191             }
192         }
193 
194         bh.consume(count);
195         return count;
196     }
197 
198     @Benchmark
199     public int scan(final Blackhole bh) throws Exception {
200         int count = 0;
201 
202         try (Scanner scanner = new Scanner(getReader())) {
203             while (scanner.hasNextLine()) {
204               scanner.nextLine();
205               count++;
206             }
207         }
208 
209         bh.consume(count);
210         return count;
211     }
212 
213     @Benchmark
214     public int split(final Blackhole bh) throws Exception {
215       int count = 0;
216 
217       try (BufferedReader reader = new BufferedReader(getReader())) {
218           String line;
219           while ((line = reader.readLine()) != null) {
220             final String[] values = StringUtils.split(line, ',');
221             count += values.length;
222           }
223       }
224 
225       bh.consume(count);
226       return count;
227     }
228 }