View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import java.io.BufferedReader;
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.Reader;
24  import java.io.StringReader;
25  import java.nio.charset.StandardCharsets;
26  import java.util.Iterator;
27  import java.util.Scanner;
28  import java.util.concurrent.TimeUnit;
29  import java.util.zip.GZIPInputStream;
30  
31  import com.generationjava.io.CsvReader;
32  import com.opencsv.CSVParserBuilder;
33  import com.opencsv.CSVReaderBuilder;
34  
35  import org.apache.commons.io.IOUtils;
36  import org.apache.commons.lang3.StringUtils;
37  import org.openjdk.jmh.annotations.Benchmark;
38  import org.openjdk.jmh.annotations.BenchmarkMode;
39  import org.openjdk.jmh.annotations.Fork;
40  import org.openjdk.jmh.annotations.Measurement;
41  import org.openjdk.jmh.annotations.Mode;
42  import org.openjdk.jmh.annotations.OutputTimeUnit;
43  import org.openjdk.jmh.annotations.Scope;
44  import org.openjdk.jmh.annotations.Setup;
45  import org.openjdk.jmh.annotations.State;
46  import org.openjdk.jmh.annotations.Threads;
47  import org.openjdk.jmh.annotations.Warmup;
48  import org.openjdk.jmh.infra.Blackhole;
49  import org.supercsv.io.CsvListReader;
50  import org.supercsv.prefs.CsvPreference;
51  
52  @BenchmarkMode(Mode.AverageTime)
53  @Fork(value = 1, jvmArgs = {"-server", "-Xms1024M", "-Xmx1024M"})
54  @Threads(1)
55  @Warmup(iterations = 5)
56  @Measurement(iterations = 20)
57  @OutputTimeUnit(TimeUnit.MILLISECONDS)
58  @State(Scope.Benchmark)
59  public class CSVBenchmark {
60  
61      private String data;
62  
63      /**
64       * Load the data in memory before running the benchmarks, this takes out IO from the results.
65       */
66      @Setup
67      public void init() throws IOException {
68          InputStream in = this.getClass().getClassLoader().getResourceAsStream(
69              "org/apache/commons/csv/perf/worldcitiespop.txt.gz");
70          try (final InputStream gzin = new GZIPInputStream(in, 8192)) {
71              this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1);
72          }
73      }
74  
75      private Reader getReader() {
76          return new StringReader(data);
77      }
78  
79      @Benchmark
80      public int read(final Blackhole bh) throws Exception {
81          int count = 0;
82  
83          try (BufferedReader reader = new BufferedReader(getReader())) {
84              while (reader.readLine() != null) {
85                count++;
86              }
87          }
88  
89          bh.consume(count);
90          return count;
91      }
92  
93      @Benchmark
94      public int scan(final Blackhole bh) throws Exception {
95          int count = 0;
96  
97          try (Scanner scanner = new Scanner(getReader())) {
98              while (scanner.hasNextLine()) {
99                scanner.nextLine();
100               count++;
101             }
102         }
103 
104         bh.consume(count);
105         return count;
106     }
107 
108     @Benchmark
109     public int split(final Blackhole bh) throws Exception {
110       int count = 0;
111 
112       try (BufferedReader reader = new BufferedReader(getReader())) {
113           String line;
114           while ((line = reader.readLine()) != null) {
115             final String[] values = StringUtils.split(line, ',');
116             count += values.length;
117           }
118       }
119 
120       bh.consume(count);
121       return count;
122     }
123 
124     @Benchmark
125     public int parseCommonsCSV(final Blackhole bh) throws Exception {
126         int count = 0;
127 
128         try (final Reader in = getReader()) {
129             final CSVFormat format = CSVFormat.Builder.create().setSkipHeaderRecord(true).build();
130             Iterator<CSVRecord> iter = format.parse(in).iterator();
131             while (iter.hasNext()) {
132                 count++;
133                 iter.next();
134             }
135         }
136 
137         bh.consume(count);
138         return count;
139     }
140 
141     @Benchmark
142     public int parseGenJavaCSV(final Blackhole bh) throws Exception {
143         int count = 0;
144 
145         try (final Reader in = getReader()) {
146             final CsvReader reader = new CsvReader(in);
147             reader.setFieldDelimiter(',');
148             while (reader.readLine() != null) {
149                 count++;
150             }
151         }
152 
153         bh.consume(count);
154         return count;
155     }
156 
157     @Benchmark
158     public int parseJavaCSV(final Blackhole bh) throws Exception {
159         int count = 0;
160 
161         try (final Reader in = getReader()) {
162             final com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ',');
163             reader.setRecordDelimiter('\n');
164             while (reader.readRecord()) {
165                 count++;
166             }
167         }
168 
169         bh.consume(count);
170         return count;
171     }
172 
173     @Benchmark
174     public int parseOpenCSV(final Blackhole bh) throws Exception {
175         int count = 0;
176 
177         final com.opencsv.CSVParser parser = new CSVParserBuilder()
178           .withSeparator(',').withIgnoreQuotations(true).build();
179 
180         try (final Reader in = getReader()) {
181             final com.opencsv.CSVReader reader = new CSVReaderBuilder(in).withSkipLines(1).withCSVParser(parser).build();
182             while (reader.readNext() != null) {
183                 count++;
184             }
185         }
186 
187         bh.consume(count);
188         return count;
189     }
190 
191     @Benchmark
192     public int parseSkifeCSV(final Blackhole bh) throws Exception {
193         final org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader();
194         reader.setSeperator(',');
195         final CountingReaderCallback callback = new CountingReaderCallback();
196 
197         try (final Reader in = getReader()) {
198           reader.parse(in, callback);
199         }
200 
201         bh.consume(callback);
202         return callback.count;
203     }
204 
205     private static class CountingReaderCallback implements org.skife.csv.ReaderCallback {
206         public int count;
207 
208         @Override
209         public void onRow(final String[] fields) {
210             count++;
211         }
212     }
213 
214     @Benchmark
215     public int parseSuperCSV(final Blackhole bh) throws Exception {
216         int count = 0;
217 
218         try (final CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) {
219             while (reader.read() != null) {
220                 count++;
221             }
222         }
223 
224         bh.consume(count);
225         return count;
226     }
227 }