1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.commons.csv;
21
22 import static org.apache.commons.io.IOUtils.EOF;
23
24 import java.io.BufferedReader;
25 import java.io.File;
26 import java.io.FileInputStream;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.io.InputStreamReader;
31 import java.io.OutputStream;
32 import java.io.Reader;
33 import java.lang.reflect.Constructor;
34 import java.lang.reflect.InvocationTargetException;
35 import java.nio.charset.StandardCharsets;
36 import java.nio.file.Files;
37 import java.nio.file.Paths;
38 import java.util.zip.GZIPInputStream;
39
40 import org.apache.commons.io.FileUtils;
41 import org.apache.commons.io.IOUtils;
42
43
44
45
46 @SuppressWarnings("boxing")
47 public class PerformanceTest {
48
49 @FunctionalInterface
50 private interface CSVParserFactory {
51 CSVParser createParser() throws IOException;
52 }
53
54
55 private static final class Stats {
56 final int count;
57 final int fields;
58
59 Stats(final int c, final int f) {
60 count = c;
61 fields = f;
62 }
63 }
64
65 private static final String[] PROPERTY_NAMES = { "java.version",
66 "java.vendor",
67
68
69
70 "java.vm.version",
71
72 "java.vm.name",
73
74
75
76
77 "os.name",
78 "os.arch",
79 "os.version",
80 };
81 private static int max = 11;
82
83 private static int num;
84
85 private static final long[] ELAPSED_TIMES = new long[max];
86 private static final CSVFormat format = CSVFormat.EXCEL;
87
88 private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz";
89
90 private static final File BIG_FILE = new File(FileUtils.getTempDirectoryPath(), "worldcitiespop.txt");
91
92 private static Reader createReader() throws IOException {
93 return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1);
94 }
95
96 private static Lexer createTestCSVLexer(final String test, final ExtendedBufferedReader input)
97 throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception {
98 return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input);
99 }
100
101 private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception {
102 @SuppressWarnings("unchecked")
103 final Class<Lexer> lexer = (Class<Lexer>) Class.forName("org.apache.commons.csv." + clazz);
104 return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class);
105 }
106
107 private static Stats iterate(final Iterable<CSVRecord> iterable) {
108 int count = 0;
109 int fields = 0;
110 for (final CSVRecord record : iterable) {
111 count++;
112 fields += record.size();
113 }
114 return new Stats(count, fields);
115 }
116
117 public static void main(final String[] args) throws Exception {
118 if (BIG_FILE.exists()) {
119 System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length());
120 } else {
121 System.out.println("Decompressing test fixture to: " + BIG_FILE + "...");
122 try (InputStream input = new GZIPInputStream(PerformanceTest.class.getClassLoader().getResourceAsStream(TEST_RESRC));
123 OutputStream output = new FileOutputStream(BIG_FILE)) {
124 IOUtils.copy(input, output);
125 System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length()));
126 }
127 }
128 final int argc = args.length;
129 if (argc > 0) {
130 max = Integer.parseInt(args[0]);
131 }
132
133 final String[] tests;
134 if (argc > 1) {
135 tests = new String[argc - 1];
136 System.arraycopy(args, 1, tests, 0, argc - 1);
137 } else {
138 tests = new String[] { "file", "split", "extb", "exts", "csv", "csv-path", "csv-path-db", "csv-url", "lexreset", "lexnew" };
139 }
140 for (final String p : PROPERTY_NAMES) {
141 System.out.printf("%s=%s%n", p, System.getProperty(p));
142 }
143 System.out.printf("Max count: %d%n%n", max);
144
145 for (final String test : tests) {
146 switch (test) {
147 case "file":
148 testReadBigFile(false);
149 break;
150 case "split":
151 testReadBigFile(true);
152 break;
153 case "csv":
154 testParseCommonsCSV();
155 break;
156 case "csv-path":
157 testParsePath();
158 break;
159 case "csv-path-db":
160 testParsePathDoubleBuffering();
161 break;
162 case "csv-url":
163 testParseURL();
164 break;
165 case "lexreset":
166 testCSVLexer(false, test);
167 break;
168 case "lexnew":
169 testCSVLexer(true, test);
170 break;
171 default:
172 if (test.startsWith("CSVLexer")) {
173 testCSVLexer(false, test);
174 } else if ("extb".equals(test)) {
175 testExtendedBuffer(false);
176 } else if ("exts".equals(test)) {
177 testExtendedBuffer(true);
178 } else {
179 System.out.printf("Invalid test name: %s%n", test);
180 }
181 break;
182 }
183 }
184 }
185
186 private static Stats readAll(final BufferedReader in, final boolean split) throws IOException {
187 int count = 0;
188 int fields = 0;
189 String record;
190 while ((record = in.readLine()) != null) {
191 count++;
192 fields += split ? record.split(",").length : 1;
193 }
194 return new Stats(count, fields);
195 }
196
197
198 private static void show() {
199 if (num > 1) {
200 long tot = 0;
201 for (int i = 1; i < num; i++) {
202 tot += ELAPSED_TIMES[i];
203 }
204 System.out.printf("%-20s: %5dms%n%n", "Average(not first)", tot / (num - 1));
205 }
206 num = 0;
207 }
208
209
210 private static void show(final String msg, final Stats s, final long start) {
211 final long elapsed = System.currentTimeMillis() - start;
212 System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields);
213 ELAPSED_TIMES[num] = elapsed;
214 num++;
215 }
216
217 private static void testCSVLexer(final boolean newToken, final String test) throws Exception {
218 Token token = new Token();
219 String dynamic = "";
220 for (int i = 0; i < max; i++) {
221 final String simpleName;
222 final Stats stats;
223 final long startMillis;
224 try (ExtendedBufferedReader input = new ExtendedBufferedReader(createReader());
225 Lexer lexer = createTestCSVLexer(test, input)) {
226 if (test.startsWith("CSVLexer")) {
227 dynamic = "!";
228 }
229 simpleName = lexer.getClass().getSimpleName();
230 int count = 0;
231 int fields = 0;
232 startMillis = System.currentTimeMillis();
233 do {
234 if (newToken) {
235 token = new Token();
236 } else {
237 token.reset();
238 }
239 lexer.nextToken(token);
240 switch (token.type) {
241 case EOF:
242 break;
243 case EORECORD:
244 fields++;
245 count++;
246 break;
247 case INVALID:
248 throw new IOException("invalid parse sequence <" + token.content.toString() + ">");
249 case TOKEN:
250 fields++;
251 break;
252 case COMMENT:
253 break;
254 default:
255 throw new IllegalStateException("Unexpected Token type: " + token.type);
256 }
257 } while (!token.type.equals(Token.Type.EOF));
258 stats = new Stats(count, fields);
259 }
260 show(simpleName + dynamic + " " + (newToken ? "new" : "reset"), stats, startMillis);
261 }
262 show();
263 }
264
265 private static void testExtendedBuffer(final boolean makeString) throws Exception {
266 for (int i = 0; i < max; i++) {
267 int fields = 0;
268 int lines = 0;
269 final long startMillis;
270 try (ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) {
271 startMillis = System.currentTimeMillis();
272 int read;
273 if (makeString) {
274 StringBuilder sb = new StringBuilder();
275 while ((read = in.read()) != EOF) {
276 sb.append((char) read);
277 if (read == ',') {
278 sb.toString();
279 sb = new StringBuilder();
280 fields++;
281 } else if (read == '\n') {
282 sb.toString();
283 sb = new StringBuilder();
284 lines++;
285 }
286 }
287 } else {
288 while ((read = in.read()) != EOF) {
289 if (read == ',') {
290 fields++;
291 } else if (read == '\n') {
292 lines++;
293 }
294 }
295 }
296 fields += lines;
297 }
298 show("Extended" + (makeString ? " toString" : ""), new Stats(lines, fields), startMillis);
299 }
300 show();
301 }
302
303 private static void testParseCommonsCSV() throws Exception {
304 testParser("CSV", () -> CSVParser.builder().setReader(createReader()).setFormat(format).get());
305 }
306
307 private static void testParsePath() throws Exception {
308 testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format));
309 }
310
311 private static void testParsePathDoubleBuffering() throws Exception {
312 testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format));
313 }
314
315 private static void testParser(final String msg, final CSVParserFactory fac) throws Exception {
316 for (int i = 0; i < max; i++) {
317 final long startMillis;
318 final Stats stats;
319 try (CSVParser parser = fac.createParser()) {
320 startMillis = System.currentTimeMillis();
321 stats = iterate(parser);
322 }
323 show(msg, stats, startMillis);
324 }
325 show();
326 }
327
328 private static void testParseURL() throws Exception {
329 testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format));
330 }
331
332 private static void testReadBigFile(final boolean split) throws Exception {
333 for (int i = 0; i < max; i++) {
334 final long startMillis;
335 final Stats stats;
336 try (BufferedReader in = new BufferedReader(createReader())) {
337 startMillis = System.currentTimeMillis();
338 stats = readAll(in, split);
339 }
340 show(split ? "file+split" : "file", stats, startMillis);
341 }
342 show();
343 }
344 }
345