View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.apache.commons.csv;
21  
22  import static org.apache.commons.io.IOUtils.EOF;
23  
24  import java.io.BufferedReader;
25  import java.io.File;
26  import java.io.FileInputStream;
27  import java.io.FileOutputStream;
28  import java.io.IOException;
29  import java.io.InputStream;
30  import java.io.InputStreamReader;
31  import java.io.OutputStream;
32  import java.io.Reader;
33  import java.lang.reflect.Constructor;
34  import java.lang.reflect.InvocationTargetException;
35  import java.nio.charset.StandardCharsets;
36  import java.nio.file.Files;
37  import java.nio.file.Paths;
38  import java.util.zip.GZIPInputStream;
39  
40  import org.apache.commons.io.FileUtils;
41  import org.apache.commons.io.IOUtils;
42  
43  /**
44   * Basic test harness.
45   */
46  @SuppressWarnings("boxing")
47  public class PerformanceTest {
48  
49      @FunctionalInterface
50      private interface CSVParserFactory {
51          CSVParser createParser() throws IOException;
52      }
53  
54      // Container for basic statistics
55      private static final class Stats {
56          final int count;
57          final int fields;
58  
59          Stats(final int c, final int f) {
60              count = c;
61              fields = f;
62          }
63      }
64  
65      private static final String[] PROPERTY_NAMES = { "java.version", // Java Runtime Environment version
66              "java.vendor", // Java Runtime Environment vendor
67  //        "java.vm.specification.version", // Java Virtual Machine specification version
68  //        "java.vm.specification.vendor",  // Java Virtual Machine specification vendor
69  //        "java.vm.specification.name",    // Java Virtual Machine specification name
70              "java.vm.version", // Java Virtual Machine implementation version
71  //        "java.vm.vendor",                // Java Virtual Machine implementation vendor
72              "java.vm.name", // Java Virtual Machine implementation name
73  //        "java.specification.version",    // Java Runtime Environment specification version
74  //        "java.specification.vendor",     // Java Runtime Environment specification vendor
75  //        "java.specification.name",       // Java Runtime Environment specification name
76  
77              "os.name", // Operating system name
78              "os.arch", // Operating system architecture
79              "os.version", // Operating system version
80      };
81      private static int max = 11; // skip first test
82  
83      private static int num; // number of elapsed times recorded
84  
85      private static final long[] ELAPSED_TIMES = new long[max];
86      private static final CSVFormat format = CSVFormat.EXCEL;
87  
88      private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz";
89  
90      private static final File BIG_FILE = new File(FileUtils.getTempDirectoryPath(), "worldcitiespop.txt");
91  
92      private static Reader createReader() throws IOException {
93          return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1);
94      }
95  
96      private static Lexer createTestCSVLexer(final String test, final ExtendedBufferedReader input)
97              throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception {
98          return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input);
99      }
100 
101     private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception {
102         @SuppressWarnings("unchecked")
103         final Class<Lexer> lexer = (Class<Lexer>) Class.forName("org.apache.commons.csv." + clazz);
104         return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class);
105     }
106 
107     private static Stats iterate(final Iterable<CSVRecord> iterable) {
108         int count = 0;
109         int fields = 0;
110         for (final CSVRecord record : iterable) {
111             count++;
112             fields += record.size();
113         }
114         return new Stats(count, fields);
115     }
116 
117     public static void main(final String[] args) throws Exception {
118         if (BIG_FILE.exists()) {
119             System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length());
120         } else {
121             System.out.println("Decompressing test fixture to: " + BIG_FILE + "...");
122             try (InputStream input = new GZIPInputStream(PerformanceTest.class.getClassLoader().getResourceAsStream(TEST_RESRC));
123                     OutputStream output = new FileOutputStream(BIG_FILE)) {
124                 IOUtils.copy(input, output);
125                 System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length()));
126             }
127         }
128         final int argc = args.length;
129         if (argc > 0) {
130             max = Integer.parseInt(args[0]);
131         }
132 
133         final String[] tests;
134         if (argc > 1) {
135             tests = new String[argc - 1];
136             System.arraycopy(args, 1, tests, 0, argc - 1);
137         } else {
138             tests = new String[] { "file", "split", "extb", "exts", "csv", "csv-path", "csv-path-db", "csv-url", "lexreset", "lexnew" };
139         }
140         for (final String p : PROPERTY_NAMES) {
141             System.out.printf("%s=%s%n", p, System.getProperty(p));
142         }
143         System.out.printf("Max count: %d%n%n", max);
144 
145         for (final String test : tests) {
146             switch (test) {
147             case "file":
148                 testReadBigFile(false);
149                 break;
150             case "split":
151                 testReadBigFile(true);
152                 break;
153             case "csv":
154                 testParseCommonsCSV();
155                 break;
156             case "csv-path":
157                 testParsePath();
158                 break;
159             case "csv-path-db":
160                 testParsePathDoubleBuffering();
161                 break;
162             case "csv-url":
163                 testParseURL();
164                 break;
165             case "lexreset":
166                 testCSVLexer(false, test);
167                 break;
168             case "lexnew":
169                 testCSVLexer(true, test);
170                 break;
171             default:
172                 if (test.startsWith("CSVLexer")) {
173                     testCSVLexer(false, test);
174                 } else if ("extb".equals(test)) {
175                     testExtendedBuffer(false);
176                 } else if ("exts".equals(test)) {
177                     testExtendedBuffer(true);
178                 } else {
179                     System.out.printf("Invalid test name: %s%n", test);
180                 }
181                 break;
182             }
183         }
184     }
185 
186     private static Stats readAll(final BufferedReader in, final boolean split) throws IOException {
187         int count = 0;
188         int fields = 0;
189         String record;
190         while ((record = in.readLine()) != null) {
191             count++;
192             fields += split ? record.split(",").length : 1;
193         }
194         return new Stats(count, fields);
195     }
196 
197     // calculate and show average
198     private static void show() {
199         if (num > 1) {
200             long tot = 0;
201             for (int i = 1; i < num; i++) { // skip first test
202                 tot += ELAPSED_TIMES[i];
203             }
204             System.out.printf("%-20s: %5dms%n%n", "Average(not first)", tot / (num - 1));
205         }
206         num = 0; // ready for next set
207     }
208 
209     // Display end stats; store elapsed for average
210     private static void show(final String msg, final Stats s, final long start) {
211         final long elapsed = System.currentTimeMillis() - start;
212         System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields);
213         ELAPSED_TIMES[num] = elapsed;
214         num++;
215     }
216 
217     private static void testCSVLexer(final boolean newToken, final String test) throws Exception {
218         Token token = new Token();
219         String dynamic = "";
220         for (int i = 0; i < max; i++) {
221             final String simpleName;
222             final Stats stats;
223             final long startMillis;
224             try (ExtendedBufferedReader input = new ExtendedBufferedReader(createReader());
225                     Lexer lexer = createTestCSVLexer(test, input)) {
226                 if (test.startsWith("CSVLexer")) {
227                     dynamic = "!";
228                 }
229                 simpleName = lexer.getClass().getSimpleName();
230                 int count = 0;
231                 int fields = 0;
232                 startMillis = System.currentTimeMillis();
233                 do {
234                     if (newToken) {
235                         token = new Token();
236                     } else {
237                         token.reset();
238                     }
239                     lexer.nextToken(token);
240                     switch (token.type) {
241                     case EOF:
242                         break;
243                     case EORECORD:
244                         fields++;
245                         count++;
246                         break;
247                     case INVALID:
248                         throw new IOException("invalid parse sequence <" + token.content.toString() + ">");
249                     case TOKEN:
250                         fields++;
251                         break;
252                     case COMMENT: // not really expecting these
253                         break;
254                     default:
255                         throw new IllegalStateException("Unexpected Token type: " + token.type);
256                     }
257                 } while (!token.type.equals(Token.Type.EOF));
258                 stats = new Stats(count, fields);
259             }
260             show(simpleName + dynamic + " " + (newToken ? "new" : "reset"), stats, startMillis);
261         }
262         show();
263     }
264 
265     private static void testExtendedBuffer(final boolean makeString) throws Exception {
266         for (int i = 0; i < max; i++) {
267             int fields = 0;
268             int lines = 0;
269             final long startMillis;
270             try (ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) {
271                 startMillis = System.currentTimeMillis();
272                 int read;
273                 if (makeString) {
274                     StringBuilder sb = new StringBuilder();
275                     while ((read = in.read()) != EOF) {
276                         sb.append((char) read);
277                         if (read == ',') { // count delimiters
278                             sb.toString();
279                             sb = new StringBuilder();
280                             fields++;
281                         } else if (read == '\n') {
282                             sb.toString();
283                             sb = new StringBuilder();
284                             lines++;
285                         }
286                     }
287                 } else {
288                     while ((read = in.read()) != EOF) {
289                         if (read == ',') { // count delimiters
290                             fields++;
291                         } else if (read == '\n') {
292                             lines++;
293                         }
294                     }
295                 }
296                 fields += lines; // EOL is a delimiter too
297             }
298             show("Extended" + (makeString ? " toString" : ""), new Stats(lines, fields), startMillis);
299         }
300         show();
301     }
302 
303     private static void testParseCommonsCSV() throws Exception {
304         testParser("CSV", () -> CSVParser.builder().setReader(createReader()).setFormat(format).get());
305     }
306 
307     private static void testParsePath() throws Exception {
308         testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format));
309     }
310 
311     private static void testParsePathDoubleBuffering() throws Exception {
312         testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format));
313     }
314 
315     private static void testParser(final String msg, final CSVParserFactory fac) throws Exception {
316         for (int i = 0; i < max; i++) {
317             final long startMillis;
318             final Stats stats;
319             try (CSVParser parser = fac.createParser()) {
320                 startMillis = System.currentTimeMillis();
321                 stats = iterate(parser);
322             }
323             show(msg, stats, startMillis);
324         }
325         show();
326     }
327 
328     private static void testParseURL() throws Exception {
329         testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format));
330     }
331 
332     private static void testReadBigFile(final boolean split) throws Exception {
333         for (int i = 0; i < max; i++) {
334             final long startMillis;
335             final Stats stats;
336             try (BufferedReader in = new BufferedReader(createReader())) {
337                 startMillis = System.currentTimeMillis();
338                 stats = readAll(in, split);
339             }
340             show(split ? "file+split" : "file", stats, startMillis);
341         }
342         show();
343     }
344 }
345