View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import java.io.BufferedReader;
21  import java.io.File;
22  import java.io.FileInputStream;
23  import java.io.FileOutputStream;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.io.InputStreamReader;
27  import java.io.OutputStream;
28  import java.io.Reader;
29  import java.lang.reflect.Constructor;
30  import java.lang.reflect.InvocationTargetException;
31  import java.nio.charset.StandardCharsets;
32  import java.nio.file.Files;
33  import java.nio.file.Paths;
34  import java.util.zip.GZIPInputStream;
35  
36  import org.apache.commons.io.IOUtils;
37  
38  /**
39   * Basic test harness.
40   */
41  @SuppressWarnings("boxing")
42  public class PerformanceTest {
43  
44      @FunctionalInterface
45      private interface CSVParserFactory {
46          CSVParser createParser() throws IOException;
47      }
48  
49      // Container for basic statistics
50      private static class Stats {
51          final int count;
52          final int fields;
53          Stats(final int c, final int f) {
54              count = c;
55              fields = f;
56          }
57      }
58  
59      private static final String[] PROPERTY_NAMES = {
60          "java.version",                  // Java Runtime Environment version
61          "java.vendor",                   // Java Runtime Environment vendor
62  //        "java.vm.specification.version", // Java Virtual Machine specification version
63  //        "java.vm.specification.vendor",  // Java Virtual Machine specification vendor
64  //        "java.vm.specification.name",    // Java Virtual Machine specification name
65          "java.vm.version",               // Java Virtual Machine implementation version
66  //        "java.vm.vendor",                // Java Virtual Machine implementation vendor
67          "java.vm.name",                  // Java Virtual Machine implementation name
68  //        "java.specification.version",    // Java Runtime Environment specification version
69  //        "java.specification.vendor",     // Java Runtime Environment specification vendor
70  //        "java.specification.name",       // Java Runtime Environment specification name
71  
72          "os.name",                       // Operating system name
73          "os.arch",                       // Operating system architecture
74          "os.version",                    // Operating system version
75      };
76      private static int max = 11; // skip first test
77  
78      private static int num; // number of elapsed times recorded
79  
80      private static final long[] ELAPSED_TIMES = new long[max];
81      private static final CSVFormat format = CSVFormat.EXCEL;
82  
83      private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz";
84  
85      private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt");
86  
87      private static Reader createReader() throws IOException {
88          return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1);
89      }
90  
91      private static Lexer createTestCSVLexer(final String test, final ExtendedBufferedReader input)
92              throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception {
93          return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input);
94      }
95  
96      private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception {
97          @SuppressWarnings("unchecked")
98          final Class<Lexer> lexer = (Class<Lexer>) Class.forName("org.apache.commons.csv." + clazz);
99          return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class);
100     }
101 
102     private static Stats iterate(final Iterable<CSVRecord> iterable) {
103         int count = 0;
104         int fields = 0;
105         for (final CSVRecord record : iterable) {
106             count++;
107             fields += record.size();
108         }
109         return new Stats(count, fields);
110     }
111 
112     public static void main(final String [] args) throws Exception {
113         if (BIG_FILE.exists()) {
114             System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length());
115         } else {
116           System.out.println("Decompressing test fixture to: " + BIG_FILE + "...");
117           try (
118               final InputStream input = new GZIPInputStream(
119                   PerformanceTest.class.getClassLoader().getResourceAsStream(TEST_RESRC));
120               final OutputStream output = new FileOutputStream(BIG_FILE)) {
121               IOUtils.copy(input, output);
122               System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length()));
123           }
124         }
125         final int argc = args.length;
126         if (argc > 0) {
127             max = Integer.parseInt(args[0]);
128         }
129 
130         final String[] tests;
131         if (argc > 1) {
132             tests = new String[argc - 1];
133             System.arraycopy(args, 1, tests, 0, argc - 1);
134         } else {
135             tests = new String[] { "file", "split", "extb", "exts", "csv", "csv-path", "csv-path-db", "csv-url", "lexreset", "lexnew" };
136         }
137         for (final String p : PROPERTY_NAMES) {
138             System.out.printf("%s=%s%n", p, System.getProperty(p));
139         }
140         System.out.printf("Max count: %d%n%n", max);
141 
142         for (final String test : tests) {
143             if ("file".equals(test)) {
144                 testReadBigFile(false);
145             } else if ("split".equals(test)) {
146                 testReadBigFile(true);
147             } else if ("csv".equals(test)) {
148                 testParseCommonsCSV();
149             } else if ("csv-path".equals(test)) {
150                 testParsePath();
151             } else if ("csv-path-db".equals(test)) {
152                 testParsePathDoubleBuffering();
153             } else if ("csv-url".equals(test)) {
154                 testParseURL();
155             } else if ("lexreset".equals(test)) {
156                 testCSVLexer(false, test);
157             } else if ("lexnew".equals(test)) {
158                 testCSVLexer(true, test);
159             } else if (test.startsWith("CSVLexer")) {
160                 testCSVLexer(false, test);
161             } else if ("extb".equals(test)) {
162                 testExtendedBuffer(false);
163             } else if ("exts".equals(test)) {
164                 testExtendedBuffer(true);
165             } else {
166                 System.out.printf("Invalid test name: %s%n", test);
167             }
168         }
169     }
170 
171     private static Stats readAll(final BufferedReader in, final boolean split) throws IOException {
172         int count = 0;
173         int fields = 0;
174         String record;
175         while ((record = in.readLine()) != null) {
176             count++;
177             fields += split ? record.split(",").length : 1;
178         }
179         return new Stats(count, fields);
180     }
181 
182     // calculate and show average
183     private static void show(){
184         if (num > 1) {
185             long tot = 0;
186             for (int i = 1; i < num; i++) { // skip first test
187                 tot += ELAPSED_TIMES[i];
188             }
189             System.out.printf("%-20s: %5dms%n%n", "Average(not first)", tot / (num - 1));
190         }
191         num = 0; // ready for next set
192     }
193 
194     // Display end stats; store elapsed for average
195     private static void show(final String msg, final Stats s, final long start) {
196         final long elapsed = System.currentTimeMillis() - start;
197         System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields);
198         ELAPSED_TIMES[num] = elapsed;
199         num++;
200     }
201 
202     private static void testCSVLexer(final boolean newToken, final String test) throws Exception {
203         Token token = new Token();
204         String dynamic = "";
205         for (int i = 0; i < max; i++) {
206             final String simpleName;
207             final Stats stats;
208             final long startMillis;
209             try (final ExtendedBufferedReader input = new ExtendedBufferedReader(createReader());
210                     final Lexer lexer = createTestCSVLexer(test, input)) {
211                 if (test.startsWith("CSVLexer")) {
212                     dynamic = "!";
213                 }
214                 simpleName = lexer.getClass().getSimpleName();
215                 int count = 0;
216                 int fields = 0;
217                 startMillis = System.currentTimeMillis();
218                 do {
219                     if (newToken) {
220                         token = new Token();
221                     } else {
222                         token.reset();
223                     }
224                     lexer.nextToken(token);
225                     switch (token.type) {
226                     case EOF:
227                         break;
228                     case EORECORD:
229                         fields++;
230                         count++;
231                         break;
232                     case INVALID:
233                         throw new IOException("invalid parse sequence <" + token.content.toString() + ">");
234                     case TOKEN:
235                         fields++;
236                         break;
237                     case COMMENT: // not really expecting these
238                         break;
239                     default:
240                         throw new IllegalStateException("Unexpected Token type: " + token.type);
241                     }
242                 } while (!token.type.equals(Token.Type.EOF));
243                 stats = new Stats(count, fields);
244             }
245             show(simpleName + dynamic + " " + (newToken ? "new" : "reset"), stats, startMillis);
246         }
247         show();
248     }
249 
250     private static void testExtendedBuffer(final boolean makeString) throws Exception {
251         for (int i = 0; i < max; i++) {
252             int fields = 0;
253             int lines = 0;
254             final long startMillis;
255             try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) {
256                 startMillis = System.currentTimeMillis();
257                 int read;
258                 if (makeString) {
259                     StringBuilder sb = new StringBuilder();
260                     while ((read = in.read()) != -1) {
261                         sb.append((char) read);
262                         if (read == ',') { // count delimiters
263                             sb.toString();
264                             sb = new StringBuilder();
265                             fields++;
266                         } else if (read == '\n') {
267                             sb.toString();
268                             sb = new StringBuilder();
269                             lines++;
270                         }
271                     }
272                 } else {
273                     while ((read = in.read()) != -1) {
274                         if (read == ',') { // count delimiters
275                             fields++;
276                         } else if (read == '\n') {
277                             lines++;
278                         }
279                     }
280                 }
281                 fields += lines; // EOL is a delimiter too
282             }
283             show("Extended" + (makeString ? " toString" : ""), new Stats(lines, fields), startMillis);
284         }
285         show();
286     }
287 
288     private static void testParseCommonsCSV() throws Exception {
289         testParser("CSV", () -> new CSVParser(createReader(), format));
290     }
291 
292     private static void testParsePath() throws Exception {
293         testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format));
294     }
295 
296     private static void testParsePathDoubleBuffering() throws Exception {
297         testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format));
298     }
299 
300     private static void testParser(final String msg, final CSVParserFactory fac) throws Exception {
301         for (int i = 0; i < max; i++) {
302             final long startMillis;
303             final Stats stats;
304             try (final CSVParser parser = fac.createParser()) {
305                 startMillis = System.currentTimeMillis();
306                 stats = iterate(parser);
307             }
308             show(msg, stats, startMillis);
309         }
310         show();
311     }
312 
313     private static void testParseURL() throws Exception {
314         testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format));
315     }
316 
317     private static void testReadBigFile(final boolean split) throws Exception {
318         for (int i = 0; i < max; i++) {
319             final long startMillis;
320             final Stats stats;
321             try (final BufferedReader in = new BufferedReader(createReader())) {
322                 startMillis = System.currentTimeMillis();
323                 stats = readAll(in, split);
324             }
325             show(split ? "file+split" : "file", stats, startMillis);
326         }
327         show();
328     }
329 
330 }