1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.csv;
19
20 import java.io.BufferedReader;
21 import java.io.File;
22 import java.io.FileInputStream;
23 import java.io.FileOutputStream;
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.io.InputStreamReader;
27 import java.io.OutputStream;
28 import java.io.Reader;
29 import java.lang.reflect.Constructor;
30 import java.lang.reflect.InvocationTargetException;
31 import java.nio.charset.StandardCharsets;
32 import java.nio.file.Files;
33 import java.nio.file.Paths;
34 import java.util.zip.GZIPInputStream;
35
36 import org.apache.commons.io.IOUtils;
37
38
39
40
41 @SuppressWarnings("boxing")
42 public class PerformanceTest {
43
44 @FunctionalInterface
45 private interface CSVParserFactory {
46 CSVParser createParser() throws IOException;
47 }
48
49
50 private static class Stats {
51 final int count;
52 final int fields;
53 Stats(final int c, final int f) {
54 count = c;
55 fields = f;
56 }
57 }
58
59 private static final String[] PROPERTY_NAMES = {
60 "java.version",
61 "java.vendor",
62
63
64
65 "java.vm.version",
66
67 "java.vm.name",
68
69
70
71
72 "os.name",
73 "os.arch",
74 "os.version",
75 };
76 private static int max = 11;
77
78 private static int num;
79
80 private static final long[] ELAPSED_TIMES = new long[max];
81 private static final CSVFormat format = CSVFormat.EXCEL;
82
83 private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz";
84
85 private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt");
86
87 private static Reader createReader() throws IOException {
88 return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1);
89 }
90
91 private static Lexer createTestCSVLexer(final String test, final ExtendedBufferedReader input)
92 throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception {
93 return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input);
94 }
95
96 private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception {
97 @SuppressWarnings("unchecked")
98 final Class<Lexer> lexer = (Class<Lexer>) Class.forName("org.apache.commons.csv." + clazz);
99 return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class);
100 }
101
102 private static Stats iterate(final Iterable<CSVRecord> iterable) {
103 int count = 0;
104 int fields = 0;
105 for (final CSVRecord record : iterable) {
106 count++;
107 fields += record.size();
108 }
109 return new Stats(count, fields);
110 }
111
112 public static void main(final String [] args) throws Exception {
113 if (BIG_FILE.exists()) {
114 System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length());
115 } else {
116 System.out.println("Decompressing test fixture to: " + BIG_FILE + "...");
117 try (
118 final InputStream input = new GZIPInputStream(
119 PerformanceTest.class.getClassLoader().getResourceAsStream(TEST_RESRC));
120 final OutputStream output = new FileOutputStream(BIG_FILE)) {
121 IOUtils.copy(input, output);
122 System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length()));
123 }
124 }
125 final int argc = args.length;
126 if (argc > 0) {
127 max = Integer.parseInt(args[0]);
128 }
129
130 final String[] tests;
131 if (argc > 1) {
132 tests = new String[argc - 1];
133 System.arraycopy(args, 1, tests, 0, argc - 1);
134 } else {
135 tests = new String[] { "file", "split", "extb", "exts", "csv", "csv-path", "csv-path-db", "csv-url", "lexreset", "lexnew" };
136 }
137 for (final String p : PROPERTY_NAMES) {
138 System.out.printf("%s=%s%n", p, System.getProperty(p));
139 }
140 System.out.printf("Max count: %d%n%n", max);
141
142 for (final String test : tests) {
143 if ("file".equals(test)) {
144 testReadBigFile(false);
145 } else if ("split".equals(test)) {
146 testReadBigFile(true);
147 } else if ("csv".equals(test)) {
148 testParseCommonsCSV();
149 } else if ("csv-path".equals(test)) {
150 testParsePath();
151 } else if ("csv-path-db".equals(test)) {
152 testParsePathDoubleBuffering();
153 } else if ("csv-url".equals(test)) {
154 testParseURL();
155 } else if ("lexreset".equals(test)) {
156 testCSVLexer(false, test);
157 } else if ("lexnew".equals(test)) {
158 testCSVLexer(true, test);
159 } else if (test.startsWith("CSVLexer")) {
160 testCSVLexer(false, test);
161 } else if ("extb".equals(test)) {
162 testExtendedBuffer(false);
163 } else if ("exts".equals(test)) {
164 testExtendedBuffer(true);
165 } else {
166 System.out.printf("Invalid test name: %s%n", test);
167 }
168 }
169 }
170
171 private static Stats readAll(final BufferedReader in, final boolean split) throws IOException {
172 int count = 0;
173 int fields = 0;
174 String record;
175 while ((record = in.readLine()) != null) {
176 count++;
177 fields += split ? record.split(",").length : 1;
178 }
179 return new Stats(count, fields);
180 }
181
182
183 private static void show(){
184 if (num > 1) {
185 long tot = 0;
186 for (int i = 1; i < num; i++) {
187 tot += ELAPSED_TIMES[i];
188 }
189 System.out.printf("%-20s: %5dms%n%n", "Average(not first)", tot / (num - 1));
190 }
191 num = 0;
192 }
193
194
195 private static void show(final String msg, final Stats s, final long start) {
196 final long elapsed = System.currentTimeMillis() - start;
197 System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields);
198 ELAPSED_TIMES[num] = elapsed;
199 num++;
200 }
201
202 private static void testCSVLexer(final boolean newToken, final String test) throws Exception {
203 Token token = new Token();
204 String dynamic = "";
205 for (int i = 0; i < max; i++) {
206 final String simpleName;
207 final Stats stats;
208 final long startMillis;
209 try (final ExtendedBufferedReader input = new ExtendedBufferedReader(createReader());
210 final Lexer lexer = createTestCSVLexer(test, input)) {
211 if (test.startsWith("CSVLexer")) {
212 dynamic = "!";
213 }
214 simpleName = lexer.getClass().getSimpleName();
215 int count = 0;
216 int fields = 0;
217 startMillis = System.currentTimeMillis();
218 do {
219 if (newToken) {
220 token = new Token();
221 } else {
222 token.reset();
223 }
224 lexer.nextToken(token);
225 switch (token.type) {
226 case EOF:
227 break;
228 case EORECORD:
229 fields++;
230 count++;
231 break;
232 case INVALID:
233 throw new IOException("invalid parse sequence <" + token.content.toString() + ">");
234 case TOKEN:
235 fields++;
236 break;
237 case COMMENT:
238 break;
239 default:
240 throw new IllegalStateException("Unexpected Token type: " + token.type);
241 }
242 } while (!token.type.equals(Token.Type.EOF));
243 stats = new Stats(count, fields);
244 }
245 show(simpleName + dynamic + " " + (newToken ? "new" : "reset"), stats, startMillis);
246 }
247 show();
248 }
249
250 private static void testExtendedBuffer(final boolean makeString) throws Exception {
251 for (int i = 0; i < max; i++) {
252 int fields = 0;
253 int lines = 0;
254 final long startMillis;
255 try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) {
256 startMillis = System.currentTimeMillis();
257 int read;
258 if (makeString) {
259 StringBuilder sb = new StringBuilder();
260 while ((read = in.read()) != -1) {
261 sb.append((char) read);
262 if (read == ',') {
263 sb.toString();
264 sb = new StringBuilder();
265 fields++;
266 } else if (read == '\n') {
267 sb.toString();
268 sb = new StringBuilder();
269 lines++;
270 }
271 }
272 } else {
273 while ((read = in.read()) != -1) {
274 if (read == ',') {
275 fields++;
276 } else if (read == '\n') {
277 lines++;
278 }
279 }
280 }
281 fields += lines;
282 }
283 show("Extended" + (makeString ? " toString" : ""), new Stats(lines, fields), startMillis);
284 }
285 show();
286 }
287
288 private static void testParseCommonsCSV() throws Exception {
289 testParser("CSV", () -> new CSVParser(createReader(), format));
290 }
291
292 private static void testParsePath() throws Exception {
293 testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format));
294 }
295
296 private static void testParsePathDoubleBuffering() throws Exception {
297 testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format));
298 }
299
300 private static void testParser(final String msg, final CSVParserFactory fac) throws Exception {
301 for (int i = 0; i < max; i++) {
302 final long startMillis;
303 final Stats stats;
304 try (final CSVParser parser = fac.createParser()) {
305 startMillis = System.currentTimeMillis();
306 stats = iterate(parser);
307 }
308 show(msg, stats, startMillis);
309 }
310 show();
311 }
312
313 private static void testParseURL() throws Exception {
314 testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format));
315 }
316
317 private static void testReadBigFile(final boolean split) throws Exception {
318 for (int i = 0; i < max; i++) {
319 final long startMillis;
320 final Stats stats;
321 try (final BufferedReader in = new BufferedReader(createReader())) {
322 startMillis = System.currentTimeMillis();
323 stats = readAll(in, split);
324 }
325 show(split ? "file+split" : "file", stats, startMillis);
326 }
327 show();
328 }
329
330 }