1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.commons.csv.perf;
21
22 import java.io.BufferedReader;
23 import java.io.File;
24 import java.io.FileNotFoundException;
25 import java.io.FileOutputStream;
26 import java.io.FileReader;
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.io.OutputStream;
30 import java.io.Reader;
31 import java.util.zip.GZIPInputStream;
32
33 import org.apache.commons.csv.CSVFormat;
34 import org.apache.commons.csv.CSVParser;
35 import org.apache.commons.csv.CSVRecord;
36 import org.apache.commons.io.FileUtils;
37 import org.apache.commons.io.IOUtils;
38 import org.junit.jupiter.api.BeforeAll;
39 import org.junit.jupiter.api.Test;
40
41
42
43
44
45
46 class PerformanceTest {
47
48 private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz";
49
50 private static final File BIG_FILE = new File(FileUtils.getTempDirectoryPath(), "worldcitiespop.txt");
51
52 @BeforeAll
53 public static void setUpClass() throws FileNotFoundException, IOException {
54 if (BIG_FILE.exists()) {
55 System.out.println(String.format("Found test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length()));
56 return;
57 }
58 System.out.println("Decompressing test fixture to: " + BIG_FILE + "...");
59 try (InputStream input = new GZIPInputStream(PerformanceTest.class.getClassLoader().getResourceAsStream(TEST_RESRC));
60 OutputStream output = new FileOutputStream(BIG_FILE)) {
61 IOUtils.copy(input, output);
62 System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length()));
63 }
64 }
65
66 private final int max = 10;
67
68 private BufferedReader createBufferedReader() throws IOException {
69 return new BufferedReader(new FileReader(BIG_FILE));
70 }
71
72 private long parse(final Reader reader, final boolean traverseColumns) throws IOException {
73 final CSVFormat format = CSVFormat.DEFAULT.builder().setIgnoreSurroundingSpaces(false).get();
74 long recordCount = 0;
75 try (CSVParser parser = format.parse(reader)) {
76 for (final CSVRecord record : parser) {
77 recordCount++;
78 if (traverseColumns) {
79 for (@SuppressWarnings("unused")
80 final String value : record) {
81
82 }
83 }
84 }
85 }
86 return recordCount;
87 }
88
89 private void println(final String s) {
90 System.out.println(s);
91 }
92
93 private long readLines(final BufferedReader in) throws IOException {
94 long count = 0;
95 while (in.readLine() != null) {
96 count++;
97 }
98 return count;
99 }
100
101 public long testParseBigFile(final boolean traverseColumns) throws Exception {
102 final long startMillis = System.currentTimeMillis();
103 try (BufferedReader reader = createBufferedReader()) {
104 final long count = parse(reader, traverseColumns);
105 final long totalMillis = System.currentTimeMillis() - startMillis;
106 println(
107 String.format("File parsed in %,d milliseconds with Commons CSV: %,d lines.", totalMillis, count));
108 return totalMillis;
109 }
110 }
111
112 @Test
113 void testParseBigFileRepeat() throws Exception {
114 long bestTime = Long.MAX_VALUE;
115 for (int i = 0; i < this.max; i++) {
116 bestTime = Math.min(testParseBigFile(false), bestTime);
117 }
118 println(String.format("Best time out of %,d is %,d milliseconds.", this.max, bestTime));
119 }
120
121 @Test
122 void testReadBigFile() throws Exception {
123 long bestTime = Long.MAX_VALUE;
124 long count;
125 for (int i = 0; i < this.max; i++) {
126 final long startMillis;
127 try (BufferedReader in = createBufferedReader()) {
128 startMillis = System.currentTimeMillis();
129 count = readLines(in);
130 }
131 final long totalMillis = System.currentTimeMillis() - startMillis;
132 bestTime = Math.min(totalMillis, bestTime);
133 println(String.format("File read in %,d milliseconds: %,d lines.", totalMillis, count));
134 }
135 println(String.format("Best time out of %,d is %,d milliseconds.", this.max, bestTime));
136 }
137 }