View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.codec.digest;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.fail;
21  
22  import java.io.FileNotFoundException;
23  import java.io.FileOutputStream;
24  import java.io.PrintStream;
25  import java.lang.reflect.Constructor;
26  import java.nio.charset.StandardCharsets;
27  import java.util.ArrayList;
28  import java.util.List;
29  import java.util.Properties;
30  import java.util.Random;
31  import java.util.zip.CRC32;
32  import java.util.zip.Checksum;
33  
34  import org.junit.jupiter.api.Test;
35  
36  /**
37   * Unit test to verify that the pure-Java CRC32 algorithm gives the same results as the built-in implementation.
38   *
39   * Copied from Hadoop 2.6.3 (Renamed TestPureJavaCrc32 to PureJavaCrc32Test).
40   */
41  public class PureJavaCrc32Test {
42  
43      /**
44       * Performance tests to compare performance of the Pure Java implementation to the built-in java.util.zip implementation. This can be run from the command
45       * line with:
46       *
47       * java -cp path/to/test/classes:path/to/common/classes \ 'org.apache.hadoop.util.TestPureJavaCrc32$PerformanceTest'
48       *
49       * The output is in JIRA table format.
50       */
51      public static class PerformanceTest {
52          private static final class BenchResult {
53              /** CRC value */
54              final long value;
55              /** Speed (MB per second) */
56              final double mbps;
57  
58              BenchResult(final long value, final double mbps) {
59                  this.value = value;
60                  this.mbps = mbps;
61              }
62          }
63  
64          public static final int MAX_LEN = 32 * 1024 * 1024; // up to 32MB chunks
65  
66          public static final int BYTES_PER_SIZE = MAX_LEN * 4;
67          static final Class<? extends Checksum> zip = CRC32.class;
68          static final List<Class<? extends Checksum>> CRCS = new ArrayList<>();
69  
70          static {
71              CRCS.add(zip);
72              CRCS.add(PureJavaCrc32.class);
73          }
74  
75          private static BenchResult doBench(final Class<? extends Checksum> clazz, final int numThreads, final byte[] bytes, final int size) throws Exception {
76  
77              final Thread[] threads = new Thread[numThreads];
78              final BenchResult[] results = new BenchResult[threads.length];
79  
80              {
81                  final int trials = BYTES_PER_SIZE / size;
82                  final double mbProcessed = trials * size / 1024.0 / 1024.0;
83                  final Constructor<? extends Checksum> ctor = clazz.getConstructor();
84  
85                  for (int i = 0; i < threads.length; i++) {
86                      final int index = i;
87                      threads[i] = new Thread() {
88                          final Checksum crc = ctor.newInstance();
89  
90                          @Override
91                          public void run() {
92                              final long st = System.nanoTime();
93                              crc.reset();
94                              for (int trialIndex = 0; trialIndex < trials; trialIndex++) {
95                                  crc.update(bytes, 0, size);
96                              }
97                              final long et = System.nanoTime();
98                              final double secondsElapsed = (et - st) / 1000000000.0d;
99                              results[index] = new BenchResult(crc.getValue(), mbProcessed / secondsElapsed);
100                         }
101                     };
102                 }
103             }
104 
105             for (final Thread thread : threads) {
106                 thread.start();
107             }
108             for (final Thread thread : threads) {
109                 thread.join();
110             }
111 
112             final long expected = results[0].value;
113             double sum = results[0].mbps;
114             for (int i = 1; i < results.length; i++) {
115                 if (results[i].value != expected) {
116                     throw new AssertionError(clazz.getSimpleName() + " results not matched.");
117                 }
118                 sum += results[i].mbps;
119             }
120             return new BenchResult(expected, sum / results.length);
121         }
122 
123         private static void doBench(final List<Class<? extends Checksum>> crcs, final byte[] bytes, final int size, final PrintStream out) throws Exception {
124             final String numBytesStr = " #Bytes ";
125             final String numThreadsStr = "#T";
126             final String diffStr = "% diff";
127 
128             out.print('|');
129             printCell(numBytesStr, 0, out);
130             printCell(numThreadsStr, 0, out);
131             for (int i = 0; i < crcs.size(); i++) {
132                 final Class<? extends Checksum> c = crcs.get(i);
133                 out.print('|');
134                 printCell(c.getSimpleName(), 8, out);
135                 for (int j = 0; j < i; j++) {
136                     printCell(diffStr, diffStr.length(), out);
137                 }
138             }
139             out.printf("\n");
140 
141             for (int numThreads = 1; numThreads <= 16; numThreads <<= 1) {
142                 out.printf("|");
143                 printCell(String.valueOf(size), numBytesStr.length(), out);
144                 printCell(String.valueOf(numThreads), numThreadsStr.length(), out);
145 
146                 BenchResult expected = null;
147                 final List<BenchResult> previous = new ArrayList<>();
148                 for (final Class<? extends Checksum> c : crcs) {
149                     System.gc();
150 
151                     final BenchResult result = doBench(c, numThreads, bytes, size);
152                     printCell(String.format("%9.1f", result.mbps), c.getSimpleName().length() + 1, out);
153 
154                     // check result
155                     if (c == zip) {
156                         expected = result;
157                     } else if (expected == null) {
158                         fail("The first class is " + c.getName() + " but not " + zip.getName());
159                     } else if (result.value != expected.value) {
160                         fail(c + " has bugs!");
161                     }
162 
163                     // compare result with previous
164                     for (final BenchResult p : previous) {
165                         final double diff = (result.mbps - p.mbps) / p.mbps * 100;
166                         printCell(String.format("%5.1f%%", diff), diffStr.length(), out);
167                     }
168                     previous.add(result);
169                 }
170                 out.printf("\n");
171             }
172         }
173 
174         private static void doBench(final List<Class<? extends Checksum>> crcs, final PrintStream out) throws Exception {
175             final byte[] bytes = new byte[MAX_LEN];
176             new Random().nextBytes(bytes);
177 
178             // Print header
179             out.printf("\nPerformance Table (The unit is MB/sec; #T = #Theads)\n");
180 
181             // Warm up implementations to get jit going.
182             for (final Class<? extends Checksum> c : crcs) {
183                 doBench(c, 1, bytes, 2);
184                 doBench(c, 1, bytes, 2101);
185             }
186 
187             // Test on a variety of sizes with different number of threads
188             for (int size = 32; size <= MAX_LEN; size <<= 1) {
189                 doBench(crcs, bytes, size, out);
190             }
191         }
192 
193         public static void main(final String args[]) throws Exception {
194             printSystemProperties(System.out);
195             doBench(CRCS, System.out);
196         }
197 
198         private static void printCell(final String s, final int width, final PrintStream out) {
199             final int w = s.length() > width ? s.length() : width;
200             out.printf(" %" + w + "s |", s);
201         }
202 
203         private static void printSystemProperties(final PrintStream out) {
204             final String[] names = { "java.version", "java.runtime.name", "java.runtime.version", "java.vm.version", "java.vm.vendor", "java.vm.name",
205                     "java.vm.specification.version", "java.specification.version", "os.arch", "os.name", "os.version" };
206             final Properties p = System.getProperties();
207             for (final String n : names) {
208                 out.println(n + " = " + p.getProperty(n));
209             }
210         }
211     }
212 
213     /**
214      * Generate a table to perform checksums based on the same CRC-32 polynomial that java.util.zip.CRC32 uses.
215      */
216     public static class Table {
217         /** Generate CRC-32 lookup tables */
218         public static void main(final String[] args) throws FileNotFoundException {
219             if (args.length != 1) {
220                 System.err.println("Usage: " + Table.class.getName() + " <polynomial>");
221                 System.exit(1);
222             }
223             final long polynomial = Long.parseLong(args[0], 16);
224 
225             final int i = 8;
226             final Table t = new Table(i, 16, polynomial);
227             final String s = t.toString();
228             System.out.println(s);
229 
230             // print to a file
231             try (final PrintStream out = new PrintStream(new FileOutputStream("table" + i + ".txt"), true)) {
232                 out.println(s);
233             }
234         }
235 
236         private final int[][] tables;
237 
238         private Table(final int nBits, final int nTables, final long polynomial) {
239             tables = new int[nTables][];
240             final int size = 1 << nBits;
241             for (int i = 0; i < tables.length; i++) {
242                 tables[i] = new int[size];
243             }
244 
245             // compute the first table
246             final int[] first = tables[0];
247             for (int i = 0; i < first.length; i++) {
248                 int crc = i;
249                 for (int j = 0; j < nBits; j++) {
250                     if ((crc & 1) == 1) {
251                         crc >>>= 1;
252                         crc ^= polynomial;
253                     } else {
254                         crc >>>= 1;
255                     }
256                 }
257                 first[i] = crc;
258             }
259 
260             // compute the remaining tables
261             final int mask = first.length - 1;
262             for (int j = 1; j < tables.length; j++) {
263                 final int[] previous = tables[j - 1];
264                 final int[] current = tables[j];
265                 for (int i = 0; i < current.length; i++) {
266                     current[i] = previous[i] >>> nBits ^ first[previous[i] & mask];
267                 }
268             }
269         }
270 
271         @Override
272         public String toString() {
273             final StringBuilder b = new StringBuilder();
274 
275             final String tableFormat = String.format("T%d_", Integer.numberOfTrailingZeros(tables[0].length)) + "%d";
276             final String startFormat = "  private static final int " + tableFormat + "_start = %d*256;";
277 
278             for (int j = 0; j < tables.length; j++) {
279                 b.append(String.format(startFormat, j, j));
280                 b.append("\n");
281             }
282 
283             b.append("  private static final int[] T = new int[] {");
284             for (final String s : toStrings(tableFormat)) {
285                 b.append("\n");
286                 b.append(s);
287             }
288             b.setCharAt(b.length() - 2, '\n');
289             b.append(" };\n");
290             return b.toString();
291         }
292 
293         String[] toStrings(final String nameFormat) {
294             final String[] s = new String[tables.length];
295             for (int j = 0; j < tables.length; j++) {
296                 final int[] t = tables[j];
297                 final StringBuilder b = new StringBuilder();
298                 b.append(String.format("    /* " + nameFormat + " */", j));
299                 for (int i = 0; i < t.length;) {
300                     b.append("\n    ");
301                     for (int k = 0; k < 4; k++) {
302                         b.append(String.format("0x%08X, ", t[i++]));
303                     }
304                 }
305                 s[j] = b.toString();
306             }
307             return s;
308         }
309     }
310 
311     private final CRC32 theirs = new CRC32();
312 
313     private final PureJavaCrc32 ours = new PureJavaCrc32();
314 
315     private void checkOnBytes(final byte[] bytes, final boolean print) {
316         theirs.reset();
317         ours.reset();
318         checkSame();
319 
320         for (final byte b : bytes) {
321             ours.update(b);
322             theirs.update(b);
323             checkSame();
324         }
325 
326         if (print) {
327             System.out.println("theirs:\t" + Long.toHexString(theirs.getValue()) + "\nours:\t" + Long.toHexString(ours.getValue()));
328         }
329 
330         theirs.reset();
331         ours.reset();
332 
333         ours.update(bytes, 0, bytes.length);
334         theirs.update(bytes, 0, bytes.length);
335         if (print) {
336             System.out.println("theirs:\t" + Long.toHexString(theirs.getValue()) + "\nours:\t" + Long.toHexString(ours.getValue()));
337         }
338 
339         checkSame();
340 
341         if (bytes.length >= 10) {
342             ours.update(bytes, 5, 5);
343             theirs.update(bytes, 5, 5);
344             checkSame();
345         }
346     }
347 
348     private void checkSame() {
349         assertEquals(theirs.getValue(), ours.getValue());
350     }
351 
352     @Test
353     public void testCorrectness() throws Exception {
354         checkSame();
355 
356         theirs.update(104);
357         ours.update(104);
358         checkSame();
359 
360         checkOnBytes(new byte[] { 40, 60, 97, -70 }, false);
361 
362         checkOnBytes("hello world!".getBytes(StandardCharsets.UTF_8), false);
363 
364         final Random random1 = new Random();
365         final Random random2 = new Random();
366         for (int i = 0; i < 10000; i++) {
367             final byte[] randomBytes = new byte[random1.nextInt(2048)];
368             random2.nextBytes(randomBytes);
369             checkOnBytes(randomBytes, false);
370         }
371 
372     }
373 }