View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.codec.digest;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.fail;
21  
22  import java.io.FileNotFoundException;
23  import java.io.FileOutputStream;
24  import java.io.PrintStream;
25  import java.lang.reflect.Constructor;
26  import java.nio.charset.StandardCharsets;
27  import java.util.ArrayList;
28  import java.util.List;
29  import java.util.Properties;
30  import java.util.Random;
31  import java.util.zip.CRC32;
32  import java.util.zip.Checksum;
33  
34  import org.junit.jupiter.api.Test;
35  
36  /**
37   * Unit test to verify that the pure-Java CRC32 algorithm gives the same results as the built-in implementation.
38   *
39   * Copied from Hadoop 2.6.3 (Renamed TestPureJavaCrc32 to PureJavaCrc32Test).
40   */
41  class PureJavaCrc32Test {
42  
43      /**
44       * Performance tests to compare performance of the Pure Java implementation to the built-in java.util.zip implementation. This can be run from the command
45       * line with:
46       *
47       * java -cp path/to/test/classes:path/to/common/classes \ 'org.apache.hadoop.util.TestPureJavaCrc32$PerformanceTest'
48       *
49       * The output is in JIRA table format.
50       */
51      public static class PerformanceTest {
52          private static final class BenchResult {
53  
54              /** CRC value */
55              final long value;
56  
57              /** Speed (MB per second) */
58              final double mbps;
59  
60              BenchResult(final long value, final double mbps) {
61                  this.value = value;
62                  this.mbps = mbps;
63              }
64          }
65  
66          public static final int MAX_LEN = 32 * 1024 * 1024; // up to 32MB chunks
67  
68          public static final int BYTES_PER_SIZE = MAX_LEN * 4;
69          static final Class<? extends Checksum> zip = CRC32.class;
70          static final List<Class<? extends Checksum>> CRCS = new ArrayList<>();
71  
72          static {
73              CRCS.add(zip);
74              CRCS.add(PureJavaCrc32.class);
75          }
76  
77          private static BenchResult doBench(final Class<? extends Checksum> clazz, final int numThreads, final byte[] bytes, final int size) throws Exception {
78  
79              final Thread[] threads = new Thread[numThreads];
80              final BenchResult[] results = new BenchResult[threads.length];
81  
82              {
83                  final int trials = BYTES_PER_SIZE / size;
84                  final double mbProcessed = trials * size / 1024.0 / 1024.0;
85                  final Constructor<? extends Checksum> ctor = clazz.getConstructor();
86  
87                  for (int i = 0; i < threads.length; i++) {
88                      final int index = i;
89                      threads[i] = new Thread() {
90                          final Checksum crc = ctor.newInstance();
91  
92                          @Override
93                          public void run() {
94                              final long st = System.nanoTime();
95                              crc.reset();
96                              for (int trialIndex = 0; trialIndex < trials; trialIndex++) {
97                                  crc.update(bytes, 0, size);
98                              }
99                              final long et = System.nanoTime();
100                             final double secondsElapsed = (et - st) / 1000000000.0d;
101                             results[index] = new BenchResult(crc.getValue(), mbProcessed / secondsElapsed);
102                         }
103                     };
104                 }
105             }
106 
107             for (final Thread thread : threads) {
108                 thread.start();
109             }
110             for (final Thread thread : threads) {
111                 thread.join();
112             }
113 
114             final long expected = results[0].value;
115             double sum = results[0].mbps;
116             for (int i = 1; i < results.length; i++) {
117                 if (results[i].value != expected) {
118                     throw new AssertionError(clazz.getSimpleName() + " results not matched.");
119                 }
120                 sum += results[i].mbps;
121             }
122             return new BenchResult(expected, sum / results.length);
123         }
124 
125         private static void doBench(final List<Class<? extends Checksum>> crcs, final byte[] bytes, final int size, final PrintStream out) throws Exception {
126             final String numBytesStr = " #Bytes ";
127             final String numThreadsStr = "#T";
128             final String diffStr = "% diff";
129 
130             out.print('|');
131             printCell(numBytesStr, 0, out);
132             printCell(numThreadsStr, 0, out);
133             for (int i = 0; i < crcs.size(); i++) {
134                 final Class<? extends Checksum> c = crcs.get(i);
135                 out.print('|');
136                 printCell(c.getSimpleName(), 8, out);
137                 for (int j = 0; j < i; j++) {
138                     printCell(diffStr, diffStr.length(), out);
139                 }
140             }
141             out.printf("\n");
142 
143             for (int numThreads = 1; numThreads <= 16; numThreads <<= 1) {
144                 out.printf("|");
145                 printCell(String.valueOf(size), numBytesStr.length(), out);
146                 printCell(String.valueOf(numThreads), numThreadsStr.length(), out);
147 
148                 BenchResult expected = null;
149                 final List<BenchResult> previous = new ArrayList<>();
150                 for (final Class<? extends Checksum> c : crcs) {
151                     System.gc();
152 
153                     final BenchResult result = doBench(c, numThreads, bytes, size);
154                     printCell(String.format("%9.1f", result.mbps), c.getSimpleName().length() + 1, out);
155 
156                     // check result
157                     if (c == zip) {
158                         expected = result;
159                     } else if (expected == null) {
160                         fail("The first class is " + c.getName() + " but not " + zip.getName());
161                     } else if (result.value != expected.value) {
162                         fail(c + " has bugs!");
163                     }
164 
165                     // compare result with previous
166                     for (final BenchResult p : previous) {
167                         final double diff = (result.mbps - p.mbps) / p.mbps * 100;
168                         printCell(String.format("%5.1f%%", diff), diffStr.length(), out);
169                     }
170                     previous.add(result);
171                 }
172                 out.printf("\n");
173             }
174         }
175 
176         private static void doBench(final List<Class<? extends Checksum>> crcs, final PrintStream out) throws Exception {
177             final byte[] bytes = new byte[MAX_LEN];
178             new Random().nextBytes(bytes);
179 
180             // Print header
181             out.printf("\nPerformance Table (The unit is MB/sec; #T = #Theads)\n");
182 
183             // Warm up implementations to get jit going.
184             for (final Class<? extends Checksum> c : crcs) {
185                 doBench(c, 1, bytes, 2);
186                 doBench(c, 1, bytes, 2101);
187             }
188 
189             // Test on a variety of sizes with different number of threads
190             for (int size = 32; size <= MAX_LEN; size <<= 1) {
191                 doBench(crcs, bytes, size, out);
192             }
193         }
194 
195         public static void main(final String[] args) throws Exception {
196             printSystemProperties(System.out);
197             doBench(CRCS, System.out);
198         }
199 
200         private static void printCell(final String s, final int width, final PrintStream out) {
201             final int w = s.length() > width ? s.length() : width;
202             out.printf(" %" + w + "s |", s);
203         }
204 
205         private static void printSystemProperties(final PrintStream out) {
206             final String[] names = { "java.version", "java.runtime.name", "java.runtime.version", "java.vm.version", "java.vm.vendor", "java.vm.name",
207                     "java.vm.specification.version", "java.specification.version", "os.arch", "os.name", "os.version" };
208             final Properties p = System.getProperties();
209             for (final String n : names) {
210                 out.println(n + " = " + p.getProperty(n));
211             }
212         }
213     }
214 
215     /**
216      * Generate a table to perform checksums based on the same CRC-32 polynomial that java.util.zip.CRC32 uses.
217      */
218     public static class Table {
219 
220         /** Generate CRC-32 lookup tables */
221         public static void main(final String[] args) throws FileNotFoundException {
222             if (args.length != 1) {
223                 System.err.println("Usage: " + Table.class.getName() + " <polynomial>");
224                 System.exit(1);
225             }
226             final long polynomial = Long.parseLong(args[0], 16);
227 
228             final int i = 8;
229             final Table t = new Table(i, 16, polynomial);
230             final String s = t.toString();
231             System.out.println(s);
232 
233             // print to a file
234             try (PrintStream out = new PrintStream(new FileOutputStream("table" + i + ".txt"), true)) {
235                 out.println(s);
236             }
237         }
238 
239         private final int[][] tables;
240 
241         private Table(final int nBits, final int nTables, final long polynomial) {
242             tables = new int[nTables][];
243             final int size = 1 << nBits;
244             for (int i = 0; i < tables.length; i++) {
245                 tables[i] = new int[size];
246             }
247 
248             // compute the first table
249             final int[] first = tables[0];
250             for (int i = 0; i < first.length; i++) {
251                 int crc = i;
252                 for (int j = 0; j < nBits; j++) {
253                     if ((crc & 1) == 1) {
254                         crc >>>= 1;
255                         crc ^= polynomial;
256                     } else {
257                         crc >>>= 1;
258                     }
259                 }
260                 first[i] = crc;
261             }
262 
263             // compute the remaining tables
264             final int mask = first.length - 1;
265             for (int j = 1; j < tables.length; j++) {
266                 final int[] previous = tables[j - 1];
267                 final int[] current = tables[j];
268                 for (int i = 0; i < current.length; i++) {
269                     current[i] = previous[i] >>> nBits ^ first[previous[i] & mask];
270                 }
271             }
272         }
273 
274         @Override
275         public String toString() {
276             final StringBuilder b = new StringBuilder();
277 
278             final String tableFormat = String.format("T%d_", Integer.numberOfTrailingZeros(tables[0].length)) + "%d";
279             final String startFormat = "  private static final int " + tableFormat + "_start = %d*256;";
280 
281             for (int j = 0; j < tables.length; j++) {
282                 b.append(String.format(startFormat, j, j));
283                 b.append("\n");
284             }
285 
286             b.append("  private static final int[] T = new int[] {");
287             for (final String s : toStrings(tableFormat)) {
288                 b.append("\n");
289                 b.append(s);
290             }
291             b.setCharAt(b.length() - 2, '\n');
292             b.append(" };\n");
293             return b.toString();
294         }
295 
296         String[] toStrings(final String nameFormat) {
297             final String[] s = new String[tables.length];
298             for (int j = 0; j < tables.length; j++) {
299                 final int[] t = tables[j];
300                 final StringBuilder b = new StringBuilder();
301                 b.append(String.format("    /* " + nameFormat + " */", j));
302                 for (int i = 0; i < t.length;) {
303                     b.append("\n    ");
304                     for (int k = 0; k < 4; k++) {
305                         b.append(String.format("0x%08X, ", t[i++]));
306                     }
307                 }
308                 s[j] = b.toString();
309             }
310             return s;
311         }
312     }
313 
314     private final CRC32 theirs = new CRC32();
315 
316     private final PureJavaCrc32 ours = new PureJavaCrc32();
317 
318     private void checkOnBytes(final byte[] bytes, final boolean print) {
319         theirs.reset();
320         ours.reset();
321         checkSame();
322 
323         for (final byte b : bytes) {
324             ours.update(b);
325             theirs.update(b);
326             checkSame();
327         }
328 
329         if (print) {
330             System.out.println("theirs:\t" + Long.toHexString(theirs.getValue()) + "\nours:\t" + Long.toHexString(ours.getValue()));
331         }
332 
333         theirs.reset();
334         ours.reset();
335 
336         ours.update(bytes, 0, bytes.length);
337         theirs.update(bytes, 0, bytes.length);
338         if (print) {
339             System.out.println("theirs:\t" + Long.toHexString(theirs.getValue()) + "\nours:\t" + Long.toHexString(ours.getValue()));
340         }
341 
342         checkSame();
343 
344         if (bytes.length >= 10) {
345             ours.update(bytes, 5, 5);
346             theirs.update(bytes, 5, 5);
347             checkSame();
348         }
349     }
350 
351     private void checkSame() {
352         assertEquals(theirs.getValue(), ours.getValue());
353     }
354 
355     @Test
356     void testCorrectness() throws Exception {
357         checkSame();
358 
359         theirs.update(104);
360         ours.update(104);
361         checkSame();
362 
363         checkOnBytes(new byte[] { 40, 60, 97, -70 }, false);
364 
365         checkOnBytes("hello world!".getBytes(StandardCharsets.UTF_8), false);
366 
367         final Random random1 = new Random();
368         final Random random2 = new Random();
369         for (int i = 0; i < 10000; i++) {
370             final byte[] randomBytes = new byte[random1.nextInt(2048)];
371             random2.nextBytes(randomBytes);
372             checkOnBytes(randomBytes, false);
373         }
374 
375     }
376 }