1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.numbers.examples.jmh.arrays;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.EnumSet;
23 import java.util.List;
24 import java.util.Locale;
25 import java.util.Objects;
26 import java.util.concurrent.TimeUnit;
27 import java.util.concurrent.atomic.AtomicInteger;
28 import java.util.function.BiConsumer;
29 import java.util.function.BiFunction;
30 import java.util.function.Consumer;
31 import java.util.function.Function;
32 import java.util.logging.Logger;
33 import java.util.stream.Collectors;
34 import org.apache.commons.numbers.arrays.Selection;
35 import org.apache.commons.rng.UniformRandomProvider;
36 import org.apache.commons.rng.sampling.ArraySampler;
37 import org.apache.commons.rng.sampling.PermutationSampler;
38 import org.apache.commons.rng.sampling.distribution.DiscreteUniformSampler;
39 import org.apache.commons.rng.sampling.distribution.SharedStateDiscreteSampler;
40 import org.apache.commons.rng.simple.RandomSource;
41 import org.openjdk.jmh.annotations.Benchmark;
42 import org.openjdk.jmh.annotations.BenchmarkMode;
43 import org.openjdk.jmh.annotations.Fork;
44 import org.openjdk.jmh.annotations.Level;
45 import org.openjdk.jmh.annotations.Measurement;
46 import org.openjdk.jmh.annotations.Mode;
47 import org.openjdk.jmh.annotations.OutputTimeUnit;
48 import org.openjdk.jmh.annotations.Param;
49 import org.openjdk.jmh.annotations.Scope;
50 import org.openjdk.jmh.annotations.Setup;
51 import org.openjdk.jmh.annotations.State;
52 import org.openjdk.jmh.annotations.Warmup;
53 import org.openjdk.jmh.infra.Blackhole;
54
55 /**
56 * Executes a benchmark of the selection of indices from array data.
57 */
58 @BenchmarkMode(Mode.AverageTime)
59 @OutputTimeUnit(TimeUnit.NANOSECONDS)
60 @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
61 @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
62 @State(Scope.Benchmark)
63 @Fork(value = 1, jvmArgs = {"-server", "-Xms512M", "-Xmx8192M"})
64 public class SelectionPerformance {
    // Names identifying the sort/selection implementations benchmarked by this class.

    /** Use the JDK sort function. */
    private static final String JDK = "JDK";
    /** Use a sort function. */
    private static final String SORT = "Sort";
    /** Baseline for the benchmark. */
    private static final String BASELINE = "Baseline";
    /** Selection method using a heap. */
    private static final String HEAP_SELECT = "HeapSelect";
    /** Selection method using a sort. */
    private static final String SORT_SELECT = "SortSelect";

    // First generation partition functions.
    // These are based on the KthSelector class used in Commons Math:
    // - Single k or a pair of indices (k,k+1) are selected in a single
    //   call; multiple indices cache pivots in a heap structure or use a BitSet.
    // - They dynamically correct signed zeros when they are encountered.

    /** Single-pivot partitioning. This method uses a special comparison of double
     * values similar to {@link Double#compare(double, double)}. This handles
     * NaN and signed zeros. */
    private static final String SP = "SP";
    /** Single-pivot partitioning; uses a BitSet to cache pivots. */
    private static final String SPN = "SPN";
    /** Single-pivot partitioning using a heap to cache pivots.
     * This method is copied from Commons Math. */
    private static final String SPH = "SPH";
    /** Bentley-McIlroy partitioning (Sedgewick); uses a BitSet to cache pivots. */
    private static final String SBM = "SBM";
    /** Bentley-McIlroy partitioning (original); uses a BitSet to cache pivots. */
    private static final String BM = "BM";
    /** Dual-pivot partitioning; uses a BitSet to cache pivots. */
    private static final String DP = "DP";
    /** Dual-pivot partitioning with 5 sorted points to choose pivots; uses a BitSet to cache pivots. */
    private static final String DP5 = "5DP";

    // Second generation partition functions.
    // These pre-process data to sort NaN to the end and count signed zeros;
    // post-processing is performed to restore signed zeros in order.
    // The exception is SBM2 which dynamically corrects signed zeros.

    /** Bentley-McIlroy partitioning (Sedgewick). This second generation function
     * dynamically corrects signed zeros when they are encountered. It is based on
     * the fastest first generation method with changes to allow different pivot
     * store strategies: SEQUENTIAL, INDEX_SET, PIVOT_CACHE. */
    private static final String SBM2 = "2SBM";

    /** Floyd-Rivest partitioning. Only for a single k. */
    private static final String FR = "FR";
    /** Floyd-Rivest partitioning (Kiwiel). Only for a single k. */
    private static final String KFR = "KFR";

    // Introselect functions - switch to a stopper function when progress is poor.
    // Allow specification of configuration using the name parameter:
    //   single-pivot strategy (Partition.SPStrategy);
    //   multiple key strategy (Partition.KeyStrategy);
    //   paired (close) keys strategy (Partition.PairedKeyStrategy);
    //   edge-selection strategy (Partition.EdgeSelectStrategy);
    //   stopper strategy (Partition.StopperStrategy).
    // Parameters to control strategies and introspection are set using the name parameter.
    // See PartitionFactory for details.

    /** Introselect implementation with single-pivot partitioning. */
    private static final String ISP = "ISP";
    /** Introselect implementation with dual-pivot partitioning. */
    private static final String IDP = "IDP";

    // Single k selection using various methods which provide linear runtime (O(n)).

    /** Linearselect implementation with single pivot partitioning using median-of-medians-of-5
     * for pivot selection. */
    private static final String LSP = "LSP";
    /** Linearselect implementation with single pivot partitioning using optimised
     * median-of-medians. */
    private static final String LINEAR = "Linear";
    /** Quickselect adaptive implementation. Has configuration of the far-step method and some
     * adaption modes. */
    private static final String QA = "QA";
    /** Quickselect adaptive implementation. Uses the best performing far-step method and
     * has configurable adaption control allowing starting at and skipping over adaption modes. */
    private static final String QA2 = "QA2";

    /** Commons Numbers select implementation. This method is built using the best performing
     * select function across a range of input data. This algorithm cannot be configured. */
    private static final String SELECT = "SELECT";

    /** Random source used to create the benchmark data. */
    private static final RandomSource RANDOM_SOURCE = RandomSource.XO_RO_SHI_RO_128_PP;
152
153 /**
154 * Source of {@code double} array data.
155 *
156 * <p>By default this uses the adverse input test suite from figure 1 in Bentley and McIlroy
157 * (1993) Engineering a sort function, Software, practice and experience, Vol.23(11),
158 * 1249–1265.
159 *
160 * <p>An alternative set of data is from Valois (2000) Introspective sorting and selection
161 * revisited, Software, practice and experience, Vol.30(6), 617-638.
162 *
163 * <p>Note
164 *
165 * <p>This class has setter methods to allow re-use in unit testing without requiring
166 * use of reflection to set fields. Parameters set by JMH are initialized to their
167 * defaults for convenience. Re-use requires:
168 *
169 * <ol>
170 * <li>Creating an instance of the abstract class that provides the data length
171 * <li>Calling {@link #setup()} to create the data
172 * <li>Iterating over the data
173 * </ol>
174 *
175 * <pre>
176 * AbstractDataSource s = new AbstractDataSource() {
177 * protected int getLength() {
178 * return 123;
179 * }
180 * };
181 * s.setDistribution(Distribution.SAWTOOTH, Distribution.SHUFFLE);
182 * s.setModification(Modification.REVERSE_FRONT);
183 * s.setRange(2);
184 * s.setup();
185 * for (int i = 0; i < s.size(); i++) {
186 * s.getData(i);
187 * }
188 * </pre>
189 *
190 * <p>Random distribution mode
191 *
192 * <p>The default BM configuration includes random samples generated as a family of
193 * single samples created from ranges that are powers of two [0, 2^i). This small set
194 * of samples is only a small representation of randomness. For small lengths this may
195 * only be a few random samples.
196 *
197 * <p>The data source can be changed to generate a fixed number of random samples
198 * using a uniform distribution [0, n]. For this purpose the distribution must be set
199 * to {@link Distribution#RANDOM} and the {@link #setSamples(int) samples} set above
200 * zero. The inclusive upper bound {@code n} is set using the {@link #setSeed(int) seed}.
201 * If this is zero then the default is {@link Integer#MAX_VALUE}.
202 *
203 * <p>Order
204 *
205 * <p>Data are created in distribution families. If these are passed in order to a
206 * partition method the JVM can change behaviour of the algorithm as branch prediction
207 * statistics stabilise for the family. To mitigate this effect the order is permuted
     * per invocation of the benchmark (see {@link #createOrder()}). This stabilises the
209 * average timing results from JMH. Using per-invocation data generation requires
210 * the benchmark execution time is higher than 1 millisecond. Benchmarks that use
211 * tiny data (e.g. sort 5 elements) must use several million samples.
212 */
213 @State(Scope.Benchmark)
214 public abstract static class AbstractDataSource {
215 /** All distributions / modifications. */
216 private static final String ALL = "all";
217 /** All distributions / modifications in the Bentley and McIlroy test suite. */
218 private static final String BM = "bm";
219 /** All distributions in the Valois test suite. These currently ignore the seed.
220 * To replicate Valois used a fixed seed and the copy modification. */
221 private static final String VALOIS = "valois";
222 /** Flag to determine if the data size should be logged. This is useful to be
223 * able to determine the execution time per sample when the number of samples
224 * is dynamically created based on the data length, range and seed. */
225 private static final AtomicInteger LOG_SIZE = new AtomicInteger();
226
227 /**
228 * The type of distribution.
229 */
230 enum Distribution {
231 // B&M (1993)
232
233 /** Sawtooth distribution. Ascending data from 0 to m, that repeats. */
234 SAWTOOTH,
235 /** Random distribution. Uniform random data in [0, m] */
236 RANDOM,
237 /** Stagger distribution. Multiple interlaced ascending sequences. */
238 STAGGER,
239 /** Plateau distribution. Ascending data from 0 to m, then constant. */
240 PLATEAU,
241 /** Shuffle distribution. Two randomly interlaced ascending sequences of different lengths. */
242 SHUFFLE,
243
244 /** Sharktooth distribution. Alternating ascending then descending data from 0
245 * to m and back. This is an addition to the original suite of BM
246 * and is not included in the test suite by default and must be specified.
247 *
248 * <p>An ascending then descending sequence is also known as organpipe in
249 * Valois (2000). This version allows multiple ascending/descending runs in the
250 * same length. */
251 SHARKTOOTH,
252
253 // Valois (2000)
254
255 /** Sorted. */
256 SORTED,
257 /** Permutation of ones and zeros. */
258 ONEZERO,
259 /** Musser's median-of-3 killer. This elicits worst case performance for a median-of-3
260 * pivot selection strategy. */
261 M3KILLER,
262 /** A sorted sequence rotated left once. */
263 ROTATED,
264 /** Musser's two-faced sequence (the median-of-3 killer with two random permutations). */
265 TWOFACED,
266 /** An ascending then descending sequence. */
267 ORGANPIPE;
268 }
269
270 /**
271 * The type of data modification.
272 */
273 enum Modification {
274 /** Copy modification. */
275 COPY,
276 /** Reverse modification. */
277 REVERSE,
278 /** Reverse front-half modification. */
279 REVERSE_FRONT,
280 /** Reverse back-half modification. */
281 REVERSE_BACK,
282 /** Sort modification. */
283 SORT,
284 /** Descending modification (this is an addition to the original suite of BM).
285 * It is useful for testing worst case performance, e.g. insertion sort performs
286 * poorly on descending data. Heapselect using a max heap (to find k minimum elements)
287 * would perform poorly if data is processed in the forward direction as all elements
288 * must be inserted.
289 *
290 * <p>This is not included in the test suite by default and must be specified.
291 * Note that the Shuffle distribution with a very large seed 'm' is effectively an
292 * ascending sequence and will be reversed to descending as part of the original
293 * B&M suite of data. */
294 DESCENDING,
295 /** Dither modification. Add i % 5 to the data element i. */
296 DITHER;
297 }
298
299 /**
300 * Sample information. Used to obtain information about samples that may be slow
301 * for a particular partition method, e.g. they use excessive recursion during quickselect.
302 * This is used for testing: each sample from the data source can provide the
303 * information to create the sample distribution.
304 */
305 public static final class SampleInfo {
306 /** Distribution. */
307 private final Distribution dist;
308 /** Modification. */
309 private final Modification mod;
310 /** Length. */
311 private final int n;
312 /** Seed. */
313 private final int m;
314 /** Offset. */
315 private final int o;
316
317 /**
318 * @param dist Distribution.
319 * @param mod Modification.
320 * @param n Length.
321 * @param m Seed.
322 * @param o Offset.
323 */
324 SampleInfo(Distribution dist, Modification mod, int n, int m, int o) {
325 this.dist = dist;
326 this.mod = mod;
327 this.n = n;
328 this.m = m;
329 this.o = o;
330 }
331
332 /**
333 * Create an instance with the specified distribution.
334 *
335 * @param v Value.
336 * @return the instance
337 */
338 SampleInfo with(Distribution v) {
339 return new SampleInfo(v, mod, n, m, o);
340 }
341
342 /**
343 * Create an instance with the specified modification.
344 *
345 * @param v Value.
346 * @return the instance
347 */
348 SampleInfo with(Modification v) {
349 return new SampleInfo(dist, v, n, m, o);
350 }
351
352 /**
353 * @return the distribution
354 */
355 Distribution getDistribution() {
356 return dist;
357 }
358
359 /**
360 * @return the modification
361 */
362 Modification getModification() {
363 return mod;
364 }
365
366 /**
367 * @return the data length
368 */
369 int getN() {
370 return n;
371 }
372
373 /**
374 * @return the distribution seed
375 */
376 int getM() {
377 return m;
378 }
379
380 /**
381 * @return the distribution offset
382 */
383 int getO() {
384 return o;
385 }
386
387 @Override
388 public String toString() {
389 return String.format("%s, %s, n=%d, m=%d, o=%d", dist, mod, n, m, o);
390 }
391 }
392
        /** Order. This is randomized to ensure that successive calls do not partition
         * similar distributions. Randomized per invocation to avoid the JVM 'learning'
         * branch decisions on small data sets. */
        protected int[] order;
        /** Cached source of randomness. Created on the first call to {@link #setup()}. */
        protected UniformRandomProvider rng;

        /** Type of data. Multiple types can be specified in the same string using
         * lower/upper case, delimited using ':'. */
        @Param({BM})
        private String distribution = BM;

        /** Type of data modification. Multiple types can be specified in the same string using
         * lower/upper case, delimited using ':'. */
        @Param({BM})
        private String modification = BM;

        /** Extra range to add to the data length.
         * E.g. Use 1 to force use of odd and even length samples. */
        @Param({"1"})
        private int range = 1;

        /** Sample 'seed'. This is {@code m} in Bentley and McIlroy's test suite.
         * If set to zero the default is to use powers of 2 based on sample size. */
        @Param({"0"})
        private int seed;

        /** Sample offset. This is used to shift each distribution to create different data.
         * It is advanced on each invocation of {@link #setup()}. */
        @Param({"0"})
        private int offset;

        /** Number of samples. Applies only when the distribution is solely
         * {@link Distribution#RANDOM} and this value is above zero. In this case
         * the length of the data is randomly chosen in {@code [length, length + range]}
         * (inclusive bounds). */
        @Param({"0"})
        private int samples;

        /** RNG seed. Created using ThreadLocalRandom.current().nextLong(). This is advanced
         * for the random distribution mode per iteration. Each benchmark executed by
         * JMH will use the same random data, even across JVMs.
         *
         * <p>If this is zero then a random seed is chosen. */
        @Param({"-7450238124206088695"})
        private long rngSeed = -7450238124206088695L;

        /** Data. This is stored as integer data which saves memory. Note that when ranking
         * data it is not necessary to have the full range of the double data type; the same
         * number of unique values can be recorded in an array using an integer type.
         * Returning a double[] forces a copy to be generated for destructive sorting /
         * partitioning methods. */
        private int[][] data;

        /** Sample information for each entry in {@link #data} (native order). */
        private List<SampleInfo> sampleInfo;
447
448 /**
449 * Gets the sample for the given {@code index}.
450 *
451 * <p>This is returned in a randomized order per iteration.
452 *
453 * @param index Index.
454 * @return the data sample
455 */
456 public double[] getData(int index) {
457 return getDataSample(order[index]);
458 }
459
460 /**
461 * Gets the sample for the given {@code index}.
462 *
463 * <p>This is returned in a randomized order per iteration.
464 *
465 * @param index Index.
466 * @return the data sample
467 */
468 public int[] getIntData(int index) {
469 return getIntDataSample(order[index]);
470 }
471
472 /**
473 * Gets the sample for the given {@code index}.
474 *
475 * @param index Index.
476 * @return the data sample
477 */
478 protected double[] getDataSample(int index) {
479 final int[] a = data[index];
480 final double[] x = new double[a.length];
481 for (int i = -1; ++i < a.length;) {
482 x[i] = a[i];
483 }
484 return x;
485 }
486
487 /**
488 * Gets the sample for the given {@code index}.
489 *
490 * @param index Index.
491 * @return the data sample
492 */
493 protected int[] getIntDataSample(int index) {
494 // For parity with other methods do not use data.clone()
495 final int[] a = data[index];
496 final int[] x = new int[a.length];
497 for (int i = -1; ++i < a.length;) {
498 x[i] = a[i];
499 }
500 return x;
501 }
502
503 /**
504 * Gets the sample size for the given {@code index}.
505 *
506 * @param index Index.
507 * @return the data sample size
508 */
509 public int getDataSize(int index) {
510 return data[index].length;
511 }
512
513 /**
514 * Gets the sample information for the given {@code index}.
515 * Matches the (native) order returned by {@link #getDataSample(int)}.
516 *
517 * @param index Index.
518 * @return the data sample information
519 */
520 SampleInfo getDataSampleInfo(int index) {
521 return sampleInfo.get(index);
522 }
523
524 /**
525 * Get the number of data samples.
526 *
527 * <p>Note: This data source will create a permutation order per invocation based on
528 * this size. Per-invocation control in JMH is recommended for methods that take
529 * more than 1 millisecond to execute. For very small data and/or fast methods
530 * this may not be achievable. Child classes may override this value to create
531 * a large number of repeats of the same data per invocation. Any class performing
532 * this should also override {@link #getData(int)} to prevent index out of bound errors.
533 * This can be done by mapping the index to the original index using the number of repeats
534 * e.g. {@code original index = index / repeats}.
535 *
536 * @return the number of samples
537 */
538 public int size() {
539 return data.length;
540 }
541
542 /**
543 * Create the data.
544 */
545 @Setup(Level.Iteration)
546 public void setup() {
547 Objects.requireNonNull(distribution);
548 Objects.requireNonNull(modification);
549
550 // Set-up using parameters (may throw)
551 final EnumSet<Distribution> dist = getDistributions();
552 final int length = getLength();
553 if (length < 1) {
554 throw new IllegalStateException("Unsupported length: " + length);
555 }
556 // Note: Bentley-McIlroy use n in {100, 1023, 1024, 1025}.
557 // Here we only support a continuous range.
558 final int r = range > 0 ? range : 0;
559 if (length + (long) r > Integer.MAX_VALUE) {
560 throw new IllegalStateException("Unsupported upper length: " + length);
561 }
562 final int length2 = length + r;
563
564 // Allow pseudorandom seeding
565 if (rngSeed == 0) {
566 rngSeed = RandomSource.createLong();
567 }
568 if (rng == null) {
569 // First call, create objects
570 rng = RANDOM_SOURCE.create(rngSeed);
571 }
572
573 // Special case for random distribution mode
574 if (dist.contains(Distribution.RANDOM) && dist.size() == 1 && samples > 0) {
575 data = new int[samples][];
576 sampleInfo = new ArrayList<>(samples);
577 final int upper = seed > 0 ? seed : Integer.MAX_VALUE;
578 final SharedStateDiscreteSampler s1 = DiscreteUniformSampler.of(rng, 0, upper);
579 final SharedStateDiscreteSampler s2 = DiscreteUniformSampler.of(rng, length, length2);
580 for (int i = 0; i < data.length; i++) {
581 final int[] a = new int[s2.sample()];
582 for (int j = a.length; --j >= 0;) {
583 a[j] = s1.sample();
584 }
585 data[i] = a;
586 sampleInfo.add(new SampleInfo(Distribution.RANDOM, Modification.COPY, a.length, 0, 0));
587 }
588 return;
589 }
590
591 // New data per iteration
592 data = null;
593 final int o = offset;
594 offset = rng.nextInt();
595
596 final EnumSet<Modification> mod = getModifications();
597
598 // Data using the RNG will be randomized only once.
599 // Here we use the same seed for parity across benchmark methods.
600 // Note that most distributions do not use the source of randomness.
601 final ArrayList<int[]> sampleData = new ArrayList<>();
602 sampleInfo = new ArrayList<>();
603 final List<SampleInfo> info = new ArrayList<>();
604 for (int n = length; n <= length2; n++) {
605 // Note: Large lengths may wish to limit the range of m to limit
606 // the memory required to store the samples. Currently a single
607 // m is supported via the seed parameter.
608 // Default seed will create ceil(log2(2*n)) * 5 dist * 6 mods samples:
609 // MAX = 32 * 5 * 7 * (2^31-1) * 4 bytes == 7679 GiB
610 // HUGE = 31 * 5 * 7 * 2^30 * 4 bytes == 3719 GiB
611 // BIG = 21 * 5 * 7 * 2^20 * 4 bytes == 2519 MiB <-- within configured JVM -Xmx
612 // MED = 11 * 5 * 7 * 2^10 * 4 bytes == 1318 KiB
613 // (This is for the B&M data.)
614 // It is possible to create lengths above 2^30 using a single distribution,
615 // modification, and seed:
616 // MAX1 = 1 * 1 * 1 * (2^31-1) * 4 bytes == 8191 MiB
617 // However this is then used to create double[] data thus requiring an extra
618 // ~16GiB memory for the sample to partition.
619 for (final int m : createSeeds(seed, n)) {
620 final List<int[]> d = createDistributions(dist, rng, n, m, o, info);
621 for (int i = 0; i < d.size(); i++) {
622 final int[] x = d.get(i);
623 final SampleInfo si = info.get(i);
624 if (mod.contains(Modification.COPY)) {
625 // Don't copy! All other methods generate copies
626 // so we can use this in-place.
627 sampleData.add(x);
628 sampleInfo.add(si.with(Modification.COPY));
629 }
630 if (mod.contains(Modification.REVERSE)) {
631 sampleData.add(reverse(x, 0, n));
632 sampleInfo.add(si.with(Modification.REVERSE));
633 }
634 if (mod.contains(Modification.REVERSE_FRONT)) {
635 sampleData.add(reverse(x, 0, n >>> 1));
636 sampleInfo.add(si.with(Modification.REVERSE_FRONT));
637 }
638 if (mod.contains(Modification.REVERSE_BACK)) {
639 sampleData.add(reverse(x, n >>> 1, n));
640 sampleInfo.add(si.with(Modification.REVERSE_BACK));
641 }
642 // Only sort once
643 if (mod.contains(Modification.SORT) ||
644 mod.contains(Modification.DESCENDING)) {
645 final int[] y = x.clone();
646 Arrays.sort(y);
647 if (mod.contains(Modification.DESCENDING)) {
648 sampleData.add(reverse(y, 0, n));
649 sampleInfo.add(si.with(Modification.DESCENDING));
650 }
651 if (mod.contains(Modification.SORT)) {
652 sampleData.add(y);
653 sampleInfo.add(si.with(Modification.SORT));
654 }
655 }
656 if (mod.contains(Modification.DITHER)) {
657 sampleData.add(dither(x));
658 sampleInfo.add(si.with(Modification.DITHER));
659 }
660 }
661 }
662 }
663 data = sampleData.toArray(new int[0][]);
664 if (LOG_SIZE.getAndSet(length) != length) {
665 Logger.getLogger(getClass().getName()).info(
666 () -> String.format("Data length: [%d, %d] n=%d", length, length2, data.length));
667 }
668 }
669
670 /**
671 * Create the order to process the indices.
672 *
673 * <p>JMH recommends that invocations should take at
674 * least 1 millisecond for timings to be usable. In practice there should be
675 * enough data that processing takes much longer than a few milliseconds.
676 */
677 @Setup(Level.Invocation)
678 public void createOrder() {
679 if (order == null) {
680 // First call, create objects
681 order = PermutationSampler.natural(size());
682 }
683 ArraySampler.shuffle(rng, order);
684 }
685
686 /**
687 * @return the distributions
688 */
689 private EnumSet<Distribution> getDistributions() {
690 EnumSet<Distribution> dist;
691 if (BM.equals(distribution)) {
692 dist = EnumSet.of(
693 Distribution.SAWTOOTH,
694 Distribution.RANDOM,
695 Distribution.STAGGER,
696 Distribution.PLATEAU,
697 Distribution.SHUFFLE);
698 } else if (VALOIS.equals(distribution)) {
699 dist = EnumSet.of(
700 Distribution.RANDOM,
701 Distribution.SORTED,
702 Distribution.ONEZERO,
703 Distribution.M3KILLER,
704 Distribution.ROTATED,
705 Distribution.TWOFACED,
706 Distribution.ORGANPIPE);
707 } else {
708 dist = getEnumFromParam(Distribution.class, distribution);
709 }
710 return dist;
711 }
712
713 /**
714 * @return the modifications
715 */
716 private EnumSet<Modification> getModifications() {
717 EnumSet<Modification> mod;
718 if (BM.equals(modification)) {
719 // Modifications are from Bentley and McIlroy
720 mod = EnumSet.allOf(Modification.class);
721 // ... except descending
722 mod.remove(Modification.DESCENDING);
723 } else if (VALOIS.equals(modification)) {
724 // For convenience alias Valois to copy
725 mod = EnumSet.of(Modification.COPY);
726 } else {
727 mod = getEnumFromParam(Modification.class, modification);
728 }
729 return mod;
730 }
731
732 /**
733 * Gets all the enum values of the given class from the parameters.
734 *
735 * @param <E> Enum type.
736 * @param cls Class of the enum.
737 * @param parameters Parameters (multiple values delimited by ':')
738 * @return the enum values
739 */
740 static <E extends Enum<E>> EnumSet<E> getEnumFromParam(Class<E> cls, String parameters) {
741 if (ALL.equals(parameters)) {
742 return EnumSet.allOf(cls);
743 }
744 final EnumSet<E> set = EnumSet.noneOf(cls);
745 final String s = parameters.toUpperCase(Locale.ROOT);
746 for (final E e : cls.getEnumConstants()) {
747 // Scan for the name
748 for (int i = s.indexOf(e.name(), 0); i >= 0; i = s.indexOf(e.name(), i)) {
749 // Ensure a full match to the name:
750 // either at the end of the string, or followed by the delimiter
751 i += e.name().length();
752 if (i == s.length() || s.charAt(i) == ':') {
753 set.add(e);
754 break;
755 }
756 }
757 }
758 if (set.isEmpty()) {
759 throw new IllegalStateException("Unknown parameters: " + parameters);
760 }
761 return set;
762 }
763
764 /**
765 * Creates the seeds.
766 *
767 * <p>This can be pasted into a JShell terminal to verify it works for any size
768 * {@code 1 <= n < 2^31}. With the default behaviour all seeds {@code m} are unsigned
769 * strictly positive powers of 2 and the highest seed should be below {@code 2*n}.
770 *
771 * @param seed Seed (use 0 for default; or provide a strictly positive {@code 1 <= m <= 2^31}).
772 * @param n Sample size.
773 * @return the seeds
774 */
775 private static int[] createSeeds(int seed, int n) {
776 // Allow [1, 2^31] (note 2^31 is negative but handled as a power of 2)
777 if (seed - 1 >= 0) {
778 return new int[] {seed};
779 }
780 // Bentley-McIlroy use:
781 // for: m = 1; m < 2 * n; m *= 2
782 // This has been modified here to handle n up to MAX_VALUE
783 // by knowing the count of m to generate as the power of 2 >= n.
784
785 // ceil(log2(n)) + 1 == ceil(log2(2*n)) but handles MAX_VALUE
786 int c = 33 - Integer.numberOfLeadingZeros(n - 1);
787 final int[] seeds = new int[c];
788 c = 0;
789 for (int m = 1; c != seeds.length; m *= 2) {
790 seeds[c++] = m;
791 }
792 return seeds;
793 }
794
795 /**
796 * Creates the distribution samples. Handles {@code m = 2^31} using
797 * {@link Integer#MIN_VALUE}.
798 *
799 * <p>The offset is used to adjust each distribution to generate a different
800 * output. Only applies to distributions that do not use the source of randomness.
801 *
802 * <p>Distributions are generated in enum order and recorded in the output {@code info}.
803 * Distributions that are a constant value at {@code m == 1} are not generated.
804 * This case is handled by the plateau distribution which will be a constant value
805 * except one occurrence of zero.
806 *
807 * @param dist Distributions.
808 * @param rng Source of randomness.
809 * @param n Length of the sample.
810 * @param m Sample seed (in [1, 2^31])
811 * @param o Offset.
812 * @param info Sample information.
813 * @return the samples
814 */
815 private static List<int[]> createDistributions(EnumSet<Distribution> dist,
816 UniformRandomProvider rng, int n, int m, int o, List<SampleInfo> info) {
817 final ArrayList<int[]> distData = new ArrayList<>(6);
818 int[] x;
819 info.clear();
820 SampleInfo si = new SampleInfo(null, null, n, m, o);
821 // B&M (1993)
822 if (dist.contains(Distribution.SAWTOOTH) && m != 1) {
823 x = createSample(distData, info, si.with(Distribution.SAWTOOTH));
824 // i % m
825 // Typical case m is a power of 2 so we can use a mask
826 // Use the offset.
827 final int mask = m - 1;
828 if ((m & mask) == 0) {
829 for (int i = -1; ++i < n;) {
830 x[i] = (i + o) & mask;
831 }
832 } else {
833 // User input seed. Start at the offset.
834 int j = Integer.remainderUnsigned(o, m);
835 for (int i = -1; ++i < n;) {
836 j = j % m;
837 x[i] = j++;
838 }
839 }
840 }
841 if (dist.contains(Distribution.RANDOM) && m != 1) {
842 x = createSample(distData, info, si.with(Distribution.RANDOM));
843 // rand() % m
844 // A sampler is faster than rng.nextInt(m); the sampler has an inclusive upper.
845 final SharedStateDiscreteSampler s = DiscreteUniformSampler.of(rng, 0, m - 1);
846 for (int i = -1; ++i < n;) {
847 x[i] = s.sample();
848 }
849 }
850 if (dist.contains(Distribution.STAGGER)) {
851 x = createSample(distData, info, si.with(Distribution.STAGGER));
852 // Overflow safe: (i * m + i) % n
853 final long nn = n;
854 final long oo = Integer.toUnsignedLong(o);
855 for (int i = -1; ++i < n;) {
856 final long j = i + oo;
857 x[i] = (int) ((j * m + j) % nn);
858 }
859 }
860 if (dist.contains(Distribution.PLATEAU)) {
861 x = createSample(distData, info, si.with(Distribution.PLATEAU));
862 // min(i, m)
863 for (int i = Math.min(n, m); --i >= 0;) {
864 x[i] = i;
865 }
866 for (int i = m - 1; ++i < n;) {
867 x[i] = m;
868 }
869 // Rotate
870 final int n1 = Integer.remainderUnsigned(o, n);
871 if (n1 != 0) {
872 final int[] a = x.clone();
873 final int n2 = n - n1;
874 System.arraycopy(a, 0, x, n1, n2);
875 System.arraycopy(a, n2, x, 0, n1);
876 }
877 }
878 if (dist.contains(Distribution.SHUFFLE) && m != 1) {
879 x = createSample(distData, info, si.with(Distribution.SHUFFLE));
880 // rand() % m ? (j += 2) : (k += 2)
881 final SharedStateDiscreteSampler s = DiscreteUniformSampler.of(rng, 0, m - 1);
882 for (int i = -1, j = 0, k = 1; ++i < n;) {
883 x[i] = s.sample() != 0 ? (j += 2) : (k += 2);
884 }
885 }
886 // Extra - based on organpipe with a variable ascending/descending length
887 if (dist.contains(Distribution.SHARKTOOTH) && m != 1) {
888 x = createSample(distData, info, si.with(Distribution.SHARKTOOTH));
889 // ascending-descending runs
890 int i = -1;
891 int j = (o & Integer.MAX_VALUE) % m - 1;
892 OUTER:
893 for (;;) {
894 while (++j < m) {
895 if (++i == n) {
896 break OUTER;
897 }
898 x[i] = j;
899 }
900 while (--j >= 0) {
901 if (++i == n) {
902 break OUTER;
903 }
904 x[i] = j;
905 }
906 }
907 }
908 // Valois (2000)
909 if (dist.contains(Distribution.SORTED)) {
910 x = createSample(distData, info, si.with(Distribution.SORTED));
911 for (int i = -1; ++i < n;) {
912 x[i] = i;
913 }
914 }
915 if (dist.contains(Distribution.ONEZERO)) {
916 x = createSample(distData, info, si.with(Distribution.ONEZERO));
917 // permutation of floor(n/2) ones and ceil(n/2) zeroes.
918 // For convenience this uses random ones and zeros to avoid a shuffle
919 // and simply reads bits from integers. The distribution will not
920 // be exactly 50:50.
921 final int end = n & ~31;
922 for (int i = 0; i < end; i += 32) {
923 int z = rng.nextInt();
924 for (int j = -1; ++j < 32;) {
925 x[i + j] = z & 1;
926 z >>>= 1;
927 }
928 }
929 for (int i = end; ++i < n;) {
930 x[i] = rng.nextBoolean() ? 1 : 0;
931 }
932 }
933 if (dist.contains(Distribution.M3KILLER)) {
934 x = createSample(distData, info, si.with(Distribution.M3KILLER));
935 medianOf3Killer(x);
936 }
937 if (dist.contains(Distribution.ROTATED)) {
938 x = createSample(distData, info, si.with(Distribution.ROTATED));
939 // sorted sequence rotated left once
940 // 1, 2, 3, ..., n-1, 0
941 for (int i = 1; i < n; i++) {
942 x[i - 1] = i;
943 }
944 }
945 if (dist.contains(Distribution.TWOFACED)) {
946 x = createSample(distData, info, si.with(Distribution.TWOFACED));
947 // Musser's two faced randomly permutes a median-of-3 killer in
948 // 4 floor(log2(n)) through n/2 and n/2 + 4 floor(log2(n)) through n
949 medianOf3Killer(x);
950 final int j = 4 * (31 - Integer.numberOfLeadingZeros(n));
951 final int n2 = n >>> 1;
952 ArraySampler.shuffle(rng, x, j, n2);
953 ArraySampler.shuffle(rng, x, n2 + j, n);
954 }
955 if (dist.contains(Distribution.ORGANPIPE)) {
956 x = createSample(distData, info, si.with(Distribution.ORGANPIPE));
957 // 0, 1, 2, 3, ..., 3, 2, 1, 0
958 // n should be even to leave two equal values in the middle, otherwise a single
959 for (int i = -1, j = n; ++i <= --j;) {
960 x[i] = i;
961 x[j] = i;
962 }
963 }
964 return distData;
965 }
966
967 /**
968 * Create the sample array and add it to the {@code data}; add the information to the {@code info}.
969 *
970 * @param data Data samples.
971 * @param info Sample information.
972 * @param s Sample information.
973 * @return the new sample array
974 */
975 private static int[] createSample(ArrayList<int[]> data, List<SampleInfo> info,
976 SampleInfo s) {
977 final int[] x = new int[s.getN()];
978 data.add(x);
979 info.add(s);
980 return x;
981 }
982
983 /**
984 * Create Musser's median-of-3 killer sequence (in-place).
985 *
986 * @param x Data.
987 */
988 private static void medianOf3Killer(int[] x) {
989 // This uses the original K_2k sequence from Musser (1997)
990 // Introspective sorting and selection algorithms,
991 // Software—Practice and Experience, 27(8), 983–993.
992 // A true median-of-3 killer requires n to be an even integer divisible by 4,
993 // i.e. k is an even positive integer. This causes a median-of-3 partition
994 // strategy to produce a sequence of n/4 partitions into sub-sequences of
995 // length 2 and n-2, 2 and n-4, ..., 2 and n/2.
996 // 1 2 3 4 5 k-2 k-1 k k+1 k+2 k+3 2k-1 2k
997 // 1, k+1, 3, k+3, 5, ..., 2k-3, k-1 2k-1 2 4 6 ... 2k-2 2k
998 final int n = x.length;
999 final int k = n >>> 1;
1000 for (int i = 0; i < k; i++) {
1001 x[i] = ++i;
1002 x[i] = k + i;
1003 }
1004 for (int i = k - 1, j = 2; ++i < n; j += 2) {
1005 x[i] = j;
1006 }
1007 }
1008
1009 /**
1010 * Return a (part) reversed copy of the data.
1011 *
1012 * @param x Data.
1013 * @param from Start index to reverse (inclusive).
1014 * @param to End index to reverse (exclusive).
1015 * @return the copy
1016 */
1017 private static int[] reverse(int[] x, int from, int to) {
1018 final int[] a = x.clone();
1019 for (int i = from - 1, j = to; ++i < --j;) {
1020 final int v = a[i];
1021 a[i] = a[j];
1022 a[j] = v;
1023 }
1024 return a;
1025 }
1026
1027 /**
1028 * Return a dithered copy of the data.
1029 *
1030 * @param x Data.
1031 * @return the copy
1032 */
1033 private static int[] dither(int[] x) {
1034 final int[] a = x.clone();
1035 for (int i = a.length; --i >= 0;) {
1036 // Bentley-McIlroy use i % 5.
1037 // It is important this is not a power of 2 so it will not coincide
1038 // with patterns created in the data using the default m powers-of-2.
1039 a[i] += i % 5;
1040 }
1041 return a;
1042 }
1043
1044 /**
1045 * Gets the minimum length of the data.
1046 * The actual length is enumerated in {@code [length, length + range]}.
1047 *
1048 * @return the length
1049 */
1050 protected abstract int getLength();
1051
1052 /**
1053 * Gets the range.
1054 *
1055 * @return the range
1056 */
1057 final int getRange() {
1058 return range;
1059 }
1060
1061 /**
1062 * Sets the distribution(s) of the data.
1063 * If the input is an empty array or the first enum value is null,
1064 * then all distributions are used.
1065 *
1066 * @param v Values.
1067 */
1068 void setDistribution(Distribution... v) {
1069 if (v.length == 0 || v[0] == null) {
1070 distribution = ALL;
1071 } else {
1072 final EnumSet<Distribution> s = EnumSet.of(v[0], v);
1073 distribution = s.stream().map(Enum::name).collect(Collectors.joining(":"));
1074 }
1075 }
1076
1077 /**
1078 * Sets the modification of the data.
1079 * If the input is an empty array or the first enum value is null,
1080 * then all distributions are used.
1081 *
1082 * @param v Value.
1083 */
1084 void setModification(Modification... v) {
1085 if (v.length == 0 || v[0] == null) {
1086 modification = ALL;
1087 } else {
1088 final EnumSet<Modification> s = EnumSet.of(v[0], v);
1089 modification = s.stream().map(Enum::name).collect(Collectors.joining(":"));
1090 }
1091 }
1092
1093 /**
1094 * Sets the maximum addition to extend the length of each sample of data.
1095 * The actual length is enumerated in {@code [length, length + range]}.
1096 *
1097 * @param v Value.
1098 */
1099 void setRange(int v) {
1100 range = v;
1101 }
1102
1103 /**
1104 * Sets the sample 'seed' used to generate distributions.
1105 * If set to zero the default is to use powers of 2 based on sample size.
1106 *
1107 * <p>Supports positive values and the edge case of {@link Integer#MIN_VALUE}
1108 * which is treated as an unsigned power of 2.
1109 *
1110 * @param v Value (ignored if not within {@code [1, 2^31]}).
1111 */
1112 void setSeed(int v) {
1113 seed = v;
1114 }
1115
1116 /**
1117 * Sets the sample 'offset' used to generate distributions. Advanced to a new
1118 * random integer on each invocation of {@link #setup()}.
1119 *
1120 * @param v Value.
1121 */
1122 void setOffset(int v) {
1123 offset = v;
1124 }
1125
1126 /**
1127 * Sets the number of samples to use for the random distribution mode.
1128 * See {@link AbstractDataSource} for details.
1129 *
1130 * @param v Value.
1131 */
1132 void setSamples(int v) {
1133 samples = v;
1134 }
1135
1136 /**
1137 * Sets the seed for the random number generator.
1138 *
1139 * @param v Value.
1140 */
1141 void setRngSeed(long v) {
1142 this.rngSeed = v;
1143 }
1144 }
1145
1146 /**
1147 * Source of {@code double} array data to sort.
1148 */
1149 @State(Scope.Benchmark)
1150 public static class SortSource extends AbstractDataSource {
1151 /** Data length. */
1152 @Param({"1023"})
1153 private int length;
1154 /** Number of repeats. This is used to control the number of times the data is processed
1155 * per invocation. Note that each invocation randomises the order. For very small data
1156 * and/or fast methods there may not be enough data to achieve the target of 1
1157 * millisecond per invocation. Use this value to increase the length of each invocation.
1158 * For example the insertion sort on tiny data, or the sort5 methods, may require this
1159 * to be 1,000,000 or higher. */
1160 @Param({"1"})
1161 private int repeats;
1162
1163 /** {@inheritDoc} */
1164 @Override
1165 protected int getLength() {
1166 return length;
1167 }
1168
1169 /** {@inheritDoc} */
1170 @Override
1171 public int size() {
1172 return super.size() * repeats;
1173 }
1174
1175 /** {@inheritDoc} */
1176 @Override
1177 public double[] getData(int index) {
1178 // order = (data index) * repeats + repeat
1179 // data index = order / repeats
1180 return super.getDataSample(order[index] / repeats);
1181 }
1182 }
1183
1184 /**
1185 * Source of k-th indices to partition.
1186 *
1187 * <p>This class provides both data to partition and the indices to partition.
1188 * The indices and data are created per iteration. The order to process them
1189 * is created per invocation.
1190 */
1191 @State(Scope.Benchmark)
1192 public static class KSource extends AbstractDataSource {
1193 /** Data length. */
1194 @Param({"1023"})
1195 private int length;
1196 /** Number of indices to select. */
1197 @Param({"1", "2", "3", "5", "10"})
1198 private int k;
1199 /** Number of repeats. */
1200 @Param({"10"})
1201 private int repeats;
1202 /** Distribution mode. K indices can be distributed randomly or uniformly.
1203 * <ul>
1204 * <li>"random": distribute k indices randomly
1205 * <li>"uniform": distribute k indices uniformly but with a random start point
1206 * <li>"index": Use a single index at k
1207 * <li>"single": Use a single index at k uniformly spaced points. This mode
1208 * first generates the spacing for the indices. Then samples from that spacing
1209 * using the configured repeats. Common usage of k=10 will have 10 samples with a
1210 * single index, each in a different position.
1211 * </ul>
1212 * <p>If the mode ends with a "s" then the indices are sorted. For example "randoms"
1213 * will sort the random indices.
1214 */
1215 @Param({"random"})
1216 private String mode;
1217 /** Separation. K can be single indices (s=0) or paired (s!=0). Paired indices are
1218 * separated using the specified separation. When running in paired mode the
1219 * number of k is doubled and duplicates may occur. This method is used for
1220 * testing sparse or uniform distributions of paired indices that may occur when
1221 * interpolating quantiles. Since the separation is allowed to be above 1 it also
1222 * allows testing configurations for close indices. */
1223 @Param({"0"})
1224 private int s;
1225
1226 /** Indices. */
1227 private int[][] indices;
1228 /** Cache permutation samplers. */
1229 private PermutationSampler[] samplers;
1230
1231 /** {@inheritDoc} */
1232 @Override
1233 protected int getLength() {
1234 return length;
1235 }
1236
1237 /** {@inheritDoc} */
1238 @Override
1239 public int size() {
1240 return super.size() * repeats;
1241 }
1242
1243 /** {@inheritDoc} */
1244 @Override
1245 public double[] getData(int index) {
1246 // order = (data index) * repeats + repeat
1247 // data index = order / repeats
1248 return super.getDataSample(order[index] / repeats);
1249 }
1250
1251 /** {@inheritDoc} */
1252 @Override
1253 public int[] getIntData(int index) {
1254 return super.getIntDataSample(order[index] / repeats);
1255 }
1256
1257 /**
1258 * Gets the indices for the given {@code index}.
1259 *
1260 * @param index Index.
1261 * @return the data indices
1262 */
1263 public int[] getIndices(int index) {
1264 // order = (data index) * repeats + repeat
1265 // Directly look-up the indices for this repeat.
1266 return indices[order[index]];
1267 }
1268
1269 /**
1270 * Create the indices.
1271 */
1272 @Override
1273 @Setup(Level.Iteration)
1274 public void setup() {
1275 if (s < 0 || s >= getLength()) {
1276 throw new IllegalStateException("Invalid separation: " + s);
1277 }
1278 super.setup();
1279
1280 // Data will be randomized per iteration
1281 if (indices == null) {
1282 // First call, create objects
1283 indices = new int[size()][];
1284 // Cache samplers. These hold an array which is randomized
1285 // per call to obtain a permutation.
1286 if (k > 1) {
1287 samplers = new PermutationSampler[getRange() + 1];
1288 }
1289 }
1290
1291 // Create indices in the data sample length.
1292 // If a separation is provided then the length is reduced by the separation
1293 // to make space for a second index.
1294
1295 int index = 0;
1296 final int noOfSamples = super.size();
1297 if (mode.startsWith("random")) {
1298 // random mode creates a permutation of k indices in the length
1299 if (k > 1) {
1300 final int baseLength = getLength();
1301 for (int i = 0; i < noOfSamples; i++) {
1302 final int len = getDataSize(i);
1303 // Create permutation sampler for the length
1304 PermutationSampler sampler = samplers[len - baseLength];
1305 if (sampler == null) {
1306 // Reduce length by the separation
1307 final int n = len - s;
1308 samplers[len - baseLength] = sampler = new PermutationSampler(rng, n, k);
1309 }
1310 for (int j = repeats; --j >= 0;) {
1311 indices[index++] = sampler.sample();
1312 }
1313 }
1314 } else {
1315 // k=1: No requirement for a permutation
1316 for (int i = 0; i < noOfSamples; i++) {
1317 // Reduce length by the separation
1318 final int n = getDataSize(i) - s;
1319 for (int j = repeats; --j >= 0;) {
1320 indices[index++] = new int[] {rng.nextInt(n)};
1321 }
1322 }
1323 }
1324 } else if (mode.startsWith("uniform")) {
1325 // uniform indices with a random start
1326 for (int i = 0; i < noOfSamples; i++) {
1327 // Reduce length by the separation
1328 final int n = getDataSize(i) - s;
1329 final int step = Math.max(1, (int) Math.round((double) n / k));
1330 for (int j = repeats; --j >= 0;) {
1331 final int[] k1 = new int[k];
1332 int p = rng.nextInt(n);
1333 for (int m = 0; m < k; m++) {
1334 p = (p + step) % n;
1335 k1[m] = p;
1336 }
1337 indices[index++] = k1;
1338 }
1339 }
1340 } else if (mode.startsWith("single")) {
1341 // uniform indices with a random start
1342 for (int i = 0; i < noOfSamples; i++) {
1343 // Reduce length by the separation
1344 final int n = getDataSize(i) - s;
1345 int[] samples;
1346 // When k approaches n then a linear spacing covers every part
1347 // of the array and we sample. Do this when n < k/4. This handles
1348 // k > n (saturation).
1349 if (n < (k >> 2)) {
1350 samples = rng.ints(k, 0, n).toArray();
1351 } else {
1352 // Linear spacing
1353 final int step = n / k;
1354 samples = new int[k];
1355 for (int j = 0, x = step >> 1; j < k; j++, x += step) {
1356 samples[j] = x;
1357 }
1358 }
1359 for (int j = 0; j < repeats; j++) {
1360 final int ii = j % k;
1361 if (ii == 0) {
1362 ArraySampler.shuffle(rng, samples);
1363 }
1364 indices[index++] = new int[] {samples[ii]};
1365 }
1366 }
1367 } else if ("index".equals(mode)) {
1368 // Same single or paired indices for all samples.
1369 // Check the index is valid.
1370 for (int i = 0; i < noOfSamples; i++) {
1371 // Reduce length by the separation
1372 final int n = getDataSize(i) - s;
1373 if (k >= n) {
1374 throw new IllegalStateException("Invalid k: " + k + " >= " + n);
1375 }
1376 }
1377 final int[] kk = s > 0 ? new int[] {k, k + s} : new int[] {k};
1378 Arrays.fill(indices, kk);
1379 return;
1380 } else {
1381 throw new IllegalStateException("Unknown index mode: " + mode);
1382 }
1383 // Add paired indices
1384 if (s > 0) {
1385 for (int i = 0; i < indices.length; i++) {
1386 final int[] k1 = indices[i];
1387 final int[] k2 = new int[k1.length << 1];
1388 for (int j = 0; j < k1.length; j++) {
1389 k2[j << 1] = k1[j];
1390 k2[(j << 1) + 1] = k1[j] + s;
1391 }
1392 indices[i] = k2;
1393 }
1394 }
1395 // Optionally sort
1396 if (mode.endsWith("s")) {
1397 for (int i = 0; i < indices.length; i++) {
1398 Arrays.sort(indices[i]);
1399 }
1400 }
1401 }
1402 }
1403
1404 /**
1405 * Source of k-th indices. This does not extend the {@link AbstractDataSource} to provide
1406 * data to partition. It is to be used to test processing of indices without partition
1407 * overhead.
1408 */
1409 @State(Scope.Benchmark)
1410 public static class IndexSource {
1411 /** Indices. */
1412 protected int[][] indices;
1413 /** Upper bound (exclusive) on the indices. */
1414 @Param({"1000", "1000000", "1000000000"})
1415 private int length;
1416 /** Number of indices to select. */
1417 @Param({"10", "20", "40", "80", "160"})
1418 private int k;
1419 /** Number of repeats. */
1420 @Param({"1000"})
1421 private int repeats;
1422 /** RNG seed. Created using ThreadLocalRandom.current().nextLong(). Each benchmark
1423 * executed by JMH will use the same random data, even across JVMs.
1424 *
1425 * <p>If this is zero then a random seed is chosen. */
1426 @Param({"-7450238124206088695"})
1427 private long rngSeed;
1428 /** Ordered keys. */
1429 @Param({"false"})
1430 private boolean ordered;
1431 /** Minimum separation between keys. */
1432 @Param({"32"})
1433 private int separation;
1434
1435 /**
1436 * @return the indices
1437 */
1438 public int[][] getIndices() {
1439 return indices;
1440 }
1441
1442 /**
1443 * Gets the minimum separation between keys. This is used by benchmarks
1444 * to ignore splitting/search keys below a threshold.
1445 *
1446 * @return the minimum separation
1447 */
1448 public int getMinSeparation() {
1449 return separation;
1450 }
1451
1452 /**
1453 * Create the indices and search points.
1454 */
1455 @Setup(Level.Iteration)
1456 public void setup() {
1457 if (k < 2) {
1458 throw new IllegalStateException("Require multiple indices");
1459 }
1460 // Data will be randomized per iteration. It is the same sequence across
1461 // benchmarks and JVM instances and allows benchmarking across JVM platforms
1462 // with the same data.
1463 // Allow pseudorandom seeding
1464 if (rngSeed == 0) {
1465 rngSeed = RandomSource.createLong();
1466 }
1467 final UniformRandomProvider rng = RANDOM_SOURCE.create(rngSeed);
1468 // Advance the seed for the next iteration.
1469 rngSeed = rng.nextLong();
1470
1471 final SharedStateDiscreteSampler s = DiscreteUniformSampler.of(rng, 0, length - 1);
1472
1473 indices = new int[repeats][];
1474
1475 for (int i = repeats; --i >= 0;) {
1476 // Indices with possible repeats
1477 final int[] x = new int[k];
1478 for (int j = k; --j >= 0;) {
1479 x[j] = s.sample();
1480 }
1481 indices[i] = x;
1482 if (ordered) {
1483 Sorting.sortIndices(x, x.length);
1484 }
1485 }
1486 }
1487
1488 /**
1489 * @return the RNG seed
1490 */
1491 long getRngSeed() {
1492 return rngSeed;
1493 }
1494 }
1495
1496 /**
1497 * Source of k-th indices to be searched/split.
1498 * Can be used to split the same indices multiple times, or split a set of indices
1499 * a single time, e.g. split indices k at point p.
1500 */
1501 @State(Scope.Benchmark)
1502 public static class SplitIndexSource extends IndexSource {
1503 /** Division mode. */
1504 @Param({"RANDOM", "BINARY"})
1505 private DivisionMode mode;
1506
1507 /** Search points. */
1508 private int[][] points;
1509 /** The look-up samples. These are used to identify a set of indices, and a single point to
1510 * find in the range of the indices, e.g. split indices k at point p. The long packs
1511 * two integers: the index of the indices k; and the search point p. These are packed
1512 * as a long to enable easy shuffling of samples and access to the two indices. */
1513 private long[] samples;
1514
1515 /** Options for the division mode. */
1516 public enum DivisionMode {
1517 /** Randomly divide. */
1518 RANDOM,
1519 /** Divide using binary division with recursion left then right. */
1520 BINARY;
1521 }
1522
1523 /**
1524 * Return the search points. They are the median index points between adjacent
1525 * indices. These are in the order specified by the division mode.
1526 *
1527 * @return the search points
1528 */
1529 public int[][] getPoints() {
1530 return points;
1531 }
1532
1533 /**
1534 * @return the sample size
1535 */
1536 int samples() {
1537 return samples.length;
1538 }
1539
1540 /**
1541 * Gets the indices for the random sample.
1542 *
1543 * @param index the index
1544 * @return the indices
1545 */
1546 int[] getIndices(int index) {
1547 return indices[(int) (samples[index] >>> Integer.SIZE)];
1548 }
1549
1550 /**
1551 * Gets the search point for the random sample.
1552 *
1553 * @param index the index
1554 * @return the search point
1555 */
1556 int getPoint(int index) {
1557 return (int) samples[index];
1558 }
1559
1560 /**
1561 * Create the indices and search points.
1562 */
1563 @Override
1564 @Setup(Level.Iteration)
1565 public void setup() {
1566 super.setup();
1567
1568 final UniformRandomProvider rng = RANDOM_SOURCE.create(getRngSeed());
1569
1570 final int[][] indices = getIndices();
1571 points = new int[indices.length][];
1572
1573 final int s = getMinSeparation();
1574
1575 // Set the division mode
1576 final boolean random = Objects.requireNonNull(mode) == DivisionMode.RANDOM;
1577
1578 int size = 0;
1579
1580 for (int i = points.length; --i >= 0;) {
1581 // Get the sorted unique indices
1582 final int[] y = indices[i].clone();
1583 final int unique = Sorting.sortIndices(y, y.length);
1584
1585 // Create the cut points between each unique index
1586 int[] p = new int[unique - 1];
1587 if (random) {
1588 int c = 0;
1589 for (int j = 0; j < p.length; j++) {
1590 // Ignore dense keys
1591 if (y[j] + s < y[j + 1]) {
1592 p[c++] = (y[j] + y[j + 1]) >>> 1;
1593 }
1594 }
1595 p = Arrays.copyOf(p, c);
1596 ArraySampler.shuffle(rng, p);
1597 points[i] = p;
1598 } else {
1599 // binary division
1600 final int c = divide(y, 0, unique - 1, p, 0, s);
1601 points[i] = Arrays.copyOf(p, c);
1602 }
1603 size += points[i].length;
1604 }
1605
1606 // Create the samples: pack indices index+point into a long
1607 samples = new long[size];
1608 for (int i = points.length; --i >= 0;) {
1609 final long l = ((long) i) << Integer.SIZE;
1610 for (final int p : points[i]) {
1611 samples[--size] = l | p;
1612 }
1613 }
1614 ArraySampler.shuffle(rng, samples);
1615 }
1616
1617 /**
1618 * Divide the indices using binary division with recursion left then right.
1619 * If a division is possible store the division point and update the count.
1620 *
1621 * @param indices Indices to divide
1622 * @param lo Lower index in indices (inclusive).
1623 * @param hi Upper index in indices (inclusive).
1624 * @param p Division points.
1625 * @param c Count of division points.
1626 * @param s Minimum separation between indices.
1627 * @return the updated count of division points.
1628 */
1629 private static int divide(int[] indices, int lo, int hi, int[] p, int c, int s) {
1630 if (lo < hi) {
1631 // Divide the interval in half
1632 final int m = (lo + hi) >>> 1;
1633 // Create a division point at approximately the midpoint
1634 final int m1 = m + 1;
1635 // Ignore dense keys
1636 if (indices[m] + s < indices[m1]) {
1637 final int k = (indices[m] + indices[m1]) >>> 1;
1638 p[c++] = k;
1639 }
1640 // Recurse left then right.
1641 // Does nothing if lo + 1 == hi as m == lo and m1 == hi.
1642 c = divide(indices, lo, m, p, c, s);
1643 c = divide(indices, m1, hi, p, c, s);
1644 }
1645 return c;
1646 }
1647 }
1648
1649 /**
1650 * Source of an {@link SearchableInterval}.
1651 */
1652 @State(Scope.Benchmark)
1653 public static class SearchableIntervalSource {
1654 /** Name of the source. */
1655 @Param({"ScanningKeyInterval",
1656 "BinarySearchKeyInterval",
1657 "IndexSetInterval",
1658 "CompressedIndexSet",
1659 // Same speed as the CompressedIndexSet_2
1660 //"CompressedIndexSet2",
1661 })
1662 private String name;
1663
1664 /** The factory. */
1665 private Function<int[], SearchableInterval> factory;
1666
1667 /**
1668 * @param indices Indices.
1669 * @return {@link SearchableInterval}
1670 */
1671 public SearchableInterval create(int[] indices) {
1672 return factory.apply(indices);
1673 }
1674
1675 /**
1676 * Create the function.
1677 */
1678 @Setup
1679 public void setup() {
1680 Objects.requireNonNull(name);
1681 if ("ScanningKeyInterval".equals(name)) {
1682 factory = k -> {
1683 k = k.clone();
1684 final int unique = Sorting.sortIndices(k, k.length);
1685 return ScanningKeyInterval.of(k, unique);
1686 };
1687 } else if ("BinarySearchKeyInterval".equals(name)) {
1688 factory = k -> {
1689 k = k.clone();
1690 final int unique = Sorting.sortIndices(k, k.length);
1691 return BinarySearchKeyInterval.of(k, unique);
1692 };
1693 } else if ("IndexSetInterval".equals(name)) {
1694 factory = IndexSet::of;
1695 } else if (name.equals("CompressedIndexSet2")) {
1696 factory = CompressedIndexSet2::of;
1697 } else if (name.startsWith("CompressedIndexSet")) {
1698 // To use compression 2 requires CompressedIndexSet_2 otherwise
1699 // a fixed compression set will be returned
1700 final int c = getCompression(name);
1701 factory = k -> CompressedIndexSet.of(c, k);
1702 } else {
1703 throw new IllegalStateException("Unknown SearchableInterval: " + name);
1704 }
1705 }
1706
1707 /**
1708 * Gets the compression from the last character of the name.
1709 *
1710 * @param name Name.
1711 * @return the compression
1712 */
1713 private static int getCompression(String name) {
1714 final char c = name.charAt(name.length() - 1);
1715 if (Character.isDigit(c)) {
1716 return Character.digit(c, 10);
1717 }
1718 return 1;
1719 }
1720 }
1721
1722 /**
1723 * Source of an {@link UpdatingInterval}.
1724 */
1725 @State(Scope.Benchmark)
1726 public static class UpdatingIntervalSource {
1727 /** Name of the source. */
1728 @Param({"KeyUpdatingInterval",
1729 // Same speed as BitIndexUpdatingInterval
1730 //"IndexSet",
1731 "BitIndexUpdatingInterval",
1732 })
1733 private String name;
1734
1735 /** The factory. */
1736 private Function<int[], UpdatingInterval> factory;
1737
1738 /**
1739 * @param indices Indices.
1740 * @return {@link UpdatingInterval}
1741 */
1742 public UpdatingInterval create(int[] indices) {
1743 return factory.apply(indices);
1744 }
1745
1746 /**
1747 * Create the function.
1748 */
1749 @Setup
1750 public void setup() {
1751 Objects.requireNonNull(name);
1752 if ("KeyUpdatingInterval".equals(name)) {
1753 factory = k -> {
1754 k = k.clone();
1755 final int unique = Sorting.sortIndices(k, k.length);
1756 return KeyUpdatingInterval.of(k, unique);
1757 };
1758 } else if ("IndexSet".equals(name)) {
1759 factory = k -> IndexSet.of(k).interval();
1760 } else if (name.equals("BitIndexUpdatingInterval")) {
1761 factory = k -> BitIndexUpdatingInterval.of(k, k.length);
1762 } else {
1763 throw new IllegalStateException("Unknown UpdatingInterval: " + name);
1764 }
1765 }
1766 }
1767
1768 /**
1769 * Source of a range of positions to partition. These are positioned away from the edge
1770 * using a power of 2 shift.
1771 *
1772 * <p>This is a specialised class to allow benchmarking the switch from using
1773 * quickselect partitioning to using an edge selection.
1774 *
1775 * <p>This class provides both data to partition and the indices to partition.
1776 * The indices and data are created per iteration. The order to process them
1777 * is created per invocation.
1778 */
1779 @State(Scope.Benchmark)
1780 public static class EdgeSource extends AbstractDataSource {
1781 /** Data length. */
1782 @Param({"1023"})
1783 private int length;
1784 /** Mode. */
1785 @Param({"SHIFT"})
1786 private Mode mode;
1787 /** Parameter to find k. Configured for 'shift' of the length. */
1788 @Param({"1", "2", "3", "4", "5", "6", "7", "8", "9"})
1789 private int p;
1790 /** Target indices (as pairs of {@code [ka, kb]} defining a range to select). */
1791 private int[][] indices;
1792
1793 /** Define the method used to generated the edge k. */
1794 public enum Mode {
1795 /** Create {@code k} using a right-shift {@code >>>} applied to the length. */
1796 SHIFT,
1797 /** Use the parameter {@code p} as an index. */
1798 INDEX;
1799 }
1800
1801 /** {@inheritDoc} */
1802 @Override
1803 public int size() {
1804 return super.size() * 2;
1805 }
1806
1807 /** {@inheritDoc} */
1808 @Override
1809 public double[] getData(int index) {
1810 // order = (data index) * repeats + repeat
1811 // data index = order / repeats; repeats=2 divide by using a shift
1812 return super.getDataSample(order[index] >> 1);
1813 }
1814
1815 /**
1816 * Gets the sample indices for the given {@code index}.
1817 * Returns a range to partition {@code [k1, kn]}.
1818 *
1819 * @param index Index.
1820 * @return the target indices
1821 */
1822 public int[] getIndices(int index) {
1823 // order = (data index) * repeats + repeat
1824 // Directly look-up the indices for this repeat.
1825 return indices[order[index]];
1826 }
1827
1828 /** {@inheritDoc} */
1829 @Override
1830 protected int getLength() {
1831 return length;
1832 }
1833
1834 /**
1835 * Create the data and check the indices are not at the end.
1836 */
1837 @Override
1838 @Setup(Level.Iteration)
1839 public void setup() {
1840 // Data will be randomized per iteration
1841 super.setup();
1842 // Error for a bad configuration. Allow k=0 but not smaller.
1843 // Uses the lower bound on the length.
1844 int k;
1845 if (mode == Mode.SHIFT) {
1846 k = length >>> p;
1847 if (k == 0 && length >>> (p - 1) == 0) {
1848 throw new IllegalStateException(length + " >>> (" + p + " - 1) == 0");
1849 }
1850 } else if (mode == Mode.INDEX) {
1851 k = p;
1852 if (k < 0 || k >= length) {
1853 throw new IllegalStateException("Invalid index [0, " + length + "): " + p);
1854 }
1855 } else {
1856 throw new IllegalStateException("Unknown mode: " + mode);
1857 }
1858
1859 if (indices == null) {
1860 // First call, create objects
1861 indices = new int[size()][];
1862 }
1863
1864 // Create a single index at both ends.
1865 // Note: Data has variable length so we have to compute the upper end for each sample.
1866 // Re-use the constant lower but we do not bother to cache repeats of the upper.
1867 final int[] lower = {k, k};
1868 final int noOfSamples = super.size();
1869 for (int i = 0; i < noOfSamples; i++) {
1870 final int len = getDataSize(i);
1871 final int k1 = len - 1 - k;
1872 indices[i << 1] = lower;
1873 indices[(i << 1) + 1] = new int[] {k1, k1};
1874 }
1875 }
1876 }
1877
1878 /**
1879 * Source of a sort function.
1880 */
1881 @State(Scope.Benchmark)
1882 public static class SortFunctionSource {
1883 /** Name of the source. */
1884 @Param({JDK, SP, BM, SBM, DP, DP5,
1885 SBM2,
1886 // Not run by default as it is slow on large data
1887 //"InsertionSortIF", "InsertionSortIT", "InsertionSort", "InsertionSortB"
1888 // Introsort methods with defaults, can configure using the name
1889 // e.g. ISP_SBM_QS50.
1890 ISP, IDP,
1891 })
1892 private String name;
1893
1894 /** Override of minimum quickselect size. */
1895 @Param({"0"})
1896 private int qs;
1897
1898 /** The action. */
1899 private Consumer<double[]> function;
1900
1901 /**
1902 * @return the function
1903 */
1904 public Consumer<double[]> getFunction() {
1905 return function;
1906 }
1907
1908 /**
1909 * Create the function.
1910 */
1911 @Setup
1912 public void setup() {
1913 Objects.requireNonNull(name);
1914 if (JDK.equals(name)) {
1915 function = Arrays::sort;
1916 // First generation kth-selector functions (not configurable)
1917 } else if (name.startsWith(SP)) {
1918 function = PartitionFactory.createKthSelector(name, SP, qs)::sortSP;
1919 } else if (name.startsWith(SBM)) {
1920 function = PartitionFactory.createKthSelector(name, SBM, qs)::sortSBM;
1921 } else if (name.startsWith(BM)) {
1922 function = PartitionFactory.createKthSelector(name, BM, qs)::sortBM;
1923 } else if (name.startsWith(DP)) {
1924 function = PartitionFactory.createKthSelector(name, DP, qs)::sortDP;
1925 } else if (name.startsWith(DP5)) {
1926 function = PartitionFactory.createKthSelector(name, DP5, qs)::sortDP5;
1927 // 2nd generation partition function
1928 } else if (name.startsWith(SBM2)) {
1929 function = PartitionFactory.createPartition(name, SBM2, qs, 0)::sortSBM;
1930 // Introsort
1931 } else if (name.startsWith(ISP)) {
1932 function = PartitionFactory.createPartition(name, ISP, qs, 0)::sortISP;
1933 } else if (name.startsWith(IDP)) {
1934 function = PartitionFactory.createPartition(name, IDP, qs, 0)::sortIDP;
1935 // Insertion sort variations.
1936 // For parity with the internal version these all use the same (shorter) data
1937 } else if ("InsertionSortIF".equals(name)) {
1938 function = x -> {
1939 // Ignored sentinal
1940 x[0] = Double.NEGATIVE_INFINITY;
1941 Sorting.sort(x, 1, x.length - 1, false);
1942 };
1943 } else if ("InsertionSortIT".equals(name)) {
1944 // Internal version
1945 function = x -> {
1946 // Add a sentinal
1947 x[0] = Double.NEGATIVE_INFINITY;
1948 Sorting.sort(x, 1, x.length - 1, true);
1949 };
1950 } else if ("InsertionSort".equals(name)) {
1951 function = x -> {
1952 x[0] = Double.NEGATIVE_INFINITY;
1953 Sorting.sort(x, 1, x.length - 1);
1954 };
1955 } else if (name.startsWith("PairedInsertionSort")) {
1956 if (name.endsWith("1")) {
1957 function = x -> {
1958 x[0] = Double.NEGATIVE_INFINITY;
1959 Sorting.sortPairedInternal1(x, 1, x.length - 1);
1960 };
1961 } else if (name.endsWith("2")) {
1962 function = x -> {
1963 x[0] = Double.NEGATIVE_INFINITY;
1964 Sorting.sortPairedInternal2(x, 1, x.length - 1);
1965 };
1966 } else if (name.endsWith("3")) {
1967 function = x -> {
1968 x[0] = Double.NEGATIVE_INFINITY;
1969 Sorting.sortPairedInternal3(x, 1, x.length - 1);
1970 };
1971 } else if (name.endsWith("4")) {
1972 function = x -> {
1973 x[0] = Double.NEGATIVE_INFINITY;
1974 Sorting.sortPairedInternal4(x, 1, x.length - 1);
1975 };
1976 }
1977 } else if ("InsertionSortB".equals(name)) {
1978 function = x -> {
1979 x[0] = Double.NEGATIVE_INFINITY;
1980 Sorting.sortb(x, 1, x.length - 1);
1981 };
1982 // Not actually a sort. This is used to benchmark the speed of heapselect
1983 // for a single k as a stopper function against a full sort of small data.
1984 } else if (name.startsWith(HEAP_SELECT)) {
1985 final char c = name.charAt(name.length() - 1);
1986 // This offsets the start by 1 for comparison with insertion sort
1987 final int k = Character.isDigit(c) ? Character.digit(c, 10) + 1 : 1;
1988 function = x -> Partition.heapSelectLeft(x, 1, x.length - 1, k, 0);
1989 }
1990 if (function == null) {
1991 throw new IllegalStateException("Unknown sort function: " + name);
1992 }
1993 }
1994 }
1995
1996 /**
1997 * Source of a sort function to sort 5 points.
1998 */
1999 @State(Scope.Benchmark)
2000 public static class Sort5FunctionSource {
2001 /** Name of the source. */
2002 @Param({"sort5", "sort5b", "sort5c",
2003 //"sort", "sort5head"
2004 })
2005 private String name;
2006
2007 /** The action. */
2008 private Consumer<double[]> function;
2009
2010 /**
2011 * @return the function
2012 */
2013 public Consumer<double[]> getFunction() {
2014 return function;
2015 }
2016
2017 /**
2018 * Create the function.
2019 */
2020 @Setup
2021 public void setup() {
2022 Objects.requireNonNull(name);
2023 // Note: We do not run this on input of length 5. We can run it on input of
2024 // any length above 5. So we choose indices using a spacing of 1/4 of the range.
2025 // Since we do this for all methods it is a fixed overhead. This allows use
2026 // of a variety of data sizes.
2027 if ("sort5".equals(name)) {
2028 function = x -> {
2029 final int s = x.length >> 2;
2030 Sorting.sort5(x, 0, s, s << 1, x.length - 1 - s, x.length - 1);
2031 };
2032 } else if ("sort5b".equals(name)) {
2033 function = x -> {
2034 final int s = x.length >> 2;
2035 Sorting.sort5b(x, 0, s, s << 1, x.length - 1 - s, x.length - 1);
2036 };
2037 } else if ("sort5c".equals(name)) {
2038 function = x -> {
2039 final int s = x.length >> 2;
2040 Sorting.sort5c(x, 0, s, s << 1, x.length - 1 - s, x.length - 1);
2041 };
2042 } else if ("sort".equals(name)) {
2043 function = x -> Sorting.sort(x, 0, 4);
2044 } else if ("sort5head".equals(name)) {
2045 function = x -> Sorting.sort5(x, 0, 1, 2, 3, 4);
2046 // Median of 5. Ensure the median index is computed by storing it in x
2047 } else if ("median5".equals(name)) {
2048 function = x -> {
2049 final int s = x.length >> 2;
2050 x[0] = Sorting.median5(x, 0, s, s << 1, x.length - 1 - s, x.length - 1);
2051 };
2052 } else if ("median5head".equals(name)) {
2053 function = x -> x[0] = Sorting.median5(x, 0, 1, 2, 3, 4);
2054 // median of 5 continuous elements
2055 } else if ("med5".equals(name)) {
2056 function = x -> x[0] = Sorting.median5(x, 0);
2057 } else if ("med5b".equals(name)) {
2058 function = x -> x[0] = Sorting.median5b(x, 0);
2059 } else if ("med5c".equals(name)) {
2060 function = x -> x[0] = Sorting.median5c(x, 0);
2061 } else if ("med5d".equals(name)) {
2062 function = x -> Sorting.median5d(x, 0, 1, 2, 3, 4);
2063 } else {
2064 throw new IllegalStateException("Unknown sort5 function: " + name);
2065 }
2066 }
2067 }
2068
2069 /**
2070 * Source of a function to compute the median of 4 points.
2071 */
2072 @State(Scope.Benchmark)
2073 public static class Median4FunctionSource {
2074 /** Name of the source. */
2075 @Param({"lower4", "lower4b", "lower4c", "lower4d", "lower4e",
2076 "upper4", "upper4c", "upper4d",
2077 // Full sort is slower
2078 //"sort4"
2079 })
2080 private String name;
2081
2082 /** The action. */
2083 private Consumer<double[]> function;
2084
2085 /**
2086 * @return the function
2087 */
2088 public Consumer<double[]> getFunction() {
2089 return function;
2090 }
2091
2092 /**
2093 * Create the function.
2094 */
2095 @Setup
2096 public void setup() {
2097 Objects.requireNonNull(name);
2098 // Note: We run this across the entire input array to simulate a pass
2099 // of the quickselect adaptive algorithm.
2100 if ("lower4".equals(name)) {
2101 function = x -> {
2102 final int f = x.length >>> 2;
2103 final int f2 = f + f;
2104 for (int i = f; i < f2; i++) {
2105 Sorting.lowerMedian4(x, i - f, i, i + f, i + f2);
2106 }
2107 };
2108 } else if ("lower4b".equals(name)) {
2109 function = x -> {
2110 final int f = x.length >>> 2;
2111 final int f2 = f + f;
2112 for (int i = f; i < f2; i++) {
2113 Sorting.lowerMedian4b(x, i - f, i, i + f, i + f2);
2114 }
2115 };
2116 } else if ("lower4c".equals(name)) {
2117 function = x -> {
2118 final int f = x.length >>> 2;
2119 final int f2 = f + f;
2120 for (int i = f; i < f2; i++) {
2121 Sorting.lowerMedian4c(x, i - f, i, i + f, i + f2);
2122 }
2123 };
2124 } else if ("lower4d".equals(name)) {
2125 function = x -> {
2126 final int f = x.length >>> 2;
2127 final int f2 = f + f;
2128 for (int i = f; i < f2; i++) {
2129 Sorting.lowerMedian4d(x, i - f, i, i + f, i + f2);
2130 }
2131 };
2132 } else if ("lower4e".equals(name)) {
2133 function = x -> {
2134 final int f = x.length >>> 2;
2135 final int f2 = f + f;
2136 for (int i = f; i < f2; i++) {
2137 Sorting.lowerMedian4e(x, i - f, i, i + f, i + f2);
2138 }
2139 };
2140 } else if ("upper4".equals(name)) {
2141 function = x -> {
2142 final int f = x.length >>> 2;
2143 final int f2 = f + f;
2144 for (int i = f; i < f2; i++) {
2145 Sorting.upperMedian4(x, i - f, i, i + f, i + f2);
2146 }
2147 };
2148 } else if ("upper4c".equals(name)) {
2149 function = x -> {
2150 final int f = x.length >>> 2;
2151 final int f2 = f + f;
2152 for (int i = f; i < f2; i++) {
2153 Sorting.upperMedian4c(x, i - f, i, i + f, i + f2);
2154 }
2155 };
2156 } else if ("upper4d".equals(name)) {
2157 function = x -> {
2158 final int f = x.length >>> 2;
2159 final int f2 = f + f;
2160 for (int i = f; i < f2; i++) {
2161 Sorting.upperMedian4d(x, i - f, i, i + f, i + f2);
2162 }
2163 };
2164 } else if ("sort4".equals(name)) {
2165 function = x -> {
2166 final int f = x.length >>> 2;
2167 final int f2 = f + f;
2168 for (int i = f; i < f2; i++) {
2169 Sorting.sort4(x, i - f, i, i + f, i + f2);
2170 }
2171 };
2172 } else {
2173 throw new IllegalStateException("Unknown median4 function: " + name);
2174 }
2175 }
2176 }
2177
2178 /**
2179 * Source of a function to compute the median of 3 points.
2180 */
2181 @State(Scope.Benchmark)
2182 public static class Median3FunctionSource {
2183 /** Name of the source. */
2184 @Param({"sort3", "sort3b", "sort3c"})
2185 private String name;
2186
2187 /** The action. */
2188 private Consumer<double[]> function;
2189
2190 /**
2191 * @return the function
2192 */
2193 public Consumer<double[]> getFunction() {
2194 return function;
2195 }
2196
2197 /**
2198 * Create the function.
2199 */
2200 @Setup
2201 public void setup() {
2202 Objects.requireNonNull(name);
2203 // Note: We run this across the entire input array to simulate a pass
2204 // of the quickselect adaptive algorithm.
2205 if ("sort3".equals(name)) {
2206 function = x -> {
2207 final int f = x.length / 3;
2208 final int f2 = f + f;
2209 for (int i = f; i < f2; i++) {
2210 Sorting.sort3(x, i - f, i, i + f);
2211 }
2212 };
2213 } else if ("sort3b".equals(name)) {
2214 function = x -> {
2215 final int f = x.length / 3;
2216 final int f2 = f + f;
2217 for (int i = f; i < f2; i++) {
2218 Sorting.sort3b(x, i - f, i, i + f);
2219 }
2220 };
2221 } else if ("sort3c".equals(name)) {
2222 function = x -> {
2223 final int f = x.length / 3;
2224 final int f2 = f + f;
2225 for (int i = f; i < f2; i++) {
2226 Sorting.sort3c(x, i - f, i, i + f);
2227 }
2228 };
2229 } else {
2230 throw new IllegalStateException("Unknown sort3 function: " + name);
2231 }
2232 }
2233 }
2234
2235 /**
2236 * Source of a k-th selector function for {@code double} data.
2237 */
2238 @State(Scope.Benchmark)
2239 public static class DoubleKFunctionSource {
2240 /** Name of the source. */
2241 @Param({SORT + JDK, SPH,
2242 SP, BM, SBM,
2243 DP, DP5,
2244 SBM2,
2245 ISP, IDP,
2246 LSP, LINEAR, SELECT})
2247 private String name;
2248
2249 /** Override of minimum quickselect size. */
2250 @Param({"0"})
2251 private int qs;
2252
2253 /** Override of minimum edgeselect constant. */
2254 @Param({"0"})
2255 private int ec;
2256
2257 /** The action. */
2258 private BiFunction<double[], int[], double[]> function;
2259
2260 /**
2261 * @return the function
2262 */
2263 public BiFunction<double[], int[], double[]> getFunction() {
2264 return function;
2265 }
2266
2267 /**
2268 * Create the function.
2269 */
2270 @Setup
2271 public void setup() {
2272 Objects.requireNonNull(name);
2273 // Note: For parity in the test, each partition method that accepts the keys as any array
2274 // receives a clone of the indices.
2275 if (name.equals(BASELINE)) {
2276 function = (data, indices) -> extractIndices(data, indices.clone());
2277 } else if (name.startsWith(SORT)) {
2278 // Sort variants (do not clone the keys)
2279 if (name.contains(ISP)) {
2280 final Partition part = PartitionFactory.createPartition(name.substring(SORT.length()), ISP, qs, ec);
2281 function = (data, indices) -> {
2282 part.sortISP(data);
2283 return extractIndices(data, indices);
2284 };
2285 } else if (name.contains(IDP)) {
2286 final Partition part = PartitionFactory.createPartition(name.substring(SORT.length()), IDP, qs, ec);
2287 function = (data, indices) -> {
2288 part.sortIDP(data);
2289 return extractIndices(data, indices);
2290 };
2291 } else if (name.contains(JDK)) {
2292 function = (data, indices) -> {
2293 Arrays.sort(data);
2294 return extractIndices(data, indices);
2295 };
2296 }
2297 // First generation kth-selector functions
2298 } else if (name.startsWith(SPH)) {
2299 // Ported CM implementation with a heap
2300 final KthSelector selector = PartitionFactory.createKthSelector(name, SPH, qs);
2301 function = (data, indices) -> {
2302 final int n = indices.length;
2303 // Note: Pivots heap is not optimal here but should be enough for most cases.
2304 // The size matches that in the Commons Math Percentile class
2305 final int[] pivots = n <= 1 ?
2306 KthSelector.NO_PIVOTS :
2307 new int[1023];
2308 final double[] x = new double[indices.length];
2309 for (int i = 0; i < indices.length; i++) {
2310 x[i] = selector.selectSPH(data, pivots, indices[i], null);
2311 }
2312 return x;
2313 };
2314 // The following methods clone the indices to avoid destructive modification
2315 } else if (name.startsWith(SPN)) {
2316 final KthSelector selector = PartitionFactory.createKthSelector(name, SPN, qs);
2317 function = (data, indices) -> {
2318 selector.partitionSPN(data, indices.clone());
2319 return extractIndices(data, indices);
2320 };
2321 } else if (name.startsWith(SP)) {
2322 final KthSelector selector = PartitionFactory.createKthSelector(name, SP, qs);
2323 function = (data, indices) -> {
2324 selector.partitionSP(data, indices.clone());
2325 return extractIndices(data, indices);
2326 };
2327 } else if (name.startsWith(BM)) {
2328 final KthSelector selector = PartitionFactory.createKthSelector(name, BM, qs);
2329 function = (data, indices) -> {
2330 selector.partitionBM(data, indices.clone());
2331 return extractIndices(data, indices);
2332 };
2333 } else if (name.startsWith(SBM)) {
2334 final KthSelector selector = PartitionFactory.createKthSelector(name, SBM, qs);
2335 function = (data, indices) -> {
2336 selector.partitionSBM(data, indices.clone());
2337 return extractIndices(data, indices);
2338 };
2339 } else if (name.startsWith(DP)) {
2340 final KthSelector selector = PartitionFactory.createKthSelector(name, DP, qs);
2341 function = (data, indices) -> {
2342 selector.partitionDP(data, indices.clone());
2343 return extractIndices(data, indices);
2344 };
2345 } else if (name.startsWith(DP5)) {
2346 final KthSelector selector = PartitionFactory.createKthSelector(name, DP5, qs);
2347 function = (data, indices) -> {
2348 selector.partitionDP5(data, indices.clone());
2349 return extractIndices(data, indices);
2350 };
2351 // Second generation partition function with configurable key strategy
2352 } else if (name.startsWith(SBM2)) {
2353 final Partition part = PartitionFactory.createPartition(name, SBM2, qs, ec);
2354 function = (data, indices) -> {
2355 part.partitionSBM(data, indices.clone(), indices.length);
2356 return extractIndices(data, indices);
2357 };
2358 // Floyd-Rivest partition functions
2359 } else if (name.startsWith(FR)) {
2360 final Partition part = PartitionFactory.createPartition(name, FR, qs, ec);
2361 function = (data, indices) -> {
2362 part.partitionFR(data, indices.clone(), indices.length);
2363 return extractIndices(data, indices);
2364 };
2365 } else if (name.startsWith(KFR)) {
2366 final Partition part = PartitionFactory.createPartition(name, KFR, qs, ec);
2367 function = (data, indices) -> {
2368 part.partitionKFR(data, indices.clone(), indices.length);
2369 return extractIndices(data, indices);
2370 };
2371 // Introselect implementations which are highly configurable
2372 } else if (name.startsWith(ISP)) {
2373 final Partition part = PartitionFactory.createPartition(name, ISP, qs, ec);
2374 function = (data, indices) -> {
2375 part.partitionISP(data, indices.clone(), indices.length);
2376 return extractIndices(data, indices);
2377 };
2378 } else if (name.startsWith(IDP)) {
2379 final Partition part = PartitionFactory.createPartition(name, IDP, qs, ec);
2380 function = (data, indices) -> {
2381 part.partitionIDP(data, indices.clone(), indices.length);
2382 return extractIndices(data, indices);
2383 };
2384 } else if (name.startsWith(SELECT)) {
2385 // Not configurable
2386 function = (data, indices) -> {
2387 Selection.select(data, indices.clone());
2388 return extractIndices(data, indices);
2389 };
2390 // Linearselect (median-of-medians) implementation (stopper for quickselect)
2391 } else if (name.startsWith(LSP)) {
2392 final Partition part = PartitionFactory.createPartition(name, LSP, qs, ec);
2393 function = (data, indices) -> {
2394 part.partitionLSP(data, indices.clone(), indices.length);
2395 return extractIndices(data, indices);
2396 };
2397 // Linearselect (optimised median-of-medians) implementation (stopper for quickselect)
2398 } else if (name.startsWith(LINEAR)) {
2399 final Partition part = PartitionFactory.createPartition(name, LINEAR, qs, ec);
2400 function = (data, indices) -> {
2401 part.partitionLinear(data, indices.clone(), indices.length);
2402 return extractIndices(data, indices);
2403 };
2404 } else if (name.startsWith(QA2)) {
2405 // Configurable only by static properties.
2406 // Default to FR sampling for the initial mode.
2407 final int mode = PartitionFactory.getControlFlags(new String[] {name}, -1);
2408 final int inc = PartitionFactory.getOptionFlags(new String[] {name}, 1);
2409 Partition.configureQaAdaptive(mode, inc);
2410 function = (data, indices) -> {
2411 Partition.partitionQA2(data, indices.clone(), indices.length);
2412 return extractIndices(data, indices);
2413 };
2414 } else if (name.startsWith(QA)) {
2415 final Partition part = PartitionFactory.createPartition(name, QA, qs, ec);
2416 function = (data, indices) -> {
2417 part.partitionQA(data, indices.clone(), indices.length);
2418 return extractIndices(data, indices);
2419 };
2420 // Heapselect implementation (stopper for quickselect)
2421 } else if (name.startsWith(HEAP_SELECT)) {
2422 function = (data, indices) -> {
2423 int min = indices[indices.length - 1];
2424 int max = min;
2425 for (int i = indices.length - 1; --i >= 0;) {
2426 min = Math.min(min, indices[i]);
2427 max = Math.max(max, indices[i]);
2428 }
2429 Partition.heapSelectRange(data, 0, data.length - 1, min, max);
2430 return extractIndices(data, indices);
2431 };
2432 }
2433 if (function == null) {
2434 throw new IllegalStateException("Unknown selector function: " + name);
2435 }
2436 }
2437
2438 /**
2439 * Extract the data at the specified indices.
2440 *
2441 * @param data Data.
2442 * @param indices Indices.
2443 * @return the data
2444 */
2445 private static double[] extractIndices(double[] data, int[] indices) {
2446 final double[] x = new double[indices.length];
2447 for (int i = 0; i < indices.length; i++) {
2448 x[i] = data[indices[i]];
2449 }
2450 return x;
2451 }
2452 }
2453
2454 /**
2455 * Source of a k-th selector function for {@code int} data.
2456 */
2457 @State(Scope.Benchmark)
2458 public static class IntKFunctionSource {
2459 /** Name of the source. */
2460 @Param({SORT + JDK, SELECT})
2461 private String name;
2462
2463 /** The action. */
2464 private BiFunction<int[], int[], int[]> function;
2465
2466 /**
2467 * @return the function
2468 */
2469 public BiFunction<int[], int[], int[]> getFunction() {
2470 return function;
2471 }
2472
2473 /**
2474 * Create the function.
2475 */
2476 @Setup
2477 public void setup() {
2478 Objects.requireNonNull(name);
2479 // Note: Always clone the indices
2480 if (name.equals(BASELINE)) {
2481 function = (data, indices) -> extractIndices(data, indices.clone());
2482 } else if (name.startsWith(SORT)) {
2483 function = (data, indices) -> {
2484 Arrays.sort(data);
2485 return extractIndices(data, indices.clone());
2486 };
2487 } else if (name.startsWith(SELECT)) {
2488 function = (data, indices) -> {
2489 Selection.select(data, indices.clone());
2490 return extractIndices(data, indices);
2491 };
2492 }
2493 if (function == null) {
2494 throw new IllegalStateException("Unknown int selector function: " + name);
2495 }
2496 }
2497
2498 /**
2499 * Extract the data at the specified indices.
2500 *
2501 * @param data Data.
2502 * @param indices Indices.
2503 * @return the data
2504 */
2505 private static int[] extractIndices(int[] data, int[] indices) {
2506 final int[] x = new int[indices.length];
2507 for (int i = 0; i < indices.length; i++) {
2508 x[i] = data[indices[i]];
2509 }
2510 return x;
2511 }
2512 }
2513
2514 /**
2515 * Source of a function that pre-processes NaN and signed zeros (-0.0).
2516 *
2517 * <p>Detection of signed zero using direct conversion of raw bits and
2518 * comparison with the bit representation is noticeably faster than comparison
2519 * using {@code == 0.0}.
2520 */
2521 @State(Scope.Benchmark)
2522 public static class SortNaNFunctionSource {
2523 /** Name of the source. */
2524 @Param({"RawZeroNaN", "ZeroSignNaN", "NaNRawZero", "NaNZeroSign"})
2525 private String name;
2526
2527 /** The action. */
2528 private BiConsumer<double[], Blackhole> function;
2529
2530 /**
2531 * @return the function
2532 */
2533 public BiConsumer<double[], Blackhole> getFunction() {
2534 return function;
2535 }
2536
2537 /**
2538 * Create the function.
2539 */
2540 @Setup
2541 public void setup() {
2542 // Functions sort NaN and detect signed zeros.
2543 // For convenience the function accepts the blackhole to handle any
2544 // output from the processing.
2545 if ("RawZeroNaN".equals(name)) {
2546 function = (a, bh) -> {
2547 int cn = 0;
2548 int end = a.length;
2549 for (int i = end; --i >= 0;) {
2550 final double v = a[i];
2551 if (Double.doubleToRawLongBits(v) == Long.MIN_VALUE) {
2552 cn++;
2553 a[i] = 0.0;
2554 } else if (v != v) {
2555 a[i] = a[--end];
2556 a[end] = v;
2557 }
2558 }
2559 bh.consume(cn);
2560 bh.consume(end);
2561 };
2562 } else if ("ZeroSignNaN".equals(name)) {
2563 function = (a, bh) -> {
2564 int cn = 0;
2565 int end = a.length;
2566 for (int i = end; --i >= 0;) {
2567 final double v = a[i];
2568 if (v == 0.0 && Double.doubleToRawLongBits(v) < 0) {
2569 cn++;
2570 a[i] = 0.0;
2571 } else if (v != v) {
2572 a[i] = a[--end];
2573 a[end] = v;
2574 }
2575 }
2576 bh.consume(cn);
2577 bh.consume(end);
2578 };
2579 } else if ("NaNRawZero".equals(name)) {
2580 function = (a, bh) -> {
2581 int cn = 0;
2582 int end = a.length;
2583 for (int i = end; --i >= 0;) {
2584 final double v = a[i];
2585 if (v != v) {
2586 a[i] = a[--end];
2587 a[end] = v;
2588 } else if (Double.doubleToRawLongBits(v) == Long.MIN_VALUE) {
2589 cn++;
2590 a[i] = 0.0;
2591 }
2592 }
2593 bh.consume(cn);
2594 bh.consume(end);
2595 };
2596 } else if ("NaNZeroSign".equals(name)) {
2597 function = (a, bh) -> {
2598 int cn = 0;
2599 int end = a.length;
2600 for (int i = end; --i >= 0;) {
2601 final double v = a[i];
2602 if (v != v) {
2603 a[i] = a[--end];
2604 a[end] = v;
2605 } else if (v == 0.0 && Double.doubleToRawLongBits(v) < 0) {
2606 cn++;
2607 a[i] = 0.0;
2608 }
2609 }
2610 bh.consume(cn);
2611 bh.consume(end);
2612 };
2613 } else {
2614 throw new IllegalStateException("Unknown sort NaN function: " + name);
2615 }
2616 }
2617 }
2618
2619 /**
2620 * Source of a edge selector function. This is a function that selects indices
2621 * that are clustered close to the edge of the data.
2622 *
2623 * <p>This is a specialised class to allow benchmarking the switch from using
2624 * quickselect partitioning to using edgeselect.
2625 */
2626 @State(Scope.Benchmark)
2627 public static class EdgeFunctionSource {
2628 /** Name of the source.
2629 * For introselect methods this should effectively turn-off edgeselect. */
2630 @Param({HEAP_SELECT, ISP + "_EC0", IDP + "_EC0",
2631 // Only use for small length as sort insertion is worst case Order(k * (right - left))
2632 // vs heap select() is O(k - left) + O((right - k) * log(k - left))
2633 //SORT_SELECT
2634 })
2635 private String name;
2636
2637 /** The action. */
2638 private BiFunction<double[], int[], double[]> function;
2639
2640 /**
2641 * @return the function
2642 */
2643 public BiFunction<double[], int[], double[]> getFunction() {
2644 return function;
2645 }
2646
2647 /**
2648 * Create the function.
2649 */
2650 @Setup
2651 public void setup() {
2652 Objects.requireNonNull(name);
2653 // Direct use of heapselect. This has variations which use different
2654 // optimisations for small heaps.
2655 // Note: Optimisation for small heap size (n=1,2) is not observable on large data.
2656 // It requires the use of small data (e.g. len=[16, 32)) to observe differences.
2657 // The main overhead is the test for insertion against the current top of the
2658 // heap which grows increasingly unlikely as the range is scanned.
2659 // Optimisation for n=1 is negligible; for n=2 it is up to 10%. However using only
2660 // heapSelectRange2 is not as fast as the non-optimised heapSelectRange0
2661 // when the heap is size 1. For n=1 the heap insertion branch prediction
2662 // can learn the heap has no children and skip descending the heap, whereas
2663 // heap size n=2 can descend 1 level if the child is smaller/bigger. This is not
2664 // as fast as dedicated code for the single child case.
2665 // This benchmark requires repeating with variable heap size to avoid branch
2666 // prediction learning what to do, i.e. use with an index source that has variable
2667 // distance from the edge.
2668 if (HEAP_SELECT.equals(name)) {
2669 function = (data, indices) -> {
2670 heapSelectRange0(data, 0, data.length - 1, indices[0], indices[1]);
2671 return extractIndices(data, indices[0], indices[1]);
2672 };
2673 } else if ((HEAP_SELECT + "1").equals(name)) {
2674 function = (data, indices) -> {
2675 heapSelectRange1(data, 0, data.length - 1, indices[0], indices[1]);
2676 return extractIndices(data, indices[0], indices[1]);
2677 };
2678 } else if ((HEAP_SELECT + "2").equals(name)) {
2679 function = (data, indices) -> {
2680 heapSelectRange2(data, 0, data.length - 1, indices[0], indices[1]);
2681 return extractIndices(data, indices[0], indices[1]);
2682 };
2683 } else if ((HEAP_SELECT + "12").equals(name)) {
2684 function = (data, indices) -> {
2685 heapSelectRange12(data, 0, data.length - 1, indices[0], indices[1]);
2686 return extractIndices(data, indices[0], indices[1]);
2687 };
2688 // Only use on small edge as insertion is Order(k)
2689 } else if (SORT_SELECT.equals(name)) {
2690 function = (data, indices) -> {
2691 Partition.sortSelectRange(data, 0, data.length - 1, indices[0], indices[1]);
2692 return extractIndices(data, indices[0], indices[1]);
2693 };
2694 // Introselect methods - these should be configured to not use edgeselect.
2695 // These directly call the introselect method to skip NaN/signed zero processing.
2696 } else if (name.startsWith(ISP)) {
2697 final Partition part = PartitionFactory.createPartition(name, ISP);
2698 function = (data, indices) -> {
2699 part.introselect(part.getSPFunction(), data,
2700 0, data.length - 1, IndexIntervals.interval(indices[0], indices[1]), 10000);
2701 return extractIndices(data, indices[0], indices[1]);
2702 };
2703 } else if (name.startsWith(IDP)) {
2704 final Partition part = PartitionFactory.createPartition(name, IDP);
2705 function = (data, indices) -> {
2706 part.introselect(Partition::partitionDP, data,
2707 0, data.length - 1, IndexIntervals.interval(indices[0], indices[1]), 10000);
2708 return extractIndices(data, indices[0], indices[1]);
2709 };
2710 } else {
2711 throw new IllegalStateException("Unknown edge selector function: " + name);
2712 }
2713 }
2714
2715 /**
2716 * Partition the elements between {@code ka} and {@code kb} using a heap select
2717 * algorithm. It is assumed {@code left <= ka <= kb <= right}.
2718 *
2719 * <p>Note:
2720 *
2721 * <p>This is a copy of {@link Partition#heapSelectRange(double[], int, int, int, int)}.
2722 * It uses no optimised versions for small heaps.
2723 *
2724 * @param a Data array to use to find out the K<sup>th</sup> value.
2725 * @param left Lower bound (inclusive).
2726 * @param right Upper bound (inclusive).
2727 * @param ka Lower index to select.
2728 * @param kb Upper index to select.
2729 */
2730 static void heapSelectRange0(double[] a, int left, int right, int ka, int kb) {
2731 if (right - left < Partition.MIN_HEAPSELECT_SIZE) {
2732 Sorting.sort(a, left, right);
2733 return;
2734 }
2735 if (kb - left < right - ka) {
2736 Partition.heapSelectLeft(a, left, right, kb, kb - ka);
2737 } else {
2738 Partition.heapSelectRight(a, left, right, ka, kb - ka);
2739 }
2740 }
2741
2742 /**
2743 * Partition the elements between {@code ka} and {@code kb} using a heap select
2744 * algorithm. It is assumed {@code left <= ka <= kb <= right}.
2745 *
2746 * <p>Note:
2747 *
2748 * <p>This is a copy of {@link Partition#heapSelectRange(double[], int, int, int, int)}.
2749 * It uses no optimised versions for small heap of size 1.
2750 *
2751 * @param a Data array to use to find out the K<sup>th</sup> value.
2752 * @param left Lower bound (inclusive).
2753 * @param right Upper bound (inclusive).
2754 * @param ka Lower index to select.
2755 * @param kb Upper index to select.
2756 */
2757 static void heapSelectRange1(double[] a, int left, int right, int ka, int kb) {
2758 if (right - left < Partition.MIN_HEAPSELECT_SIZE) {
2759 Sorting.sort(a, left, right);
2760 return;
2761 }
2762 if (kb - left < right - ka) {
2763 // Optimise
2764 if (kb == left) {
2765 Partition.selectMinIgnoreZeros(a, left, right);
2766 } else {
2767 Partition.heapSelectLeft(a, left, right, kb, kb - ka);
2768 }
2769 } else {
2770 // Optimise
2771 if (ka == right) {
2772 Partition.selectMaxIgnoreZeros(a, left, right);
2773 } else {
2774 Partition.heapSelectRight(a, left, right, ka, kb - ka);
2775 }
2776 }
2777 }
2778
2779 /**
2780 * Partition the elements between {@code ka} and {@code kb} using a heap select
2781 * algorithm. It is assumed {@code left <= ka <= kb <= right}.
2782 *
2783 * <p>Note:
2784 *
2785 * <p>This is a copy of {@link Partition#heapSelectRange(double[], int, int, int, int)}.
2786 * It uses optimised versions for small heap of size 2.
2787 *
2788 * @param a Data array to use to find out the K<sup>th</sup> value.
2789 * @param left Lower bound (inclusive).
2790 * @param right Upper bound (inclusive).
2791 * @param ka Lower index to select.
2792 * @param kb Upper index to select.
2793 */
2794 static void heapSelectRange2(double[] a, int left, int right, int ka, int kb) {
2795 if (right - left < Partition.MIN_HEAPSELECT_SIZE) {
2796 Sorting.sort(a, left, right);
2797 return;
2798 }
2799 if (kb - left < right - ka) {
2800 // Optimise
2801 if (kb - 1 <= left) {
2802 Partition.selectMin2IgnoreZeros(a, left, right);
2803 } else {
2804 Partition.heapSelectLeft(a, left, right, kb, kb - ka);
2805 }
2806 } else {
2807 // Optimise
2808 if (ka + 1 >= right) {
2809 Partition.selectMax2IgnoreZeros(a, left, right);
2810 } else {
2811 Partition.heapSelectRight(a, left, right, ka, kb - ka);
2812 }
2813 }
2814 }
2815
2816 /**
2817 * Partition the elements between {@code ka} and {@code kb} using a heap select
2818 * algorithm. It is assumed {@code left <= ka <= kb <= right}.
2819 *
2820 * <p>Note:
2821 *
2822 * <p>This is a copy of {@link Partition#heapSelectRange(double[], int, int, int, int)}.
2823 * It uses optimised versions for small heap of size 1 and 2.
2824 *
2825 * @param a Data array to use to find out the K<sup>th</sup> value.
2826 * @param left Lower bound (inclusive).
2827 * @param right Upper bound (inclusive).
2828 * @param ka Lower index to select.
2829 * @param kb Upper index to select.
2830 */
2831 static void heapSelectRange12(double[] a, int left, int right, int ka, int kb) {
2832 if (right - left < Partition.MIN_HEAPSELECT_SIZE) {
2833 Sorting.sort(a, left, right);
2834 return;
2835 }
2836 if (kb - left < right - ka) {
2837 // Optimise
2838 if (kb - 1 <= left) {
2839 if (kb == left) {
2840 Partition.selectMinIgnoreZeros(a, left, right);
2841 } else {
2842 Partition.selectMin2IgnoreZeros(a, left, right);
2843 }
2844 } else {
2845 Partition.heapSelectLeft(a, left, right, kb, kb - ka);
2846 }
2847 } else {
2848 // Optimise
2849 if (ka + 1 >= right) {
2850 if (ka == right) {
2851 Partition.selectMaxIgnoreZeros(a, left, right);
2852 } else {
2853 Partition.selectMax2IgnoreZeros(a, left, right);
2854 }
2855 } else {
2856 Partition.heapSelectRight(a, left, right, ka, kb - ka);
2857 }
2858 }
2859 }
2860
2861 /**
2862 * Extract the data at the specified indices.
2863 *
2864 * @param data Data.
2865 * @param l Lower bound (inclusive).
2866 * @param r Upper bound (inclusive).
2867 * @return the data
2868 */
2869 private static double[] extractIndices(double[] data, int l, int r) {
2870 final double[] x = new double[r - l + 1];
2871 for (int i = l; i <= r; i++) {
2872 x[i - l] = data[i];
2873 }
2874 return x;
2875 }
2876 }
2877
2878 /**
2879 * Source of an search function. This is a function that find an index
2880 * in a sorted list of indices, e.g. a binary search.
2881 */
2882 @State(Scope.Benchmark)
2883 public static class IndexSearchFunctionSource {
2884 /** Name of the source. */
2885 @Param({"Binary",
2886 //"binarySearch",
2887 "Scan"})
2888 private String name;
2889
2890 /** The action. */
2891 private SearchFunction function;
2892
2893 /**
2894 * Define a search function.
2895 */
2896 public interface SearchFunction {
2897 /**
2898 * Find the index of the element {@code k}, or the closest index
2899 * to the element (implementation definitions may vary).
2900 *
2901 * @param a Data.
2902 * @param k Element.
2903 * @return the index
2904 */
2905 int find(int[] a, int k);
2906 }
2907
2908 /**
2909 * @return the function
2910 */
2911 public SearchFunction getFunction() {
2912 return function;
2913 }
2914
2915 /**
2916 * Create the function.
2917 */
2918 @Setup
2919 public void setup() {
2920 Objects.requireNonNull(name);
2921 if ("Binary".equals(name)) {
2922 function = (keys, k) -> Partition.searchLessOrEqual(keys, 0, keys.length - 1, k);
2923 } else if ("binarySearch".equals(name)) {
2924 function = (keys, k) -> Arrays.binarySearch(keys, 0, keys.length, k);
2925 } else if ("Scan".equals(name)) {
2926 function = (keys, k) -> {
2927 // Assume that k >= keys[0]
2928 int i = keys.length;
2929 do {
2930 --i;
2931 } while (keys[i] > k);
2932 return i;
2933 };
2934 } else {
2935 throw new IllegalStateException("Unknown index search function: " + name);
2936 }
2937 }
2938 }
2939
2940 /**
2941 * Benchmark a sort on the data.
2942 *
2943 * @param function Source of the function.
2944 * @param source Source of the data.
2945 * @param bh Data sink.
2946 */
2947 @Benchmark
2948 public void sort(SortFunctionSource function, SortSource source, Blackhole bh) {
2949 final int size = source.size();
2950 final Consumer<double[]> fun = function.getFunction();
2951 for (int j = -1; ++j < size;) {
2952 final double[] y = source.getData(j);
2953 fun.accept(y);
2954 bh.consume(y);
2955 }
2956 }
2957
2958 /**
2959 * Benchmark a sort of 5 data values.
2960 * This tests the pivot selection from 5 values used in dual-pivot partitioning.
2961 *
2962 * @param function Source of the function.
2963 * @param source Source of the data.
2964 * @param bh Data sink.
2965 */
2966 @Benchmark
2967 public void fiveSort(Sort5FunctionSource function, SortSource source, Blackhole bh) {
2968 final int size = source.size();
2969 final Consumer<double[]> fun = function.getFunction();
2970 for (int j = -1; ++j < size;) {
2971 final double[] y = source.getData(j);
2972 fun.accept(y);
2973 bh.consume(y);
2974 }
2975 }
2976
2977 /**
2978 * Benchmark a pass over the entire data computing the medians of 4 data values.
2979 * This simulates a QuickselectAdaptive step.
2980 *
2981 * @param function Source of the function.
2982 * @param source Source of the data.
2983 * @param bh Data sink.
2984 */
2985 @Benchmark
2986 public void fourMedian(Median4FunctionSource function, SortSource source, Blackhole bh) {
2987 final int size = source.size();
2988 final Consumer<double[]> fun = function.getFunction();
2989 for (int j = -1; ++j < size;) {
2990 final double[] y = source.getData(j);
2991 fun.accept(y);
2992 bh.consume(y);
2993 }
2994 }
2995
2996 /**
2997 * Benchmark a pass over the entire data computing the medians of 3 data values.
2998 * This simulates a QuickselectAdaptive step.
2999 *
3000 * @param function Source of the function.
3001 * @param source Source of the data.
3002 * @param bh Data sink.
3003 */
3004 @Benchmark
3005 public void threeMedian(Median3FunctionSource function, SortSource source, Blackhole bh) {
3006 final int size = source.size();
3007 final Consumer<double[]> fun = function.getFunction();
3008 for (int j = -1; ++j < size;) {
3009 final double[] y = source.getData(j);
3010 fun.accept(y);
3011 bh.consume(y);
3012 }
3013 }
3014
3015 /**
3016 * Benchmark partitioning using k partition indices.
3017 *
3018 * @param function Source of the function.
3019 * @param source Source of the data.
3020 * @param bh Data sink.
3021 */
3022 @Benchmark
3023 public void doublePartition(DoubleKFunctionSource function, KSource source, Blackhole bh) {
3024 final int size = source.size();
3025 final BiFunction<double[], int[], double[]> fun = function.getFunction();
3026 for (int j = -1; ++j < size;) {
3027 // Note: This uses the indices without cloning. This is because some
3028 // functions do not destructively modify the data.
3029 bh.consume(fun.apply(source.getData(j), source.getIndices(j)));
3030 }
3031 }
3032
3033 /**
3034 * Benchmark partitioning using k partition indices on {@code int} data.
3035 *
3036 * @param function Source of the function.
3037 * @param source Source of the data.
3038 * @param bh Data sink.
3039 */
3040 @Benchmark
3041 public void intPartition(IntKFunctionSource function, KSource source, Blackhole bh) {
3042 final int size = source.size();
3043 final BiFunction<int[], int[], int[]> fun = function.getFunction();
3044 for (int j = -1; ++j < size;) {
3045 // Note: This uses the indices without cloning. This is because some
3046 // functions do not destructively modify the data.
3047 bh.consume(fun.apply(source.getIntData(j), source.getIndices(j)));
3048 }
3049 }
3050
3051 /**
3052 * Benchmark partitioning of an interval of indices a set distance from the edge.
3053 * This is used to benchmark the switch from quickselect partitioning to edgeselect.
3054 *
3055 * @param function Source of the function.
3056 * @param source Source of the data.
3057 * @param bh Data sink.
3058 */
3059 @Benchmark
3060 public void edgeSelect(EdgeFunctionSource function, EdgeSource source, Blackhole bh) {
3061 final int size = source.size();
3062 final BiFunction<double[], int[], double[]> fun = function.getFunction();
3063 for (int j = -1; ++j < size;) {
3064 bh.consume(fun.apply(source.getData(j), source.getIndices(j)));
3065 }
3066 }
3067
3068 /**
3069 * Benchmark pre-processing of NaN and signed zeros (-0.0).
3070 *
3071 * @param function Source of the function.
3072 * @param source Source of the data.
3073 * @param bh Data sink.
3074 */
3075 @Benchmark
3076 public void nanZero(SortNaNFunctionSource function, SortSource source, Blackhole bh) {
3077 final int size = source.size();
3078 final BiConsumer<double[], Blackhole> fun = function.getFunction();
3079 for (int j = -1; ++j < size;) {
3080 fun.accept(source.getData(j), bh);
3081 }
3082 }
3083
3084 /**
3085 * Benchmark the search of an ordered set of indices.
3086 *
3087 * @param function Source of the search.
3088 * @param source Source of the data.
3089 * @return value to consume
3090 */
3091 @Benchmark
3092 public long indexSearch(IndexSearchFunctionSource function, SplitIndexSource source) {
3093 final IndexSearchFunctionSource.SearchFunction fun = function.getFunction();
3094 // Ensure we have something to consume during the benchmark
3095 long sum = 0;
3096 for (int i = source.samples(); --i >= 0;) {
3097 // Single point in the range
3098 sum += fun.find(source.getIndices(i), source.getPoint(i));
3099 }
3100 return sum;
3101 }
3102
3103 /**
3104 * Benchmark the tracking of an interval of indices during a partition algorithm.
3105 *
3106 * <p>The {@link SearchableInterval} is created for the indices of interest. These are then
3107 * cut at all points in the interval between indices to simulate a partition algorithm
3108 * dividing the data and requiring a new interval to use in each part:
3109 * <pre>{@code
3110 * cut
3111 * |
3112 * -------k1--------k2---------k3---- ... ---------kn--------
3113 * <-- scan previous
3114 * scan next -->
3115 * }</pre>
3116 *
3117 * <p>Note: If a cut is made in the interval then the smallest region of data
3118 * that was most recently partitioned was the length between the two flanking k.
3119 * This involves a full scan (and partitioning) over the data of length (k2 - k1).
3120 * A BitSet-type structure will require a scan over 1/64 of this length of data
3121 * to find the next and previous index from a cut point. In practice
3122 * the interval may be partitioned over a much larger length, e.g. (kn - k1).
3123 * Thus the length of time for the partition algorithm is expected to be at least
3124 * 64x the length of time for the BitSet-type scan. The disadvantage of the
3125 * BitSet-type structure is memory consumption. For a small number of keys the
3126 * structures that search the entire set of keys are fast enough. At very high
3127 * density the BitSet-type structures are preferred.
3128 *
3129 * @param function Source of the interval.
3130 * @param source Source of the data.
3131 * @return value to consume
3132 */
3133 @Benchmark
3134 public long searchableIntervalNextPrevious(SearchableIntervalSource function, SplitIndexSource source) {
3135 final int[][] indices = source.getIndices();
3136 final int[][] points = source.getPoints();
3137 // Ensure we have something to consume during the benchmark
3138 long sum = 0;
3139 for (int i = 0; i < indices.length; i++) {
3140 final int[] x = indices[i];
3141 final int[] p = points[i];
3142 final SearchableInterval interval = function.create(x);
3143 for (final int k : p) {
3144 sum += interval.nextIndex(k);
3145 sum += interval.previousIndex(k);
3146 }
3147 }
3148 return sum;
3149 }
3150
3151 /**
3152 * Benchmark the tracking of an interval of indices during a partition algorithm.
3153 *
3154 * <p>This is similar to
3155 * {@link #searchableIntervalNextPrevious(SearchableIntervalSource, SplitIndexSource)}.
3156 * It uses the {@link SearchableInterval#split(int, int, int[])} method. This requires
3157 * {@code k} to be in an open interval. Some modes of the {@link IndexSource} do not
3158 * ensure that {@code left < k < right} for all split points so we have to check this
3159 * before calling the split method (it is a fixed overhead for the benchmark).
3160 *
3161 * @param function Source of the interval.
3162 * @param source Source of the data.
3163 * @return value to consume
3164 */
3165 @Benchmark
3166 public long searchableIntervalSplit(SearchableIntervalSource function, SplitIndexSource source) {
3167 final int[][] indices = source.getIndices();
3168 final int[][] points = source.getPoints();
3169 // Ensure we have something to consume during the benchmark
3170 long sum = 0;
3171 final int[] bound = {0};
3172 for (int i = 0; i < indices.length; i++) {
3173 final int[] x = indices[i];
3174 final int[] p = points[i];
3175 // Note: A partition algorithm would only call split if there are indices
3176 // above and below the split point.
3177 final SearchableInterval interval = function.create(x);
3178 final int left = interval.left();
3179 final int right = interval.right();
3180 for (final int k : p) {
3181 // Check k is in the open interval (left, right)
3182 if (left < k && k < right) {
3183 sum += interval.split(k, k, bound);
3184 sum += bound[0];
3185 }
3186 }
3187 }
3188 return sum;
3189 }
3190
3191 /**
3192 * Benchmark the creation of an interval of indices for controlling a partition
3193 * algorithm.
3194 *
3195 * <p>This baselines the
3196 * {@link #searchableIntervalNextPrevious(SearchableIntervalSource, SplitIndexSource)}
3197 * benchmark. For the BitSet-type structures a large overhead is the memory allocation
3198 * to create the {@link SearchableInterval}. Note that this will be at most 1/64 the
3199 * size of the array that is being partitioned and in practice this overhead is not
3200 * significant.
3201 *
3202 * @param function Source of the interval.
3203 * @param source Source of the data.
3204 * @param bh Data sink.
3205 */
3206 @Benchmark
3207 public void createSearchableInterval(SearchableIntervalSource function, IndexSource source, Blackhole bh) {
3208 final int[][] indices = source.getIndices();
3209 for (final int[] x : indices) {
3210 bh.consume(function.create(x));
3211 }
3212 }
3213
3214 /**
3215 * Benchmark the splitting of an interval of indices during a partition algorithm.
3216 *
3217 * <p>This is similar to
3218 * {@link #searchableIntervalSplit(SearchableIntervalSource, SplitIndexSource)}. It
3219 * uses the {@link UpdatingInterval#splitLeft(int, int)} method by recursive division
3220 * of the indices.
3221 *
3222 * @param function Source of the interval.
3223 * @param source Source of the data.
3224 * @param bh Data sink.
3225 */
3226 @Benchmark
3227 public void updatingIntervalSplit(UpdatingIntervalSource function, IndexSource source, Blackhole bh) {
3228 final int[][] indices = source.getIndices();
3229 final int s = source.getMinSeparation();
3230 for (int i = 0; i < indices.length; i++) {
3231 split(function.create(indices[i]), s, bh);
3232 }
3233 }
3234
3235 /**
3236 * Recursively split the interval until the length is below the provided separation.
3237 * Consume the interval when no more divides can occur. Simulates a single-pivot
3238 * partition algorithm.
3239 *
3240 * @param interval Interval.
3241 * @param s Minimum separation between left and right.
3242 * @param bh Data sink.
3243 */
3244 private static void split(UpdatingInterval interval, int s, Blackhole bh) {
3245 int l = interval.left();
3246 final int r = interval.right();
3247 // Note: A partition algorithm would only call split if there are indices
3248 // above and below the split point.
3249 if (r - l > s) {
3250 final int middle = (l + r) >>> 1;
3251 // recurse left
3252 split(interval.splitLeft(middle, middle), s, bh);
3253 // continue on right side
3254 l = interval.left();
3255 }
3256 bh.consume(interval);
3257 }
3258 }