View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.numbers.examples.jmh.arrays;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.EnumSet;
23  import java.util.List;
24  import java.util.Locale;
25  import java.util.Objects;
26  import java.util.concurrent.TimeUnit;
27  import java.util.concurrent.atomic.AtomicInteger;
28  import java.util.function.BiConsumer;
29  import java.util.function.BiFunction;
30  import java.util.function.Consumer;
31  import java.util.function.Function;
32  import java.util.logging.Logger;
33  import java.util.stream.Collectors;
34  import org.apache.commons.numbers.arrays.Selection;
35  import org.apache.commons.rng.UniformRandomProvider;
36  import org.apache.commons.rng.sampling.ArraySampler;
37  import org.apache.commons.rng.sampling.PermutationSampler;
38  import org.apache.commons.rng.sampling.distribution.DiscreteUniformSampler;
39  import org.apache.commons.rng.sampling.distribution.SharedStateDiscreteSampler;
40  import org.apache.commons.rng.simple.RandomSource;
41  import org.openjdk.jmh.annotations.Benchmark;
42  import org.openjdk.jmh.annotations.BenchmarkMode;
43  import org.openjdk.jmh.annotations.Fork;
44  import org.openjdk.jmh.annotations.Level;
45  import org.openjdk.jmh.annotations.Measurement;
46  import org.openjdk.jmh.annotations.Mode;
47  import org.openjdk.jmh.annotations.OutputTimeUnit;
48  import org.openjdk.jmh.annotations.Param;
49  import org.openjdk.jmh.annotations.Scope;
50  import org.openjdk.jmh.annotations.Setup;
51  import org.openjdk.jmh.annotations.State;
52  import org.openjdk.jmh.annotations.Warmup;
53  import org.openjdk.jmh.infra.Blackhole;
54  
55  /**
56   * Executes a benchmark of the selection of indices from array data.
57   */
58  @BenchmarkMode(Mode.AverageTime)
59  @OutputTimeUnit(TimeUnit.NANOSECONDS)
60  @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
61  @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
62  @State(Scope.Benchmark)
63  @Fork(value = 1, jvmArgs = {"-server", "-Xms512M", "-Xmx8192M"})
64  public class SelectionPerformance {
65      /** Use the JDK sort function. */
66      private static final String JDK = "JDK";
67      /** Use a sort function. */
68      private static final String SORT = "Sort";
69      /** Baseline for the benchmark. */
70      private static final String BASELINE = "Baseline";
71      /** Selection method using a heap. */
72      private static final String HEAP_SELECT = "HeapSelect";
73      /** Selection method using a sort. */
74      private static final String SORT_SELECT = "SortSelect";
75  
76      // First generation partition functions.
77      // These are based on the KthSelector class used in Commons Math:
78      // - Single k or a pair of indices (k,k+1) are selected in a single
79      // call; multiple indices cache pivots in a heap structure or use a BitSet.
80      // - They dynamically correct signed zeros when they are encountered.
81  
82      /** Single-pivot partitioning. This method uses a special comparison of double
83       * values similar to {@link Double#compare(double, double)}. This handles
84       * NaN and signed zeros. */
85      private static final String SP = "SP";
86      /** Single-pivot partitioning; uses a BitSet to cache pivots. */
87      private static final String SPN = "SPN";
88      /** Single-pivot partitioning using a heap to cache pivots.
89       * This method is copied from Commons Math. */
90      private static final String SPH = "SPH";
91      /** Bentley-McIlroy partitioning (Sedgewick); uses a BitSet to cache pivots. */
92      private static final String SBM = "SBM";
93      /** Bentley-McIlroy partitioning (original); uses a BitSet to cache pivots. */
94      private static final String BM = "BM";
95      /** Dual-pivot partitioning; uses a BitSet to cache pivots. */
96      private static final String DP = "DP";
97      /** Dual-pivot partitioning with 5 sorted points to choose pivots; uses a BitSet to cache pivots. */
98      private static final String DP5 = "5DP";
99  
100     // Second generation partition functions.
101     // These pre-process data to sort NaN to the end and count signed zeros;
102     // post-processing is performed to restore signed zeros in order.
103     // The exception is SBM2 which dynamically corrects signed zeros.
104 
105     /** Bentley-McIlroy partitioning (Sedgewick). This second generation function
106      * dynamically corrects signed zeros when they are encountered. It is based on
107      * the fastest first generation method with changes to allow different pivot
108      * store strategies: SEQUENTIAL, INDEX_SET, PIVOT_CACHE. */
109     private static final String SBM2 = "2SBM";
110 
111     /** Floyd-Rivest partitioning. Only for single k. */
112     private static final String FR = "FR";
113     /** Floyd-Rivest partitioning (Kiwiel). Only for a single k. */
114     private static final String KFR = "KFR";
115 
116     // Introselect functions - switch to a stopper function when progress is poor.
117     // Allow specification of configuration using the name parameter:
118     // single-pivot strategy (Partition.SPStrategy);
119     // multiple key strategy (Partition.KeyStrategy);
120     // paired (close) keys strategy (Partition.PairedKeyStrategy);
121     // edge-selection strategy (Partition.EdgeSelectStrategy);
122     // stopper strategy (Partition.StopperStrategy).
123     // Parameters to control strategies and introspection are set using the name parameter.
124     // See PartitionFactory for details.
125 
126     /** Introselect implementation with single-pivot partitioning. */
127     private static final String ISP = "ISP";
128     /** Introselect implementation with dual-pivot partitioning. */
129     private static final String IDP = "IDP";
130 
131     // Single k selection using various methods which provide linear runtime (Order(n)).
132 
133     /** Linearselect implementation with single pivot partitioning using median-of-medians-of-5
134      * for pivot selection. */
135     private static final String LSP = "LSP";
136     /** Linearselect implementation with single pivot partitioning using optimised
137      * median-of-medians. */
138     private static final String LINEAR = "Linear";
139     /** Quickselect adaptive implementation. Has configuration of the far-step method and some
140      * adaption modes. */
141     private static final String QA = "QA";
142     /** Quickselect adaptive implementation. Uses the best performing far-step method and
143      * has configurable adaption control allowing starting at and skipping over adaption modes. */
144     private static final String QA2 = "QA2";
145 
146     /** Commons Numbers select implementation. This method is built using the best performing
147      * select function across a range of input data. This algorithm cannot be configured. */
148     private static final String SELECT = "SELECT";
149 
150     /** Random source. */
151     private static final RandomSource RANDOM_SOURCE = RandomSource.XO_RO_SHI_RO_128_PP;
152 
153     /**
154      * Source of {@code double} array data.
155      *
156      * <p>By default this uses the adverse input test suite from figure 1 in Bentley and McIlroy
157      * (1993) Engineering a sort function, Software, practice and experience, Vol.23(11),
158      * 1249–1265.
159      *
160      * <p>An alternative set of data is from Valois (2000) Introspective sorting and selection
161      * revisited, Software, practice and experience, Vol.30(6), 617-638.
162      *
163      * <p>Note
164      *
165      * <p>This class has setter methods to allow re-use in unit testing without requiring
166      * use of reflection to set fields. Parameters set by JMH are initialized to their
167      * defaults for convenience. Re-use requires:
168      *
169      * <ol>
170      * <li>Creating an instance of the abstract class that provides the data length
171      * <li>Calling {@link #setup()} to create the data
172      * <li>Iterating over the data
173      * </ol>
174      *
175      * <pre>
176      * AbstractDataSource s = new AbstractDataSource() {
177      *     protected int getLength() {
178      *         return 123;
179      *     }
180      * };
181      * s.setDistribution(Distribution.SAWTOOTH, Distribution.SHUFFLE);
182      * s.setModification(Modification.REVERSE_FRONT);
183      * s.setRange(2);
184      * s.setup();
185      * for (int i = 0; i &lt; s.size(); i++) {
186      *     s.getData(i);
187      * }
188      * </pre>
189      *
190      * <p>Random distribution mode
191      *
192      * <p>The default BM configuration includes random samples generated as a family of
193      * single samples created from ranges that are powers of two [0, 2^i). This small set
194      * of samples is only a small representation of randomness. For small lengths this may
195      * only be a few random samples.
196      *
197      * <p>The data source can be changed to generate a fixed number of random samples
198      * using a uniform distribution [0, n]. For this purpose the distribution must be set
199      * to {@link Distribution#RANDOM} and the {@link #setSamples(int) samples} set above
200      * zero. The inclusive upper bound {@code n} is set using the {@link #setSeed(int) seed}.
201      * If this is zero then the default is {@link Integer#MAX_VALUE}.
202      *
203      * <p>Order
204      *
205      * <p>Data are created in distribution families. If these are passed in order to a
206      * partition method the JVM can change behaviour of the algorithm as branch prediction
207      * statistics stabilise for the family. To mitigate this effect the order is permuted
208      * per invocation of the benchmark (see {@link #createOrder()}. This stabilises the
209      * average timing results from JMH. Using per-invocation data generation requires
210      * the benchmark execution time is higher than 1 millisecond. Benchmarks that use
211      * tiny data (e.g. sort 5 elements) must use several million samples.
212      */
213     @State(Scope.Benchmark)
214     public abstract static class AbstractDataSource {
215         /** All distributions / modifications. */
216         private static final String ALL = "all";
217         /** All distributions / modifications in the Bentley and McIlroy test suite. */
218         private static final String BM = "bm";
219         /** All distributions in the Valois test suite. These currently ignore the seed.
220          * To replicate Valois used a fixed seed and the copy modification. */
221         private static final String VALOIS = "valois";
222         /** Flag to determine if the data size should be logged. This is useful to be
223          * able to determine the execution time per sample when the number of samples
224          * is dynamically created based on the data length, range and seed. */
225         private static final AtomicInteger LOG_SIZE = new AtomicInteger();
226 
227         /**
228          * The type of distribution.
229          */
230         enum Distribution {
231             // B&M (1993)
232 
233             /** Sawtooth distribution. Ascending data from 0 to m, that repeats. */
234             SAWTOOTH,
235             /** Random distribution. Uniform random data in [0, m] */
236             RANDOM,
237             /** Stagger distribution. Multiple interlaced ascending sequences. */
238             STAGGER,
239             /** Plateau distribution. Ascending data from 0 to m, then constant.  */
240             PLATEAU,
241             /** Shuffle distribution. Two randomly interlaced ascending sequences of different lengths. */
242             SHUFFLE,
243 
244             /** Sharktooth distribution. Alternating ascending then descending data from 0
245              * to m and back. This is an addition to the original suite of BM
246              * and is not included in the test suite by default and must be specified.
247              *
248              * <p>An ascending then descending sequence is also known as organpipe in
249              * Valois (2000). This version allows multiple ascending/descending runs in the
250              * same length. */
251             SHARKTOOTH,
252 
253             // Valois (2000)
254 
255             /** Sorted. */
256             SORTED,
257             /** Permutation of ones and zeros. */
258             ONEZERO,
259             /** Musser's median-of-3 killer. This elicits worst case performance for a median-of-3
260              * pivot selection strategy. */
261             M3KILLER,
262             /** A sorted sequence rotated left once. */
263             ROTATED,
264             /** Musser's two-faced sequence (the median-of-3 killer with two random permutations). */
265             TWOFACED,
266             /** An ascending then descending sequence. */
267             ORGANPIPE;
268         }
269 
270         /**
271          * The type of data modification.
272          */
273         enum Modification {
274             /** Copy modification. */
275             COPY,
276             /** Reverse modification. */
277             REVERSE,
278             /** Reverse front-half modification. */
279             REVERSE_FRONT,
280             /** Reverse back-half modification. */
281             REVERSE_BACK,
282             /** Sort modification. */
283             SORT,
284             /** Descending modification (this is an addition to the original suite of BM).
285              * It is useful for testing worst case performance, e.g. insertion sort performs
286              * poorly on descending data. Heapselect using a max heap (to find k minimum elements)
287              * would perform poorly if data is processed in the forward direction as all elements
288              * must be inserted.
289              *
290              * <p>This is not included in the test suite by default and must be specified.
291              * Note that the Shuffle distribution with a very large seed 'm' is effectively an
292              * ascending sequence and will be reversed to descending as part of the original
293              * B&M suite of data. */
294             DESCENDING,
295             /** Dither modification. Add i % 5 to the data element i.  */
296             DITHER;
297         }
298 
299         /**
300          * Sample information. Used to obtain information about samples that may be slow
301          * for a particular partition method, e.g. they use excessive recursion during quickselect.
302          * This is used for testing: each sample from the data source can provide the
303          * information to create the sample distribution.
304          */
305         public static final class SampleInfo {
306             /** Distribution. */
307             private final Distribution dist;
308             /** Modification. */
309             private final Modification mod;
310             /** Length. */
311             private final int n;
312             /** Seed. */
313             private final int m;
314             /** Offset. */
315             private final int o;
316 
317             /**
318              * @param dist Distribution.
319              * @param mod Modification.
320              * @param n Length.
321              * @param m Seed.
322              * @param o Offset.
323              */
324             SampleInfo(Distribution dist, Modification mod, int n, int m, int o) {
325                 this.dist = dist;
326                 this.mod = mod;
327                 this.n = n;
328                 this.m = m;
329                 this.o = o;
330             }
331 
332             /**
333              * Create an instance with the specified distribution.
334              *
335              * @param v Value.
336              * @return the instance
337              */
338             SampleInfo with(Distribution v) {
339                 return new SampleInfo(v, mod, n, m, o);
340             }
341 
342             /**
343              * Create an instance with the specified modification.
344              *
345              * @param v Value.
346              * @return the instance
347              */
348             SampleInfo with(Modification v) {
349                 return new SampleInfo(dist, v, n, m, o);
350             }
351 
352             /**
353              * @return the distribution
354              */
355             Distribution getDistribution() {
356                 return dist;
357             }
358 
359             /**
360              * @return the modification
361              */
362             Modification getModification() {
363                 return mod;
364             }
365 
366             /**
367              * @return the data length
368              */
369             int getN() {
370                 return n;
371             }
372 
373             /**
374              * @return the distribution seed
375              */
376             int getM() {
377                 return m;
378             }
379 
380             /**
381              * @return the distribution offset
382              */
383             int getO() {
384                 return o;
385             }
386 
387             @Override
388             public String toString() {
389                 return String.format("%s, %s, n=%d, m=%d, o=%d", dist, mod, n, m, o);
390             }
391         }
392 
393         /** Order. This is randomized to ensure that successive calls do not partition
394          * similar distributions. Randomized per invocation to avoid the JVM 'learning'
395          * branch decisions on small data sets. */
396         protected int[] order;
397         /** Cached source of randomness. */
398         protected UniformRandomProvider rng;
399 
400         /** Type of data. Multiple types can be specified in the same string using
401          * lower/upper case, delimited using ':'. */
402         @Param({BM})
403         private String distribution = BM;
404 
405         /** Type of data modification. Multiple types can be specified in the same string using
406          * lower/upper case, delimited using ':'. */
407         @Param({BM})
408         private String modification = BM;
409 
410         /** Extra range to add to the data length.
411          * E.g. Use 1 to force use of odd and even length samples. */
412         @Param({"1"})
413         private int range = 1;
414 
415         /** Sample 'seed'. This is {@code m} in Bentley and McIlroy's test suite.
416          * If set to zero the default is to use powers of 2 based on sample size. */
417         @Param({"0"})
418         private int seed;
419 
420         /** Sample offset. This is used to shift each distribution to create different data.
421          * It is advanced on each invocation of {@link #setup()}. */
422         @Param({"0"})
423         private int offset;
424 
425         /** Number of samples. Applies only to the random distribution. In this case
426          * the length of the data is randomly chosen in {@code [length, length + range)}. */
427         @Param({"0"})
428         private int samples;
429 
430         /** RNG seed. Created using ThreadLocalRandom.current().nextLong(). This is advanced
431          * for the random distribution mode per iteration. Each benchmark executed by
432          * JMH will use the same random data, even across JVMs.
433          *
434          * <p>If this is zero then a random seed is chosen. */
435         @Param({"-7450238124206088695"})
436         private long rngSeed = -7450238124206088695L;
437 
438         /** Data. This is stored as integer data which saves memory. Note that when ranking
439          * data it is not necessary to have the full range of the double data type; the same
440          * number of unique values can be recorded in an array using an integer type.
441          * Returning a double[] forces a copy to be generated for destructive sorting /
442          * partitioning methods. */
443         private int[][] data;
444 
445         /** Sample information. */
446         private List<SampleInfo> sampleInfo;
447 
448         /**
449          * Gets the sample for the given {@code index}.
450          *
451          * <p>This is returned in a randomized order per iteration.
452          *
453          * @param index Index.
454          * @return the data sample
455          */
456         public double[] getData(int index) {
457             return getDataSample(order[index]);
458         }
459 
460         /**
461          * Gets the sample for the given {@code index}.
462          *
463          * <p>This is returned in a randomized order per iteration.
464          *
465          * @param index Index.
466          * @return the data sample
467          */
468         public int[] getIntData(int index) {
469             return getIntDataSample(order[index]);
470         }
471 
472         /**
473          * Gets the sample for the given {@code index}.
474          *
475          * @param index Index.
476          * @return the data sample
477          */
478         protected double[] getDataSample(int index) {
479             final int[] a = data[index];
480             final double[] x = new double[a.length];
481             for (int i = -1; ++i < a.length;) {
482                 x[i] = a[i];
483             }
484             return x;
485         }
486 
487         /**
488          * Gets the sample for the given {@code index}.
489          *
490          * @param index Index.
491          * @return the data sample
492          */
493         protected int[] getIntDataSample(int index) {
494             // For parity with other methods do not use data.clone()
495             final int[] a = data[index];
496             final int[] x = new int[a.length];
497             for (int i = -1; ++i < a.length;) {
498                 x[i] = a[i];
499             }
500             return x;
501         }
502 
503         /**
504          * Gets the sample size for the given {@code index}.
505          *
506          * @param index Index.
507          * @return the data sample size
508          */
509         public int getDataSize(int index) {
510             return data[index].length;
511         }
512 
513         /**
514          * Gets the sample information for the given {@code index}.
515          * Matches the (native) order returned by {@link #getDataSample(int)}.
516          *
517          * @param index Index.
518          * @return the data sample information
519          */
520         SampleInfo getDataSampleInfo(int index) {
521             return sampleInfo.get(index);
522         }
523 
524         /**
525          * Get the number of data samples.
526          *
527          * <p>Note: This data source will create a permutation order per invocation based on
528          * this size. Per-invocation control in JMH is recommended for methods that take
529          * more than 1 millisecond to execute. For very small data and/or fast methods
530          * this may not be achievable. Child classes may override this value to create
531          * a large number of repeats of the same data per invocation. Any class performing
532          * this should also override {@link #getData(int)} to prevent index out of bound errors.
533          * This can be done by mapping the index to the original index using the number of repeats
534          * e.g. {@code original index = index / repeats}.
535          *
536          * @return the number of samples
537          */
538         public int size() {
539             return data.length;
540         }
541 
542         /**
543          * Create the data.
544          */
545         @Setup(Level.Iteration)
546         public void setup() {
547             Objects.requireNonNull(distribution);
548             Objects.requireNonNull(modification);
549 
550             // Set-up using parameters (may throw)
551             final EnumSet<Distribution> dist = getDistributions();
552             final int length = getLength();
553             if (length < 1) {
554                 throw new IllegalStateException("Unsupported length: " + length);
555             }
556             // Note: Bentley-McIlroy use n in {100, 1023, 1024, 1025}.
557             // Here we only support a continuous range.
558             final int r = range > 0 ? range : 0;
559             if (length + (long) r > Integer.MAX_VALUE) {
560                 throw new IllegalStateException("Unsupported upper length: " + length);
561             }
562             final int length2 = length + r;
563 
564             // Allow pseudorandom seeding
565             if (rngSeed == 0) {
566                 rngSeed = RandomSource.createLong();
567             }
568             if (rng == null) {
569                 // First call, create objects
570                 rng = RANDOM_SOURCE.create(rngSeed);
571             }
572 
573             // Special case for random distribution mode
574             if (dist.contains(Distribution.RANDOM) && dist.size() == 1 && samples > 0) {
575                 data = new int[samples][];
576                 sampleInfo = new ArrayList<>(samples);
577                 final int upper = seed > 0 ? seed : Integer.MAX_VALUE;
578                 final SharedStateDiscreteSampler s1 = DiscreteUniformSampler.of(rng, 0, upper);
579                 final SharedStateDiscreteSampler s2 = DiscreteUniformSampler.of(rng, length, length2);
580                 for (int i = 0; i < data.length; i++) {
581                     final int[] a = new int[s2.sample()];
582                     for (int j = a.length; --j >= 0;) {
583                         a[j] = s1.sample();
584                     }
585                     data[i] = a;
586                     sampleInfo.add(new SampleInfo(Distribution.RANDOM, Modification.COPY, a.length, 0, 0));
587                 }
588                 return;
589             }
590 
591             // New data per iteration
592             data = null;
593             final int o = offset;
594             offset = rng.nextInt();
595 
596             final EnumSet<Modification> mod = getModifications();
597 
598             // Data using the RNG will be randomized only once.
599             // Here we use the same seed for parity across benchmark methods.
600             // Note that most distributions do not use the source of randomness.
601             final ArrayList<int[]> sampleData = new ArrayList<>();
602             sampleInfo = new ArrayList<>();
603             final List<SampleInfo> info = new ArrayList<>();
604             for (int n = length; n <= length2; n++) {
605                 // Note: Large lengths may wish to limit the range of m to limit
606                 // the memory required to store the samples. Currently a single
607                 // m is supported via the seed parameter.
608                 // Default seed will create ceil(log2(2*n)) * 5 dist * 6 mods samples:
609                 // MAX  = 32 * 5 * 7 * (2^31-1) * 4 bytes == 7679 GiB
610                 // HUGE = 31 * 5 * 7 * 2^30 * 4 bytes == 3719 GiB
611                 // BIG  = 21 * 5 * 7 * 2^20 * 4 bytes == 2519 MiB  <-- within configured JVM -Xmx
612                 // MED  = 11 * 5 * 7 * 2^10 * 4 bytes == 1318 KiB
613                 // (This is for the B&M data.)
614                 // It is possible to create lengths above 2^30 using a single distribution,
615                 // modification, and seed:
616                 // MAX1 = 1 * 1 * 1 * (2^31-1) * 4 bytes == 8191 MiB
617                 // However this is then used to create double[] data thus requiring an extra
618                 // ~16GiB memory for the sample to partition.
619                 for (final int m : createSeeds(seed, n)) {
620                     final List<int[]> d = createDistributions(dist, rng, n, m, o, info);
621                     for (int i = 0; i < d.size(); i++) {
622                         final int[] x = d.get(i);
623                         final SampleInfo si = info.get(i);
624                         if (mod.contains(Modification.COPY)) {
625                             // Don't copy! All other methods generate copies
626                             // so we can use this in-place.
627                             sampleData.add(x);
628                             sampleInfo.add(si.with(Modification.COPY));
629                         }
630                         if (mod.contains(Modification.REVERSE)) {
631                             sampleData.add(reverse(x, 0, n));
632                             sampleInfo.add(si.with(Modification.REVERSE));
633                         }
634                         if (mod.contains(Modification.REVERSE_FRONT)) {
635                             sampleData.add(reverse(x, 0, n >>> 1));
636                             sampleInfo.add(si.with(Modification.REVERSE_FRONT));
637                         }
638                         if (mod.contains(Modification.REVERSE_BACK)) {
639                             sampleData.add(reverse(x, n >>> 1, n));
640                             sampleInfo.add(si.with(Modification.REVERSE_BACK));
641                         }
642                         // Only sort once
643                         if (mod.contains(Modification.SORT) ||
644                             mod.contains(Modification.DESCENDING)) {
645                             final int[] y = x.clone();
646                             Arrays.sort(y);
647                             if (mod.contains(Modification.DESCENDING)) {
648                                 sampleData.add(reverse(y, 0, n));
649                                 sampleInfo.add(si.with(Modification.DESCENDING));
650                             }
651                             if (mod.contains(Modification.SORT)) {
652                                 sampleData.add(y);
653                                 sampleInfo.add(si.with(Modification.SORT));
654                             }
655                         }
656                         if (mod.contains(Modification.DITHER)) {
657                             sampleData.add(dither(x));
658                             sampleInfo.add(si.with(Modification.DITHER));
659                         }
660                     }
661                 }
662             }
663             data = sampleData.toArray(new int[0][]);
664             if (LOG_SIZE.getAndSet(length) != length) {
665                 Logger.getLogger(getClass().getName()).info(
666                     () -> String.format("Data length: [%d, %d] n=%d", length, length2, data.length));
667             }
668         }
669 
670         /**
671          * Create the order to process the indices.
672          *
673          * <p>JMH recommends that invocations should take at
674          * least 1 millisecond for timings to be usable. In practice there should be
675          * enough data that processing takes much longer than a few milliseconds.
676          */
677         @Setup(Level.Invocation)
678         public void createOrder() {
679             if (order == null) {
680                 // First call, create objects
681                 order = PermutationSampler.natural(size());
682             }
683             ArraySampler.shuffle(rng, order);
684         }
685 
686         /**
687          * @return the distributions
688          */
689         private EnumSet<Distribution> getDistributions() {
690             EnumSet<Distribution> dist;
691             if (BM.equals(distribution)) {
692                 dist = EnumSet.of(
693                     Distribution.SAWTOOTH,
694                     Distribution.RANDOM,
695                     Distribution.STAGGER,
696                     Distribution.PLATEAU,
697                     Distribution.SHUFFLE);
698             } else if (VALOIS.equals(distribution)) {
699                 dist = EnumSet.of(
700                     Distribution.RANDOM,
701                     Distribution.SORTED,
702                     Distribution.ONEZERO,
703                     Distribution.M3KILLER,
704                     Distribution.ROTATED,
705                     Distribution.TWOFACED,
706                     Distribution.ORGANPIPE);
707             } else {
708                 dist = getEnumFromParam(Distribution.class, distribution);
709             }
710             return dist;
711         }
712 
713         /**
714          * @return the modifications
715          */
716         private EnumSet<Modification> getModifications() {
717             EnumSet<Modification> mod;
718             if (BM.equals(modification)) {
719                 // Modifications are from Bentley and McIlroy
720                 mod = EnumSet.allOf(Modification.class);
721                 // ... except descending
722                 mod.remove(Modification.DESCENDING);
723             } else if (VALOIS.equals(modification)) {
724                 // For convenience alias Valois to copy
725                 mod = EnumSet.of(Modification.COPY);
726             } else {
727                 mod = getEnumFromParam(Modification.class, modification);
728             }
729             return mod;
730         }
731 
732         /**
733          * Gets all the enum values of the given class from the parameters.
734          *
735          * @param <E> Enum type.
736          * @param cls Class of the enum.
737          * @param parameters Parameters (multiple values delimited by ':')
738          * @return the enum values
739          */
740         static <E extends Enum<E>> EnumSet<E> getEnumFromParam(Class<E> cls, String parameters) {
741             if (ALL.equals(parameters)) {
742                 return EnumSet.allOf(cls);
743             }
744             final EnumSet<E> set = EnumSet.noneOf(cls);
745             final String s = parameters.toUpperCase(Locale.ROOT);
746             for (final E e : cls.getEnumConstants()) {
747                 // Scan for the name
748                 for (int i = s.indexOf(e.name(), 0); i >= 0; i = s.indexOf(e.name(), i)) {
749                     // Ensure a full match to the name:
750                     // either at the end of the string, or followed by the delimiter
751                     i += e.name().length();
752                     if (i == s.length() || s.charAt(i) == ':') {
753                         set.add(e);
754                         break;
755                     }
756                 }
757             }
758             if (set.isEmpty()) {
759                 throw new IllegalStateException("Unknown parameters: " + parameters);
760             }
761             return set;
762         }
763 
764         /**
765          * Creates the seeds.
766          *
767          * <p>This can be pasted into a JShell terminal to verify it works for any size
768          * {@code 1 <= n < 2^31}. With the default behaviour all seeds {@code m} are unsigned
769          * strictly positive powers of 2 and the highest seed should be below {@code 2*n}.
770          *
771          * @param seed Seed (use 0 for default; or provide a strictly positive {@code 1 <= m <= 2^31}).
772          * @param n Sample size.
773          * @return the seeds
774          */
775         private static int[] createSeeds(int seed, int n) {
776             // Allow [1, 2^31] (note 2^31 is negative but handled as a power of 2)
777             if (seed - 1 >= 0) {
778                 return new int[] {seed};
779             }
780             // Bentley-McIlroy use:
781             // for: m = 1; m < 2 * n; m *= 2
782             // This has been modified here to handle n up to MAX_VALUE
783             // by knowing the count of m to generate as the power of 2 >= n.
784 
785             // ceil(log2(n)) + 1 == ceil(log2(2*n)) but handles MAX_VALUE
786             int c = 33 - Integer.numberOfLeadingZeros(n - 1);
787             final int[] seeds = new int[c];
788             c = 0;
789             for (int m = 1; c != seeds.length; m *= 2) {
790                 seeds[c++] = m;
791             }
792             return seeds;
793         }
794 
795         /**
796          * Creates the distribution samples. Handles {@code m = 2^31} using
797          * {@link Integer#MIN_VALUE}.
798          *
799          * <p>The offset is used to adjust each distribution to generate a different
800          * output. Only applies to distributions that do not use the source of randomness.
801          *
802          * <p>Distributions are generated in enum order and recorded in the output {@code info}.
803          * Distributions that are a constant value at {@code m == 1} are not generated.
804          * This case is handled by the plateau distribution which will be a constant value
805          * except one occurrence of zero.
806          *
807          * @param dist Distributions.
808          * @param rng Source of randomness.
809          * @param n Length of the sample.
810          * @param m Sample seed (in [1, 2^31])
811          * @param o Offset.
812          * @param info Sample information.
813          * @return the samples
814          */
815         private static List<int[]> createDistributions(EnumSet<Distribution> dist,
816                 UniformRandomProvider rng, int n, int m, int o, List<SampleInfo> info) {
817             final ArrayList<int[]> distData = new ArrayList<>(6);
818             int[] x;
819             info.clear();
820             SampleInfo si = new SampleInfo(null, null, n, m, o);
821             // B&M (1993)
822             if (dist.contains(Distribution.SAWTOOTH) && m != 1) {
823                 x = createSample(distData, info, si.with(Distribution.SAWTOOTH));
824                 // i % m
825                 // Typical case m is a power of 2 so we can use a mask
826                 // Use the offset.
827                 final int mask = m - 1;
828                 if ((m & mask) == 0) {
829                     for (int i = -1; ++i < n;) {
830                         x[i] = (i + o) & mask;
831                     }
832                 } else {
833                     // User input seed. Start at the offset.
834                     int j = Integer.remainderUnsigned(o, m);
835                     for (int i = -1; ++i < n;) {
836                         j = j % m;
837                         x[i] = j++;
838                     }
839                 }
840             }
841             if (dist.contains(Distribution.RANDOM) && m != 1) {
842                 x = createSample(distData, info, si.with(Distribution.RANDOM));
843                 // rand() % m
844                 // A sampler is faster than rng.nextInt(m); the sampler has an inclusive upper.
845                 final SharedStateDiscreteSampler s = DiscreteUniformSampler.of(rng, 0, m - 1);
846                 for (int i = -1; ++i < n;) {
847                     x[i] = s.sample();
848                 }
849             }
850             if (dist.contains(Distribution.STAGGER)) {
851                 x = createSample(distData, info, si.with(Distribution.STAGGER));
852                 // Overflow safe: (i * m + i) % n
853                 final long nn = n;
854                 final long oo = Integer.toUnsignedLong(o);
855                 for (int i = -1; ++i < n;) {
856                     final long j = i + oo;
857                     x[i] = (int) ((j * m + j) % nn);
858                 }
859             }
860             if (dist.contains(Distribution.PLATEAU)) {
861                 x = createSample(distData, info, si.with(Distribution.PLATEAU));
862                 // min(i, m)
863                 for (int i = Math.min(n, m); --i >= 0;) {
864                     x[i] = i;
865                 }
866                 for (int i = m - 1; ++i < n;) {
867                     x[i] = m;
868                 }
869                 // Rotate
870                 final int n1 = Integer.remainderUnsigned(o, n);
871                 if (n1 != 0) {
872                     final int[] a = x.clone();
873                     final int n2 = n - n1;
874                     System.arraycopy(a, 0, x, n1, n2);
875                     System.arraycopy(a, n2, x, 0, n1);
876                 }
877             }
878             if (dist.contains(Distribution.SHUFFLE) && m != 1) {
879                 x = createSample(distData, info, si.with(Distribution.SHUFFLE));
880                 // rand() % m ? (j += 2) : (k += 2)
881                 final SharedStateDiscreteSampler s = DiscreteUniformSampler.of(rng, 0, m - 1);
882                 for (int i = -1, j = 0, k = 1; ++i < n;) {
883                     x[i] = s.sample() != 0 ? (j += 2) : (k += 2);
884                 }
885             }
886             // Extra - based on organpipe with a variable ascending/descending length
887             if (dist.contains(Distribution.SHARKTOOTH) && m != 1) {
888                 x = createSample(distData, info, si.with(Distribution.SHARKTOOTH));
889                 // ascending-descending runs
890                 int i = -1;
891                 int j = (o & Integer.MAX_VALUE) % m - 1;
892                 OUTER:
893                 for (;;) {
894                     while (++j < m) {
895                         if (++i == n) {
896                             break OUTER;
897                         }
898                         x[i] = j;
899                     }
900                     while (--j >= 0) {
901                         if (++i == n) {
902                             break OUTER;
903                         }
904                         x[i] = j;
905                     }
906                 }
907             }
908             // Valois (2000)
909             if (dist.contains(Distribution.SORTED)) {
910                 x = createSample(distData, info, si.with(Distribution.SORTED));
911                 for (int i = -1; ++i < n;) {
912                     x[i] = i;
913                 }
914             }
915             if (dist.contains(Distribution.ONEZERO)) {
916                 x = createSample(distData, info, si.with(Distribution.ONEZERO));
917                 // permutation of floor(n/2) ones and ceil(n/2) zeroes.
918                 // For convenience this uses random ones and zeros to avoid a shuffle
919                 // and simply reads bits from integers. The distribution will not
920                 // be exactly 50:50.
921                 final int end = n & ~31;
922                 for (int i = 0; i < end; i += 32) {
923                     int z = rng.nextInt();
924                     for (int j = -1; ++j < 32;) {
925                         x[i + j] = z & 1;
926                         z >>>= 1;
927                     }
928                 }
929                 for (int i = end; ++i < n;) {
930                     x[i] = rng.nextBoolean() ? 1 : 0;
931                 }
932             }
933             if (dist.contains(Distribution.M3KILLER)) {
934                 x = createSample(distData, info, si.with(Distribution.M3KILLER));
935                 medianOf3Killer(x);
936             }
937             if (dist.contains(Distribution.ROTATED)) {
938                 x = createSample(distData, info, si.with(Distribution.ROTATED));
939                 // sorted sequence rotated left once
940                 // 1, 2, 3, ..., n-1, 0
941                 for (int i = 1; i < n; i++) {
942                     x[i - 1] = i;
943                 }
944             }
945             if (dist.contains(Distribution.TWOFACED)) {
946                 x = createSample(distData, info, si.with(Distribution.TWOFACED));
947                 // Musser's two faced randomly permutes a median-of-3 killer in
948                 // 4 floor(log2(n)) through n/2 and n/2 + 4 floor(log2(n)) through n
949                 medianOf3Killer(x);
950                 final int j = 4 * (31 - Integer.numberOfLeadingZeros(n));
951                 final int n2 = n >>> 1;
952                 ArraySampler.shuffle(rng, x, j, n2);
953                 ArraySampler.shuffle(rng, x, n2 + j, n);
954             }
955             if (dist.contains(Distribution.ORGANPIPE)) {
956                 x = createSample(distData, info, si.with(Distribution.ORGANPIPE));
957                 // 0, 1, 2, 3, ..., 3, 2, 1, 0
958                 // n should be even to leave two equal values in the middle, otherwise a single
959                 for (int i = -1, j = n; ++i <= --j;) {
960                     x[i] = i;
961                     x[j] = i;
962                 }
963             }
964             return distData;
965         }
966 
967         /**
968          * Create the sample array and add it to the {@code data}; add the information to the {@code info}.
969          *
970          * @param data Data samples.
971          * @param info Sample information.
972          * @param s Sample information.
973          * @return the new sample array
974          */
975         private static int[] createSample(ArrayList<int[]> data, List<SampleInfo> info,
976             SampleInfo s) {
977             final int[] x = new int[s.getN()];
978             data.add(x);
979             info.add(s);
980             return x;
981         }
982 
983         /**
984          * Create Musser's median-of-3 killer sequence (in-place).
985          *
986          * @param x Data.
987          */
988         private static void medianOf3Killer(int[] x) {
989             // This uses the original K_2k sequence from Musser (1997)
990             // Introspective sorting and selection algorithms,
991             // Software—Practice and Experience, 27(8), 983–993.
992             // A true median-of-3 killer requires n to be an even integer divisible by 4,
993             // i.e. k is an even positive integer. This causes a median-of-3 partition
994             // strategy to produce a sequence of n/4 partitions into sub-sequences of
995             // length 2 and n-2, 2 and n-4, ..., 2 and n/2.
996             // 1   2   3   4   5       k-2   k-1  k   k+1 k+2 k+3     2k-1  2k
997             // 1, k+1, 3, k+3, 5, ..., 2k-3, k-1 2k-1  2   4   6  ... 2k-2  2k
998             final int n = x.length;
999             final int k = n >>> 1;
1000             for (int i = 0; i < k; i++) {
1001                 x[i] = ++i;
1002                 x[i] = k + i;
1003             }
1004             for (int i = k - 1, j = 2; ++i < n; j += 2) {
1005                 x[i] = j;
1006             }
1007         }
1008 
1009         /**
1010          * Return a (part) reversed copy of the data.
1011          *
1012          * @param x Data.
1013          * @param from Start index to reverse (inclusive).
1014          * @param to End index to reverse (exclusive).
1015          * @return the copy
1016          */
1017         private static int[] reverse(int[] x, int from, int to) {
1018             final int[] a = x.clone();
1019             for (int i = from - 1, j = to; ++i < --j;) {
1020                 final int v = a[i];
1021                 a[i] = a[j];
1022                 a[j] = v;
1023             }
1024             return a;
1025         }
1026 
1027         /**
1028          * Return a dithered copy of the data.
1029          *
1030          * @param x Data.
1031          * @return the copy
1032          */
1033         private static int[] dither(int[] x) {
1034             final int[] a = x.clone();
1035             for (int i = a.length; --i >= 0;) {
1036                 // Bentley-McIlroy use i % 5.
1037                 // It is important this is not a power of 2 so it will not coincide
1038                 // with patterns created in the data using the default m powers-of-2.
1039                 a[i] += i % 5;
1040             }
1041             return a;
1042         }
1043 
        /**
         * Gets the minimum length of the data.
         * The actual length is enumerated in {@code [length, length + range]}.
         *
         * <p>Concrete sources return their JMH {@code length} parameter.
         *
         * @return the length
         */
        protected abstract int getLength();
1051 
        /**
         * Gets the range used to extend the length of each sample of data.
         * The actual length is enumerated in {@code [length, length + range]}.
         *
         * @return the range
         */
        final int getRange() {
            return range;
        }
1060 
1061         /**
1062          * Sets the distribution(s) of the data.
1063          * If the input is an empty array or the first enum value is null,
1064          * then all distributions are used.
1065          *
1066          * @param v Values.
1067          */
1068         void setDistribution(Distribution... v) {
1069             if (v.length == 0 || v[0] == null) {
1070                 distribution = ALL;
1071             } else {
1072                 final EnumSet<Distribution> s = EnumSet.of(v[0], v);
1073                 distribution = s.stream().map(Enum::name).collect(Collectors.joining(":"));
1074             }
1075         }
1076 
1077         /**
1078          * Sets the modification of the data.
1079          * If the input is an empty array or the first enum value is null,
1080          * then all distributions are used.
1081          *
1082          * @param v Value.
1083          */
1084         void setModification(Modification... v) {
1085             if (v.length == 0 || v[0] == null) {
1086                 modification = ALL;
1087             } else {
1088                 final EnumSet<Modification> s = EnumSet.of(v[0], v);
1089                 modification = s.stream().map(Enum::name).collect(Collectors.joining(":"));
1090             }
1091         }
1092 
        /**
         * Sets the maximum addition to extend the length of each sample of data.
         * The actual length is enumerated in {@code [length, length + range]}.
         *
         * @param v Value.
         */
        void setRange(int v) {
            range = v;
        }
1102 
        /**
         * Sets the sample 'seed' used to generate distributions.
         * If set to zero the default is to use powers of 2 based on sample size.
         *
         * <p>Supports positive values and the edge case of {@link Integer#MIN_VALUE}
         * which is treated as an unsigned power of 2.
         *
         * <p>Any other non-positive value also selects the default power-of-2 seeds.
         *
         * @param v Value (ignored if not within {@code [1, 2^31]}).
         */
        void setSeed(int v) {
            seed = v;
        }
1115 
        /**
         * Sets the sample 'offset' used to generate distributions. Advanced to a new
         * random integer on each invocation of {@link #setup()}.
         *
         * <p>The offset perturbs the deterministic distributions so repeat runs do not
         * reuse identical data.
         *
         * @param v Value.
         */
        void setOffset(int v) {
            offset = v;
        }
1125 
        /**
         * Sets the number of samples to use for the random distribution mode.
         * See {@link AbstractDataSource} for details.
         *
         * @param v Value.
         */
        void setSamples(int v) {
            samples = v;
        }
1135 
1136         /**
1137          * Sets the seed for the random number generator.
1138          *
1139          * @param v Value.
1140          */
1141         void setRngSeed(long v) {
1142             this.rngSeed = v;
1143         }
1144     }
1145 
1146     /**
1147      * Source of {@code double} array data to sort.
1148      */
1149     @State(Scope.Benchmark)
1150     public static class SortSource extends AbstractDataSource {
1151         /** Data length. */
1152         @Param({"1023"})
1153         private int length;
1154         /** Number of repeats. This is used to control the number of times the data is processed
1155          * per invocation. Note that each invocation randomises the order. For very small data
1156          * and/or fast methods there may not be enough data to achieve the target of 1
1157          * millisecond per invocation. Use this value to increase the length of each invocation.
1158          * For example the insertion sort on tiny data, or the sort5 methods, may require this
1159          * to be 1,000,000 or higher. */
1160         @Param({"1"})
1161         private int repeats;
1162 
1163         /** {@inheritDoc} */
1164         @Override
1165         protected int getLength() {
1166             return length;
1167         }
1168 
1169         /** {@inheritDoc} */
1170         @Override
1171         public int size() {
1172             return super.size() * repeats;
1173         }
1174 
1175         /** {@inheritDoc} */
1176         @Override
1177         public double[] getData(int index) {
1178             // order = (data index) * repeats + repeat
1179             // data index = order / repeats
1180             return super.getDataSample(order[index] / repeats);
1181         }
1182     }
1183 
    /**
     * Source of k-th indices to partition.
     *
     * <p>This class provides both data to partition and the indices to partition.
     * The indices and data are created per iteration. The order to process them
     * is created per invocation.
     */
    @State(Scope.Benchmark)
    public static class KSource extends AbstractDataSource {
        /** Data length. */
        @Param({"1023"})
        private int length;
        /** Number of indices to select. */
        @Param({"1", "2", "3", "5", "10"})
        private int k;
        /** Number of repeats. */
        @Param({"10"})
        private int repeats;
        /** Distribution mode. K indices can be distributed randomly or uniformly.
         * <ul>
         * <li>"random": distribute k indices randomly
         * <li>"uniform": distribute k indices uniformly but with a random start point
         * <li>"index": Use a single index at k
         * <li>"single": Use a single index at k uniformly spaced points. This mode
         * first generates the spacing for the indices. Then samples from that spacing
         * using the configured repeats. Common usage of k=10 will have 10 samples with a
         * single index, each in a different position.
         * </ul>
         * <p>If the mode ends with a "s" then the indices are sorted. For example "randoms"
         * will sort the random indices.
         */
        @Param({"random"})
        private String mode;
        /** Separation. K can be single indices (s=0) or paired (s!=0). Paired indices are
         * separated using the specified separation. When running in paired mode the
         * number of k is doubled and duplicates may occur. This method is used for
         * testing sparse or uniform distributions of paired indices that may occur when
         * interpolating quantiles. Since the separation is allowed to be above 1 it also
         * allows testing configurations for close indices. */
        @Param({"0"})
        private int s;

        /** Indices. One entry per (data sample, repeat) pair. */
        private int[][] indices;
        /** Cache permutation samplers, one per enumerated data length.
         * Each sampler holds an array which is randomized per call to obtain a permutation. */
        private PermutationSampler[] samplers;

        /** {@inheritDoc} */
        @Override
        protected int getLength() {
            return length;
        }

        /** {@inheritDoc} */
        @Override
        public int size() {
            return super.size() * repeats;
        }

        /** {@inheritDoc} */
        @Override
        public double[] getData(int index) {
            // order = (data index) * repeats + repeat
            // data index = order / repeats
            return super.getDataSample(order[index] / repeats);
        }

        /** {@inheritDoc} */
        @Override
        public int[] getIntData(int index) {
            return super.getIntDataSample(order[index] / repeats);
        }

        /**
         * Gets the indices for the given {@code index}.
         *
         * @param index Index.
         * @return the data indices
         */
        public int[] getIndices(int index) {
            // order = (data index) * repeats + repeat
            // Directly look-up the indices for this repeat.
            return indices[order[index]];
        }

        /**
         * Create the indices.
         */
        @Override
        @Setup(Level.Iteration)
        public void setup() {
            // The separation must leave room for a second index within the data length
            if (s < 0 || s >= getLength()) {
                throw new IllegalStateException("Invalid separation: " + s);
            }
            super.setup();

            // Data will be randomized per iteration
            if (indices == null) {
                // First call, create objects
                indices = new int[size()][];
                // Cache samplers. These hold an array which is randomized
                // per call to obtain a permutation.
                if (k > 1) {
                    samplers = new PermutationSampler[getRange() + 1];
                }
            }

            // Create indices in the data sample length.
            // If a separation is provided then the length is reduced by the separation
            // to make space for a second index.

            int index = 0;
            final int noOfSamples = super.size();
            if (mode.startsWith("random")) {
                // random mode creates a permutation of k indices in the length
                if (k > 1) {
                    final int baseLength = getLength();
                    for (int i = 0; i < noOfSamples; i++) {
                        final int len = getDataSize(i);
                        // Create permutation sampler for the length.
                        // Samplers are cached using the offset of the length above the base.
                        PermutationSampler sampler = samplers[len - baseLength];
                        if (sampler == null) {
                            // Reduce length by the separation
                            final int n = len - s;
                            samplers[len - baseLength] = sampler = new PermutationSampler(rng, n, k);
                        }
                        for (int j = repeats; --j >= 0;) {
                            indices[index++] = sampler.sample();
                        }
                    }
                } else {
                    // k=1: No requirement for a permutation
                    for (int i = 0; i < noOfSamples; i++) {
                        // Reduce length by the separation
                        final int n = getDataSize(i) - s;
                        for (int j = repeats; --j >= 0;) {
                            indices[index++] = new int[] {rng.nextInt(n)};
                        }
                    }
                }
            } else if (mode.startsWith("uniform")) {
                // uniform indices with a random start
                for (int i = 0; i < noOfSamples; i++) {
                    // Reduce length by the separation
                    final int n = getDataSize(i) - s;
                    final int step = Math.max(1, (int) Math.round((double) n / k));
                    for (int j = repeats; --j >= 0;) {
                        final int[] k1 = new int[k];
                        int p = rng.nextInt(n);
                        for (int m = 0; m < k; m++) {
                            p = (p + step) % n;
                            k1[m] = p;
                        }
                        indices[index++] = k1;
                    }
                }
            } else if (mode.startsWith("single")) {
                // single index drawn from k uniformly spaced points
                for (int i = 0; i < noOfSamples; i++) {
                    // Reduce length by the separation
                    final int n = getDataSize(i) - s;
                    int[] samples;
                    // When k approaches n then a linear spacing covers every part
                    // of the array and we sample. Do this when n < k/4. This handles
                    // k > n (saturation).
                    if (n < (k >> 2)) {
                        samples = rng.ints(k, 0, n).toArray();
                    } else {
                        // Linear spacing, starting at half a step
                        final int step = n / k;
                        samples = new int[k];
                        for (int j = 0, x = step >> 1; j < k; j++, x += step) {
                            samples[j] = x;
                        }
                    }
                    for (int j = 0; j < repeats; j++) {
                        // Reshuffle the spacing each time it has been fully consumed
                        final int ii = j % k;
                        if (ii == 0) {
                            ArraySampler.shuffle(rng, samples);
                        }
                        indices[index++] = new int[] {samples[ii]};
                    }
                }
            } else if ("index".equals(mode)) {
                // Same single or paired indices for all samples.
                // Check the index is valid.
                for (int i = 0; i < noOfSamples; i++) {
                    // Reduce length by the separation
                    final int n = getDataSize(i) - s;
                    if (k >= n) {
                        throw new IllegalStateException("Invalid k: " + k + " >= " + n);
                    }
                }
                // Note: every entry shares the same (read-only) array instance
                final int[] kk = s > 0 ? new int[] {k, k + s} : new int[] {k};
                Arrays.fill(indices, kk);
                return;
            } else {
                throw new IllegalStateException("Unknown index mode: " + mode);
            }
            // Add paired indices: interleave each index j with its pair j + s
            if (s > 0) {
                for (int i = 0; i < indices.length; i++) {
                    final int[] k1 = indices[i];
                    final int[] k2 = new int[k1.length << 1];
                    for (int j = 0; j < k1.length; j++) {
                        k2[j << 1] = k1[j];
                        k2[(j << 1) + 1] = k1[j] + s;
                    }
                    indices[i] = k2;
                }
            }
            // Optionally sort (mode suffix "s", e.g. "randoms")
            if (mode.endsWith("s")) {
                for (int i = 0; i < indices.length; i++) {
                    Arrays.sort(indices[i]);
                }
            }
        }
    }
1403 
1404     /**
1405      * Source of k-th indices. This does not extend the {@link AbstractDataSource} to provide
1406      * data to partition. It is to be used to test processing of indices without partition
1407      * overhead.
1408      */
1409     @State(Scope.Benchmark)
1410     public static class IndexSource {
1411         /** Indices. */
1412         protected int[][] indices;
1413         /** Upper bound (exclusive) on the indices. */
1414         @Param({"1000", "1000000", "1000000000"})
1415         private int length;
1416         /** Number of indices to select. */
1417         @Param({"10", "20", "40", "80", "160"})
1418         private int k;
1419         /** Number of repeats. */
1420         @Param({"1000"})
1421         private int repeats;
1422         /** RNG seed. Created using ThreadLocalRandom.current().nextLong(). Each benchmark
1423          * executed by JMH will use the same random data, even across JVMs.
1424          *
1425          * <p>If this is zero then a random seed is chosen. */
1426         @Param({"-7450238124206088695"})
1427         private long rngSeed;
1428         /** Ordered keys. */
1429         @Param({"false"})
1430         private boolean ordered;
1431         /** Minimum separation between keys. */
1432         @Param({"32"})
1433         private int separation;
1434 
1435         /**
1436          * @return the indices
1437          */
1438         public int[][] getIndices() {
1439             return indices;
1440         }
1441 
1442         /**
1443          * Gets the minimum separation between keys. This is used by benchmarks
1444          * to ignore splitting/search keys below a threshold.
1445          *
1446          * @return the minimum separation
1447          */
1448         public int getMinSeparation() {
1449             return separation;
1450         }
1451 
1452         /**
1453          * Create the indices and search points.
1454          */
1455         @Setup(Level.Iteration)
1456         public void setup() {
1457             if (k < 2) {
1458                 throw new IllegalStateException("Require multiple indices");
1459             }
1460             // Data will be randomized per iteration. It is the same sequence across
1461             // benchmarks and JVM instances and allows benchmarking across JVM platforms
1462             // with the same data.
1463             // Allow pseudorandom seeding
1464             if (rngSeed == 0) {
1465                 rngSeed = RandomSource.createLong();
1466             }
1467             final UniformRandomProvider rng = RANDOM_SOURCE.create(rngSeed);
1468             // Advance the seed for the next iteration.
1469             rngSeed = rng.nextLong();
1470 
1471             final SharedStateDiscreteSampler s = DiscreteUniformSampler.of(rng, 0, length - 1);
1472 
1473             indices = new int[repeats][];
1474 
1475             for (int i = repeats; --i >= 0;) {
1476                 // Indices with possible repeats
1477                 final int[] x = new int[k];
1478                 for (int j = k; --j >= 0;) {
1479                     x[j] = s.sample();
1480                 }
1481                 indices[i] = x;
1482                 if (ordered) {
1483                     Sorting.sortIndices(x, x.length);
1484                 }
1485             }
1486         }
1487 
1488         /**
1489          * @return the RNG seed
1490          */
1491         long getRngSeed() {
1492             return rngSeed;
1493         }
1494     }
1495 
1496     /**
1497      * Source of k-th indices to be searched/split.
1498      * Can be used to split the same indices multiple times, or split a set of indices
1499      * a single time, e.g. split indices k at point p.
1500      */
    @State(Scope.Benchmark)
    public static class SplitIndexSource extends IndexSource {
        /** Division mode. */
        @Param({"RANDOM", "BINARY"})
        private DivisionMode mode;

        /** Search points. */
        private int[][] points;
        /** The look-up samples. These are used to identify a set of indices, and a single point to
         * find in the range of the indices, e.g. split indices k at point p. The long packs
         * two integers: the index of the indices k; and the search point p. These are packed
         * as a long to enable easy shuffling of samples and access to the two indices. */
        private long[] samples;

        /** Options for the division mode. */
        public enum DivisionMode {
            /** Randomly divide. */
            RANDOM,
            /** Divide using binary division with recursion left then right. */
            BINARY;
        }

        /**
         * Return the search points. They are the median index points between adjacent
         * indices. These are in the order specified by the division mode.
         *
         * @return the search points
         */
        public int[][] getPoints() {
            return points;
        }

        /**
         * @return the sample size
         */
        int samples() {
            return samples.length;
        }

        /**
         * Gets the indices for the random sample.
         *
         * @param index the index
         * @return the indices
         */
        int[] getIndices(int index) {
            // The high 32 bits of the packed sample hold the row into the indices array.
            return indices[(int) (samples[index] >>> Integer.SIZE)];
        }

        /**
         * Gets the search point for the random sample.
         *
         * @param index the index
         * @return the search point
         */
        int getPoint(int index) {
            // The low 32 bits of the packed sample hold the search point.
            return (int) samples[index];
        }

        /**
         * Create the indices and search points.
         */
        @Override
        @Setup(Level.Iteration)
        public void setup() {
            super.setup();

            // Recreate the RNG from the seed already advanced by the parent setup so
            // the shuffles below are reproducible per iteration.
            final UniformRandomProvider rng = RANDOM_SOURCE.create(getRngSeed());

            final int[][] indices = getIndices();
            points = new int[indices.length][];

            final int s = getMinSeparation();

            // Set the division mode
            final boolean random = Objects.requireNonNull(mode) == DivisionMode.RANDOM;

            // Total number of search points over all samples.
            int size = 0;

            for (int i = points.length; --i >= 0;) {
                // Get the sorted unique indices
                final int[] y = indices[i].clone();
                final int unique = Sorting.sortIndices(y, y.length);

                // Create the cut points between each unique index.
                // At most unique-1 cuts exist; dense pairs (closer than s) are skipped.
                int[] p = new int[unique - 1];
                if (random) {
                    int c = 0;
                    for (int j = 0; j < p.length; j++) {
                        // Ignore dense keys
                        if (y[j] + s < y[j + 1]) {
                            p[c++] = (y[j] + y[j + 1]) >>> 1;
                        }
                    }
                    p = Arrays.copyOf(p, c);
                    ArraySampler.shuffle(rng, p);
                    points[i] = p;
                } else {
                    // binary division
                    final int c = divide(y, 0, unique - 1, p, 0, s);
                    points[i] = Arrays.copyOf(p, c);
                }
                size += points[i].length;
            }

            // Create the samples: pack indices index+point into a long.
            // Fill back-to-front; size reaches zero exactly when every point is consumed.
            samples = new long[size];
            for (int i = points.length; --i >= 0;) {
                final long l = ((long) i) << Integer.SIZE;
                for (final int p : points[i]) {
                    // p is a non-negative int so a simple OR packs it into the low 32 bits.
                    samples[--size] = l | p;
                }
            }
            ArraySampler.shuffle(rng, samples);
        }

        /**
         * Divide the indices using binary division with recursion left then right.
         * If a division is possible store the division point and update the count.
         *
         * @param indices Indices to divide (sorted ascending).
         * @param lo Lower index in indices (inclusive).
         * @param hi Upper index in indices (inclusive).
         * @param p Division points (capacity is at least the number of possible cuts).
         * @param c Count of division points.
         * @param s Minimum separation between indices.
         * @return the updated count of division points.
         */
        private static int divide(int[] indices, int lo, int hi, int[] p, int c, int s) {
            if (lo < hi) {
                // Divide the interval in half
                final int m = (lo + hi) >>> 1;
                // Create a division point at approximately the midpoint
                final int m1 = m + 1;
                // Ignore dense keys
                if (indices[m] + s < indices[m1]) {
                    final int k = (indices[m] + indices[m1]) >>> 1;
                    p[c++] = k;
                }
                // Recurse left then right.
                // Does nothing if lo + 1 == hi as m == lo and m1 == hi.
                c = divide(indices, lo, m, p, c, s);
                c = divide(indices, m1, hi, p, c, s);
            }
            return c;
        }
    }
1648 
1649     /**
1650      * Source of an {@link SearchableInterval}.
1651      */
1652     @State(Scope.Benchmark)
1653     public static class SearchableIntervalSource {
1654         /** Name of the source. */
1655         @Param({"ScanningKeyInterval",
1656             "BinarySearchKeyInterval",
1657             "IndexSetInterval",
1658             "CompressedIndexSet",
1659             // Same speed as the CompressedIndexSet_2
1660             //"CompressedIndexSet2",
1661             })
1662         private String name;
1663 
1664         /** The factory. */
1665         private Function<int[], SearchableInterval> factory;
1666 
1667         /**
1668          * @param indices Indices.
1669          * @return {@link SearchableInterval}
1670          */
1671         public SearchableInterval create(int[] indices) {
1672             return factory.apply(indices);
1673         }
1674 
1675         /**
1676          * Create the function.
1677          */
1678         @Setup
1679         public void setup() {
1680             Objects.requireNonNull(name);
1681             if ("ScanningKeyInterval".equals(name)) {
1682                 factory = k -> {
1683                     k = k.clone();
1684                     final int unique = Sorting.sortIndices(k, k.length);
1685                     return ScanningKeyInterval.of(k, unique);
1686                 };
1687             } else if ("BinarySearchKeyInterval".equals(name)) {
1688                 factory = k -> {
1689                     k = k.clone();
1690                     final int unique = Sorting.sortIndices(k, k.length);
1691                     return BinarySearchKeyInterval.of(k, unique);
1692                 };
1693             } else if ("IndexSetInterval".equals(name)) {
1694                 factory = IndexSet::of;
1695             } else if (name.equals("CompressedIndexSet2")) {
1696                 factory = CompressedIndexSet2::of;
1697             } else if (name.startsWith("CompressedIndexSet")) {
1698                 // To use compression 2 requires CompressedIndexSet_2 otherwise
1699                 // a fixed compression set will be returned
1700                 final int c = getCompression(name);
1701                 factory = k -> CompressedIndexSet.of(c, k);
1702             } else {
1703                 throw new IllegalStateException("Unknown SearchableInterval: " + name);
1704             }
1705         }
1706 
1707         /**
1708          * Gets the compression from the last character of the name.
1709          *
1710          * @param name Name.
1711          * @return the compression
1712          */
1713         private static int getCompression(String name) {
1714             final char c = name.charAt(name.length() - 1);
1715             if (Character.isDigit(c)) {
1716                 return Character.digit(c, 10);
1717             }
1718             return 1;
1719         }
1720     }
1721 
1722     /**
1723      * Source of an {@link UpdatingInterval}.
1724      */
1725     @State(Scope.Benchmark)
1726     public static class UpdatingIntervalSource {
1727         /** Name of the source. */
1728         @Param({"KeyUpdatingInterval",
1729             // Same speed as BitIndexUpdatingInterval
1730             //"IndexSet",
1731             "BitIndexUpdatingInterval",
1732             })
1733         private String name;
1734 
1735         /** The factory. */
1736         private Function<int[], UpdatingInterval> factory;
1737 
1738         /**
1739          * @param indices Indices.
1740          * @return {@link UpdatingInterval}
1741          */
1742         public UpdatingInterval create(int[] indices) {
1743             return factory.apply(indices);
1744         }
1745 
1746         /**
1747          * Create the function.
1748          */
1749         @Setup
1750         public void setup() {
1751             Objects.requireNonNull(name);
1752             if ("KeyUpdatingInterval".equals(name)) {
1753                 factory = k -> {
1754                     k = k.clone();
1755                     final int unique = Sorting.sortIndices(k, k.length);
1756                     return KeyUpdatingInterval.of(k, unique);
1757                 };
1758             } else if ("IndexSet".equals(name)) {
1759                 factory = k -> IndexSet.of(k).interval();
1760             } else if (name.equals("BitIndexUpdatingInterval")) {
1761                 factory = k -> BitIndexUpdatingInterval.of(k, k.length);
1762             } else {
1763                 throw new IllegalStateException("Unknown UpdatingInterval: " + name);
1764             }
1765         }
1766     }
1767 
1768     /**
1769      * Source of a range of positions to partition. These are positioned away from the edge
1770      * using a power of 2 shift.
1771      *
1772      * <p>This is a specialised class to allow benchmarking the switch from using
1773      * quickselect partitioning to using an edge selection.
1774      *
1775      * <p>This class provides both data to partition and the indices to partition.
1776      * The indices and data are created per iteration. The order to process them
1777      * is created per invocation.
1778      */
    @State(Scope.Benchmark)
    public static class EdgeSource extends AbstractDataSource {
        /** Data length. */
        @Param({"1023"})
        private int length;
        /** Mode used to derive the edge index k from the parameter p. */
        @Param({"SHIFT"})
        private Mode mode;
        /** Parameter to find k. Configured for 'shift' of the length. */
        @Param({"1", "2", "3", "4", "5", "6", "7", "8", "9"})
        private int p;
        /** Target indices (as pairs of {@code [ka, kb]} defining a range to select).
         * Even slots hold the (shared) lower-edge pair; odd slots the per-sample upper-edge pair. */
        private int[][] indices;

        /** Define the method used to generate the edge k. */
        public enum Mode {
            /** Create {@code k} using a right-shift {@code >>>} applied to the length. */
            SHIFT,
            /** Use the parameter {@code p} as an index. */
            INDEX;
        }

        /** {@inheritDoc} */
        @Override
        public int size() {
            // Two repeats per data sample: one lower-edge and one upper-edge selection.
            return super.size() * 2;
        }

        /** {@inheritDoc} */
        @Override
        public double[] getData(int index) {
            // order = (data index) * repeats + repeat
            // data index = order / repeats; repeats=2 divide by using a shift
            return super.getDataSample(order[index] >> 1);
        }

        /**
         * Gets the sample indices for the given {@code index}.
         * Returns a range to partition {@code [k1, kn]}.
         *
         * @param index Index.
         * @return the target indices
         */
        public int[] getIndices(int index) {
            // order = (data index) * repeats + repeat
            // Directly look-up the indices for this repeat.
            return indices[order[index]];
        }

        /** {@inheritDoc} */
        @Override
        protected int getLength() {
            return length;
        }

        /**
         * Create the data and check the indices are not at the end.
         */
        @Override
        @Setup(Level.Iteration)
        public void setup() {
            // Data will be randomized per iteration
            super.setup();
            // Error for a bad configuration. Allow k=0 but not smaller.
            // Uses the lower bound on the length.
            int k;
            if (mode == Mode.SHIFT) {
                k = length >>> p;
                // k may be zero only when p is the smallest shift that clears the length;
                // a larger p indicates a misconfigured parameter sweep.
                if (k == 0 && length >>> (p - 1) == 0) {
                    throw new IllegalStateException(length + " >>> (" + p + " - 1) == 0");
                }
            } else if (mode == Mode.INDEX) {
                k = p;
                if (k < 0 || k >= length) {
                    throw new IllegalStateException("Invalid index [0, " + length + "): " + p);
                }
            } else {
                throw new IllegalStateException("Unknown mode: " + mode);
            }

            if (indices == null) {
                // First call, create objects
                indices = new int[size()][];
            }

            // Create a single index at both ends.
            // Note: Data has variable length so we have to compute the upper end for each sample.
            // Re-use the constant lower but we do not bother to cache repeats of the upper.
            // The same 'lower' array instance is deliberately shared by all even slots.
            final int[] lower = {k, k};
            final int noOfSamples = super.size();
            for (int i = 0; i < noOfSamples; i++) {
                final int len = getDataSize(i);
                final int k1 = len - 1 - k;
                indices[i << 1] = lower;
                indices[(i << 1) + 1] = new int[] {k1, k1};
            }
        }
    }
1877 
1878     /**
1879      * Source of a sort function.
1880      */
1881     @State(Scope.Benchmark)
1882     public static class SortFunctionSource {
1883         /** Name of the source. */
1884         @Param({JDK, SP, BM, SBM, DP, DP5,
1885             SBM2,
1886             // Not run by default as it is slow on large data
1887             //"InsertionSortIF", "InsertionSortIT", "InsertionSort", "InsertionSortB"
1888             // Introsort methods with defaults, can configure using the name
1889             // e.g. ISP_SBM_QS50.
1890             ISP, IDP,
1891             })
1892         private String name;
1893 
1894         /** Override of minimum quickselect size. */
1895         @Param({"0"})
1896         private int qs;
1897 
1898         /** The action. */
1899         private Consumer<double[]> function;
1900 
1901         /**
1902          * @return the function
1903          */
1904         public Consumer<double[]> getFunction() {
1905             return function;
1906         }
1907 
1908         /**
1909          * Create the function.
1910          */
1911         @Setup
1912         public void setup() {
1913             Objects.requireNonNull(name);
1914             if (JDK.equals(name)) {
1915                 function = Arrays::sort;
1916             // First generation kth-selector functions (not configurable)
1917             } else if (name.startsWith(SP)) {
1918                 function = PartitionFactory.createKthSelector(name, SP, qs)::sortSP;
1919             } else if (name.startsWith(SBM)) {
1920                 function = PartitionFactory.createKthSelector(name, SBM, qs)::sortSBM;
1921             } else if (name.startsWith(BM)) {
1922                 function = PartitionFactory.createKthSelector(name, BM, qs)::sortBM;
1923             } else if (name.startsWith(DP)) {
1924                 function = PartitionFactory.createKthSelector(name, DP, qs)::sortDP;
1925             } else if (name.startsWith(DP5)) {
1926                 function = PartitionFactory.createKthSelector(name, DP5, qs)::sortDP5;
1927             // 2nd generation partition function
1928             } else if (name.startsWith(SBM2)) {
1929                 function = PartitionFactory.createPartition(name, SBM2, qs, 0)::sortSBM;
1930             // Introsort
1931             } else if (name.startsWith(ISP)) {
1932                 function = PartitionFactory.createPartition(name, ISP, qs, 0)::sortISP;
1933             } else if (name.startsWith(IDP)) {
1934                 function = PartitionFactory.createPartition(name, IDP, qs, 0)::sortIDP;
1935             // Insertion sort variations.
1936             // For parity with the internal version these all use the same (shorter) data
1937             } else if ("InsertionSortIF".equals(name)) {
1938                 function = x -> {
1939                     // Ignored sentinal
1940                     x[0] = Double.NEGATIVE_INFINITY;
1941                     Sorting.sort(x, 1, x.length - 1, false);
1942                 };
1943             } else if ("InsertionSortIT".equals(name)) {
1944                 // Internal version
1945                 function = x -> {
1946                     // Add a sentinal
1947                     x[0] = Double.NEGATIVE_INFINITY;
1948                     Sorting.sort(x, 1, x.length - 1, true);
1949                 };
1950             } else if ("InsertionSort".equals(name)) {
1951                 function = x -> {
1952                     x[0] = Double.NEGATIVE_INFINITY;
1953                     Sorting.sort(x, 1, x.length - 1);
1954                 };
1955             } else if (name.startsWith("PairedInsertionSort")) {
1956                 if (name.endsWith("1")) {
1957                     function = x -> {
1958                         x[0] = Double.NEGATIVE_INFINITY;
1959                         Sorting.sortPairedInternal1(x, 1, x.length - 1);
1960                     };
1961                 } else if (name.endsWith("2")) {
1962                     function = x -> {
1963                         x[0] = Double.NEGATIVE_INFINITY;
1964                         Sorting.sortPairedInternal2(x, 1, x.length - 1);
1965                     };
1966                 } else if (name.endsWith("3")) {
1967                     function = x -> {
1968                         x[0] = Double.NEGATIVE_INFINITY;
1969                         Sorting.sortPairedInternal3(x, 1, x.length - 1);
1970                     };
1971                 } else if (name.endsWith("4")) {
1972                     function = x -> {
1973                         x[0] = Double.NEGATIVE_INFINITY;
1974                         Sorting.sortPairedInternal4(x, 1, x.length - 1);
1975                     };
1976                 }
1977             } else if ("InsertionSortB".equals(name)) {
1978                 function = x -> {
1979                     x[0] = Double.NEGATIVE_INFINITY;
1980                     Sorting.sortb(x, 1, x.length - 1);
1981                 };
1982             // Not actually a sort. This is used to benchmark the speed of heapselect
1983             // for a single k as a stopper function against a full sort of small data.
1984             } else if (name.startsWith(HEAP_SELECT)) {
1985                 final char c = name.charAt(name.length() - 1);
1986                 // This offsets the start by 1 for comparison with insertion sort
1987                 final int k = Character.isDigit(c) ? Character.digit(c, 10) + 1 : 1;
1988                 function = x -> Partition.heapSelectLeft(x, 1, x.length - 1, k, 0);
1989             }
1990             if (function == null) {
1991                 throw new IllegalStateException("Unknown sort function: " + name);
1992             }
1993         }
1994     }
1995 
1996     /**
1997      * Source of a sort function to sort 5 points.
1998      */
1999     @State(Scope.Benchmark)
2000     public static class Sort5FunctionSource {
2001         /** Name of the source. */
2002         @Param({"sort5", "sort5b", "sort5c",
2003             //"sort", "sort5head"
2004             })
2005         private String name;
2006 
2007         /** The action. */
2008         private Consumer<double[]> function;
2009 
2010         /**
2011          * @return the function
2012          */
2013         public Consumer<double[]> getFunction() {
2014             return function;
2015         }
2016 
2017         /**
2018          * Create the function.
2019          */
2020         @Setup
2021         public void setup() {
2022             Objects.requireNonNull(name);
2023             // Note: We do not run this on input of length 5. We can run it on input of
2024             // any length above 5. So we choose indices using a spacing of 1/4 of the range.
2025             // Since we do this for all methods it is a fixed overhead. This allows use
2026             // of a variety of data sizes.
2027             if ("sort5".equals(name)) {
2028                 function = x -> {
2029                     final int s = x.length >> 2;
2030                     Sorting.sort5(x, 0, s, s << 1, x.length - 1 - s, x.length - 1);
2031                 };
2032             } else if ("sort5b".equals(name)) {
2033                 function = x -> {
2034                     final int s = x.length >> 2;
2035                     Sorting.sort5b(x, 0, s, s << 1, x.length - 1 - s, x.length - 1);
2036                 };
2037             } else if ("sort5c".equals(name)) {
2038                 function = x -> {
2039                     final int s = x.length >> 2;
2040                     Sorting.sort5c(x, 0, s, s << 1, x.length - 1 - s, x.length - 1);
2041                 };
2042             } else if ("sort".equals(name)) {
2043                 function = x -> Sorting.sort(x, 0, 4);
2044             } else if ("sort5head".equals(name)) {
2045                 function = x -> Sorting.sort5(x, 0, 1, 2, 3, 4);
2046             // Median of 5. Ensure the median index is computed by storing it in x
2047             } else if ("median5".equals(name)) {
2048                 function = x -> {
2049                     final int s = x.length >> 2;
2050                     x[0] = Sorting.median5(x, 0, s, s << 1, x.length - 1 - s, x.length - 1);
2051                 };
2052             } else if ("median5head".equals(name)) {
2053                 function = x -> x[0] = Sorting.median5(x, 0, 1, 2, 3, 4);
2054             // median of 5 continuous elements
2055             } else if ("med5".equals(name)) {
2056                 function = x -> x[0] = Sorting.median5(x, 0);
2057             } else if ("med5b".equals(name)) {
2058                 function = x -> x[0] = Sorting.median5b(x, 0);
2059             } else if ("med5c".equals(name)) {
2060                 function = x -> x[0] = Sorting.median5c(x, 0);
2061             } else if ("med5d".equals(name)) {
2062                 function = x -> Sorting.median5d(x, 0, 1, 2, 3, 4);
2063             } else {
2064                 throw new IllegalStateException("Unknown sort5 function: " + name);
2065             }
2066         }
2067     }
2068 
2069     /**
2070      * Source of a function to compute the median of 4 points.
2071      */
2072     @State(Scope.Benchmark)
2073     public static class Median4FunctionSource {
2074         /** Name of the source. */
2075         @Param({"lower4", "lower4b", "lower4c", "lower4d", "lower4e",
2076             "upper4", "upper4c", "upper4d",
2077             // Full sort is slower
2078             //"sort4"
2079             })
2080         private String name;
2081 
2082         /** The action. */
2083         private Consumer<double[]> function;
2084 
2085         /**
2086          * @return the function
2087          */
2088         public Consumer<double[]> getFunction() {
2089             return function;
2090         }
2091 
2092         /**
2093          * Create the function.
2094          */
2095         @Setup
2096         public void setup() {
2097             Objects.requireNonNull(name);
2098             // Note: We run this across the entire input array to simulate a pass
2099             // of the quickselect adaptive algorithm.
2100             if ("lower4".equals(name)) {
2101                 function = x -> {
2102                     final int f = x.length >>> 2;
2103                     final int f2 = f + f;
2104                     for (int i = f; i < f2; i++) {
2105                         Sorting.lowerMedian4(x, i - f, i, i + f, i + f2);
2106                     }
2107                 };
2108             } else if ("lower4b".equals(name)) {
2109                 function = x -> {
2110                     final int f = x.length >>> 2;
2111                     final int f2 = f + f;
2112                     for (int i = f; i < f2; i++) {
2113                         Sorting.lowerMedian4b(x, i - f, i, i + f, i + f2);
2114                     }
2115                 };
2116             } else if ("lower4c".equals(name)) {
2117                 function = x -> {
2118                     final int f = x.length >>> 2;
2119                     final int f2 = f + f;
2120                     for (int i = f; i < f2; i++) {
2121                         Sorting.lowerMedian4c(x, i - f, i, i + f, i + f2);
2122                     }
2123                 };
2124             } else if ("lower4d".equals(name)) {
2125                 function = x -> {
2126                     final int f = x.length >>> 2;
2127                     final int f2 = f + f;
2128                     for (int i = f; i < f2; i++) {
2129                         Sorting.lowerMedian4d(x, i - f, i, i + f, i + f2);
2130                     }
2131                 };
2132             } else if ("lower4e".equals(name)) {
2133                 function = x -> {
2134                     final int f = x.length >>> 2;
2135                     final int f2 = f + f;
2136                     for (int i = f; i < f2; i++) {
2137                         Sorting.lowerMedian4e(x, i - f, i, i + f, i + f2);
2138                     }
2139                 };
2140             } else if ("upper4".equals(name)) {
2141                 function = x -> {
2142                     final int f = x.length >>> 2;
2143                     final int f2 = f + f;
2144                     for (int i = f; i < f2; i++) {
2145                         Sorting.upperMedian4(x, i - f, i, i + f, i + f2);
2146                     }
2147                 };
2148             } else if ("upper4c".equals(name)) {
2149                 function = x -> {
2150                     final int f = x.length >>> 2;
2151                     final int f2 = f + f;
2152                     for (int i = f; i < f2; i++) {
2153                         Sorting.upperMedian4c(x, i - f, i, i + f, i + f2);
2154                     }
2155                 };
2156             } else if ("upper4d".equals(name)) {
2157                 function = x -> {
2158                     final int f = x.length >>> 2;
2159                     final int f2 = f + f;
2160                     for (int i = f; i < f2; i++) {
2161                         Sorting.upperMedian4d(x, i - f, i, i + f, i + f2);
2162                     }
2163                 };
2164             } else if ("sort4".equals(name)) {
2165                 function = x -> {
2166                     final int f = x.length >>> 2;
2167                     final int f2 = f + f;
2168                     for (int i = f; i < f2; i++) {
2169                         Sorting.sort4(x, i - f, i, i + f, i + f2);
2170                     }
2171                 };
2172             } else {
2173                 throw new IllegalStateException("Unknown median4 function: " + name);
2174             }
2175         }
2176     }
2177 
2178     /**
2179      * Source of a function to compute the median of 3 points.
2180      */
2181     @State(Scope.Benchmark)
2182     public static class Median3FunctionSource {
2183         /** Name of the source. */
2184         @Param({"sort3", "sort3b", "sort3c"})
2185         private String name;
2186 
2187         /** The action. */
2188         private Consumer<double[]> function;
2189 
2190         /**
2191          * @return the function
2192          */
2193         public Consumer<double[]> getFunction() {
2194             return function;
2195         }
2196 
2197         /**
2198          * Create the function.
2199          */
2200         @Setup
2201         public void setup() {
2202             Objects.requireNonNull(name);
2203             // Note: We run this across the entire input array to simulate a pass
2204             // of the quickselect adaptive algorithm.
2205             if ("sort3".equals(name)) {
2206                 function = x -> {
2207                     final int f = x.length / 3;
2208                     final int f2 = f + f;
2209                     for (int i = f; i < f2; i++) {
2210                         Sorting.sort3(x, i - f, i, i + f);
2211                     }
2212                 };
2213             } else if ("sort3b".equals(name)) {
2214                 function = x -> {
2215                     final int f = x.length / 3;
2216                     final int f2 = f + f;
2217                     for (int i = f; i < f2; i++) {
2218                         Sorting.sort3b(x, i - f, i, i + f);
2219                     }
2220                 };
2221             } else if ("sort3c".equals(name)) {
2222                 function = x -> {
2223                     final int f = x.length / 3;
2224                     final int f2 = f + f;
2225                     for (int i = f; i < f2; i++) {
2226                         Sorting.sort3c(x, i - f, i, i + f);
2227                     }
2228                 };
2229             } else {
2230                 throw new IllegalStateException("Unknown sort3 function: " + name);
2231             }
2232         }
2233     }
2234 
2235     /**
2236      * Source of a k-th selector function for {@code double} data.
2237      */
2238     @State(Scope.Benchmark)
2239     public static class DoubleKFunctionSource {
2240         /** Name of the source. */
2241         @Param({SORT + JDK, SPH,
2242             SP, BM, SBM,
2243             DP, DP5,
2244             SBM2,
2245             ISP, IDP,
2246             LSP, LINEAR, SELECT})
2247         private String name;
2248 
2249         /** Override of minimum quickselect size. */
2250         @Param({"0"})
2251         private int qs;
2252 
2253         /** Override of minimum edgeselect constant. */
2254         @Param({"0"})
2255         private int ec;
2256 
2257         /** The action. */
2258         private BiFunction<double[], int[], double[]> function;
2259 
2260         /**
2261          * @return the function
2262          */
2263         public BiFunction<double[], int[], double[]> getFunction() {
2264             return function;
2265         }
2266 
2267         /**
2268          * Create the function.
2269          */
2270         @Setup
2271         public void setup() {
2272             Objects.requireNonNull(name);
2273             // Note: For parity in the test, each partition method that accepts the keys as any array
2274             // receives a clone of the indices.
2275             if (name.equals(BASELINE)) {
2276                 function = (data, indices) -> extractIndices(data, indices.clone());
2277             } else  if (name.startsWith(SORT)) {
2278                 // Sort variants (do not clone the keys)
2279                 if (name.contains(ISP)) {
2280                     final Partition part = PartitionFactory.createPartition(name.substring(SORT.length()), ISP, qs, ec);
2281                     function = (data, indices) -> {
2282                         part.sortISP(data);
2283                         return extractIndices(data, indices);
2284                     };
2285                 } else if (name.contains(IDP)) {
2286                     final Partition part = PartitionFactory.createPartition(name.substring(SORT.length()), IDP, qs, ec);
2287                     function = (data, indices) -> {
2288                         part.sortIDP(data);
2289                         return extractIndices(data, indices);
2290                     };
2291                 } else if (name.contains(JDK)) {
2292                     function = (data, indices) -> {
2293                         Arrays.sort(data);
2294                         return extractIndices(data, indices);
2295                     };
2296                 }
2297             // First generation kth-selector functions
2298             } else if (name.startsWith(SPH)) {
2299                 // Ported CM implementation with a heap
2300                 final KthSelector selector = PartitionFactory.createKthSelector(name, SPH, qs);
2301                 function = (data, indices) -> {
2302                     final int n = indices.length;
2303                     // Note: Pivots heap is not optimal here but should be enough for most cases.
2304                     // The size matches that in the Commons Math Percentile class
2305                     final int[] pivots = n <= 1 ?
2306                         KthSelector.NO_PIVOTS :
2307                         new int[1023];
2308                     final double[] x = new double[indices.length];
2309                     for (int i = 0; i < indices.length; i++) {
2310                         x[i] = selector.selectSPH(data, pivots, indices[i], null);
2311                     }
2312                     return x;
2313                 };
2314             // The following methods clone the indices to avoid destructive modification
2315             } else if (name.startsWith(SPN)) {
2316                 final KthSelector selector = PartitionFactory.createKthSelector(name, SPN, qs);
2317                 function = (data, indices) -> {
2318                     selector.partitionSPN(data, indices.clone());
2319                     return extractIndices(data, indices);
2320                 };
2321             } else if (name.startsWith(SP)) {
2322                 final KthSelector selector = PartitionFactory.createKthSelector(name, SP, qs);
2323                 function = (data, indices) -> {
2324                     selector.partitionSP(data, indices.clone());
2325                     return extractIndices(data, indices);
2326                 };
2327             } else if (name.startsWith(BM)) {
2328                 final KthSelector selector = PartitionFactory.createKthSelector(name, BM, qs);
2329                 function = (data, indices) -> {
2330                     selector.partitionBM(data, indices.clone());
2331                     return extractIndices(data, indices);
2332                 };
2333             } else if (name.startsWith(SBM)) {
2334                 final KthSelector selector = PartitionFactory.createKthSelector(name, SBM, qs);
2335                 function = (data, indices) -> {
2336                     selector.partitionSBM(data, indices.clone());
2337                     return extractIndices(data, indices);
2338                 };
2339             } else if (name.startsWith(DP)) {
2340                 final KthSelector selector = PartitionFactory.createKthSelector(name, DP, qs);
2341                 function = (data, indices) -> {
2342                     selector.partitionDP(data, indices.clone());
2343                     return extractIndices(data, indices);
2344                 };
2345             } else if (name.startsWith(DP5)) {
2346                 final KthSelector selector = PartitionFactory.createKthSelector(name, DP5, qs);
2347                 function = (data, indices) -> {
2348                     selector.partitionDP5(data, indices.clone());
2349                     return extractIndices(data, indices);
2350                 };
2351             // Second generation partition function with configurable key strategy
2352             } else if (name.startsWith(SBM2)) {
2353                 final Partition part = PartitionFactory.createPartition(name, SBM2, qs, ec);
2354                 function = (data, indices) -> {
2355                     part.partitionSBM(data, indices.clone(), indices.length);
2356                     return extractIndices(data, indices);
2357                 };
2358             // Floyd-Rivest partition functions
2359             } else if (name.startsWith(FR)) {
2360                 final Partition part = PartitionFactory.createPartition(name, FR, qs, ec);
2361                 function = (data, indices) -> {
2362                     part.partitionFR(data, indices.clone(), indices.length);
2363                     return extractIndices(data, indices);
2364                 };
2365             } else if (name.startsWith(KFR)) {
2366                 final Partition part = PartitionFactory.createPartition(name, KFR, qs, ec);
2367                 function = (data, indices) -> {
2368                     part.partitionKFR(data, indices.clone(), indices.length);
2369                     return extractIndices(data, indices);
2370                 };
2371             // Introselect implementations which are highly configurable
2372             } else if (name.startsWith(ISP)) {
2373                 final Partition part = PartitionFactory.createPartition(name, ISP, qs, ec);
2374                 function = (data, indices) -> {
2375                     part.partitionISP(data, indices.clone(), indices.length);
2376                     return extractIndices(data, indices);
2377                 };
2378             } else if (name.startsWith(IDP)) {
2379                 final Partition part = PartitionFactory.createPartition(name, IDP, qs, ec);
2380                 function = (data, indices) -> {
2381                     part.partitionIDP(data, indices.clone(), indices.length);
2382                     return extractIndices(data, indices);
2383                 };
2384             } else if (name.startsWith(SELECT)) {
2385                 // Not configurable
2386                 function = (data, indices) -> {
2387                     Selection.select(data, indices.clone());
2388                     return extractIndices(data, indices);
2389                 };
2390             // Linearselect (median-of-medians) implementation (stopper for quickselect)
2391             } else if (name.startsWith(LSP)) {
2392                 final Partition part = PartitionFactory.createPartition(name, LSP, qs, ec);
2393                 function = (data, indices) -> {
2394                     part.partitionLSP(data, indices.clone(), indices.length);
2395                     return extractIndices(data, indices);
2396                 };
2397             // Linearselect (optimised median-of-medians) implementation (stopper for quickselect)
2398             } else if (name.startsWith(LINEAR)) {
2399                 final Partition part = PartitionFactory.createPartition(name, LINEAR, qs, ec);
2400                 function = (data, indices) -> {
2401                     part.partitionLinear(data, indices.clone(), indices.length);
2402                     return extractIndices(data, indices);
2403                 };
2404             } else if (name.startsWith(QA2)) {
2405                 // Configurable only by static properties.
2406                 // Default to FR sampling for the initial mode.
2407                 final int mode = PartitionFactory.getControlFlags(new String[] {name}, -1);
2408                 final int inc = PartitionFactory.getOptionFlags(new String[] {name}, 1);
2409                 Partition.configureQaAdaptive(mode, inc);
2410                 function = (data, indices) -> {
2411                     Partition.partitionQA2(data, indices.clone(), indices.length);
2412                     return extractIndices(data, indices);
2413                 };
2414             } else if (name.startsWith(QA)) {
2415                 final Partition part = PartitionFactory.createPartition(name, QA, qs, ec);
2416                 function = (data, indices) -> {
2417                     part.partitionQA(data, indices.clone(), indices.length);
2418                     return extractIndices(data, indices);
2419                 };
2420             // Heapselect implementation (stopper for quickselect)
2421             } else if (name.startsWith(HEAP_SELECT)) {
2422                 function = (data, indices) -> {
2423                     int min = indices[indices.length - 1];
2424                     int max = min;
2425                     for (int i = indices.length - 1; --i >= 0;) {
2426                         min = Math.min(min, indices[i]);
2427                         max = Math.max(max, indices[i]);
2428                     }
2429                     Partition.heapSelectRange(data, 0, data.length - 1, min, max);
2430                     return extractIndices(data, indices);
2431                 };
2432             }
2433             if (function == null) {
2434                 throw new IllegalStateException("Unknown selector function: " + name);
2435             }
2436         }
2437 
2438         /**
2439          * Extract the data at the specified indices.
2440          *
2441          * @param data Data.
2442          * @param indices Indices.
2443          * @return the data
2444          */
2445         private static double[] extractIndices(double[] data, int[] indices) {
2446             final double[] x = new double[indices.length];
2447             for (int i = 0; i < indices.length; i++) {
2448                 x[i] = data[indices[i]];
2449             }
2450             return x;
2451         }
2452     }
2453 
2454     /**
2455      * Source of a k-th selector function for {@code int} data.
2456      */
2457     @State(Scope.Benchmark)
2458     public static class IntKFunctionSource {
2459         /** Name of the source. */
2460         @Param({SORT + JDK, SELECT})
2461         private String name;
2462 
2463         /** The action. */
2464         private BiFunction<int[], int[], int[]> function;
2465 
2466         /**
2467          * @return the function
2468          */
2469         public BiFunction<int[], int[], int[]> getFunction() {
2470             return function;
2471         }
2472 
2473         /**
2474          * Create the function.
2475          */
2476         @Setup
2477         public void setup() {
2478             Objects.requireNonNull(name);
2479             // Note: Always clone the indices
2480             if (name.equals(BASELINE)) {
2481                 function = (data, indices) -> extractIndices(data, indices.clone());
2482             } else  if (name.startsWith(SORT)) {
2483                 function = (data, indices) -> {
2484                     Arrays.sort(data);
2485                     return extractIndices(data, indices.clone());
2486                 };
2487             } else if (name.startsWith(SELECT)) {
2488                 function = (data, indices) -> {
2489                     Selection.select(data, indices.clone());
2490                     return extractIndices(data, indices);
2491                 };
2492             }
2493             if (function == null) {
2494                 throw new IllegalStateException("Unknown int selector function: " + name);
2495             }
2496         }
2497 
2498         /**
2499          * Extract the data at the specified indices.
2500          *
2501          * @param data Data.
2502          * @param indices Indices.
2503          * @return the data
2504          */
2505         private static int[] extractIndices(int[] data, int[] indices) {
2506             final int[] x = new int[indices.length];
2507             for (int i = 0; i < indices.length; i++) {
2508                 x[i] = data[indices[i]];
2509             }
2510             return x;
2511         }
2512     }
2513 
2514     /**
2515      * Source of a function that pre-processes NaN and signed zeros (-0.0).
2516      *
2517      * <p>Detection of signed zero using direct conversion of raw bits and
2518      * comparison with the bit representation is noticeably faster than comparison
2519      * using {@code == 0.0}.
2520      */
2521     @State(Scope.Benchmark)
2522     public static class SortNaNFunctionSource {
2523         /** Name of the source. */
2524         @Param({"RawZeroNaN", "ZeroSignNaN", "NaNRawZero", "NaNZeroSign"})
2525         private String name;
2526 
2527         /** The action. */
2528         private BiConsumer<double[], Blackhole> function;
2529 
2530         /**
2531          * @return the function
2532          */
2533         public BiConsumer<double[], Blackhole> getFunction() {
2534             return function;
2535         }
2536 
2537         /**
2538          * Create the function.
2539          */
2540         @Setup
2541         public void setup() {
2542             // Functions sort NaN and detect signed zeros.
2543             // For convenience the function accepts the blackhole to handle any
2544             // output from the processing.
2545             if ("RawZeroNaN".equals(name)) {
2546                 function = (a, bh) -> {
2547                     int cn = 0;
2548                     int end = a.length;
2549                     for (int i = end; --i >= 0;) {
2550                         final double v = a[i];
2551                         if (Double.doubleToRawLongBits(v) == Long.MIN_VALUE) {
2552                             cn++;
2553                             a[i] = 0.0;
2554                         } else if (v != v) {
2555                             a[i] = a[--end];
2556                             a[end] = v;
2557                         }
2558                     }
2559                     bh.consume(cn);
2560                     bh.consume(end);
2561                 };
2562             } else if ("ZeroSignNaN".equals(name)) {
2563                 function = (a, bh) -> {
2564                     int cn = 0;
2565                     int end = a.length;
2566                     for (int i = end; --i >= 0;) {
2567                         final double v = a[i];
2568                         if (v == 0.0 && Double.doubleToRawLongBits(v) < 0) {
2569                             cn++;
2570                             a[i] = 0.0;
2571                         } else if (v != v) {
2572                             a[i] = a[--end];
2573                             a[end] = v;
2574                         }
2575                     }
2576                     bh.consume(cn);
2577                     bh.consume(end);
2578                 };
2579             } else if ("NaNRawZero".equals(name)) {
2580                 function = (a, bh) -> {
2581                     int cn = 0;
2582                     int end = a.length;
2583                     for (int i = end; --i >= 0;) {
2584                         final double v = a[i];
2585                         if (v != v) {
2586                             a[i] = a[--end];
2587                             a[end] = v;
2588                         } else if (Double.doubleToRawLongBits(v) == Long.MIN_VALUE) {
2589                             cn++;
2590                             a[i] = 0.0;
2591                         }
2592                     }
2593                     bh.consume(cn);
2594                     bh.consume(end);
2595                 };
2596             } else if ("NaNZeroSign".equals(name)) {
2597                 function = (a, bh) -> {
2598                     int cn = 0;
2599                     int end = a.length;
2600                     for (int i = end; --i >= 0;) {
2601                         final double v = a[i];
2602                         if (v != v) {
2603                             a[i] = a[--end];
2604                             a[end] = v;
2605                         } else if (v == 0.0 && Double.doubleToRawLongBits(v) < 0) {
2606                             cn++;
2607                             a[i] = 0.0;
2608                         }
2609                     }
2610                     bh.consume(cn);
2611                     bh.consume(end);
2612                 };
2613             } else {
2614                 throw new IllegalStateException("Unknown sort NaN function: " + name);
2615             }
2616         }
2617     }
2618 
2619     /**
2620      * Source of a edge selector function. This is a function that selects indices
2621      * that are clustered close to the edge of the data.
2622      *
2623      * <p>This is a specialised class to allow benchmarking the switch from using
2624      * quickselect partitioning to using edgeselect.
2625      */
2626     @State(Scope.Benchmark)
2627     public static class EdgeFunctionSource {
2628         /** Name of the source.
2629          * For introselect methods this should effectively turn-off edgeselect. */
2630         @Param({HEAP_SELECT, ISP + "_EC0", IDP + "_EC0",
2631             // Only use for small length as sort insertion is worst case Order(k * (right - left))
2632             // vs heap select() is O(k - left) + O((right - k) * log(k - left))
2633             //SORT_SELECT
2634             })
2635         private String name;
2636 
2637         /** The action. */
2638         private BiFunction<double[], int[], double[]> function;
2639 
2640         /**
2641          * @return the function
2642          */
2643         public BiFunction<double[], int[], double[]> getFunction() {
2644             return function;
2645         }
2646 
2647         /**
2648          * Create the function.
2649          */
2650         @Setup
2651         public void setup() {
2652             Objects.requireNonNull(name);
2653             // Direct use of heapselect. This has variations which use different
2654             // optimisations for small heaps.
2655             // Note: Optimisation for small heap size (n=1,2) is not observable on large data.
2656             // It requires the use of small data (e.g. len=[16, 32)) to observe differences.
2657             // The main overhead is the test for insertion against the current top of the
2658             // heap which grows increasingly unlikely as the range is scanned.
2659             // Optimisation for n=1 is negligible; for n=2 it is up to 10%. However using only
2660             // heapSelectRange2 is not as fast as the non-optimised heapSelectRange0
2661             // when the heap is size 1. For n=1 the heap insertion branch prediction
2662             // can learn the heap has no children and skip descending the heap, whereas
2663             // heap size n=2 can descend 1 level if the child is smaller/bigger. This is not
2664             // as fast as dedicated code for the single child case.
2665             // This benchmark requires repeating with variable heap size to avoid branch
2666             // prediction learning what to do, i.e. use with an index source that has variable
2667             // distance from the edge.
2668             if (HEAP_SELECT.equals(name)) {
2669                 function = (data, indices) -> {
2670                     heapSelectRange0(data, 0, data.length - 1, indices[0], indices[1]);
2671                     return extractIndices(data, indices[0], indices[1]);
2672                 };
2673             } else if ((HEAP_SELECT + "1").equals(name)) {
2674                 function = (data, indices) -> {
2675                     heapSelectRange1(data, 0, data.length - 1, indices[0], indices[1]);
2676                     return extractIndices(data, indices[0], indices[1]);
2677                 };
2678             } else if ((HEAP_SELECT + "2").equals(name)) {
2679                 function = (data, indices) -> {
2680                     heapSelectRange2(data, 0, data.length - 1, indices[0], indices[1]);
2681                     return extractIndices(data, indices[0], indices[1]);
2682                 };
2683             } else if ((HEAP_SELECT + "12").equals(name)) {
2684                 function = (data, indices) -> {
2685                     heapSelectRange12(data, 0, data.length - 1, indices[0], indices[1]);
2686                     return extractIndices(data, indices[0], indices[1]);
2687                 };
2688             // Only use on small edge as insertion is Order(k)
2689             } else if (SORT_SELECT.equals(name)) {
2690                 function = (data, indices) -> {
2691                     Partition.sortSelectRange(data, 0, data.length - 1, indices[0], indices[1]);
2692                     return extractIndices(data, indices[0], indices[1]);
2693                 };
2694             // Introselect methods - these should be configured to not use edgeselect.
2695             // These directly call the introselect method to skip NaN/signed zero processing.
2696             } else if (name.startsWith(ISP)) {
2697                 final Partition part = PartitionFactory.createPartition(name, ISP);
2698                 function = (data, indices) -> {
2699                     part.introselect(part.getSPFunction(), data,
2700                         0, data.length - 1, IndexIntervals.interval(indices[0], indices[1]), 10000);
2701                     return extractIndices(data, indices[0], indices[1]);
2702                 };
2703             } else if (name.startsWith(IDP)) {
2704                 final Partition part = PartitionFactory.createPartition(name, IDP);
2705                 function = (data, indices) -> {
2706                     part.introselect(Partition::partitionDP, data,
2707                         0, data.length - 1, IndexIntervals.interval(indices[0], indices[1]), 10000);
2708                     return extractIndices(data, indices[0], indices[1]);
2709                 };
2710             } else {
2711                 throw new IllegalStateException("Unknown edge selector function: " + name);
2712             }
2713         }
2714 
2715         /**
2716          * Partition the elements between {@code ka} and {@code kb} using a heap select
2717          * algorithm. It is assumed {@code left <= ka <= kb <= right}.
2718          *
2719          * <p>Note:
2720          *
2721          * <p>This is a copy of {@link Partition#heapSelectRange(double[], int, int, int, int)}.
2722          * It uses no optimised versions for small heaps.
2723          *
2724          * @param a Data array to use to find out the K<sup>th</sup> value.
2725          * @param left Lower bound (inclusive).
2726          * @param right Upper bound (inclusive).
2727          * @param ka Lower index to select.
2728          * @param kb Upper index to select.
2729          */
2730         static void heapSelectRange0(double[] a, int left, int right, int ka, int kb) {
2731             if (right - left < Partition.MIN_HEAPSELECT_SIZE) {
2732                 Sorting.sort(a, left, right);
2733                 return;
2734             }
2735             if (kb - left < right - ka) {
2736                 Partition.heapSelectLeft(a, left, right, kb, kb - ka);
2737             } else {
2738                 Partition.heapSelectRight(a, left, right, ka, kb - ka);
2739             }
2740         }
2741 
2742         /**
2743          * Partition the elements between {@code ka} and {@code kb} using a heap select
2744          * algorithm. It is assumed {@code left <= ka <= kb <= right}.
2745          *
2746          * <p>Note:
2747          *
2748          * <p>This is a copy of {@link Partition#heapSelectRange(double[], int, int, int, int)}.
2749          * It uses no optimised versions for small heap of size 1.
2750          *
2751          * @param a Data array to use to find out the K<sup>th</sup> value.
2752          * @param left Lower bound (inclusive).
2753          * @param right Upper bound (inclusive).
2754          * @param ka Lower index to select.
2755          * @param kb Upper index to select.
2756          */
2757         static void heapSelectRange1(double[] a, int left, int right, int ka, int kb) {
2758             if (right - left < Partition.MIN_HEAPSELECT_SIZE) {
2759                 Sorting.sort(a, left, right);
2760                 return;
2761             }
2762             if (kb - left < right - ka) {
2763                 // Optimise
2764                 if (kb == left) {
2765                     Partition.selectMinIgnoreZeros(a, left, right);
2766                 } else {
2767                     Partition.heapSelectLeft(a, left, right, kb, kb - ka);
2768                 }
2769             } else {
2770                 // Optimise
2771                 if (ka == right) {
2772                     Partition.selectMaxIgnoreZeros(a, left, right);
2773                 } else {
2774                     Partition.heapSelectRight(a, left, right, ka, kb - ka);
2775                 }
2776             }
2777         }
2778 
2779         /**
2780          * Partition the elements between {@code ka} and {@code kb} using a heap select
2781          * algorithm. It is assumed {@code left <= ka <= kb <= right}.
2782          *
2783          * <p>Note:
2784          *
2785          * <p>This is a copy of {@link Partition#heapSelectRange(double[], int, int, int, int)}.
2786          * It uses optimised versions for small heap of size 2.
2787          *
2788          * @param a Data array to use to find out the K<sup>th</sup> value.
2789          * @param left Lower bound (inclusive).
2790          * @param right Upper bound (inclusive).
2791          * @param ka Lower index to select.
2792          * @param kb Upper index to select.
2793          */
2794         static void heapSelectRange2(double[] a, int left, int right, int ka, int kb) {
2795             if (right - left < Partition.MIN_HEAPSELECT_SIZE) {
2796                 Sorting.sort(a, left, right);
2797                 return;
2798             }
2799             if (kb - left < right - ka) {
2800                 // Optimise
2801                 if (kb - 1 <= left) {
2802                     Partition.selectMin2IgnoreZeros(a, left, right);
2803                 } else {
2804                     Partition.heapSelectLeft(a, left, right, kb, kb - ka);
2805                 }
2806             } else {
2807                 // Optimise
2808                 if (ka + 1 >= right) {
2809                     Partition.selectMax2IgnoreZeros(a, left, right);
2810                 } else {
2811                     Partition.heapSelectRight(a, left, right, ka, kb - ka);
2812                 }
2813             }
2814         }
2815 
2816         /**
2817          * Partition the elements between {@code ka} and {@code kb} using a heap select
2818          * algorithm. It is assumed {@code left <= ka <= kb <= right}.
2819          *
2820          * <p>Note:
2821          *
2822          * <p>This is a copy of {@link Partition#heapSelectRange(double[], int, int, int, int)}.
2823          * It uses optimised versions for small heap of size 1 and 2.
2824          *
2825          * @param a Data array to use to find out the K<sup>th</sup> value.
2826          * @param left Lower bound (inclusive).
2827          * @param right Upper bound (inclusive).
2828          * @param ka Lower index to select.
2829          * @param kb Upper index to select.
2830          */
2831         static void heapSelectRange12(double[] a, int left, int right, int ka, int kb) {
2832             if (right - left < Partition.MIN_HEAPSELECT_SIZE) {
2833                 Sorting.sort(a, left, right);
2834                 return;
2835             }
2836             if (kb - left < right - ka) {
2837                 // Optimise
2838                 if (kb - 1 <= left) {
2839                     if (kb == left) {
2840                         Partition.selectMinIgnoreZeros(a, left, right);
2841                     } else {
2842                         Partition.selectMin2IgnoreZeros(a, left, right);
2843                     }
2844                 } else {
2845                     Partition.heapSelectLeft(a, left, right, kb, kb - ka);
2846                 }
2847             } else {
2848                 // Optimise
2849                 if (ka + 1 >= right) {
2850                     if (ka == right) {
2851                         Partition.selectMaxIgnoreZeros(a, left, right);
2852                     } else {
2853                         Partition.selectMax2IgnoreZeros(a, left, right);
2854                     }
2855                 } else {
2856                     Partition.heapSelectRight(a, left, right, ka, kb - ka);
2857                 }
2858             }
2859         }
2860 
2861         /**
2862          * Extract the data at the specified indices.
2863          *
2864          * @param data Data.
2865          * @param l Lower bound (inclusive).
2866          * @param r Upper bound (inclusive).
2867          * @return the data
2868          */
2869         private static double[] extractIndices(double[] data, int l, int r) {
2870             final double[] x = new double[r - l + 1];
2871             for (int i = l; i <= r; i++) {
2872                 x[i - l] = data[i];
2873             }
2874             return x;
2875         }
2876     }
2877 
2878     /**
2879      * Source of an search function. This is a function that find an index
2880      * in a sorted list of indices, e.g. a binary search.
2881      */
2882     @State(Scope.Benchmark)
2883     public static class IndexSearchFunctionSource {
2884         /** Name of the source. */
2885         @Param({"Binary",
2886             //"binarySearch",
2887             "Scan"})
2888         private String name;
2889 
2890         /** The action. */
2891         private SearchFunction function;
2892 
2893         /**
2894          * Define a search function.
2895          */
2896         public interface SearchFunction {
2897             /**
2898              * Find the index of the element {@code k}, or the closest index
2899              * to the element (implementation definitions may vary).
2900              *
2901              * @param a Data.
2902              * @param k Element.
2903              * @return the index
2904              */
2905             int find(int[] a, int k);
2906         }
2907 
2908         /**
2909          * @return the function
2910          */
2911         public SearchFunction getFunction() {
2912             return function;
2913         }
2914 
2915         /**
2916          * Create the function.
2917          */
2918         @Setup
2919         public void setup() {
2920             Objects.requireNonNull(name);
2921             if ("Binary".equals(name)) {
2922                 function = (keys, k) -> Partition.searchLessOrEqual(keys, 0, keys.length - 1, k);
2923             } else if ("binarySearch".equals(name)) {
2924                 function = (keys, k) -> Arrays.binarySearch(keys, 0, keys.length, k);
2925             } else if ("Scan".equals(name)) {
2926                 function = (keys, k) -> {
2927                     // Assume that k >= keys[0]
2928                     int i = keys.length;
2929                     do {
2930                         --i;
2931                     } while (keys[i] > k);
2932                     return i;
2933                 };
2934             } else {
2935                 throw new IllegalStateException("Unknown index search function: " + name);
2936             }
2937         }
2938     }
2939 
2940     /**
2941      * Benchmark a sort on the data.
2942      *
2943      * @param function Source of the function.
2944      * @param source Source of the data.
2945      * @param bh Data sink.
2946      */
2947     @Benchmark
2948     public void sort(SortFunctionSource function, SortSource source, Blackhole bh) {
2949         final int size = source.size();
2950         final Consumer<double[]> fun = function.getFunction();
2951         for (int j = -1; ++j < size;) {
2952             final double[] y = source.getData(j);
2953             fun.accept(y);
2954             bh.consume(y);
2955         }
2956     }
2957 
2958     /**
2959      * Benchmark a sort of 5 data values.
2960      * This tests the pivot selection from 5 values used in dual-pivot partitioning.
2961      *
2962      * @param function Source of the function.
2963      * @param source Source of the data.
2964      * @param bh Data sink.
2965      */
2966     @Benchmark
2967     public void fiveSort(Sort5FunctionSource function, SortSource source, Blackhole bh) {
2968         final int size = source.size();
2969         final Consumer<double[]> fun = function.getFunction();
2970         for (int j = -1; ++j < size;) {
2971             final double[] y = source.getData(j);
2972             fun.accept(y);
2973             bh.consume(y);
2974         }
2975     }
2976 
2977     /**
2978      * Benchmark a pass over the entire data computing the medians of 4 data values.
2979      * This simulates a QuickselectAdaptive step.
2980      *
2981      * @param function Source of the function.
2982      * @param source Source of the data.
2983      * @param bh Data sink.
2984      */
2985     @Benchmark
2986     public void fourMedian(Median4FunctionSource function, SortSource source, Blackhole bh) {
2987         final int size = source.size();
2988         final Consumer<double[]> fun = function.getFunction();
2989         for (int j = -1; ++j < size;) {
2990             final double[] y = source.getData(j);
2991             fun.accept(y);
2992             bh.consume(y);
2993         }
2994     }
2995 
2996     /**
2997      * Benchmark a pass over the entire data computing the medians of 3 data values.
2998      * This simulates a QuickselectAdaptive step.
2999      *
3000      * @param function Source of the function.
3001      * @param source Source of the data.
3002      * @param bh Data sink.
3003      */
3004     @Benchmark
3005     public void threeMedian(Median3FunctionSource function, SortSource source, Blackhole bh) {
3006         final int size = source.size();
3007         final Consumer<double[]> fun = function.getFunction();
3008         for (int j = -1; ++j < size;) {
3009             final double[] y = source.getData(j);
3010             fun.accept(y);
3011             bh.consume(y);
3012         }
3013     }
3014 
3015     /**
3016      * Benchmark partitioning using k partition indices.
3017      *
3018      * @param function Source of the function.
3019      * @param source Source of the data.
3020      * @param bh Data sink.
3021      */
3022     @Benchmark
3023     public void doublePartition(DoubleKFunctionSource function, KSource source, Blackhole bh) {
3024         final int size = source.size();
3025         final BiFunction<double[], int[], double[]> fun = function.getFunction();
3026         for (int j = -1; ++j < size;) {
3027             // Note: This uses the indices without cloning. This is because some
3028             // functions do not destructively modify the data.
3029             bh.consume(fun.apply(source.getData(j), source.getIndices(j)));
3030         }
3031     }
3032 
3033     /**
3034      * Benchmark partitioning using k partition indices on {@code int} data.
3035      *
3036      * @param function Source of the function.
3037      * @param source Source of the data.
3038      * @param bh Data sink.
3039      */
3040     @Benchmark
3041     public void intPartition(IntKFunctionSource function, KSource source, Blackhole bh) {
3042         final int size = source.size();
3043         final BiFunction<int[], int[], int[]> fun = function.getFunction();
3044         for (int j = -1; ++j < size;) {
3045             // Note: This uses the indices without cloning. This is because some
3046             // functions do not destructively modify the data.
3047             bh.consume(fun.apply(source.getIntData(j), source.getIndices(j)));
3048         }
3049     }
3050 
3051     /**
3052      * Benchmark partitioning of an interval of indices a set distance from the edge.
3053      * This is used to benchmark the switch from quickselect partitioning to edgeselect.
3054      *
3055      * @param function Source of the function.
3056      * @param source Source of the data.
3057      * @param bh Data sink.
3058      */
3059     @Benchmark
3060     public void edgeSelect(EdgeFunctionSource function, EdgeSource source, Blackhole bh) {
3061         final int size = source.size();
3062         final BiFunction<double[], int[], double[]> fun = function.getFunction();
3063         for (int j = -1; ++j < size;) {
3064             bh.consume(fun.apply(source.getData(j), source.getIndices(j)));
3065         }
3066     }
3067 
3068     /**
3069      * Benchmark pre-processing of NaN and signed zeros (-0.0).
3070      *
3071      * @param function Source of the function.
3072      * @param source Source of the data.
3073      * @param bh Data sink.
3074      */
3075     @Benchmark
3076     public void nanZero(SortNaNFunctionSource function, SortSource source, Blackhole bh) {
3077         final int size = source.size();
3078         final BiConsumer<double[], Blackhole> fun = function.getFunction();
3079         for (int j = -1; ++j < size;) {
3080             fun.accept(source.getData(j), bh);
3081         }
3082     }
3083 
3084     /**
3085      * Benchmark the search of an ordered set of indices.
3086      *
3087      * @param function Source of the search.
3088      * @param source Source of the data.
3089      * @return value to consume
3090      */
3091     @Benchmark
3092     public long indexSearch(IndexSearchFunctionSource function, SplitIndexSource source) {
3093         final IndexSearchFunctionSource.SearchFunction fun = function.getFunction();
3094         // Ensure we have something to consume during the benchmark
3095         long sum = 0;
3096         for (int i = source.samples(); --i >= 0;) {
3097             // Single point in the range
3098             sum += fun.find(source.getIndices(i), source.getPoint(i));
3099         }
3100         return sum;
3101     }
3102 
3103     /**
3104      * Benchmark the tracking of an interval of indices during a partition algorithm.
3105      *
3106      * <p>The {@link SearchableInterval} is created for the indices of interest. These are then
3107      * cut at all points in the interval between indices to simulate a partition algorithm
3108      * dividing the data and requiring a new interval to use in each part:
3109      * <pre>{@code
3110      *            cut
3111      *             |
3112      * -------k1--------k2---------k3---- ... ---------kn--------
3113      *          <-- scan previous
3114      *    scan next -->
3115      * }</pre>
3116      *
3117      * <p>Note: If a cut is made in the interval then the smallest region of data
3118      * that was most recently partitioned was the length between the two flanking k.
3119      * This involves a full scan (and partitioning) over the data of length (k2 - k1).
3120      * A BitSet-type structure will require a scan over 1/64 of this length of data
3121      * to find the next and previous index from a cut point. In practice
3122      * the interval may be partitioned over a much larger length, e.g. (kn - k1).
3123      * Thus the length of time for the partition algorithm is expected to be at least
3124      * 64x the length of time for the BitSet-type scan. The disadvantage of the
3125      * BitSet-type structure is memory consumption. For a small number of keys the
3126      * structures that search the entire set of keys are fast enough. At very high
3127      * density the BitSet-type structures are preferred.
3128      *
3129      * @param function Source of the interval.
3130      * @param source Source of the data.
3131      * @return value to consume
3132      */
3133     @Benchmark
3134     public long searchableIntervalNextPrevious(SearchableIntervalSource function, SplitIndexSource source) {
3135         final int[][] indices = source.getIndices();
3136         final int[][] points = source.getPoints();
3137         // Ensure we have something to consume during the benchmark
3138         long sum = 0;
3139         for (int i = 0; i < indices.length; i++) {
3140             final int[] x = indices[i];
3141             final int[] p = points[i];
3142             final SearchableInterval interval = function.create(x);
3143             for (final int k : p) {
3144                 sum += interval.nextIndex(k);
3145                 sum += interval.previousIndex(k);
3146             }
3147         }
3148         return sum;
3149     }
3150 
3151     /**
3152      * Benchmark the tracking of an interval of indices during a partition algorithm.
3153      *
3154      * <p>This is similar to
3155      * {@link #searchableIntervalNextPrevious(SearchableIntervalSource, SplitIndexSource)}.
3156      * It uses the {@link SearchableInterval#split(int, int, int[])} method. This requires
3157      * {@code k} to be in an open interval. Some modes of the {@link IndexSource} do not
3158      * ensure that {@code left < k < right} for all split points so we have to check this
3159      * before calling the split method (it is a fixed overhead for the benchmark).
3160      *
3161      * @param function Source of the interval.
3162      * @param source Source of the data.
3163      * @return value to consume
3164      */
3165     @Benchmark
3166     public long searchableIntervalSplit(SearchableIntervalSource function, SplitIndexSource source) {
3167         final int[][] indices = source.getIndices();
3168         final int[][] points = source.getPoints();
3169         // Ensure we have something to consume during the benchmark
3170         long sum = 0;
3171         final int[] bound = {0};
3172         for (int i = 0; i < indices.length; i++) {
3173             final int[] x = indices[i];
3174             final int[] p = points[i];
3175             // Note: A partition algorithm would only call split if there are indices
3176             // above and below the split point.
3177             final SearchableInterval interval = function.create(x);
3178             final int left = interval.left();
3179             final int right = interval.right();
3180             for (final int k : p) {
3181                 // Check k is in the open interval (left, right)
3182                 if (left < k && k < right) {
3183                     sum += interval.split(k, k, bound);
3184                     sum += bound[0];
3185                 }
3186             }
3187         }
3188         return sum;
3189     }
3190 
3191     /**
3192      * Benchmark the creation of an interval of indices for controlling a partition
3193      * algorithm.
3194      *
3195      * <p>This baselines the
3196      * {@link #searchableIntervalNextPrevious(SearchableIntervalSource, SplitIndexSource)}
3197      * benchmark. For the BitSet-type structures a large overhead is the memory allocation
3198      * to create the {@link SearchableInterval}. Note that this will be at most 1/64 the
3199      * size of the array that is being partitioned and in practice this overhead is not
3200      * significant.
3201      *
3202      * @param function Source of the interval.
3203      * @param source Source of the data.
3204      * @param bh Data sink.
3205      */
3206     @Benchmark
3207     public void createSearchableInterval(SearchableIntervalSource function, IndexSource source, Blackhole bh) {
3208         final int[][] indices = source.getIndices();
3209         for (final int[] x : indices) {
3210             bh.consume(function.create(x));
3211         }
3212     }
3213 
3214     /**
3215      * Benchmark the splitting of an interval of indices during a partition algorithm.
3216      *
3217      * <p>This is similar to
3218      * {@link #searchableIntervalSplit(SearchableIntervalSource, SplitIndexSource)}. It
3219      * uses the {@link UpdatingInterval#splitLeft(int, int)} method by recursive division
3220      * of the indices.
3221      *
3222      * @param function Source of the interval.
3223      * @param source Source of the data.
3224      * @param bh Data sink.
3225      */
3226     @Benchmark
3227     public void updatingIntervalSplit(UpdatingIntervalSource function, IndexSource source, Blackhole bh) {
3228         final int[][] indices = source.getIndices();
3229         final int s = source.getMinSeparation();
3230         for (int i = 0; i < indices.length; i++) {
3231             split(function.create(indices[i]), s, bh);
3232         }
3233     }
3234 
3235     /**
3236      * Recursively split the interval until the length is below the provided separation.
3237      * Consume the interval when no more divides can occur. Simulates a single-pivot
3238      * partition algorithm.
3239      *
3240      * @param interval Interval.
3241      * @param s Minimum separation between left and right.
3242      * @param bh Data sink.
3243      */
3244     private static void split(UpdatingInterval interval, int s, Blackhole bh) {
3245         int l = interval.left();
3246         final int r = interval.right();
3247         // Note: A partition algorithm would only call split if there are indices
3248         // above and below the split point.
3249         if (r - l > s) {
3250             final int middle = (l + r) >>> 1;
3251             // recurse left
3252             split(interval.splitLeft(middle, middle), s, bh);
3253             // continue on right side
3254             l = interval.left();
3255         }
3256         bh.consume(interval);
3257     }
3258 }