1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.numbers.arrays;
18
19 /**
20 * Support for creating {@link UpdatingInterval} implementations and validating indices.
21 *
22 * @since 1.2
23 */
24 final class IndexSupport {
25 /** The upper threshold to use a modified insertion sort to find unique indices. */
26 private static final int INSERTION_SORT_SIZE = 20;
27
28 /** No instances. */
29 private IndexSupport() {}
30
31 /**
32 * Returns an interval that covers the specified indices {@code k}.
33 *
34 * @param left Lower bound of data (inclusive).
35 * @param right Upper bound of data (inclusive).
36 * @param k Indices.
37 * @param n Count of indices (must be strictly positive).
38 * @throws IndexOutOfBoundsException if any index {@code k} is not within the
39 * sub-range {@code [left, right]}
40 * @return the interval
41 */
42 static UpdatingInterval createUpdatingInterval(int left, int right, int[] k, int n) {
43 // Note: A typical use case is to have a few indices. Thus the heuristics
44 // in this method should be very fast when n is small.
45 // We have a choice between a KeyUpdatingInterval which requires
46 // sorted keys or a BitIndexUpdatingInterval which handles keys in any order.
47 // The purpose of the heuristics is to avoid a very bad choice of data structure,
48 // rather than choosing the best data structure in all situations. As long as the
49 // choice is reasonable the speed will not impact a partition algorithm.
50
51 // Simple cases
52 if (n == 2) {
53 if (k[0] == k[1]) {
54 return newUpdatingInterval(k, 1);
55 }
56 if (k[1] < k[0]) {
57 final int v = k[0];
58 k[0] = k[1];
59 k[1] = v;
60 }
61 return newUpdatingInterval(k, 2);
62 }
63
64 // Strategy: Must be fast on already ascending data.
65 // Note: The recommended way to generate a lot of partition indices is to
66 // generate in sequence.
67
68 // n <= small:
69 // Modified insertion sort (naturally finds ascending data)
70 // n > small:
71 // Look for ascending sequence and compact
72 // else:
73 // Remove duplicates using an order(1) data structure and sort
74
75 if (n <= INSERTION_SORT_SIZE) {
76 final int unique = Sorting.insertionSortIndices(k, n);
77 return newUpdatingInterval(k, unique);
78 }
79
80 if (isAscending(k, n)) {
81 // For sorted keys the KeyUpdatingInterval is fast. It may be slower than the
82 // BitIndexUpdatingInterval depending on data length but not significantly
83 // slower and the difference is lost in the time taken for partitioning.
84 // So always use the keys.
85 final int unique = compressDuplicates(k, n);
86 return newUpdatingInterval(k, unique);
87 }
88
89 // At least 20 indices that are partially unordered.
90
91 // Find min/max to understand the range.
92 int min = k[n - 1];
93 int max = min;
94 for (int i = n - 1; --i >= 0;) {
95 min = Math.min(min, k[i]);
96 max = Math.max(max, k[i]);
97 }
98
99 // Here we use a simple test based on the number of comparisons required
100 // to perform the expected next/previous look-ups after a split.
101 // It is expected that we can cut n keys a maximum of n-1 times.
102 // Each cut requires a scan next/previous to divide the interval into two intervals:
103 //
104 // cut
105 // |
106 // k1--------k2---------k3---- ... ---------kn initial interval
107 // <--| find previous
108 // find next |-->
109 // k1 k2---------k3---- ... ---------kn divided intervals
110 //
111 // An BitSet will scan from the cut location and find a match in time proportional to
112 // the index density. Average density is (size / n) and the scanning covers 64
113 // indices together: Order(2 * n * (size / n) / 64) = Order(size / 32)
114
115 // Sorted keys: Sort time Order(n log(n)) : Splitting time Order(log(n)) (binary search approx)
116 // Bit keys : Sort time Order(1) : Splitting time Order(size / 32)
117
118 // Transition when n * n ~ size / 32
119 // Benchmarking shows this is a reasonable approximation when size < 2^20.
120 // The speed of the bit keys is approximately independent of n and proportional to size.
121 // Large size observes degrading performance of the bit keys vs sorted keys.
122 // We introduce a penalty for each 4x increase over size = 2^20.
123 // n * n = size/32 * 2^log4(size / 2^20)
124 // The transition point still favours the bit keys when sorted keys would be faster.
125 // However the difference is held within 4x and the BitSet type structure is still fast
126 // enough to be negligible against the speed of partitioning.
127
128 // Transition point: n = sqrt(size/32)
129 // size n
130 // 2^10 5.66
131 // 2^15 32.0
132 // 2^20 181.0
133
134 // Transition point: n = sqrt(size/32 * 2^(log4(size/2^20))))
135 // size n
136 // 2^22 512.0
137 // 2^24 1448.2
138 // 2^28 11585
139 // 2^31 55108
140
141 final int size = max - min + 1;
142
143 // Divide by 32 is a shift of 5. This is reduced for each 4-fold size above 2^20.
144 // At 2^31 the shift reduces to 0.
145 int shift = 5;
146 if (size > (1 << 20)) {
147 // log4(size/2^20) == (log2(size) - 20) / 2
148 shift -= (ceilLog2(size) - 20) >>> 1;
149 }
150
151 if ((long) n * n > (size >> shift)) {
152 final BitIndexUpdatingInterval interval = new BitIndexUpdatingInterval(min, max);
153 for (int i = n; --i >= 0;) {
154 interval.set(k[i]);
155 }
156 return interval;
157 }
158
159 // Sort with a hash set to filter indices
160 final int unique = Sorting.sortIndices(k, n);
161 return new KeyUpdatingInterval(k, unique);
162 }
163
164 /**
165 * Test the data is in ascending order: {@code data[i] <= data[i+1]} for all {@code i}.
166 * Data is assumed to be at least length 1.
167 *
168 * @param data Data.
169 * @param n Length of data.
170 * @return true if ascending
171 */
172 private static boolean isAscending(int[] data, int n) {
173 for (int i = 0; ++i < n;) {
174 if (data[i] < data[i - 1]) {
175 // descending
176 return false;
177 }
178 }
179 return true;
180 }
181
182 /**
183 * Compress duplicates in the ascending data.
184 *
185 * <p>Warning: Requires {@code n > 0}.
186 *
187 * @param data Indices.
188 * @param n Number of indices.
189 * @return the number of unique indices
190 */
191 private static int compressDuplicates(int[] data, int n) {
192 // Compress to remove duplicates
193 int last = 0;
194 int top = data[0];
195 for (int i = 0; ++i < n;) {
196 final int v = data[i];
197 if (v == top) {
198 continue;
199 }
200 top = v;
201 data[++last] = v;
202 }
203 return last + 1;
204 }
205
206 /**
207 * Compute {@code ceil(log2(x))}. This is valid for all strictly positive {@code x}.
208 *
209 * <p>Returns -1 for {@code x = 0} in place of -infinity.
210 *
211 * @param x Value.
212 * @return {@code ceil(log2(x))}
213 */
214 private static int ceilLog2(int x) {
215 return 32 - Integer.numberOfLeadingZeros(x - 1);
216 }
217
218 /**
219 * Returns an interval that covers the specified indices {@code k}.
220 * The indices must be sorted.
221 *
222 * @param k Indices.
223 * @param n Count of indices (must be strictly positive).
224 * @throws IndexOutOfBoundsException if any index {@code k} is not within the
225 * sub-range {@code [left, right]}
226 * @return the interval
227 */
228 private static UpdatingInterval newUpdatingInterval(int[] k, int n) {
229 return new KeyUpdatingInterval(k, n);
230 }
231
232 /**
233 * Count the number of indices. Returns a negative value if the indices are sorted.
234 *
235 * @param keys Keys.
236 * @param n Count of indices.
237 * @return the count of (sorted) indices
238 */
239 static int countIndices(UpdatingInterval keys, int n) {
240 if (keys instanceof KeyUpdatingInterval) {
241 return -((KeyUpdatingInterval) keys).size();
242 }
243 return n;
244 }
245
246 /**
247 * Checks if the sub-range from fromIndex (inclusive) to toIndex (exclusive) is
248 * within the bounds of range from 0 (inclusive) to length (exclusive).
249 *
250 * <p>This function provides the functionality of
251 * {@code java.utils.Objects.checkFromToIndex} introduced in JDK 9. The <a
252 * href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Objects.html#checkFromToIndex(int,int,int)">Objects</a>
253 * javadoc has been reproduced for reference. The return value has been changed
254 * to void.
255 *
256 * <p>The sub-range is defined to be out of bounds if any of the following
257 * inequalities is true:
258 * <ul>
259 * <li>{@code fromIndex < 0}
260 * <li>{@code fromIndex > toIndex}
261 * <li>{@code toIndex > length}
262 * <li>{@code length < 0}, which is implied from the former inequalities
263 * </ul>
264 *
265 * @param fromIndex Lower-bound (inclusive) of the sub-range.
266 * @param toIndex Upper-bound (exclusive) of the sub-range.
267 * @param length Upper-bound (exclusive) of the range.
268 * @throws IndexOutOfBoundsException if the sub-range is out of bounds
269 */
270 static void checkFromToIndex(int fromIndex, int toIndex, int length) {
271 // Checks as documented above
272 if (fromIndex < 0 || fromIndex > toIndex || toIndex > length) {
273 throw new IndexOutOfBoundsException(
274 msgRangeOutOfBounds(fromIndex, toIndex, length));
275 }
276 }
277
278 /**
279 * Checks if the {@code index} is within the half-open interval {@code [fromIndex, toIndex)}.
280 *
281 * @param fromIndex Lower-bound (inclusive) of the sub-range.
282 * @param toIndex Upper-bound (exclusive) of the sub-range.
283 * @param k Indices.
284 * @throws IndexOutOfBoundsException if any index is out of bounds
285 */
286 static void checkIndices(int fromIndex, int toIndex, int[] k) {
287 for (final int i : k) {
288 checkIndex(fromIndex, toIndex, i);
289 }
290 }
291
292 /**
293 * Checks if the {@code index} is within the half-open interval {@code [fromIndex, toIndex)}.
294 *
295 * @param fromIndex Lower-bound (inclusive) of the sub-range.
296 * @param toIndex Upper-bound (exclusive) of the sub-range.
297 * @param index Index.
298 * @throws IndexOutOfBoundsException if the index is out of bounds
299 */
300 static void checkIndex(int fromIndex, int toIndex, int index) {
301 if (index < fromIndex || index >= toIndex) {
302 throw new IndexOutOfBoundsException(
303 msgIndexOutOfBounds(fromIndex, toIndex, index));
304 }
305 }
306
307 // Message formatting moved to separate methods to assist inlining of the validation methods.
308
309 /**
310 * Format a message when range [from, to) is not entirely within the length.
311 *
312 * @param fromIndex Lower-bound (inclusive) of the sub-range.
313 * @param toIndex Upper-bound (exclusive) of the sub-range.
314 * @param length Upper-bound (exclusive) of the range.
315 * @return the message
316 */
317 private static String msgRangeOutOfBounds(int fromIndex, int toIndex, int length) {
318 return String.format("Range [%d, %d) out of bounds for length %d", fromIndex, toIndex, length);
319 }
320
321 /**
322 * Format a message when index is not within range [from, to).
323 *
324 * @param fromIndex Lower-bound (inclusive) of the sub-range.
325 * @param toIndex Upper-bound (exclusive) of the sub-range.
326 * @param index Index.
327 * @return the message
328 */
329 private static String msgIndexOutOfBounds(int fromIndex, int toIndex, int index) {
330 return String.format("Index %d out of bounds for range [%d, %d)", index, fromIndex, toIndex);
331 }
332 }