1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.numbers.arrays;
18
19 /**
20 * Support for creating {@link UpdatingInterval} implementations and validating indices.
21 *
22 * @since 1.2
23 */
24 final class IndexSupport {
25 /** The upper threshold to use a modified insertion sort to find unique indices. */
26 private static final int INSERTION_SORT_SIZE = 20;
27
28 /** No instances. */
29 private IndexSupport() {}
30
31 /**
32 * Returns an interval that covers the specified indices {@code k}.
33 *
34 * @param k Indices.
35 * @param n Count of indices (must be strictly positive).
36 * @return the interval
37 */
38 static UpdatingInterval createUpdatingInterval(int[] k, int n) {
39 // Note: A typical use case is to have a few indices. Thus the heuristics
40 // in this method should be very fast when n is small.
41 // We have a choice between a KeyUpdatingInterval which requires
42 // sorted keys or a BitIndexUpdatingInterval which handles keys in any order.
43 // The purpose of the heuristics is to avoid a very bad choice of data structure,
44 // rather than choosing the best data structure in all situations. As long as the
45 // choice is reasonable the speed will not impact a partition algorithm.
46
47 // Simple cases
48 if (n == 2) {
49 if (k[0] == k[1]) {
50 return newUpdatingInterval(k, 1);
51 }
52 if (k[1] < k[0]) {
53 final int v = k[0];
54 k[0] = k[1];
55 k[1] = v;
56 }
57 return newUpdatingInterval(k, 2);
58 }
59
60 // Strategy: Must be fast on already ascending data.
61 // Note: The recommended way to generate a lot of partition indices is to
62 // generate in sequence.
63
64 // n <= small:
65 // Modified insertion sort (naturally finds ascending data)
66 // n > small:
67 // Look for ascending sequence and compact
68 // else:
69 // Remove duplicates using an order(1) data structure and sort
70
71 if (n <= INSERTION_SORT_SIZE) {
72 final int unique = Sorting.insertionSortIndices(k, n);
73 return newUpdatingInterval(k, unique);
74 }
75
76 if (isAscending(k, n)) {
77 // For sorted keys the KeyUpdatingInterval is fast. It may be slower than the
78 // BitIndexUpdatingInterval depending on data length but not significantly
79 // slower and the difference is lost in the time taken for partitioning.
80 // So always use the keys.
81 final int unique = compressDuplicates(k, n);
82 return newUpdatingInterval(k, unique);
83 }
84
85 // At least 20 indices that are partially unordered.
86
87 // Find min/max to understand the range.
88 int min = k[n - 1];
89 int max = min;
90 for (int i = n - 2; i >= 0; i--) {
91 min = Math.min(min, k[i]);
92 max = Math.max(max, k[i]);
93 }
94
95 // Here we use a simple test based on the number of comparisons required
96 // to perform the expected next/previous look-ups after a split.
97 // It is expected that we can cut n keys a maximum of n-1 times.
98 // Each cut requires a scan next/previous to divide the interval into two intervals:
99 //
100 // cut
101 // |
102 // k1--------k2---------k3---- ... ---------kn initial interval
103 // <--| find previous
104 // find next |-->
105 // k1 k2---------k3---- ... ---------kn divided intervals
106 //
107 // An BitSet will scan from the cut location and find a match in time proportional to
108 // the index density. Average density is (size / n) and the scanning covers 64
109 // indices together: Order(2 * n * (size / n) / 64) = Order(size / 32)
110
111 // Sorted keys: Sort time Order(n log(n)) : Splitting time Order(log(n)) (binary search approx)
112 // Bit keys : Sort time Order(1) : Splitting time Order(size / 32)
113
114 // Transition when n * n ~ size / 32
115 // Benchmarking shows this is a reasonable approximation when size < 2^20.
116 // The speed of the bit keys is approximately independent of n and proportional to size.
117 // Large size observes degrading performance of the bit keys vs sorted keys.
118 // We introduce a penalty for each 4x increase over size = 2^20.
119 // n * n = size/32 * 2^log4(size / 2^20)
120 // The transition point still favours the bit keys when sorted keys would be faster.
121 // However the difference is held within 4x and the BitSet type structure is still fast
122 // enough to be negligible against the speed of partitioning.
123
124 // Transition point: n = sqrt(size/32)
125 // size n
126 // 2^10 5.66
127 // 2^15 32.0
128 // 2^20 181.0
129
130 // Transition point: n = sqrt(size/32 * 2^(log4(size/2^20))))
131 // size n
132 // 2^22 512.0
133 // 2^24 1448.2
134 // 2^28 11585
135 // 2^31 55108
136
137 final int size = max - min + 1;
138
139 // Divide by 32 is a shift of 5. This is reduced for each 4-fold size above 2^20.
140 // At 2^31 the shift reduces to 0.
141 int shift = 5;
142 if (size > (1 << 20)) {
143 // log4(size/2^20) == (log2(size) - 20) / 2
144 shift -= (ceilLog2(size) - 20) >>> 1;
145 }
146
147 if ((long) n * n > (size >> shift)) {
148 final BitIndexUpdatingInterval interval = new BitIndexUpdatingInterval(min, max);
149 for (int i = n; --i >= 0;) {
150 interval.set(k[i]);
151 }
152 return interval;
153 }
154
155 // Sort with a hash set to filter indices
156 final int unique = Sorting.sortIndices(k, n);
157 return new KeyUpdatingInterval(k, unique);
158 }
159
160 /**
161 * Test the data is in ascending order: {@code data[i] <= data[i+1]} for all {@code i}.
162 * Data is assumed to be at least length 1.
163 *
164 * @param data Data.
165 * @param n Length of data.
166 * @return true if ascending
167 */
168 private static boolean isAscending(int[] data, int n) {
169 for (int i = 1; i < n; i++) {
170 if (data[i] < data[i - 1]) {
171 // descending
172 return false;
173 }
174 }
175 return true;
176 }
177
178 /**
179 * Compress duplicates in the ascending data.
180 *
181 * <p>Warning: Requires {@code n > 0}.
182 *
183 * @param data Indices.
184 * @param n Number of indices.
185 * @return the number of unique indices
186 */
187 private static int compressDuplicates(int[] data, int n) {
188 // Compress to remove duplicates
189 int last = 0;
190 int top = data[0];
191 for (int i = 1; i < n; i++) {
192 final int v = data[i];
193 if (v == top) {
194 continue;
195 }
196 top = v;
197 data[++last] = v;
198 }
199 return last + 1;
200 }
201
202 /**
203 * Compute {@code ceil(log2(x))}. This is valid for all strictly positive {@code x}.
204 *
205 * <p>Returns -1 for {@code x = 0} in place of -infinity.
206 *
207 * @param x Value.
208 * @return {@code ceil(log2(x))}
209 */
210 private static int ceilLog2(int x) {
211 return 32 - Integer.numberOfLeadingZeros(x - 1);
212 }
213
214 /**
215 * Returns an interval that covers the specified indices {@code k}.
216 * The indices must be sorted.
217 *
218 * @param k Indices.
219 * @param n Count of indices (must be strictly positive).
220 * @throws IndexOutOfBoundsException if any index {@code k} is not within the
221 * sub-range {@code [left, right]}
222 * @return the interval
223 */
224 private static UpdatingInterval newUpdatingInterval(int[] k, int n) {
225 return new KeyUpdatingInterval(k, n);
226 }
227
228 /**
229 * Count the number of indices. Returns a negative value if the indices are sorted.
230 *
231 * @param keys Keys.
232 * @param n Count of indices.
233 * @return the count of (sorted) indices
234 */
235 static int countIndices(UpdatingInterval keys, int n) {
236 if (keys instanceof KeyUpdatingInterval) {
237 return -((KeyUpdatingInterval) keys).size();
238 }
239 return n;
240 }
241
242 /**
243 * Checks if the sub-range from fromIndex (inclusive) to toIndex (exclusive) is
244 * within the bounds of range from 0 (inclusive) to length (exclusive).
245 *
246 * <p>This function provides the functionality of
247 * {@code java.utils.Objects.checkFromToIndex} introduced in JDK 9. The <a
248 * href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Objects.html#checkFromToIndex(int,int,int)">Objects</a>
249 * javadoc has been reproduced for reference. The return value has been changed
250 * to void.
251 *
252 * <p>The sub-range is defined to be out of bounds if any of the following
253 * inequalities is true:
254 * <ul>
255 * <li>{@code fromIndex < 0}</li>
256 * <li>{@code fromIndex > toIndex}</li>
257 * <li>{@code toIndex > length}</li>
258 * <li>{@code length < 0}, which is implied from the former inequalities</li>
259 * </ul>
260 *
261 * @param fromIndex Lower-bound (inclusive) of the sub-range.
262 * @param toIndex Upper-bound (exclusive) of the sub-range.
263 * @param length Upper-bound (exclusive) of the range.
264 * @throws IndexOutOfBoundsException if the sub-range is out of bounds
265 */
266 static void checkFromToIndex(int fromIndex, int toIndex, int length) {
267 // Checks as documented above
268 if (fromIndex < 0 || fromIndex > toIndex || toIndex > length) {
269 throw new IndexOutOfBoundsException(
270 msgRangeOutOfBounds(fromIndex, toIndex, length));
271 }
272 }
273
274 /**
275 * Checks if the {@code index} is within the half-open interval {@code [fromIndex, toIndex)}.
276 *
277 * @param fromIndex Lower-bound (inclusive) of the sub-range.
278 * @param toIndex Upper-bound (exclusive) of the sub-range.
279 * @param k Indices.
280 * @throws IndexOutOfBoundsException if any index is out of bounds
281 */
282 static void checkIndices(int fromIndex, int toIndex, int[] k) {
283 for (final int i : k) {
284 checkIndex(fromIndex, toIndex, i);
285 }
286 }
287
288 /**
289 * Checks if the {@code index} is within the half-open interval {@code [fromIndex, toIndex)}.
290 *
291 * @param fromIndex Lower-bound (inclusive) of the sub-range.
292 * @param toIndex Upper-bound (exclusive) of the sub-range.
293 * @param index Index.
294 * @throws IndexOutOfBoundsException if the index is out of bounds
295 */
296 static void checkIndex(int fromIndex, int toIndex, int index) {
297 if (index < fromIndex || index >= toIndex) {
298 throw new IndexOutOfBoundsException(
299 msgIndexOutOfBounds(fromIndex, toIndex, index));
300 }
301 }
302
303 // Message formatting moved to separate methods to assist inlining of the validation methods.
304
305 /**
306 * Format a message when range [from, to) is not entirely within the length.
307 *
308 * @param fromIndex Lower-bound (inclusive) of the sub-range.
309 * @param toIndex Upper-bound (exclusive) of the sub-range.
310 * @param length Upper-bound (exclusive) of the range.
311 * @return the message
312 */
313 private static String msgRangeOutOfBounds(int fromIndex, int toIndex, int length) {
314 return String.format("Range [%d, %d) out of bounds for length %d", fromIndex, toIndex, length);
315 }
316
317 /**
318 * Format a message when index is not within range [from, to).
319 *
320 * @param fromIndex Lower-bound (inclusive) of the sub-range.
321 * @param toIndex Upper-bound (exclusive) of the sub-range.
322 * @param index Index.
323 * @return the message
324 */
325 private static String msgIndexOutOfBounds(int fromIndex, int toIndex, int index) {
326 return String.format("Index %d out of bounds for range [%d, %d)", index, fromIndex, toIndex);
327 }
328 }