1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.numbers.arrays; 18 19 /** 20 * Support for creating {@link UpdatingInterval} implementations and validating indices. 21 * 22 * @since 1.2 23 */ 24 final class IndexSupport { 25 /** The upper threshold to use a modified insertion sort to find unique indices. */ 26 private static final int INSERTION_SORT_SIZE = 20; 27 28 /** No instances. */ 29 private IndexSupport() {} 30 31 /** 32 * Returns an interval that covers the specified indices {@code k}. 33 * 34 * @param left Lower bound of data (inclusive). 35 * @param right Upper bound of data (inclusive). 36 * @param k Indices. 37 * @param n Count of indices (must be strictly positive). 38 * @throws IndexOutOfBoundsException if any index {@code k} is not within the 39 * sub-range {@code [left, right]} 40 * @return the interval 41 */ 42 static UpdatingInterval createUpdatingInterval(int left, int right, int[] k, int n) { 43 // Note: A typical use case is to have a few indices. Thus the heuristics 44 // in this method should be very fast when n is small. 45 // We have a choice between a KeyUpdatingInterval which requires 46 // sorted keys or a BitIndexUpdatingInterval which handles keys in any order. 47 // The purpose of the heuristics is to avoid a very bad choice of data structure, 48 // rather than choosing the best data structure in all situations. As long as the 49 // choice is reasonable the speed will not impact a partition algorithm. 50 51 // Simple cases 52 if (n == 2) { 53 if (k[0] == k[1]) { 54 return newUpdatingInterval(k, 1); 55 } 56 if (k[1] < k[0]) { 57 final int v = k[0]; 58 k[0] = k[1]; 59 k[1] = v; 60 } 61 return newUpdatingInterval(k, 2); 62 } 63 64 // Strategy: Must be fast on already ascending data. 65 // Note: The recommended way to generate a lot of partition indices is to 66 // generate in sequence. 67 68 // n <= small: 69 // Modified insertion sort (naturally finds ascending data) 70 // n > small: 71 // Look for ascending sequence and compact 72 // else: 73 // Remove duplicates using an order(1) data structure and sort 74 75 if (n <= INSERTION_SORT_SIZE) { 76 final int unique = Sorting.insertionSortIndices(k, n); 77 return newUpdatingInterval(k, unique); 78 } 79 80 if (isAscending(k, n)) { 81 // For sorted keys the KeyUpdatingInterval is fast. It may be slower than the 82 // BitIndexUpdatingInterval depending on data length but not significantly 83 // slower and the difference is lost in the time taken for partitioning. 84 // So always use the keys. 85 final int unique = compressDuplicates(k, n); 86 return newUpdatingInterval(k, unique); 87 } 88 89 // At least 20 indices that are partially unordered. 90 91 // Find min/max to understand the range. 92 int min = k[n - 1]; 93 int max = min; 94 for (int i = n - 1; --i >= 0;) { 95 min = Math.min(min, k[i]); 96 max = Math.max(max, k[i]); 97 } 98 99 // Here we use a simple test based on the number of comparisons required 100 // to perform the expected next/previous look-ups after a split. 101 // It is expected that we can cut n keys a maximum of n-1 times. 102 // Each cut requires a scan next/previous to divide the interval into two intervals: 103 // 104 // cut 105 // | 106 // k1--------k2---------k3---- ... ---------kn initial interval 107 // <--| find previous 108 // find next |--> 109 // k1 k2---------k3---- ... ---------kn divided intervals 110 // 111 // An BitSet will scan from the cut location and find a match in time proportional to 112 // the index density. Average density is (size / n) and the scanning covers 64 113 // indices together: Order(2 * n * (size / n) / 64) = Order(size / 32) 114 115 // Sorted keys: Sort time Order(n log(n)) : Splitting time Order(log(n)) (binary search approx) 116 // Bit keys : Sort time Order(1) : Splitting time Order(size / 32) 117 118 // Transition when n * n ~ size / 32 119 // Benchmarking shows this is a reasonable approximation when size < 2^20. 120 // The speed of the bit keys is approximately independent of n and proportional to size. 121 // Large size observes degrading performance of the bit keys vs sorted keys. 122 // We introduce a penalty for each 4x increase over size = 2^20. 123 // n * n = size/32 * 2^log4(size / 2^20) 124 // The transition point still favours the bit keys when sorted keys would be faster. 125 // However the difference is held within 4x and the BitSet type structure is still fast 126 // enough to be negligible against the speed of partitioning. 127 128 // Transition point: n = sqrt(size/32) 129 // size n 130 // 2^10 5.66 131 // 2^15 32.0 132 // 2^20 181.0 133 134 // Transition point: n = sqrt(size/32 * 2^(log4(size/2^20)))) 135 // size n 136 // 2^22 512.0 137 // 2^24 1448.2 138 // 2^28 11585 139 // 2^31 55108 140 141 final int size = max - min + 1; 142 143 // Divide by 32 is a shift of 5. This is reduced for each 4-fold size above 2^20. 144 // At 2^31 the shift reduces to 0. 145 int shift = 5; 146 if (size > (1 << 20)) { 147 // log4(size/2^20) == (log2(size) - 20) / 2 148 shift -= (ceilLog2(size) - 20) >>> 1; 149 } 150 151 if ((long) n * n > (size >> shift)) { 152 final BitIndexUpdatingInterval interval = new BitIndexUpdatingInterval(min, max); 153 for (int i = n; --i >= 0;) { 154 interval.set(k[i]); 155 } 156 return interval; 157 } 158 159 // Sort with a hash set to filter indices 160 final int unique = Sorting.sortIndices(k, n); 161 return new KeyUpdatingInterval(k, unique); 162 } 163 164 /** 165 * Test the data is in ascending order: {@code data[i] <= data[i+1]} for all {@code i}. 166 * Data is assumed to be at least length 1. 167 * 168 * @param data Data. 169 * @param n Length of data. 170 * @return true if ascending 171 */ 172 private static boolean isAscending(int[] data, int n) { 173 for (int i = 0; ++i < n;) { 174 if (data[i] < data[i - 1]) { 175 // descending 176 return false; 177 } 178 } 179 return true; 180 } 181 182 /** 183 * Compress duplicates in the ascending data. 184 * 185 * <p>Warning: Requires {@code n > 0}. 186 * 187 * @param data Indices. 188 * @param n Number of indices. 189 * @return the number of unique indices 190 */ 191 private static int compressDuplicates(int[] data, int n) { 192 // Compress to remove duplicates 193 int last = 0; 194 int top = data[0]; 195 for (int i = 0; ++i < n;) { 196 final int v = data[i]; 197 if (v == top) { 198 continue; 199 } 200 top = v; 201 data[++last] = v; 202 } 203 return last + 1; 204 } 205 206 /** 207 * Compute {@code ceil(log2(x))}. This is valid for all strictly positive {@code x}. 208 * 209 * <p>Returns -1 for {@code x = 0} in place of -infinity. 210 * 211 * @param x Value. 212 * @return {@code ceil(log2(x))} 213 */ 214 private static int ceilLog2(int x) { 215 return 32 - Integer.numberOfLeadingZeros(x - 1); 216 } 217 218 /** 219 * Returns an interval that covers the specified indices {@code k}. 220 * The indices must be sorted. 221 * 222 * @param k Indices. 223 * @param n Count of indices (must be strictly positive). 224 * @throws IndexOutOfBoundsException if any index {@code k} is not within the 225 * sub-range {@code [left, right]} 226 * @return the interval 227 */ 228 private static UpdatingInterval newUpdatingInterval(int[] k, int n) { 229 return new KeyUpdatingInterval(k, n); 230 } 231 232 /** 233 * Count the number of indices. Returns a negative value if the indices are sorted. 234 * 235 * @param keys Keys. 236 * @param n Count of indices. 237 * @return the count of (sorted) indices 238 */ 239 static int countIndices(UpdatingInterval keys, int n) { 240 if (keys instanceof KeyUpdatingInterval) { 241 return -((KeyUpdatingInterval) keys).size(); 242 } 243 return n; 244 } 245 246 /** 247 * Checks if the sub-range from fromIndex (inclusive) to toIndex (exclusive) is 248 * within the bounds of range from 0 (inclusive) to length (exclusive). 249 * 250 * <p>This function provides the functionality of 251 * {@code java.utils.Objects.checkFromToIndex} introduced in JDK 9. The <a 252 * href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Objects.html#checkFromToIndex(int,int,int)">Objects</a> 253 * javadoc has been reproduced for reference. The return value has been changed 254 * to void. 255 * 256 * <p>The sub-range is defined to be out of bounds if any of the following 257 * inequalities is true: 258 * <ul> 259 * <li>{@code fromIndex < 0} 260 * <li>{@code fromIndex > toIndex} 261 * <li>{@code toIndex > length} 262 * <li>{@code length < 0}, which is implied from the former inequalities 263 * </ul> 264 * 265 * @param fromIndex Lower-bound (inclusive) of the sub-range. 266 * @param toIndex Upper-bound (exclusive) of the sub-range. 267 * @param length Upper-bound (exclusive) of the range. 268 * @throws IndexOutOfBoundsException if the sub-range is out of bounds 269 */ 270 static void checkFromToIndex(int fromIndex, int toIndex, int length) { 271 // Checks as documented above 272 if (fromIndex < 0 || fromIndex > toIndex || toIndex > length) { 273 throw new IndexOutOfBoundsException( 274 msgRangeOutOfBounds(fromIndex, toIndex, length)); 275 } 276 } 277 278 /** 279 * Checks if the {@code index} is within the half-open interval {@code [fromIndex, toIndex)}. 280 * 281 * @param fromIndex Lower-bound (inclusive) of the sub-range. 282 * @param toIndex Upper-bound (exclusive) of the sub-range. 283 * @param k Indices. 284 * @throws IndexOutOfBoundsException if any index is out of bounds 285 */ 286 static void checkIndices(int fromIndex, int toIndex, int[] k) { 287 for (final int i : k) { 288 checkIndex(fromIndex, toIndex, i); 289 } 290 } 291 292 /** 293 * Checks if the {@code index} is within the half-open interval {@code [fromIndex, toIndex)}. 294 * 295 * @param fromIndex Lower-bound (inclusive) of the sub-range. 296 * @param toIndex Upper-bound (exclusive) of the sub-range. 297 * @param index Index. 298 * @throws IndexOutOfBoundsException if the index is out of bounds 299 */ 300 static void checkIndex(int fromIndex, int toIndex, int index) { 301 if (index < fromIndex || index >= toIndex) { 302 throw new IndexOutOfBoundsException( 303 msgIndexOutOfBounds(fromIndex, toIndex, index)); 304 } 305 } 306 307 // Message formatting moved to separate methods to assist inlining of the validation methods. 308 309 /** 310 * Format a message when range [from, to) is not entirely within the length. 311 * 312 * @param fromIndex Lower-bound (inclusive) of the sub-range. 313 * @param toIndex Upper-bound (exclusive) of the sub-range. 314 * @param length Upper-bound (exclusive) of the range. 315 * @return the message 316 */ 317 private static String msgRangeOutOfBounds(int fromIndex, int toIndex, int length) { 318 return String.format("Range [%d, %d) out of bounds for length %d", fromIndex, toIndex, length); 319 } 320 321 /** 322 * Format a message when index is not within range [from, to). 323 * 324 * @param fromIndex Lower-bound (inclusive) of the sub-range. 325 * @param toIndex Upper-bound (exclusive) of the sub-range. 326 * @param index Index. 327 * @return the message 328 */ 329 private static String msgIndexOutOfBounds(int fromIndex, int toIndex, int index) { 330 return String.format("Index %d out of bounds for range [%d, %d)", index, fromIndex, toIndex); 331 } 332 }