1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.numbers.examples.jmh.arrays; 18 19 import java.util.Arrays; 20 import java.util.function.IntConsumer; 21 22 /** 23 * A fixed size set of indices within an inclusive range {@code [left, right]}. 24 * 25 * <p>This is a specialised class to implement a reduced API similar to a 26 * {@link java.util.BitSet}. It uses no bounds range checks and supports only a 27 * fixed size. It contains the methods required to store and look-up intervals of indices. 28 * 29 * <p>An offset is supported to allow the fixed size to cover a range of indices starting 30 * above 0 with the most efficient usage of storage. 31 * 32 * <p>The class has methods to directly set and get bits in the range. 33 * Implementations of the {@link PivotCache} interface use range checks and maintain 34 * floating pivots flanking the range to allow bracketing any index within the range. 35 * 36 * <p>Stores all pivots between the support {@code [left, right]}. Uses two 37 * floating pivots which are the closest known pivots surrounding this range. 38 * 39 * <p>See the BloomFilter code in Commons Collections for use of long[] data to store 40 * bits. 41 * 42 * @since 1.2 43 */ 44 final class IndexSet implements PivotCache, SearchableInterval, SearchableInterval2 { 45 /** All 64-bits bits set. */ 46 private static final long LONG_MASK = -1L; 47 /** A bit shift to apply to an integer to divided by 64 (2^6). */ 48 private static final int DIVIDE_BY_64 = 6; 49 /** Default value for an unset upper floating pivot. 50 * Set as a value higher than any valid array index. */ 51 private static final int UPPER_DEFAULT = Integer.MAX_VALUE; 52 53 /** Bit indexes. */ 54 private final long[] data; 55 56 /** Left bound of the support. */ 57 private final int left; 58 /** Right bound of the support. */ 59 private final int right; 60 /** The upstream pivot closest to the left bound of the support. 61 * Provides a lower search bound for the range [left, right]. */ 62 private int lowerPivot = -1; 63 /** The downstream pivot closest to the right bound of the support. 64 * Provides an upper search bound for the range [left, right]. */ 65 private int upperPivot = UPPER_DEFAULT; 66 67 /** 68 * Create an instance to store indices within the range {@code [left, right]}. 69 * 70 * @param left Lower bound (inclusive). 71 * @param right Upper bound (inclusive). 72 */ 73 private IndexSet(int left, int right) { 74 this.left = left; 75 this.right = right; 76 // Allocate storage to store index==right 77 // Note: This may allow directly writing to index > right if there 78 // is extra capacity. Ranges checks to prevent this are provided by 79 // the PivotCache.add(int) method rather than using set(int). 80 data = new long[getLongIndex(right - left) + 1]; 81 } 82 83 /** 84 * Create an instance to store indices within the range {@code [left, right]}. 85 * 86 * @param left Lower bound (inclusive). 87 * @param right Upper bound (inclusive). 88 * @return the index set 89 * @throws IllegalArgumentException if {@code right < left} 90 */ 91 static IndexSet ofRange(int left, int right) { 92 if (left < 0) { 93 throw new IllegalArgumentException("Invalid lower index: " + left); 94 } 95 checkRange(left, right); 96 return new IndexSet(left, right); 97 } 98 99 /** 100 * Initialise an instance with the {@code indices}. The capacity is defined by the 101 * range required to store the minimum and maximum index. 102 * 103 * @param indices Indices. 104 * @return the index set 105 * @throws IllegalArgumentException if {@code indices.length == 0} 106 */ 107 static IndexSet of(int[] indices) { 108 return of(indices, indices.length); 109 } 110 111 /** 112 * Initialise an instance with the {@code indices}. The capacity is defined by the 113 * range required to store the minimum and maximum index. 114 * 115 * @param indices Indices. 116 * @param n Number of indices. 117 * @return the index set 118 * @throws IllegalArgumentException if {@code n == 0} 119 */ 120 static IndexSet of(int[] indices, int n) { 121 if (n <= 0) { 122 throw new IllegalArgumentException("No indices to define the range"); 123 } 124 int min = indices[0]; 125 int max = min; 126 for (int i = 1; i < n; i++) { 127 min = Math.min(min, indices[i]); 128 max = Math.max(max, indices[i]); 129 } 130 final IndexSet set = IndexSet.ofRange(min, max); 131 for (int i = 0; i < n; i++) { 132 set.set(indices[i]); 133 } 134 return set; 135 } 136 137 /** 138 * Return the memory footprint in bytes. This is always a multiple of 64. 139 * 140 * <p>The result is {@code 8 * ceil((right - left + 1) / 64)}. 141 * 142 * <p>This method is intended to provide information to choose if the data structure 143 * is memory efficient. 144 * 145 * <p>Warning: It is assumed {@code 0 <= left <= right}. Use with the min/max index 146 * that is to be stored. 147 * 148 * @param left Lower bound (inclusive). 149 * @param right Upper bound (inclusive). 150 * @return the memory footprint 151 */ 152 static long memoryFootprint(int left, int right) { 153 return (getLongIndex(right - left) + 1L) * Long.BYTES; 154 } 155 156 /** 157 * Gets the filter index for the specified bit index assuming the filter is using 158 * 64-bit longs to store bits starting at index 0. 159 * 160 * <p>The index is assumed to be positive. For a positive index the result will match 161 * {@code bitIndex / 64}.</p> 162 * 163 * <p><em>The divide is performed using bit shifts. If the input is negative the 164 * behavior is not defined.</em></p> 165 * 166 * @param bitIndex the bit index (assumed to be positive) 167 * @return the index of the bit map in an array of bit maps. 168 */ 169 private static int getLongIndex(final int bitIndex) { 170 // An integer divide by 64 is equivalent to a shift of 6 bits if the integer is 171 // positive. 172 // We do not explicitly check for a negative here. Instead we use a 173 // signed shift. Any negative index will produce a negative value 174 // by sign-extension and if used as an index into an array it will throw an 175 // exception. 176 return bitIndex >> DIVIDE_BY_64; 177 } 178 179 /** 180 * Gets the filter bit mask for the specified bit index assuming the filter is using 181 * 64-bit longs to store bits starting at index 0. The returned value is a 182 * {@code long} with only 1 bit set. 183 * 184 * <p>The index is assumed to be positive. For a positive index the result will match 185 * {@code 1L << (bitIndex % 64)}.</p> 186 * 187 * <p><em>If the input is negative the behavior is not defined.</em></p> 188 * 189 * @param bitIndex the bit index (assumed to be positive) 190 * @return the filter bit 191 */ 192 private static long getLongBit(final int bitIndex) { 193 // Bit shifts only use the first 6 bits. Thus it is not necessary to mask this 194 // using 0x3f (63) or compute bitIndex % 64. 195 // Note: If the index is negative the shift will be (64 - (bitIndex & 0x3f)) and 196 // this will identify an incorrect bit. 197 return 1L << bitIndex; 198 } 199 200 // Compressed cardinality methods 201 202 /** 203 * Returns the number of bits set to {@code true} in this {@code IndexSet} using a 204 * compression of 2 to 1. This counts as enabled <em>all</em> bits of each consecutive 205 * 2 bits if <em>any</em> of the consecutive 2 bits are set to {@code true}. 206 * <pre> 207 * 0010100011000101000100 208 * 0 2 2 0 2 0 2 2 0 2 0 209 * </pre> 210 * <p>This method can be used to assess the saturation of the indices in the range. 211 * 212 * @return the number of bits set to {@code true} in this {@code IndexSet} using a 213 * compression of 2 to 1 214 */ 215 public int cardinality2() { 216 int c = 0; 217 for (long x : data) { 218 // Shift and mask out the bits that were shifted 219 x = (x | (x >>> 1)) & 0b0101010101010101010101010101010101010101010101010101010101010101L; 220 // Add [0, 32] 221 c += Long.bitCount(x); 222 } 223 // Multiply by 2 224 return c << 1; 225 } 226 227 /** 228 * Returns the number of bits set to {@code true} in this {@code IndexSet} using a 229 * compression of 4 to 1. This counts as enabled <em>all</em> bits of each consecutive 230 * 4 bits if <em>any</em> of the consecutive 4 bits are set to {@code true}. 231 * <pre> 232 * 0010000011000101000100 233 * 4 0 4 4 4 0 234 * </pre> 235 * <p>This method can be used to assess the saturation of the indices in the range. 236 * 237 * @return the number of bits set to {@code true} in this {@code IndexSet} using a compression 238 * of 8 to 1 239 */ 240 public int cardinality4() { 241 int c = 0; 242 for (long x : data) { 243 // Shift powers of 2 and mask out the bits that were shifted 244 x = x | (x >>> 1); 245 x = (x | (x >>> 2)) & 0b0001000100010001000100010001000100010001000100010001000100010001L; 246 // Expect a population count intrinsic method 247 // Add [0, 16] 248 c += Long.bitCount(x); 249 } 250 // Multiply by 4 251 return c << 2; 252 } 253 254 /** 255 * Returns the number of bits set to {@code true} in this {@code IndexSet} using a 256 * compression of 8 to 1. This counts as enabled <em>all</em> bits of each consecutive 257 * 8 bits if <em>any</em> of the consecutive 8 bits are set to {@code true}. 258 * <pre> 259 * 0010000011000101000000 260 * 8 8 0 261 * </pre> 262 * <p>This method can be used to assess the saturation of the indices in the range. 263 * 264 * @return the number of bits set to {@code true} in this {@code IndexSet} using a compression 265 * of 8 to 1 266 */ 267 public int cardinality8() { 268 int c = 0; 269 for (long x : data) { 270 // Shift powers of 2 and mask out the bits that were shifted 271 x = x | (x >>> 1); 272 x = x | (x >>> 2); 273 x = (x | (x >>> 4)) & 0b0000000100000001000000010000000100000001000000010000000100000001L; 274 // Expect a population count intrinsic method 275 // Add [0, 8] 276 c += Long.bitCount(x); 277 } 278 // Multiply by 8 279 return c << 3; 280 } 281 282 /** 283 * Returns the number of bits set to {@code true} in this {@code IndexSet} using a 284 * compression of 16 to 1. This counts as enabled <em>all</em> bits of each consecutive 285 * 16 bits if <em>any</em> of the consecutive 16 bits are set to {@code true}. 286 * <pre> 287 * 0010000011000101000000 288 * 16 0 289 * </pre> 290 * <p>This method can be used to assess the saturation of the indices in the range. 291 * 292 * @return the number of bits set to {@code true} in this {@code IndexSet} using a compression 293 * of 16 to 1 294 */ 295 public int cardinality16() { 296 int c = 0; 297 for (long x : data) { 298 // Shift powers of 2 and mask out the bits that were shifted 299 x = x | (x >>> 1); 300 x = x | (x >>> 2); 301 x = x | (x >>> 4); 302 x = (x | (x >>> 8)) & 0b0000000000000001000000000000000100000000000000010000000000000001L; 303 // Count the bits using folding 304 // if x = mask: 305 // (x += (x >>> 16)) : 0000000000000001000000000000001000000000000000100000000000000010 306 // (x += (x >>> 32)) : 0000000100000001000000100000001000000011000000110000010000000100 307 x = x + (x >>> 16); // put count of each 32 bits into their lowest 2 bits 308 x = x + (x >>> 32); // put count of each 64 bits into their lowest 3 bits 309 // Add [0, 4] 310 c += (int) x & 0b111; 311 } 312 // Multiply by 16 313 return c << 4; 314 } 315 316 /** 317 * Returns the number of bits set to {@code true} in this {@code IndexSet} using a 318 * compression of 32 to 1. This counts as enabled <em>all</em> bits of each consecutive 319 * 32 bits if <em>any</em> of the consecutive 32 bits are set to {@code true}. 320 * <pre> 321 * 0010000011000101000000 322 * 32 323 * </pre> 324 * <p>This method can be used to assess the saturation of the indices in the range. 325 * 326 * @return the number of bits set to {@code true} in this {@code IndexSet} using a compression 327 * of 32 to 1 328 */ 329 public int cardinality32() { 330 int c = 0; 331 for (final long x : data) { 332 // Are any lower 32-bits or upper 32-bits set? 333 c += (int) x != 0 ? 1 : 0; 334 c += (x >>> 32) != 0L ? 1 : 0; 335 } 336 // Multiply by 32 337 return c << 5; 338 } 339 340 /** 341 * Returns the number of bits set to {@code true} in this {@code IndexSet} using a 342 * compression of 64 to 1. This counts as enabled <em>all</em> bits of each consecutive 343 * 64 bits if <em>any</em> of the consecutive 64 bits are set to {@code true}. 344 * <pre> 345 * 0010000011000101000000 346 * 64 347 * </pre> 348 * <p>This method can be used to assess the saturation of the indices in the range. 349 * 350 * @return the number of bits set to {@code true} in this {@code IndexSet} using a compression 351 * of 64 to 1 352 */ 353 public int cardinality64() { 354 int c = 0; 355 for (final long x : data) { 356 // Are any bits set? 357 c += x != 0L ? 1 : 0; 358 } 359 // Multiply by 64 360 return c << 6; 361 } 362 363 // Adapt method API from BitSet 364 365 /** 366 * Returns the number of bits set to {@code true} in this {@code IndexSet}. 367 * 368 * @return the number of bits set to {@code true} in this {@code IndexSet} 369 */ 370 public int cardinality() { 371 int c = 0; 372 for (final long x : data) { 373 c += Long.bitCount(x); 374 } 375 return c; 376 } 377 378 /** 379 * Returns the value of the bit with the specified index. 380 * 381 * @param bitIndex the bit index (assumed to be positive) 382 * @return the value of the bit with the specified index 383 */ 384 public boolean get(int bitIndex) { 385 // WARNING: No range checks !!! 386 final int index = bitIndex - left; 387 final int i = getLongIndex(index); 388 final long m = getLongBit(index); 389 return (data[i] & m) != 0; 390 } 391 392 /** 393 * Sets the bit at the specified index to {@code true}. 394 * 395 * <p>Warning: This has no range checks. Use {@link #add(int)} to add an index that 396 * may be outside the support. 397 * 398 * @param bitIndex the bit index (assumed to be positive) 399 */ 400 public void set(int bitIndex) { 401 // WARNING: No range checks !!! 402 final int index = bitIndex - left; 403 final int i = getLongIndex(index); 404 final long m = getLongBit(index); 405 data[i] |= m; 406 } 407 408 /** 409 * Sets the bits from the specified {@code leftIndex} (inclusive) to the specified 410 * {@code rightIndex} (inclusive) to {@code true}. 411 * 412 * <p><em>If {@code rightIndex - leftIndex < 0} the behavior is not defined.</em></p> 413 * 414 * <p>Note: In contrast to the BitSet API, this uses an <em>inclusive</em> end as this 415 * is the main use case for the class. 416 * 417 * <p>Warning: This has no range checks. Use {@link #add(int, int)} to range that 418 * may be outside the support. 419 * 420 * @param leftIndex the left index 421 * @param rightIndex the right index 422 */ 423 public void set(int leftIndex, int rightIndex) { 424 final int l = leftIndex - left; 425 final int r = rightIndex - left; 426 427 // WARNING: No range checks !!! 428 int i = getLongIndex(l); 429 final int j = getLongIndex(r); 430 431 // Fill in bits using (big-endian mask): 432 // end middle start 433 // 00011111 11111111 11111100 434 435 // start = -1L << (left % 64) 436 // end = -1L >>> (64 - ((right+1) % 64)) 437 final long start = LONG_MASK << l; 438 final long end = LONG_MASK >>> -(r + 1); 439 if (i == j) { 440 // Special case where the two masks overlap at the same long index 441 // 11111100 & 00011111 => 00011100 442 data[i] |= start & end; 443 } else { 444 // 11111100 445 data[i] |= start; 446 while (++i < j) { 447 // 11111111 448 // Note: -1L is all bits set 449 data[i] = -1L; 450 } 451 // 00011111 452 data[j] |= end; 453 } 454 } 455 456 /** 457 * Returns the index of the nearest bit that is set to {@code true} that occurs on or 458 * before the specified starting index. If no such bit exists, then {@code -1} is returned. 459 * 460 * @param fromIndex Index to start checking from (inclusive). 461 * @return the index of the previous set bit, or {@code -1} if there is no such bit 462 */ 463 public int previousSetBit(int fromIndex) { 464 return previousSetBitOrElse(fromIndex, -1); 465 } 466 467 /** 468 * Returns the index of the nearest bit that is set to {@code true} that occurs on or 469 * before the specified starting index. If no such bit exists, then 470 * {@code defaultValue} is returned. 471 * 472 * @param fromIndex Index to start checking from (inclusive). 473 * @param defaultValue Default value. 474 * @return the index of the previous set bit, or {@code defaultValue} if there is no such bit 475 */ 476 int previousSetBitOrElse(int fromIndex, int defaultValue) { 477 if (fromIndex < left) { 478 // index is in an unknown range 479 return defaultValue; 480 } 481 final int index = fromIndex - left; 482 int i = getLongIndex(index); 483 484 long bits = data[i]; 485 486 // Repeat logic of get(int) to check the bit 487 if ((bits & getLongBit(index)) != 0) { 488 return fromIndex; 489 } 490 491 // Mask bits before the bit index 492 // mask = 00011111 = -1L >>> (64 - ((index + 1) % 64)) 493 bits &= LONG_MASK >>> -(index + 1); 494 for (;;) { 495 if (bits != 0) { 496 //(i+1) i 497 // | index | 498 // | | | 499 // 0 001010000 500 return (i + 1) * Long.SIZE - Long.numberOfLeadingZeros(bits) - 1 + left; 501 } 502 if (i == 0) { 503 return defaultValue; 504 } 505 bits = data[--i]; 506 } 507 } 508 509 /** 510 * Returns the index of the first bit that is set to {@code true} that occurs on or 511 * after the specified starting index. If no such bit exists then {@code -1} is 512 * returned. 513 * 514 * @param fromIndex Index to start checking from (inclusive). 515 * @return the index of the next set bit, or {@code -1} if there is no such bit 516 */ 517 public int nextSetBit(int fromIndex) { 518 return nextSetBitOrElse(fromIndex, -1); 519 } 520 521 /** 522 * Returns the index of the first bit that is set to {@code true} that occurs on or 523 * after the specified starting index. If no such bit exists then {@code defaultValue} is 524 * returned. 525 * 526 * @param fromIndex Index to start checking from (inclusive). 527 * @param defaultValue Default value. 528 * @return the index of the next set bit, or {@code defaultValue} if there is no such bit 529 */ 530 int nextSetBitOrElse(int fromIndex, int defaultValue) { 531 // Support searching forward through the known range 532 final int index = fromIndex < left ? 0 : fromIndex - left; 533 534 int i = getLongIndex(index); 535 536 // Mask bits after the bit index 537 // mask = 11111000 = -1L << (index % 64) 538 long bits = data[i] & (LONG_MASK << index); 539 for (;;) { 540 if (bits != 0) { 541 //(i+1) i 542 // | index | 543 // | | | 544 // 0 001010000 545 return i * Long.SIZE + Long.numberOfTrailingZeros(bits) + left; 546 } 547 if (++i == data.length) { 548 return defaultValue; 549 } 550 bits = data[i]; 551 } 552 } 553 554 /** 555 * Returns the index of the first bit that is set to {@code false} that occurs on or 556 * after the specified starting index <em>within the supported range</em>. If no such 557 * bit exists then the {@code capacity} is returned where {@code capacity = index + 1} 558 * with {@code index} the largest index that can be added to the set without an error. 559 * 560 * <p>If the starting index is less than the supported range the result is {@code fromIndex}. 561 * 562 * @param fromIndex Index to start checking from (inclusive). 563 * @return the index of the next unset bit, or the {@code capacity} if there is no such bit 564 */ 565 public int nextClearBit(int fromIndex) { 566 if (fromIndex < left) { 567 return fromIndex; 568 } 569 // Support searching forward through the known range 570 final int index = fromIndex - left; 571 572 int i = getLongIndex(index); 573 574 // Note: This method is conceptually the same as nextSetBit with the exception 575 // that: all the data is bit-flipped; the capacity is returned when the 576 // scan reaches the end. 577 578 // Mask bits after the bit index 579 // mask = 11111000 = -1L << (fromIndex % 64) 580 long bits = ~data[i] & (LONG_MASK << index); 581 for (;;) { 582 if (bits != 0) { 583 return i * Long.SIZE + Long.numberOfTrailingZeros(bits) + left; 584 } 585 if (++i == data.length) { 586 // Capacity 587 return data.length * Long.SIZE + left; 588 } 589 bits = ~data[i]; 590 } 591 } 592 593 /** 594 * Returns the index of the first bit that is set to {@code false} that occurs on or 595 * before the specified starting index <em>within the supported range</em>. If no such 596 * bit exists then {@code -1} is returned. 597 * 598 * <p>If the starting index is less than the supported range the result is {@code fromIndex}. 599 * This can return {@code -1} only if the support begins at {@code index == 0}. 600 * 601 * @param fromIndex Index to start checking from (inclusive). 602 * @return the index of the previous unset bit, or {@code -1} if there is no such bit 603 */ 604 public int previousClearBit(int fromIndex) { 605 if (fromIndex < left) { 606 // index is in an unknown range 607 return fromIndex; 608 } 609 final int index = fromIndex - left; 610 int i = getLongIndex(index); 611 612 // Note: This method is conceptually the same as previousSetBit with the exception 613 // that: all the data is bit-flipped; the offset - 1 is returned when the 614 // scan reaches the end. 615 616 // Mask bits before the bit index 617 // mask = 00011111 = -1L >>> (64 - ((index + 1) % 64)) 618 long bits = ~data[i] & (LONG_MASK >>> -(index + 1)); 619 for (;;) { 620 if (bits != 0) { 621 //(i+1) i 622 // | index | 623 // | | | 624 // 0 001010000 625 return (i + 1) * Long.SIZE - Long.numberOfLeadingZeros(bits) - 1 + left; 626 } 627 if (i == 0) { 628 return left - 1; 629 } 630 bits = ~data[--i]; 631 } 632 } 633 634 /** 635 * Perform the {@code action} for each index in the set. 636 * 637 * @param action Action. 638 */ 639 public void forEach(IntConsumer action) { 640 // Adapted from o.a.c.collections4.IndexProducer 641 int wordIdx = left; 642 for (int i = 0; i < data.length; i++) { 643 long bits = data[i]; 644 int index = wordIdx; 645 while (bits != 0) { 646 if ((bits & 1L) == 1L) { 647 action.accept(index); 648 } 649 bits >>>= 1; 650 index++; 651 } 652 wordIdx += Long.SIZE; 653 } 654 } 655 656 /** 657 * Write each index in the set into the provided array. 658 * Returns the number of indices. 659 * 660 * <p>The caller must ensure the output array has sufficient capacity. 661 * For example the array used to construct the IndexSet. 662 * 663 * @param a Output array. 664 * @return count of indices 665 * @see #of(int[]) 666 * @see #of(int[], int) 667 */ 668 public int toArray(int[] a) { 669 // This benchmarks as faster for index sorting than toArray2 even at 670 // high density (average separation of 2). 671 int n = -1; 672 int offset = left; 673 for (long bits : data) { 674 while (bits != 0) { 675 final int index = Long.numberOfTrailingZeros(bits); 676 a[++n] = index + offset; 677 bits &= ~(1L << index); 678 } 679 offset += Long.SIZE; 680 } 681 return n + 1; 682 } 683 684 /** 685 * Write each index in the set into the provided array. 686 * Returns the number of indices. 687 * 688 * <p>The caller must ensure the output array has sufficient capacity. 689 * For example the array used to construct the IndexSet. 690 * 691 * @param a Output array. 692 * @return count of indices 693 * @see #of(int[]) 694 * @see #of(int[], int) 695 */ 696 public int toArray2(int[] a) { 697 // Adapted from o.a.c.collections4.IndexProducer 698 int n = -1; 699 for (int i = 0, offset = left; i < data.length; i++, offset += Long.SIZE) { 700 long bits = data[i]; 701 int index = offset; 702 while (bits != 0) { 703 if ((bits & 1L) == 1L) { 704 a[++n] = index; 705 } 706 bits >>>= 1; 707 index++; 708 } 709 } 710 return n + 1; 711 } 712 713 // PivotCache interface 714 715 @Override 716 public int left() { 717 return left; 718 } 719 720 @Override 721 public int right() { 722 return right; 723 } 724 725 @Override 726 public boolean sparse() { 727 // Can store all pivots between [left, right] 728 return false; 729 } 730 731 @Override 732 public boolean contains(int k) { 733 // Assume [left <= k <= right] 734 return get(k); 735 } 736 737 @Override 738 public void add(int index) { 739 // Update the floating pivots if outside the support 740 if (index < left) { 741 lowerPivot = Math.max(index, lowerPivot); 742 } else if (index > right) { 743 upperPivot = Math.min(index, upperPivot); 744 } else { 745 set(index); 746 } 747 } 748 749 @Override 750 public void add(int fromIndex, int toIndex) { 751 // Optimisation for the main use case of the PivotCache 752 if (fromIndex == toIndex) { 753 add(fromIndex); 754 return; 755 } 756 757 // Note: 758 // Storing all pivots allows regions of identical values 759 // and sorted regions to be skipped in subsequent partitioning. 760 // Repeat sorting these regions is typically more expensive 761 // than caching them and moving over them during partitioning. 762 // An alternative is to: store fromIndex and only store 763 // toIndex if they are well separated, optionally storing 764 // regions between. If they are not well separated (e.g. < 10) 765 // then using a single pivot is an alternative to investigate 766 // with performance benchmarks on a range of input data. 767 768 // Pivots are required to bracket [L, R]: 769 // LP-----L--------------R------UP 770 // If the range [i, j] overlaps either L or R then 771 // the floating pivots are no longer required: 772 // i-j Set lower pivot 773 // i--------j Ignore lower pivot 774 // i---------------------j Ignore lower & upper pivots (no longer required) 775 // i-------j Ignore lower & upper pivots 776 // i---------------j Ignore upper pivot 777 // i-j Set upper pivot 778 if (fromIndex <= right && toIndex >= left) { 779 // Clip the range between [left, right] 780 final int i = Math.max(fromIndex, left); 781 final int j = Math.min(toIndex, right); 782 set(i, j); 783 } else if (toIndex < left) { 784 lowerPivot = Math.max(toIndex, lowerPivot); 785 } else { 786 // fromIndex > right 787 upperPivot = Math.min(fromIndex, upperPivot); 788 } 789 } 790 791 @Override 792 public int previousPivot(int k) { 793 // Assume scanning in [left <= k <= right] 794 return previousSetBitOrElse(k, lowerPivot); 795 } 796 797 @Override 798 public int nextPivotOrElse(int k, int other) { 799 // Assume scanning in [left <= k <= right] 800 final int p = upperPivot == UPPER_DEFAULT ? other : upperPivot; 801 return nextSetBitOrElse(k, p); 802 } 803 804 // IndexInterval 805 806 @Override 807 public int previousIndex(int k) { 808 // Re-implement previousSetBitOrElse without index checks 809 // as this supports left <= k <= right 810 811 final int index = k - left; 812 int i = getLongIndex(index); 813 814 // Mask bits before the bit index 815 // mask = 00011111 = -1L >>> (64 - ((index + 1) % 64)) 816 long bits = data[i] & (LONG_MASK >>> -(index + 1)); 817 for (;;) { 818 if (bits != 0) { 819 //(i+1) i 820 // | index | 821 // | | | 822 // 0 001010000 823 return (i + 1) * Long.SIZE - Long.numberOfLeadingZeros(bits) - 1 + left; 824 } 825 // Unsupported: the interval should contain k 826 //if (i == 0) { 827 // return left - 1; 828 //} 829 bits = data[--i]; 830 } 831 } 832 833 @Override 834 public int nextIndex(int k) { 835 // Re-implement nextSetBitOrElse without index checks 836 // as this supports left <= k <= right 837 838 final int index = k - left; 839 int i = getLongIndex(index); 840 841 // Mask bits after the bit index 842 // mask = 11111000 = -1L << (index % 64) 843 long bits = data[i] & (LONG_MASK << index); 844 for (;;) { 845 if (bits != 0) { 846 //(i+1) i 847 // | index | 848 // | | | 849 // 0 001010000 850 return i * Long.SIZE + Long.numberOfTrailingZeros(bits) + left; 851 } 852 // Unsupported: the interval should contain k 853 //if (++i == data.length) { 854 // return right + 1; 855 //} 856 bits = data[++i]; 857 } 858 } 859 860 // No override for split. 861 // This requires searching for previousIndex(k - 1) and nextIndex(k + 1). 862 // The only shared code is getLongIndex(x - left). Since argument indices are 2 apart 863 // these will map to a different long with a probability of 1/32. 864 865 // IndexInterval2 866 // This is exactly the same as IndexInterval as the pointers i are the same as the keys k 867 868 @Override 869 public int start() { 870 return left(); 871 } 872 873 @Override 874 public int end() { 875 return right(); 876 } 877 878 @Override 879 public int index(int i) { 880 return i; 881 } 882 883 @Override 884 public int previous(int i, int k) { 885 return previousIndex(k); 886 } 887 888 @Override 889 public int next(int i, int k) { 890 return nextIndex(k); 891 } 892 893 /** 894 * Return a {@link ScanningPivotCache} implementation re-using the same internal storage. 895 * 896 * <p>Note that the range for the {@link ScanningPivotCache} must fit inside the current 897 * supported range of indices. 898 * 899 * <p>Warning: This operation clears all set bits within the range. 900 * 901 * <p><strong>Support</strong> 902 * 903 * <p>The returned {@link ScanningPivotCache} is suitable for storing all pivot points between 904 * {@code [left, right]} and the closest bounding pivots outside that range. It can be 905 * used for bracketing partition keys processed in a random order by storing pivots 906 * found during each successive partition search. 907 * 908 * <p>The returned {@link ScanningPivotCache} is suitable for use when iterating over 909 * partition keys in ascending order. 910 * 911 * <p>The cache allows incrementing the {@code left} support using 912 * {@link ScanningPivotCache#moveLeft(int)}. Any calls to decrement the {@code left} support 913 * at any time will result in an {@link UnsupportedOperationException}; this prevents reseting 914 * to within the original support used to create the cache. If the {@code left} is 915 * moved beyond the {@code right} then the move is rejected. 916 * 917 * @param lower Lower bound (inclusive). 918 * @param upper Upper bound (inclusive). 919 * @return the pivot cache 920 * @throws IllegalArgumentException if {@code right < left} or the range cannot be 921 * supported. 922 */ 923 ScanningPivotCache asScanningPivotCache(int lower, int upper) { 924 return asScanningPivotCache(lower, upper, true); 925 } 926 927 /** 928 * Return a {@link ScanningPivotCache} implementation to support the range 929 * {@code [left, right]}. 930 * 931 * <p>See {@link #asScanningPivotCache(int, int)} for the details of the cache implementation. 932 * 933 * @param left Lower bound (inclusive). 934 * @param right Upper bound (inclusive). 935 * @return the pivot cache 936 * @throws IllegalArgumentException if {@code right < left} 937 */ 938 static ScanningPivotCache createScanningPivotCache(int left, int right) { 939 final IndexSet set = ofRange(left, right); 940 return set.asScanningPivotCache(left, right, false); 941 } 942 943 /** 944 * Return a {@link ScanningPivotCache} implementation to support the range 945 * {@code [left, right]} re-using the same internal storage. 946 * 947 * @param lower Lower bound (inclusive). 948 * @param upper Upper bound (inclusive). 949 * @param initialize Perform validation checks and initialize the storage. 950 * @return the pivot cache 951 * @throws IllegalArgumentException if {@code right < left} or the range cannot be 952 * supported. 953 */ 954 private ScanningPivotCache asScanningPivotCache(int lower, int upper, boolean initialize) { 955 if (initialize) { 956 checkRange(lower, upper); 957 final int capacity = data.length * Long.SIZE + lower; 958 if (lower < left || upper >= capacity) { 959 throw new IllegalArgumentException( 960 String.format("Unsupported range: [%d, %d] is not within [%d, %d]", lower, upper, 961 left, capacity - 1)); 962 } 963 // Clear existing data 964 Arrays.fill(data, 0); 965 } 966 return new IndexPivotCache(lower, upper); 967 } 968 969 /** 970 * Return an {@link UpdatingInterval} implementation to support the range 971 * {@code [left, right]} re-using the same internal storage. 972 * 973 * @return the interval 974 */ 975 UpdatingInterval interval() { 976 return new IndexSetUpdatingInterval(left, right); 977 } 978 979 /** 980 * Check the range is valid. 981 * 982 * @param left Lower bound (inclusive). 983 * @param right Upper bound (inclusive). 984 * @throws IllegalArgumentException if {@code right < left} 985 */ 986 private static void checkRange(int left, int right) { 987 if (right < left) { 988 throw new IllegalArgumentException( 989 String.format("Invalid range: [%d, %d]", left, right)); 990 } 991 } 992 993 /** 994 * Implementation of the {@link ScanningPivotCache} using the {@link IndexSet}. 995 * 996 * <p>Stores all pivots between the support {@code [left, right]}. Uses two 997 * floating pivots which are the closest known pivots surrounding this range. 998 * 999 * <p>This class is bound to the enclosing {@link IndexSet} instance to provide 1000 * the functionality to read, write and search indexes. 1001 * 1002 * <p>Note: This duplicates functionality of the parent IndexSet. Differences 1003 * are that it uses a movable left bound and implements the scanning functionality 1004 * of the {@link ScanningPivotCache} interface. It can also be created for 1005 * a smaller {@code [left, right]} range than the enclosing class. 1006 * 1007 * <p>Creation of this class typically invalidates the use of the outer class. 1008 * Creation will zero the underlying storage and the range may be different. 1009 */ 1010 private class IndexPivotCache implements ScanningPivotCache { 1011 /** Left bound of the support. */ 1012 private int left; 1013 /** Right bound of the support. */ 1014 private final int right; 1015 /** The upstream pivot closest to the left bound of the support. 1016 * Provides a lower search bound for the range [left, right]. */ 1017 private int lowerPivot = -1; 1018 /** The downstream pivot closest to the right bound of the support. 1019 * Provides an upper search bound for the range [left, right]. */ 1020 private int upperPivot = UPPER_DEFAULT; 1021 1022 /** 1023 * @param left Lower bound (inclusive). 1024 * @param right Upper bound (inclusive). 1025 */ 1026 IndexPivotCache(int left, int right) { 1027 this.left = left; 1028 this.right = right; 1029 } 1030 1031 @Override 1032 public int left() { 1033 return left; 1034 } 1035 1036 @Override 1037 public int right() { 1038 return right; 1039 } 1040 1041 @Override 1042 public boolean sparse() { 1043 // Can store all pivots between [left, right] 1044 return false; 1045 } 1046 1047 @Override 1048 public boolean moveLeft(int newLeft) { 1049 if (newLeft > right) { 1050 // Signal that this cache can no longer be used in that range 1051 return false; 1052 } 1053 if (newLeft < left) { 1054 throw new UnsupportedOperationException( 1055 String.format("New left is outside current support: %d < %d", newLeft, left)); 1056 } 1057 // Here [left <= newLeft <= right] 1058 // Move the upstream pivot 1059 lowerPivot = previousPivot(newLeft); 1060 left = newLeft; 1061 return true; 1062 } 1063 1064 @Override 1065 public boolean contains(int k) { 1066 // Assume [left <= k <= right] 1067 return IndexSet.this.get(k); 1068 } 1069 1070 @Override 1071 public int previousPivot(int k) { 1072 // Assume scanning in [left <= k <= right] 1073 // Here left is moveable and lower pivot holds the last pivot below it. 1074 // The cache will not store any bits below left so if it has moved 1075 // searching may find stale bits below the current lower pivot. 1076 // So we return the max of the found bit or the lower pivot. 1077 if (k < left) { 1078 return lowerPivot; 1079 } 1080 return Math.max(lowerPivot, IndexSet.this.previousSetBitOrElse(k, lowerPivot)); 1081 } 1082 1083 @Override 1084 public int nextPivotOrElse(int k, int other) { 1085 // Assume scanning in [left <= k <= right] 1086 final int p = upperPivot == UPPER_DEFAULT ? other : upperPivot; 1087 return IndexSet.this.nextSetBitOrElse(k, p); 1088 } 1089 1090 @Override 1091 public int nextNonPivot(int k) { 1092 // Assume scanning in [left <= k <= right] 1093 return IndexSet.this.nextClearBit(k); 1094 } 1095 1096 @Override 1097 public int previousNonPivot(int k) { 1098 // Assume scanning in [left <= k <= right] 1099 return IndexSet.this.previousClearBit(k); 1100 } 1101 1102 @Override 1103 public void add(int index) { 1104 // Update the floating pivots if outside the support 1105 if (index < left) { 1106 lowerPivot = Math.max(index, lowerPivot); 1107 } else if (index > right) { 1108 upperPivot = Math.min(index, upperPivot); 1109 } else { 1110 IndexSet.this.set(index); 1111 } 1112 } 1113 1114 @Override 1115 public void add(int fromIndex, int toIndex) { 1116 if (fromIndex == toIndex) { 1117 add(fromIndex); 1118 return; 1119 } 1120 // Note: 1121 // Storing all pivots allows regions of identical values 1122 // and sorted regions to be skipped in subsequent partitioning. 1123 // Repeat sorting these regions is typically more expensive 1124 // than caching them and moving over them during partitioning. 1125 // An alternative is to: store fromIndex and only store 1126 // toIndex if they are well separated, optionally storing 1127 // regions between. If they are not well separated (e.g. < 10) 1128 // then using a single pivot is an alternative to investigate 1129 // with performance benchmarks on a range of input data. 1130 1131 // Pivots are required to bracket [L, R]: 1132 // LP-----L--------------R------UP 1133 // If the range [i, j] overlaps either L or R then 1134 // the floating pivots are no longer required: 1135 // i-j Set lower pivot 1136 // i--------j Ignore lower pivot 1137 // i---------------------j Ignore lower & upper pivots (no longer required) 1138 // i-------j Ignore lower & upper pivots 1139 // i---------------j Ignore upper pivot 1140 // i-j Set upper pivot 1141 if (fromIndex <= right && toIndex >= left) { 1142 // Clip the range between [left, right] 1143 final int i = Math.max(fromIndex, left); 1144 final int j = Math.min(toIndex, right); 1145 IndexSet.this.set(i, j); 1146 } else if (toIndex < left) { 1147 lowerPivot = Math.max(toIndex, lowerPivot); 1148 } else { 1149 // fromIndex > right 1150 upperPivot = Math.min(fromIndex, upperPivot); 1151 } 1152 } 1153 } 1154 1155 /** 1156 * Implementation of the {@link UpdatingInterval} using the {@link IndexSet}. 1157 * 1158 * <p>This class is bound to the enclosing {@link IndexSet} instance to provide 1159 * the functionality to search indexes. 1160 */ 1161 private class IndexSetUpdatingInterval implements UpdatingInterval { 1162 /** Left bound of the interval. */ 1163 private int left; 1164 /** Right bound of the interval. */ 1165 private int right; 1166 1167 /** 1168 * @param left Lower bound (inclusive). 1169 * @param right Upper bound (inclusive). 1170 */ 1171 IndexSetUpdatingInterval(int left, int right) { 1172 this.left = left; 1173 this.right = right; 1174 } 1175 1176 @Override 1177 public int left() { 1178 return left; 1179 } 1180 1181 @Override 1182 public int right() { 1183 return right; 1184 } 1185 1186 @Override 1187 public int updateLeft(int k) { 1188 // Assume left < k= < right 1189 return left = nextIndex(k); 1190 } 1191 1192 @Override 1193 public int updateRight(int k) { 1194 // Assume left <= k < right 1195 return right = previousIndex(k); 1196 } 1197 1198 @Override 1199 public UpdatingInterval splitLeft(int ka, int kb) { 1200 // Assume left < ka <= kb < right 1201 final int lower = left; 1202 left = nextIndex(kb + 1); 1203 return new IndexSetUpdatingInterval(lower, previousIndex(ka - 1)); 1204 } 1205 1206 @Override 1207 public UpdatingInterval splitRight(int ka, int kb) { 1208 // Assume left < ka <= kb < right 1209 final int upper = right; 1210 right = previousIndex(ka - 1); 1211 return new IndexSetUpdatingInterval(nextIndex(kb + 1), upper); 1212 } 1213 } 1214 }