1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.numbers.examples.jmh.arrays;
18
19 /**
20 * A fixed size set of indices within an inclusive range {@code [left, right]}.
21 *
22 * <p>This is a specialised class to implement a data structure similar to a
23 * {@link java.util.BitSet}. It supports a fixed size and contains the methods required to
24 * store and look-up intervals of indices.
25 *
26 * <p>An offset is supported to allow the fixed size to cover a range of indices starting
27 * above 0 with the most efficient usage of storage.
28 *
29 * <p>In contrast to a {@link java.util.BitSet}, the data structure does not store all
30 * indices in the range. Indices are compressed by a power of 2. The structure can return
31 * with 100% accuracy if a query index is not within the range. It cannot return with 100%
32 * accuracy if a query index is contained within the range. The presence of a query index
33 * is a probabilistic statement that there is an index within a range of the query index.
34 * The range is defined by the compression level {@code c}.
35 *
36 * <p>Indices are stored offset from {@code left} and compressed. A compressed index
37 * represents 2<sup>c</sup> real indices:
38 *
39 * <pre>
40 * Interval: 012345678
41 * Compressed (c=1): 0-1-2-3-4
42 * Compressed (c=2): 0---1---2
43 * Compressed (c=2): 0-------1
44 * </pre>
45 *
46 * <p>When scanning for the next index the identified compressed index is decompressed and
47 * the lower bound of the range represented by the index is returned.
48 *
49 * <p>When scanning for the previous index the identified compressed index is decompressed
50 * and the upper bound of the range represented by the index is returned.
51 *
52 * <p>When scanning in either direction, if the search index is inside a compressed index
53 * the search index is returned.
54 *
55 * <p>Note: Search for the {@link SearchableInterval} interface outside the supported bounds
56 * {@code [left, right]} is not supported and will result in an {@link IndexOutOfBoundsException}.
57 *
58 * <p>See the BloomFilter code in Commons Collections for use of long[] data to store
59 * bits.
60 *
61 * @since 1.2
62 */
63 final class CompressedIndexSet implements SearchableInterval, SearchableInterval2 {
64 /** All 64-bits bits set. */
65 private static final long LONG_MASK = -1L;
66 /** A bit shift to apply to an integer to divided by 64 (2^6). */
67 private static final int DIVIDE_BY_64 = 6;
68
69 /** Bit indexes. */
70 private final long[] data;
71
72 /** Left bound of the support. */
73 private final int left;
74 /** Right bound of the support. */
75 private final int right;
76 /** Compression level. */
77 private final int compression;
78
79 /**
80 * Create an instance to store indices within the range {@code [left, right]}.
81 *
82 * @param compression Compression level (in {@code [1, 31])}
83 * @param l Lower bound (inclusive).
84 * @param r Upper bound (inclusive).
85 */
86 private CompressedIndexSet(int compression, int l, int r) {
87 this.compression = compression;
88 this.left = l;
89 // Note: The functional upper bound may be higher but the next/previous functionality
90 // support scanning in the original [left, right] bound.
91 this.right = r;
92 // Note: This may allow directly writing to index > right if there
93 // is extra capacity.
94 data = new long[getLongIndex((r - l) >>> 1) + 1];
95 }
96
97 /**
98 * Create an instance to store indices within the range {@code [left, right]}.
99 * The instance is initially empty.
100 *
101 * <p>Warning: To use this object as an {@link SearchableInterval} the left and right
102 * indices should be added to the set.
103 *
104 * @param compression Compression level (in {@code [1, 31])}
105 * @param left Lower bound (inclusive).
106 * @param right Upper bound (inclusive).
107 * @return the index set
108 * @throws IllegalArgumentException if {@code compression} is not in {@code [1, 31]};
109 * or if {@code right < left}; or if {@code left < 0}
110 */
111 static CompressedIndexSet ofRange(int compression, int left, int right) {
112 checkCompression(compression);
113 checkLeft(left);
114 checkRange(left, right);
115 return new CompressedIndexSet(compression, left, right);
116 }
117
118 /**
119 * Initialise an instance with the {@code indices}. The capacity is defined by the
120 * range required to store the minimum and maximum index at the specified
121 * {@code compression} level.
122 *
123 * <p>This object can be used as an {@link SearchableInterval} as the left and right
124 * indices will be set.
125 *
126 * @param compression Compression level (in {@code [1, 31])}
127 * @param indices Indices.
128 * @return the index set
129 * @throws IllegalArgumentException if {@code compression} is not in {@code [1, 31]};
130 * or if {@code indices.length == 0}; or if {@code left < 0}
131 */
132 static CompressedIndexSet of(int compression, int[] indices) {
133 return of(compression, indices, indices.length);
134 }
135
136 /**
137 * Initialise an instance with the {@code indices}. The capacity is defined by the
138 * range required to store the minimum and maximum index at the specified
139 * {@code compression} level.
140 *
141 * <p>This object can be used as an {@link SearchableInterval} as the left and right
142 * indices will be set.
143 *
144 * @param compression Compression level (in {@code [1, 31])}
145 * @param indices Indices.
146 * @param n Number of indices.
147 * @return the index set
148 * @throws IllegalArgumentException if {@code compression} is not in {@code [1, 31]};
149 * or if {@code n == 0}; or if {@code left < 0}
150 */
151 static CompressedIndexSet of(int compression, int[] indices, int n) {
152 if (n <= 0) {
153 throw new IllegalArgumentException("No indices to define the range");
154 }
155 checkCompression(compression);
156 int min = indices[0];
157 int max = min;
158 for (int i = 0; ++i < n;) {
159 min = Math.min(min, indices[i]);
160 max = Math.max(max, indices[i]);
161 }
162 checkLeft(min);
163 final CompressedIndexSet set = new CompressedIndexSet(compression, min, max);
164 for (int i = -1; ++i < n;) {
165 set.set(indices[i]);
166 }
167 return set;
168 }
169
170 /**
171 * Create an {@link IndexIterator} with the {@code indices}.
172 *
173 * @param compression Compression level (in {@code [1, 31])}
174 * @param indices Indices.
175 * @param n Number of indices.
176 * @return the index set
177 * @throws IllegalArgumentException if {@code compression} is not in {@code [1, 31]};
178 * or if {@code n == 0}; or if {@code left < 0}
179 */
180 static IndexIterator iterator(int compression, int[] indices, int n) {
181 return of(compression, indices, n).new Iterator();
182 }
183
184 /**
185 * Gets the compressed index for this instance using the left bound and the
186 * compression level.
187 *
188 * @param index Index.
189 * @return the compressed index
190 */
191 private int compressIndex(int index) {
192 return (index - left) >>> compression;
193 }
194
195 /**
196 * Gets the filter index for the specified bit index assuming the filter is using
197 * 64-bit longs to store bits starting at index 0.
198 *
199 * <p>The index is assumed to be positive. For a positive index the result will match
200 * {@code bitIndex / 64}.</p>
201 *
202 * <p><em>The divide is performed using bit shifts. If the input is negative the
203 * behavior is not defined.</em></p>
204 *
205 * @param bitIndex the bit index (assumed to be positive)
206 * @return the index of the bit map in an array of bit maps.
207 */
208 private static int getLongIndex(final int bitIndex) {
209 // An integer divide by 64 is equivalent to a shift of 6 bits if the integer is
210 // positive.
211 // We do not explicitly check for a negative here. Instead we use a
212 // signed shift. Any negative index will produce a negative value
213 // by sign-extension and if used as an index into an array it will throw an
214 // exception.
215 return bitIndex >> DIVIDE_BY_64;
216 }
217
218 /**
219 * Gets the filter bit mask for the specified bit index assuming the filter is using
220 * 64-bit longs to store bits starting at index 0. The returned value is a
221 * {@code long} with only 1 bit set.
222 *
223 * <p>The index is assumed to be positive. For a positive index the result will match
224 * {@code 1L << (bitIndex % 64)}.</p>
225 *
226 * <p><em>If the input is negative the behavior is not defined.</em></p>
227 *
228 * @param bitIndex the bit index (assumed to be positive)
229 * @return the filter bit
230 */
231 private static long getLongBit(final int bitIndex) {
232 // Bit shifts only use the first 6 bits. Thus it is not necessary to mask this
233 // using 0x3f (63) or compute bitIndex % 64.
234 // Note: If the index is negative the shift will be (64 - (bitIndex & 0x3f)) and
235 // this will identify an incorrect bit.
236 return 1L << bitIndex;
237 }
238
239 /**
240 * Returns the value of the bit with the specified index.
241 *
242 * <p>Warning: This has no range checks.
243 *
244 * @param bitIndex the bit index (assumed to be positive)
245 * @return the value of the bit with the specified index
246 */
247 boolean get(int bitIndex) {
248 // WARNING: No range checks !!!
249 final int index = compressIndex(bitIndex);
250 final int i = getLongIndex(index);
251 final long m = getLongBit(index);
252 return (data[i] & m) != 0;
253 }
254
255 /**
256 * Sets the bit at the specified index to {@code true}.
257 *
258 * <p>Warning: This has no range checks.
259 *
260 * @param bitIndex the bit index (assumed to be positive)
261 */
262 void set(int bitIndex) {
263 // WARNING: No range checks !!!
264 final int index = compressIndex(bitIndex);
265 final int i = getLongIndex(index);
266 final long m = getLongBit(index);
267 data[i] |= m;
268 }
269
270
271 @Override
272 public int left() {
273 return left;
274 }
275
276 @Override
277 public int right() {
278 return right;
279 }
280
281 /**
282 * Returns the nearest index that occurs on or before the specified starting
283 * index, or {@code left - 1} if no such index exists.
284 *
285 * <p>This method exists for comparative testing to {@link #previousIndex(int)}.
286 *
287 * @param k Index to start checking from (inclusive).
288 * @return the previous index, or {@code left - 1}
289 */
290 int previousIndexOrLeftMinus1(int k) {
291 if (k < left) {
292 // index is in an unknown range
293 return left - 1;
294 }
295 // Support searching backward through the known range
296 final int index = compressIndex(k > right ? right : k);
297
298 int i = getLongIndex(index);
299 long bits = data[i];
300
301 // Check if this is within a compressed index. If so return the exact result.
302 if ((bits & getLongBit(index)) != 0) {
303 return Math.min(k, right);
304 }
305
306 // Mask bits before the bit index
307 // mask = 00011111 = -1L >>> (64 - ((index + 1) % 64))
308 bits &= LONG_MASK >>> -(index + 1);
309 for (;;) {
310 if (bits != 0) {
311 //(i+1) i
312 // | c |
313 // | | |
314 // 0 001010000
315 final int c = (i + 1) * Long.SIZE - Long.numberOfLeadingZeros(bits);
316 // Decompress the prior unset bit to an index. When inflated this is the
317 // next index above the upper bound of the compressed range so subtract 1.
318 return (c << compression) - 1 + left;
319 }
320 if (i == 0) {
321 return left - 1;
322 }
323 bits = data[--i];
324 }
325 }
326
327 /**
328 * Returns the nearest index that occurs on or after the specified starting
329 * index, or {@code right + 1} if no such index exists.
330 *
331 * <p>This method exists for comparative testing to {@link #nextIndex(int)}.
332 *
333 * @param k Index to start checking from (inclusive).
334 * @return the next index, or {@code right + 1}
335 */
336 int nextIndexOrRightPlus1(int k) {
337 if (k > right) {
338 // index is in an unknown range
339 return right + 1;
340 }
341 // Support searching forward through the known range
342 final int index = compressIndex(k < left ? left : k);
343
344 int i = getLongIndex(index);
345 long bits = data[i];
346
347 // Check if this is within a compressed index. If so return the exact result.
348 if ((bits & getLongBit(index)) != 0) {
349 return Math.max(k, left);
350 }
351
352 // Mask bits after the bit index
353 // mask = 11111000 = -1L << (index % 64)
354 bits &= LONG_MASK << index;
355 for (;;) {
356 if (bits != 0) {
357 //(i+1) i
358 // | c |
359 // | | |
360 // 0 001010000
361 final int c = i * Long.SIZE + Long.numberOfTrailingZeros(bits);
362 // Decompress the set bit to an index. When inflated this is the lower bound of
363 // the compressed range and is OK for next scanning.
364 return (c << compression) + left;
365 }
366 if (++i == data.length) {
367 return right + 1;
368 }
369 bits = data[i];
370 }
371 }
372
373 @Override
374 public int previousIndex(int k) {
375 // WARNING: No range checks !!!
376 // Assume left <= k <= right and that left and right are set bits acting as sentinals.
377 final int index = compressIndex(k);
378
379 int i = getLongIndex(index);
380 long bits = data[i];
381
382 // Check if this is within a compressed index. If so return the exact result.
383 if ((bits & getLongBit(index)) != 0) {
384 return k;
385 }
386
387 // Mask bits before the bit index
388 // mask = 00011111 = -1L >>> (64 - ((index + 1) % 64))
389 bits &= LONG_MASK >>> -(index + 1);
390 for (;;) {
391 if (bits != 0) {
392 //(i+1) i
393 // | c |
394 // | | |
395 // 0 001010000
396 final int c = (i + 1) * Long.SIZE - Long.numberOfLeadingZeros(bits);
397 // Decompress the prior unset bit to an index. When inflated this is the
398 // next index above the upper bound of the compressed range so subtract 1.
399 return (c << compression) - 1 + left;
400 }
401 // Unsupported: the interval should contain k
402 //if (i == 0) {
403 // return left - 1;
404 //}
405 bits = data[--i];
406 }
407 }
408
409 @Override
410 public int nextIndex(int k) {
411 // WARNING: No range checks !!!
412 // Assume left <= k <= right and that left and right are set bits acting as sentinals.
413 final int index = compressIndex(k);
414
415 int i = getLongIndex(index);
416 long bits = data[i];
417
418 // Check if this is within a compressed index. If so return the exact result.
419 if ((bits & getLongBit(index)) != 0) {
420 return k;
421 }
422
423 // Mask bits after the bit index
424 // mask = 11111000 = -1L << (index % 64)
425 bits &= LONG_MASK << index;
426 for (;;) {
427 if (bits != 0) {
428 //(i+1) i
429 // | c |
430 // | | |
431 // 0 001010000
432 final int c = i * Long.SIZE + Long.numberOfTrailingZeros(bits);
433 // Decompress the set bit to an index. When inflated this is the lower bound of
434 // the compressed range and is OK for next scanning.
435 return (c << compression) + left;
436 }
437 // Unsupported: the interval should contain k
438 //if (++i == data.length) {
439 // return right + 1;
440 //}
441 bits = data[++i];
442 }
443 }
444
445 // SearchableInterval2
446 // This is exactly the same as SearchableInterval as the pointers i are the same as the keys k
447
448 @Override
449 public int start() {
450 return left();
451 }
452
453 @Override
454 public int end() {
455 return right();
456 }
457
458 @Override
459 public int index(int i) {
460 return i;
461 }
462
463 @Override
464 public int previous(int i, int k) {
465 return previousIndex(k);
466 }
467
468 @Override
469 public int next(int i, int k) {
470 return nextIndex(k);
471 }
472
473 /**
474 * Returns the index of the first bit that is set to {@code false} that occurs on or
475 * before the specified starting index <em>within the supported range</em>. If no such
476 * bit exists then {@code left - 1} is returned.
477 *
478 * <p>Assumes {@code k} is within an enabled compressed index.
479 *
480 * @param k Index to start checking from (inclusive).
481 * @return the index of the previous unset bit, or {@code left - 1} if there is no such bit
482 */
483 int previousClearBit(int k) {
484 // WARNING: No range checks !!!
485 // Assume left <= k <= right and that left and right are set bits acting as sentinals.
486 final int index = compressIndex(k);
487
488 int i = getLongIndex(index);
489
490 // Note: This method is conceptually the same as previousIndex with the exception
491 // that: all the data is bit-flipped; a check is made when the scan reaches the end;
492 // and no check is made for k within an unset compressed index.
493
494 // Mask bits before the bit index
495 // mask = 00011111 = -1L >>> (64 - ((index + 1) % 64))
496 long bits = ~data[i] & (LONG_MASK >>> -(index + 1));
497 for (;;) {
498 if (bits != 0) {
499 final int c = (i + 1) * Long.SIZE - Long.numberOfLeadingZeros(bits);
500 return (c << compression) - 1 + left;
501 }
502 if (i == 0) {
503 return left - 1;
504 }
505 bits = ~data[--i];
506 }
507 }
508
509 /**
510 * Returns the index of the first bit that is set to {@code false} that occurs on or
511 * after the specified starting index <em>within the supported range</em>. If no such
512 * bit exists then the {@code capacity} is returned where {@code capacity = index + 1}
513 * with {@code index} the largest index that can be added to the set without an error.
514 *
515 * <p>Assumes {@code k} is within an enabled compressed index.
516 *
517 * @param k Index to start checking from (inclusive).
518 * @return the index of the next unset bit, or the {@code capacity} if there is no such bit
519 */
520 int nextClearBit(int k) {
521 // WARNING: No range checks !!!
522 // Assume left <= k <= right
523 final int index = compressIndex(k);
524
525 int i = getLongIndex(index);
526
527 // Note: This method is conceptually the same as nextIndex with the exception
528 // that: all the data is bit-flipped; a check is made for the capacity when the
529 // scan reaches the end; and no check is made for k within an unset compressed index.
530
531 // Mask bits after the bit index
532 // mask = 11111000 = -1L << (fromIndex % 64)
533 long bits = ~data[i] & (LONG_MASK << index);
534 for (;;) {
535 if (bits != 0) {
536 final int c = i * Long.SIZE + Long.numberOfTrailingZeros(bits);
537 return (c << compression) + left;
538 }
539 if (++i == data.length) {
540 // Capacity
541 return right + 1;
542 }
543 bits = ~data[i];
544 }
545 }
546
547 /**
548 * Check the compression is valid.
549 *
550 * @param compression Compression level.
551 * @throws IllegalArgumentException if {@code compression} is not in {@code [1, 31]}
552 */
553 private static void checkCompression(int compression) {
554 if (!(compression > 0 && compression <= 31)) {
555 throw new IllegalArgumentException("Invalid compression: " + compression);
556 }
557 }
558
559 /**
560 * Check the lower bound to the range is valid.
561 *
562 * @param left Lower bound (inclusive).
563 * @throws IllegalArgumentException if {@code left < 0}
564 */
565 private static void checkLeft(int left) {
566 if (left < 0) {
567 throw new IllegalArgumentException("Invalid lower index: " + left);
568 }
569 }
570
571 /**
572 * Check the range is valid.
573 *
574 * @param left Lower bound (inclusive).
575 * @param right Upper bound (inclusive).
576 * @throws IllegalArgumentException if {@code right < left}
577 */
578 private static void checkRange(int left, int right) {
579 if (right < left) {
580 throw new IllegalArgumentException(
581 String.format("Invalid range: [%d, %d]", left, right));
582 }
583 }
584
585 /**
586 * {@link IndexIterator} implementation.
587 *
588 * <p>This iterator can efficiently iterate over high-density indices
589 * if the compression level is set to create spacing equal to or above the expected
590 * separation between indices.
591 */
592 private class Iterator implements IndexIterator {
593 /** Iterator left. l is a compressed index. */
594 private int l;
595 /** Iterator right. (r+1) is a clear bit. */
596 private int r;
597 /** Next iterator left. Cached for look ahead functionality. */
598 private int nextL;
599
600 /**
601 * Create an instance.
602 */
603 Iterator() {
604 l = CompressedIndexSet.this.left();
605 r = nextClearBit(l) - 1;
606 if (r < end()) {
607 nextL = nextIndex(r + 1);
608 } else {
609 // Entire range is saturated
610 r = end();
611 }
612 }
613
614 @Override
615 public int left() {
616 return l;
617 }
618
619 @Override
620 public int right() {
621 return r;
622 }
623
624 @Override
625 public int end() {
626 return CompressedIndexSet.this.right();
627 }
628
629 @Override
630 public boolean next() {
631 if (r < end()) {
632 // Reuse the cached next left and advance
633 l = nextL;
634 r = nextClearBit(l) - 1;
635 if (r < end()) {
636 nextL = nextIndex(r + 1);
637 } else {
638 r = end();
639 }
640 return true;
641 }
642 return false;
643 }
644
645 @Override
646 public boolean positionAfter(int index) {
647 // Even though this can provide random access we only allow advancing
648 if (r > index) {
649 return true;
650 }
651 if (index < end()) {
652 // Note: Uses 3 scans as it maintains the next left.
653 // For low density indices scanning for next left will be expensive
654 // and it would be more efficient to only compute next left on demand.
655 // For high density indices the next left will be close to
656 // the new right and the cost is low.
657 // This iterator favours use on high density indices. A variant
658 // iterator could be created for comparison purposes.
659
660 if (get(index + 1)) {
661 // (index+1) is set.
662 // Find [left <= index+1 <= right]
663 r = nextClearBit(index + 1) - 1;
664 if (r < end()) {
665 nextL = nextIndex(r + 1);
666 } else {
667 r = end();
668 }
669 l = index + 1;
670 //l = previousClearBit(index) + 1;
671 } else {
672 // (index+1) is clear.
673 // Advance to the next [left, right] pair
674 l = nextIndex(index + 1);
675 r = nextClearBit(l) - 1;
676 if (r < end()) {
677 nextL = nextIndex(r + 1);
678 } else {
679 r = end();
680 }
681 }
682 return true;
683 }
684 // Advance to end. No next left. Not positioned after the target index.
685 l = r = end();
686 return false;
687 }
688
689 @Override
690 public boolean nextAfter(int index) {
691 if (r < end()) {
692 return nextL > index;
693 }
694 // no more indices
695 return true;
696 }
697 }
698 }