1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.numbers.examples.jmh.arrays;
18
19 /**
20 * A fixed size set of indices within an inclusive range {@code [left, right]}.
21 *
22 * <p>This is a specialised class to implement a data structure similar to a
23 * {@link java.util.BitSet}. It supports a fixed size and contains the methods required to
24 * store and look-up intervals of indices.
25 *
26 * <p>An offset is supported to allow the fixed size to cover a range of indices starting
27 * above 0 with the most efficient usage of storage.
28 *
29 * <p>In contrast to a {@link java.util.BitSet}, the data structure does not store all
30 * indices in the range. Indices are compressed 2-to-1. The structure can return
31 * with 100% accuracy if a query index is not within the range. It cannot return with 100%
32 * accuracy if a query index is contained within the range. The presence of a query index
33 * is a probabilistic statement that there is an index within 1 of the query index.
34 *
35 * <p>Indices are stored offset from {@code left} and compressed. A compressed index
36 * represents 2 real indices:
37 *
38 * <pre>
39 * Interval: 012345678
40 * Compressed: 0-1-2-3-4
41 * </pre>
42 *
43 * <p>When scanning for the next index the identified compressed index is decompressed and
44 * the lower bound of the range represented by the index is returned.
45 *
46 * <p>When scanning for the previous index the identified compressed index is decompressed
47 * and the upper bound of the range represented by the index is returned.
48 *
49 * <p>When scanning in either direction, if the search index is inside a compressed index
50 * the search index is returned.
51 *
52 * <p>Note: Search for the {@link SearchableInterval} interface outside the supported bounds
53 * {@code [left, right]} is not supported and will result in an {@link IndexOutOfBoundsException}.
54 *
55 * <p>See the BloomFilter code in Commons Collections for use of long[] data to store
56 * bits.
57 *
58 * <p>Note: This is a specialised version of {@link CompressedIndexSet} using a fixed
59 * compression of 1. This is used for performance testing. This is the most useful
60 * compression level for the partition algorithm as any compressed key that lies
61 * exactly on a partition index will only require a search for the min/max in the
62 * interval immediately below/above the partition index. A pair of indices (k, k+1)
63 * that is split into two compressed keys and lies exactly on a partition index will
64 * require a search for the min on one side and two maximum values on the other side; or
65 * max on one side and two minimum on the other. Each of these cases can be handled by
66 * dedicated select routines to find 1 or 2 values at the edge of a range.
67 *
68 * @since 1.2
69 */
70 final class CompressedIndexSet2 implements SearchableInterval {
71 /** All 64-bits bits set. */
72 private static final long LONG_MASK = -1L;
73 /** A bit shift to apply to an integer to divided by 64 (2^6). */
74 private static final int DIVIDE_BY_64 = 6;
75
76 /** Bit indexes. */
77 private final long[] data;
78
79 /** Left bound of the support. */
80 private final int left;
81 /** Right bound of the support. */
82 private final int right;
83
84 /**
85 * Create an instance to store indices within the range {@code [left, right]}.
86 *
87 * @param l Lower bound (inclusive).
88 * @param r Upper bound (inclusive).
89 */
90 private CompressedIndexSet2(int l, int r) {
91 this.left = l;
92 // Note: The functional upper bound may be higher but the next/previous functionality
93 // support scanning in the original [left, right] bound.
94 this.right = r;
95 // Note: This may allow directly writing to index > right if there
96 // is extra capacity.
97 data = new long[getLongIndex((r - l) >>> 1) + 1];
98 }
99
100 /**
101 * Create an instance to store indices within the range {@code [left, right]}.
102 * The instance is initially empty.
103 *
104 * <p>Warning: To use this object as an {@link SearchableInterval} the left and right
105 * indices should be added to the set.
106 *
107 * @param left Lower bound (inclusive).
108 * @param right Upper bound (inclusive).
109 * @return the index set
110 * @throws IllegalArgumentException if {@code compression} is not in {@code [1, 31]};
111 * or if {@code right < left}; or if {@code left < 0}
112 */
113 static CompressedIndexSet2 ofRange(int left, int right) {
114 checkLeft(left);
115 checkRange(left, right);
116 return new CompressedIndexSet2(left, right);
117 }
118
119 /**
120 * Initialise an instance with the {@code indices}. The capacity is defined by the
121 * range required to store the minimum and maximum index at the specified
122 * {@code compression} level.
123 *
124 * <p>This object can be used as an {@link SearchableInterval} as the left and right
125 * indices will be set.
126 *
127 * @param indices Indices.
128 * @return the index set
129 * @throws IllegalArgumentException if {@code compression} is not in {@code [1, 31]};
130 * or if {@code indices.length == 0}; or if {@code left < 0}
131 */
132 static CompressedIndexSet2 of(int[] indices) {
133 return of(indices, indices.length);
134 }
135
136 /**
137 * Initialise an instance with the {@code indices}. The capacity is defined by the
138 * range required to store the minimum and maximum index at the specified
139 * {@code compression} level.
140 *
141 * @param indices Indices.
142 * @param n Number of indices.
143 * @return the index set
144 * @throws IllegalArgumentException if {@code compression} is not in {@code [1, 31]};
145 * or if {@code n == 0}; or if {@code left < 0}
146 */
147 static CompressedIndexSet2 of(int[] indices, int n) {
148 if (n <= 0) {
149 throw new IllegalArgumentException("No indices to define the range");
150 }
151 int min = indices[0];
152 int max = min;
153 for (int i = 0; ++i < n;) {
154 min = Math.min(min, indices[i]);
155 max = Math.max(max, indices[i]);
156 }
157 checkLeft(min);
158 final CompressedIndexSet2 set = new CompressedIndexSet2(min, max);
159 for (int i = -1; ++i < n;) {
160 set.set(indices[i]);
161 }
162 return set;
163 }
164
165 /**
166 * Gets the compressed index for this instance using the left bound and the
167 * compression level.
168 *
169 * @param index Index.
170 * @return the compressed index
171 */
172 private int compressIndex(int index) {
173 return (index - left) >>> 1;
174 }
175
176 /**
177 * Gets the filter index for the specified bit index assuming the filter is using
178 * 64-bit longs to store bits starting at index 0.
179 *
180 * <p>The index is assumed to be positive. For a positive index the result will match
181 * {@code bitIndex / 64}.</p>
182 *
183 * <p><em>The divide is performed using bit shifts. If the input is negative the
184 * behavior is not defined.</em></p>
185 *
186 * @param bitIndex the bit index (assumed to be positive)
187 * @return the index of the bit map in an array of bit maps.
188 */
189 private static int getLongIndex(final int bitIndex) {
190 // An integer divide by 64 is equivalent to a shift of 6 bits if the integer is
191 // positive.
192 // We do not explicitly check for a negative here. Instead we use a
193 // signed shift. Any negative index will produce a negative value
194 // by sign-extension and if used as an index into an array it will throw an
195 // exception.
196 return bitIndex >> DIVIDE_BY_64;
197 }
198
199 /**
200 * Gets the filter bit mask for the specified bit index assuming the filter is using
201 * 64-bit longs to store bits starting at index 0. The returned value is a
202 * {@code long} with only 1 bit set.
203 *
204 * <p>The index is assumed to be positive. For a positive index the result will match
205 * {@code 1L << (bitIndex % 64)}.</p>
206 *
207 * <p><em>If the input is negative the behavior is not defined.</em></p>
208 *
209 * @param bitIndex the bit index (assumed to be positive)
210 * @return the filter bit
211 */
212 private static long getLongBit(final int bitIndex) {
213 // Bit shifts only use the first 6 bits. Thus it is not necessary to mask this
214 // using 0x3f (63) or compute bitIndex % 64.
215 // Note: If the index is negative the shift will be (64 - (bitIndex & 0x3f)) and
216 // this will identify an incorrect bit.
217 return 1L << bitIndex;
218 }
219
220 /**
221 * Returns the value of the bit with the specified index.
222 *
223 * <p>Warning: This has no range checks.
224 *
225 * @param bitIndex the bit index (assumed to be positive)
226 * @return the value of the bit with the specified index
227 */
228 boolean get(int bitIndex) {
229 // WARNING: No range checks !!!
230 final int index = compressIndex(bitIndex);
231 final int i = getLongIndex(index);
232 final long m = getLongBit(index);
233 return (data[i] & m) != 0;
234 }
235
236 /**
237 * Sets the bit at the specified index to {@code true}.
238 *
239 * <p>Warning: This has no range checks.
240 *
241 * @param bitIndex the bit index (assumed to be positive)
242 */
243 void set(int bitIndex) {
244 // WARNING: No range checks !!!
245 final int index = compressIndex(bitIndex);
246 final int i = getLongIndex(index);
247 final long m = getLongBit(index);
248 data[i] |= m;
249 }
250
251
252 @Override
253 public int left() {
254 return left;
255 }
256
257 @Override
258 public int right() {
259 return right;
260 }
261
262 @Override
263 public int previousIndex(int k) {
264 // WARNING: No range checks !!!
265 // Assume left <= k <= right and that left and right are set bits acting as sentinels.
266 final int index = compressIndex(k);
267
268 int i = getLongIndex(index);
269 long bits = data[i];
270
271 // Check if this is within a compressed index. If so return the exact result.
272 if ((bits & getLongBit(index)) != 0) {
273 return k;
274 }
275
276 // Mask bits before the bit index
277 // mask = 00011111 = -1L >>> (64 - ((index + 1) % 64))
278 bits &= LONG_MASK >>> -(index + 1);
279 for (;;) {
280 if (bits != 0) {
281 //(i+1) i
282 // | c |
283 // | | |
284 // 0 001010000
285 final int c = (i + 1) * Long.SIZE - Long.numberOfLeadingZeros(bits);
286 // Decompress the prior unset bit to an index. When inflated this is the
287 // next index above the upper bound of the compressed range so subtract 1.
288 return (c << 1) - 1 + left;
289 }
290 // Unsupported: the interval should contain k
291 //if (i == 0) {
292 // return left - 1;
293 //}
294 bits = data[--i];
295 }
296 }
297
298 @Override
299 public int nextIndex(int k) {
300 // WARNING: No range checks !!!
301 // Assume left <= k <= right and that left and right are set bits acting as sentinels.
302 final int index = compressIndex(k);
303
304 int i = getLongIndex(index);
305 long bits = data[i];
306
307 // Check if this is within a compressed index. If so return the exact result.
308 if ((bits & getLongBit(index)) != 0) {
309 return k;
310 }
311
312 // Mask bits after the bit index
313 // mask = 11111000 = -1L << (index % 64)
314 bits &= LONG_MASK << index;
315 for (;;) {
316 if (bits != 0) {
317 //(i+1) i
318 // | c |
319 // | | |
320 // 0 001010000
321 final int c = i * Long.SIZE + Long.numberOfTrailingZeros(bits);
322 // Decompress the set bit to an index. When inflated this is the lower bound of
323 // the compressed range and is OK for next scanning.
324 return (c << 1) + left;
325 }
326 // Unsupported: the interval should contain k
327 //if (++i == data.length) {
328 // return right + 1;
329 //}
330 bits = data[++i];
331 }
332 }
333
334 /**
335 * Check the lower bound to the range is valid.
336 *
337 * @param left Lower bound (inclusive).
338 * @throws IllegalArgumentException if {@code left < 0}
339 */
340 private static void checkLeft(int left) {
341 if (left < 0) {
342 throw new IllegalArgumentException("Invalid lower index: " + left);
343 }
344 }
345
346 /**
347 * Check the range is valid.
348 *
349 * @param left Lower bound (inclusive).
350 * @param right Upper bound (inclusive).
351 * @throws IllegalArgumentException if {@code right < left}
352 */
353 private static void checkRange(int left, int right) {
354 if (right < left) {
355 throw new IllegalArgumentException(
356 String.format("Invalid range: [%d, %d]", left, right));
357 }
358 }
359 }