View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.numbers.examples.jmh.arrays;
18  
19  /**
20   * An {@link UpdatingInterval} and {@link SplittingInterval} backed by a fixed size of bits.
21   *
22   * <p>This is a specialised class to implement a reduced API similar to a
23   * {@link java.util.BitSet}. It uses no bounds range checks and supports only
24   * the methods required to implement the {@link UpdatingInterval} API.
25   *
26   * <p>An offset is supported to allow the fixed size to cover a range of indices starting
27   * above 0 with the most efficient usage of storage.
28   *
29   * <p>See the BloomFilter code in Commons Collections for use of long[] data to store
30   * bits.
31   *
32   * @since 1.2
33   */
34  final class BitIndexUpdatingInterval implements UpdatingInterval, SplittingInterval, IntervalAnalysis {
35      /** All 64-bits bits set. */
36      private static final long LONG_MASK = -1L;
37      /** A bit shift to apply to an integer to divided by 64 (2^6). */
38      private static final int DIVIDE_BY_64 = 6;
39  
40      /** Bit indexes. */
41      private final long[] data;
42  
43      /** Index offset. */
44      private final int offset;
45      /** Left bound of the support. */
46      private int left;
47      /** Right bound of the support. */
48      private int right;
49  
50      /**
51       * Create an instance to store indices within the range {@code [left, right]}.
52       * The range is not validated.
53       *
54       * @param left Lower bound (inclusive).
55       * @param right Upper bound (inclusive).
56       */
57      BitIndexUpdatingInterval(int left, int right) {
58          this.offset = left;
59          this.left = left;
60          this.right = right;
61          // Allocate storage to store index==right
62          // Note: This may allow directly writing to index > right if there
63          // is extra capacity.
64          data = new long[getLongIndex(right - offset) + 1];
65      }
66  
67      /**
68       * Create an instance with the range {@code [left, right]} and reusing the provided
69       * index {@code data}.
70       *
71       * @param data Data.
72       * @param offset Index offset.
73       * @param left Lower bound (inclusive).
74       * @param right Upper bound (inclusive).
75       */
76      private BitIndexUpdatingInterval(long[] data, int offset, int left, int right) {
77          this.data = data;
78          this.offset = offset;
79          this.left = left;
80          this.right = right;
81      }
82  
83      /**
84       * Create an instance to store indices within the range {@code [left, right]}.
85       *
86       * @param left Lower bound (inclusive).
87       * @param right Upper bound (inclusive).
88       * @return the index set
89       * @throws IllegalArgumentException if {@code right < left}; {@code left < 0} or
90       * {@code right == Integer.MAX_VALUE}
91       */
92      static BitIndexUpdatingInterval ofRange(int left, int right) {
93          if (left < 0) {
94              throw new IllegalArgumentException("Invalid lower index: " + left);
95          }
96          if (right == Integer.MAX_VALUE) {
97              throw new IllegalArgumentException("Invalid upper index: " + right);
98          }
99          if (right < left) {
100             throw new IllegalArgumentException(
101                 String.format("Invalid range: [%d, %d]", left, right));
102         }
103         return new BitIndexUpdatingInterval(left, right);
104     }
105 
106     /**
107      * Initialise an instance with the {@code indices}. The capacity is defined by the
108      * range required to store the minimum and maximum index.
109      *
110      * @param indices Indices.
111      * @param n Number of indices.
112      * @return the index set
113      * @throws IllegalArgumentException if {@code n == 0}
114      */
115     static BitIndexUpdatingInterval of(int[] indices, int n) {
116         if (n <= 0) {
117             throw new IllegalArgumentException("No indices to define the range");
118         }
119         int min = indices[0];
120         int max = min;
121         for (int i = 1; i < n; i++) {
122             min = Math.min(min, indices[i]);
123             max = Math.max(max, indices[i]);
124         }
125         final BitIndexUpdatingInterval set = BitIndexUpdatingInterval.ofRange(min, max);
126         for (int i = -1; ++i < n;) {
127             set.set(indices[i]);
128         }
129         return set;
130     }
131 
132     /**
133      * Return the memory footprint in bytes. This is always a multiple of 64.
134      *
135      * <p>The result is {@code 64 * ceil((right - offset + 1) / 64)}.
136      *
137      * <p>This method is intended to provided information to choose if the data structure
138      * is memory efficient.
139      *
140      * <p>Warning: It is assumed {@code 0 <= left <= right}. Use with the min/max index
141      * that is to be stored.
142      *
143      * @param left Lower bound (inclusive).
144      * @param right Upper bound (inclusive).
145      * @return the memory footprint
146      */
147     static long memoryFootprint(int left, int right) {
148         return (getLongIndex(right - left) + 1L) * Long.BYTES;
149     }
150 
151     /**
152      * Gets the filter index for the specified bit index assuming the filter is using
153      * 64-bit longs to store bits starting at index 0.
154      *
155      * <p>The index is assumed to be positive. For a positive index the result will match
156      * {@code bitIndex / 64}.</p>
157      *
158      * <p><em>The divide is performed using bit shifts. If the input is negative the
159      * behavior is not defined.</em></p>
160      *
161      * @param bitIndex the bit index (assumed to be positive)
162      * @return the index of the bit map in an array of bit maps.
163      */
164     private static int getLongIndex(final int bitIndex) {
165         // An integer divide by 64 is equivalent to a shift of 6 bits if the integer is
166         // positive.
167         // We do not explicitly check for a negative here. Instead we use a
168         // signed shift. Any negative index will produce a negative value
169         // by sign-extension and if used as an index into an array it will throw an
170         // exception.
171         return bitIndex >> DIVIDE_BY_64;
172     }
173 
174     /**
175      * Gets the filter bit mask for the specified bit index assuming the filter is using
176      * 64-bit longs to store bits starting at index 0. The returned value is a
177      * {@code long} with only 1 bit set.
178      *
179      * <p>The index is assumed to be positive. For a positive index the result will match
180      * {@code 1L << (bitIndex % 64)}.</p>
181      *
182      * <p><em>If the input is negative the behavior is not defined.</em></p>
183      *
184      * @param bitIndex the bit index (assumed to be positive)
185      * @return the filter bit
186      */
187     private static long getLongBit(final int bitIndex) {
188         // Bit shifts only use the first 6 bits. Thus it is not necessary to mask this
189         // using 0x3f (63) or compute bitIndex % 64.
190         // Note: If the index is negative the shift will be (64 - (bitIndex & 0x3f)) and
191         // this will identify an incorrect bit.
192         return 1L << bitIndex;
193     }
194 
195     /**
196      * Sets the bit at the specified index to {@code true}.
197      *
198      * <p>Warning: This has no range checks.
199      *
200      * @param bitIndex the bit index (assumed to be positive)
201      */
202     void set(int bitIndex) {
203         // WARNING: No range checks !!!
204         final int index = bitIndex - offset;
205         final int i = getLongIndex(index);
206         final long m = getLongBit(index);
207         data[i] |= m;
208     }
209 
210     /**
211      * Returns the index of the first bit that is set to {@code true} that occurs on or
212      * after the specified starting index.
213      *
214      * <p>Warning: This has no range checks. It is assumed that {@code left <= k <= right},
215      * that is there is a set bit on or after {@code k}.
216      *
217      * @param k Index to start checking from (inclusive).
218      * @return the index of the next set bit
219      */
220     private int nextIndex(int k) {
221         // left <= k <= right
222 
223         final int index = k - offset;
224         int i = getLongIndex(index);
225 
226         // Mask bits after the bit index
227         // mask = 11111000 = -1L << (index % 64)
228         long bits = data[i] & (LONG_MASK << index);
229         for (;;) {
230             if (bits != 0) {
231                 //(i+1)       i
232                 // |    index |
233                 // |      |   |
234                 // 0  001010000
235                 return i * Long.SIZE + Long.numberOfTrailingZeros(bits) + offset;
236             }
237             // Unsupported: the interval should contain k
238             //if (++i == data.length)
239             //    return right + 1
240             bits = data[++i];
241         }
242     }
243 
244     /**
245      * Returns the index of the first bit that is set to {@code true} that occurs on or
246      * before the specified starting index.
247      *
248      * <p>Warning: This has no range checks. It is assumed that {@code left <= k <= right},
249      * that is there is a set bit on or before {@code k}.
250      *
251      * @param k Index to start checking from (inclusive).
252      * @return the index of the previous set bit
253      */
254     private int previousIndex(int k) {
255         // left <= k <= right
256 
257         final int index = k - offset;
258         int i = getLongIndex(index);
259 
260         // Mask bits before the bit index
261         // mask = 00011111 = -1L >>> (64 - ((index + 1) % 64))
262         long bits = data[i] & (LONG_MASK >>> -(index + 1));
263         for (;;) {
264             if (bits != 0) {
265                 //(i+1)       i
266                 // |  index   |
267                 // |    |     |
268                 // 0  001010000
269                 return (i + 1) * Long.SIZE - Long.numberOfLeadingZeros(bits) - 1 + offset;
270             }
271             // Unsupported: the interval should contain k
272             //if (i == 0)
273             //    return left - 1
274             bits = data[--i];
275         }
276     }
277 
278     @Override
279     public int left() {
280         return left;
281     }
282 
283     @Override
284     public int right() {
285         return right;
286     }
287 
288     @Override
289     public int updateLeft(int k) {
290         // Assume left < k= < right
291         return left = nextIndex(k);
292     }
293 
294     @Override
295     public int updateRight(int k) {
296         // Assume left <= k < right
297         return right = previousIndex(k);
298     }
299 
300     @Override
301     public UpdatingInterval splitLeft(int ka, int kb) {
302         // Assume left < ka <= kb < right
303         final int lower = left;
304         left = nextIndex(kb + 1);
305         return new BitIndexUpdatingInterval(data, offset, lower, previousIndex(ka - 1));
306     }
307 
308     @Override
309     public UpdatingInterval splitRight(int ka, int kb) {
310         // Assume left < ka <= kb < right
311         final int upper = right;
312         right = previousIndex(ka - 1);
313         return new BitIndexUpdatingInterval(data, offset, nextIndex(kb + 1), upper);
314     }
315 
316     @Override
317     public boolean empty() {
318         // Empty when the interval is invalid. Signalled by a negative right bound.
319         return right < 0;
320     }
321 
322     @Override
323     public SplittingInterval split(int ka, int kb) {
324         if (ka <= left) {
325             // No left interval
326             if (kb >= right) {
327                 // No right interval
328                 invalidate();
329             } else if (kb >= left) {
330                 // Update the left bound
331                 left = nextIndex(kb + 1);
332             }
333             return null;
334         }
335         if (kb >= right) {
336             // No right interval.
337             // Find new right bound for the left-side.
338             final int r = ka <= right ? previousIndex(ka - 1) : right;
339             invalidate();
340             return new BitIndexUpdatingInterval(data, offset, left, r);
341         }
342         // Split
343         return (SplittingInterval) splitLeft(ka, kb);
344     }
345 
346     /**
347      * Invalidate the interval and mark as empty.
348      */
349     private void invalidate() {
350         right = -1;
351     }
352 
353     @Override
354     public boolean saturated(int separation) {
355         // Support saturation analysis at separation relevant to the
356         // quickselect implementations
357         if (separation == 3) {
358             return saturated3();
359         }
360         if (separation == 4) {
361             return saturated4();
362         }
363         return false;
364     }
365 
366     /**
367      * Test if saturated as a separation of {@code 2^3}.
368      *
369      * @return true if saturated
370      */
371     private boolean saturated3() {
372         int c = 0;
373         for (long x : data) {
374             // Shift powers of 2 and mask out the bits that were shifted
375             x = x | (x >>> 1);
376             x = x | (x >>> 2);
377             x = (x | (x >>> 4)) & 0b0000000100000001000000010000000100000001000000010000000100000001L;
378             // Expect a population count intrinsic method
379             // Add [0, 8]
380             c += Long.bitCount(x);
381         }
382         // Multiply by 8
383         return c << 3 >= right - left;
384     }
385 
386     /**
387      * Test if saturated as a separation of {@code 2^4}.
388      *
389      * @return true if saturated
390      */
391     private boolean saturated4() {
392         int c = 0;
393         for (long x : data) {
394             // Shift powers of 2 and mask out the bits that were shifted
395             x = x | (x >>> 1);
396             x = x | (x >>> 2);
397             x = x | (x >>> 4);
398             x = (x | (x >>> 8)) & 0b0000000000000001000000000000000100000000000000010000000000000001L;
399             // Count the bits using folding
400             // x = mask:
401             // 0000000000000001000000000000001000000000000000100000000000000010  (x += (x >>> 16))
402             // 0000000100000001000000100000001000000011000000110000010000000100  (x += (x >>> 32))
403             x = x + (x >>> 16); // put count of each 32 bits into their lowest 2 bits
404             x = x + (x >>> 32); // put count of each 64 bits into their lowest 3 bits
405             // Add [0, 4]
406             c += (int) x & 0b111;
407         }
408         // Multiply by 16
409         return c << 4 >= right - left;
410     }
411 }