View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.numbers.examples.jmh.arrays;
18  
19  /**
20   * A fixed size set of indices within an inclusive range {@code [left, right]}.
21   *
22   * <p>This is a specialised class to implement a data structure similar to a
23   * {@link java.util.BitSet}. It supports a fixed size and contains the methods required to
24   * store and look-up intervals of indices.
25   *
26   * <p>An offset is supported to allow the fixed size to cover a range of indices starting
27   * above 0 with the most efficient usage of storage.
28   *
29   * <p>In contrast to a {@link java.util.BitSet}, the data structure does not store all
30   * indices in the range. Indices are compressed 2-to-1. The structure can return
31   * with 100% accuracy if a query index is not within the range. It cannot return with 100%
32   * accuracy if a query index is contained within the range. The presence of a query index
33   * is a probabilistic statement that there is an index within 1 of the query index.
34   *
35   * <p>Indices are stored offset from {@code left} and compressed. A compressed index
36   * represents 2 real indices:
37   *
38   * <pre>
39   * Interval:   012345678
40   * Compressed: 0-1-2-3-4
41   * </pre>
42   *
43   * <p>When scanning for the next index the identified compressed index is decompressed and
44   * the lower bound of the range represented by the index is returned.
45   *
46   * <p>When scanning for the previous index the identified compressed index is decompressed
47   * and the upper bound of the range represented by the index is returned.
48   *
49   * <p>When scanning in either direction, if the search index is inside a compressed index
50   * the search index is returned.
51   *
52   * <p>Note: Search for the {@link SearchableInterval} interface outside the supported bounds
53   * {@code [left, right]} is not supported and will result in an {@link IndexOutOfBoundsException}.
54   *
55   * <p>See the BloomFilter code in Commons Collections for use of long[] data to store
56   * bits.
57   *
58   * <p>Note: This is a specialised version of {@link CompressedIndexSet} using a fixed
59   * compression of 1. This is used for performance testing. This is the most useful
60   * compression level for the partition algorithm as any compressed key that lies
61   * exactly on a partition index will only require a search for the min/max in the
62   * interval immediately below/above the partition index. A pair of indices (k, k+1)
63   * that is split into two compressed keys and lies exactly on a partition index will
64   * require a search for the min on one side and two maximum values on the other side; or
65   * max on one side and two minimum on the other. Each of these cases can be handled by
66   * dedicated select routines to find 1 or 2 values at the edge of a range.
67   *
68   * @since 1.2
69   */
70  final class CompressedIndexSet2 implements SearchableInterval {
71      /** All 64-bits bits set. */
72      private static final long LONG_MASK = -1L;
73      /** A bit shift to apply to an integer to divided by 64 (2^6). */
74      private static final int DIVIDE_BY_64 = 6;
75  
76      /** Bit indexes. */
77      private final long[] data;
78  
79      /** Left bound of the support. */
80      private final int left;
81      /** Right bound of the support. */
82      private final int right;
83  
84      /**
85       * Create an instance to store indices within the range {@code [left, right]}.
86       *
87       * @param l Lower bound (inclusive).
88       * @param r Upper bound (inclusive).
89       */
90      private CompressedIndexSet2(int l, int r) {
91          this.left = l;
92          // Note: The functional upper bound may be higher but the next/previous functionality
93          // support scanning in the original [left, right] bound.
94          this.right = r;
95          // Note: This may allow directly writing to index > right if there
96          // is extra capacity.
97          data = new long[getLongIndex((r - l) >>> 1) + 1];
98      }
99  
100     /**
101      * Create an instance to store indices within the range {@code [left, right]}.
102      * The instance is initially empty.
103      *
104      * <p>Warning: To use this object as an {@link SearchableInterval} the left and right
105      * indices should be added to the set.
106      *
107      * @param left Lower bound (inclusive).
108      * @param right Upper bound (inclusive).
109      * @return the index set
110      * @throws IllegalArgumentException if {@code compression} is not in {@code [1, 31]};
111      * or if {@code right < left}; or if {@code left < 0}
112      */
113     static CompressedIndexSet2 ofRange(int left, int right) {
114         checkLeft(left);
115         checkRange(left, right);
116         return new CompressedIndexSet2(left, right);
117     }
118 
119     /**
120      * Initialise an instance with the {@code indices}. The capacity is defined by the
121      * range required to store the minimum and maximum index at the specified
122      * {@code compression} level.
123      *
124      * <p>This object can be used as an {@link SearchableInterval} as the left and right
125      * indices will be set.
126      *
127      * @param indices Indices.
128      * @return the index set
129      * @throws IllegalArgumentException if {@code compression} is not in {@code [1, 31]};
130      * or if {@code indices.length == 0}; or if {@code left < 0}
131      */
132     static CompressedIndexSet2 of(int[] indices) {
133         return of(indices, indices.length);
134     }
135 
136     /**
137      * Initialise an instance with the {@code indices}. The capacity is defined by the
138      * range required to store the minimum and maximum index at the specified
139      * {@code compression} level.
140      *
141      * @param indices Indices.
142      * @param n Number of indices.
143      * @return the index set
144      * @throws IllegalArgumentException if {@code compression} is not in {@code [1, 31]};
145      * or if {@code n == 0}; or if {@code left < 0}
146      */
147     static CompressedIndexSet2 of(int[] indices, int n) {
148         if (n <= 0) {
149             throw new IllegalArgumentException("No indices to define the range");
150         }
151         int min = indices[0];
152         int max = min;
153         for (int i = 0; ++i < n;) {
154             min = Math.min(min, indices[i]);
155             max = Math.max(max, indices[i]);
156         }
157         checkLeft(min);
158         final CompressedIndexSet2 set = new CompressedIndexSet2(min, max);
159         for (int i = -1; ++i < n;) {
160             set.set(indices[i]);
161         }
162         return set;
163     }
164 
165     /**
166      * Gets the compressed index for this instance using the left bound and the
167      * compression level.
168      *
169      * @param index Index.
170      * @return the compressed index
171      */
172     private int compressIndex(int index) {
173         return (index - left) >>> 1;
174     }
175 
176     /**
177      * Gets the filter index for the specified bit index assuming the filter is using
178      * 64-bit longs to store bits starting at index 0.
179      *
180      * <p>The index is assumed to be positive. For a positive index the result will match
181      * {@code bitIndex / 64}.</p>
182      *
183      * <p><em>The divide is performed using bit shifts. If the input is negative the
184      * behavior is not defined.</em></p>
185      *
186      * @param bitIndex the bit index (assumed to be positive)
187      * @return the index of the bit map in an array of bit maps.
188      */
189     private static int getLongIndex(final int bitIndex) {
190         // An integer divide by 64 is equivalent to a shift of 6 bits if the integer is
191         // positive.
192         // We do not explicitly check for a negative here. Instead we use a
193         // signed shift. Any negative index will produce a negative value
194         // by sign-extension and if used as an index into an array it will throw an
195         // exception.
196         return bitIndex >> DIVIDE_BY_64;
197     }
198 
199     /**
200      * Gets the filter bit mask for the specified bit index assuming the filter is using
201      * 64-bit longs to store bits starting at index 0. The returned value is a
202      * {@code long} with only 1 bit set.
203      *
204      * <p>The index is assumed to be positive. For a positive index the result will match
205      * {@code 1L << (bitIndex % 64)}.</p>
206      *
207      * <p><em>If the input is negative the behavior is not defined.</em></p>
208      *
209      * @param bitIndex the bit index (assumed to be positive)
210      * @return the filter bit
211      */
212     private static long getLongBit(final int bitIndex) {
213         // Bit shifts only use the first 6 bits. Thus it is not necessary to mask this
214         // using 0x3f (63) or compute bitIndex % 64.
215         // Note: If the index is negative the shift will be (64 - (bitIndex & 0x3f)) and
216         // this will identify an incorrect bit.
217         return 1L << bitIndex;
218     }
219 
220     /**
221      * Returns the value of the bit with the specified index.
222      *
223      * <p>Warning: This has no range checks.
224      *
225      * @param bitIndex the bit index (assumed to be positive)
226      * @return the value of the bit with the specified index
227      */
228     boolean get(int bitIndex) {
229         // WARNING: No range checks !!!
230         final int index = compressIndex(bitIndex);
231         final int i = getLongIndex(index);
232         final long m = getLongBit(index);
233         return (data[i] & m) != 0;
234     }
235 
236     /**
237      * Sets the bit at the specified index to {@code true}.
238      *
239      * <p>Warning: This has no range checks.
240      *
241      * @param bitIndex the bit index (assumed to be positive)
242      */
243     void set(int bitIndex) {
244         // WARNING: No range checks !!!
245         final int index = compressIndex(bitIndex);
246         final int i = getLongIndex(index);
247         final long m = getLongBit(index);
248         data[i] |= m;
249     }
250 
251 
252     @Override
253     public int left() {
254         return left;
255     }
256 
257     @Override
258     public int right() {
259         return right;
260     }
261 
262     @Override
263     public int previousIndex(int k) {
264         // WARNING: No range checks !!!
265         // Assume left <= k <= right and that left and right are set bits acting as sentinels.
266         final int index = compressIndex(k);
267 
268         int i = getLongIndex(index);
269         long bits = data[i];
270 
271         // Check if this is within a compressed index. If so return the exact result.
272         if ((bits & getLongBit(index)) != 0) {
273             return k;
274         }
275 
276         // Mask bits before the bit index
277         // mask = 00011111 = -1L >>> (64 - ((index + 1) % 64))
278         bits &= LONG_MASK >>> -(index + 1);
279         for (;;) {
280             if (bits != 0) {
281                 //(i+1)       i
282                 // |   c      |
283                 // |   |      |
284                 // 0  001010000
285                 final int c = (i + 1) * Long.SIZE - Long.numberOfLeadingZeros(bits);
286                 // Decompress the prior unset bit to an index. When inflated this is the
287                 // next index above the upper bound of the compressed range so subtract 1.
288                 return (c << 1) - 1 + left;
289             }
290             // Unsupported: the interval should contain k
291             //if (i == 0) {
292             //    return left - 1;
293             //}
294             bits = data[--i];
295         }
296     }
297 
298     @Override
299     public int nextIndex(int k) {
300         // WARNING: No range checks !!!
301         // Assume left <= k <= right and that left and right are set bits acting as sentinels.
302         final int index = compressIndex(k);
303 
304         int i = getLongIndex(index);
305         long bits = data[i];
306 
307         // Check if this is within a compressed index. If so return the exact result.
308         if ((bits & getLongBit(index)) != 0) {
309             return k;
310         }
311 
312         // Mask bits after the bit index
313         // mask = 11111000 = -1L << (index % 64)
314         bits &= LONG_MASK << index;
315         for (;;) {
316             if (bits != 0) {
317                 //(i+1)       i
318                 // |      c   |
319                 // |      |   |
320                 // 0  001010000
321                 final int c = i * Long.SIZE + Long.numberOfTrailingZeros(bits);
322                 // Decompress the set bit to an index. When inflated this is the lower bound of
323                 // the compressed range and is OK for next scanning.
324                 return (c << 1) + left;
325             }
326             // Unsupported: the interval should contain k
327             //if (++i == data.length) {
328             //    return right + 1;
329             //}
330             bits = data[++i];
331         }
332     }
333 
334     /**
335      * Check the lower bound to the range is valid.
336      *
337      * @param left Lower bound (inclusive).
338      * @throws IllegalArgumentException if {@code left < 0}
339      */
340     private static void checkLeft(int left) {
341         if (left < 0) {
342             throw new IllegalArgumentException("Invalid lower index: " + left);
343         }
344     }
345 
346     /**
347      * Check the range is valid.
348      *
349      * @param left Lower bound (inclusive).
350      * @param right Upper bound (inclusive).
351      * @throws IllegalArgumentException if {@code right < left}
352      */
353     private static void checkRange(int left, int right) {
354         if (right < left) {
355             throw new IllegalArgumentException(
356                 String.format("Invalid range: [%d, %d]", left, right));
357         }
358     }
359 }