1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.numbers.examples.jmh.arrays;
19
20 /**
21 * An index set backed by a open-addressed hash table using linear hashing. Table size is a power
22 * of 2 and has a maximum capacity of 2^29 with a fixed load factor of 0.5. If the functional
23 * capacity is exceeded then the set raises an {@link IllegalStateException}.
24 *
25 * <p>Values are stored using bit inversion. Any positive index will have a negative
26 * representation when stored. An empty slot is indicated by a zero.
27 *
28 * <p>This class has a minimal API. It can be used to ensure a collection of indices of
29 * a known size are unique:
30 *
31 * <pre>{@code
32 * int[] keys = ...
33 * HashIndexSet set = new HashIndexSet(keys.length);
34 * for (int k : keys) {
35 * if (set.add(k)) {
36 * // first occurrence of k in keys
37 * }
38 * }
39 * }</pre>
40 *
41 * @see <a href="https://en.wikipedia.org/wiki/Open_addressing">Open addressing (Wikipedia)</a>
42 * @since 1.2
43 */
44 final class HashIndexSet {
45 /** Message for an invalid index. */
46 private static final String INVALID_INDEX = "Invalid index: ";
47 /** The maximum capacity of the set. */
48 private static final int MAX_CAPACITY = 1 << 29;
49 /** The minimum size of the backing array. */
50 private static final int MIN_SIZE = 16;
51 /**
52 * Unsigned 32-bit integer numerator of the golden ratio (0.618) with an assumed
53 * denominator of 2^32.
54 *
55 * <pre>
56 * 2654435769 = round(2^32 * (sqrt(5) - 1) / 2)
57 * Long.toHexString((long)(0x1p32 * (Math.sqrt(5.0) - 1) / 2))
58 * </pre>
59 */
60 private static final int PHI = 0x9e3779b9;
61
62 /** The set. */
63 private final int[] set;
64 /** The size. */
65 private int size;
66
67 /**
68 * Create an instance with size to store up to the specified {@code capacity}.
69 *
70 * <p>The functional capacity (number of indices that can be stored) is the next power
71 * of 2 above {@code capacity}; or a minimum size if the requested {@code capacity} is
72 * small.
73 *
74 * @param capacity Capacity (assumed to be positive).
75 */
76 HashIndexSet(int capacity) {
77 if (capacity > MAX_CAPACITY) {
78 throw new IllegalArgumentException("Unsupported capacity: " + capacity);
79 }
80 // This will generate a load factor at capacity in the range (0.25, 0.5]
81 // The use of Math.max will ignore zero/negative capacity requests.
82 set = new int[nextPow2(Math.max(MIN_SIZE, capacity * 2))];
83 }
84
85 /**
86 * Return the memory footprint in bytes. This is always a power of 2.
87 *
88 * <p>This will return the size as if not limited to a capacity of 2<sup>29</sup>.
89 * In this case the size will exceed the maximum size of an {@code int[]} array.
90 *
91 * <p>This method is intended to provide information to choose if the data structure
92 * is memory efficient.
93 *
94 * @param capacity Capacity.
95 * @return the memory footprint
96 */
97 static long memoryFootprint(int capacity) {
98 if (capacity <= (MIN_SIZE >> 1)) {
99 // 4 bytes/int
100 return MIN_SIZE << 2;
101 }
102 // Double the next power of 2, then convert integer count to bytes (4 bytes/int)
103 // * 2 * 4 == * 2^3
104 return Integer.toUnsignedLong(nextPow2(capacity)) << 3;
105 }
106
107 /**
108 * Returns the closest power-of-two number greater than or equal to {@code value}.
109 *
110 * <p>Warning: This will return {@link Integer#MIN_VALUE} for any {@code value} above
111 * {@code 1 << 30}. This is the next power of 2 as an unsigned integer.
112 *
113 * <p>See <a
114 * href="https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2">Bit
115 * Hacks: Rounding up to a power of 2</a>
116 *
117 * @param value Value.
118 * @return the closest power-of-two number greater than or equal to value
119 */
120 private static int nextPow2(int value) {
121 int result = value - 1;
122 result |= result >>> 1;
123 result |= result >>> 2;
124 result |= result >>> 4;
125 result |= result >>> 8;
126 return (result | (result >>> 16)) + 1;
127 }
128
129 /**
130 * Adds the {@code index} to the set.
131 *
132 * @param index Index.
133 * @return true if the set was modified by the operation
134 * @throws IndexOutOfBoundsException if the index is negative
135 */
136 boolean add(int index) {
137 if (index < 0) {
138 throw new IndexOutOfBoundsException(INVALID_INDEX + index);
139 }
140 final int[] keys = set;
141 final int key = ~index;
142 final int mask = keys.length - 1;
143 int pos = mix(index) & mask;
144 int curr = keys[pos];
145 if (curr < 0) {
146 if (curr == key) {
147 // Already present
148 return false;
149 }
150 // Probe
151 while ((curr = keys[pos = (pos + 1) & mask]) < 0) {
152 if (curr == key) {
153 // Already present
154 return false;
155 }
156 }
157 }
158 // Insert
159 keys[pos] = key;
160 // Here the load factor is 0.5: Test if size > keys.length * 0.5
161 if (++size > (mask + 1) >>> 1) {
162 // This is where we should grow the size of the set and re-insert
163 // all current keys into the new key storage. Here we are using a
164 // fixed capacity so raise an exception.
165 throw new IllegalStateException("Functional capacity exceeded: " + (keys.length >>> 1));
166 }
167 return true;
168 }
169
170 /**
171 * Test if the {@code index} is in the set.
172 *
173 * <p>This method is present for testing. It is not required when filtering a collection
174 * of indices with duplicates to a unique set of indices.
175 *
176 * @param index Index.
177 * @return true if the set contains the index
178 * @throws IndexOutOfBoundsException if the index is negative
179 */
180 boolean contains(int index) {
181 if (index < 0) {
182 throw new IndexOutOfBoundsException(INVALID_INDEX + index);
183 }
184 final int[] keys = set;
185 final int mask = keys.length - 1;
186 int pos = mix(index) & mask;
187 int curr = keys[pos];
188 if (curr == 0) {
189 return false;
190 }
191 final int key = ~index;
192 if (curr == key) {
193 return true;
194 }
195 // Probe
196 while (true) {
197 pos = (pos + 1) & mask;
198 curr = keys[pos];
199 if (curr == 0) {
200 // No more entries
201 return false;
202 }
203 if (curr == key) {
204 return true;
205 }
206 }
207 }
208
209 /**
210 * Mix the bits of an integer.
211 *
212 * <p>This is the fast hash function used in the linear hash implementation in the <a
213 * href="https://github.com/leventov/Koloboke">Koloboke Collections</a>.
214 *
215 * @param x Bits.
216 * @return the mixed bits
217 */
218 private static int mix(int x) {
219 final int h = x * PHI;
220 return h ^ (h >>> 16);
221 }
222
223 /**
224 * Returns the number of distinct indices in the set.
225 *
226 * @return the size
227 */
228 int size() {
229 return size;
230 }
231
232 /**
233 * Write each index in the set into the provided array. Returns the number of indices.
234 *
235 * <p>The caller must ensure the output array has sufficient capacity.
236 *
237 * <p>Warning: The indices are not ordered.
238 *
239 * <p>This method is present for testing. It is not required when filtering a collection
240 * of indices with duplicates to a unique set of indices. It can be used to write
241 * out the unique indices, but they are not in the encounter order of indices
242 * {@link #add(int) added} to the set.
243 *
244 * @param a Output array.
245 * @return count of indices
246 * @see #size()
247 */
248 int toArray(int[] a) {
249 final int[] keys = set;
250 int c = 0;
251 for (final int key : keys) {
252 if (key < 0) {
253 a[c++] = ~key;
254 }
255 }
256 // assert c == size
257 return c;
258 }
259 }