1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.numbers.arrays; 19 20 /** 21 * An index set backed by a open-addressed hash table using linear hashing. Table size is a power 22 * of 2 and has a maximum capacity of 2^29 with a fixed load factor of 0.5. If the functional 23 * capacity is exceeded then the set raises an {@link IllegalStateException}. 24 * 25 * <p>Values are stored using bit inversion. Any positive index will have a negative 26 * representation when stored. An empty slot is indicated by a zero. 27 * 28 * <p>This class has a minimal API. It can be used to ensure a collection of indices of 29 * a known size are unique: 30 * 31 * <pre>{@code 32 * int[] keys = ... 33 * HashIndexSet set = new HashIndexSet(keys.length); 34 * for (int k : keys) { 35 * if (set.add(k)) { 36 * // first occurrence of k in keys 37 * } 38 * } 39 * }</pre> 40 * 41 * @see <a href="https://en.wikipedia.org/wiki/Open_addressing">Open addressing (Wikipedia)</a> 42 * @since 1.2 43 */ 44 final class HashIndexSet { 45 /** Message for an invalid index. */ 46 private static final String INVALID_INDEX = "Invalid index: "; 47 /** The maximum capacity of the set. */ 48 private static final int MAX_CAPACITY = 1 << 29; 49 /** The minimum size of the backing array. */ 50 private static final int MIN_SIZE = 16; 51 /** 52 * Unsigned 32-bit integer numerator of the golden ratio (0.618) with an assumed 53 * denominator of 2^32. 54 * 55 * <pre> 56 * 2654435769 = round(2^32 * (sqrt(5) - 1) / 2) 57 * Long.toHexString((long)(0x1p32 * (Math.sqrt(5.0) - 1) / 2)) 58 * </pre> 59 */ 60 private static final int PHI = 0x9e3779b9; 61 62 /** The set. */ 63 private final int[] set; 64 /** The size. */ 65 private int size; 66 67 /** 68 * Create an instance with size to store up to the specified {@code capacity}. 69 * 70 * <p>The functional capacity (number of indices that can be stored) is the next power 71 * of 2 above {@code capacity}; or a minimum size if the requested {@code capacity} is 72 * small. 73 * 74 * @param capacity Capacity. 75 */ 76 private HashIndexSet(int capacity) { 77 // This will generate a load factor at capacity in the range (0.25, 0.5] 78 // The use of Math.max will ignore zero/negative capacity requests. 79 set = new int[nextPow2(Math.max(MIN_SIZE, capacity * 2))]; 80 } 81 82 /** 83 * Create an instance with size to store up to the specified {@code capacity}. 84 * The maximum supported {@code capacity} is 2<sup>29</sup>. 85 * 86 * @param capacity Capacity. 87 * @return the hash index set 88 * @throws IllegalArgumentException if the {@code capacity} is too large. 89 */ 90 static HashIndexSet create(int capacity) { 91 if (capacity > MAX_CAPACITY) { 92 throw new IllegalArgumentException("Unsupported capacity: " + capacity); 93 } 94 return new HashIndexSet(capacity); 95 } 96 97 /** 98 * Returns the closest power-of-two number greater than or equal to {@code value}. 99 * 100 * <p>Warning: This will return {@link Integer#MIN_VALUE} for any {@code value} above 101 * {@code 1 << 30}. This is the next power of 2 as an unsigned integer. 102 * 103 * <p>See <a 104 * href="https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2">Bit 105 * Hacks: Rounding up to a power of 2</a> 106 * 107 * @param value Value. 108 * @return the closest power-of-two number greater than or equal to value 109 */ 110 private static int nextPow2(int value) { 111 int result = value - 1; 112 result |= result >>> 1; 113 result |= result >>> 2; 114 result |= result >>> 4; 115 result |= result >>> 8; 116 return (result | (result >>> 16)) + 1; 117 } 118 119 /** 120 * Adds the {@code index} to the set. 121 * 122 * @param index Index. 123 * @return true if the set was modified by the operation 124 * @throws IndexOutOfBoundsException if the index is negative 125 */ 126 boolean add(int index) { 127 if (index < 0) { 128 throw new IndexOutOfBoundsException(INVALID_INDEX + index); 129 } 130 final int[] keys = set; 131 final int key = ~index; 132 final int mask = keys.length - 1; 133 int pos = mix(index) & mask; 134 int curr = keys[pos]; 135 if (curr < 0) { 136 if (curr == key) { 137 // Already present 138 return false; 139 } 140 // Probe 141 while ((curr = keys[pos = (pos + 1) & mask]) < 0) { 142 if (curr == key) { 143 // Already present 144 return false; 145 } 146 } 147 } 148 // Insert 149 keys[pos] = key; 150 // Here the load factor is 0.5: Test if size > keys.length * 0.5 151 if (++size > (mask + 1) >>> 1) { 152 // This is where we should grow the size of the set and re-insert 153 // all current keys into the new key storage. Here we are using a 154 // fixed capacity so raise an exception. 155 throw new IllegalStateException("Functional capacity exceeded: " + (keys.length >>> 1)); 156 } 157 return true; 158 } 159 160 /** 161 * Mix the bits of an integer. 162 * 163 * <p>This is the fast hash function used in the linear hash implementation in the <a 164 * href="https://github.com/leventov/Koloboke">Koloboke Collections</a>. 165 * 166 * @param x Bits. 167 * @return the mixed bits 168 */ 169 private static int mix(int x) { 170 final int h = x * PHI; 171 return h ^ (h >>> 16); 172 } 173 }