HashIndexSet.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.numbers.arrays;

  18. /**
  19.  * An index set backed by a open-addressed hash table using linear hashing. Table size is a power
  20.  * of 2 and has a maximum capacity of 2^29 with a fixed load factor of 0.5. If the functional
  21.  * capacity is exceeded then the set raises an {@link IllegalStateException}.
  22.  *
  23.  * <p>Values are stored using bit inversion. Any positive index will have a negative
  24.  * representation when stored. An empty slot is indicated by a zero.
  25.  *
  26.  * <p>This class has a minimal API. It can be used to ensure a collection of indices of
  27.  * a known size are unique:
  28.  *
  29.  * <pre>{@code
  30.  * int[] keys = ...
  31.  * HashIndexSet set = new HashIndexSet(keys.length);
  32.  * for (int k : keys) {
  33.  *   if (set.add(k)) {
  34.  *     // first occurrence of k in keys
  35.  *   }
  36.  * }
  37.  * }</pre>
  38.  *
  39.  * @see <a href="https://en.wikipedia.org/wiki/Open_addressing">Open addressing (Wikipedia)</a>
  40.  * @since 1.2
  41.  */
  42. final class HashIndexSet {
  43.     /** Message for an invalid index. */
  44.     private static final String INVALID_INDEX = "Invalid index: ";
  45.     /** The maximum capacity of the set. */
  46.     private static final int MAX_CAPACITY = 1 << 29;
  47.     /** The minimum size of the backing array. */
  48.     private static final int MIN_SIZE = 16;
  49.     /**
  50.      * Unsigned 32-bit integer numerator of the golden ratio (0.618) with an assumed
  51.      * denominator of 2^32.
  52.      *
  53.      * <pre>
  54.      * 2654435769 = round(2^32 * (sqrt(5) - 1) / 2)
  55.      * Long.toHexString((long)(0x1p32 * (Math.sqrt(5.0) - 1) / 2))
  56.      * </pre>
  57.      */
  58.     private static final int PHI = 0x9e3779b9;

  59.     /** The set. */
  60.     private final int[] set;
  61.     /** The size. */
  62.     private int size;

  63.     /**
  64.      * Create an instance with size to store up to the specified {@code capacity}.
  65.      *
  66.      * <p>The functional capacity (number of indices that can be stored) is the next power
  67.      * of 2 above {@code capacity}; or a minimum size if the requested {@code capacity} is
  68.      * small.
  69.      *
  70.      * @param capacity Capacity.
  71.      */
  72.     private HashIndexSet(int capacity) {
  73.         // This will generate a load factor at capacity in the range (0.25, 0.5]
  74.         // The use of Math.max will ignore zero/negative capacity requests.
  75.         set = new int[nextPow2(Math.max(MIN_SIZE, capacity * 2))];
  76.     }

  77.     /**
  78.      * Create an instance with size to store up to the specified {@code capacity}.
  79.      * The maximum supported {@code capacity} is 2<sup>29</sup>.
  80.      *
  81.      * @param capacity Capacity.
  82.      * @return the hash index set
  83.      * @throws IllegalArgumentException if the {@code capacity} is too large.
  84.      */
  85.     static HashIndexSet create(int capacity) {
  86.         if (capacity > MAX_CAPACITY) {
  87.             throw new IllegalArgumentException("Unsupported capacity: " + capacity);
  88.         }
  89.         return new HashIndexSet(capacity);
  90.     }

  91.     /**
  92.      * Returns the closest power-of-two number greater than or equal to {@code value}.
  93.      *
  94.      * <p>Warning: This will return {@link Integer#MIN_VALUE} for any {@code value} above
  95.      * {@code 1 << 30}. This is the next power of 2 as an unsigned integer.
  96.      *
  97.      * <p>See <a
  98.      * href="https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2">Bit
  99.      * Hacks: Rounding up to a power of 2</a>
  100.      *
  101.      * @param value Value.
  102.      * @return the closest power-of-two number greater than or equal to value
  103.      */
  104.     private static int nextPow2(int value) {
  105.         int result = value - 1;
  106.         result |= result >>> 1;
  107.         result |= result >>> 2;
  108.         result |= result >>> 4;
  109.         result |= result >>> 8;
  110.         return (result | (result >>> 16)) + 1;
  111.     }

  112.     /**
  113.      * Adds the {@code index} to the set.
  114.      *
  115.      * @param index Index.
  116.      * @return true if the set was modified by the operation
  117.      * @throws IndexOutOfBoundsException if the index is negative
  118.      */
  119.     boolean add(int index) {
  120.         if (index < 0) {
  121.             throw new IndexOutOfBoundsException(INVALID_INDEX + index);
  122.         }
  123.         final int[] keys = set;
  124.         final int key = ~index;
  125.         final int mask = keys.length - 1;
  126.         int pos = mix(index) & mask;
  127.         int curr = keys[pos];
  128.         if (curr < 0) {
  129.             if (curr == key) {
  130.                 // Already present
  131.                 return false;
  132.             }
  133.             // Probe
  134.             while ((curr = keys[pos = (pos + 1) & mask]) < 0) {
  135.                 if (curr == key) {
  136.                     // Already present
  137.                     return false;
  138.                 }
  139.             }
  140.         }
  141.         // Insert
  142.         keys[pos] = key;
  143.         // Here the load factor is 0.5: Test if size > keys.length * 0.5
  144.         if (++size > (mask + 1) >>> 1) {
  145.             // This is where we should grow the size of the set and re-insert
  146.             // all current keys into the new key storage. Here we are using a
  147.             // fixed capacity so raise an exception.
  148.             throw new IllegalStateException("Functional capacity exceeded: " + (keys.length >>> 1));
  149.         }
  150.         return true;
  151.     }

  152.     /**
  153.      * Mix the bits of an integer.
  154.      *
  155.      * <p>This is the fast hash function used in the linear hash implementation in the <a
  156.      * href="https://github.com/leventov/Koloboke">Koloboke Collections</a>.
  157.      *
  158.      * @param x Bits.
  159.      * @return the mixed bits
  160.      */
  161.     private static int mix(int x) {
  162.         final int h = x * PHI;
  163.         return h ^ (h >>> 16);
  164.     }
  165. }