001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.rng.sampling;
019
020import org.apache.commons.rng.UniformRandomProvider;
021
022/**
023 * Class for representing <a href="https://en.wikipedia.org/wiki/Combination">combinations</a>
024 * of a sequence of integers.
025 *
026 * <p>A combination is a selection of items from a collection, such that (unlike
027 * permutations) the order of selection <strong>does not matter</strong>. This
028 * sampler can be used to generate a combination in an unspecified order and is
029 * faster than the corresponding {@link PermutationSampler}.</p>
030 *
031 * <p>Note that the sample order is unspecified. For example a sample
032 * combination of 2 from 4 may return {@code [0,1]} or {@code [1,0]} as the two are
033 * equivalent, and the order of a given combination may change in subsequent samples.</p>
034 *
035 * <p>The sampler can be used to generate indices to select subsets where the
036 * order of the subset is not important.</p>
037 *
038 * <p>Sampling uses {@link UniformRandomProvider#nextInt(int)}.</p>
039 *
040 * @see PermutationSampler
041 */
042public class CombinationSampler implements SharedStateSampler<CombinationSampler> {
043    /** Domain of the combination. */
044    private final int[] domain;
045    /** The number of steps of a full shuffle to perform. */
046    private final int steps;
047    /**
048     * The section to copy the domain from after a partial shuffle.
049     */
050    private final boolean upper;
051    /** RNG. */
052    private final UniformRandomProvider rng;
053
054    /**
055     * Creates a generator of combinations.
056     *
057     * <p>The {@link #sample()} method will generate an integer array of
058     * length {@code k} whose entries are selected randomly, without
059     * repetition, from the integers 0, 1, ..., {@code n}-1 (inclusive).
060     * The returned array represents a combination of {@code n} taken
061     * {@code k}.
062     *
063     * <p>In contrast to a permutation, the returned array is <strong>not
064     * guaranteed</strong> to be in a random order. The {@link #sample()}
065     * method returns the array in an unspecified order.
066     *
067     * <p>If {@code n <= 0} or {@code k <= 0} or {@code k > n} then no combination
068     * is required and an exception is raised.
069     *
070     * @param rng Generator of uniformly distributed random numbers.
071     * @param n   Domain of the combination.
072     * @param k   Size of the combination.
073     * @throws IllegalArgumentException if {@code n <= 0} or {@code k <= 0} or
074     *                                  {@code k > n}.
075     */
076    public CombinationSampler(UniformRandomProvider rng,
077                              int n,
078                              int k) {
079        SubsetSamplerUtils.checkSubset(n, k);
080        domain = PermutationSampler.natural(n);
081        // The sample can be optimised by only performing the first k or (n - k) steps
082        // from a full Fisher-Yates shuffle from the end of the domain to the start.
083        // The upper positions will then contain a random sample from the domain. The
084        // lower half is then by definition also a random sample (just not in a random order).
085        // The sample is then picked using the upper or lower half depending which
086        // makes the number of steps smaller.
087        upper = k <= n / 2;
088        steps = upper ? k : n - k;
089        this.rng = rng;
090    }
091
092    /**
093     * @param rng Generator of uniformly distributed random numbers.
094     * @param source Source to copy.
095     */
096    private CombinationSampler(UniformRandomProvider rng,
097                               CombinationSampler source) {
098        // Do not clone the domain. This ensures:
099        // 1. Thread safety as the domain may be shuffled during the clone
100        //    and a shuffle swap step can result in duplicates and missing elements
101        //    in the array.
102        // 2. If the argument RNG is an exact match for the RNG in the source
103        //    then the output sequence will differ unless the domain is currently
104        //    in natural order.
105        domain = PermutationSampler.natural(source.domain.length);
106        steps = source.steps;
107        upper = source.upper;
108        this.rng = rng;
109    }
110
111    /**
112     * Return a combination of {@code k} whose entries are selected randomly,
113     * without repetition, from the integers 0, 1, ..., {@code n}-1 (inclusive).
114     *
115     * <p>The order of the returned array is not guaranteed to be in a random order
116     * as the order of a combination <strong>does not matter</strong>.
117     *
118     * @return a random combination.
119     */
120    public int[] sample() {
121        return SubsetSamplerUtils.partialSample(domain, steps, rng, upper);
122    }
123
124    /**
125     * {@inheritDoc}
126     *
127     * @since 1.3
128     */
129    @Override
130    public CombinationSampler withUniformRandomProvider(UniformRandomProvider rng) {
131        return new CombinationSampler(rng, this);
132    }
133}