001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.util;
018
019import java.util.Iterator;
020import java.util.Comparator;
021import java.util.Arrays;
022import java.util.NoSuchElementException;
023import java.io.Serializable;
024import org.apache.commons.math3.exception.MathInternalError;
025import org.apache.commons.math3.exception.DimensionMismatchException;
026import org.apache.commons.math3.exception.OutOfRangeException;
027
028/**
029 * Utility to create <a href="http://en.wikipedia.org/wiki/Combination">
030 * combinations</a> {@code (n, k)} of {@code k} elements in a set of
031 * {@code n} elements.
032 *
033 * @since 3.3
034 */
035public class Combinations implements Iterable<int[]> {
036    /** Size of the set from which combinations are drawn. */
037    private final int n;
038    /** Number of elements in each combination. */
039    private final int k;
040    /** Iteration order. */
041    private final IterationOrder iterationOrder;
042
043    /**
044     * Describes the type of iteration performed by the
045     * {@link #iterator() iterator}.
046     */
047    private enum IterationOrder {
048        /** Lexicographic order. */
049        LEXICOGRAPHIC
050    }
051
052   /**
053     * Creates an instance whose range is the k-element subsets of
054     * {0, ..., n - 1} represented as {@code int[]} arrays.
055     * <p>
056     * The iteration order is lexicographic: the arrays returned by the
057     * {@link #iterator() iterator} are sorted in descending order and
058     * they are visited in lexicographic order with significance from
059     * right to left.
060     * For example, {@code new Combinations(4, 2).iterator()} returns
061     * an iterator that will generate the following sequence of arrays
062     * on successive calls to
063     * {@code next()}:<br/>
064     * {@code [0, 1], [0, 2], [1, 2], [0, 3], [1, 3], [2, 3]}
065     * </p>
066     * If {@code k == 0} an iterator containing an empty array is returned;
067     * if {@code k == n} an iterator containing [0, ..., n - 1] is returned.
068     *
069     * @param n Size of the set from which subsets are selected.
070     * @param k Size of the subsets to be enumerated.
071     * @throws org.apache.commons.math3.exception.NotPositiveException if {@code n < 0}.
072     * @throws org.apache.commons.math3.exception.NumberIsTooLargeException if {@code k > n}.
073     */
074    public Combinations(int n,
075                        int k) {
076        this(n, k, IterationOrder.LEXICOGRAPHIC);
077    }
078
079    /**
080     * Creates an instance whose range is the k-element subsets of
081     * {0, ..., n - 1} represented as {@code int[]} arrays.
082     * <p>
083     * If the {@code iterationOrder} argument is set to
084     * {@link IterationOrder#LEXICOGRAPHIC}, the arrays returned by the
085     * {@link #iterator() iterator} are sorted in descending order and
086     * they are visited in lexicographic order with significance from
087     * right to left.
088     * For example, {@code new Combinations(4, 2).iterator()} returns
089     * an iterator that will generate the following sequence of arrays
090     * on successive calls to
091     * {@code next()}:<br/>
092     * {@code [0, 1], [0, 2], [1, 2], [0, 3], [1, 3], [2, 3]}
093     * </p>
094     * If {@code k == 0} an iterator containing an empty array is returned;
095     * if {@code k == n} an iterator containing [0, ..., n - 1] is returned.
096     *
097     * @param n Size of the set from which subsets are selected.
098     * @param k Size of the subsets to be enumerated.
099     * @param iterationOrder Specifies the {@link #iterator() iteration order}.
100     * @throws org.apache.commons.math3.exception.NotPositiveException if {@code n < 0}.
101     * @throws org.apache.commons.math3.exception.NumberIsTooLargeException if {@code k > n}.
102     */
103    private Combinations(int n,
104                         int k,
105                         IterationOrder iterationOrder) {
106        CombinatoricsUtils.checkBinomial(n, k);
107        this.n = n;
108        this.k = k;
109        this.iterationOrder = iterationOrder;
110    }
111
112    /**
113     * Gets the size of the set from which combinations are drawn.
114     *
115     * @return the size of the universe.
116     */
117    public int getN() {
118        return n;
119    }
120
121    /**
122     * Gets the number of elements in each combination.
123     *
124     * @return the size of the subsets to be enumerated.
125     */
126    public int getK() {
127        return k;
128    }
129
130    /** {@inheritDoc} */
131    public Iterator<int[]> iterator() {
132        if (k == 0 ||
133            k == n) {
134            return new SingletonIterator(MathArrays.natural(k));
135        }
136
137        switch (iterationOrder) {
138        case LEXICOGRAPHIC:
139            return new LexicographicIterator(n, k);
140        default:
141            throw new MathInternalError(); // Should never happen.
142        }
143    }
144
145    /**
146     * Defines a lexicographic ordering of combinations.
147     * The returned comparator allows to compare any two combinations
148     * that can be produced by this instance's {@link #iterator() iterator}.
149     * Its {@code compare(int[],int[])} method will throw exceptions if
150     * passed combinations that are inconsistent with this instance:
151     * <ul>
152     *  <li>{@code DimensionMismatchException} if the array lengths are not
153     *      equal to {@code k},</li>
154     *  <li>{@code OutOfRangeException} if an element of the array is not
155     *      within the interval [0, {@code n}).</li>
156     * </ul>
157     * @return a lexicographic comparator.
158     */
159    public Comparator<int[]> comparator() {
160        return new LexicographicComparator(n, k);
161    }
162
163    /**
164     * Lexicographic combinations iterator.
165     * <p>
166     * Implementation follows Algorithm T in <i>The Art of Computer Programming</i>
167     * Internet Draft (PRE-FASCICLE 3A), "A Draft of Section 7.2.1.3 Generating All
168     * Combinations</a>, D. Knuth, 2004.</p>
169     * <p>
170     * The degenerate cases {@code k == 0} and {@code k == n} are NOT handled by this
171     * implementation.  If constructor arguments satisfy {@code k == 0}
172     * or {@code k >= n}, no exception is generated, but the iterator is empty.
173     * </p>
174     *
175     */
176    private static class LexicographicIterator implements Iterator<int[]> {
177        /** Size of subsets returned by the iterator */
178        private final int k;
179
180        /**
181         * c[1], ..., c[k] stores the next combination; c[k + 1], c[k + 2] are
182         * sentinels.
183         * <p>
184         * Note that c[0] is "wasted" but this makes it a little easier to
185         * follow the code.
186         * </p>
187         */
188        private final int[] c;
189
190        /** Return value for {@link #hasNext()} */
191        private boolean more = true;
192
193        /** Marker: smallest index such that c[j + 1] > j */
194        private int j;
195
196        /**
197         * Construct a CombinationIterator to enumerate k-sets from n.
198         * <p>
199         * NOTE: If {@code k === 0} or {@code k >= n}, the Iterator will be empty
200         * (that is, {@link #hasNext()} will return {@code false} immediately.
201         * </p>
202         *
203         * @param n size of the set from which subsets are enumerated
204         * @param k size of the subsets to enumerate
205         */
206        LexicographicIterator(int n, int k) {
207            this.k = k;
208            c = new int[k + 3];
209            if (k == 0 || k >= n) {
210                more = false;
211                return;
212            }
213            // Initialize c to start with lexicographically first k-set
214            for (int i = 1; i <= k; i++) {
215                c[i] = i - 1;
216            }
217            // Initialize sentinels
218            c[k + 1] = n;
219            c[k + 2] = 0;
220            j = k; // Set up invariant: j is smallest index such that c[j + 1] > j
221        }
222
223        /**
224         * {@inheritDoc}
225         */
226        public boolean hasNext() {
227            return more;
228        }
229
230        /**
231         * {@inheritDoc}
232         */
233        public int[] next() {
234            if (!more) {
235                throw new NoSuchElementException();
236            }
237            // Copy return value (prepared by last activation)
238            final int[] ret = new int[k];
239            System.arraycopy(c, 1, ret, 0, k);
240
241            // Prepare next iteration
242            // T2 and T6 loop
243            int x = 0;
244            if (j > 0) {
245                x = j;
246                c[j] = x;
247                j--;
248                return ret;
249            }
250            // T3
251            if (c[1] + 1 < c[2]) {
252                c[1]++;
253                return ret;
254            } else {
255                j = 2;
256            }
257            // T4
258            boolean stepDone = false;
259            while (!stepDone) {
260                c[j - 1] = j - 2;
261                x = c[j] + 1;
262                if (x == c[j + 1]) {
263                    j++;
264                } else {
265                    stepDone = true;
266                }
267            }
268            // T5
269            if (j > k) {
270                more = false;
271                return ret;
272            }
273            // T6
274            c[j] = x;
275            j--;
276            return ret;
277        }
278
279        /**
280         * Not supported.
281         */
282        public void remove() {
283            throw new UnsupportedOperationException();
284        }
285    }
286
287    /**
288     * Iterator with just one element to handle degenerate cases (full array,
289     * empty array) for combination iterator.
290     */
291    private static class SingletonIterator implements Iterator<int[]> {
292        /** Singleton array */
293        private final int[] singleton;
294        /** True on initialization, false after first call to next */
295        private boolean more = true;
296        /**
297         * Create a singleton iterator providing the given array.
298         * @param singleton array returned by the iterator
299         */
300        SingletonIterator(final int[] singleton) {
301            this.singleton = singleton;
302        }
303        /** @return True until next is called the first time, then false */
304        public boolean hasNext() {
305            return more;
306        }
307        /** @return the singleton in first activation; throws NSEE thereafter */
308        public int[] next() {
309            if (more) {
310                more = false;
311                return singleton;
312            } else {
313                throw new NoSuchElementException();
314            }
315        }
316        /** Not supported */
317        public void remove() {
318            throw new UnsupportedOperationException();
319        }
320    }
321
322    /**
323     * Defines the lexicographic ordering of combinations, using
324     * the {@link #lexNorm(int[])} method.
325     */
326    private static class LexicographicComparator
327        implements Comparator<int[]>, Serializable {
328        /** Serializable version identifier. */
329        private static final long serialVersionUID = 20130906L;
330        /** Size of the set from which combinations are drawn. */
331        private final int n;
332        /** Number of elements in each combination. */
333        private final int k;
334
335        /**
336         * @param n Size of the set from which subsets are selected.
337         * @param k Size of the subsets to be enumerated.
338         */
339        LexicographicComparator(int n, int k) {
340            this.n = n;
341            this.k = k;
342        }
343
344        /**
345         * {@inheritDoc}
346         *
347         * @throws DimensionMismatchException if the array lengths are not
348         * equal to {@code k}.
349         * @throws OutOfRangeException if an element of the array is not
350         * within the interval [0, {@code n}).
351         */
352        public int compare(int[] c1,
353                           int[] c2) {
354            if (c1.length != k) {
355                throw new DimensionMismatchException(c1.length, k);
356            }
357            if (c2.length != k) {
358                throw new DimensionMismatchException(c2.length, k);
359            }
360
361            // Method "lexNorm" works with ordered arrays.
362            final int[] c1s = MathArrays.copyOf(c1);
363            Arrays.sort(c1s);
364            final int[] c2s = MathArrays.copyOf(c2);
365            Arrays.sort(c2s);
366
367            final long v1 = lexNorm(c1s);
368            final long v2 = lexNorm(c2s);
369
370            if (v1 < v2) {
371                return -1;
372            } else if (v1 > v2) {
373                return 1;
374            } else {
375                return 0;
376            }
377        }
378
379        /**
380         * Computes the value (in base 10) represented by the digit
381         * (interpreted in base {@code n}) in the input array in reverse
382         * order.
383         * For example if {@code c} is {@code {3, 2, 1}}, and {@code n}
384         * is 3, the method will return 18.
385         *
386         * @param c Input array.
387         * @return the lexicographic norm.
388         * @throws OutOfRangeException if an element of the array is not
389         * within the interval [0, {@code n}).
390         */
391        private long lexNorm(int[] c) {
392            long ret = 0;
393            for (int i = 0; i < c.length; i++) {
394                final int digit = c[i];
395                if (digit < 0 ||
396                    digit >= n) {
397                    throw new OutOfRangeException(digit, 0, n - 1);
398                }
399
400                ret += c[i] * ArithmeticUtils.pow(n, i);
401            }
402            return ret;
403        }
404    }
405}