001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.math3.util; 018 019import java.util.Iterator; 020import java.util.Comparator; 021import java.util.Arrays; 022import java.util.NoSuchElementException; 023import java.io.Serializable; 024import org.apache.commons.math3.exception.MathInternalError; 025import org.apache.commons.math3.exception.DimensionMismatchException; 026import org.apache.commons.math3.exception.OutOfRangeException; 027 028/** 029 * Utility to create <a href="http://en.wikipedia.org/wiki/Combination"> 030 * combinations</a> {@code (n, k)} of {@code k} elements in a set of 031 * {@code n} elements. 032 * 033 * @since 3.3 034 */ 035public class Combinations implements Iterable<int[]> { 036 /** Size of the set from which combinations are drawn. */ 037 private final int n; 038 /** Number of elements in each combination. */ 039 private final int k; 040 /** Iteration order. */ 041 private final IterationOrder iterationOrder; 042 043 /** 044 * Describes the type of iteration performed by the 045 * {@link #iterator() iterator}. 046 */ 047 private enum IterationOrder { 048 /** Lexicographic order. */ 049 LEXICOGRAPHIC 050 } 051 052 /** 053 * Creates an instance whose range is the k-element subsets of 054 * {0, ..., n - 1} represented as {@code int[]} arrays. 055 * <p> 056 * The iteration order is lexicographic: the arrays returned by the 057 * {@link #iterator() iterator} are sorted in descending order and 058 * they are visited in lexicographic order with significance from 059 * right to left. 060 * For example, {@code new Combinations(4, 2).iterator()} returns 061 * an iterator that will generate the following sequence of arrays 062 * on successive calls to 063 * {@code next()}:<br/> 064 * {@code [0, 1], [0, 2], [1, 2], [0, 3], [1, 3], [2, 3]} 065 * </p> 066 * If {@code k == 0} an iterator containing an empty array is returned; 067 * if {@code k == n} an iterator containing [0, ..., n - 1] is returned. 068 * 069 * @param n Size of the set from which subsets are selected. 070 * @param k Size of the subsets to be enumerated. 071 * @throws org.apache.commons.math3.exception.NotPositiveException if {@code n < 0}. 072 * @throws org.apache.commons.math3.exception.NumberIsTooLargeException if {@code k > n}. 073 */ 074 public Combinations(int n, 075 int k) { 076 this(n, k, IterationOrder.LEXICOGRAPHIC); 077 } 078 079 /** 080 * Creates an instance whose range is the k-element subsets of 081 * {0, ..., n - 1} represented as {@code int[]} arrays. 082 * <p> 083 * If the {@code iterationOrder} argument is set to 084 * {@link IterationOrder#LEXICOGRAPHIC}, the arrays returned by the 085 * {@link #iterator() iterator} are sorted in descending order and 086 * they are visited in lexicographic order with significance from 087 * right to left. 088 * For example, {@code new Combinations(4, 2).iterator()} returns 089 * an iterator that will generate the following sequence of arrays 090 * on successive calls to 091 * {@code next()}:<br/> 092 * {@code [0, 1], [0, 2], [1, 2], [0, 3], [1, 3], [2, 3]} 093 * </p> 094 * If {@code k == 0} an iterator containing an empty array is returned; 095 * if {@code k == n} an iterator containing [0, ..., n - 1] is returned. 096 * 097 * @param n Size of the set from which subsets are selected. 098 * @param k Size of the subsets to be enumerated. 099 * @param iterationOrder Specifies the {@link #iterator() iteration order}. 100 * @throws org.apache.commons.math3.exception.NotPositiveException if {@code n < 0}. 101 * @throws org.apache.commons.math3.exception.NumberIsTooLargeException if {@code k > n}. 102 */ 103 private Combinations(int n, 104 int k, 105 IterationOrder iterationOrder) { 106 CombinatoricsUtils.checkBinomial(n, k); 107 this.n = n; 108 this.k = k; 109 this.iterationOrder = iterationOrder; 110 } 111 112 /** 113 * Gets the size of the set from which combinations are drawn. 114 * 115 * @return the size of the universe. 116 */ 117 public int getN() { 118 return n; 119 } 120 121 /** 122 * Gets the number of elements in each combination. 123 * 124 * @return the size of the subsets to be enumerated. 125 */ 126 public int getK() { 127 return k; 128 } 129 130 /** {@inheritDoc} */ 131 public Iterator<int[]> iterator() { 132 if (k == 0 || 133 k == n) { 134 return new SingletonIterator(MathArrays.natural(k)); 135 } 136 137 switch (iterationOrder) { 138 case LEXICOGRAPHIC: 139 return new LexicographicIterator(n, k); 140 default: 141 throw new MathInternalError(); // Should never happen. 142 } 143 } 144 145 /** 146 * Defines a lexicographic ordering of combinations. 147 * The returned comparator allows to compare any two combinations 148 * that can be produced by this instance's {@link #iterator() iterator}. 149 * Its {@code compare(int[],int[])} method will throw exceptions if 150 * passed combinations that are inconsistent with this instance: 151 * <ul> 152 * <li>{@code DimensionMismatchException} if the array lengths are not 153 * equal to {@code k},</li> 154 * <li>{@code OutOfRangeException} if an element of the array is not 155 * within the interval [0, {@code n}).</li> 156 * </ul> 157 * @return a lexicographic comparator. 158 */ 159 public Comparator<int[]> comparator() { 160 return new LexicographicComparator(n, k); 161 } 162 163 /** 164 * Lexicographic combinations iterator. 165 * <p> 166 * Implementation follows Algorithm T in <i>The Art of Computer Programming</i> 167 * Internet Draft (PRE-FASCICLE 3A), "A Draft of Section 7.2.1.3 Generating All 168 * Combinations</a>, D. Knuth, 2004.</p> 169 * <p> 170 * The degenerate cases {@code k == 0} and {@code k == n} are NOT handled by this 171 * implementation. If constructor arguments satisfy {@code k == 0} 172 * or {@code k >= n}, no exception is generated, but the iterator is empty. 173 * </p> 174 * 175 */ 176 private static class LexicographicIterator implements Iterator<int[]> { 177 /** Size of subsets returned by the iterator */ 178 private final int k; 179 180 /** 181 * c[1], ..., c[k] stores the next combination; c[k + 1], c[k + 2] are 182 * sentinels. 183 * <p> 184 * Note that c[0] is "wasted" but this makes it a little easier to 185 * follow the code. 186 * </p> 187 */ 188 private final int[] c; 189 190 /** Return value for {@link #hasNext()} */ 191 private boolean more = true; 192 193 /** Marker: smallest index such that c[j + 1] > j */ 194 private int j; 195 196 /** 197 * Construct a CombinationIterator to enumerate k-sets from n. 198 * <p> 199 * NOTE: If {@code k === 0} or {@code k >= n}, the Iterator will be empty 200 * (that is, {@link #hasNext()} will return {@code false} immediately. 201 * </p> 202 * 203 * @param n size of the set from which subsets are enumerated 204 * @param k size of the subsets to enumerate 205 */ 206 LexicographicIterator(int n, int k) { 207 this.k = k; 208 c = new int[k + 3]; 209 if (k == 0 || k >= n) { 210 more = false; 211 return; 212 } 213 // Initialize c to start with lexicographically first k-set 214 for (int i = 1; i <= k; i++) { 215 c[i] = i - 1; 216 } 217 // Initialize sentinels 218 c[k + 1] = n; 219 c[k + 2] = 0; 220 j = k; // Set up invariant: j is smallest index such that c[j + 1] > j 221 } 222 223 /** 224 * {@inheritDoc} 225 */ 226 public boolean hasNext() { 227 return more; 228 } 229 230 /** 231 * {@inheritDoc} 232 */ 233 public int[] next() { 234 if (!more) { 235 throw new NoSuchElementException(); 236 } 237 // Copy return value (prepared by last activation) 238 final int[] ret = new int[k]; 239 System.arraycopy(c, 1, ret, 0, k); 240 241 // Prepare next iteration 242 // T2 and T6 loop 243 int x = 0; 244 if (j > 0) { 245 x = j; 246 c[j] = x; 247 j--; 248 return ret; 249 } 250 // T3 251 if (c[1] + 1 < c[2]) { 252 c[1]++; 253 return ret; 254 } else { 255 j = 2; 256 } 257 // T4 258 boolean stepDone = false; 259 while (!stepDone) { 260 c[j - 1] = j - 2; 261 x = c[j] + 1; 262 if (x == c[j + 1]) { 263 j++; 264 } else { 265 stepDone = true; 266 } 267 } 268 // T5 269 if (j > k) { 270 more = false; 271 return ret; 272 } 273 // T6 274 c[j] = x; 275 j--; 276 return ret; 277 } 278 279 /** 280 * Not supported. 281 */ 282 public void remove() { 283 throw new UnsupportedOperationException(); 284 } 285 } 286 287 /** 288 * Iterator with just one element to handle degenerate cases (full array, 289 * empty array) for combination iterator. 290 */ 291 private static class SingletonIterator implements Iterator<int[]> { 292 /** Singleton array */ 293 private final int[] singleton; 294 /** True on initialization, false after first call to next */ 295 private boolean more = true; 296 /** 297 * Create a singleton iterator providing the given array. 298 * @param singleton array returned by the iterator 299 */ 300 SingletonIterator(final int[] singleton) { 301 this.singleton = singleton; 302 } 303 /** @return True until next is called the first time, then false */ 304 public boolean hasNext() { 305 return more; 306 } 307 /** @return the singleton in first activation; throws NSEE thereafter */ 308 public int[] next() { 309 if (more) { 310 more = false; 311 return singleton; 312 } else { 313 throw new NoSuchElementException(); 314 } 315 } 316 /** Not supported */ 317 public void remove() { 318 throw new UnsupportedOperationException(); 319 } 320 } 321 322 /** 323 * Defines the lexicographic ordering of combinations, using 324 * the {@link #lexNorm(int[])} method. 325 */ 326 private static class LexicographicComparator 327 implements Comparator<int[]>, Serializable { 328 /** Serializable version identifier. */ 329 private static final long serialVersionUID = 20130906L; 330 /** Size of the set from which combinations are drawn. */ 331 private final int n; 332 /** Number of elements in each combination. */ 333 private final int k; 334 335 /** 336 * @param n Size of the set from which subsets are selected. 337 * @param k Size of the subsets to be enumerated. 338 */ 339 LexicographicComparator(int n, int k) { 340 this.n = n; 341 this.k = k; 342 } 343 344 /** 345 * {@inheritDoc} 346 * 347 * @throws DimensionMismatchException if the array lengths are not 348 * equal to {@code k}. 349 * @throws OutOfRangeException if an element of the array is not 350 * within the interval [0, {@code n}). 351 */ 352 public int compare(int[] c1, 353 int[] c2) { 354 if (c1.length != k) { 355 throw new DimensionMismatchException(c1.length, k); 356 } 357 if (c2.length != k) { 358 throw new DimensionMismatchException(c2.length, k); 359 } 360 361 // Method "lexNorm" works with ordered arrays. 362 final int[] c1s = MathArrays.copyOf(c1); 363 Arrays.sort(c1s); 364 final int[] c2s = MathArrays.copyOf(c2); 365 Arrays.sort(c2s); 366 367 final long v1 = lexNorm(c1s); 368 final long v2 = lexNorm(c2s); 369 370 if (v1 < v2) { 371 return -1; 372 } else if (v1 > v2) { 373 return 1; 374 } else { 375 return 0; 376 } 377 } 378 379 /** 380 * Computes the value (in base 10) represented by the digit 381 * (interpreted in base {@code n}) in the input array in reverse 382 * order. 383 * For example if {@code c} is {@code {3, 2, 1}}, and {@code n} 384 * is 3, the method will return 18. 385 * 386 * @param c Input array. 387 * @return the lexicographic norm. 388 * @throws OutOfRangeException if an element of the array is not 389 * within the interval [0, {@code n}). 390 */ 391 private long lexNorm(int[] c) { 392 long ret = 0; 393 for (int i = 0; i < c.length; i++) { 394 final int digit = c[i]; 395 if (digit < 0 || 396 digit >= n) { 397 throw new OutOfRangeException(digit, 0, n - 1); 398 } 399 400 ret += c[i] * ArithmeticUtils.pow(n, i); 401 } 402 return ret; 403 } 404 } 405}