/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import java.util.function.IntUnaryOperator;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;

/**
 * Generates random Unicode strings containing the specified number of code points. Instances are created using a builder class, which allows the callers to
 * define the properties of the generator. See the documentation for the {@link Builder} class to see available properties.
 *
 * <pre>
 * // Generates a 20 code point string, using only the letters a-z
 * RandomStringGenerator generator = RandomStringGenerator.builder().withinRange('a', 'z').get();
 * String randomLetters = generator.generate(20);
 * </pre>
 * <pre>
 * // Using Apache Commons RNG for randomness
 * UniformRandomProvider rng = RandomSource.create(...);
 * // Generates a 20 code point string, using only the letters a-z
 * RandomStringGenerator generator = RandomStringGenerator.builder()
 *     .withinRange('a', 'z')
 *     .usingRandom(rng::nextInt)
 *     .get();
 * String randomLetters = generator.generate(20);
 * </pre>
 * <p>
 * {@code RandomStringGenerator} instances are thread-safe when using the default random number generator (RNG). If a custom RNG is set by calling the method
 * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety must be ensured externally.
 * </p>
 * <p>
 * A generator takes a snapshot of the builder's settings when it is created; changing the builder afterwards does not affect generators that were already
 * built from it.
 * </p>
 *
 * @since 1.1
 */
public final class RandomStringGenerator {

    /**
     * A builder for generating {@code RandomStringGenerator} instances.
     *
     * <p>
     * The behavior of a generator is controlled by properties set by this builder. Each property has a default value, which can be overridden by calling the
     * methods defined in this class, prior to calling {@link #get()}.
     * </p>
     * <p>
     * All the property setting methods return the {@code Builder} instance to allow for method chaining.
     * </p>
     * <p>
     * The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The default values are {@code 0} and
     * {@link Character#MAX_CODE_POINT} respectively.
     * </p>
     * <p>
     * The source of randomness can be set using {@link #usingRandom(TextRandomProvider)}, otherwise {@link ThreadLocalRandom} is used.
     * </p>
     * <p>
     * The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)}, which defines a collection of tests that are applied
     * to the randomly generated code points. The code points will only be included in the result if they pass at least one of the tests. Some commonly used
     * predicates are provided by the {@link CharacterPredicates} enum.
     * </p>
     * <p>
     * This class is not thread safe.
     * </p>
     *
     * @since 1.1
     */
    public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {

        /**
         * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
         * ({@value}).
         */
        public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;

        /**
         * The default string length produced by this builder: {@value}.
         */
        public static final int DEFAULT_LENGTH = 0;

        /**
         * The default minimum code point allowed: {@value}.
         */
        public static final int DEFAULT_MINIMUM_CODE_POINT = 0;

        /**
         * The minimum code point allowed.
         */
        private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;

        /**
         * The maximum code point allowed.
         */
        private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;

        /**
         * Filters for code points, or {@code null} when no filtering is requested.
         */
        private Set<CharacterPredicate> inclusivePredicates;

        /**
         * The source of randomness, or {@code null} to use {@link ThreadLocalRandom}.
         */
        private IntUnaryOperator random;

        /**
         * The source of provided characters; empty when code points are drawn from the min/max range instead.
         */
        private Set<Character> characterSet = new HashSet<>();

        /**
         * Whether calls accumulate the source of provided characters. The default is {@code false}.
         */
        private boolean accumulate;

        /**
         * Creates a new instance.
         */
        public Builder() {
            // empty
        }

        /**
         * Builds a new {@code RandomStringGenerator}.
         *
         * @return A new {@code RandomStringGenerator}
         * @deprecated Use {@link #get()}.
         */
        @Deprecated
        @Override
        public RandomStringGenerator build() {
            return get();
        }

        /**
         * Limits the characters in the generated string to those that match at least one of the predicates supplied.
         *
         * <p>
         * Passing {@code null} or an empty array to this method will revert to the default behavior of allowing any character. Multiple calls to this method
         * will replace the previously stored predicates.
         * </p>
         *
         * @param predicates the predicates, may be {@code null} or empty.
         * @return {@code this} instance.
         */
        public Builder filteredBy(final CharacterPredicate... predicates) {
            if (ArrayUtils.isEmpty(predicates)) {
                inclusivePredicates = null;
                return this;
            }
            if (inclusivePredicates == null) {
                inclusivePredicates = new HashSet<>();
            } else {
                // Reuses the existing set; generators copy it at construction time, so this is safe.
                inclusivePredicates.clear();
            }
            Collections.addAll(inclusivePredicates, predicates);
            return this;
        }

        /**
         * Builds a new {@code RandomStringGenerator}.
         *
         * @return A new {@code RandomStringGenerator}.
         * @since 1.12.0
         */
        @Override
        public RandomStringGenerator get() {
            return new RandomStringGenerator(this);
        }

        /**
         * Starts a fresh character set unless this builder is accumulating characters across calls.
         */
        private void initCharList() {
            if (!accumulate) {
                characterSet = new HashSet<>();
            }
        }

        /**
         * Limits the characters in the generated string to those in the supplied list of characters.
         *
         * <p>
         * Unless {@link #setAccumulate(boolean) accumulation} is enabled, each call replaces the characters stored by earlier calls to this method and to
         * {@link #withinRange(char[][])}; passing {@code null} or an empty array then leaves no stored characters, which reverts to drawing code points from
         * the range set with {@link #withinRange(int, int)}. When accumulation is enabled, the supplied characters are added to those already stored.
         * </p>
         *
         * @param chars the characters to select from, may be {@code null} or empty.
         * @return {@code this} instance.
         * @since 1.2
         */
        public Builder selectFrom(final char... chars) {
            initCharList();
            if (chars != null) {
                for (final char c : chars) {
                    characterSet.add(c);
                }
            }
            return this;
        }

        /**
         * Sets whether calls accumulate the source of provided characters. The default is {@code false}.
         *
         * <p>
         * When {@code false}, every call to {@link #selectFrom(char...)} or {@link #withinRange(char[][])} discards the characters stored by previous calls.
         * When {@code true}, those calls add to the stored characters instead.
         * </p>
         *
         * <pre>
         * {@code
         *     RandomStringGenerator gen = RandomStringGenerator.builder()
         *         .setAccumulate(true)
         *         .withinRange(new char[][] { { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } })
         *         .selectFrom('!', '"', '#', '$', '&', '\'', '(', ')', ',', '.', ':', ';', '?', '@', '[',
         *                     '\\', ']', '^', '_', '`', '{', '|', '}', '~') // punctuation
         *         // additional builder calls as needed
         *         .get();
         * }
         * </pre>
         *
         * @param accumulate whether calls accumulate the source of provided characters. The default is {@code false}.
         * @return {@code this} instance.
         * @since 1.14.0
         */
        public Builder setAccumulate(final boolean accumulate) {
            this.accumulate = accumulate;
            return this;
        }

        /**
         * Overrides the default source of randomness. It is highly recommended that a random number generator library like
         * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation.
         *
         * <p>
         * The operator is invoked with an exclusive upper bound and is expected to return a value in the range {@code [0, bound)}:
         * </p>
         *
         * <pre>
         * {@code
         *     UniformRandomProvider rng = RandomSource.create(...);
         *     RandomStringGenerator gen = RandomStringGenerator.builder()
         *         .usingRandom(rng::nextInt)
         *         // additional builder calls as needed
         *         .get();
         * }
         * </pre>
         *
         * <p>
         * Passing {@code null} to this method will revert to the default source of randomness.
         * </p>
         *
         * @param random the source of randomness, may be {@code null}.
         * @return {@code this} instance.
         * @since 1.14.0
         */
        public Builder usingRandom(final IntUnaryOperator random) {
            this.random = random;
            return this;
        }

        /**
         * Overrides the default source of randomness. It is highly recommended that a random number generator library like
         * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation.
         *
         * <p>
         * {@link TextRandomProvider} is a functional interface and need not be explicitly implemented:
         * </p>
         *
         * <pre>
         * {@code
         *     UniformRandomProvider rng = RandomSource.create(...);
         *     RandomStringGenerator gen = RandomStringGenerator.builder()
         *         .usingRandom(rng::nextInt)
         *         // additional builder calls as needed
         *         .get();
         * }
         * </pre>
         *
         * <p>
         * Passing {@code null} to this method will revert to the default source of randomness.
         * </p>
         *
         * @param random the source of randomness, may be {@code null}.
         * @return {@code this} instance.
         */
        public Builder usingRandom(final TextRandomProvider random) {
            this.random = random;
            return this;
        }

        /**
         * Sets the array of minimum and maximum char allowed in the generated string.
         *
         * <p>
         * Every character of every inclusive {@code {min, max}} pair is stored as a selectable character. Unless
         * {@link #setAccumulate(boolean) accumulation} is enabled, each call replaces the characters stored by earlier calls to this method and to
         * {@link #selectFrom(char...)}.
         * </p>
         *
         * For example:
         *
         * <pre>
         * {@code
         *     char[][] pairs = { { '0', '9' } };
         *     char[][] pairs = { { 'a', 'z' } };
         *     char[][] pairs = { { 'a', 'z' }, { '0', '9' } };
         * }
         * </pre>
         *
         * @param pairs array of characters array, expected is to pass min, max pairs through this arg; may be {@code null}.
         * @return {@code this} instance.
         * @throws IllegalArgumentException if a pair does not contain exactly two elements, or if its first element is larger than its second.
         */
        public Builder withinRange(final char[]... pairs) {
            initCharList();
            if (pairs != null) {
                for (final char[] pair : pairs) {
                    Validate.isTrue(pair.length == 2, "Each pair must contain minimum and maximum code point");
                    // Widen to int so the loop below cannot wrap around when the pair ends at Character.MAX_VALUE.
                    final int first = pair[0];
                    final int last = pair[1];
                    Validate.isTrue(first <= last, "Minimum code point %d is larger than maximum code point %d", first, last);
                    for (int index = first; index <= last; index++) {
                        characterSet.add((char) index);
                    }
                }
            }
            return this;
        }

        /**
         * Sets the minimum and maximum code points allowed in the generated string.
         *
         * @param minimumCodePoint the smallest code point allowed (inclusive).
         * @param maximumCodePoint the largest code point allowed (inclusive).
         * @return {@code this} instance.
         * @throws IllegalArgumentException if {@code maximumCodePoint >} {@link Character#MAX_CODE_POINT}.
         * @throws IllegalArgumentException if {@code minimumCodePoint < 0}.
         * @throws IllegalArgumentException if {@code minimumCodePoint > maximumCodePoint}.
         */
        public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
            Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
                    maximumCodePoint);
            Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
            Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT, "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
            this.minimumCodePoint = minimumCodePoint;
            this.maximumCodePoint = maximumCodePoint;
            return this;
        }
    }

    /**
     * Constructs a new builder.
     *
     * @return a new builder.
     * @since 1.11.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * The smallest allowed code point (inclusive).
     */
    private final int minimumCodePoint;

    /**
     * The largest allowed code point (inclusive).
     */
    private final int maximumCodePoint;

    /**
     * Filters for code points, or {@code null} when no filtering is applied.
     */
    private final Set<CharacterPredicate> inclusivePredicates;

    /**
     * The source of randomness for this generator, or {@code null} to use {@link ThreadLocalRandom}.
     */
    private final IntUnaryOperator random;

    /**
     * The source of provided characters; when empty, code points are drawn from the min/max range.
     */
    private final List<Character> characterList;

    /**
     * Constructs the generator from a snapshot of the builder's current settings.
     *
     * @param builder the builder holding the code point range, filters, source of randomness, and predefined characters.
     */
    private RandomStringGenerator(final Builder builder) {
        this.minimumCodePoint = builder.minimumCodePoint;
        this.maximumCodePoint = builder.maximumCodePoint;
        // Defensive copy: Builder.filteredBy() clears and refills its set in place, so sharing the
        // reference would let later builder calls change the filters of an already-built generator.
        this.inclusivePredicates = builder.inclusivePredicates == null ? null : new HashSet<>(builder.inclusivePredicates);
        this.random = builder.random;
        this.characterList = new ArrayList<>(builder.characterSet);
    }

    /**
     * Generates a random string, containing the specified number of code points.
     *
     * <p>
     * Code points are randomly selected from the predefined characters when any were configured, otherwise between the minimum and maximum values defined in
     * the generator. Unassigned, surrogate and private use characters are not returned, although the resulting string may contain pairs of surrogates that
     * together encode a supplementary character.
     * </p>
     * <p>
     * Note: the number of {@code char} code units generated will exceed {@code length} if the string contains supplementary characters. See the
     * {@link Character} documentation to understand how Java stores Unicode values.
     * </p>
     * <p>
     * Note: rejected candidates are redrawn without limit, so this method does not return if no candidate the generator can draw passes the type check and
     * the configured filters.
     * </p>
     *
     * @param length the number of code points to generate.
     * @return The generated string.
     * @throws IllegalArgumentException if {@code length < 0}.
     */
    public String generate(final int length) {
        if (length == 0) {
            return StringUtils.EMPTY;
        }
        Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
        final StringBuilder builder = new StringBuilder(length);
        final boolean fromCharacterList = !characterList.isEmpty();
        int remaining = length;
        while (remaining > 0) {
            final int codePoint = fromCharacterList ? generateRandomNumber(characterList) : generateRandomNumber(minimumCodePoint, maximumCodePoint);
            // A rejected candidate does not count towards the requested length; draw again.
            if (isGeneratableType(codePoint) && matchesFilters(codePoint)) {
                builder.appendCodePoint(codePoint);
                remaining--;
            }
        }
        return builder.toString();
    }

    /**
     * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive) number of code points.
     *
     * @param minLengthInclusive the minimum (inclusive) number of code points to generate.
     * @param maxLengthInclusive the maximum (inclusive) number of code points to generate.
     * @return The generated string.
     * @throws IllegalArgumentException if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}.
     * @see RandomStringGenerator#generate(int)
     * @since 1.2
     */
    public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
        Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
        Validate.isTrue(minLengthInclusive <= maxLengthInclusive, "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive,
                minLengthInclusive);
        return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
    }

    /**
     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness.
     *
     * @param minInclusive the minimum value allowed.
     * @param maxInclusive the maximum value allowed.
     * @return The random number.
     */
    private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
        if (random != null) {
            // The custom source is given the size of the range and its result is shifted to start at minInclusive.
            return random.applyAsInt(maxInclusive - minInclusive + 1) + minInclusive;
        }
        return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
    }

    /**
     * Picks a random element of the given character list, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness.
     *
     * @param characterList predefined char list, must not be empty.
     * @return The code point value of the selected character.
     */
    private int generateRandomNumber(final List<Character> characterList) {
        final int listSize = characterList.size();
        final int index = random != null ? random.applyAsInt(listSize) : ThreadLocalRandom.current().nextInt(0, listSize);
        // A single char widens to its own code point value.
        final char selected = characterList.get(index);
        return selected;
    }

    /**
     * Tests whether the general category of a code point allows it to appear in generated strings.
     *
     * @param codePoint the candidate code point.
     * @return {@code false} for unassigned, private use and surrogate code points, {@code true} otherwise.
     */
    private static boolean isGeneratableType(final int codePoint) {
        switch (Character.getType(codePoint)) {
        case Character.UNASSIGNED:
        case Character.PRIVATE_USE:
        case Character.SURROGATE:
            return false;
        default:
            return true;
        }
    }

    /**
     * Tests whether a code point is accepted by the configured filters.
     *
     * @param codePoint the candidate code point.
     * @return {@code true} if no filters are configured or at least one of them accepts the code point.
     */
    private boolean matchesFilters(final int codePoint) {
        if (inclusivePredicates == null) {
            return true;
        }
        for (final CharacterPredicate predicate : inclusivePredicates) {
            if (predicate.test(codePoint)) {
                return true;
            }
        }
        return false;
    }
}