001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.text; 018 019import java.util.ArrayList; 020import java.util.Collections; 021import java.util.HashSet; 022import java.util.List; 023import java.util.Set; 024import java.util.concurrent.ThreadLocalRandom; 025import java.util.function.IntUnaryOperator; 026 027import org.apache.commons.lang3.ArrayUtils; 028import org.apache.commons.lang3.StringUtils; 029import org.apache.commons.lang3.Validate; 030 031/** 032 * Generates random Unicode strings containing the specified number of code points. Instances are created using a builder class, which allows the callers to 033 * define the properties of the generator. See the documentation for the {@link Builder} class to see available properties. 034 * 035 * <pre> 036 * // Generates a 20 code point string, using only the letters a-z 037 * RandomStringGenerator generator = RandomStringGenerator.builder().withinRange('a', 'z').build(); 038 * String randomLetters = generator.generate(20); 039 * </pre> 040 * <pre> 041 * // Using Apache Commons RNG for randomness 042 * UniformRandomProvider rng = RandomSource.create(...); 043 * // Generates a 20 code point string, using only the letters a-z 044 * RandomStringGenerator generator = RandomStringGenerator.builder() 045 * .withinRange('a', 'z') 046 * .usingRandom(rng::nextInt) 047 * .build(); 048 * String randomLetters = generator.generate(20); 049 * </pre> 050 * <p> 051 * {@code RandomStringGenerator} instances are thread-safe when using the default random number generator (RNG). If a custom RNG is set by calling the method 052 * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety must be ensured externally. 053 * </p> 054 * 055 * @since 1.1 056 */ 057public final class RandomStringGenerator { 058 059 /** 060 * A builder for generating {@code RandomStringGenerator} instances. 061 * 062 * <p> 063 * The behavior of a generator is controlled by properties set by this builder. Each property has a default value, which can be overridden by calling the 064 * methods defined in this class, prior to calling {@link #build()}. 065 * </p> 066 * <p> 067 * All the property setting methods return the {@code Builder} instance to allow for method chaining. 068 * </p> 069 * <p> 070 * The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The default values are {@code 0} and 071 * {@link Character#MAX_CODE_POINT} respectively. 072 * </p> 073 * <p> 074 * The source of randomness can be set using {@link #usingRandom(TextRandomProvider)}, otherwise {@link ThreadLocalRandom} is used. 075 * </p> 076 * <p> 077 * The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)}, which defines a collection of tests that are applied 078 * to the randomly generated code points. The code points will only be included in the result if they pass at least one of the tests. Some commonly used 079 * predicates are provided by the {@link CharacterPredicates} enum. 080 * </p> 081 * <p> 082 * This class is not thread safe. 083 * </p> 084 * 085 * @since 1.1 086 */ 087 public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> { 088 089 /** 090 * The default maximum code point allowed: {@link Character#MAX_CODE_POINT} 091 * ({@value}). 092 */ 093 public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT; 094 095 /** 096 * The default string length produced by this builder: {@value}. 097 */ 098 public static final int DEFAULT_LENGTH = 0; 099 100 /** 101 * The default minimum code point allowed: {@value}. 102 */ 103 public static final int DEFAULT_MINIMUM_CODE_POINT = 0; 104 105 /** 106 * The minimum code point allowed. 107 */ 108 private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT; 109 110 /** 111 * The maximum code point allowed. 112 */ 113 private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT; 114 115 /** 116 * Filters for code points. 117 */ 118 private Set<CharacterPredicate> inclusivePredicates; 119 120 /** 121 * The source of randomness. 122 */ 123 private IntUnaryOperator random; 124 125 /** 126 * The source of provided characters. 127 */ 128 private Set<Character> characterSet = new HashSet<>(); 129 130 /** 131 * Whether calls accumulates the source of provided characters. The default is {@code false}. 132 */ 133 private boolean accumulate; 134 135 /** 136 * Creates a new instance. 137 */ 138 public Builder() { 139 // empty 140 } 141 142 /** 143 * Builds a new {@code RandomStringGenerator}. 144 * 145 * @return A new {@code RandomStringGenerator} 146 * @deprecated Use {@link #get()}. 147 */ 148 @Deprecated 149 @Override 150 public RandomStringGenerator build() { 151 return get(); 152 } 153 154 /** 155 * Limits the characters in the generated string to those that match at least one of the predicates supplied. 156 * 157 * <p> 158 * Passing {@code null} or an empty array to this method will revert to the default behavior of allowing any character. Multiple calls to this method 159 * will replace the previously stored predicates. 160 * </p> 161 * 162 * @param predicates the predicates, may be {@code null} or empty. 163 * @return {@code this} instance. 164 */ 165 public Builder filteredBy(final CharacterPredicate... predicates) { 166 if (ArrayUtils.isEmpty(predicates)) { 167 inclusivePredicates = null; 168 return this; 169 } 170 if (inclusivePredicates == null) { 171 inclusivePredicates = new HashSet<>(); 172 } else { 173 inclusivePredicates.clear(); 174 } 175 Collections.addAll(inclusivePredicates, predicates); 176 return this; 177 } 178 179 /** 180 * Builds a new {@code RandomStringGenerator}. 181 * 182 * @return A new {@code RandomStringGenerator}. 183 * @since 1.12.0 184 */ 185 @Override 186 public RandomStringGenerator get() { 187 return new RandomStringGenerator(this); 188 } 189 190 private void initCharList() { 191 if (!accumulate) { 192 characterSet = new HashSet<>(); 193 } 194 } 195 196 /** 197 * Limits the characters in the generated string to those who match at supplied list of Character. 198 * 199 * <p> 200 * Passing {@code null} or an empty array to this method will revert to the default behavior of allowing any character. Multiple calls to this method 201 * will replace the previously stored Character. 202 * </p> 203 * 204 * @param chars set of predefined Characters for random string generation the Character can be, may be {@code null} or empty 205 * @return {@code this} instance. 206 * @since 1.2 207 */ 208 public Builder selectFrom(final char... chars) { 209 initCharList(); 210 if (chars != null) { 211 for (final char c : chars) { 212 characterSet.add(c); 213 } 214 } 215 return this; 216 } 217 218 /** 219 * Sets whether calls accumulates the source of provided characters. The default is {@code false}. 220 * 221 * <pre> 222 * {@code 223 * RandomStringGenerator gen = RandomStringGenerator.builder() 224 * .setAccumulate(true) 225 * .withinRange(new char[][] { { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } }) 226 * .selectFrom('!', '"', '#', '$', '&', '\'', '(', ')', ',', '.', ':', ';', '?', '@', '[', 227 * '\\', ']', '^', '_', '`', '{', '|', '}', '~') // punctuation 228 * // additional builder calls as needed 229 * .build(); 230 * } 231 * </pre> 232 * 233 * @param accumulate whether calls accumulates the source of provided characters. The default is {@code false}. 234 * @return {@code this} instance. 235 * @since 1.14.0 236 */ 237 public Builder setAccumulate(final boolean accumulate) { 238 this.accumulate = accumulate; 239 return this; 240 } 241 242 /** 243 * Overrides the default source of randomness. It is highly recommended that a random number generator library like 244 * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation. 245 * 246 * <p> 247 * {@link TextRandomProvider} is a functional interface and need not be explicitly implemented: 248 * </p> 249 * 250 * <pre> 251 * {@code 252 * UniformRandomProvider rng = RandomSource.create(...); 253 * RandomStringGenerator gen = RandomStringGenerator.builder() 254 * .usingRandom(rng::nextInt) 255 * // additional builder calls as needed 256 * .build(); 257 * } 258 * </pre> 259 * 260 * <p> 261 * Passing {@code null} to this method will revert to the default source of randomness. 262 * </p> 263 * 264 * @param random the source of randomness, may be {@code null}. 265 * @return {@code this} instance. 266 * @since 1.14.0 267 */ 268 public Builder usingRandom(final IntUnaryOperator random) { 269 this.random = random; 270 return this; 271 } 272 273 /** 274 * Overrides the default source of randomness. It is highly recommended that a random number generator library like 275 * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation. 276 * 277 * <p> 278 * {@link TextRandomProvider} is a functional interface and need not be explicitly implemented: 279 * </p> 280 * 281 * <pre> 282 * {@code 283 * UniformRandomProvider rng = RandomSource.create(...); 284 * RandomStringGenerator gen = RandomStringGenerator.builder() 285 * .usingRandom(rng::nextInt) 286 * // additional builder calls as needed 287 * .build(); 288 * } 289 * </pre> 290 * 291 * <p> 292 * Passing {@code null} to this method will revert to the default source of randomness. 293 * </p> 294 * 295 * @param random the source of randomness, may be {@code null}. 296 * @return {@code this} instance. 297 */ 298 public Builder usingRandom(final TextRandomProvider random) { 299 this.random = random; 300 return this; 301 } 302 303 /** 304 * Sets the array of minimum and maximum char allowed in the generated string. 305 * 306 * For example: 307 * 308 * <pre> 309 * {@code 310 * char[][] pairs = { { '0', '9' } }; 311 * char[][] pairs = { { 'a', 'z' } }; 312 * char[][] pairs = { { 'a', 'z' }, { '0', '9' } }; 313 * } 314 * </pre> 315 * 316 * @param pairs array of characters array, expected is to pass min, max pairs through this arg. 317 * @return {@code this} instance. 318 */ 319 public Builder withinRange(final char[]... pairs) { 320 initCharList(); 321 if (pairs != null) { 322 for (final char[] pair : pairs) { 323 Validate.isTrue(pair.length == 2, "Each pair must contain minimum and maximum code point"); 324 final int minimumCodePoint = pair[0]; 325 final int maximumCodePoint = pair[1]; 326 Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, 327 maximumCodePoint); 328 for (int index = minimumCodePoint; index <= maximumCodePoint; index++) { 329 characterSet.add((char) index); 330 } 331 } 332 } 333 return this; 334 } 335 336 337 /** 338 * Sets the minimum and maximum code points allowed in the generated string. 339 * 340 * @param minimumCodePoint the smallest code point allowed (inclusive). 341 * @param maximumCodePoint the largest code point allowed (inclusive). 342 * @return {@code this} instance. 343 * @throws IllegalArgumentException if {@code maximumCodePoint >} {@link Character#MAX_CODE_POINT}. 344 * @throws IllegalArgumentException if {@code minimumCodePoint < 0}. 345 * @throws IllegalArgumentException if {@code minimumCodePoint > maximumCodePoint}. 346 */ 347 public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) { 348 Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, 349 maximumCodePoint); 350 Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint); 351 Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT, "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint); 352 this.minimumCodePoint = minimumCodePoint; 353 this.maximumCodePoint = maximumCodePoint; 354 return this; 355 } 356 } 357 358 /** 359 * Constructs a new builder. 360 * 361 * @return a new builder. 362 * @since 1.11.0 363 */ 364 public static Builder builder() { 365 return new Builder(); 366 } 367 368 /** 369 * The smallest allowed code point (inclusive). 370 */ 371 private final int minimumCodePoint; 372 373 /** 374 * The largest allowed code point (inclusive). 375 */ 376 private final int maximumCodePoint; 377 378 /** 379 * Filters for code points. 380 */ 381 private final Set<CharacterPredicate> inclusivePredicates; 382 383 /** 384 * The source of randomness for this generator. 385 */ 386 private final IntUnaryOperator random; 387 388 /** 389 * The source of provided characters. 390 */ 391 private final List<Character> characterList; 392 393 /** 394 * Constructs the generator. 395 * 396 * @param minimumCodePoint smallest allowed code point (inclusive). 397 * @param maximumCodePoint largest allowed code point (inclusive). 398 * @param inclusivePredicates filters for code points. 399 * @param random source of randomness. 400 * @param characterSet list of predefined set of characters. 401 */ 402 private RandomStringGenerator(final Builder builder) { 403 this.minimumCodePoint = builder.minimumCodePoint; 404 this.maximumCodePoint = builder.maximumCodePoint; 405 this.inclusivePredicates = builder.inclusivePredicates; 406 this.random = builder.random; 407 this.characterList = new ArrayList<>(builder.characterSet); 408 } 409 410 /** 411 * Generates a random string, containing the specified number of code points. 412 * 413 * <p> 414 * Code points are randomly selected between the minimum and maximum values defined in the generator. Surrogate and private use characters are not returned, 415 * although the resulting string may contain pairs of surrogates that together encode a supplementary character. 416 * </p> 417 * <p> 418 * Note: the number of {@code char} code units generated will exceed {@code length} if the string contains supplementary characters. See the 419 * {@link Character} documentation to understand how Java stores Unicode values. 420 * </p> 421 * 422 * @param length the number of code points to generate. 423 * @return The generated string. 424 * @throws IllegalArgumentException if {@code length < 0}. 425 */ 426 public String generate(final int length) { 427 if (length == 0) { 428 return StringUtils.EMPTY; 429 } 430 Validate.isTrue(length > 0, "Length %d is smaller than zero.", length); 431 final StringBuilder builder = new StringBuilder(length); 432 long remaining = length; 433 do { 434 final int codePoint; 435 if (characterList != null && !characterList.isEmpty()) { 436 codePoint = generateRandomNumber(characterList); 437 } else { 438 codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint); 439 } 440 switch (Character.getType(codePoint)) { 441 case Character.UNASSIGNED: 442 case Character.PRIVATE_USE: 443 case Character.SURROGATE: 444 continue; 445 default: 446 } 447 if (inclusivePredicates != null) { 448 boolean matchedFilter = false; 449 for (final CharacterPredicate predicate : inclusivePredicates) { 450 if (predicate.test(codePoint)) { 451 matchedFilter = true; 452 break; 453 } 454 } 455 if (!matchedFilter) { 456 continue; 457 } 458 } 459 builder.appendCodePoint(codePoint); 460 remaining--; 461 } while (remaining != 0); 462 return builder.toString(); 463 } 464 465 /** 466 * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive) number of code points. 467 * 468 * @param minLengthInclusive the minimum (inclusive) number of code points to generate. 469 * @param maxLengthInclusive the maximum (inclusive) number of code points to generate. 470 * @return The generated string. 471 * @throws IllegalArgumentException if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}. 472 * @see RandomStringGenerator#generate(int) 473 * @since 1.2 474 */ 475 public String generate(final int minLengthInclusive, final int maxLengthInclusive) { 476 Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive); 477 Validate.isTrue(minLengthInclusive <= maxLengthInclusive, "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive, 478 minLengthInclusive); 479 return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive)); 480 } 481 482 /** 483 * Generates a random number within a range, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness. 484 * 485 * @param minInclusive the minimum value allowed. 486 * @param maxInclusive the maximum value allowed. 487 * @return The random number. 488 */ 489 private int generateRandomNumber(final int minInclusive, final int maxInclusive) { 490 if (random != null) { 491 return random.applyAsInt(maxInclusive - minInclusive + 1) + minInclusive; 492 } 493 return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1); 494 } 495 496 /** 497 * Generates a random number within a range, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness. 498 * 499 * @param characterList predefined char list. 500 * @return The random number. 501 */ 502 private int generateRandomNumber(final List<Character> characterList) { 503 final int listSize = characterList.size(); 504 if (random != null) { 505 return String.valueOf(characterList.get(random.applyAsInt(listSize))).codePointAt(0); 506 } 507 return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0); 508 } 509}