/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;

/**
 * Generates random Unicode strings containing the specified number of code points.
 * Instances are created using a builder class, which allows the
 * callers to define the properties of the generator. See the documentation for the
 * {@link Builder} class to see available properties.
 *
 * <pre>
 * // Generates a 20 code point string, using only the letters a-z
 * RandomStringGenerator generator = RandomStringGenerator.builder()
 *     .withinRange('a', 'z').get();
 * String randomLetters = generator.generate(20);
 * </pre>
 * <pre>
 * // Using Apache Commons RNG for randomness
 * UniformRandomProvider rng = RandomSource.create(...);
 * // Generates a 20 code point string, using only the letters a-z
 * RandomStringGenerator generator = RandomStringGenerator.builder()
 *     .withinRange('a', 'z')
 *     .usingRandom(rng::nextInt) // uses Java 8 syntax
 *     .get();
 * String randomLetters = generator.generate(20);
 * </pre>
 * <p>
 * {@code RandomStringGenerator} instances are thread-safe when using the
 * default random number generator (RNG). If a custom RNG is set by calling the method
 * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety
 * must be ensured externally.
 * </p>
 * <p>
 * A generator keeps its own copies of the predicate set and character list it was built with,
 * so reconfiguring or reusing the {@link Builder} after a generator has been created does not
 * affect that generator.
 * </p>
 * @since 1.1
 */
public final class RandomStringGenerator {

    /**
     * A builder for generating {@code RandomStringGenerator} instances.
     *
     * <p>The behavior of a generator is controlled by properties set by this
     * builder. Each property has a default value, which can be overridden by
     * calling the methods defined in this class, prior to calling {@link #get()}.</p>
     *
     * <p>All the property setting methods return the {@code Builder} instance to allow for method chaining.</p>
     *
     * <p>The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The
     * default values are {@code 0} and {@link Character#MAX_CODE_POINT} respectively.</p>
     *
     * <p>The source of randomness can be set using {@link #usingRandom(TextRandomProvider)},
     * otherwise {@link ThreadLocalRandom} is used.</p>
     *
     * <p>The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)},
     * which defines a collection of tests that are applied to the randomly generated code points.
     * The code points will only be included in the result if they pass at least one of the tests.
     * Some commonly used predicates are provided by the {@link CharacterPredicates} enum.</p>
     *
     * <p>This class is not thread safe.</p>
     * @since 1.1
     */
    public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {

        /**
         * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
         * ({@value}).
         */
        public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;

        /**
         * The default string length produced by this builder: {@value}.
         */
        public static final int DEFAULT_LENGTH = 0;

        /**
         * The default minimum code point allowed: {@value}.
         */
        public static final int DEFAULT_MINIMUM_CODE_POINT = 0;

        /**
         * The minimum code point allowed.
         */
        private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;

        /**
         * The maximum code point allowed.
         */
        private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;

        /**
         * Filters for code points; {@code null} means no filtering.
         */
        private Set<CharacterPredicate> inclusivePredicates;

        /**
         * The source of randomness; {@code null} means {@link ThreadLocalRandom} is used.
         */
        private TextRandomProvider random;

        /**
         * The source of provided characters; {@code null} or empty means the code point range is used.
         */
        private List<Character> characterList;

        /**
         * Creates a new instance.
         */
        public Builder() {
            // empty
        }

        /**
         * Builds a new {@code RandomStringGenerator}.
         *
         * @return A new {@code RandomStringGenerator}
         * @deprecated Use {@link #get()}.
         */
        @Deprecated
        @Override
        public RandomStringGenerator build() {
            return get();
        }

        /**
         * Limits the characters in the generated string to those that match at
         * least one of the predicates supplied.
         *
         * <p>
         * Passing {@code null} or an empty array to this method will revert to the
         * default behavior of allowing any character. Multiple calls to this
         * method will replace the previously stored predicates.
         * </p>
         *
         * @param predicates
         *            the predicates, may be {@code null} or empty
         * @return {@code this}, to allow method chaining
         */
        public Builder filteredBy(final CharacterPredicate... predicates) {
            if (ArrayUtils.isEmpty(predicates)) {
                inclusivePredicates = null;
                return this;
            }
            if (inclusivePredicates == null) {
                inclusivePredicates = new HashSet<>();
            } else {
                // Safe to reuse the set: generators copy it when they are constructed.
                inclusivePredicates.clear();
            }
            Collections.addAll(inclusivePredicates, predicates);
            return this;
        }

        /**
         * Builds a new {@code RandomStringGenerator} from the current state of this builder.
         *
         * <p>
         * The returned generator does not share mutable state with this builder, so the
         * builder may be reconfigured and reused afterwards.
         * </p>
         *
         * @return A new {@code RandomStringGenerator}
         * @since 1.12.0
         */
        @Override
        public RandomStringGenerator get() {
            return new RandomStringGenerator(minimumCodePoint, maximumCodePoint, inclusivePredicates,
                    random, characterList);
        }

        /**
         * Limits the characters in the generated string to those contained in the
         * supplied list of characters.
         *
         * <p>
         * Passing {@code null} or an empty array to this method will revert to the
         * default behavior of allowing any character. Multiple calls to this
         * method will replace the previously stored characters, including any set
         * through {@link #withinRange(char[][])}.
         * </p>
         *
         * @param chars set of predefined characters for random string generation,
         *            may be {@code null} or empty
         * @return {@code this}, to allow method chaining
         * @since 1.2
         */
        public Builder selectFrom(final char... chars) {
            characterList = new ArrayList<>();
            if (chars != null) {
                for (final char c : chars) {
                    characterList.add(c);
                }
            }
            return this;
        }

        /**
         * Overrides the default source of randomness. It is highly
         * recommended that a random number generator library like
         * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a>
         * be used to provide the random number generation.
         *
         * <p>
         * When using Java 8 or later, {@link TextRandomProvider} is a
         * functional interface and need not be explicitly implemented:
         * </p>
         * <pre>
         * {@code
         *     UniformRandomProvider rng = RandomSource.create(...);
         *     RandomStringGenerator gen = RandomStringGenerator.builder()
         *         .usingRandom(rng::nextInt)
         *         // additional builder calls as needed
         *         .get();
         * }
         * </pre>
         *
         * <p>
         * Passing {@code null} to this method will revert to the default source of
         * randomness.
         * </p>
         *
         * @param random
         *            the source of randomness, may be {@code null}
         * @return {@code this}, to allow method chaining
         */
        public Builder usingRandom(final TextRandomProvider random) {
            this.random = random;
            return this;
        }

        /**
         * Sets the array of minimum and maximum char allowed in the
         * generated string.
         *
         * For example:
         * <pre>
         * {@code
         *     char [][] pairs = {{'0','9'}};
         *     char [][] pairs = {{'a','z'}};
         *     char [][] pairs = {{'a','z'},{'0','9'}};
         * }
         * </pre>
         *
         * <p>
         * Every char of every range (bounds inclusive) is added to the character list, which
         * replaces any list previously set through this method or {@link #selectFrom(char...)}.
         * Passing {@code null} leaves the character list empty.
         * </p>
         *
         * @param pairs array of characters array, expected is to pass min, max pairs through this arg.
         * @return {@code this}, to allow method chaining.
         * @throws IllegalArgumentException
         *             if a pair does not contain exactly two chars, or if its first char is
         *             larger than its second char
         */
        public Builder withinRange(final char[]... pairs) {
            characterList = new ArrayList<>();
            if (pairs != null) {
                for (final char[] pair : pairs) {
                    Validate.isTrue(pair.length == 2, "Each pair must contain minimum and maximum code point");
                    final int minimumCodePoint = pair[0];
                    final int maximumCodePoint = pair[1];
                    Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
                            maximumCodePoint);

                    // The index is an int so the loop terminates even when the upper bound is Character.MAX_VALUE.
                    for (int index = minimumCodePoint; index <= maximumCodePoint; index++) {
                        characterList.add((char) index);
                    }
                }
            }
            return this;

        }

        /**
         * Sets the minimum and maximum code points allowed in the
         * generated string.
         *
         * @param minimumCodePoint
         *            the smallest code point allowed (inclusive)
         * @param maximumCodePoint
         *            the largest code point allowed (inclusive)
         * @return {@code this}, to allow method chaining
         * @throws IllegalArgumentException
         *             if {@code maximumCodePoint >}
         *             {@link Character#MAX_CODE_POINT}
         * @throws IllegalArgumentException
         *             if {@code minimumCodePoint < 0}
         * @throws IllegalArgumentException
         *             if {@code minimumCodePoint > maximumCodePoint}
         */
        public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
            Validate.isTrue(minimumCodePoint <= maximumCodePoint,
                    "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
            Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
            Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT,
                    "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
            this.minimumCodePoint = minimumCodePoint;
            this.maximumCodePoint = maximumCodePoint;
            return this;
        }
    }

    /**
     * Constructs a new builder.
     * @return a new builder.
     * @since 1.11.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * The smallest allowed code point (inclusive).
     */
    private final int minimumCodePoint;

    /**
     * The largest allowed code point (inclusive).
     */
    private final int maximumCodePoint;

    /**
     * Filters for code points; {@code null} means no filtering.
     */
    private final Set<CharacterPredicate> inclusivePredicates;

    /**
     * The source of randomness for this generator; {@code null} means {@link ThreadLocalRandom} is used.
     */
    private final TextRandomProvider random;

    /**
     * The source of provided characters; {@code null} or empty means the code point range is used.
     */
    private final List<Character> characterList;

    /**
     * Constructs the generator.
     *
     * <p>
     * The predicate set and the character list are copied, so later changes made
     * through the builder that supplied them cannot alter this generator.
     * </p>
     *
     * @param minimumCodePoint
     *            smallest allowed code point (inclusive)
     * @param maximumCodePoint
     *            largest allowed code point (inclusive)
     * @param inclusivePredicates
     *            filters for code points, may be {@code null}
     * @param random
     *            source of randomness, may be {@code null}
     * @param characterList list of predefined set of characters, may be {@code null}.
     */
    private RandomStringGenerator(final int minimumCodePoint, final int maximumCodePoint,
            final Set<CharacterPredicate> inclusivePredicates, final TextRandomProvider random,
            final List<Character> characterList) {
        this.minimumCodePoint = minimumCodePoint;
        this.maximumCodePoint = maximumCodePoint;
        // Defensive copies: Builder.filteredBy() clears and refills its set in place, which would
        // otherwise change the filters of a generator that has already been handed out.
        this.inclusivePredicates = inclusivePredicates == null ? null : new HashSet<>(inclusivePredicates);
        this.random = random;
        this.characterList = characterList == null ? null : new ArrayList<>(characterList);
    }

    /**
     * Generates a random string, containing the specified number of code points.
     *
     * <p>
     * Code points are randomly selected from the character list when one was supplied and is
     * not empty, otherwise between the minimum and maximum values defined in the generator.
     * Unassigned, surrogate and private use characters are not returned, although the
     * resulting string may contain pairs of surrogates that together encode a
     * supplementary character.
     * </p>
     * <p>
     * Note: the number of {@code char} code units generated will exceed
     * {@code length} if the string contains supplementary characters. See the
     * {@link Character} documentation to understand how Java stores Unicode
     * values.
     * </p>
     * <p>
     * Note: candidates that are rejected are simply redrawn. If the configured source of
     * characters contains no code point that is both of an accepted type and accepted by
     * the predicates, this method does not return.
     * </p>
     *
     * @param length
     *            the number of code points to generate
     * @return The generated string
     * @throws IllegalArgumentException
     *             if {@code length < 0}
     */
    public String generate(final int length) {
        if (length == 0) {
            return StringUtils.EMPTY;
        }
        Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
        final StringBuilder builder = new StringBuilder(length);
        long remaining = length;
        do {
            final int codePoint;
            if (characterList != null && !characterList.isEmpty()) {
                codePoint = generateRandomNumber(characterList);
            } else {
                codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
            }
            // Reject code points that cannot stand alone as a character; "continue" redraws
            // without decrementing the remaining count.
            switch (Character.getType(codePoint)) {
            case Character.UNASSIGNED:
            case Character.PRIVATE_USE:
            case Character.SURROGATE:
                continue;
            default:
            }
            if (inclusivePredicates != null) {
                // A code point is accepted as soon as any one predicate matches.
                boolean matchedFilter = false;
                for (final CharacterPredicate predicate : inclusivePredicates) {
                    if (predicate.test(codePoint)) {
                        matchedFilter = true;
                        break;
                    }
                }
                if (!matchedFilter) {
                    continue;
                }
            }
            builder.appendCodePoint(codePoint);
            remaining--;
        } while (remaining != 0);
        return builder.toString();
    }

    /**
     * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive)
     * number of code points.
     *
     * @param minLengthInclusive
     *            the minimum (inclusive) number of code points to generate
     * @param maxLengthInclusive
     *            the maximum (inclusive) number of code points to generate
     * @return The generated string
     * @throws IllegalArgumentException
     *             if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}
     * @see RandomStringGenerator#generate(int)
     * @since 1.2
     */
    public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
        Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
        Validate.isTrue(minLengthInclusive <= maxLengthInclusive,
                "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive, minLengthInclusive);
        return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
    }

    /**
     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
     * or the user-supplied source of randomness.
     *
     * <p>
     * Note: the bound is computed as {@code maxInclusive + 1} using {@code int} arithmetic,
     * which wraps around when {@code maxInclusive} is {@link Integer#MAX_VALUE}.
     * </p>
     *
     * @param minInclusive
     *            the minimum value allowed
     * @param maxInclusive
     *            the maximum value allowed
     * @return The random number.
     */
    private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
        if (random != null) {
            return random.nextInt(maxInclusive - minInclusive + 1) + minInclusive;
        }
        return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
    }

    /**
     * Picks a random element of the given character list, using a {@link ThreadLocalRandom}
     * instance or the user-supplied source of randomness, and returns it as a code point.
     *
     * @param characterList predefined char list, must not be empty.
     * @return The code point of the randomly selected character.
     */
    private int generateRandomNumber(final List<Character> characterList) {
        final int listSize = characterList.size();
        if (random != null) {
            return String.valueOf(characterList.get(random.nextInt(listSize))).codePointAt(0);
        }
        return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0);
    }
}