/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;

/**
 * <p>
 * Generates random Unicode strings containing the specified number of code points.
 * Instances are created using a builder class, which allows the
 * callers to define the properties of the generator. See the documentation for the
 * {@link Builder} class to see available properties.
 * </p>
 * <pre>
 * // Generates a 20 code point string, using only the letters a-z
 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
 *     .withinRange('a', 'z').build();
 * String randomLetters = generator.generate(20);
 * </pre>
 * <pre>
 * // Using Apache Commons RNG for randomness
 * UniformRandomProvider rng = RandomSource.create(...);
 * // Generates a 20 code point string, using only the letters a-z
 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
 *     .withinRange('a', 'z')
 *     .usingRandom(rng::nextInt) // uses Java 8 syntax
 *     .build();
 * String randomLetters = generator.generate(20);
 * </pre>
 * <p>
 * {@code RandomStringGenerator} instances are thread-safe when using the
 * default random number generator (RNG). If a custom RNG is set by calling the method
 * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety
 * must be ensured externally.
 * </p>
 * @since 1.1
 */
public final class RandomStringGenerator {

    /**
     * The smallest allowed code point (inclusive).
     */
    private final int minimumCodePoint;

    /**
     * The largest allowed code point (inclusive).
     */
    private final int maximumCodePoint;

    /**
     * Filters for code points; {@code null} means no filtering is applied.
     */
    private final Set<CharacterPredicate> inclusivePredicates;

    /**
     * The source of randomness for this generator; {@code null} means
     * {@link ThreadLocalRandom} is used.
     */
    private final TextRandomProvider random;

    /**
     * The source of provided characters. When non-{@code null} and non-empty it is used
     * instead of the {@code minimumCodePoint}..{@code maximumCodePoint} range.
     */
    private final List<Character> characterList;

    /**
     * Constructs the generator.
     *
     * @param minimumCodePoint
     *            smallest allowed code point (inclusive)
     * @param maximumCodePoint
     *            largest allowed code point (inclusive)
     * @param inclusivePredicates
     *            filters for code points
     * @param random
     *            source of randomness
     * @param characterList list of predefined set of characters.
     */
    private RandomStringGenerator(final int minimumCodePoint, final int maximumCodePoint,
            final Set<CharacterPredicate> inclusivePredicates, final TextRandomProvider random,
            final List<Character> characterList) {
        this.minimumCodePoint = minimumCodePoint;
        this.maximumCodePoint = maximumCodePoint;
        this.inclusivePredicates = inclusivePredicates;
        this.random = random;
        this.characterList = characterList;
    }

    /**
     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
     * or the user-supplied source of randomness.
     *
     * @param minInclusive
     *            the minimum value allowed
     * @param maxInclusive
     *            the maximum value allowed
     * @return The random number.
     */
    private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
        if (random != null) {
            return random.nextInt(maxInclusive - minInclusive + 1) + minInclusive;
        }
        return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
    }

    /**
     * Picks a random element of the given character list, using a {@link ThreadLocalRandom}
     * instance or the user-supplied source of randomness, and returns it as a code point.
     *
     * @param characterList predefined char list, must not be empty.
     * @return The value of the randomly selected {@code char}.
     */
    private int generateRandomNumber(final List<Character> characterList) {
        final int listSize = characterList.size();
        final int index = random != null
                ? random.nextInt(listSize)
                : ThreadLocalRandom.current().nextInt(0, listSize);
        // A single char widened to int is exactly what String.valueOf(c).codePointAt(0)
        // yields, without allocating a temporary String for every draw.
        return characterList.get(index).charValue();
    }

    /**
     * <p>
     * Generates a random string, containing the specified number of code points.
     * </p>
     * <p>Code points are randomly selected from the character list defined in the generator if
     * one was supplied and is not empty, otherwise between the minimum and maximum values
     * defined in the generator.
     * Surrogate and private use characters are not returned, although the
     * resulting string may contain pairs of surrogates that together encode a
     * supplementary character.
     * </p>
     * <p>
     * A candidate that is unassigned, private use or surrogate, or that matches none of the
     * configured predicates, is discarded and another one is drawn. Consequently this method
     * only returns if the configured source contains at least one acceptable code point.
     * </p>
     * <p>
     * Note: the number of {@code char} code units generated will exceed
     * {@code length} if the string contains supplementary characters. See the
     * {@link Character} documentation to understand how Java stores Unicode
     * values.
     * </p>
     *
     * @param length
     *            the number of code points to generate
     * @return The generated string
     * @throws IllegalArgumentException
     *             if {@code length < 0}
     */
    public String generate(final int length) {
        if (length == 0) {
            return StringUtils.EMPTY;
        }
        Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);

        final StringBuilder builder = new StringBuilder(length);
        long remaining = length;

        do {
            final int codePoint;
            if (characterList != null && !characterList.isEmpty()) {
                codePoint = generateRandomNumber(characterList);
            } else {
                codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
            }

            // Reject candidates that cannot stand alone as a character and draw again.
            switch (Character.getType(codePoint)) {
            case Character.UNASSIGNED:
            case Character.PRIVATE_USE:
            case Character.SURROGATE:
                continue;
            default:
            }

            // With predicates configured, a candidate must satisfy at least one of them.
            if (inclusivePredicates != null) {
                boolean matchedFilter = false;
                for (final CharacterPredicate predicate : inclusivePredicates) {
                    if (predicate.test(codePoint)) {
                        matchedFilter = true;
                        break;
                    }
                }
                if (!matchedFilter) {
                    continue;
                }
            }

            builder.appendCodePoint(codePoint);
            remaining--;

        } while (remaining != 0);

        return builder.toString();
    }

    /**
     * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive)
     * number of code points.
     *
     * @param minLengthInclusive
     *            the minimum (inclusive) number of code points to generate
     * @param maxLengthInclusive
     *            the maximum (inclusive) number of code points to generate
     * @return The generated string
     * @throws IllegalArgumentException
     *             if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}
     * @see RandomStringGenerator#generate(int)
     * @since 1.2
     */
    public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
        Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
        Validate.isTrue(minLengthInclusive <= maxLengthInclusive,
                "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive, minLengthInclusive);
        return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
    }

    /**
     * <p>A builder for generating {@code RandomStringGenerator} instances.</p>
     * <p>The behavior of a generator is controlled by properties set by this
     * builder. Each property has a default value, which can be overridden by
     * calling the methods defined in this class, prior to calling {@link #build()}.</p>
     *
     * <p>All the property setting methods return the {@code Builder} instance to allow for method chaining.</p>
     *
     * <p>The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The
     * default values are {@code 0} and {@link Character#MAX_CODE_POINT} respectively.</p>
     *
     * <p>The source of randomness can be set using {@link #usingRandom(TextRandomProvider)},
     * otherwise {@link ThreadLocalRandom} is used.</p>
     *
     * <p>The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)},
     * which defines a collection of tests that are applied to the randomly generated code points.
     * The code points will only be included in the result if they pass at least one of the tests.
     * Some commonly used predicates are provided by the {@link CharacterPredicates} enum.</p>
     *
     * <p>This class is not thread safe.</p>
     * @since 1.1
     */
    public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {

        /**
         * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
         * ({@value}).
         */
        public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;

        /**
         * The default string length produced by this builder: {@value}.
         */
        public static final int DEFAULT_LENGTH = 0;

        /**
         * The default minimum code point allowed: {@value}.
         */
        public static final int DEFAULT_MINIMUM_CODE_POINT = 0;

        /**
         * The minimum code point allowed.
         */
        private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;

        /**
         * The maximum code point allowed.
         */
        private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;

        /**
         * Filters for code points.
         */
        private Set<CharacterPredicate> inclusivePredicates;

        /**
         * The source of randomness.
         */
        private TextRandomProvider random;

        /**
         * The source of provided characters.
         */
        private List<Character> characterList;

        /**
         * <p>
         * Specifies the minimum and maximum code points allowed in the
         * generated string.
         * </p>
         *
         * @param minimumCodePoint
         *            the smallest code point allowed (inclusive)
         * @param maximumCodePoint
         *            the largest code point allowed (inclusive)
         * @return {@code this}, to allow method chaining
         * @throws IllegalArgumentException
         *             if {@code maximumCodePoint >}
         *             {@link Character#MAX_CODE_POINT}
         * @throws IllegalArgumentException
         *             if {@code minimumCodePoint < 0}
         * @throws IllegalArgumentException
         *             if {@code minimumCodePoint > maximumCodePoint}
         */
        public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
            Validate.isTrue(minimumCodePoint <= maximumCodePoint,
                    "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
            Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
            Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT,
                    "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);

            this.minimumCodePoint = minimumCodePoint;
            this.maximumCodePoint = maximumCodePoint;
            return this;
        }

        /**
         * <p>
         * Specifies the array of minimum and maximum char allowed in the
         * generated string.
         * </p>
         *
         * For example:
         * <pre>
         * {@code
         *     char [][] pairs = {{'0','9'}};
         *     char [][] pairs = {{'a','z'}};
         *     char [][] pairs = {{'a','z'},{'0','9'}};
         * }
         * </pre>
         *
         * <p>
         * Every call replaces the characters previously stored by this method or by
         * {@link #selectFrom(char...)}. While the resulting character list is not empty it is
         * used instead of the range set with {@link #withinRange(int, int)}. Passing
         * {@code null} or no pairs leaves the character list empty.
         * </p>
         *
         * @param pairs array of characters array, expected is to pass min, max pairs through this arg.
         * @return {@code this}, to allow method chaining.
         * @throws IllegalArgumentException
         *             if a pair does not contain exactly two elements
         * @throws IllegalArgumentException
         *             if the first element of a pair is larger than the second
         */
        public Builder withinRange(final char[]... pairs) {
            characterList = new ArrayList<>();
            if (pairs == null) {
                // Same tolerance for null as filteredBy and selectFrom.
                return this;
            }
            for (final char[] pair : pairs) {
                Validate.isTrue(pair.length == 2,
                        "Each pair must contain minimum and maximum code point");
                final int minimumCodePoint = pair[0];
                final int maximumCodePoint = pair[1];
                Validate.isTrue(minimumCodePoint <= maximumCodePoint,
                        "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);

                for (int index = minimumCodePoint; index <= maximumCodePoint; index++) {
                    characterList.add((char) index);
                }
            }
            return this;
        }

        /**
         * <p>
         * Limits the characters in the generated string to those that match at
         * least one of the predicates supplied.
         * </p>
         *
         * <p>
         * Passing {@code null} or an empty array to this method will revert to the
         * default behavior of allowing any character. Multiple calls to this
         * method will replace the previously stored predicates. Generators that were
         * already built keep the predicates they were built with.
         * </p>
         *
         * @param predicates
         *            the predicates, may be {@code null} or empty
         * @return {@code this}, to allow method chaining
         */
        public Builder filteredBy(final CharacterPredicate... predicates) {
            if (ArrayUtils.isEmpty(predicates)) {
                inclusivePredicates = null;
                return this;
            }

            // Always start from a fresh set: build() hands the current set to the generator,
            // so clearing it in place would alter generators that have already been built.
            final Set<CharacterPredicate> replacement = new HashSet<>();
            Collections.addAll(replacement, predicates);
            inclusivePredicates = replacement;

            return this;
        }

        /**
         * <p>
         * Overrides the default source of randomness.  It is highly
         * recommended that a random number generator library like
         * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a>
         * be used to provide the random number generation.
         * </p>
         *
         * <p>
         * When using Java 8 or later, {@link TextRandomProvider} is a
         * functional interface and need not be explicitly implemented:
         * </p>
         * <pre>
         * {@code
         *     UniformRandomProvider rng = RandomSource.create(...);
         *     RandomStringGenerator gen = new RandomStringGenerator.Builder()
         *         .usingRandom(rng::nextInt)
         *         // additional builder calls as needed
         *         .build();
         * }
         * </pre>
         *
         * <p>
         * Passing {@code null} to this method will revert to the default source of
         * randomness.
         * </p>
         *
         * @param random
         *            the source of randomness, may be {@code null}
         * @return {@code this}, to allow method chaining
         */
        public Builder usingRandom(final TextRandomProvider random) {
            this.random = random;
            return this;
        }

        /**
         * <p>
         * Limits the characters in the generated string to those contained in the
         * supplied list of characters.
         * </p>
         *
         * <p>
         * Passing {@code null} or an empty array to this method will revert to the
         * default behavior of selecting from the configured code point range. Multiple calls
         * to this method will replace the previously stored characters, including those set
         * with {@link #withinRange(char[][])}.
         * </p>
         *
         * @param chars set of predefined characters for random string generation,
         *            may be {@code null} or empty
         * @return {@code this}, to allow method chaining
         * @since 1.2
         */
        public Builder selectFrom(final char... chars) {
            characterList = new ArrayList<>();
            if (chars != null) {
                for (final char c : chars) {
                    characterList.add(c);
                }
            }
            return this;
        }

        /**
         * <p>Builds the {@code RandomStringGenerator} using the properties specified.</p>
         * @return The configured {@code RandomStringGenerator}
         */
        @Override
        public RandomStringGenerator build() {
            return new RandomStringGenerator(minimumCodePoint, maximumCodePoint, inclusivePredicates,
                    random, characterList);
        }
    }
}