1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.text;
18
19 import java.util.ArrayList;
20 import java.util.Collections;
21 import java.util.HashSet;
22 import java.util.List;
23 import java.util.Set;
24 import java.util.concurrent.ThreadLocalRandom;
25 import java.util.function.IntUnaryOperator;
26
27 import org.apache.commons.lang3.ArrayUtils;
28 import org.apache.commons.lang3.StringUtils;
29 import org.apache.commons.lang3.Validate;
30
31 /**
32 * Generates random Unicode strings containing the specified number of code points. Instances are created using a builder class, which allows the callers to
33 * define the properties of the generator. See the documentation for the {@link Builder} class to see available properties.
34 *
35 * <pre>
36 * // Generates a 20 code point string, using only the letters a-z
37 * RandomStringGenerator generator = RandomStringGenerator.builder().withinRange('a', 'z').build();
38 * String randomLetters = generator.generate(20);
39 * </pre>
40 * <pre>
41 * // Using Apache Commons RNG for randomness
42 * UniformRandomProvider rng = RandomSource.create(...);
43 * // Generates a 20 code point string, using only the letters a-z
44 * RandomStringGenerator generator = RandomStringGenerator.builder()
45 * .withinRange('a', 'z')
46 * .usingRandom(rng::nextInt)
47 * .build();
48 * String randomLetters = generator.generate(20);
49 * </pre>
50 * <p>
51 * {@code RandomStringGenerator} instances are thread-safe when using the default random number generator (RNG). If a custom RNG is set by calling the method
52 * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety must be ensured externally.
53 * </p>
54 *
55 * @since 1.1
56 */
57 public final class RandomStringGenerator {
58
59 /**
60 * A builder for generating {@code RandomStringGenerator} instances.
61 *
62 * <p>
63 * The behavior of a generator is controlled by properties set by this builder. Each property has a default value, which can be overridden by calling the
64 * methods defined in this class, prior to calling {@link #build()}.
65 * </p>
66 * <p>
67 * All the property setting methods return the {@code Builder} instance to allow for method chaining.
68 * </p>
69 * <p>
70 * The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The default values are {@code 0} and
71 * {@link Character#MAX_CODE_POINT} respectively.
72 * </p>
73 * <p>
74 * The source of randomness can be set using {@link #usingRandom(TextRandomProvider)}, otherwise {@link ThreadLocalRandom} is used.
75 * </p>
76 * <p>
77 * The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)}, which defines a collection of tests that are applied
78 * to the randomly generated code points. The code points will only be included in the result if they pass at least one of the tests. Some commonly used
79 * predicates are provided by the {@link CharacterPredicates} enum.
80 * </p>
81 * <p>
82 * This class is not thread safe.
83 * </p>
84 *
85 * @since 1.1
86 */
87 public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {
88
89 /**
90 * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
91 * ({@value}).
92 */
93 public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;
94
95 /**
96 * The default string length produced by this builder: {@value}.
97 */
98 public static final int DEFAULT_LENGTH = 0;
99
100 /**
101 * The default minimum code point allowed: {@value}.
102 */
103 public static final int DEFAULT_MINIMUM_CODE_POINT = 0;
104
105 /**
106 * The minimum code point allowed.
107 */
108 private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;
109
110 /**
111 * The maximum code point allowed.
112 */
113 private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;
114
115 /**
116 * Filters for code points.
117 */
118 private Set<CharacterPredicate> inclusivePredicates;
119
120 /**
121 * The source of randomness.
122 */
123 private IntUnaryOperator random;
124
125 /**
126 * The source of provided characters.
127 */
128 private Set<Character> characterSet = new HashSet<>();
129
130 /**
131 * Whether calls accumulates the source of provided characters. The default is {@code false}.
132 */
133 private boolean accumulate;
134
135 /**
136 * Creates a new instance.
137 */
138 public Builder() {
139 // empty
140 }
141
142 /**
143 * Builds a new {@code RandomStringGenerator}.
144 *
145 * @return A new {@code RandomStringGenerator}
146 * @deprecated Use {@link #get()}.
147 */
148 @Deprecated
149 @Override
150 public RandomStringGenerator build() {
151 return get();
152 }
153
154 /**
155 * Limits the characters in the generated string to those that match at least one of the predicates supplied.
156 *
157 * <p>
158 * Passing {@code null} or an empty array to this method will revert to the default behavior of allowing any character. Multiple calls to this method
159 * will replace the previously stored predicates.
160 * </p>
161 *
162 * @param predicates the predicates, may be {@code null} or empty.
163 * @return {@code this} instance.
164 */
165 public Builder filteredBy(final CharacterPredicate... predicates) {
166 if (ArrayUtils.isEmpty(predicates)) {
167 inclusivePredicates = null;
168 return this;
169 }
170 if (inclusivePredicates == null) {
171 inclusivePredicates = new HashSet<>();
172 } else {
173 inclusivePredicates.clear();
174 }
175 Collections.addAll(inclusivePredicates, predicates);
176 return this;
177 }
178
179 /**
180 * Builds a new {@code RandomStringGenerator}.
181 *
182 * @return A new {@code RandomStringGenerator}.
183 * @since 1.12.0
184 */
185 @Override
186 public RandomStringGenerator get() {
187 return new RandomStringGenerator(this);
188 }
189
190 private void initCharList() {
191 if (!accumulate) {
192 characterSet = new HashSet<>();
193 }
194 }
195
196 /**
197 * Limits the characters in the generated string to those who match at supplied list of Character.
198 *
199 * <p>
200 * Passing {@code null} or an empty array to this method will revert to the default behavior of allowing any character. Multiple calls to this method
201 * will replace the previously stored Character.
202 * </p>
203 *
204 * @param chars set of predefined Characters for random string generation the Character can be, may be {@code null} or empty.
205 * @return {@code this} instance.
206 * @since 1.2
207 */
208 public Builder selectFrom(final char... chars) {
209 initCharList();
210 if (chars != null) {
211 for (final char c : chars) {
212 characterSet.add(c);
213 }
214 }
215 return this;
216 }
217
218 /**
219 * Sets whether calls accumulates the source of provided characters. The default is {@code false}.
220 *
221 * <pre>
222 * {@code
223 * RandomStringGenerator gen = RandomStringGenerator.builder()
224 * .setAccumulate(true)
225 * .withinRange(new char[][] { { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } })
226 * .selectFrom('!', '"', '#', '$', '&', '\'', '(', ')', ',', '.', ':', ';', '?', '@', '[',
227 * '\\', ']', '^', '_', '`', '{', '|', '}', '~') // punctuation
228 * // additional builder calls as needed
229 * .build();
230 * }
231 * </pre>
232 *
233 * @param accumulate whether calls accumulates the source of provided characters. The default is {@code false}.
234 * @return {@code this} instance.
235 * @since 1.14.0
236 */
237 public Builder setAccumulate(final boolean accumulate) {
238 this.accumulate = accumulate;
239 return this;
240 }
241
242 /**
243 * Overrides the default source of randomness. It is highly recommended that a random number generator library like
244 * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation.
245 *
246 * <p>
247 * {@link TextRandomProvider} is a functional interface and need not be explicitly implemented:
248 * </p>
249 *
250 * <pre>
251 * {@code
252 * UniformRandomProvider rng = RandomSource.create(...);
253 * RandomStringGenerator gen = RandomStringGenerator.builder()
254 * .usingRandom(rng::nextInt)
255 * // additional builder calls as needed
256 * .build();
257 * }
258 * </pre>
259 *
260 * <p>
261 * Passing {@code null} to this method will revert to the default source of randomness.
262 * </p>
263 *
264 * @param random the source of randomness, may be {@code null}.
265 * @return {@code this} instance.
266 * @since 1.14.0
267 */
268 public Builder usingRandom(final IntUnaryOperator random) {
269 this.random = random;
270 return this;
271 }
272
273 /**
274 * Overrides the default source of randomness. It is highly recommended that a random number generator library like
275 * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation.
276 *
277 * <p>
278 * {@link TextRandomProvider} is a functional interface and need not be explicitly implemented:
279 * </p>
280 *
281 * <pre>
282 * {@code
283 * UniformRandomProvider rng = RandomSource.create(...);
284 * RandomStringGenerator gen = RandomStringGenerator.builder()
285 * .usingRandom(rng::nextInt)
286 * // additional builder calls as needed
287 * .build();
288 * }
289 * </pre>
290 *
291 * <p>
292 * Passing {@code null} to this method will revert to the default source of randomness.
293 * </p>
294 *
295 * @param random the source of randomness, may be {@code null}.
296 * @return {@code this} instance.
297 */
298 public Builder usingRandom(final TextRandomProvider random) {
299 this.random = random;
300 return this;
301 }
302
303 /**
304 * Sets the array of minimum and maximum char allowed in the generated string.
305 *
306 * For example:
307 *
308 * <pre>
309 * {@code
310 * char[][] pairs = { { '0', '9' } };
311 * char[][] pairs = { { 'a', 'z' } };
312 * char[][] pairs = { { 'a', 'z' }, { '0', '9' } };
313 * }
314 * </pre>
315 *
316 * @param pairs array of characters array, expected is to pass min, max pairs through this arg.
317 * @return {@code this} instance.
318 */
319 public Builder withinRange(final char[]... pairs) {
320 initCharList();
321 if (pairs != null) {
322 for (final char[] pair : pairs) {
323 Validate.isTrue(pair.length == 2, "Each pair must contain minimum and maximum code point");
324 final int minimumCodePoint = pair[0];
325 final int maximumCodePoint = pair[1];
326 Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
327 maximumCodePoint);
328 for (int index = minimumCodePoint; index <= maximumCodePoint; index++) {
329 characterSet.add((char) index);
330 }
331 }
332 }
333 return this;
334 }
335
336
337 /**
338 * Sets the minimum and maximum code points allowed in the generated string.
339 *
340 * @param minimumCodePoint the smallest code point allowed (inclusive).
341 * @param maximumCodePoint the largest code point allowed (inclusive).
342 * @return {@code this} instance.
343 * @throws IllegalArgumentException if {@code maximumCodePoint >} {@link Character#MAX_CODE_POINT}.
344 * @throws IllegalArgumentException if {@code minimumCodePoint < 0}.
345 * @throws IllegalArgumentException if {@code minimumCodePoint > maximumCodePoint}.
346 */
347 public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
348 Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
349 maximumCodePoint);
350 Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
351 Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT, "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
352 this.minimumCodePoint = minimumCodePoint;
353 this.maximumCodePoint = maximumCodePoint;
354 return this;
355 }
356 }
357
358 /**
359 * Constructs a new builder.
360 *
361 * @return a new builder.
362 * @since 1.11.0
363 */
364 public static Builder builder() {
365 return new Builder();
366 }
367
368 /**
369 * The smallest allowed code point (inclusive).
370 */
371 private final int minimumCodePoint;
372
373 /**
374 * The largest allowed code point (inclusive).
375 */
376 private final int maximumCodePoint;
377
378 /**
379 * Filters for code points.
380 */
381 private final Set<CharacterPredicate> inclusivePredicates;
382
383 /**
384 * The source of randomness for this generator.
385 */
386 private final IntUnaryOperator random;
387
388 /**
389 * The source of provided characters.
390 */
391 private final List<Character> characterList;
392
393 /**
394 * Constructs the generator.
395 *
396 * @param minimumCodePoint smallest allowed code point (inclusive).
397 * @param maximumCodePoint largest allowed code point (inclusive).
398 * @param inclusivePredicates filters for code points.
399 * @param random source of randomness.
400 * @param characterSet list of predefined set of characters.
401 */
402 private RandomStringGenerator(final Builder builder) {
403 this.minimumCodePoint = builder.minimumCodePoint;
404 this.maximumCodePoint = builder.maximumCodePoint;
405 this.inclusivePredicates = builder.inclusivePredicates;
406 this.random = builder.random;
407 this.characterList = new ArrayList<>(builder.characterSet);
408 }
409
410 /**
411 * Generates a random string, containing the specified number of code points.
412 *
413 * <p>
414 * Code points are randomly selected between the minimum and maximum values defined in the generator. Surrogate and private use characters are not returned,
415 * although the resulting string may contain pairs of surrogates that together encode a supplementary character.
416 * </p>
417 * <p>
418 * Note: the number of {@code char} code units generated will exceed {@code length} if the string contains supplementary characters. See the
419 * {@link Character} documentation to understand how Java stores Unicode values.
420 * </p>
421 *
422 * @param length the number of code points to generate.
423 * @return The generated string.
424 * @throws IllegalArgumentException if {@code length < 0}.
425 */
426 public String generate(final int length) {
427 if (length == 0) {
428 return StringUtils.EMPTY;
429 }
430 Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
431 final StringBuilder builder = new StringBuilder(length);
432 long remaining = length;
433 do {
434 final int codePoint;
435 if (characterList != null && !characterList.isEmpty()) {
436 codePoint = generateRandomNumber(characterList);
437 } else {
438 codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
439 }
440 switch (Character.getType(codePoint)) {
441 case Character.UNASSIGNED:
442 case Character.PRIVATE_USE:
443 case Character.SURROGATE:
444 continue;
445 default:
446 }
447 if (inclusivePredicates != null) {
448 boolean matchedFilter = false;
449 for (final CharacterPredicate predicate : inclusivePredicates) {
450 if (predicate.test(codePoint)) {
451 matchedFilter = true;
452 break;
453 }
454 }
455 if (!matchedFilter) {
456 continue;
457 }
458 }
459 builder.appendCodePoint(codePoint);
460 remaining--;
461 } while (remaining != 0);
462 return builder.toString();
463 }
464
465 /**
466 * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive) number of code points.
467 *
468 * @param minLengthInclusive the minimum (inclusive) number of code points to generate.
469 * @param maxLengthInclusive the maximum (inclusive) number of code points to generate.
470 * @return The generated string.
471 * @throws IllegalArgumentException if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}.
472 * @see RandomStringGenerator#generate(int)
473 * @since 1.2
474 */
475 public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
476 Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
477 Validate.isTrue(minLengthInclusive <= maxLengthInclusive, "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive,
478 minLengthInclusive);
479 return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
480 }
481
482 /**
483 * Generates a random number within a range, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness.
484 *
485 * @param minInclusive the minimum value allowed.
486 * @param maxInclusive the maximum value allowed.
487 * @return The random number.
488 */
489 private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
490 if (random != null) {
491 return random.applyAsInt(maxInclusive - minInclusive + 1) + minInclusive;
492 }
493 return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
494 }
495
496 /**
497 * Generates a random number within a range, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness.
498 *
499 * @param characterList predefined char list.
500 * @return The random number.
501 */
502 private int generateRandomNumber(final List<Character> characterList) {
503 final int listSize = characterList.size();
504 if (random != null) {
505 return String.valueOf(characterList.get(random.applyAsInt(listSize))).codePointAt(0);
506 }
507 return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0);
508 }
509 }