View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text;
18  
19  import java.util.ArrayList;
20  import java.util.Collections;
21  import java.util.HashSet;
22  import java.util.List;
23  import java.util.Set;
24  import java.util.concurrent.ThreadLocalRandom;
25  import java.util.function.IntUnaryOperator;
26  
27  import org.apache.commons.lang3.ArrayUtils;
28  import org.apache.commons.lang3.StringUtils;
29  import org.apache.commons.lang3.Validate;
30  
31  /**
32   * Generates random Unicode strings containing the specified number of code points. Instances are created using a builder class, which allows the callers to
33   * define the properties of the generator. See the documentation for the {@link Builder} class to see available properties.
34   *
35   * <pre>
36   * // Generates a 20 code point string, using only the letters a-z
37   * RandomStringGenerator generator = RandomStringGenerator.builder().withinRange('a', 'z').build();
38   * String randomLetters = generator.generate(20);
39   * </pre>
40   * <pre>
41   * // Using Apache Commons RNG for randomness
42   * UniformRandomProvider rng = RandomSource.create(...);
43   * // Generates a 20 code point string, using only the letters a-z
44   * RandomStringGenerator generator = RandomStringGenerator.builder()
45   *     .withinRange('a', 'z')
46   *     .usingRandom(rng::nextInt)
47   *     .build();
48   * String randomLetters = generator.generate(20);
49   * </pre>
50   * <p>
51   * {@code RandomStringGenerator} instances are thread-safe when using the default random number generator (RNG). If a custom RNG is set by calling the method
52   * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety must be ensured externally.
53   * </p>
54   *
55   * @since 1.1
56   */
57  public final class RandomStringGenerator {
58  
59      /**
60       * A builder for generating {@code RandomStringGenerator} instances.
61       *
62       * <p>
63       * The behavior of a generator is controlled by properties set by this builder. Each property has a default value, which can be overridden by calling the
64       * methods defined in this class, prior to calling {@link #build()}.
65       * </p>
66       * <p>
67       * All the property setting methods return the {@code Builder} instance to allow for method chaining.
68       * </p>
69       * <p>
70       * The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The default values are {@code 0} and
71       * {@link Character#MAX_CODE_POINT} respectively.
72       * </p>
73       * <p>
74       * The source of randomness can be set using {@link #usingRandom(TextRandomProvider)}, otherwise {@link ThreadLocalRandom} is used.
75       * </p>
76       * <p>
77       * The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)}, which defines a collection of tests that are applied
78       * to the randomly generated code points. The code points will only be included in the result if they pass at least one of the tests. Some commonly used
79       * predicates are provided by the {@link CharacterPredicates} enum.
80       * </p>
81       * <p>
82       * This class is not thread safe.
83       * </p>
84       *
85       * @since 1.1
86       */
87      public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {
88  
89          /**
90           * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
91           * ({@value}).
92           */
93          public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;
94  
95          /**
96           * The default string length produced by this builder: {@value}.
97           */
98          public static final int DEFAULT_LENGTH = 0;
99  
100         /**
101          * The default minimum code point allowed: {@value}.
102          */
103         public static final int DEFAULT_MINIMUM_CODE_POINT = 0;
104 
105         /**
106          * The minimum code point allowed.
107          */
108         private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;
109 
110         /**
111          * The maximum code point allowed.
112          */
113         private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;
114 
115         /**
116          * Filters for code points.
117          */
118         private Set<CharacterPredicate> inclusivePredicates;
119 
120         /**
121          * The source of randomness.
122          */
123         private IntUnaryOperator random;
124 
125         /**
126          * The source of provided characters.
127          */
128         private Set<Character> characterSet = new HashSet<>();
129 
130         /**
131          * Whether calls accumulates the source of provided characters. The default is {@code false}.
132          */
133         private boolean accumulate;
134 
135         /**
136          * Creates a new instance.
137          */
138         public Builder() {
139             // empty
140         }
141 
142         /**
143          * Builds a new {@code RandomStringGenerator}.
144          *
145          * @return A new {@code RandomStringGenerator}
146          * @deprecated Use {@link #get()}.
147          */
148         @Deprecated
149         @Override
150         public RandomStringGenerator build() {
151             return get();
152         }
153 
154         /**
155          * Limits the characters in the generated string to those that match at least one of the predicates supplied.
156          *
157          * <p>
158          * Passing {@code null} or an empty array to this method will revert to the default behavior of allowing any character. Multiple calls to this method
159          * will replace the previously stored predicates.
160          * </p>
161          *
162          * @param predicates the predicates, may be {@code null} or empty.
163          * @return {@code this} instance.
164          */
165         public Builder filteredBy(final CharacterPredicate... predicates) {
166             if (ArrayUtils.isEmpty(predicates)) {
167                 inclusivePredicates = null;
168                 return this;
169             }
170             if (inclusivePredicates == null) {
171                 inclusivePredicates = new HashSet<>();
172             } else {
173                 inclusivePredicates.clear();
174             }
175             Collections.addAll(inclusivePredicates, predicates);
176             return this;
177         }
178 
179         /**
180          * Builds a new {@code RandomStringGenerator}.
181          *
182          * @return A new {@code RandomStringGenerator}.
183          * @since 1.12.0
184          */
185         @Override
186         public RandomStringGenerator get() {
187             return new RandomStringGenerator(this);
188         }
189 
190         private void initCharList() {
191             if (!accumulate) {
192                 characterSet = new HashSet<>();
193             }
194         }
195 
196         /**
197          * Limits the characters in the generated string to those who match at supplied list of Character.
198          *
199          * <p>
200          * Passing {@code null} or an empty array to this method will revert to the default behavior of allowing any character. Multiple calls to this method
201          * will replace the previously stored Character.
202          * </p>
203          *
204          * @param chars set of predefined Characters for random string generation the Character can be, may be {@code null} or empty
205          * @return {@code this} instance.
206          * @since 1.2
207          */
208         public Builder selectFrom(final char... chars) {
209             initCharList();
210             if (chars != null) {
211                 for (final char c : chars) {
212                     characterSet.add(c);
213                 }
214             }
215             return this;
216         }
217 
218         /**
219          * Sets whether calls accumulates the source of provided characters. The default is {@code false}.
220          *
221          * <pre>
222          * {@code
223          *     RandomStringGenerator gen = RandomStringGenerator.builder()
224          *         .setAccumulate(true)
225          *         .withinRange(new char[][] { { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } })
226          *         .selectFrom('!', '"', '#', '$', '&', '\'', '(', ')', ',', '.', ':', ';', '?', '@', '[',
227          *                     '\\', ']', '^', '_', '`', '{', '|', '}', '~') // punctuation
228          *         // additional builder calls as needed
229          *         .build();
230          * }
231          * </pre>
232          *
233          * @param accumulate whether calls accumulates the source of provided characters. The default is {@code false}.
234          * @return {@code this} instance.
235          * @since 1.14.0
236          */
237         public Builder setAccumulate(final boolean accumulate) {
238             this.accumulate = accumulate;
239             return this;
240         }
241 
242         /**
243          * Overrides the default source of randomness. It is highly recommended that a random number generator library like
244          * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation.
245          *
246          * <p>
247          * {@link TextRandomProvider} is a functional interface and need not be explicitly implemented:
248          * </p>
249          *
250          * <pre>
251          * {@code
252          *     UniformRandomProvider rng = RandomSource.create(...);
253          *     RandomStringGenerator gen = RandomStringGenerator.builder()
254          *         .usingRandom(rng::nextInt)
255          *         // additional builder calls as needed
256          *         .build();
257          * }
258          * </pre>
259          *
260          * <p>
261          * Passing {@code null} to this method will revert to the default source of randomness.
262          * </p>
263          *
264          * @param random the source of randomness, may be {@code null}.
265          * @return {@code this} instance.
266          * @since 1.14.0
267          */
268         public Builder usingRandom(final IntUnaryOperator random) {
269             this.random = random;
270             return this;
271         }
272 
273         /**
274          * Overrides the default source of randomness. It is highly recommended that a random number generator library like
275          * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation.
276          *
277          * <p>
278          * {@link TextRandomProvider} is a functional interface and need not be explicitly implemented:
279          * </p>
280          *
281          * <pre>
282          * {@code
283          *     UniformRandomProvider rng = RandomSource.create(...);
284          *     RandomStringGenerator gen = RandomStringGenerator.builder()
285          *         .usingRandom(rng::nextInt)
286          *         // additional builder calls as needed
287          *         .build();
288          * }
289          * </pre>
290          *
291          * <p>
292          * Passing {@code null} to this method will revert to the default source of randomness.
293          * </p>
294          *
295          * @param random the source of randomness, may be {@code null}.
296          * @return {@code this} instance.
297          */
298         public Builder usingRandom(final TextRandomProvider random) {
299             this.random = random;
300             return this;
301         }
302 
303         /**
304          * Sets the array of minimum and maximum char allowed in the generated string.
305          *
306          * For example:
307          *
308          * <pre>
309          * {@code
310          * char[][] pairs = { { '0', '9' } };
311          * char[][] pairs = { { 'a', 'z' } };
312          * char[][] pairs = { { 'a', 'z' }, { '0', '9' } };
313          * }
314          * </pre>
315          *
316          * @param pairs array of characters array, expected is to pass min, max pairs through this arg.
317          * @return {@code this} instance.
318          */
319         public Builder withinRange(final char[]... pairs) {
320             initCharList();
321             if (pairs != null) {
322                 for (final char[] pair : pairs) {
323                     Validate.isTrue(pair.length == 2, "Each pair must contain minimum and maximum code point");
324                     final int minimumCodePoint = pair[0];
325                     final int maximumCodePoint = pair[1];
326                     Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
327                             maximumCodePoint);
328                     for (int index = minimumCodePoint; index <= maximumCodePoint; index++) {
329                         characterSet.add((char) index);
330                     }
331                 }
332             }
333             return this;
334         }
335 
336 
337         /**
338          * Sets the minimum and maximum code points allowed in the generated string.
339          *
340          * @param minimumCodePoint the smallest code point allowed (inclusive).
341          * @param maximumCodePoint the largest code point allowed (inclusive).
342          * @return {@code this} instance.
343          * @throws IllegalArgumentException if {@code maximumCodePoint >} {@link Character#MAX_CODE_POINT}.
344          * @throws IllegalArgumentException if {@code minimumCodePoint < 0}.
345          * @throws IllegalArgumentException if {@code minimumCodePoint > maximumCodePoint}.
346          */
347         public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
348             Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
349                     maximumCodePoint);
350             Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
351             Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT, "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
352             this.minimumCodePoint = minimumCodePoint;
353             this.maximumCodePoint = maximumCodePoint;
354             return this;
355         }
356     }
357 
358     /**
359      * Constructs a new builder.
360      *
361      * @return a new builder.
362      * @since 1.11.0
363      */
364     public static Builder builder() {
365         return new Builder();
366     }
367 
368     /**
369      * The smallest allowed code point (inclusive).
370      */
371     private final int minimumCodePoint;
372 
373     /**
374      * The largest allowed code point (inclusive).
375      */
376     private final int maximumCodePoint;
377 
378     /**
379      * Filters for code points.
380      */
381     private final Set<CharacterPredicate> inclusivePredicates;
382 
383     /**
384      * The source of randomness for this generator.
385      */
386     private final IntUnaryOperator random;
387 
388     /**
389      * The source of provided characters.
390      */
391     private final List<Character> characterList;
392 
393     /**
394      * Constructs the generator.
395      *
396      * @param minimumCodePoint    smallest allowed code point (inclusive).
397      * @param maximumCodePoint    largest allowed code point (inclusive).
398      * @param inclusivePredicates filters for code points.
399      * @param random              source of randomness.
400      * @param characterSet       list of predefined set of characters.
401      */
402     private RandomStringGenerator(final Builder builder) {
403         this.minimumCodePoint = builder.minimumCodePoint;
404         this.maximumCodePoint = builder.maximumCodePoint;
405         this.inclusivePredicates = builder.inclusivePredicates;
406         this.random = builder.random;
407         this.characterList = new ArrayList<>(builder.characterSet);
408     }
409 
410     /**
411      * Generates a random string, containing the specified number of code points.
412      *
413      * <p>
414      * Code points are randomly selected between the minimum and maximum values defined in the generator. Surrogate and private use characters are not returned,
415      * although the resulting string may contain pairs of surrogates that together encode a supplementary character.
416      * </p>
417      * <p>
418      * Note: the number of {@code char} code units generated will exceed {@code length} if the string contains supplementary characters. See the
419      * {@link Character} documentation to understand how Java stores Unicode values.
420      * </p>
421      *
422      * @param length the number of code points to generate.
423      * @return The generated string.
424      * @throws IllegalArgumentException if {@code length < 0}.
425      */
426     public String generate(final int length) {
427         if (length == 0) {
428             return StringUtils.EMPTY;
429         }
430         Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
431         final StringBuilder builder = new StringBuilder(length);
432         long remaining = length;
433         do {
434             final int codePoint;
435             if (characterList != null && !characterList.isEmpty()) {
436                 codePoint = generateRandomNumber(characterList);
437             } else {
438                 codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
439             }
440             switch (Character.getType(codePoint)) {
441             case Character.UNASSIGNED:
442             case Character.PRIVATE_USE:
443             case Character.SURROGATE:
444                 continue;
445             default:
446             }
447             if (inclusivePredicates != null) {
448                 boolean matchedFilter = false;
449                 for (final CharacterPredicate predicate : inclusivePredicates) {
450                     if (predicate.test(codePoint)) {
451                         matchedFilter = true;
452                         break;
453                     }
454                 }
455                 if (!matchedFilter) {
456                     continue;
457                 }
458             }
459             builder.appendCodePoint(codePoint);
460             remaining--;
461         } while (remaining != 0);
462         return builder.toString();
463     }
464 
465     /**
466      * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive) number of code points.
467      *
468      * @param minLengthInclusive the minimum (inclusive) number of code points to generate.
469      * @param maxLengthInclusive the maximum (inclusive) number of code points to generate.
470      * @return The generated string.
471      * @throws IllegalArgumentException if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}.
472      * @see RandomStringGenerator#generate(int)
473      * @since 1.2
474      */
475     public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
476         Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
477         Validate.isTrue(minLengthInclusive <= maxLengthInclusive, "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive,
478                 minLengthInclusive);
479         return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
480     }
481 
482     /**
483      * Generates a random number within a range, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness.
484      *
485      * @param minInclusive the minimum value allowed.
486      * @param maxInclusive the maximum value allowed.
487      * @return The random number.
488      */
489     private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
490         if (random != null) {
491             return random.applyAsInt(maxInclusive - minInclusive + 1) + minInclusive;
492         }
493         return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
494     }
495 
496     /**
497      * Generates a random number within a range, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness.
498      *
499      * @param characterList predefined char list.
500      * @return The random number.
501      */
502     private int generateRandomNumber(final List<Character> characterList) {
503         final int listSize = characterList.size();
504         if (random != null) {
505             return String.valueOf(characterList.get(random.applyAsInt(listSize))).codePointAt(0);
506         }
507         return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0);
508     }
509 }