View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text;
18  
19  import java.util.ArrayList;
20  import java.util.Collections;
21  import java.util.HashSet;
22  import java.util.List;
23  import java.util.Set;
24  import java.util.concurrent.ThreadLocalRandom;
25  
26  import org.apache.commons.lang3.ArrayUtils;
27  import org.apache.commons.lang3.StringUtils;
28  import org.apache.commons.lang3.Validate;
29  
30  /**
31   * Generates random Unicode strings containing the specified number of code points.
32   * Instances are created using a builder class, which allows the
33   * callers to define the properties of the generator. See the documentation for the
34   * {@link Builder} class to see available properties.
35   *
36   * <pre>
37   * // Generates a 20 code point string, using only the letters a-z
38   * RandomStringGenerator generator = RandomStringGenerator.builder()
39   *     .withinRange('a', 'z').build();
40   * String randomLetters = generator.generate(20);
41   * </pre>
42   * <pre>
43   * // Using Apache Commons RNG for randomness
44   * UniformRandomProvider rng = RandomSource.create(...);
45   * // Generates a 20 code point string, using only the letters a-z
46   * RandomStringGenerator generator = RandomStringGenerator.builder()
47   *     .withinRange('a', 'z')
48   *     .usingRandom(rng::nextInt) // uses Java 8 syntax
49   *     .build();
50   * String randomLetters = generator.generate(20);
51   * </pre>
52   * <p>
53   * {@code RandomStringGenerator} instances are thread-safe when using the
54   * default random number generator (RNG). If a custom RNG is set by calling the method
55   * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety
56   * must be ensured externally.
57   * </p>
58   * @since 1.1
59   */
60  public final class RandomStringGenerator {
61  
62      /**
63       * A builder for generating {@code RandomStringGenerator} instances.
64       *
65       * <p>The behavior of a generator is controlled by properties set by this
66       * builder. Each property has a default value, which can be overridden by
67       * calling the methods defined in this class, prior to calling {@link #build()}.</p>
68       *
69       * <p>All the property setting methods return the {@code Builder} instance to allow for method chaining.</p>
70       *
71       * <p>The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The
72       * default values are {@code 0} and {@link Character#MAX_CODE_POINT} respectively.</p>
73       *
74       * <p>The source of randomness can be set using {@link #usingRandom(TextRandomProvider)},
75       * otherwise {@link ThreadLocalRandom} is used.</p>
76       *
77       * <p>The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)},
78       * which defines a collection of tests that are applied to the randomly generated code points.
79       * The code points will only be included in the result if they pass at least one of the tests.
80       * Some commonly used predicates are provided by the {@link CharacterPredicates} enum.</p>
81       *
82       * <p>This class is not thread safe.</p>
83       * @since 1.1
84       */
85      public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {
86  
87          /**
88           * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
89           * ({@value}).
90           */
91          public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;
92  
93          /**
94           * The default string length produced by this builder: {@value}.
95           */
96          public static final int DEFAULT_LENGTH = 0;
97  
98          /**
99           * The default minimum code point allowed: {@value}.
100          */
101         public static final int DEFAULT_MINIMUM_CODE_POINT = 0;
102 
103         /**
104          * The minimum code point allowed.
105          */
106         private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;
107 
108         /**
109          * The maximum code point allowed.
110          */
111         private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;
112 
113         /**
114          * Filters for code points.
115          */
116         private Set<CharacterPredicate> inclusivePredicates;
117 
118         /**
119          * The source of randomness.
120          */
121         private TextRandomProvider random;
122 
123         /**
124          * The source of provided characters.
125          */
126         private List<Character> characterList;
127 
128         /**
129          * Creates a new instance.
130          */
131         public Builder() {
132             // empty
133         }
134 
135         /**
136          * Builds a new {@code RandomStringGenerator}.
137          *
138          * @return A new {@code RandomStringGenerator}
139          * @deprecated Use {@link #get()}.
140          */
141         @Deprecated
142         @Override
143         public RandomStringGenerator build() {
144             return get();
145         }
146 
147         /**
148          * Limits the characters in the generated string to those that match at
149          * least one of the predicates supplied.
150          *
151          * <p>
152          * Passing {@code null} or an empty array to this method will revert to the
153          * default behavior of allowing any character. Multiple calls to this
154          * method will replace the previously stored predicates.
155          * </p>
156          *
157          * @param predicates
158          *            the predicates, may be {@code null} or empty
159          * @return {@code this}, to allow method chaining
160          */
161         public Builder filteredBy(final CharacterPredicate... predicates) {
162             if (ArrayUtils.isEmpty(predicates)) {
163                 inclusivePredicates = null;
164                 return this;
165             }
166             if (inclusivePredicates == null) {
167                 inclusivePredicates = new HashSet<>();
168             } else {
169                 inclusivePredicates.clear();
170             }
171             Collections.addAll(inclusivePredicates, predicates);
172             return this;
173         }
174 
175         /**
176          * Builds a new {@code RandomStringGenerator}.
177          *
178          * @return A new {@code RandomStringGenerator}
179          * @since 1.12.0
180          */
181         @Override
182         public RandomStringGenerator get() {
183             return new RandomStringGenerator(minimumCodePoint, maximumCodePoint, inclusivePredicates,
184                     random, characterList);
185         }
186 
187         /**
188          * Limits the characters in the generated string to those who match at
189          * supplied list of Character.
190          *
191          * <p>
192          * Passing {@code null} or an empty array to this method will revert to the
193          * default behavior of allowing any character. Multiple calls to this
194          * method will replace the previously stored Character.
195          * </p>
196          *
197          * @param chars set of predefined Characters for random string generation
198          *            the Character can be, may be {@code null} or empty
199          * @return {@code this}, to allow method chaining
200          * @since 1.2
201          */
202         public Builder selectFrom(final char... chars) {
203             characterList = new ArrayList<>();
204             if (chars != null) {
205                 for (final char c : chars) {
206                     characterList.add(c);
207                 }
208             }
209             return this;
210         }
211 
212         /**
213          * Overrides the default source of randomness.  It is highly
214          * recommended that a random number generator library like
215          * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a>
216          * be used to provide the random number generation.
217          *
218          * <p>
219          * When using Java 8 or later, {@link TextRandomProvider} is a
220          * functional interface and need not be explicitly implemented:
221          * </p>
222          * <pre>
223          * {@code
224          *     UniformRandomProvider rng = RandomSource.create(...);
225          *     RandomStringGenerator gen = RandomStringGenerator.builder()
226          *         .usingRandom(rng::nextInt)
227          *         // additional builder calls as needed
228          *         .build();
229          * }
230          * </pre>
231          *
232          * <p>
233          * Passing {@code null} to this method will revert to the default source of
234          * randomness.
235          * </p>
236          *
237          * @param random
238          *            the source of randomness, may be {@code null}
239          * @return {@code this}, to allow method chaining
240          */
241         public Builder usingRandom(final TextRandomProvider random) {
242             this.random = random;
243             return this;
244         }
245 
246         /**
247          * Sets the array of minimum and maximum char allowed in the
248          * generated string.
249          *
250          * For example:
251          * <pre>
252          * {@code
253          *     char [][] pairs = {{'0','9'}};
254          *     char [][] pairs = {{'a','z'}};
255          *     char [][] pairs = {{'a','z'},{'0','9'}};
256          * }
257          * </pre>
258          *
259          * @param pairs array of characters array, expected is to pass min, max pairs through this arg.
260          * @return {@code this}, to allow method chaining.
261          */
262         public Builder withinRange(final char[]... pairs) {
263             characterList = new ArrayList<>();
264             if (pairs != null) {
265                 for (final char[] pair : pairs) {
266                     Validate.isTrue(pair.length == 2, "Each pair must contain minimum and maximum code point");
267                     final int minimumCodePoint = pair[0];
268                     final int maximumCodePoint = pair[1];
269                     Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
270                             maximumCodePoint);
271 
272                     for (int index = minimumCodePoint; index <= maximumCodePoint; index++) {
273                         characterList.add((char) index);
274                     }
275                 }
276             }
277             return this;
278 
279         }
280 
281         /**
282          * Sets the minimum and maximum code points allowed in the
283          * generated string.
284          *
285          * @param minimumCodePoint
286          *            the smallest code point allowed (inclusive)
287          * @param maximumCodePoint
288          *            the largest code point allowed (inclusive)
289          * @return {@code this}, to allow method chaining
290          * @throws IllegalArgumentException
291          *             if {@code maximumCodePoint >}
292          *             {@link Character#MAX_CODE_POINT}
293          * @throws IllegalArgumentException
294          *             if {@code minimumCodePoint < 0}
295          * @throws IllegalArgumentException
296          *             if {@code minimumCodePoint > maximumCodePoint}
297          */
298         public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
299             Validate.isTrue(minimumCodePoint <= maximumCodePoint,
300                     "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
301             Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
302             Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT,
303                     "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
304             this.minimumCodePoint = minimumCodePoint;
305             this.maximumCodePoint = maximumCodePoint;
306             return this;
307         }
308     }
309 
310     /**
311      * Constructs a new builder.
312      * @return a new builder.
313      * @since 1.11.0
314      */
315     public static Builder builder() {
316         return new Builder();
317     }
318 
319     /**
320      * The smallest allowed code point (inclusive).
321      */
322     private final int minimumCodePoint;
323 
324     /**
325      * The largest allowed code point (inclusive).
326      */
327     private final int maximumCodePoint;
328 
329     /**
330      * Filters for code points.
331      */
332     private final Set<CharacterPredicate> inclusivePredicates;
333 
334     /**
335      * The source of randomness for this generator.
336      */
337     private final TextRandomProvider random;
338 
339     /**
340      * The source of provided characters.
341      */
342     private final List<Character> characterList;
343 
344     /**
345      * Constructs the generator.
346      *
347      * @param minimumCodePoint
348      *            smallest allowed code point (inclusive)
349      * @param maximumCodePoint
350      *            largest allowed code point (inclusive)
351      * @param inclusivePredicates
352      *            filters for code points
353      * @param random
354      *            source of randomness
355      * @param characterList list of predefined set of characters.
356      */
357     private RandomStringGenerator(final int minimumCodePoint, final int maximumCodePoint,
358                                   final Set<CharacterPredicate> inclusivePredicates, final TextRandomProvider random,
359                                   final List<Character> characterList) {
360         this.minimumCodePoint = minimumCodePoint;
361         this.maximumCodePoint = maximumCodePoint;
362         this.inclusivePredicates = inclusivePredicates;
363         this.random = random;
364         this.characterList = characterList;
365     }
366 
367     /**
368      * Generates a random string, containing the specified number of code points.
369      *
370      * <p>
371      * Code points are randomly selected between the minimum and maximum values defined
372      * in the generator.
373      * Surrogate and private use characters are not returned, although the
374      * resulting string may contain pairs of surrogates that together encode a
375      * supplementary character.
376      * </p>
377      * <p>
378      * Note: the number of {@code char} code units generated will exceed
379      * {@code length} if the string contains supplementary characters. See the
380      * {@link Character} documentation to understand how Java stores Unicode
381      * values.
382      * </p>
383      *
384      * @param length
385      *            the number of code points to generate
386      * @return The generated string
387      * @throws IllegalArgumentException
388      *             if {@code length < 0}
389      */
390     public String generate(final int length) {
391         if (length == 0) {
392             return StringUtils.EMPTY;
393         }
394         Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
395         final StringBuilder builder = new StringBuilder(length);
396         long remaining = length;
397         do {
398             final int codePoint;
399             if (characterList != null && !characterList.isEmpty()) {
400                 codePoint = generateRandomNumber(characterList);
401             } else {
402                 codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
403             }
404             switch (Character.getType(codePoint)) {
405             case Character.UNASSIGNED:
406             case Character.PRIVATE_USE:
407             case Character.SURROGATE:
408                 continue;
409             default:
410             }
411             if (inclusivePredicates != null) {
412                 boolean matchedFilter = false;
413                 for (final CharacterPredicate predicate : inclusivePredicates) {
414                     if (predicate.test(codePoint)) {
415                         matchedFilter = true;
416                         break;
417                     }
418                 }
419                 if (!matchedFilter) {
420                     continue;
421                 }
422             }
423             builder.appendCodePoint(codePoint);
424             remaining--;
425         } while (remaining != 0);
426         return builder.toString();
427     }
428 
429     /**
430      * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive)
431      * number of code points.
432      *
433      * @param minLengthInclusive
434      *            the minimum (inclusive) number of code points to generate
435      * @param maxLengthInclusive
436      *            the maximum (inclusive) number of code points to generate
437      * @return The generated string
438      * @throws IllegalArgumentException
439      *             if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}
440      * @see RandomStringGenerator#generate(int)
441      * @since 1.2
442      */
443     public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
444         Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
445         Validate.isTrue(minLengthInclusive <= maxLengthInclusive,
446                 "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive, minLengthInclusive);
447         return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
448     }
449 
450     /**
451      * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
452      * or the user-supplied source of randomness.
453      *
454      * @param minInclusive
455      *            the minimum value allowed
456      * @param maxInclusive
457      *            the maximum value allowed
458      * @return The random number.
459      */
460     private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
461         if (random != null) {
462             return random.nextInt(maxInclusive - minInclusive + 1) + minInclusive;
463         }
464         return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
465     }
466 
467     /**
468      * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
469      * or the user-supplied source of randomness.
470      *
471      * @param characterList predefined char list.
472      * @return The random number.
473      */
474     private int generateRandomNumber(final List<Character> characterList) {
475         final int listSize = characterList.size();
476         if (random != null) {
477             return String.valueOf(characterList.get(random.nextInt(listSize))).codePointAt(0);
478         }
479         return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0);
480     }
481 }