001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.util.ArrayList;
020import java.util.Collections;
021import java.util.HashSet;
022import java.util.List;
023import java.util.Set;
024import java.util.concurrent.ThreadLocalRandom;
025import java.util.function.IntUnaryOperator;
026
027import org.apache.commons.lang3.ArrayUtils;
028import org.apache.commons.lang3.StringUtils;
029import org.apache.commons.lang3.Validate;
030
031/**
032 * Generates random Unicode strings containing the specified number of code points. Instances are created using a builder class, which allows the callers to
033 * define the properties of the generator. See the documentation for the {@link Builder} class to see available properties.
034 *
035 * <pre>
036 * // Generates a 20 code point string, using only the letters a-z
037 * RandomStringGenerator generator = RandomStringGenerator.builder().withinRange('a', 'z').build();
038 * String randomLetters = generator.generate(20);
039 * </pre>
040 * <pre>
041 * // Using Apache Commons RNG for randomness
042 * UniformRandomProvider rng = RandomSource.create(...);
043 * // Generates a 20 code point string, using only the letters a-z
044 * RandomStringGenerator generator = RandomStringGenerator.builder()
045 *     .withinRange('a', 'z')
046 *     .usingRandom(rng::nextInt)
047 *     .build();
048 * String randomLetters = generator.generate(20);
049 * </pre>
050 * <p>
051 * {@code RandomStringGenerator} instances are thread-safe when using the default random number generator (RNG). If a custom RNG is set by calling the method
052 * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety must be ensured externally.
053 * </p>
054 *
055 * @since 1.1
056 */
057public final class RandomStringGenerator {
058
059    /**
060     * A builder for generating {@code RandomStringGenerator} instances.
061     *
062     * <p>
063     * The behavior of a generator is controlled by properties set by this builder. Each property has a default value, which can be overridden by calling the
064     * methods defined in this class, prior to calling {@link #build()}.
065     * </p>
066     * <p>
067     * All the property setting methods return the {@code Builder} instance to allow for method chaining.
068     * </p>
069     * <p>
070     * The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The default values are {@code 0} and
071     * {@link Character#MAX_CODE_POINT} respectively.
072     * </p>
073     * <p>
074     * The source of randomness can be set using {@link #usingRandom(TextRandomProvider)}, otherwise {@link ThreadLocalRandom} is used.
075     * </p>
076     * <p>
077     * The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)}, which defines a collection of tests that are applied
078     * to the randomly generated code points. The code points will only be included in the result if they pass at least one of the tests. Some commonly used
079     * predicates are provided by the {@link CharacterPredicates} enum.
080     * </p>
081     * <p>
082     * This class is not thread safe.
083     * </p>
084     *
085     * @since 1.1
086     */
087    public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {
088
089        /**
090         * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
091         * ({@value}).
092         */
093        public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;
094
095        /**
096         * The default string length produced by this builder: {@value}.
097         */
098        public static final int DEFAULT_LENGTH = 0;
099
100        /**
101         * The default minimum code point allowed: {@value}.
102         */
103        public static final int DEFAULT_MINIMUM_CODE_POINT = 0;
104
105        /**
106         * The minimum code point allowed.
107         */
108        private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;
109
110        /**
111         * The maximum code point allowed.
112         */
113        private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;
114
115        /**
116         * Filters for code points.
117         */
118        private Set<CharacterPredicate> inclusivePredicates;
119
120        /**
121         * The source of randomness.
122         */
123        private IntUnaryOperator random;
124
125        /**
126         * The source of provided characters.
127         */
128        private Set<Character> characterSet = new HashSet<>();
129
130        /**
131         * Whether calls accumulates the source of provided characters. The default is {@code false}.
132         */
133        private boolean accumulate;
134
135        /**
136         * Creates a new instance.
137         */
138        public Builder() {
139            // empty
140        }
141
142        /**
143         * Builds a new {@code RandomStringGenerator}.
144         *
145         * @return A new {@code RandomStringGenerator}
146         * @deprecated Use {@link #get()}.
147         */
148        @Deprecated
149        @Override
150        public RandomStringGenerator build() {
151            return get();
152        }
153
154        /**
155         * Limits the characters in the generated string to those that match at least one of the predicates supplied.
156         *
157         * <p>
158         * Passing {@code null} or an empty array to this method will revert to the default behavior of allowing any character. Multiple calls to this method
159         * will replace the previously stored predicates.
160         * </p>
161         *
162         * @param predicates the predicates, may be {@code null} or empty.
163         * @return {@code this} instance.
164         */
165        public Builder filteredBy(final CharacterPredicate... predicates) {
166            if (ArrayUtils.isEmpty(predicates)) {
167                inclusivePredicates = null;
168                return this;
169            }
170            if (inclusivePredicates == null) {
171                inclusivePredicates = new HashSet<>();
172            } else {
173                inclusivePredicates.clear();
174            }
175            Collections.addAll(inclusivePredicates, predicates);
176            return this;
177        }
178
179        /**
180         * Builds a new {@code RandomStringGenerator}.
181         *
182         * @return A new {@code RandomStringGenerator}.
183         * @since 1.12.0
184         */
185        @Override
186        public RandomStringGenerator get() {
187            return new RandomStringGenerator(this);
188        }
189
190        private void initCharList() {
191            if (!accumulate) {
192                characterSet = new HashSet<>();
193            }
194        }
195
196        /**
197         * Limits the characters in the generated string to those who match at supplied list of Character.
198         *
199         * <p>
200         * Passing {@code null} or an empty array to this method will revert to the default behavior of allowing any character. Multiple calls to this method
201         * will replace the previously stored Character.
202         * </p>
203         *
204         * @param chars set of predefined Characters for random string generation the Character can be, may be {@code null} or empty
205         * @return {@code this} instance.
206         * @since 1.2
207         */
208        public Builder selectFrom(final char... chars) {
209            initCharList();
210            if (chars != null) {
211                for (final char c : chars) {
212                    characterSet.add(c);
213                }
214            }
215            return this;
216        }
217
218        /**
219         * Sets whether calls accumulates the source of provided characters. The default is {@code false}.
220         *
221         * <pre>
222         * {@code
223         *     RandomStringGenerator gen = RandomStringGenerator.builder()
224         *         .setAccumulate(true)
225         *         .withinRange(new char[][] { { 'a', 'z' }, { 'A', 'Z' }, { '0', '9' } })
226         *         .selectFrom('!', '"', '#', '$', '&', '\'', '(', ')', ',', '.', ':', ';', '?', '@', '[',
227         *                     '\\', ']', '^', '_', '`', '{', '|', '}', '~') // punctuation
228         *         // additional builder calls as needed
229         *         .build();
230         * }
231         * </pre>
232         *
233         * @param accumulate whether calls accumulates the source of provided characters. The default is {@code false}.
234         * @return {@code this} instance.
235         * @since 1.14.0
236         */
237        public Builder setAccumulate(final boolean accumulate) {
238            this.accumulate = accumulate;
239            return this;
240        }
241
242        /**
243         * Overrides the default source of randomness. It is highly recommended that a random number generator library like
244         * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation.
245         *
246         * <p>
247         * {@link TextRandomProvider} is a functional interface and need not be explicitly implemented:
248         * </p>
249         *
250         * <pre>
251         * {@code
252         *     UniformRandomProvider rng = RandomSource.create(...);
253         *     RandomStringGenerator gen = RandomStringGenerator.builder()
254         *         .usingRandom(rng::nextInt)
255         *         // additional builder calls as needed
256         *         .build();
257         * }
258         * </pre>
259         *
260         * <p>
261         * Passing {@code null} to this method will revert to the default source of randomness.
262         * </p>
263         *
264         * @param random the source of randomness, may be {@code null}.
265         * @return {@code this} instance.
266         * @since 1.14.0
267         */
268        public Builder usingRandom(final IntUnaryOperator random) {
269            this.random = random;
270            return this;
271        }
272
273        /**
274         * Overrides the default source of randomness. It is highly recommended that a random number generator library like
275         * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a> be used to provide the random number generation.
276         *
277         * <p>
278         * {@link TextRandomProvider} is a functional interface and need not be explicitly implemented:
279         * </p>
280         *
281         * <pre>
282         * {@code
283         *     UniformRandomProvider rng = RandomSource.create(...);
284         *     RandomStringGenerator gen = RandomStringGenerator.builder()
285         *         .usingRandom(rng::nextInt)
286         *         // additional builder calls as needed
287         *         .build();
288         * }
289         * </pre>
290         *
291         * <p>
292         * Passing {@code null} to this method will revert to the default source of randomness.
293         * </p>
294         *
295         * @param random the source of randomness, may be {@code null}.
296         * @return {@code this} instance.
297         */
298        public Builder usingRandom(final TextRandomProvider random) {
299            this.random = random;
300            return this;
301        }
302
303        /**
304         * Sets the array of minimum and maximum char allowed in the generated string.
305         *
306         * For example:
307         *
308         * <pre>
309         * {@code
310         * char[][] pairs = { { '0', '9' } };
311         * char[][] pairs = { { 'a', 'z' } };
312         * char[][] pairs = { { 'a', 'z' }, { '0', '9' } };
313         * }
314         * </pre>
315         *
316         * @param pairs array of characters array, expected is to pass min, max pairs through this arg.
317         * @return {@code this} instance.
318         */
319        public Builder withinRange(final char[]... pairs) {
320            initCharList();
321            if (pairs != null) {
322                for (final char[] pair : pairs) {
323                    Validate.isTrue(pair.length == 2, "Each pair must contain minimum and maximum code point");
324                    final int minimumCodePoint = pair[0];
325                    final int maximumCodePoint = pair[1];
326                    Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
327                            maximumCodePoint);
328                    for (int index = minimumCodePoint; index <= maximumCodePoint; index++) {
329                        characterSet.add((char) index);
330                    }
331                }
332            }
333            return this;
334        }
335
336
337        /**
338         * Sets the minimum and maximum code points allowed in the generated string.
339         *
340         * @param minimumCodePoint the smallest code point allowed (inclusive).
341         * @param maximumCodePoint the largest code point allowed (inclusive).
342         * @return {@code this} instance.
343         * @throws IllegalArgumentException if {@code maximumCodePoint >} {@link Character#MAX_CODE_POINT}.
344         * @throws IllegalArgumentException if {@code minimumCodePoint < 0}.
345         * @throws IllegalArgumentException if {@code minimumCodePoint > maximumCodePoint}.
346         */
347        public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
348            Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
349                    maximumCodePoint);
350            Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
351            Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT, "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
352            this.minimumCodePoint = minimumCodePoint;
353            this.maximumCodePoint = maximumCodePoint;
354            return this;
355        }
356    }
357
358    /**
359     * Constructs a new builder.
360     *
361     * @return a new builder.
362     * @since 1.11.0
363     */
364    public static Builder builder() {
365        return new Builder();
366    }
367
368    /**
369     * The smallest allowed code point (inclusive).
370     */
371    private final int minimumCodePoint;
372
373    /**
374     * The largest allowed code point (inclusive).
375     */
376    private final int maximumCodePoint;
377
378    /**
379     * Filters for code points.
380     */
381    private final Set<CharacterPredicate> inclusivePredicates;
382
383    /**
384     * The source of randomness for this generator.
385     */
386    private final IntUnaryOperator random;
387
388    /**
389     * The source of provided characters.
390     */
391    private final List<Character> characterList;
392
393    /**
394     * Constructs the generator.
395     *
396     * @param minimumCodePoint    smallest allowed code point (inclusive).
397     * @param maximumCodePoint    largest allowed code point (inclusive).
398     * @param inclusivePredicates filters for code points.
399     * @param random              source of randomness.
400     * @param characterSet       list of predefined set of characters.
401     */
402    private RandomStringGenerator(final Builder builder) {
403        this.minimumCodePoint = builder.minimumCodePoint;
404        this.maximumCodePoint = builder.maximumCodePoint;
405        this.inclusivePredicates = builder.inclusivePredicates;
406        this.random = builder.random;
407        this.characterList = new ArrayList<>(builder.characterSet);
408    }
409
410    /**
411     * Generates a random string, containing the specified number of code points.
412     *
413     * <p>
414     * Code points are randomly selected between the minimum and maximum values defined in the generator. Surrogate and private use characters are not returned,
415     * although the resulting string may contain pairs of surrogates that together encode a supplementary character.
416     * </p>
417     * <p>
418     * Note: the number of {@code char} code units generated will exceed {@code length} if the string contains supplementary characters. See the
419     * {@link Character} documentation to understand how Java stores Unicode values.
420     * </p>
421     *
422     * @param length the number of code points to generate.
423     * @return The generated string.
424     * @throws IllegalArgumentException if {@code length < 0}.
425     */
426    public String generate(final int length) {
427        if (length == 0) {
428            return StringUtils.EMPTY;
429        }
430        Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
431        final StringBuilder builder = new StringBuilder(length);
432        long remaining = length;
433        do {
434            final int codePoint;
435            if (characterList != null && !characterList.isEmpty()) {
436                codePoint = generateRandomNumber(characterList);
437            } else {
438                codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
439            }
440            switch (Character.getType(codePoint)) {
441            case Character.UNASSIGNED:
442            case Character.PRIVATE_USE:
443            case Character.SURROGATE:
444                continue;
445            default:
446            }
447            if (inclusivePredicates != null) {
448                boolean matchedFilter = false;
449                for (final CharacterPredicate predicate : inclusivePredicates) {
450                    if (predicate.test(codePoint)) {
451                        matchedFilter = true;
452                        break;
453                    }
454                }
455                if (!matchedFilter) {
456                    continue;
457                }
458            }
459            builder.appendCodePoint(codePoint);
460            remaining--;
461        } while (remaining != 0);
462        return builder.toString();
463    }
464
465    /**
466     * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive) number of code points.
467     *
468     * @param minLengthInclusive the minimum (inclusive) number of code points to generate.
469     * @param maxLengthInclusive the maximum (inclusive) number of code points to generate.
470     * @return The generated string.
471     * @throws IllegalArgumentException if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}.
472     * @see RandomStringGenerator#generate(int)
473     * @since 1.2
474     */
475    public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
476        Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
477        Validate.isTrue(minLengthInclusive <= maxLengthInclusive, "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive,
478                minLengthInclusive);
479        return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
480    }
481
482    /**
483     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness.
484     *
485     * @param minInclusive the minimum value allowed.
486     * @param maxInclusive the maximum value allowed.
487     * @return The random number.
488     */
489    private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
490        if (random != null) {
491            return random.applyAsInt(maxInclusive - minInclusive + 1) + minInclusive;
492        }
493        return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
494    }
495
496    /**
497     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance or the user-supplied source of randomness.
498     *
499     * @param characterList predefined char list.
500     * @return The random number.
501     */
502    private int generateRandomNumber(final List<Character> characterList) {
503        final int listSize = characterList.size();
504        if (random != null) {
505            return String.valueOf(characterList.get(random.applyAsInt(listSize))).codePointAt(0);
506        }
507        return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0);
508    }
509}