001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.util.ArrayList;
020import java.util.Collections;
021import java.util.HashSet;
022import java.util.List;
023import java.util.Set;
024import java.util.concurrent.ThreadLocalRandom;
025
026import org.apache.commons.lang3.ArrayUtils;
027import org.apache.commons.lang3.StringUtils;
028import org.apache.commons.lang3.Validate;
029
030/**
031 * Generates random Unicode strings containing the specified number of code points.
032 * Instances are created using a builder class, which allows the
033 * callers to define the properties of the generator. See the documentation for the
034 * {@link Builder} class to see available properties.
035 *
036 * <pre>
037 * // Generates a 20 code point string, using only the letters a-z
038 * RandomStringGenerator generator = RandomStringGenerator.builder()
039 *     .withinRange('a', 'z').build();
040 * String randomLetters = generator.generate(20);
041 * </pre>
042 * <pre>
043 * // Using Apache Commons RNG for randomness
044 * UniformRandomProvider rng = RandomSource.create(...);
045 * // Generates a 20 code point string, using only the letters a-z
046 * RandomStringGenerator generator = RandomStringGenerator.builder()
047 *     .withinRange('a', 'z')
048 *     .usingRandom(rng::nextInt) // uses Java 8 syntax
049 *     .build();
050 * String randomLetters = generator.generate(20);
051 * </pre>
052 * <p>
053 * {@code RandomStringGenerator} instances are thread-safe when using the
054 * default random number generator (RNG). If a custom RNG is set by calling the method
055 * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety
056 * must be ensured externally.
057 * </p>
058 * @since 1.1
059 */
060public final class RandomStringGenerator {
061
062    /**
063     * A builder for generating {@code RandomStringGenerator} instances.
064     *
065     * <p>The behavior of a generator is controlled by properties set by this
066     * builder. Each property has a default value, which can be overridden by
067     * calling the methods defined in this class, prior to calling {@link #build()}.</p>
068     *
069     * <p>All the property setting methods return the {@code Builder} instance to allow for method chaining.</p>
070     *
071     * <p>The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The
072     * default values are {@code 0} and {@link Character#MAX_CODE_POINT} respectively.</p>
073     *
074     * <p>The source of randomness can be set using {@link #usingRandom(TextRandomProvider)},
075     * otherwise {@link ThreadLocalRandom} is used.</p>
076     *
077     * <p>The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)},
078     * which defines a collection of tests that are applied to the randomly generated code points.
079     * The code points will only be included in the result if they pass at least one of the tests.
080     * Some commonly used predicates are provided by the {@link CharacterPredicates} enum.</p>
081     *
082     * <p>This class is not thread safe.</p>
083     * @since 1.1
084     */
085    public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {
086
087        /**
088         * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
089         * ({@value}).
090         */
091        public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;
092
093        /**
094         * The default string length produced by this builder: {@value}.
095         */
096        public static final int DEFAULT_LENGTH = 0;
097
098        /**
099         * The default minimum code point allowed: {@value}.
100         */
101        public static final int DEFAULT_MINIMUM_CODE_POINT = 0;
102
103        /**
104         * The minimum code point allowed.
105         */
106        private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;
107
108        /**
109         * The maximum code point allowed.
110         */
111        private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;
112
113        /**
114         * Filters for code points.
115         */
116        private Set<CharacterPredicate> inclusivePredicates;
117
118        /**
119         * The source of randomness.
120         */
121        private TextRandomProvider random;
122
123        /**
124         * The source of provided characters.
125         */
126        private List<Character> characterList;
127
128        /**
129         * Creates a new instance.
130         */
131        public Builder() {
132            // empty
133        }
134
135        /**
136         * Builds a new {@code RandomStringGenerator}.
137         *
138         * @return A new {@code RandomStringGenerator}
139         * @deprecated Use {@link #get()}.
140         */
141        @Deprecated
142        @Override
143        public RandomStringGenerator build() {
144            return get();
145        }
146
147        /**
148         * Limits the characters in the generated string to those that match at
149         * least one of the predicates supplied.
150         *
151         * <p>
152         * Passing {@code null} or an empty array to this method will revert to the
153         * default behavior of allowing any character. Multiple calls to this
154         * method will replace the previously stored predicates.
155         * </p>
156         *
157         * @param predicates
158         *            the predicates, may be {@code null} or empty
159         * @return {@code this}, to allow method chaining
160         */
161        public Builder filteredBy(final CharacterPredicate... predicates) {
162            if (ArrayUtils.isEmpty(predicates)) {
163                inclusivePredicates = null;
164                return this;
165            }
166            if (inclusivePredicates == null) {
167                inclusivePredicates = new HashSet<>();
168            } else {
169                inclusivePredicates.clear();
170            }
171            Collections.addAll(inclusivePredicates, predicates);
172            return this;
173        }
174
175        /**
176         * Builds a new {@code RandomStringGenerator}.
177         *
178         * @return A new {@code RandomStringGenerator}
179         * @since 1.12.0
180         */
181        @Override
182        public RandomStringGenerator get() {
183            return new RandomStringGenerator(minimumCodePoint, maximumCodePoint, inclusivePredicates,
184                    random, characterList);
185        }
186
187        /**
188         * Limits the characters in the generated string to those who match at
189         * supplied list of Character.
190         *
191         * <p>
192         * Passing {@code null} or an empty array to this method will revert to the
193         * default behavior of allowing any character. Multiple calls to this
194         * method will replace the previously stored Character.
195         * </p>
196         *
197         * @param chars set of predefined Characters for random string generation
198         *            the Character can be, may be {@code null} or empty
199         * @return {@code this}, to allow method chaining
200         * @since 1.2
201         */
202        public Builder selectFrom(final char... chars) {
203            characterList = new ArrayList<>();
204            if (chars != null) {
205                for (final char c : chars) {
206                    characterList.add(c);
207                }
208            }
209            return this;
210        }
211
212        /**
213         * Overrides the default source of randomness.  It is highly
214         * recommended that a random number generator library like
215         * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a>
216         * be used to provide the random number generation.
217         *
218         * <p>
219         * When using Java 8 or later, {@link TextRandomProvider} is a
220         * functional interface and need not be explicitly implemented:
221         * </p>
222         * <pre>
223         * {@code
224         *     UniformRandomProvider rng = RandomSource.create(...);
225         *     RandomStringGenerator gen = RandomStringGenerator.builder()
226         *         .usingRandom(rng::nextInt)
227         *         // additional builder calls as needed
228         *         .build();
229         * }
230         * </pre>
231         *
232         * <p>
233         * Passing {@code null} to this method will revert to the default source of
234         * randomness.
235         * </p>
236         *
237         * @param random
238         *            the source of randomness, may be {@code null}
239         * @return {@code this}, to allow method chaining
240         */
241        public Builder usingRandom(final TextRandomProvider random) {
242            this.random = random;
243            return this;
244        }
245
246        /**
247         * Sets the array of minimum and maximum char allowed in the
248         * generated string.
249         *
250         * For example:
251         * <pre>
252         * {@code
253         *     char [][] pairs = {{'0','9'}};
254         *     char [][] pairs = {{'a','z'}};
255         *     char [][] pairs = {{'a','z'},{'0','9'}};
256         * }
257         * </pre>
258         *
259         * @param pairs array of characters array, expected is to pass min, max pairs through this arg.
260         * @return {@code this}, to allow method chaining.
261         */
262        public Builder withinRange(final char[]... pairs) {
263            characterList = new ArrayList<>();
264            if (pairs != null) {
265                for (final char[] pair : pairs) {
266                    Validate.isTrue(pair.length == 2, "Each pair must contain minimum and maximum code point");
267                    final int minimumCodePoint = pair[0];
268                    final int maximumCodePoint = pair[1];
269                    Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
270                            maximumCodePoint);
271
272                    for (int index = minimumCodePoint; index <= maximumCodePoint; index++) {
273                        characterList.add((char) index);
274                    }
275                }
276            }
277            return this;
278
279        }
280
281        /**
282         * Sets the minimum and maximum code points allowed in the
283         * generated string.
284         *
285         * @param minimumCodePoint
286         *            the smallest code point allowed (inclusive)
287         * @param maximumCodePoint
288         *            the largest code point allowed (inclusive)
289         * @return {@code this}, to allow method chaining
290         * @throws IllegalArgumentException
291         *             if {@code maximumCodePoint >}
292         *             {@link Character#MAX_CODE_POINT}
293         * @throws IllegalArgumentException
294         *             if {@code minimumCodePoint < 0}
295         * @throws IllegalArgumentException
296         *             if {@code minimumCodePoint > maximumCodePoint}
297         */
298        public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
299            Validate.isTrue(minimumCodePoint <= maximumCodePoint,
300                    "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
301            Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
302            Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT,
303                    "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
304            this.minimumCodePoint = minimumCodePoint;
305            this.maximumCodePoint = maximumCodePoint;
306            return this;
307        }
308    }
309
310    /**
311     * Constructs a new builder.
312     * @return a new builder.
313     * @since 1.11.0
314     */
315    public static Builder builder() {
316        return new Builder();
317    }
318
319    /**
320     * The smallest allowed code point (inclusive).
321     */
322    private final int minimumCodePoint;
323
324    /**
325     * The largest allowed code point (inclusive).
326     */
327    private final int maximumCodePoint;
328
329    /**
330     * Filters for code points.
331     */
332    private final Set<CharacterPredicate> inclusivePredicates;
333
334    /**
335     * The source of randomness for this generator.
336     */
337    private final TextRandomProvider random;
338
339    /**
340     * The source of provided characters.
341     */
342    private final List<Character> characterList;
343
344    /**
345     * Constructs the generator.
346     *
347     * @param minimumCodePoint
348     *            smallest allowed code point (inclusive)
349     * @param maximumCodePoint
350     *            largest allowed code point (inclusive)
351     * @param inclusivePredicates
352     *            filters for code points
353     * @param random
354     *            source of randomness
355     * @param characterList list of predefined set of characters.
356     */
357    private RandomStringGenerator(final int minimumCodePoint, final int maximumCodePoint,
358                                  final Set<CharacterPredicate> inclusivePredicates, final TextRandomProvider random,
359                                  final List<Character> characterList) {
360        this.minimumCodePoint = minimumCodePoint;
361        this.maximumCodePoint = maximumCodePoint;
362        this.inclusivePredicates = inclusivePredicates;
363        this.random = random;
364        this.characterList = characterList;
365    }
366
367    /**
368     * Generates a random string, containing the specified number of code points.
369     *
370     * <p>
371     * Code points are randomly selected between the minimum and maximum values defined
372     * in the generator.
373     * Surrogate and private use characters are not returned, although the
374     * resulting string may contain pairs of surrogates that together encode a
375     * supplementary character.
376     * </p>
377     * <p>
378     * Note: the number of {@code char} code units generated will exceed
379     * {@code length} if the string contains supplementary characters. See the
380     * {@link Character} documentation to understand how Java stores Unicode
381     * values.
382     * </p>
383     *
384     * @param length
385     *            the number of code points to generate
386     * @return The generated string
387     * @throws IllegalArgumentException
388     *             if {@code length < 0}
389     */
390    public String generate(final int length) {
391        if (length == 0) {
392            return StringUtils.EMPTY;
393        }
394        Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
395        final StringBuilder builder = new StringBuilder(length);
396        long remaining = length;
397        do {
398            final int codePoint;
399            if (characterList != null && !characterList.isEmpty()) {
400                codePoint = generateRandomNumber(characterList);
401            } else {
402                codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
403            }
404            switch (Character.getType(codePoint)) {
405            case Character.UNASSIGNED:
406            case Character.PRIVATE_USE:
407            case Character.SURROGATE:
408                continue;
409            default:
410            }
411            if (inclusivePredicates != null) {
412                boolean matchedFilter = false;
413                for (final CharacterPredicate predicate : inclusivePredicates) {
414                    if (predicate.test(codePoint)) {
415                        matchedFilter = true;
416                        break;
417                    }
418                }
419                if (!matchedFilter) {
420                    continue;
421                }
422            }
423            builder.appendCodePoint(codePoint);
424            remaining--;
425        } while (remaining != 0);
426        return builder.toString();
427    }
428
429    /**
430     * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive)
431     * number of code points.
432     *
433     * @param minLengthInclusive
434     *            the minimum (inclusive) number of code points to generate
435     * @param maxLengthInclusive
436     *            the maximum (inclusive) number of code points to generate
437     * @return The generated string
438     * @throws IllegalArgumentException
439     *             if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}
440     * @see RandomStringGenerator#generate(int)
441     * @since 1.2
442     */
443    public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
444        Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
445        Validate.isTrue(minLengthInclusive <= maxLengthInclusive,
446                "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive, minLengthInclusive);
447        return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
448    }
449
450    /**
451     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
452     * or the user-supplied source of randomness.
453     *
454     * @param minInclusive
455     *            the minimum value allowed
456     * @param maxInclusive
457     *            the maximum value allowed
458     * @return The random number.
459     */
460    private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
461        if (random != null) {
462            return random.nextInt(maxInclusive - minInclusive + 1) + minInclusive;
463        }
464        return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
465    }
466
467    /**
468     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
469     * or the user-supplied source of randomness.
470     *
471     * @param characterList predefined char list.
472     * @return The random number.
473     */
474    private int generateRandomNumber(final List<Character> characterList) {
475        final int listSize = characterList.size();
476        if (random != null) {
477            return String.valueOf(characterList.get(random.nextInt(listSize))).codePointAt(0);
478        }
479        return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0);
480    }
481}