001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.util.ArrayList;
020import java.util.Collections;
021import java.util.HashSet;
022import java.util.List;
023import java.util.Set;
024import java.util.concurrent.ThreadLocalRandom;
025
026import org.apache.commons.lang3.ArrayUtils;
027import org.apache.commons.lang3.StringUtils;
028import org.apache.commons.lang3.Validate;
029
030/**
031 * <p>
032 * Generates random Unicode strings containing the specified number of code points.
033 * Instances are created using a builder class, which allows the
034 * callers to define the properties of the generator. See the documentation for the
035 * {@link Builder} class to see available properties.
036 * </p>
037 * <pre>
038 * // Generates a 20 code point string, using only the letters a-z
039 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
040 *     .withinRange('a', 'z').build();
041 * String randomLetters = generator.generate(20);
042 * </pre>
043 * <pre>
044 * // Using Apache Commons RNG for randomness
045 * UniformRandomProvider rng = RandomSource.create(...);
046 * // Generates a 20 code point string, using only the letters a-z
047 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
048 *     .withinRange('a', 'z')
049 *     .usingRandom(rng::nextInt) // uses Java 8 syntax
050 *     .build();
051 * String randomLetters = generator.generate(20);
052 * </pre>
053 * <p>
054 * {@code RandomStringGenerator} instances are thread-safe when using the
055 * default random number generator (RNG). If a custom RNG is set by calling the method
056 * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety
057 * must be ensured externally.
058 * </p>
059 * @since 1.1
060 */
061public final class RandomStringGenerator {
062
063    /**
064     * The smallest allowed code point (inclusive).
065     */
066    private final int minimumCodePoint;
067
068    /**
069     * The largest allowed code point (inclusive).
070     */
071    private final int maximumCodePoint;
072
073    /**
074     * Filters for code points.
075     */
076    private final Set<CharacterPredicate> inclusivePredicates;
077
078    /**
079     * The source of randomness for this generator.
080     */
081    private final TextRandomProvider random;
082
083    /**
084     * The source of provided characters.
085     */
086    private final List<Character> characterList;
087
088    /**
089     * Constructs the generator.
090     *
091     * @param minimumCodePoint
092     *            smallest allowed code point (inclusive)
093     * @param maximumCodePoint
094     *            largest allowed code point (inclusive)
095     * @param inclusivePredicates
096     *            filters for code points
097     * @param random
098     *            source of randomness
099     * @param characterList list of predefined set of characters.
100     */
101    private RandomStringGenerator(final int minimumCodePoint, final int maximumCodePoint,
102                                  final Set<CharacterPredicate> inclusivePredicates, final TextRandomProvider random,
103                                  final List<Character> characterList) {
104        this.minimumCodePoint = minimumCodePoint;
105        this.maximumCodePoint = maximumCodePoint;
106        this.inclusivePredicates = inclusivePredicates;
107        this.random = random;
108        this.characterList = characterList;
109    }
110
111    /**
112     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
113     * or the user-supplied source of randomness.
114     *
115     * @param minInclusive
116     *            the minimum value allowed
117     * @param maxInclusive
118     *            the maximum value allowed
119     * @return The random number.
120     */
121    private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
122        if (random != null) {
123            return random.nextInt(maxInclusive - minInclusive + 1) + minInclusive;
124        }
125        return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
126    }
127
128    /**
129     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
130     * or the user-supplied source of randomness.
131     *
132     * @param characterList predefined char list.
133     * @return The random number.
134     */
135    private int generateRandomNumber(final List<Character> characterList) {
136        final int listSize = characterList.size();
137        if (random != null) {
138            return String.valueOf(characterList.get(random.nextInt(listSize))).codePointAt(0);
139        }
140        return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0);
141    }
142
143    /**
144     * <p>
145     * Generates a random string, containing the specified number of code points.
146     * </p>
147     * <p>Code points are randomly selected between the minimum and maximum values defined
148     * in the generator.
149     * Surrogate and private use characters are not returned, although the
150     * resulting string may contain pairs of surrogates that together encode a
151     * supplementary character.
152     * </p>
153     * <p>
154     * Note: the number of {@code char} code units generated will exceed
155     * {@code length} if the string contains supplementary characters. See the
156     * {@link Character} documentation to understand how Java stores Unicode
157     * values.
158     * </p>
159     *
160     * @param length
161     *            the number of code points to generate
162     * @return The generated string
163     * @throws IllegalArgumentException
164     *             if {@code length < 0}
165     */
166    public String generate(final int length) {
167        if (length == 0) {
168            return StringUtils.EMPTY;
169        }
170        Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
171
172        final StringBuilder builder = new StringBuilder(length);
173        long remaining = length;
174
175        do {
176            int codePoint;
177            if (characterList != null && !characterList.isEmpty()) {
178                codePoint = generateRandomNumber(characterList);
179            } else {
180                codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
181            }
182            switch (Character.getType(codePoint)) {
183            case Character.UNASSIGNED:
184            case Character.PRIVATE_USE:
185            case Character.SURROGATE:
186                continue;
187            default:
188            }
189
190            if (inclusivePredicates != null) {
191                boolean matchedFilter = false;
192                for (final CharacterPredicate predicate : inclusivePredicates) {
193                    if (predicate.test(codePoint)) {
194                        matchedFilter = true;
195                        break;
196                    }
197                }
198                if (!matchedFilter) {
199                    continue;
200                }
201            }
202
203            builder.appendCodePoint(codePoint);
204            remaining--;
205
206        } while (remaining != 0);
207
208        return builder.toString();
209    }
210
211    /**
212     * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive)
213     * number of code points.
214     *
215     * @param minLengthInclusive
216     *            the minimum (inclusive) number of code points to generate
217     * @param maxLengthInclusive
218     *            the maximum (inclusive) number of code points to generate
219     * @return The generated string
220     * @throws IllegalArgumentException
221     *             if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}
222     * @see RandomStringGenerator#generate(int)
223     * @since 1.2
224     */
225    public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
226        Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
227        Validate.isTrue(minLengthInclusive <= maxLengthInclusive,
228                "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive, minLengthInclusive);
229        return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
230    }
231
232    /**
233     * <p>A builder for generating {@code RandomStringGenerator} instances.</p>
234     * <p>The behavior of a generator is controlled by properties set by this
235     * builder. Each property has a default value, which can be overridden by
236     * calling the methods defined in this class, prior to calling {@link #build()}.</p>
237     *
238     * <p>All the property setting methods return the {@code Builder} instance to allow for method chaining.</p>
239     *
240     * <p>The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The
241     * default values are {@code 0} and {@link Character#MAX_CODE_POINT} respectively.</p>
242     *
243     * <p>The source of randomness can be set using {@link #usingRandom(TextRandomProvider)},
244     * otherwise {@link ThreadLocalRandom} is used.</p>
245     *
246     * <p>The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)},
247     * which defines a collection of tests that are applied to the randomly generated code points.
248     * The code points will only be included in the result if they pass at least one of the tests.
249     * Some commonly used predicates are provided by the {@link CharacterPredicates} enum.</p>
250     *
251     * <p>This class is not thread safe.</p>
252     * @since 1.1
253     */
254    public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {
255
256        /**
257         * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
258         * ({@value}).
259         */
260        public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;
261
262        /**
263         * The default string length produced by this builder: {@value}.
264         */
265        public static final int DEFAULT_LENGTH = 0;
266
267        /**
268         * The default minimum code point allowed: {@value}.
269         */
270        public static final int DEFAULT_MINIMUM_CODE_POINT = 0;
271
272        /**
273         * The minimum code point allowed.
274         */
275        private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;
276
277        /**
278         * The maximum code point allowed.
279         */
280        private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;
281
282        /**
283         * Filters for code points.
284         */
285        private Set<CharacterPredicate> inclusivePredicates;
286
287        /**
288         * The source of randomness.
289         */
290        private TextRandomProvider random;
291
292        /**
293         * The source of provided characters.
294         */
295        private List<Character> characterList;
296
297        /**
298         * <p>
299         * Specifies the minimum and maximum code points allowed in the
300         * generated string.
301         * </p>
302         *
303         * @param minimumCodePoint
304         *            the smallest code point allowed (inclusive)
305         * @param maximumCodePoint
306         *            the largest code point allowed (inclusive)
307         * @return {@code this}, to allow method chaining
308         * @throws IllegalArgumentException
309         *             if {@code maximumCodePoint >}
310         *             {@link Character#MAX_CODE_POINT}
311         * @throws IllegalArgumentException
312         *             if {@code minimumCodePoint < 0}
313         * @throws IllegalArgumentException
314         *             if {@code minimumCodePoint > maximumCodePoint}
315         */
316        public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
317            Validate.isTrue(minimumCodePoint <= maximumCodePoint,
318                    "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
319            Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
320            Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT,
321                    "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
322
323            this.minimumCodePoint = minimumCodePoint;
324            this.maximumCodePoint = maximumCodePoint;
325            return this;
326        }
327
328        /**
329         * <p>
330         * Specifies the array of minimum and maximum char allowed in the
331         * generated string.
332         * </p>
333         *
334         * For example:
335         * <pre>
336         * {@code
337         *     char [][] pairs = {{'0','9'}};
338         *     char [][] pairs = {{'a','z'}};
339         *     char [][] pairs = {{'a','z'},{'0','9'}};
340         * }
341         * </pre>
342         *
343         * @param pairs array of characters array, expected is to pass min, max pairs through this arg.
344         * @return {@code this}, to allow method chaining.
345         */
346        public Builder withinRange(final char[]... pairs) {
347            characterList = new ArrayList<>();
348            for (final char[] pair :  pairs) {
349                Validate.isTrue(pair.length == 2,
350                      "Each pair must contain minimum and maximum code point");
351                final int minimumCodePoint = pair[0];
352                final int maximumCodePoint = pair[1];
353                Validate.isTrue(minimumCodePoint <= maximumCodePoint,
354                    "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
355
356                for (int index = minimumCodePoint; index <= maximumCodePoint; index++) {
357                    characterList.add((char) index);
358                }
359            }
360            return this;
361
362        }
363
364        /**
365         * <p>
366         * Limits the characters in the generated string to those that match at
367         * least one of the predicates supplied.
368         * </p>
369         *
370         * <p>
371         * Passing {@code null} or an empty array to this method will revert to the
372         * default behavior of allowing any character. Multiple calls to this
373         * method will replace the previously stored predicates.
374         * </p>
375         *
376         * @param predicates
377         *            the predicates, may be {@code null} or empty
378         * @return {@code this}, to allow method chaining
379         */
380        public Builder filteredBy(final CharacterPredicate... predicates) {
381            if (ArrayUtils.isEmpty(predicates)) {
382                inclusivePredicates = null;
383                return this;
384            }
385
386            if (inclusivePredicates == null) {
387                inclusivePredicates = new HashSet<>();
388            } else {
389                inclusivePredicates.clear();
390            }
391
392            Collections.addAll(inclusivePredicates, predicates);
393
394            return this;
395        }
396
397        /**
398         * <p>
399         * Overrides the default source of randomness.  It is highly
400         * recommended that a random number generator library like
401         * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a>
402         * be used to provide the random number generation.
403         * </p>
404         *
405         * <p>
406         * When using Java 8 or later, {@link TextRandomProvider} is a
407         * functional interface and need not be explicitly implemented:
408         * </p>
409         * <pre>
410         * {@code
411         *     UniformRandomProvider rng = RandomSource.create(...);
412         *     RandomStringGenerator gen = new RandomStringGenerator.Builder()
413         *         .usingRandom(rng::nextInt)
414         *         // additional builder calls as needed
415         *         .build();
416         * }
417         * </pre>
418         *
419         * <p>
420         * Passing {@code null} to this method will revert to the default source of
421         * randomness.
422         * </p>
423         *
424         * @param random
425         *            the source of randomness, may be {@code null}
426         * @return {@code this}, to allow method chaining
427         */
428        public Builder usingRandom(final TextRandomProvider random) {
429            this.random = random;
430            return this;
431        }
432
433        /**
434         * <p>
435         * Limits the characters in the generated string to those who match at
436         * supplied list of Character.
437         * </p>
438         *
439         * <p>
440         * Passing {@code null} or an empty array to this method will revert to the
441         * default behavior of allowing any character. Multiple calls to this
442         * method will replace the previously stored Character.
443         * </p>
444         *
445         * @param chars set of predefined Characters for random string generation
446         *            the Character can be, may be {@code null} or empty
447         * @return {@code this}, to allow method chaining
448         * @since 1.2
449         */
450        public Builder selectFrom(final char... chars) {
451            characterList = new ArrayList<>();
452            for (final char c : chars) {
453                characterList.add(c);
454            }
455            return this;
456        }
457
458        /**
459         * <p>Builds the {@code RandomStringGenerator} using the properties specified.</p>
460         * @return The configured {@code RandomStringGenerator}
461         */
462        @Override
463        public RandomStringGenerator build() {
464            return new RandomStringGenerator(minimumCodePoint, maximumCodePoint, inclusivePredicates,
465                    random, characterList);
466        }
467    }
468}