RandomStringGenerator.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text;

  18. import java.util.ArrayList;
  19. import java.util.Collections;
  20. import java.util.HashSet;
  21. import java.util.List;
  22. import java.util.Set;
  23. import java.util.concurrent.ThreadLocalRandom;

  24. import org.apache.commons.lang3.ArrayUtils;
  25. import org.apache.commons.lang3.StringUtils;
  26. import org.apache.commons.lang3.Validate;

  27. /**
  28.  * Generates random Unicode strings containing the specified number of code points.
  29.  * Instances are created using a builder class, which allows the
  30.  * callers to define the properties of the generator. See the documentation for the
  31.  * {@link Builder} class to see available properties.
  32.  *
  33.  * <pre>
  34.  * // Generates a 20 code point string, using only the letters a-z
  35.  * RandomStringGenerator generator = RandomStringGenerator.builder()
  36.  *     .withinRange('a', 'z').build();
  37.  * String randomLetters = generator.generate(20);
  38.  * </pre>
  39.  * <pre>
  40.  * // Using Apache Commons RNG for randomness
  41.  * UniformRandomProvider rng = RandomSource.create(...);
  42.  * // Generates a 20 code point string, using only the letters a-z
  43.  * RandomStringGenerator generator = RandomStringGenerator.builder()
  44.  *     .withinRange('a', 'z')
  45.  *     .usingRandom(rng::nextInt) // uses Java 8 syntax
  46.  *     .build();
  47.  * String randomLetters = generator.generate(20);
  48.  * </pre>
  49.  * <p>
  50.  * {@code RandomStringGenerator} instances are thread-safe when using the
  51.  * default random number generator (RNG). If a custom RNG is set by calling the method
  52.  * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety
  53.  * must be ensured externally.
  54.  * </p>
  55.  * @since 1.1
  56.  */
  57. public final class RandomStringGenerator {

  58.     /**
  59.      * A builder for generating {@code RandomStringGenerator} instances.
  60.      *
  61.      * <p>The behavior of a generator is controlled by properties set by this
  62.      * builder. Each property has a default value, which can be overridden by
  63.      * calling the methods defined in this class, prior to calling {@link #build()}.</p>
  64.      *
  65.      * <p>All the property setting methods return the {@code Builder} instance to allow for method chaining.</p>
  66.      *
  67.      * <p>The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The
  68.      * default values are {@code 0} and {@link Character#MAX_CODE_POINT} respectively.</p>
  69.      *
  70.      * <p>The source of randomness can be set using {@link #usingRandom(TextRandomProvider)},
  71.      * otherwise {@link ThreadLocalRandom} is used.</p>
  72.      *
  73.      * <p>The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)},
  74.      * which defines a collection of tests that are applied to the randomly generated code points.
  75.      * The code points will only be included in the result if they pass at least one of the tests.
  76.      * Some commonly used predicates are provided by the {@link CharacterPredicates} enum.</p>
  77.      *
  78.      * <p>This class is not thread safe.</p>
  79.      * @since 1.1
  80.      */
  81.     public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {

  82.         /**
  83.          * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
  84.          * ({@value}).
  85.          */
  86.         public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;

  87.         /**
  88.          * The default string length produced by this builder: {@value}.
  89.          */
  90.         public static final int DEFAULT_LENGTH = 0;

  91.         /**
  92.          * The default minimum code point allowed: {@value}.
  93.          */
  94.         public static final int DEFAULT_MINIMUM_CODE_POINT = 0;

  95.         /**
  96.          * The minimum code point allowed.
  97.          */
  98.         private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;

  99.         /**
  100.          * The maximum code point allowed.
  101.          */
  102.         private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;

  103.         /**
  104.          * Filters for code points.
  105.          */
  106.         private Set<CharacterPredicate> inclusivePredicates;

  107.         /**
  108.          * The source of randomness.
  109.          */
  110.         private TextRandomProvider random;

  111.         /**
  112.          * The source of provided characters.
  113.          */
  114.         private List<Character> characterList;

  115.         /**
  116.          * Creates a new instance.
  117.          */
  118.         public Builder() {
  119.             // empty
  120.         }

  121.         /**
  122.          * Builds a new {@code RandomStringGenerator}.
  123.          *
  124.          * @return A new {@code RandomStringGenerator}
  125.          * @deprecated Use {@link #get()}.
  126.          */
  127.         @Deprecated
  128.         @Override
  129.         public RandomStringGenerator build() {
  130.             return get();
  131.         }

  132.         /**
  133.          * Limits the characters in the generated string to those that match at
  134.          * least one of the predicates supplied.
  135.          *
  136.          * <p>
  137.          * Passing {@code null} or an empty array to this method will revert to the
  138.          * default behavior of allowing any character. Multiple calls to this
  139.          * method will replace the previously stored predicates.
  140.          * </p>
  141.          *
  142.          * @param predicates
  143.          *            the predicates, may be {@code null} or empty
  144.          * @return {@code this}, to allow method chaining
  145.          */
  146.         public Builder filteredBy(final CharacterPredicate... predicates) {
  147.             if (ArrayUtils.isEmpty(predicates)) {
  148.                 inclusivePredicates = null;
  149.                 return this;
  150.             }
  151.             if (inclusivePredicates == null) {
  152.                 inclusivePredicates = new HashSet<>();
  153.             } else {
  154.                 inclusivePredicates.clear();
  155.             }
  156.             Collections.addAll(inclusivePredicates, predicates);
  157.             return this;
  158.         }

  159.         /**
  160.          * Builds a new {@code RandomStringGenerator}.
  161.          *
  162.          * @return A new {@code RandomStringGenerator}
  163.          * @since 1.12.0
  164.          */
  165.         @Override
  166.         public RandomStringGenerator get() {
  167.             return new RandomStringGenerator(minimumCodePoint, maximumCodePoint, inclusivePredicates,
  168.                     random, characterList);
  169.         }

  170.         /**
  171.          * Limits the characters in the generated string to those who match at
  172.          * supplied list of Character.
  173.          *
  174.          * <p>
  175.          * Passing {@code null} or an empty array to this method will revert to the
  176.          * default behavior of allowing any character. Multiple calls to this
  177.          * method will replace the previously stored Character.
  178.          * </p>
  179.          *
  180.          * @param chars set of predefined Characters for random string generation
  181.          *            the Character can be, may be {@code null} or empty
  182.          * @return {@code this}, to allow method chaining
  183.          * @since 1.2
  184.          */
  185.         public Builder selectFrom(final char... chars) {
  186.             characterList = new ArrayList<>();
  187.             if (chars != null) {
  188.                 for (final char c : chars) {
  189.                     characterList.add(c);
  190.                 }
  191.             }
  192.             return this;
  193.         }

  194.         /**
  195.          * Overrides the default source of randomness.  It is highly
  196.          * recommended that a random number generator library like
  197.          * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a>
  198.          * be used to provide the random number generation.
  199.          *
  200.          * <p>
  201.          * When using Java 8 or later, {@link TextRandomProvider} is a
  202.          * functional interface and need not be explicitly implemented:
  203.          * </p>
  204.          * <pre>
  205.          * {@code
  206.          *     UniformRandomProvider rng = RandomSource.create(...);
  207.          *     RandomStringGenerator gen = RandomStringGenerator.builder()
  208.          *         .usingRandom(rng::nextInt)
  209.          *         // additional builder calls as needed
  210.          *         .build();
  211.          * }
  212.          * </pre>
  213.          *
  214.          * <p>
  215.          * Passing {@code null} to this method will revert to the default source of
  216.          * randomness.
  217.          * </p>
  218.          *
  219.          * @param random
  220.          *            the source of randomness, may be {@code null}
  221.          * @return {@code this}, to allow method chaining
  222.          */
  223.         public Builder usingRandom(final TextRandomProvider random) {
  224.             this.random = random;
  225.             return this;
  226.         }

  227.         /**
  228.          * Sets the array of minimum and maximum char allowed in the
  229.          * generated string.
  230.          *
  231.          * For example:
  232.          * <pre>
  233.          * {@code
  234.          *     char [][] pairs = {{'0','9'}};
  235.          *     char [][] pairs = {{'a','z'}};
  236.          *     char [][] pairs = {{'a','z'},{'0','9'}};
  237.          * }
  238.          * </pre>
  239.          *
  240.          * @param pairs array of characters array, expected is to pass min, max pairs through this arg.
  241.          * @return {@code this}, to allow method chaining.
  242.          */
  243.         public Builder withinRange(final char[]... pairs) {
  244.             characterList = new ArrayList<>();
  245.             if (pairs != null) {
  246.                 for (final char[] pair : pairs) {
  247.                     Validate.isTrue(pair.length == 2, "Each pair must contain minimum and maximum code point");
  248.                     final int minimumCodePoint = pair[0];
  249.                     final int maximumCodePoint = pair[1];
  250.                     Validate.isTrue(minimumCodePoint <= maximumCodePoint, "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
  251.                             maximumCodePoint);

  252.                     for (int index = minimumCodePoint; index <= maximumCodePoint; index++) {
  253.                         characterList.add((char) index);
  254.                     }
  255.                 }
  256.             }
  257.             return this;

  258.         }

  259.         /**
  260.          * Sets the minimum and maximum code points allowed in the
  261.          * generated string.
  262.          *
  263.          * @param minimumCodePoint
  264.          *            the smallest code point allowed (inclusive)
  265.          * @param maximumCodePoint
  266.          *            the largest code point allowed (inclusive)
  267.          * @return {@code this}, to allow method chaining
  268.          * @throws IllegalArgumentException
  269.          *             if {@code maximumCodePoint >}
  270.          *             {@link Character#MAX_CODE_POINT}
  271.          * @throws IllegalArgumentException
  272.          *             if {@code minimumCodePoint < 0}
  273.          * @throws IllegalArgumentException
  274.          *             if {@code minimumCodePoint > maximumCodePoint}
  275.          */
  276.         public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
  277.             Validate.isTrue(minimumCodePoint <= maximumCodePoint,
  278.                     "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
  279.             Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
  280.             Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT,
  281.                     "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
  282.             this.minimumCodePoint = minimumCodePoint;
  283.             this.maximumCodePoint = maximumCodePoint;
  284.             return this;
  285.         }
  286.     }

  287.     /**
  288.      * Constructs a new builder.
  289.      * @return a new builder.
  290.      * @since 1.11.0
  291.      */
  292.     public static Builder builder() {
  293.         return new Builder();
  294.     }

  295.     /**
  296.      * The smallest allowed code point (inclusive).
  297.      */
  298.     private final int minimumCodePoint;

  299.     /**
  300.      * The largest allowed code point (inclusive).
  301.      */
  302.     private final int maximumCodePoint;

  303.     /**
  304.      * Filters for code points.
  305.      */
  306.     private final Set<CharacterPredicate> inclusivePredicates;

  307.     /**
  308.      * The source of randomness for this generator.
  309.      */
  310.     private final TextRandomProvider random;

  311.     /**
  312.      * The source of provided characters.
  313.      */
  314.     private final List<Character> characterList;

  315.     /**
  316.      * Constructs the generator.
  317.      *
  318.      * @param minimumCodePoint
  319.      *            smallest allowed code point (inclusive)
  320.      * @param maximumCodePoint
  321.      *            largest allowed code point (inclusive)
  322.      * @param inclusivePredicates
  323.      *            filters for code points
  324.      * @param random
  325.      *            source of randomness
  326.      * @param characterList list of predefined set of characters.
  327.      */
  328.     private RandomStringGenerator(final int minimumCodePoint, final int maximumCodePoint,
  329.                                   final Set<CharacterPredicate> inclusivePredicates, final TextRandomProvider random,
  330.                                   final List<Character> characterList) {
  331.         this.minimumCodePoint = minimumCodePoint;
  332.         this.maximumCodePoint = maximumCodePoint;
  333.         this.inclusivePredicates = inclusivePredicates;
  334.         this.random = random;
  335.         this.characterList = characterList;
  336.     }

  337.     /**
  338.      * Generates a random string, containing the specified number of code points.
  339.      *
  340.      * <p>
  341.      * Code points are randomly selected between the minimum and maximum values defined
  342.      * in the generator.
  343.      * Surrogate and private use characters are not returned, although the
  344.      * resulting string may contain pairs of surrogates that together encode a
  345.      * supplementary character.
  346.      * </p>
  347.      * <p>
  348.      * Note: the number of {@code char} code units generated will exceed
  349.      * {@code length} if the string contains supplementary characters. See the
  350.      * {@link Character} documentation to understand how Java stores Unicode
  351.      * values.
  352.      * </p>
  353.      *
  354.      * @param length
  355.      *            the number of code points to generate
  356.      * @return The generated string
  357.      * @throws IllegalArgumentException
  358.      *             if {@code length < 0}
  359.      */
  360.     public String generate(final int length) {
  361.         if (length == 0) {
  362.             return StringUtils.EMPTY;
  363.         }
  364.         Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
  365.         final StringBuilder builder = new StringBuilder(length);
  366.         long remaining = length;
  367.         do {
  368.             final int codePoint;
  369.             if (characterList != null && !characterList.isEmpty()) {
  370.                 codePoint = generateRandomNumber(characterList);
  371.             } else {
  372.                 codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
  373.             }
  374.             switch (Character.getType(codePoint)) {
  375.             case Character.UNASSIGNED:
  376.             case Character.PRIVATE_USE:
  377.             case Character.SURROGATE:
  378.                 continue;
  379.             default:
  380.             }
  381.             if (inclusivePredicates != null) {
  382.                 boolean matchedFilter = false;
  383.                 for (final CharacterPredicate predicate : inclusivePredicates) {
  384.                     if (predicate.test(codePoint)) {
  385.                         matchedFilter = true;
  386.                         break;
  387.                     }
  388.                 }
  389.                 if (!matchedFilter) {
  390.                     continue;
  391.                 }
  392.             }
  393.             builder.appendCodePoint(codePoint);
  394.             remaining--;
  395.         } while (remaining != 0);
  396.         return builder.toString();
  397.     }

  398.     /**
  399.      * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive)
  400.      * number of code points.
  401.      *
  402.      * @param minLengthInclusive
  403.      *            the minimum (inclusive) number of code points to generate
  404.      * @param maxLengthInclusive
  405.      *            the maximum (inclusive) number of code points to generate
  406.      * @return The generated string
  407.      * @throws IllegalArgumentException
  408.      *             if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}
  409.      * @see RandomStringGenerator#generate(int)
  410.      * @since 1.2
  411.      */
  412.     public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
  413.         Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
  414.         Validate.isTrue(minLengthInclusive <= maxLengthInclusive,
  415.                 "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive, minLengthInclusive);
  416.         return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
  417.     }

  418.     /**
  419.      * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
  420.      * or the user-supplied source of randomness.
  421.      *
  422.      * @param minInclusive
  423.      *            the minimum value allowed
  424.      * @param maxInclusive
  425.      *            the maximum value allowed
  426.      * @return The random number.
  427.      */
  428.     private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
  429.         if (random != null) {
  430.             return random.nextInt(maxInclusive - minInclusive + 1) + minInclusive;
  431.         }
  432.         return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
  433.     }

  434.     /**
  435.      * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
  436.      * or the user-supplied source of randomness.
  437.      *
  438.      * @param characterList predefined char list.
  439.      * @return The random number.
  440.      */
  441.     private int generateRandomNumber(final List<Character> characterList) {
  442.         final int listSize = characterList.size();
  443.         if (random != null) {
  444.             return String.valueOf(characterList.get(random.nextInt(listSize))).codePointAt(0);
  445.         }
  446.         return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0);
  447.     }
  448. }