CharSetUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.lang3;

  18. import org.apache.commons.lang3.stream.Streams;

  19. /**
  20.  * Operations on {@link CharSet} instances.
  21.  *
  22.  * <p>This class handles {@code null} input gracefully.
  23.  * An exception will not be thrown for a {@code null} input.
  24.  * Each method documents its behavior in more detail.</p>
  25.  *
  26.  * <p>#ThreadSafe#</p>
  27.  * @see CharSet
  28.  * @since 1.0
  29.  */
  30. public class CharSetUtils {

  31.     /**
  32.      * Takes an argument in set-syntax, see evaluateSet,
  33.      * and identifies whether any of the characters are present in the specified string.
  34.      *
  35.      * <pre>
  36.      * CharSetUtils.containsAny(null, *)        = false
  37.      * CharSetUtils.containsAny("", *)          = false
  38.      * CharSetUtils.containsAny(*, null)        = false
  39.      * CharSetUtils.containsAny(*, "")          = false
  40.      * CharSetUtils.containsAny("hello", "k-p") = true
  41.      * CharSetUtils.containsAny("hello", "a-d") = false
  42.      * </pre>
  43.      *
  44.      * @see CharSet#getInstance(String...) for set-syntax.
  45.      * @param str  String to look for characters in, may be null
  46.      * @param set  String[] set of characters to identify, may be null
  47.      * @return whether or not the characters in the set are in the primary string
  48.      * @since 3.2
  49.      */
  50.     public static boolean containsAny(final String str, final String... set) {
  51.         if (StringUtils.isEmpty(str) || deepEmpty(set)) {
  52.             return false;
  53.         }
  54.         final CharSet chars = CharSet.getInstance(set);
  55.         for (final char c : str.toCharArray()) {
  56.             if (chars.contains(c)) {
  57.                 return true;
  58.             }
  59.         }
  60.         return false;
  61.     }

  62.     /**
  63.      * Takes an argument in set-syntax, see evaluateSet,
  64.      * and returns the number of characters present in the specified string.
  65.      *
  66.      * <pre>
  67.      * CharSetUtils.count(null, *)        = 0
  68.      * CharSetUtils.count("", *)          = 0
  69.      * CharSetUtils.count(*, null)        = 0
  70.      * CharSetUtils.count(*, "")          = 0
  71.      * CharSetUtils.count("hello", "k-p") = 3
  72.      * CharSetUtils.count("hello", "a-e") = 1
  73.      * </pre>
  74.      *
  75.      * @see CharSet#getInstance(String...) for set-syntax.
  76.      * @param str  String to count characters in, may be null
  77.      * @param set  String[] set of characters to count, may be null
  78.      * @return the character count, zero if null string input
  79.      */
  80.     public static int count(final String str, final String... set) {
  81.         if (StringUtils.isEmpty(str) || deepEmpty(set)) {
  82.             return 0;
  83.         }
  84.         final CharSet chars = CharSet.getInstance(set);
  85.         int count = 0;
  86.         for (final char c : str.toCharArray()) {
  87.             if (chars.contains(c)) {
  88.                 count++;
  89.             }
  90.         }
  91.         return count;
  92.     }

  93.     /**
  94.      * Determines whether or not all the Strings in an array are
  95.      * empty or not.
  96.      *
  97.      * @param strings String[] whose elements are being checked for emptiness
  98.      * @return whether or not the String is empty
  99.      */
  100.     private static boolean deepEmpty(final String[] strings) {
  101.         return Streams.of(strings).allMatch(StringUtils::isEmpty);
  102.     }

  103.     /**
  104.      * Takes an argument in set-syntax, see evaluateSet,
  105.      * and deletes any of characters present in the specified string.
  106.      *
  107.      * <pre>
  108.      * CharSetUtils.delete(null, *)        = null
  109.      * CharSetUtils.delete("", *)          = ""
  110.      * CharSetUtils.delete(*, null)        = *
  111.      * CharSetUtils.delete(*, "")          = *
  112.      * CharSetUtils.delete("hello", "hl")  = "eo"
  113.      * CharSetUtils.delete("hello", "le")  = "ho"
  114.      * </pre>
  115.      *
  116.      * @see CharSet#getInstance(String...) for set-syntax.
  117.      * @param str  String to delete characters from, may be null
  118.      * @param set  String[] set of characters to delete, may be null
  119.      * @return the modified String, {@code null} if null string input
  120.      */
  121.     public static String delete(final String str, final String... set) {
  122.         if (StringUtils.isEmpty(str) || deepEmpty(set)) {
  123.             return str;
  124.         }
  125.         return modify(str, set, false);
  126.     }

  127.     /**
  128.      * Takes an argument in set-syntax, see evaluateSet,
  129.      * and keeps any of characters present in the specified string.
  130.      *
  131.      * <pre>
  132.      * CharSetUtils.keep(null, *)        = null
  133.      * CharSetUtils.keep("", *)          = ""
  134.      * CharSetUtils.keep(*, null)        = ""
  135.      * CharSetUtils.keep(*, "")          = ""
  136.      * CharSetUtils.keep("hello", "hl")  = "hll"
  137.      * CharSetUtils.keep("hello", "le")  = "ell"
  138.      * </pre>
  139.      *
  140.      * @see CharSet#getInstance(String...) for set-syntax.
  141.      * @param str  String to keep characters from, may be null
  142.      * @param set  String[] set of characters to keep, may be null
  143.      * @return the modified String, {@code null} if null string input
  144.      * @since 2.0
  145.      */
  146.     public static String keep(final String str, final String... set) {
  147.         if (str == null) {
  148.             return null;
  149.         }
  150.         if (str.isEmpty() || deepEmpty(set)) {
  151.             return StringUtils.EMPTY;
  152.         }
  153.         return modify(str, set, true);
  154.     }

  155.     /**
  156.      * Implements delete and keep.
  157.      *
  158.      * @param str String to modify characters within
  159.      * @param set String[] set of characters to modify
  160.      * @param expect whether to evaluate on match, or non-match
  161.      * @return the modified String, not null
  162.      */
  163.     private static String modify(final String str, final String[] set, final boolean expect) {
  164.         final CharSet chars = CharSet.getInstance(set);
  165.         final StringBuilder buffer = new StringBuilder(str.length());
  166.         final char[] chrs = str.toCharArray();
  167.         for (final char chr : chrs) {
  168.             if (chars.contains(chr) == expect) {
  169.                 buffer.append(chr);
  170.             }
  171.         }
  172.         return buffer.toString();
  173.     }

  174.     /**
  175.      * Squeezes any repetitions of a character that is mentioned in the
  176.      * supplied set.
  177.      *
  178.      * <pre>
  179.      * CharSetUtils.squeeze(null, *)        = null
  180.      * CharSetUtils.squeeze("", *)          = ""
  181.      * CharSetUtils.squeeze(*, null)        = *
  182.      * CharSetUtils.squeeze(*, "")          = *
  183.      * CharSetUtils.squeeze("hello", "k-p") = "helo"
  184.      * CharSetUtils.squeeze("hello", "a-e") = "hello"
  185.      * </pre>
  186.      *
  187.      * @see CharSet#getInstance(String...) for set-syntax.
  188.      * @param str  the string to squeeze, may be null
  189.      * @param set  the character set to use for manipulation, may be null
  190.      * @return the modified String, {@code null} if null string input
  191.      */
  192.     public static String squeeze(final String str, final String... set) {
  193.         if (StringUtils.isEmpty(str) || deepEmpty(set)) {
  194.             return str;
  195.         }
  196.         final CharSet chars = CharSet.getInstance(set);
  197.         final StringBuilder buffer = new StringBuilder(str.length());
  198.         final char[] chrs = str.toCharArray();
  199.         final int sz = chrs.length;
  200.         char lastChar = chrs[0];
  201.         char ch;
  202.         Character inChars = null;
  203.         Character notInChars = null;
  204.         buffer.append(lastChar);
  205.         for (int i = 1; i < sz; i++) {
  206.             ch = chrs[i];
  207.             if (ch == lastChar) {
  208.                 if (inChars != null && ch == inChars) {
  209.                     continue;
  210.                 }
  211.                 if (notInChars == null || ch != notInChars) {
  212.                     if (chars.contains(ch)) {
  213.                         inChars = ch;
  214.                         continue;
  215.                     }
  216.                     notInChars = ch;
  217.                 }
  218.             }
  219.             buffer.append(ch);
  220.             lastChar = ch;
  221.         }
  222.         return buffer.toString();
  223.     }

  224.     /**
  225.      * CharSetUtils instances should NOT be constructed in standard programming.
  226.      * Instead, the class should be used as {@code CharSetUtils.evaluateSet(null);}.
  227.      *
  228.      * <p>This constructor is public to permit tools that require a JavaBean instance
  229.      * to operate.</p>
  230.      *
  231.      * @deprecated TODO Make private in 4.0.
  232.      */
  233.     @Deprecated
  234.     public CharSetUtils() {
  235.     }
  236. }