001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3; 018 019/** 020 * <p>Operations on {@code CharSet} instances.</p> 021 * 022 * <p>This class handles {@code null} input gracefully. 023 * An exception will not be thrown for a {@code null} input. 024 * Each method documents its behavior in more detail.</p> 025 * 026 * <p>#ThreadSafe#</p> 027 * @see CharSet 028 * @since 1.0 029 */ 030public class CharSetUtils { 031 032 /** 033 * <p>Takes an argument in set-syntax, see evaluateSet, 034 * and identifies whether any of the characters are present in the specified string.</p> 035 * 036 * <pre> 037 * CharSetUtils.containsAny(null, *) = false 038 * CharSetUtils.containsAny("", *) = false 039 * CharSetUtils.containsAny(*, null) = false 040 * CharSetUtils.containsAny(*, "") = false 041 * CharSetUtils.containsAny("hello", "k-p") = true 042 * CharSetUtils.containsAny("hello", "a-d") = false 043 * </pre> 044 * 045 * @see CharSet#getInstance(java.lang.String...) for set-syntax. 046 * @param str String to look for characters in, may be null 047 * @param set String[] set of characters to identify, may be null 048 * @return whether or not the characters in the set are in the primary string 049 * @since 3.2 050 */ 051 public static boolean containsAny(final String str, final String... set) { 052 if (StringUtils.isEmpty(str) || deepEmpty(set)) { 053 return false; 054 } 055 final CharSet chars = CharSet.getInstance(set); 056 for (final char c : str.toCharArray()) { 057 if (chars.contains(c)) { 058 return true; 059 } 060 } 061 return false; 062 } 063 064 /** 065 * <p>Takes an argument in set-syntax, see evaluateSet, 066 * and returns the number of characters present in the specified string.</p> 067 * 068 * <pre> 069 * CharSetUtils.count(null, *) = 0 070 * CharSetUtils.count("", *) = 0 071 * CharSetUtils.count(*, null) = 0 072 * CharSetUtils.count(*, "") = 0 073 * CharSetUtils.count("hello", "k-p") = 3 074 * CharSetUtils.count("hello", "a-e") = 1 075 * </pre> 076 * 077 * @see CharSet#getInstance(java.lang.String...) for set-syntax. 078 * @param str String to count characters in, may be null 079 * @param set String[] set of characters to count, may be null 080 * @return the character count, zero if null string input 081 */ 082 public static int count(final String str, final String... set) { 083 if (StringUtils.isEmpty(str) || deepEmpty(set)) { 084 return 0; 085 } 086 final CharSet chars = CharSet.getInstance(set); 087 int count = 0; 088 for (final char c : str.toCharArray()) { 089 if (chars.contains(c)) { 090 count++; 091 } 092 } 093 return count; 094 } 095 096 /** 097 * Determines whether or not all the Strings in an array are 098 * empty or not. 099 * 100 * @param strings String[] whose elements are being checked for emptiness 101 * @return whether or not the String is empty 102 */ 103 private static boolean deepEmpty(final String[] strings) { 104 if (strings != null) { 105 for (final String s : strings) { 106 if (StringUtils.isNotEmpty(s)) { 107 return false; 108 } 109 } 110 } 111 return true; 112 } 113 114 /** 115 * <p>Takes an argument in set-syntax, see evaluateSet, 116 * and deletes any of characters present in the specified string.</p> 117 * 118 * <pre> 119 * CharSetUtils.delete(null, *) = null 120 * CharSetUtils.delete("", *) = "" 121 * CharSetUtils.delete(*, null) = * 122 * CharSetUtils.delete(*, "") = * 123 * CharSetUtils.delete("hello", "hl") = "eo" 124 * CharSetUtils.delete("hello", "le") = "ho" 125 * </pre> 126 * 127 * @see CharSet#getInstance(java.lang.String...) for set-syntax. 128 * @param str String to delete characters from, may be null 129 * @param set String[] set of characters to delete, may be null 130 * @return the modified String, {@code null} if null string input 131 */ 132 public static String delete(final String str, final String... set) { 133 if (StringUtils.isEmpty(str) || deepEmpty(set)) { 134 return str; 135 } 136 return modify(str, set, false); 137 } 138 139 /** 140 * <p>Takes an argument in set-syntax, see evaluateSet, 141 * and keeps any of characters present in the specified string.</p> 142 * 143 * <pre> 144 * CharSetUtils.keep(null, *) = null 145 * CharSetUtils.keep("", *) = "" 146 * CharSetUtils.keep(*, null) = "" 147 * CharSetUtils.keep(*, "") = "" 148 * CharSetUtils.keep("hello", "hl") = "hll" 149 * CharSetUtils.keep("hello", "le") = "ell" 150 * </pre> 151 * 152 * @see CharSet#getInstance(java.lang.String...) for set-syntax. 153 * @param str String to keep characters from, may be null 154 * @param set String[] set of characters to keep, may be null 155 * @return the modified String, {@code null} if null string input 156 * @since 2.0 157 */ 158 public static String keep(final String str, final String... set) { 159 if (str == null) { 160 return null; 161 } 162 if (str.isEmpty() || deepEmpty(set)) { 163 return StringUtils.EMPTY; 164 } 165 return modify(str, set, true); 166 } 167 168 /** 169 * Implementation of delete and keep 170 * 171 * @param str String to modify characters within 172 * @param set String[] set of characters to modify 173 * @param expect whether to evaluate on match, or non-match 174 * @return the modified String, not null 175 */ 176 private static String modify(final String str, final String[] set, final boolean expect) { 177 final CharSet chars = CharSet.getInstance(set); 178 final StringBuilder buffer = new StringBuilder(str.length()); 179 final char[] chrs = str.toCharArray(); 180 for (final char chr : chrs) { 181 if (chars.contains(chr) == expect) { 182 buffer.append(chr); 183 } 184 } 185 return buffer.toString(); 186 } 187 188 /** 189 * <p>Squeezes any repetitions of a character that is mentioned in the 190 * supplied set.</p> 191 * 192 * <pre> 193 * CharSetUtils.squeeze(null, *) = null 194 * CharSetUtils.squeeze("", *) = "" 195 * CharSetUtils.squeeze(*, null) = * 196 * CharSetUtils.squeeze(*, "") = * 197 * CharSetUtils.squeeze("hello", "k-p") = "helo" 198 * CharSetUtils.squeeze("hello", "a-e") = "hello" 199 * </pre> 200 * 201 * @see CharSet#getInstance(java.lang.String...) for set-syntax. 202 * @param str the string to squeeze, may be null 203 * @param set the character set to use for manipulation, may be null 204 * @return the modified String, {@code null} if null string input 205 */ 206 public static String squeeze(final String str, final String... set) { 207 if (StringUtils.isEmpty(str) || deepEmpty(set)) { 208 return str; 209 } 210 final CharSet chars = CharSet.getInstance(set); 211 final StringBuilder buffer = new StringBuilder(str.length()); 212 final char[] chrs = str.toCharArray(); 213 final int sz = chrs.length; 214 char lastChar = chrs[0]; 215 char ch = ' '; 216 Character inChars = null; 217 Character notInChars = null; 218 buffer.append(lastChar); 219 for (int i = 1; i < sz; i++) { 220 ch = chrs[i]; 221 if (ch == lastChar) { 222 if (inChars != null && ch == inChars) { 223 continue; 224 } 225 if (notInChars == null || ch != notInChars) { 226 if (chars.contains(ch)) { 227 inChars = ch; 228 continue; 229 } 230 notInChars = ch; 231 } 232 } 233 buffer.append(ch); 234 lastChar = ch; 235 } 236 return buffer.toString(); 237 } 238 239 /** 240 * <p>CharSetUtils instances should NOT be constructed in standard programming. 241 * Instead, the class should be used as {@code CharSetUtils.evaluateSet(null);}.</p> 242 * 243 * <p>This constructor is public to permit tools that require a JavaBean instance 244 * to operate.</p> 245 */ 246 public CharSetUtils() { 247 super(); 248 } 249}