001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.lang; 018 019 import org.apache.commons.lang.text.StrBuilder; 020 021 /** 022 * <p>Operations on <code>CharSet</code>s.</p> 023 * 024 * <p>This class handles <code>null</code> input gracefully. 025 * An exception will not be thrown for a <code>null</code> input. 026 * Each method documents its behaviour in more detail.</p> 027 * 028 * <p>#ThreadSafe#</p> 029 * @see CharSet 030 * @author Apache Software Foundation 031 * @author Phil Steitz 032 * @author Gary Gregory 033 * @since 1.0 034 * @version $Id: CharSetUtils.java 1057072 2011-01-10 01:55:57Z niallp $ 035 */ 036 public class CharSetUtils { 037 038 /** 039 * <p>CharSetUtils instances should NOT be constructed in standard programming. 040 * Instead, the class should be used as <code>CharSetUtils.evaluateSet(null);</code>.</p> 041 * 042 * <p>This constructor is public to permit tools that require a JavaBean instance 043 * to operate.</p> 044 */ 045 public CharSetUtils() { 046 super(); 047 } 048 049 // Factory 050 //----------------------------------------------------------------------- 051 /** 052 * <p>Creates a <code>CharSet</code> instance which allows a certain amount of 053 * set logic to be performed.</p> 054 * <p>The syntax is:</p> 055 * <ul> 056 * <li>"aeio" which implies 'a','e',..</li> 057 * <li>"^e" implies not e.</li> 058 * <li>"ej-m" implies e,j->m. e,j,k,l,m.</li> 059 * </ul> 060 * 061 * <pre> 062 * CharSetUtils.evaluateSet(null) = null 063 * CharSetUtils.evaluateSet([]) = CharSet matching nothing 064 * CharSetUtils.evaluateSet(["a-e"]) = CharSet matching a,b,c,d,e 065 * </pre> 066 * 067 * @param set the set, may be null 068 * @return a CharSet instance, <code>null</code> if null input 069 * @deprecated Use {@link CharSet#getInstance(String[])}. 070 * Method will be removed in Commons Lang 3.0. 071 */ 072 public static CharSet evaluateSet(String[] set) { 073 if (set == null) { 074 return null; 075 } 076 return new CharSet(set); 077 } 078 079 // Squeeze 080 //----------------------------------------------------------------------- 081 /** 082 * <p>Squeezes any repetitions of a character that is mentioned in the 083 * supplied set.</p> 084 * 085 * <pre> 086 * CharSetUtils.squeeze(null, *) = null 087 * CharSetUtils.squeeze("", *) = "" 088 * CharSetUtils.squeeze(*, null) = * 089 * CharSetUtils.squeeze(*, "") = * 090 * CharSetUtils.squeeze("hello", "k-p") = "helo" 091 * CharSetUtils.squeeze("hello", "a-e") = "hello" 092 * </pre> 093 * 094 * @see CharSet#getInstance(java.lang.String) for set-syntax. 095 * @param str the string to squeeze, may be null 096 * @param set the character set to use for manipulation, may be null 097 * @return modified String, <code>null</code> if null string input 098 */ 099 public static String squeeze(String str, String set) { 100 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) { 101 return str; 102 } 103 String[] strs = new String[1]; 104 strs[0] = set; 105 return squeeze(str, strs); 106 } 107 108 /** 109 * <p>Squeezes any repetitions of a character that is mentioned in the 110 * supplied set.</p> 111 * 112 * <p>An example is:</p> 113 * <ul> 114 * <li>squeeze("hello", {"el"}) => "helo"</li> 115 * </ul> 116 * 117 * @see CharSet#getInstance(java.lang.String) for set-syntax. 118 * @param str the string to squeeze, may be null 119 * @param set the character set to use for manipulation, may be null 120 * @return modified String, <code>null</code> if null string input 121 */ 122 public static String squeeze(String str, String[] set) { 123 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) { 124 return str; 125 } 126 CharSet chars = CharSet.getInstance(set); 127 StrBuilder buffer = new StrBuilder(str.length()); 128 char[] chrs = str.toCharArray(); 129 int sz = chrs.length; 130 char lastChar = ' '; 131 char ch = ' '; 132 for (int i = 0; i < sz; i++) { 133 ch = chrs[i]; 134 if (chars.contains(ch)) { 135 if ((ch == lastChar) && (i != 0)) { 136 continue; 137 } 138 } 139 buffer.append(ch); 140 lastChar = ch; 141 } 142 return buffer.toString(); 143 } 144 145 // Count 146 //----------------------------------------------------------------------- 147 /** 148 * <p>Takes an argument in set-syntax, see evaluateSet, 149 * and returns the number of characters present in the specified string.</p> 150 * 151 * <pre> 152 * CharSetUtils.count(null, *) = 0 153 * CharSetUtils.count("", *) = 0 154 * CharSetUtils.count(*, null) = 0 155 * CharSetUtils.count(*, "") = 0 156 * CharSetUtils.count("hello", "k-p") = 3 157 * CharSetUtils.count("hello", "a-e") = 1 158 * </pre> 159 * 160 * @see CharSet#getInstance(java.lang.String) for set-syntax. 161 * @param str String to count characters in, may be null 162 * @param set String set of characters to count, may be null 163 * @return character count, zero if null string input 164 */ 165 public static int count(String str, String set) { 166 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) { 167 return 0; 168 } 169 String[] strs = new String[1]; 170 strs[0] = set; 171 return count(str, strs); 172 } 173 174 /** 175 * <p>Takes an argument in set-syntax, see evaluateSet, 176 * and returns the number of characters present in the specified string.</p> 177 * 178 * <p>An example would be:</p> 179 * <ul> 180 * <li>count("hello", {"c-f", "o"}) returns 2.</li> 181 * </ul> 182 * 183 * @see CharSet#getInstance(java.lang.String) for set-syntax. 184 * @param str String to count characters in, may be null 185 * @param set String[] set of characters to count, may be null 186 * @return character count, zero if null string input 187 */ 188 public static int count(String str, String[] set) { 189 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) { 190 return 0; 191 } 192 CharSet chars = CharSet.getInstance(set); 193 int count = 0; 194 char[] chrs = str.toCharArray(); 195 int sz = chrs.length; 196 for(int i=0; i<sz; i++) { 197 if(chars.contains(chrs[i])) { 198 count++; 199 } 200 } 201 return count; 202 } 203 204 // Keep 205 //----------------------------------------------------------------------- 206 /** 207 * <p>Takes an argument in set-syntax, see evaluateSet, 208 * and keeps any of characters present in the specified string.</p> 209 * 210 * <pre> 211 * CharSetUtils.keep(null, *) = null 212 * CharSetUtils.keep("", *) = "" 213 * CharSetUtils.keep(*, null) = "" 214 * CharSetUtils.keep(*, "") = "" 215 * CharSetUtils.keep("hello", "hl") = "hll" 216 * CharSetUtils.keep("hello", "le") = "ell" 217 * </pre> 218 * 219 * @see CharSet#getInstance(java.lang.String) for set-syntax. 220 * @param str String to keep characters from, may be null 221 * @param set String set of characters to keep, may be null 222 * @return modified String, <code>null</code> if null string input 223 * @since 2.0 224 */ 225 public static String keep(String str, String set) { 226 if (str == null) { 227 return null; 228 } 229 if (str.length() == 0 || StringUtils.isEmpty(set)) { 230 return ""; 231 } 232 String[] strs = new String[1]; 233 strs[0] = set; 234 return keep(str, strs); 235 } 236 237 /** 238 * <p>Takes an argument in set-syntax, see evaluateSet, 239 * and keeps any of characters present in the specified string.</p> 240 * 241 * <p>An example would be:</p> 242 * <ul> 243 * <li>keep("hello", {"c-f", "o"}) 244 * returns "eo"</li> 245 * </ul> 246 * 247 * @see CharSet#getInstance(java.lang.String) for set-syntax. 248 * @param str String to keep characters from, may be null 249 * @param set String[] set of characters to keep, may be null 250 * @return modified String, <code>null</code> if null string input 251 * @since 2.0 252 */ 253 public static String keep(String str, String[] set) { 254 if (str == null) { 255 return null; 256 } 257 if (str.length() == 0 || ArrayUtils.isEmpty(set)) { 258 return ""; 259 } 260 return modify(str, set, true); 261 } 262 263 // Delete 264 //----------------------------------------------------------------------- 265 /** 266 * <p>Takes an argument in set-syntax, see evaluateSet, 267 * and deletes any of characters present in the specified string.</p> 268 * 269 * <pre> 270 * CharSetUtils.delete(null, *) = null 271 * CharSetUtils.delete("", *) = "" 272 * CharSetUtils.delete(*, null) = * 273 * CharSetUtils.delete(*, "") = * 274 * CharSetUtils.delete("hello", "hl") = "eo" 275 * CharSetUtils.delete("hello", "le") = "ho" 276 * </pre> 277 * 278 * @see CharSet#getInstance(java.lang.String) for set-syntax. 279 * @param str String to delete characters from, may be null 280 * @param set String set of characters to delete, may be null 281 * @return modified String, <code>null</code> if null string input 282 */ 283 public static String delete(String str, String set) { 284 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) { 285 return str; 286 } 287 String[] strs = new String[1]; 288 strs[0] = set; 289 return delete(str, strs); 290 } 291 292 /** 293 * <p>Takes an argument in set-syntax, see evaluateSet, 294 * and deletes any of characters present in the specified string.</p> 295 * 296 * <p>An example would be:</p> 297 * <ul> 298 * <li>delete("hello", {"c-f", "o"}) returns 299 * "hll"</li> 300 * </ul> 301 * 302 * @see CharSet#getInstance(java.lang.String) for set-syntax. 303 * @param str String to delete characters from, may be null 304 * @param set String[] set of characters to delete, may be null 305 * @return modified String, <code>null</code> if null string input 306 */ 307 public static String delete(String str, String[] set) { 308 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) { 309 return str; 310 } 311 return modify(str, set, false); 312 } 313 314 //----------------------------------------------------------------------- 315 /** 316 * Implementation of delete and keep 317 * 318 * @param str String to modify characters within 319 * @param set String[] set of characters to modify 320 * @param expect whether to evaluate on match, or non-match 321 * @return modified String 322 */ 323 private static String modify(String str, String[] set, boolean expect) { 324 CharSet chars = CharSet.getInstance(set); 325 StrBuilder buffer = new StrBuilder(str.length()); 326 char[] chrs = str.toCharArray(); 327 int sz = chrs.length; 328 for(int i=0; i<sz; i++) { 329 if(chars.contains(chrs[i]) == expect) { 330 buffer.append(chrs[i]); 331 } 332 } 333 return buffer.toString(); 334 } 335 336 // Translate 337 //----------------------------------------------------------------------- 338 /** 339 * <p>Translate characters in a String. 340 * This is a multi character search and replace routine.</p> 341 * 342 * <p>An example is:</p> 343 * <ul> 344 * <li>translate("hello", "ho", "jy") 345 * => jelly</li> 346 * </ul> 347 * 348 * <p>If the length of characters to search for is greater than the 349 * length of characters to replace, then the last character is 350 * used.</p> 351 * 352 * <pre> 353 * CharSetUtils.translate(null, *, *) = null 354 * CharSetUtils.translate("", *, *) = "" 355 * </pre> 356 * 357 * @param str String to replace characters in, may be null 358 * @param searchChars a set of characters to search for, must not be null 359 * @param replaceChars a set of characters to replace, must not be null or empty ("") 360 * @return translated String, <code>null</code> if null string input 361 * @throws NullPointerException if <code>searchChars</code> or <code>replaceChars</code> 362 * is <code>null</code> 363 * @throws ArrayIndexOutOfBoundsException if <code>replaceChars</code> is empty ("") 364 * @deprecated Use {@link StringUtils#replaceChars(String, String, String)}. 365 * Method will be removed in Commons Lang 3.0. 366 * NOTE: StringUtils#replaceChars behaves differently when 'searchChars' is longer 367 * than 'replaceChars'. CharSetUtils#translate will use the last char of the replacement 368 * string whereas StringUtils#replaceChars will delete 369 */ 370 public static String translate(String str, String searchChars, String replaceChars) { 371 if (StringUtils.isEmpty(str)) { 372 return str; 373 } 374 StrBuilder buffer = new StrBuilder(str.length()); 375 char[] chrs = str.toCharArray(); 376 char[] withChrs = replaceChars.toCharArray(); 377 int sz = chrs.length; 378 int withMax = replaceChars.length() - 1; 379 for(int i=0; i<sz; i++) { 380 int idx = searchChars.indexOf(chrs[i]); 381 if(idx != -1) { 382 if(idx > withMax) { 383 idx = withMax; 384 } 385 buffer.append(withChrs[idx]); 386 } else { 387 buffer.append(chrs[i]); 388 } 389 } 390 return buffer.toString(); 391 } 392 393 }