001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.lang; 018 019 /** 020 * <p>Operations on <code>CharSet</code>s.</p> 021 * 022 * <p>This class handles <code>null</code> input gracefully. 023 * An exception will not be thrown for a <code>null</code> input. 024 * Each method documents its behaviour in more detail.</p> 025 * 026 * @see CharSet 027 * @author Apache Software Foundation 028 * @author Phil Steitz 029 * @author Gary Gregory 030 * @since 1.0 031 * @version $Id: CharSetUtils.java 905636 2010-02-02 14:03:32Z niallp $ 032 */ 033 public class CharSetUtils { 034 035 /** 036 * <p>CharSetUtils instances should NOT be constructed in standard programming. 037 * Instead, the class should be used as <code>CharSetUtils.evaluateSet(null);</code>.</p> 038 * 039 * <p>This constructor is public to permit tools that require a JavaBean instance 040 * to operate.</p> 041 */ 042 public CharSetUtils() { 043 super(); 044 } 045 046 // Factory 047 //----------------------------------------------------------------------- 048 /** 049 * <p>Creates a <code>CharSet</code> instance which allows a certain amount of 050 * set logic to be performed.</p> 051 * <p>The syntax is:</p> 052 * <ul> 053 * <li>"aeio" which implies 'a','e',..</li> 054 * <li>"^e" implies not e.</li> 055 * <li>"ej-m" implies e,j->m. e,j,k,l,m.</li> 056 * </ul> 057 * 058 * <pre> 059 * CharSetUtils.evaluateSet(null) = null 060 * CharSetUtils.evaluateSet([]) = CharSet matching nothing 061 * CharSetUtils.evaluateSet(["a-e"]) = CharSet matching a,b,c,d,e 062 * </pre> 063 * 064 * @param set the set, may be null 065 * @return a CharSet instance, <code>null</code> if null input 066 * @deprecated Use {@link CharSet#getInstance(String[])}. 067 * Method will be removed in Commons Lang 3.0. 068 */ 069 public static CharSet evaluateSet(String[] set) { 070 if (set == null) { 071 return null; 072 } 073 return new CharSet(set); 074 } 075 076 // Squeeze 077 //----------------------------------------------------------------------- 078 /** 079 * <p>Squeezes any repetitions of a character that is mentioned in the 080 * supplied set.</p> 081 * 082 * <pre> 083 * CharSetUtils.squeeze(null, *) = null 084 * CharSetUtils.squeeze("", *) = "" 085 * CharSetUtils.squeeze(*, null) = * 086 * CharSetUtils.squeeze(*, "") = * 087 * CharSetUtils.squeeze("hello", "k-p") = "helo" 088 * CharSetUtils.squeeze("hello", "a-e") = "hello" 089 * </pre> 090 * 091 * @see CharSet#getInstance(java.lang.String) for set-syntax. 092 * @param str the string to squeeze, may be null 093 * @param set the character set to use for manipulation, may be null 094 * @return modified String, <code>null</code> if null string input 095 */ 096 public static String squeeze(String str, String set) { 097 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) { 098 return str; 099 } 100 String[] strs = new String[1]; 101 strs[0] = set; 102 return squeeze(str, strs); 103 } 104 105 /** 106 * <p>Squeezes any repetitions of a character that is mentioned in the 107 * supplied set.</p> 108 * 109 * <p>An example is:</p> 110 * <ul> 111 * <li>squeeze("hello", {"el"}) => "helo"</li> 112 * </ul> 113 * 114 * @see CharSet#getInstance(java.lang.String) for set-syntax. 115 * @param str the string to squeeze, may be null 116 * @param set the character set to use for manipulation, may be null 117 * @return modified String, <code>null</code> if null string input 118 */ 119 public static String squeeze(String str, String[] set) { 120 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) { 121 return str; 122 } 123 CharSet chars = CharSet.getInstance(set); 124 StringBuffer buffer = new StringBuffer(str.length()); 125 char[] chrs = str.toCharArray(); 126 int sz = chrs.length; 127 char lastChar = ' '; 128 char ch = ' '; 129 for (int i = 0; i < sz; i++) { 130 ch = chrs[i]; 131 if (chars.contains(ch)) { 132 if ((ch == lastChar) && (i != 0)) { 133 continue; 134 } 135 } 136 buffer.append(ch); 137 lastChar = ch; 138 } 139 return buffer.toString(); 140 } 141 142 // Count 143 //----------------------------------------------------------------------- 144 /** 145 * <p>Takes an argument in set-syntax, see evaluateSet, 146 * and returns the number of characters present in the specified string.</p> 147 * 148 * <pre> 149 * CharSetUtils.count(null, *) = 0 150 * CharSetUtils.count("", *) = 0 151 * CharSetUtils.count(*, null) = 0 152 * CharSetUtils.count(*, "") = 0 153 * CharSetUtils.count("hello", "k-p") = 3 154 * CharSetUtils.count("hello", "a-e") = 1 155 * </pre> 156 * 157 * @see CharSet#getInstance(java.lang.String) for set-syntax. 158 * @param str String to count characters in, may be null 159 * @param set String set of characters to count, may be null 160 * @return character count, zero if null string input 161 */ 162 public static int count(String str, String set) { 163 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) { 164 return 0; 165 } 166 String[] strs = new String[1]; 167 strs[0] = set; 168 return count(str, strs); 169 } 170 171 /** 172 * <p>Takes an argument in set-syntax, see evaluateSet, 173 * and returns the number of characters present in the specified string.</p> 174 * 175 * <p>An example would be:</p> 176 * <ul> 177 * <li>count("hello", {"c-f", "o"}) returns 2.</li> 178 * </ul> 179 * 180 * @see CharSet#getInstance(java.lang.String) for set-syntax. 181 * @param str String to count characters in, may be null 182 * @param set String[] set of characters to count, may be null 183 * @return character count, zero if null string input 184 */ 185 public static int count(String str, String[] set) { 186 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) { 187 return 0; 188 } 189 CharSet chars = CharSet.getInstance(set); 190 int count = 0; 191 char[] chrs = str.toCharArray(); 192 int sz = chrs.length; 193 for(int i=0; i<sz; i++) { 194 if(chars.contains(chrs[i])) { 195 count++; 196 } 197 } 198 return count; 199 } 200 201 // Keep 202 //----------------------------------------------------------------------- 203 /** 204 * <p>Takes an argument in set-syntax, see evaluateSet, 205 * and keeps any of characters present in the specified string.</p> 206 * 207 * <pre> 208 * CharSetUtils.keep(null, *) = null 209 * CharSetUtils.keep("", *) = "" 210 * CharSetUtils.keep(*, null) = "" 211 * CharSetUtils.keep(*, "") = "" 212 * CharSetUtils.keep("hello", "hl") = "hll" 213 * CharSetUtils.keep("hello", "le") = "ell" 214 * </pre> 215 * 216 * @see CharSet#getInstance(java.lang.String) for set-syntax. 217 * @param str String to keep characters from, may be null 218 * @param set String set of characters to keep, may be null 219 * @return modified String, <code>null</code> if null string input 220 * @since 2.0 221 */ 222 public static String keep(String str, String set) { 223 if (str == null) { 224 return null; 225 } 226 if (str.length() == 0 || StringUtils.isEmpty(set)) { 227 return ""; 228 } 229 String[] strs = new String[1]; 230 strs[0] = set; 231 return keep(str, strs); 232 } 233 234 /** 235 * <p>Takes an argument in set-syntax, see evaluateSet, 236 * and keeps any of characters present in the specified string.</p> 237 * 238 * <p>An example would be:</p> 239 * <ul> 240 * <li>keep("hello", {"c-f", "o"}) 241 * returns "eo"</li> 242 * </ul> 243 * 244 * @see CharSet#getInstance(java.lang.String) for set-syntax. 245 * @param str String to keep characters from, may be null 246 * @param set String[] set of characters to keep, may be null 247 * @return modified String, <code>null</code> if null string input 248 * @since 2.0 249 */ 250 public static String keep(String str, String[] set) { 251 if (str == null) { 252 return null; 253 } 254 if (str.length() == 0 || ArrayUtils.isEmpty(set)) { 255 return ""; 256 } 257 return modify(str, set, true); 258 } 259 260 // Delete 261 //----------------------------------------------------------------------- 262 /** 263 * <p>Takes an argument in set-syntax, see evaluateSet, 264 * and deletes any of characters present in the specified string.</p> 265 * 266 * <pre> 267 * CharSetUtils.delete(null, *) = null 268 * CharSetUtils.delete("", *) = "" 269 * CharSetUtils.delete(*, null) = * 270 * CharSetUtils.delete(*, "") = * 271 * CharSetUtils.delete("hello", "hl") = "eo" 272 * CharSetUtils.delete("hello", "le") = "ho" 273 * </pre> 274 * 275 * @see CharSet#getInstance(java.lang.String) for set-syntax. 276 * @param str String to delete characters from, may be null 277 * @param set String set of characters to delete, may be null 278 * @return modified String, <code>null</code> if null string input 279 */ 280 public static String delete(String str, String set) { 281 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) { 282 return str; 283 } 284 String[] strs = new String[1]; 285 strs[0] = set; 286 return delete(str, strs); 287 } 288 289 /** 290 * <p>Takes an argument in set-syntax, see evaluateSet, 291 * and deletes any of characters present in the specified string.</p> 292 * 293 * <p>An example would be:</p> 294 * <ul> 295 * <li>delete("hello", {"c-f", "o"}) returns 296 * "hll"</li> 297 * </ul> 298 * 299 * @see CharSet#getInstance(java.lang.String) for set-syntax. 300 * @param str String to delete characters from, may be null 301 * @param set String[] set of characters to delete, may be null 302 * @return modified String, <code>null</code> if null string input 303 */ 304 public static String delete(String str, String[] set) { 305 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) { 306 return str; 307 } 308 return modify(str, set, false); 309 } 310 311 //----------------------------------------------------------------------- 312 /** 313 * Implementation of delete and keep 314 * 315 * @param str String to modify characters within 316 * @param set String[] set of characters to modify 317 * @param expect whether to evaluate on match, or non-match 318 * @return modified String 319 */ 320 private static String modify(String str, String[] set, boolean expect) { 321 CharSet chars = CharSet.getInstance(set); 322 StringBuffer buffer = new StringBuffer(str.length()); 323 char[] chrs = str.toCharArray(); 324 int sz = chrs.length; 325 for(int i=0; i<sz; i++) { 326 if(chars.contains(chrs[i]) == expect) { 327 buffer.append(chrs[i]); 328 } 329 } 330 return buffer.toString(); 331 } 332 333 // Translate 334 //----------------------------------------------------------------------- 335 /** 336 * <p>Translate characters in a String. 337 * This is a multi character search and replace routine.</p> 338 * 339 * <p>An example is:</p> 340 * <ul> 341 * <li>translate("hello", "ho", "jy") 342 * => jelly</li> 343 * </ul> 344 * 345 * <p>If the length of characters to search for is greater than the 346 * length of characters to replace, then the last character is 347 * used.</p> 348 * 349 * <pre> 350 * CharSetUtils.translate(null, *, *) = null 351 * CharSetUtils.translate("", *, *) = "" 352 * </pre> 353 * 354 * @param str String to replace characters in, may be null 355 * @param searchChars a set of characters to search for, must not be null 356 * @param replaceChars a set of characters to replace, must not be null or empty ("") 357 * @return translated String, <code>null</code> if null string input 358 * @throws NullPointerException if <code>searchChars</code> or <code>replaceChars</code> 359 * is <code>null</code> 360 * @throws ArrayIndexOutOfBoundsException if <code>replaceChars</code> is empty ("") 361 * @deprecated Use {@link StringUtils#replaceChars(String, String, String)}. 362 * Method will be removed in Commons Lang 3.0. 363 * NOTE: StringUtils#replaceChars behaves differently when 'searchChars' is longer 364 * than 'replaceChars'. CharSetUtils#translate will use the last char of the replacement 365 * string whereas StringUtils#replaceChars will delete 366 */ 367 public static String translate(String str, String searchChars, String replaceChars) { 368 if (StringUtils.isEmpty(str)) { 369 return str; 370 } 371 StringBuffer buffer = new StringBuffer(str.length()); 372 char[] chrs = str.toCharArray(); 373 char[] withChrs = replaceChars.toCharArray(); 374 int sz = chrs.length; 375 int withMax = replaceChars.length() - 1; 376 for(int i=0; i<sz; i++) { 377 int idx = searchChars.indexOf(chrs[i]); 378 if(idx != -1) { 379 if(idx > withMax) { 380 idx = withMax; 381 } 382 buffer.append(withChrs[idx]); 383 } else { 384 buffer.append(chrs[i]); 385 } 386 } 387 return buffer.toString(); 388 } 389 390 }