001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang;
018
019 /**
020 * <p>Operations on <code>CharSet</code>s.</p>
021 *
022 * <p>This class handles <code>null</code> input gracefully.
023 * An exception will not be thrown for a <code>null</code> input.
024 * Each method documents its behaviour in more detail.</p>
025 *
026 * @see CharSet
027 * @author Apache Software Foundation
028 * @author Phil Steitz
029 * @author Gary Gregory
030 * @since 1.0
031 * @version $Id: CharSetUtils.java 905636 2010-02-02 14:03:32Z niallp $
032 */
033 public class CharSetUtils {
034
035 /**
036 * <p>CharSetUtils instances should NOT be constructed in standard programming.
037 * Instead, the class should be used as <code>CharSetUtils.evaluateSet(null);</code>.</p>
038 *
039 * <p>This constructor is public to permit tools that require a JavaBean instance
040 * to operate.</p>
041 */
042 public CharSetUtils() {
043 super();
044 }
045
046 // Factory
047 //-----------------------------------------------------------------------
048 /**
049 * <p>Creates a <code>CharSet</code> instance which allows a certain amount of
050 * set logic to be performed.</p>
051 * <p>The syntax is:</p>
052 * <ul>
053 * <li>"aeio" which implies 'a','e',..</li>
054 * <li>"^e" implies not e.</li>
055 * <li>"ej-m" implies e,j->m. e,j,k,l,m.</li>
056 * </ul>
057 *
058 * <pre>
059 * CharSetUtils.evaluateSet(null) = null
060 * CharSetUtils.evaluateSet([]) = CharSet matching nothing
061 * CharSetUtils.evaluateSet(["a-e"]) = CharSet matching a,b,c,d,e
062 * </pre>
063 *
064 * @param set the set, may be null
065 * @return a CharSet instance, <code>null</code> if null input
066 * @deprecated Use {@link CharSet#getInstance(String[])}.
067 * Method will be removed in Commons Lang 3.0.
068 */
069 public static CharSet evaluateSet(String[] set) {
070 if (set == null) {
071 return null;
072 }
073 return new CharSet(set);
074 }
075
076 // Squeeze
077 //-----------------------------------------------------------------------
078 /**
079 * <p>Squeezes any repetitions of a character that is mentioned in the
080 * supplied set.</p>
081 *
082 * <pre>
083 * CharSetUtils.squeeze(null, *) = null
084 * CharSetUtils.squeeze("", *) = ""
085 * CharSetUtils.squeeze(*, null) = *
086 * CharSetUtils.squeeze(*, "") = *
087 * CharSetUtils.squeeze("hello", "k-p") = "helo"
088 * CharSetUtils.squeeze("hello", "a-e") = "hello"
089 * </pre>
090 *
091 * @see CharSet#getInstance(java.lang.String) for set-syntax.
092 * @param str the string to squeeze, may be null
093 * @param set the character set to use for manipulation, may be null
094 * @return modified String, <code>null</code> if null string input
095 */
096 public static String squeeze(String str, String set) {
097 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
098 return str;
099 }
100 String[] strs = new String[1];
101 strs[0] = set;
102 return squeeze(str, strs);
103 }
104
105 /**
106 * <p>Squeezes any repetitions of a character that is mentioned in the
107 * supplied set.</p>
108 *
109 * <p>An example is:</p>
110 * <ul>
111 * <li>squeeze("hello", {"el"}) => "helo"</li>
112 * </ul>
113 *
114 * @see CharSet#getInstance(java.lang.String) for set-syntax.
115 * @param str the string to squeeze, may be null
116 * @param set the character set to use for manipulation, may be null
117 * @return modified String, <code>null</code> if null string input
118 */
119 public static String squeeze(String str, String[] set) {
120 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
121 return str;
122 }
123 CharSet chars = CharSet.getInstance(set);
124 StringBuffer buffer = new StringBuffer(str.length());
125 char[] chrs = str.toCharArray();
126 int sz = chrs.length;
127 char lastChar = ' ';
128 char ch = ' ';
129 for (int i = 0; i < sz; i++) {
130 ch = chrs[i];
131 if (chars.contains(ch)) {
132 if ((ch == lastChar) && (i != 0)) {
133 continue;
134 }
135 }
136 buffer.append(ch);
137 lastChar = ch;
138 }
139 return buffer.toString();
140 }
141
142 // Count
143 //-----------------------------------------------------------------------
144 /**
145 * <p>Takes an argument in set-syntax, see evaluateSet,
146 * and returns the number of characters present in the specified string.</p>
147 *
148 * <pre>
149 * CharSetUtils.count(null, *) = 0
150 * CharSetUtils.count("", *) = 0
151 * CharSetUtils.count(*, null) = 0
152 * CharSetUtils.count(*, "") = 0
153 * CharSetUtils.count("hello", "k-p") = 3
154 * CharSetUtils.count("hello", "a-e") = 1
155 * </pre>
156 *
157 * @see CharSet#getInstance(java.lang.String) for set-syntax.
158 * @param str String to count characters in, may be null
159 * @param set String set of characters to count, may be null
160 * @return character count, zero if null string input
161 */
162 public static int count(String str, String set) {
163 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
164 return 0;
165 }
166 String[] strs = new String[1];
167 strs[0] = set;
168 return count(str, strs);
169 }
170
171 /**
172 * <p>Takes an argument in set-syntax, see evaluateSet,
173 * and returns the number of characters present in the specified string.</p>
174 *
175 * <p>An example would be:</p>
176 * <ul>
177 * <li>count("hello", {"c-f", "o"}) returns 2.</li>
178 * </ul>
179 *
180 * @see CharSet#getInstance(java.lang.String) for set-syntax.
181 * @param str String to count characters in, may be null
182 * @param set String[] set of characters to count, may be null
183 * @return character count, zero if null string input
184 */
185 public static int count(String str, String[] set) {
186 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
187 return 0;
188 }
189 CharSet chars = CharSet.getInstance(set);
190 int count = 0;
191 char[] chrs = str.toCharArray();
192 int sz = chrs.length;
193 for(int i=0; i<sz; i++) {
194 if(chars.contains(chrs[i])) {
195 count++;
196 }
197 }
198 return count;
199 }
200
201 // Keep
202 //-----------------------------------------------------------------------
203 /**
204 * <p>Takes an argument in set-syntax, see evaluateSet,
205 * and keeps any of characters present in the specified string.</p>
206 *
207 * <pre>
208 * CharSetUtils.keep(null, *) = null
209 * CharSetUtils.keep("", *) = ""
210 * CharSetUtils.keep(*, null) = ""
211 * CharSetUtils.keep(*, "") = ""
212 * CharSetUtils.keep("hello", "hl") = "hll"
213 * CharSetUtils.keep("hello", "le") = "ell"
214 * </pre>
215 *
216 * @see CharSet#getInstance(java.lang.String) for set-syntax.
217 * @param str String to keep characters from, may be null
218 * @param set String set of characters to keep, may be null
219 * @return modified String, <code>null</code> if null string input
220 * @since 2.0
221 */
222 public static String keep(String str, String set) {
223 if (str == null) {
224 return null;
225 }
226 if (str.length() == 0 || StringUtils.isEmpty(set)) {
227 return "";
228 }
229 String[] strs = new String[1];
230 strs[0] = set;
231 return keep(str, strs);
232 }
233
234 /**
235 * <p>Takes an argument in set-syntax, see evaluateSet,
236 * and keeps any of characters present in the specified string.</p>
237 *
238 * <p>An example would be:</p>
239 * <ul>
240 * <li>keep("hello", {"c-f", "o"})
241 * returns "eo"</li>
242 * </ul>
243 *
244 * @see CharSet#getInstance(java.lang.String) for set-syntax.
245 * @param str String to keep characters from, may be null
246 * @param set String[] set of characters to keep, may be null
247 * @return modified String, <code>null</code> if null string input
248 * @since 2.0
249 */
250 public static String keep(String str, String[] set) {
251 if (str == null) {
252 return null;
253 }
254 if (str.length() == 0 || ArrayUtils.isEmpty(set)) {
255 return "";
256 }
257 return modify(str, set, true);
258 }
259
260 // Delete
261 //-----------------------------------------------------------------------
262 /**
263 * <p>Takes an argument in set-syntax, see evaluateSet,
264 * and deletes any of characters present in the specified string.</p>
265 *
266 * <pre>
267 * CharSetUtils.delete(null, *) = null
268 * CharSetUtils.delete("", *) = ""
269 * CharSetUtils.delete(*, null) = *
270 * CharSetUtils.delete(*, "") = *
271 * CharSetUtils.delete("hello", "hl") = "eo"
272 * CharSetUtils.delete("hello", "le") = "ho"
273 * </pre>
274 *
275 * @see CharSet#getInstance(java.lang.String) for set-syntax.
276 * @param str String to delete characters from, may be null
277 * @param set String set of characters to delete, may be null
278 * @return modified String, <code>null</code> if null string input
279 */
280 public static String delete(String str, String set) {
281 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
282 return str;
283 }
284 String[] strs = new String[1];
285 strs[0] = set;
286 return delete(str, strs);
287 }
288
289 /**
290 * <p>Takes an argument in set-syntax, see evaluateSet,
291 * and deletes any of characters present in the specified string.</p>
292 *
293 * <p>An example would be:</p>
294 * <ul>
295 * <li>delete("hello", {"c-f", "o"}) returns
296 * "hll"</li>
297 * </ul>
298 *
299 * @see CharSet#getInstance(java.lang.String) for set-syntax.
300 * @param str String to delete characters from, may be null
301 * @param set String[] set of characters to delete, may be null
302 * @return modified String, <code>null</code> if null string input
303 */
304 public static String delete(String str, String[] set) {
305 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
306 return str;
307 }
308 return modify(str, set, false);
309 }
310
311 //-----------------------------------------------------------------------
312 /**
313 * Implementation of delete and keep
314 *
315 * @param str String to modify characters within
316 * @param set String[] set of characters to modify
317 * @param expect whether to evaluate on match, or non-match
318 * @return modified String
319 */
320 private static String modify(String str, String[] set, boolean expect) {
321 CharSet chars = CharSet.getInstance(set);
322 StringBuffer buffer = new StringBuffer(str.length());
323 char[] chrs = str.toCharArray();
324 int sz = chrs.length;
325 for(int i=0; i<sz; i++) {
326 if(chars.contains(chrs[i]) == expect) {
327 buffer.append(chrs[i]);
328 }
329 }
330 return buffer.toString();
331 }
332
333 // Translate
334 //-----------------------------------------------------------------------
335 /**
336 * <p>Translate characters in a String.
337 * This is a multi character search and replace routine.</p>
338 *
339 * <p>An example is:</p>
340 * <ul>
341 * <li>translate("hello", "ho", "jy")
342 * => jelly</li>
343 * </ul>
344 *
345 * <p>If the length of characters to search for is greater than the
346 * length of characters to replace, then the last character is
347 * used.</p>
348 *
349 * <pre>
350 * CharSetUtils.translate(null, *, *) = null
351 * CharSetUtils.translate("", *, *) = ""
352 * </pre>
353 *
354 * @param str String to replace characters in, may be null
355 * @param searchChars a set of characters to search for, must not be null
356 * @param replaceChars a set of characters to replace, must not be null or empty ("")
357 * @return translated String, <code>null</code> if null string input
358 * @throws NullPointerException if <code>searchChars</code> or <code>replaceChars</code>
359 * is <code>null</code>
360 * @throws ArrayIndexOutOfBoundsException if <code>replaceChars</code> is empty ("")
361 * @deprecated Use {@link StringUtils#replaceChars(String, String, String)}.
362 * Method will be removed in Commons Lang 3.0.
363 * NOTE: StringUtils#replaceChars behaves differently when 'searchChars' is longer
364 * than 'replaceChars'. CharSetUtils#translate will use the last char of the replacement
365 * string whereas StringUtils#replaceChars will delete
366 */
367 public static String translate(String str, String searchChars, String replaceChars) {
368 if (StringUtils.isEmpty(str)) {
369 return str;
370 }
371 StringBuffer buffer = new StringBuffer(str.length());
372 char[] chrs = str.toCharArray();
373 char[] withChrs = replaceChars.toCharArray();
374 int sz = chrs.length;
375 int withMax = replaceChars.length() - 1;
376 for(int i=0; i<sz; i++) {
377 int idx = searchChars.indexOf(chrs[i]);
378 if(idx != -1) {
379 if(idx > withMax) {
380 idx = withMax;
381 }
382 buffer.append(withChrs[idx]);
383 } else {
384 buffer.append(chrs[i]);
385 }
386 }
387 return buffer.toString();
388 }
389
390 }