View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang;
18  
19  /**
20   * <p>Operations on <code>CharSet</code>s.</p>
21   *
22   * <p>This class handles <code>null</code> input gracefully.
23   * An exception will not be thrown for a <code>null</code> input.
24   * Each method documents its behaviour in more detail.</p>
25   * 
26   * @see CharSet
27   * @author Stephen Colebourne
28   * @author Phil Steitz
29   * @author Gary Gregory
30   * @since 1.0
31   * @version $Id: CharSetUtils.java 534588 2007-05-02 18:46:07Z bayard $
32   */
33  public class CharSetUtils {
34  
35      /**
36       * <p>CharSetUtils instances should NOT be constructed in standard programming.
37       * Instead, the class should be used as <code>CharSetUtils.evaluateSet(null);</code>.</p>
38       *
39       * <p>This constructor is public to permit tools that require a JavaBean instance
40       * to operate.</p>
41       */
42      public CharSetUtils() {
43        super();
44      }
45  
46      // Factory
47      //-----------------------------------------------------------------------
48      /**
49       * <p>Creates a <code>CharSet</code> instance which allows a certain amount of
50       * set logic to be performed.</p>
51       * <p>The syntax is:</p>
52       * <ul>
53       *  <li>&quot;aeio&quot; which implies 'a','e',..</li>
54       *  <li>&quot;^e&quot; implies not e.</li>
55       *  <li>&quot;ej-m&quot; implies e,j-&gt;m. e,j,k,l,m.</li>
56       * </ul>
57       * 
58       * <pre>
59       * CharSetUtils.evaluateSet(null)    = null
60       * CharSetUtils.evaluateSet([])      = CharSet matching nothing
61       * CharSetUtils.evaluateSet(["a-e"]) = CharSet matching a,b,c,d,e
62       * </pre>
63       *
64       * @param set  the set, may be null
65       * @return a CharSet instance, <code>null</code> if null input
66       * @deprecated Use {@link CharSet#getInstance(String[])}.
67       *             Method will be removed in Commons Lang 3.0.
68       */
69      public static CharSet evaluateSet(String[] set) {
70          if (set == null) {
71              return null;
72          }
73          return new CharSet(set); 
74      }
75  
76      // Squeeze
77      //-----------------------------------------------------------------------
78      /**
79       * <p>Squeezes any repetitions of a character that is mentioned in the
80       * supplied set.</p>
81       *
82       * <pre>
83       * CharSetUtils.squeeze(null, *)        = null
84       * CharSetUtils.squeeze("", *)          = ""
85       * CharSetUtils.squeeze(*, null)        = *
86       * CharSetUtils.squeeze(*, "")          = *
87       * CharSetUtils.squeeze("hello", "k-p") = "helo"
88       * CharSetUtils.squeeze("hello", "a-e") = "hello"
89       * </pre>
90       *
91       * @see #evaluateSet(java.lang.String[]) for set-syntax.
92       * @param str  the string to squeeze, may be null
93       * @param set  the character set to use for manipulation, may be null
94       * @return modified String, <code>null</code> if null string input
95       */
96      public static String squeeze(String str, String set) {
97          if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
98              return str;
99          }
100         String[] strs = new String[1];
101         strs[0] = set;
102         return squeeze(str, strs);
103     }
104 
105     /**
106      * <p>Squeezes any repetitions of a character that is mentioned in the
107      * supplied set.</p>
108      *
109      * <p>An example is:</p>
110      * <ul>
111      *   <li>squeeze(&quot;hello&quot;, {&quot;el&quot;}) => &quot;helo&quot;</li>
112      * </ul>
113      * 
114      * @see #evaluateSet(java.lang.String[]) for set-syntax.
115      * @param str  the string to squeeze, may be null
116      * @param set  the character set to use for manipulation, may be null
117      * @return modified String, <code>null</code> if null string input
118      */
119     public static String squeeze(String str, String[] set) {
120         if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
121             return str;
122         }
123         CharSet chars = CharSet.getInstance(set);
124         StringBuffer buffer = new StringBuffer(str.length());
125         char[] chrs = str.toCharArray();
126         int sz = chrs.length;
127         char lastChar = ' ';
128         char ch = ' ';
129         for (int i = 0; i < sz; i++) {
130             ch = chrs[i];
131             if (chars.contains(ch)) {
132                 if ((ch == lastChar) && (i != 0)) {
133                     continue;
134                 }
135             }
136             buffer.append(ch);
137             lastChar = ch;
138         }
139         return buffer.toString();
140     }
141 
142     // Count
143     //-----------------------------------------------------------------------
144     /**
145      * <p>Takes an argument in set-syntax, see evaluateSet,
146      * and returns the number of characters present in the specified string.</p>
147      *
148      * <pre>
149      * CharSetUtils.count(null, *)        = 0
150      * CharSetUtils.count("", *)          = 0
151      * CharSetUtils.count(*, null)        = 0
152      * CharSetUtils.count(*, "")          = 0
153      * CharSetUtils.count("hello", "k-p") = 3
154      * CharSetUtils.count("hello", "a-e") = 1
155      * </pre>
156      *
157      * @see #evaluateSet(java.lang.String[]) for set-syntax.
158      * @param str  String to count characters in, may be null
159      * @param set  String set of characters to count, may be null
160      * @return character count, zero if null string input
161      */
162     public static int count(String str, String set) {
163         if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
164             return 0;
165         }
166         String[] strs = new String[1];
167         strs[0] = set;
168         return count(str, strs);
169     }
170     
171     /**
172      * <p>Takes an argument in set-syntax, see evaluateSet,
173      * and returns the number of characters present in the specified string.</p>
174      *
175      * <p>An example would be:</p>
176      * <ul>
177      *  <li>count(&quot;hello&quot;, {&quot;c-f&quot;, &quot;o&quot;}) returns 2.</li>
178      * </ul>
179      *
180      * @see #evaluateSet(java.lang.String[]) for set-syntax.
181      * @param str  String to count characters in, may be null
182      * @param set  String[] set of characters to count, may be null
183      * @return character count, zero if null string input
184      */
185     public static int count(String str, String[] set) {
186         if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
187             return 0;
188         }
189         CharSet chars = CharSet.getInstance(set);
190         int count = 0;
191         char[] chrs = str.toCharArray();
192         int sz = chrs.length;
193         for(int i=0; i<sz; i++) {
194             if(chars.contains(chrs[i])) {
195                 count++;
196             }
197         }
198         return count;
199     }
200 
201     // Keep
202     //-----------------------------------------------------------------------
203     /**
204      * <p>Takes an argument in set-syntax, see evaluateSet,
205      * and keeps any of characters present in the specified string.</p>
206      *
207      * <pre>
208      * CharSetUtils.keep(null, *)        = null
209      * CharSetUtils.keep("", *)          = ""
210      * CharSetUtils.keep(*, null)        = ""
211      * CharSetUtils.keep(*, "")          = ""
212      * CharSetUtils.keep("hello", "hl")  = "hll"
213      * CharSetUtils.keep("hello", "le")  = "ell"
214      * </pre>
215      *
216      * @see #evaluateSet(java.lang.String[]) for set-syntax.
217      * @param str  String to keep characters from, may be null
218      * @param set  String set of characters to keep, may be null
219      * @return modified String, <code>null</code> if null string input
220      * @since 2.0
221      */
222     public static String keep(String str, String set) {
223         if (str == null) {
224             return null;
225         }
226         if (str.length() == 0 || StringUtils.isEmpty(set)) {
227             return "";
228         }
229         String[] strs = new String[1];
230         strs[0] = set;
231         return keep(str, strs);
232     }
233     
234     /**
235      * <p>Takes an argument in set-syntax, see evaluateSet,
236      * and keeps any of characters present in the specified string.</p>
237      *
238      * <p>An example would be:</p>
239      * <ul>
240      *  <li>keep(&quot;hello&quot;, {&quot;c-f&quot;, &quot;o&quot;})
241      *   returns &quot;eo&quot;</li>
242      * </ul>
243      *
244      * @see #evaluateSet(java.lang.String[]) for set-syntax.
245      * @param str  String to keep characters from, may be null
246      * @param set  String[] set of characters to keep, may be null
247      * @return modified String, <code>null</code> if null string input
248      * @since 2.0
249      */
250     public static String keep(String str, String[] set) {
251         if (str == null) {
252             return null;
253         }
254         if (str.length() == 0 || ArrayUtils.isEmpty(set)) {
255             return "";
256         }
257         return modify(str, set, true);
258     }
259 
260     // Delete
261     //-----------------------------------------------------------------------
262     /**
263      * <p>Takes an argument in set-syntax, see evaluateSet,
264      * and deletes any of characters present in the specified string.</p>
265      *
266      * <pre>
267      * CharSetUtils.delete(null, *)        = null
268      * CharSetUtils.delete("", *)          = ""
269      * CharSetUtils.delete(*, null)        = *
270      * CharSetUtils.delete(*, "")          = *
271      * CharSetUtils.delete("hello", "hl")  = "eo"
272      * CharSetUtils.delete("hello", "le")  = "ho"
273      * </pre>
274      *
275      * @see #evaluateSet(java.lang.String[]) for set-syntax.
276      * @param str  String to delete characters from, may be null
277      * @param set  String set of characters to delete, may be null
278      * @return modified String, <code>null</code> if null string input
279      */
280     public static String delete(String str, String set) {
281         if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
282             return str;
283         }
284         String[] strs = new String[1];
285         strs[0] = set;
286         return delete(str, strs);
287     }
288     
289     /**
290      * <p>Takes an argument in set-syntax, see evaluateSet,
291      * and deletes any of characters present in the specified string.</p>
292      *
293      * <p>An example would be:</p>
294      * <ul>
295      *  <li>delete(&quot;hello&quot;, {&quot;c-f&quot;, &quot;o&quot;}) returns
296      *   &quot;hll&quot;</li>
297      * </ul>
298      *
299      * @see #evaluateSet(java.lang.String[]) for set-syntax.
300      * @param str  String to delete characters from, may be null
301      * @param set  String[] set of characters to delete, may be null
302      * @return modified String, <code>null</code> if null string input
303      */
304     public static String delete(String str, String[] set) {
305         if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
306             return str;
307         }
308         return modify(str, set, false);
309     }
310 
311     //-----------------------------------------------------------------------
312     /**
313      * Implementation of delete and keep
314      *
315      * @param str String to modify characters within
316      * @param set String[] set of characters to modify
317      * @param expect whether to evaluate on match, or non-match
318      * @return modified String
319      */
320     private static String modify(String str, String[] set, boolean expect) {
321         CharSet chars = CharSet.getInstance(set);
322         StringBuffer buffer = new StringBuffer(str.length());
323         char[] chrs = str.toCharArray();
324         int sz = chrs.length;
325         for(int i=0; i<sz; i++) {
326             if(chars.contains(chrs[i]) == expect) {
327                 buffer.append(chrs[i]);
328             }
329         }
330         return buffer.toString();
331     }
332 
333     // Translate
334     //-----------------------------------------------------------------------
335     /**
336      * <p>Translate characters in a String.
337      * This is a multi character search and replace routine.</p>
338      *
339      * <p>An example is:</p>
340      * <ul>
341      *   <li>translate(&quot;hello&quot;, &quot;ho&quot;, &quot;jy&quot;)
342      *    =&gt; jelly</li>
343      * </ul>
344      *
345      * <p>If the length of characters to search for is greater than the
346      * length of characters to replace, then the last character is 
347      * used.</p>
348      * 
349      * <pre>
350      * CharSetUtils.translate(null, *, *) = null
351      * CharSetUtils.translate("", *, *)   = ""
352      * </pre>
353      *
354      * @param str  String to replace characters in, may be null
355      * @param searchChars   a set of characters to search for, must not be null
356      * @param replaceChars  a set of characters to replace, must not be null or empty (&quot;&quot;)
357      * @return translated String, <code>null</code> if null string input
358      * @throws NullPointerException if <code>searchChars</code> or <code>replaceChars</code> 
359      *  is <code>null</code>
360      * @throws ArrayIndexOutOfBoundsException if <code>replaceChars</code> is empty (&quot;&quot;)
361      * @deprecated Use {@link StringUtils#replaceChars(String, String, String)}.
362      *             Method will be removed in Commons Lang 3.0.
363      *  NOTE: StringUtils#replaceChars behaves differently when 'searchChars' is longer
364      *  than 'replaceChars'. CharSetUtils#translate will use the last char of the replacement
365      *  string whereas StringUtils#replaceChars will delete
366      */
367     public static String translate(String str, String searchChars, String replaceChars) {
368         if (StringUtils.isEmpty(str)) {
369             return str;
370         }
371         StringBuffer buffer = new StringBuffer(str.length());
372         char[] chrs = str.toCharArray();
373         char[] withChrs = replaceChars.toCharArray();
374         int sz = chrs.length;
375         int withMax = replaceChars.length() - 1;
376         for(int i=0; i<sz; i++) {
377             int idx = searchChars.indexOf(chrs[i]);
378             if(idx != -1) {
379                 if(idx > withMax) {
380                     idx = withMax;
381                 }
382                 buffer.append(withChrs[idx]);
383             } else {
384                 buffer.append(chrs[i]);
385             }
386         }
387         return buffer.toString();
388     }
389 
390 }