001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang;
018    
019    /**
020     * <p>Operations on <code>CharSet</code>s.</p>
021     *
022     * <p>This class handles <code>null</code> input gracefully.
023     * An exception will not be thrown for a <code>null</code> input.
024     * Each method documents its behaviour in more detail.</p>
025     * 
026     * @see CharSet
027     * @author Apache Software Foundation
028     * @author Phil Steitz
029     * @author Gary Gregory
030     * @since 1.0
031     * @version $Id: CharSetUtils.java 905636 2010-02-02 14:03:32Z niallp $
032     */
033    public class CharSetUtils {
034    
035        /**
036         * <p>CharSetUtils instances should NOT be constructed in standard programming.
037         * Instead, the class should be used as <code>CharSetUtils.evaluateSet(null);</code>.</p>
038         *
039         * <p>This constructor is public to permit tools that require a JavaBean instance
040         * to operate.</p>
041         */
042        public CharSetUtils() {
043          super();
044        }
045    
046        // Factory
047        //-----------------------------------------------------------------------
048        /**
049         * <p>Creates a <code>CharSet</code> instance which allows a certain amount of
050         * set logic to be performed.</p>
051         * <p>The syntax is:</p>
052         * <ul>
053         *  <li>&quot;aeio&quot; which implies 'a','e',..</li>
054         *  <li>&quot;^e&quot; implies not e.</li>
055         *  <li>&quot;ej-m&quot; implies e,j-&gt;m. e,j,k,l,m.</li>
056         * </ul>
057         * 
058         * <pre>
059         * CharSetUtils.evaluateSet(null)    = null
060         * CharSetUtils.evaluateSet([])      = CharSet matching nothing
061         * CharSetUtils.evaluateSet(["a-e"]) = CharSet matching a,b,c,d,e
062         * </pre>
063         *
064         * @param set  the set, may be null
065         * @return a CharSet instance, <code>null</code> if null input
066         * @deprecated Use {@link CharSet#getInstance(String[])}.
067         *             Method will be removed in Commons Lang 3.0.
068         */
069        public static CharSet evaluateSet(String[] set) {
070            if (set == null) {
071                return null;
072            }
073            return new CharSet(set); 
074        }
075    
076        // Squeeze
077        //-----------------------------------------------------------------------
078        /**
079         * <p>Squeezes any repetitions of a character that is mentioned in the
080         * supplied set.</p>
081         *
082         * <pre>
083         * CharSetUtils.squeeze(null, *)        = null
084         * CharSetUtils.squeeze("", *)          = ""
085         * CharSetUtils.squeeze(*, null)        = *
086         * CharSetUtils.squeeze(*, "")          = *
087         * CharSetUtils.squeeze("hello", "k-p") = "helo"
088         * CharSetUtils.squeeze("hello", "a-e") = "hello"
089         * </pre>
090         *
091         * @see CharSet#getInstance(java.lang.String) for set-syntax.
092         * @param str  the string to squeeze, may be null
093         * @param set  the character set to use for manipulation, may be null
094         * @return modified String, <code>null</code> if null string input
095         */
096        public static String squeeze(String str, String set) {
097            if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
098                return str;
099            }
100            String[] strs = new String[1];
101            strs[0] = set;
102            return squeeze(str, strs);
103        }
104    
105        /**
106         * <p>Squeezes any repetitions of a character that is mentioned in the
107         * supplied set.</p>
108         *
109         * <p>An example is:</p>
110         * <ul>
111         *   <li>squeeze(&quot;hello&quot;, {&quot;el&quot;}) => &quot;helo&quot;</li>
112         * </ul>
113         * 
114         * @see CharSet#getInstance(java.lang.String) for set-syntax.
115         * @param str  the string to squeeze, may be null
116         * @param set  the character set to use for manipulation, may be null
117         * @return modified String, <code>null</code> if null string input
118         */
119        public static String squeeze(String str, String[] set) {
120            if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
121                return str;
122            }
123            CharSet chars = CharSet.getInstance(set);
124            StringBuffer buffer = new StringBuffer(str.length());
125            char[] chrs = str.toCharArray();
126            int sz = chrs.length;
127            char lastChar = ' ';
128            char ch = ' ';
129            for (int i = 0; i < sz; i++) {
130                ch = chrs[i];
131                if (chars.contains(ch)) {
132                    if ((ch == lastChar) && (i != 0)) {
133                        continue;
134                    }
135                }
136                buffer.append(ch);
137                lastChar = ch;
138            }
139            return buffer.toString();
140        }
141    
142        // Count
143        //-----------------------------------------------------------------------
144        /**
145         * <p>Takes an argument in set-syntax, see evaluateSet,
146         * and returns the number of characters present in the specified string.</p>
147         *
148         * <pre>
149         * CharSetUtils.count(null, *)        = 0
150         * CharSetUtils.count("", *)          = 0
151         * CharSetUtils.count(*, null)        = 0
152         * CharSetUtils.count(*, "")          = 0
153         * CharSetUtils.count("hello", "k-p") = 3
154         * CharSetUtils.count("hello", "a-e") = 1
155         * </pre>
156         *
157         * @see CharSet#getInstance(java.lang.String) for set-syntax.
158         * @param str  String to count characters in, may be null
159         * @param set  String set of characters to count, may be null
160         * @return character count, zero if null string input
161         */
162        public static int count(String str, String set) {
163            if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
164                return 0;
165            }
166            String[] strs = new String[1];
167            strs[0] = set;
168            return count(str, strs);
169        }
170        
171        /**
172         * <p>Takes an argument in set-syntax, see evaluateSet,
173         * and returns the number of characters present in the specified string.</p>
174         *
175         * <p>An example would be:</p>
176         * <ul>
177         *  <li>count(&quot;hello&quot;, {&quot;c-f&quot;, &quot;o&quot;}) returns 2.</li>
178         * </ul>
179         *
180         * @see CharSet#getInstance(java.lang.String) for set-syntax.
181         * @param str  String to count characters in, may be null
182         * @param set  String[] set of characters to count, may be null
183         * @return character count, zero if null string input
184         */
185        public static int count(String str, String[] set) {
186            if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
187                return 0;
188            }
189            CharSet chars = CharSet.getInstance(set);
190            int count = 0;
191            char[] chrs = str.toCharArray();
192            int sz = chrs.length;
193            for(int i=0; i<sz; i++) {
194                if(chars.contains(chrs[i])) {
195                    count++;
196                }
197            }
198            return count;
199        }
200    
201        // Keep
202        //-----------------------------------------------------------------------
203        /**
204         * <p>Takes an argument in set-syntax, see evaluateSet,
205         * and keeps any of characters present in the specified string.</p>
206         *
207         * <pre>
208         * CharSetUtils.keep(null, *)        = null
209         * CharSetUtils.keep("", *)          = ""
210         * CharSetUtils.keep(*, null)        = ""
211         * CharSetUtils.keep(*, "")          = ""
212         * CharSetUtils.keep("hello", "hl")  = "hll"
213         * CharSetUtils.keep("hello", "le")  = "ell"
214         * </pre>
215         *
216         * @see CharSet#getInstance(java.lang.String) for set-syntax.
217         * @param str  String to keep characters from, may be null
218         * @param set  String set of characters to keep, may be null
219         * @return modified String, <code>null</code> if null string input
220         * @since 2.0
221         */
222        public static String keep(String str, String set) {
223            if (str == null) {
224                return null;
225            }
226            if (str.length() == 0 || StringUtils.isEmpty(set)) {
227                return "";
228            }
229            String[] strs = new String[1];
230            strs[0] = set;
231            return keep(str, strs);
232        }
233        
234        /**
235         * <p>Takes an argument in set-syntax, see evaluateSet,
236         * and keeps any of characters present in the specified string.</p>
237         *
238         * <p>An example would be:</p>
239         * <ul>
240         *  <li>keep(&quot;hello&quot;, {&quot;c-f&quot;, &quot;o&quot;})
241         *   returns &quot;eo&quot;</li>
242         * </ul>
243         *
244         * @see CharSet#getInstance(java.lang.String) for set-syntax.
245         * @param str  String to keep characters from, may be null
246         * @param set  String[] set of characters to keep, may be null
247         * @return modified String, <code>null</code> if null string input
248         * @since 2.0
249         */
250        public static String keep(String str, String[] set) {
251            if (str == null) {
252                return null;
253            }
254            if (str.length() == 0 || ArrayUtils.isEmpty(set)) {
255                return "";
256            }
257            return modify(str, set, true);
258        }
259    
260        // Delete
261        //-----------------------------------------------------------------------
262        /**
263         * <p>Takes an argument in set-syntax, see evaluateSet,
264         * and deletes any of characters present in the specified string.</p>
265         *
266         * <pre>
267         * CharSetUtils.delete(null, *)        = null
268         * CharSetUtils.delete("", *)          = ""
269         * CharSetUtils.delete(*, null)        = *
270         * CharSetUtils.delete(*, "")          = *
271         * CharSetUtils.delete("hello", "hl")  = "eo"
272         * CharSetUtils.delete("hello", "le")  = "ho"
273         * </pre>
274         *
275         * @see CharSet#getInstance(java.lang.String) for set-syntax.
276         * @param str  String to delete characters from, may be null
277         * @param set  String set of characters to delete, may be null
278         * @return modified String, <code>null</code> if null string input
279         */
280        public static String delete(String str, String set) {
281            if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
282                return str;
283            }
284            String[] strs = new String[1];
285            strs[0] = set;
286            return delete(str, strs);
287        }
288        
289        /**
290         * <p>Takes an argument in set-syntax, see evaluateSet,
291         * and deletes any of characters present in the specified string.</p>
292         *
293         * <p>An example would be:</p>
294         * <ul>
295         *  <li>delete(&quot;hello&quot;, {&quot;c-f&quot;, &quot;o&quot;}) returns
296         *   &quot;hll&quot;</li>
297         * </ul>
298         *
299         * @see CharSet#getInstance(java.lang.String) for set-syntax.
300         * @param str  String to delete characters from, may be null
301         * @param set  String[] set of characters to delete, may be null
302         * @return modified String, <code>null</code> if null string input
303         */
304        public static String delete(String str, String[] set) {
305            if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
306                return str;
307            }
308            return modify(str, set, false);
309        }
310    
311        //-----------------------------------------------------------------------
312        /**
313         * Implementation of delete and keep
314         *
315         * @param str String to modify characters within
316         * @param set String[] set of characters to modify
317         * @param expect whether to evaluate on match, or non-match
318         * @return modified String
319         */
320        private static String modify(String str, String[] set, boolean expect) {
321            CharSet chars = CharSet.getInstance(set);
322            StringBuffer buffer = new StringBuffer(str.length());
323            char[] chrs = str.toCharArray();
324            int sz = chrs.length;
325            for(int i=0; i<sz; i++) {
326                if(chars.contains(chrs[i]) == expect) {
327                    buffer.append(chrs[i]);
328                }
329            }
330            return buffer.toString();
331        }
332    
333        // Translate
334        //-----------------------------------------------------------------------
335        /**
336         * <p>Translate characters in a String.
337         * This is a multi character search and replace routine.</p>
338         *
339         * <p>An example is:</p>
340         * <ul>
341         *   <li>translate(&quot;hello&quot;, &quot;ho&quot;, &quot;jy&quot;)
342         *    =&gt; jelly</li>
343         * </ul>
344         *
345         * <p>If the length of characters to search for is greater than the
346         * length of characters to replace, then the last character is 
347         * used.</p>
348         * 
349         * <pre>
350         * CharSetUtils.translate(null, *, *) = null
351         * CharSetUtils.translate("", *, *)   = ""
352         * </pre>
353         *
354         * @param str  String to replace characters in, may be null
355         * @param searchChars   a set of characters to search for, must not be null
356         * @param replaceChars  a set of characters to replace, must not be null or empty (&quot;&quot;)
357         * @return translated String, <code>null</code> if null string input
358         * @throws NullPointerException if <code>searchChars</code> or <code>replaceChars</code> 
359         *  is <code>null</code>
360         * @throws ArrayIndexOutOfBoundsException if <code>replaceChars</code> is empty (&quot;&quot;)
361         * @deprecated Use {@link StringUtils#replaceChars(String, String, String)}.
362         *             Method will be removed in Commons Lang 3.0.
363         *  NOTE: StringUtils#replaceChars behaves differently when 'searchChars' is longer
364         *  than 'replaceChars'. CharSetUtils#translate will use the last char of the replacement
365         *  string whereas StringUtils#replaceChars will delete
366         */
367        public static String translate(String str, String searchChars, String replaceChars) {
368            if (StringUtils.isEmpty(str)) {
369                return str;
370            }
371            StringBuffer buffer = new StringBuffer(str.length());
372            char[] chrs = str.toCharArray();
373            char[] withChrs = replaceChars.toCharArray();
374            int sz = chrs.length;
375            int withMax = replaceChars.length() - 1;
376            for(int i=0; i<sz; i++) {
377                int idx = searchChars.indexOf(chrs[i]);
378                if(idx != -1) {
379                    if(idx > withMax) {
380                        idx = withMax;
381                    }
382                    buffer.append(withChrs[idx]);
383                } else {
384                    buffer.append(chrs[i]);
385                }
386            }
387            return buffer.toString();
388        }
389    
390    }