001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang;
018    
019    import org.apache.commons.lang.text.StrBuilder;
020    
021    /**
022     * <p>Operations on <code>CharSet</code>s.</p>
023     *
024     * <p>This class handles <code>null</code> input gracefully.
025     * An exception will not be thrown for a <code>null</code> input.
026     * Each method documents its behaviour in more detail.</p>
027     * 
028     * <p>#ThreadSafe#</p>
029     * @see CharSet
030     * @author Apache Software Foundation
031     * @author Phil Steitz
032     * @author Gary Gregory
033     * @since 1.0
034     * @version $Id: CharSetUtils.java 1057072 2011-01-10 01:55:57Z niallp $
035     */
036    public class CharSetUtils {
037    
038        /**
039         * <p>CharSetUtils instances should NOT be constructed in standard programming.
040         * Instead, the class should be used as <code>CharSetUtils.evaluateSet(null);</code>.</p>
041         *
042         * <p>This constructor is public to permit tools that require a JavaBean instance
043         * to operate.</p>
044         */
045        public CharSetUtils() {
046          super();
047        }
048    
049        // Factory
050        //-----------------------------------------------------------------------
051        /**
052         * <p>Creates a <code>CharSet</code> instance which allows a certain amount of
053         * set logic to be performed.</p>
054         * <p>The syntax is:</p>
055         * <ul>
056         *  <li>&quot;aeio&quot; which implies 'a','e',..</li>
057         *  <li>&quot;^e&quot; implies not e.</li>
058         *  <li>&quot;ej-m&quot; implies e,j-&gt;m. e,j,k,l,m.</li>
059         * </ul>
060         * 
061         * <pre>
062         * CharSetUtils.evaluateSet(null)    = null
063         * CharSetUtils.evaluateSet([])      = CharSet matching nothing
064         * CharSetUtils.evaluateSet(["a-e"]) = CharSet matching a,b,c,d,e
065         * </pre>
066         *
067         * @param set  the set, may be null
068         * @return a CharSet instance, <code>null</code> if null input
069         * @deprecated Use {@link CharSet#getInstance(String[])}.
070         *             Method will be removed in Commons Lang 3.0.
071         */
072        public static CharSet evaluateSet(String[] set) {
073            if (set == null) {
074                return null;
075            }
076            return new CharSet(set); 
077        }
078    
079        // Squeeze
080        //-----------------------------------------------------------------------
081        /**
082         * <p>Squeezes any repetitions of a character that is mentioned in the
083         * supplied set.</p>
084         *
085         * <pre>
086         * CharSetUtils.squeeze(null, *)        = null
087         * CharSetUtils.squeeze("", *)          = ""
088         * CharSetUtils.squeeze(*, null)        = *
089         * CharSetUtils.squeeze(*, "")          = *
090         * CharSetUtils.squeeze("hello", "k-p") = "helo"
091         * CharSetUtils.squeeze("hello", "a-e") = "hello"
092         * </pre>
093         *
094         * @see CharSet#getInstance(java.lang.String) for set-syntax.
095         * @param str  the string to squeeze, may be null
096         * @param set  the character set to use for manipulation, may be null
097         * @return modified String, <code>null</code> if null string input
098         */
099        public static String squeeze(String str, String set) {
100            if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
101                return str;
102            }
103            String[] strs = new String[1];
104            strs[0] = set;
105            return squeeze(str, strs);
106        }
107    
108        /**
109         * <p>Squeezes any repetitions of a character that is mentioned in the
110         * supplied set.</p>
111         *
112         * <p>An example is:</p>
113         * <ul>
114         *   <li>squeeze(&quot;hello&quot;, {&quot;el&quot;}) => &quot;helo&quot;</li>
115         * </ul>
116         * 
117         * @see CharSet#getInstance(java.lang.String) for set-syntax.
118         * @param str  the string to squeeze, may be null
119         * @param set  the character set to use for manipulation, may be null
120         * @return modified String, <code>null</code> if null string input
121         */
122        public static String squeeze(String str, String[] set) {
123            if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
124                return str;
125            }
126            CharSet chars = CharSet.getInstance(set);
127            StrBuilder buffer = new StrBuilder(str.length());
128            char[] chrs = str.toCharArray();
129            int sz = chrs.length;
130            char lastChar = ' ';
131            char ch = ' ';
132            for (int i = 0; i < sz; i++) {
133                ch = chrs[i];
134                if (chars.contains(ch)) {
135                    if ((ch == lastChar) && (i != 0)) {
136                        continue;
137                    }
138                }
139                buffer.append(ch);
140                lastChar = ch;
141            }
142            return buffer.toString();
143        }
144    
145        // Count
146        //-----------------------------------------------------------------------
147        /**
148         * <p>Takes an argument in set-syntax, see evaluateSet,
149         * and returns the number of characters present in the specified string.</p>
150         *
151         * <pre>
152         * CharSetUtils.count(null, *)        = 0
153         * CharSetUtils.count("", *)          = 0
154         * CharSetUtils.count(*, null)        = 0
155         * CharSetUtils.count(*, "")          = 0
156         * CharSetUtils.count("hello", "k-p") = 3
157         * CharSetUtils.count("hello", "a-e") = 1
158         * </pre>
159         *
160         * @see CharSet#getInstance(java.lang.String) for set-syntax.
161         * @param str  String to count characters in, may be null
162         * @param set  String set of characters to count, may be null
163         * @return character count, zero if null string input
164         */
165        public static int count(String str, String set) {
166            if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
167                return 0;
168            }
169            String[] strs = new String[1];
170            strs[0] = set;
171            return count(str, strs);
172        }
173        
174        /**
175         * <p>Takes an argument in set-syntax, see evaluateSet,
176         * and returns the number of characters present in the specified string.</p>
177         *
178         * <p>An example would be:</p>
179         * <ul>
180         *  <li>count(&quot;hello&quot;, {&quot;c-f&quot;, &quot;o&quot;}) returns 2.</li>
181         * </ul>
182         *
183         * @see CharSet#getInstance(java.lang.String) for set-syntax.
184         * @param str  String to count characters in, may be null
185         * @param set  String[] set of characters to count, may be null
186         * @return character count, zero if null string input
187         */
188        public static int count(String str, String[] set) {
189            if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
190                return 0;
191            }
192            CharSet chars = CharSet.getInstance(set);
193            int count = 0;
194            char[] chrs = str.toCharArray();
195            int sz = chrs.length;
196            for(int i=0; i<sz; i++) {
197                if(chars.contains(chrs[i])) {
198                    count++;
199                }
200            }
201            return count;
202        }
203    
204        // Keep
205        //-----------------------------------------------------------------------
206        /**
207         * <p>Takes an argument in set-syntax, see evaluateSet,
208         * and keeps any of characters present in the specified string.</p>
209         *
210         * <pre>
211         * CharSetUtils.keep(null, *)        = null
212         * CharSetUtils.keep("", *)          = ""
213         * CharSetUtils.keep(*, null)        = ""
214         * CharSetUtils.keep(*, "")          = ""
215         * CharSetUtils.keep("hello", "hl")  = "hll"
216         * CharSetUtils.keep("hello", "le")  = "ell"
217         * </pre>
218         *
219         * @see CharSet#getInstance(java.lang.String) for set-syntax.
220         * @param str  String to keep characters from, may be null
221         * @param set  String set of characters to keep, may be null
222         * @return modified String, <code>null</code> if null string input
223         * @since 2.0
224         */
225        public static String keep(String str, String set) {
226            if (str == null) {
227                return null;
228            }
229            if (str.length() == 0 || StringUtils.isEmpty(set)) {
230                return "";
231            }
232            String[] strs = new String[1];
233            strs[0] = set;
234            return keep(str, strs);
235        }
236        
237        /**
238         * <p>Takes an argument in set-syntax, see evaluateSet,
239         * and keeps any of characters present in the specified string.</p>
240         *
241         * <p>An example would be:</p>
242         * <ul>
243         *  <li>keep(&quot;hello&quot;, {&quot;c-f&quot;, &quot;o&quot;})
244         *   returns &quot;eo&quot;</li>
245         * </ul>
246         *
247         * @see CharSet#getInstance(java.lang.String) for set-syntax.
248         * @param str  String to keep characters from, may be null
249         * @param set  String[] set of characters to keep, may be null
250         * @return modified String, <code>null</code> if null string input
251         * @since 2.0
252         */
253        public static String keep(String str, String[] set) {
254            if (str == null) {
255                return null;
256            }
257            if (str.length() == 0 || ArrayUtils.isEmpty(set)) {
258                return "";
259            }
260            return modify(str, set, true);
261        }
262    
263        // Delete
264        //-----------------------------------------------------------------------
265        /**
266         * <p>Takes an argument in set-syntax, see evaluateSet,
267         * and deletes any of characters present in the specified string.</p>
268         *
269         * <pre>
270         * CharSetUtils.delete(null, *)        = null
271         * CharSetUtils.delete("", *)          = ""
272         * CharSetUtils.delete(*, null)        = *
273         * CharSetUtils.delete(*, "")          = *
274         * CharSetUtils.delete("hello", "hl")  = "eo"
275         * CharSetUtils.delete("hello", "le")  = "ho"
276         * </pre>
277         *
278         * @see CharSet#getInstance(java.lang.String) for set-syntax.
279         * @param str  String to delete characters from, may be null
280         * @param set  String set of characters to delete, may be null
281         * @return modified String, <code>null</code> if null string input
282         */
283        public static String delete(String str, String set) {
284            if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
285                return str;
286            }
287            String[] strs = new String[1];
288            strs[0] = set;
289            return delete(str, strs);
290        }
291        
292        /**
293         * <p>Takes an argument in set-syntax, see evaluateSet,
294         * and deletes any of characters present in the specified string.</p>
295         *
296         * <p>An example would be:</p>
297         * <ul>
298         *  <li>delete(&quot;hello&quot;, {&quot;c-f&quot;, &quot;o&quot;}) returns
299         *   &quot;hll&quot;</li>
300         * </ul>
301         *
302         * @see CharSet#getInstance(java.lang.String) for set-syntax.
303         * @param str  String to delete characters from, may be null
304         * @param set  String[] set of characters to delete, may be null
305         * @return modified String, <code>null</code> if null string input
306         */
307        public static String delete(String str, String[] set) {
308            if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
309                return str;
310            }
311            return modify(str, set, false);
312        }
313    
314        //-----------------------------------------------------------------------
315        /**
316         * Implementation of delete and keep
317         *
318         * @param str String to modify characters within
319         * @param set String[] set of characters to modify
320         * @param expect whether to evaluate on match, or non-match
321         * @return modified String
322         */
323        private static String modify(String str, String[] set, boolean expect) {
324            CharSet chars = CharSet.getInstance(set);
325            StrBuilder buffer = new StrBuilder(str.length());
326            char[] chrs = str.toCharArray();
327            int sz = chrs.length;
328            for(int i=0; i<sz; i++) {
329                if(chars.contains(chrs[i]) == expect) {
330                    buffer.append(chrs[i]);
331                }
332            }
333            return buffer.toString();
334        }
335    
336        // Translate
337        //-----------------------------------------------------------------------
338        /**
339         * <p>Translate characters in a String.
340         * This is a multi character search and replace routine.</p>
341         *
342         * <p>An example is:</p>
343         * <ul>
344         *   <li>translate(&quot;hello&quot;, &quot;ho&quot;, &quot;jy&quot;)
345         *    =&gt; jelly</li>
346         * </ul>
347         *
348         * <p>If the length of characters to search for is greater than the
349         * length of characters to replace, then the last character is 
350         * used.</p>
351         * 
352         * <pre>
353         * CharSetUtils.translate(null, *, *) = null
354         * CharSetUtils.translate("", *, *)   = ""
355         * </pre>
356         *
357         * @param str  String to replace characters in, may be null
358         * @param searchChars   a set of characters to search for, must not be null
359         * @param replaceChars  a set of characters to replace, must not be null or empty (&quot;&quot;)
360         * @return translated String, <code>null</code> if null string input
361         * @throws NullPointerException if <code>searchChars</code> or <code>replaceChars</code> 
362         *  is <code>null</code>
363         * @throws ArrayIndexOutOfBoundsException if <code>replaceChars</code> is empty (&quot;&quot;)
364         * @deprecated Use {@link StringUtils#replaceChars(String, String, String)}.
365         *             Method will be removed in Commons Lang 3.0.
366         *  NOTE: StringUtils#replaceChars behaves differently when 'searchChars' is longer
367         *  than 'replaceChars'. CharSetUtils#translate will use the last char of the replacement
368         *  string whereas StringUtils#replaceChars will delete
369         */
370        public static String translate(String str, String searchChars, String replaceChars) {
371            if (StringUtils.isEmpty(str)) {
372                return str;
373            }
374            StrBuilder buffer = new StrBuilder(str.length());
375            char[] chrs = str.toCharArray();
376            char[] withChrs = replaceChars.toCharArray();
377            int sz = chrs.length;
378            int withMax = replaceChars.length() - 1;
379            for(int i=0; i<sz; i++) {
380                int idx = searchChars.indexOf(chrs[i]);
381                if(idx != -1) {
382                    if(idx > withMax) {
383                        idx = withMax;
384                    }
385                    buffer.append(withChrs[idx]);
386                } else {
387                    buffer.append(chrs[i]);
388                }
389            }
390            return buffer.toString();
391        }
392    
393    }