001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019/**
020 * <p>Operations on {@code CharSet} instances.</p>
021 *
022 * <p>This class handles {@code null} input gracefully.
023 * An exception will not be thrown for a {@code null} input.
024 * Each method documents its behaviour in more detail.</p>
025 *
026 * <p>#ThreadSafe#</p>
027 * @see CharSet
028 * @since 1.0
029 */
030public class CharSetUtils {
031
032    /**
033     * <p>CharSetUtils instances should NOT be constructed in standard programming.
034     * Instead, the class should be used as {@code CharSetUtils.evaluateSet(null);}.</p>
035     *
036     * <p>This constructor is public to permit tools that require a JavaBean instance
037     * to operate.</p>
038     */
039    public CharSetUtils() {
040      super();
041    }
042
043    // Squeeze
044    //-----------------------------------------------------------------------
045    /**
046     * <p>Squeezes any repetitions of a character that is mentioned in the
047     * supplied set.</p>
048     *
049     * <pre>
050     * CharSetUtils.squeeze(null, *)        = null
051     * CharSetUtils.squeeze("", *)          = ""
052     * CharSetUtils.squeeze(*, null)        = *
053     * CharSetUtils.squeeze(*, "")          = *
054     * CharSetUtils.squeeze("hello", "k-p") = "helo"
055     * CharSetUtils.squeeze("hello", "a-e") = "hello"
056     * </pre>
057     *
058     * @see CharSet#getInstance(java.lang.String...) for set-syntax.
059     * @param str  the string to squeeze, may be null
060     * @param set  the character set to use for manipulation, may be null
061     * @return the modified String, {@code null} if null string input
062     */
063    public static String squeeze(final String str, final String... set) {
064        if (StringUtils.isEmpty(str) || deepEmpty(set)) {
065            return str;
066        }
067        final CharSet chars = CharSet.getInstance(set);
068        final StringBuilder buffer = new StringBuilder(str.length());
069        final char[] chrs = str.toCharArray();
070        final int sz = chrs.length;
071        char lastChar = chrs[0];
072        char ch = ' ';
073        Character inChars = null;
074        Character notInChars = null;
075        buffer.append(lastChar);
076        for (int i = 1; i < sz; i++) {
077            ch = chrs[i];
078            if (ch == lastChar) {
079                if (inChars != null && ch == inChars) {
080                    continue;
081                }
082                if (notInChars == null || ch != notInChars) {
083                    if (chars.contains(ch)) {
084                        inChars = ch;
085                        continue;
086                    }
087                    notInChars = ch;
088                }
089            }
090            buffer.append(ch);
091            lastChar = ch;
092        }
093        return buffer.toString();
094    }
095
096    // ContainsAny
097    //-----------------------------------------------------------------------
098    /**
099     * <p>Takes an argument in set-syntax, see evaluateSet,
100     * and identifies whether any of the characters are present in the specified string.</p>
101     *
102     * <pre>
103     * CharSetUtils.containsAny(null, *)        = false
104     * CharSetUtils.containsAny("", *)          = false
105     * CharSetUtils.containsAny(*, null)        = false
106     * CharSetUtils.containsAny(*, "")          = false
107     * CharSetUtils.containsAny("hello", "k-p") = true
108     * CharSetUtils.containsAny("hello", "a-d") = false
109     * </pre>
110     *
111     * @see CharSet#getInstance(java.lang.String...) for set-syntax.
112     * @param str  String to look for characters in, may be null
113     * @param set  String[] set of characters to identify, may be null
114     * @return whether or not the characters in the set are in the primary string
115     * @since 3.2
116     */
117    public static boolean containsAny(final String str, final String... set) {
118        if (StringUtils.isEmpty(str) || deepEmpty(set)) {
119            return false;
120        }
121        final CharSet chars = CharSet.getInstance(set);
122        for (final char c : str.toCharArray()) {
123            if (chars.contains(c)) {
124                return true;
125            }
126        }
127        return false;
128    }
129
130    // Count
131    //-----------------------------------------------------------------------
132    /**
133     * <p>Takes an argument in set-syntax, see evaluateSet,
134     * and returns the number of characters present in the specified string.</p>
135     *
136     * <pre>
137     * CharSetUtils.count(null, *)        = 0
138     * CharSetUtils.count("", *)          = 0
139     * CharSetUtils.count(*, null)        = 0
140     * CharSetUtils.count(*, "")          = 0
141     * CharSetUtils.count("hello", "k-p") = 3
142     * CharSetUtils.count("hello", "a-e") = 1
143     * </pre>
144     *
145     * @see CharSet#getInstance(java.lang.String...) for set-syntax.
146     * @param str  String to count characters in, may be null
147     * @param set  String[] set of characters to count, may be null
148     * @return the character count, zero if null string input
149     */
150    public static int count(final String str, final String... set) {
151        if (StringUtils.isEmpty(str) || deepEmpty(set)) {
152            return 0;
153        }
154        final CharSet chars = CharSet.getInstance(set);
155        int count = 0;
156        for (final char c : str.toCharArray()) {
157            if (chars.contains(c)) {
158                count++;
159            }
160        }
161        return count;
162    }
163
164    // Keep
165    //-----------------------------------------------------------------------
166    /**
167     * <p>Takes an argument in set-syntax, see evaluateSet,
168     * and keeps any of characters present in the specified string.</p>
169     *
170     * <pre>
171     * CharSetUtils.keep(null, *)        = null
172     * CharSetUtils.keep("", *)          = ""
173     * CharSetUtils.keep(*, null)        = ""
174     * CharSetUtils.keep(*, "")          = ""
175     * CharSetUtils.keep("hello", "hl")  = "hll"
176     * CharSetUtils.keep("hello", "le")  = "ell"
177     * </pre>
178     *
179     * @see CharSet#getInstance(java.lang.String...) for set-syntax.
180     * @param str  String to keep characters from, may be null
181     * @param set  String[] set of characters to keep, may be null
182     * @return the modified String, {@code null} if null string input
183     * @since 2.0
184     */
185    public static String keep(final String str, final String... set) {
186        if (str == null) {
187            return null;
188        }
189        if (str.isEmpty() || deepEmpty(set)) {
190            return StringUtils.EMPTY;
191        }
192        return modify(str, set, true);
193    }
194
195    // Delete
196    //-----------------------------------------------------------------------
197    /**
198     * <p>Takes an argument in set-syntax, see evaluateSet,
199     * and deletes any of characters present in the specified string.</p>
200     *
201     * <pre>
202     * CharSetUtils.delete(null, *)        = null
203     * CharSetUtils.delete("", *)          = ""
204     * CharSetUtils.delete(*, null)        = *
205     * CharSetUtils.delete(*, "")          = *
206     * CharSetUtils.delete("hello", "hl")  = "eo"
207     * CharSetUtils.delete("hello", "le")  = "ho"
208     * </pre>
209     *
210     * @see CharSet#getInstance(java.lang.String...) for set-syntax.
211     * @param str  String to delete characters from, may be null
212     * @param set  String[] set of characters to delete, may be null
213     * @return the modified String, {@code null} if null string input
214     */
215    public static String delete(final String str, final String... set) {
216        if (StringUtils.isEmpty(str) || deepEmpty(set)) {
217            return str;
218        }
219        return modify(str, set, false);
220    }
221
222    //-----------------------------------------------------------------------
223    /**
224     * Implementation of delete and keep
225     *
226     * @param str String to modify characters within
227     * @param set String[] set of characters to modify
228     * @param expect whether to evaluate on match, or non-match
229     * @return the modified String, not null
230     */
231    private static String modify(final String str, final String[] set, final boolean expect) {
232        final CharSet chars = CharSet.getInstance(set);
233        final StringBuilder buffer = new StringBuilder(str.length());
234        final char[] chrs = str.toCharArray();
235        for (final char chr : chrs) {
236            if (chars.contains(chr) == expect) {
237                buffer.append(chr);
238            }
239        }
240        return buffer.toString();
241    }
242
243    /**
244     * Determines whether or not all the Strings in an array are
245     * empty or not.
246     *
247     * @param strings String[] whose elements are being checked for emptiness
248     * @return whether or not the String is empty
249     */
250    private static boolean deepEmpty(final String[] strings) {
251        if (strings != null) {
252            for (final String s : strings) {
253                if (StringUtils.isNotEmpty(s)) {
254                    return false;
255                }
256            }
257        }
258        return true;
259    }
260}