Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
/**
 * <p>Operations on {@link CharSequence} that are
 * {@code null} safe.</p>
 *
 * <p>Only {@link #subSequence(CharSequence, int)} tolerates a {@code null}
 * sequence; the package-private helpers dereference their arguments and
 * expect the caller to have performed any {@code null} checks.</p>
 *
 * @see CharSequence
 * @since 3.0
 */
public class CharSequenceUtils {

    /** Result returned by the search methods when nothing matches. */
    private static final int NOT_FOUND = -1;

    /**
     * <p>{@code CharSequenceUtils} instances should NOT be constructed in
     * standard programming. </p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public CharSequenceUtils() {
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Returns a new {@code CharSequence} that is a subsequence of this
     * sequence starting with the {@code char} value at the specified index.</p>
     *
     * <p>This provides the {@code CharSequence} equivalent to {@link String#substring(int)}.
     * The length (in {@code char}) of the returned sequence is {@code length() - start},
     * so if {@code start == cs.length()} then an empty sequence is returned.</p>
     *
     * @param cs  the specified subsequence, null returns null
     * @param start  the start index, inclusive, valid
     * @return a new subsequence, may be null
     * @throws IndexOutOfBoundsException if {@code start} is negative or if
     *  {@code start} is greater than {@code length()}
     */
    public static CharSequence subSequence(final CharSequence cs, final int start) {
        return cs == null ? null : cs.subSequence(start, cs.length());
    }

    //-----------------------------------------------------------------------
    /**
     * Returns the index within {@code cs} of the first occurrence of the
     * specified character, starting the search at the specified index.
     * <p>
     * If a character with value {@code searchChar} occurs in the
     * character sequence represented by the {@code cs}
     * object at an index no smaller than {@code start}, then
     * the index of the first such occurrence is returned. For values
     * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
     * this is the smallest value <i>k</i> such that:
     * <blockquote><pre>
     * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
     * </pre></blockquote>
     * is true. For other values of {@code searchChar}, it is the
     * smallest value <i>k</i> such that:
     * <blockquote><pre>
     * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
     * </pre></blockquote>
     * is true. In either case, if no such character occurs in {@code cs}
     * at or after position {@code start}, then
     * {@code -1} is returned. Values of {@code searchChar} that are not
     * valid code points (negative, or above {@link Character#MAX_CODE_POINT})
     * never match and yield {@code -1}.
     *
     * <p>
     * There is no restriction on the value of {@code start}. If it
     * is negative, it has the same effect as if it were zero: the entire
     * {@code CharSequence} may be searched. If it is greater than
     * the length of {@code cs}, it has the same effect as if it were
     * equal to the length of {@code cs}: {@code -1} is returned.
     *
     * <p>All indices are specified in {@code char} values
     * (Unicode code units).
     *
     * @param cs  the {@code CharSequence} to be processed, not null
     * @param searchChar  the char to be searched for
     * @param start  the start index, negative starts at the string start
     * @return the index where the search char was found, -1 if not found
     * @since 3.6 updated to behave more like {@code String}
     */
    static int indexOf(final CharSequence cs, final int searchChar, int start) {
        if (cs instanceof String) {
            return ((String) cs).indexOf(searchChar, start);
        }
        final int sz = cs.length();
        if (start < 0) {
            start = 0;
        }
        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            for (int i = start; i < sz; i++) {
                if (cs.charAt(i) == searchChar) {
                    return i;
                }
            }
            // Stop here: a BMP (or negative) value must never reach the surrogate-pair
            // search below, where Character.toChars would reject negative input.
            return NOT_FOUND;
        }
        //supplementary characters (LANG1300)
        if (searchChar <= Character.MAX_CODE_POINT) {
            final char[] chars = Character.toChars(searchChar);
            // A pair occupies two code units, so the last candidate position is sz - 2.
            for (int i = start; i < sz - 1; i++) {
                final char high = cs.charAt(i);
                final char low = cs.charAt(i + 1);
                if (high == chars[0] && low == chars[1]) {
                    return i;
                }
            }
        }
        return NOT_FOUND;
    }

    /**
     * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
     *
     * <p>Both arguments are converted with {@code toString()} and the search is
     * delegated to {@link String#indexOf(String, int)}.</p>
     *
     * @param cs the {@code CharSequence} to be processed
     * @param searchChar the {@code CharSequence} to be searched for
     * @param start the start index
     * @return the index where the search sequence was found
     */
    static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
        // TODO: Implement directly on CharSequence rather than converting to String.
        return cs.toString().indexOf(searchChar.toString(), start);
    }

    /**
     * Returns the index within {@code cs} of the last occurrence of
     * the specified character, searching backward starting at the
     * specified index. For values of {@code searchChar} in the range
     * from 0 to 0xFFFF (inclusive), the index returned is the largest
     * value <i>k</i> such that:
     * <blockquote><pre>
     * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
     * </pre></blockquote>
     * is true. For other values of {@code searchChar}, it is the
     * largest value <i>k</i> such that:
     * <blockquote><pre>
     * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
     * </pre></blockquote>
     * is true. In either case, if no such character occurs in {@code cs}
     * at or before position {@code start}, then {@code -1} is returned.
     * Values of {@code searchChar} that are not valid code points (negative,
     * or above {@link Character#MAX_CODE_POINT}) never match and yield {@code -1}.
     *
     * <p>All indices are specified in {@code char} values
     * (Unicode code units).
     *
     * @param cs  the {@code CharSequence} to be processed
     * @param searchChar  the char to be searched for
     * @param start  the start index, negative returns -1, beyond length starts at end
     * @return the index where the search char was found, -1 if not found
     * @since 3.6 updated to behave more like {@code String}
     */
    static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
        if (cs instanceof String) {
            return ((String) cs).lastIndexOf(searchChar, start);
        }
        final int sz = cs.length();
        if (start < 0) {
            return NOT_FOUND;
        }
        if (start >= sz) {
            start = sz - 1;
        }
        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            for (int i = start; i >= 0; --i) {
                if (cs.charAt(i) == searchChar) {
                    return i;
                }
            }
            // Stop here: a BMP (or negative) value must never reach the surrogate-pair
            // search below, where Character.toChars would reject negative input.
            return NOT_FOUND;
        }
        //supplementary characters (LANG1300)
        if (searchChar <= Character.MAX_CODE_POINT) {
            final char[] chars = Character.toChars(searchChar);
            // A pair needs a code unit after its start, so the highest candidate position
            // is sz - 2. Clamping (as String.lastIndexOf does) still finds a pair that
            // ends the sequence when the search starts at or beyond the last index.
            for (int i = Math.min(start, sz - 2); i >= 0; i--) {
                final char high = cs.charAt(i);
                final char low = cs.charAt(i + 1);
                if (chars[0] == high && chars[1] == low) {
                    return i;
                }
            }
        }
        return NOT_FOUND;
    }

    /**
     * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
     *
     * <p>Both arguments are converted with {@code toString()} and the search is
     * delegated to {@link String#lastIndexOf(String, int)}.</p>
     *
     * @param cs the {@code CharSequence} to be processed
     * @param searchChar the {@code CharSequence} to be searched for
     * @param start the start index
     * @return the index where the search sequence was found
     */
    static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
        // TODO: Implement directly on CharSequence rather than converting to String.
        return cs.toString().lastIndexOf(searchChar.toString(), start);
    }

    /**
     * Green implementation of toCharArray.
     *
     * <p>A {@code String} is copied with {@link String#toCharArray()}; any other
     * sequence is copied one {@code char} at a time.</p>
     *
     * @param cs the {@code CharSequence} to be processed
     * @return the resulting char array
     */
    static char[] toCharArray(final CharSequence cs) {
        if (cs instanceof String) {
            return ((String) cs).toCharArray();
        }
        final int sz = cs.length();
        final char[] array = new char[sz];
        for (int i = 0; i < sz; i++) {
            array[i] = cs.charAt(i);
        }
        return array;
    }

    /**
     * Green implementation of regionMatches.
     *
     * <p>When both sequences are {@code String}s the call is delegated to
     * {@link String#regionMatches(boolean, int, String, int, int)}. Otherwise the
     * regions are compared {@code char} by {@code char}; negative offsets or
     * length, or a region that extends past either sequence, yield {@code false}.</p>
     *
     * @param cs the {@code CharSequence} to be processed
     * @param ignoreCase whether or not to be case insensitive
     * @param thisStart the index to start on the {@code cs} CharSequence
     * @param substring the {@code CharSequence} to be looked for
     * @param start the index to start on the {@code substring} CharSequence
     * @param length character length of the region
     * @return whether the region matched
     */
    static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
            final CharSequence substring, final int start, final int length)    {
        if (cs instanceof String && substring instanceof String) {
            return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
        }
        int index1 = thisStart;
        int index2 = start;
        int tmpLen = length;

        // Extract these first so we detect NPEs the same as the java.lang.String version
        final int srcLen = cs.length() - thisStart;
        final int otherLen = substring.length() - start;

        // Check for invalid parameters
        if (thisStart < 0 || start < 0 || length < 0) {
            return false;
        }

        // Check that the regions are long enough
        if (srcLen < length || otherLen < length) {
            return false;
        }

        while (tmpLen-- > 0) {
            final char c1 = cs.charAt(index1++);
            final char c2 = substring.charAt(index2++);

            if (c1 == c2) {
                continue;
            }

            if (!ignoreCase) {
                return false;
            }

            // The same check as in String.regionMatches(): compare the upper-cased chars,
            // then the lower-cased forms of those upper-cased chars. Lower-casing the
            // originals instead gives different answers for some pairs (e.g. U+0131 / U+0130).
            final char u1 = Character.toUpperCase(c1);
            final char u2 = Character.toUpperCase(c2);
            if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
                return false;
            }
        }

        return true;
    }
}