001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019/**
020 * Operations on {@link CharSequence} that are
021 * {@code null} safe.
022 *
023 * @see CharSequence
024 * @since 3.0
025 */
026public class CharSequenceUtils {
027
028    private static final int NOT_FOUND = -1;
029
030    static final int TO_STRING_LIMIT = 16;
031
032    private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) {
033        for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
034            if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) {
035                return false;
036            }
037        }
038        return true;
039    }
040
041    /**
042     * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
043     *
044     * @param cs         the {@link CharSequence} to be processed
045     * @param searchChar the {@link CharSequence} to be searched for
046     * @param start      the start index
047     * @return the index where the search sequence was found, or {@code -1} if there is no such occurrence.
048     */
049    static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
050        if (cs == null || searchChar == null) {
051            return StringUtils.INDEX_NOT_FOUND;
052        }
053        if (cs instanceof String) {
054            return ((String) cs).indexOf(searchChar.toString(), start);
055        }
056        if (cs instanceof StringBuilder) {
057            return ((StringBuilder) cs).indexOf(searchChar.toString(), start);
058        }
059        if (cs instanceof StringBuffer) {
060            return ((StringBuffer) cs).indexOf(searchChar.toString(), start);
061        }
062        return cs.toString().indexOf(searchChar.toString(), start);
063//        if (cs instanceof String && searchChar instanceof String) {
064//            // TODO: Do we assume searchChar is usually relatively small;
065//            //       If so then calling toString() on it is better than reverting to
066//            //       the green implementation in the else block
067//            return ((String) cs).indexOf((String) searchChar, start);
068//        } else {
069//            // TODO: Implement rather than convert to String
070//            return cs.toString().indexOf(searchChar.toString(), start);
071//        }
072    }
073
074    /**
075     * Returns the index within {@code cs} of the first occurrence of the specified character, starting the search at the specified index.
076     * <p>
077     * If a character with value {@code searchChar} occurs in the character sequence represented by the {@code cs} object at an index no smaller than
078     * {@code start}, then the index of the first such occurrence is returned. For values of {@code searchChar} in the range from 0 to 0xFFFF (inclusive), this
079     * is the smallest value <em>k</em> such that:
080     * </p>
081     *
082     * <pre>
083     * (this.charAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &gt;= start)
084     * </pre>
085     * <p>
086     * is true. For other values of {@code searchChar}, it is the smallest value <em>k</em> such that:
087     * </p>
088     *
089     * <pre>
090     * (this.codePointAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &gt;= start)
091     * </pre>
092     * <p>
093     * is true. In either case, if no such character occurs inm {@code cs} at or after position {@code start}, then {@code -1} is returned.
094     * </p>
095     * <p>
096     * There is no restriction on the value of {@code start}. If it is negative, it has the same effect as if it were zero: the entire {@link CharSequence} may
097     * be searched. If it is greater than the length of {@code cs}, it has the same effect as if it were equal to the length of {@code cs}: {@code -1} is
098     * returned.
099     * </p>
100     * <p>
101     * All indices are specified in {@code char} values (Unicode code units).
102     * </p>
103     *
104     * @param cs         the {@link CharSequence} to be processed, not null
105     * @param searchChar the char to be searched for
106     * @param start      the start index, negative starts at the string start
107     * @return the index where the search char was found, -1 if not found
108     * @since 3.6 updated to behave more like {@link String}
109     */
110    static int indexOf(final CharSequence cs, final int searchChar, int start) {
111        if (cs instanceof String) {
112            return ((String) cs).indexOf(searchChar, start);
113        }
114        final int sz = cs.length();
115        if (start < 0) {
116            start = 0;
117        }
118        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
119            for (int i = start; i < sz; i++) {
120                if (cs.charAt(i) == searchChar) {
121                    return i;
122                }
123            }
124            return NOT_FOUND;
125        }
126        //supplementary characters (LANG1300)
127        if (searchChar <= Character.MAX_CODE_POINT) {
128            final char[] chars = Character.toChars(searchChar);
129            for (int i = start; i < sz - 1; i++) {
130                final char high = cs.charAt(i);
131                final char low = cs.charAt(i + 1);
132                if (high == chars[0] && low == chars[1]) {
133                    return i;
134                }
135            }
136        }
137        return NOT_FOUND;
138    }
139
140    /**
141     * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
142     *
143     * @param cs the {@link CharSequence} to be processed
144     * @param searchChar the {@link CharSequence} to find
145     * @param start the start index
146     * @return the index where the search sequence was found
147     */
148    static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
149        if (searchChar == null || cs == null) {
150            return NOT_FOUND;
151        }
152        if (searchChar instanceof String) {
153            if (cs instanceof String) {
154                return ((String) cs).lastIndexOf((String) searchChar, start);
155            }
156            if (cs instanceof StringBuilder) {
157                return ((StringBuilder) cs).lastIndexOf((String) searchChar, start);
158            }
159            if (cs instanceof StringBuffer) {
160                return ((StringBuffer) cs).lastIndexOf((String) searchChar, start);
161            }
162        }
163
164        final int len1 = cs.length();
165        final int len2 = searchChar.length();
166
167        if (start > len1) {
168            start = len1;
169        }
170
171        if (start < 0 || len2 > len1) {
172            return NOT_FOUND;
173        }
174
175        if (len2 == 0) {
176            return start;
177        }
178
179        if (len2 <= TO_STRING_LIMIT) {
180            if (cs instanceof String) {
181                return ((String) cs).lastIndexOf(searchChar.toString(), start);
182            }
183            if (cs instanceof StringBuilder) {
184                return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
185            }
186            if (cs instanceof StringBuffer) {
187                return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
188            }
189        }
190
191        if (start + len2 > len1) {
192            start = len1 - len2;
193        }
194
195        final char char0 = searchChar.charAt(0);
196
197        int i = start;
198        while (true) {
199            while (cs.charAt(i) != char0) {
200                i--;
201                if (i < 0) {
202                    return NOT_FOUND;
203                }
204            }
205            if (checkLaterThan1(cs, searchChar, len2, i)) {
206                return i;
207            }
208            i--;
209            if (i < 0) {
210                return NOT_FOUND;
211            }
212        }
213    }
214
215    /**
216     * Returns the index within {@code cs} of the last occurrence of the specified character, searching backward starting at the specified index. For values of
217     * {@code searchChar} in the range from 0 to 0xFFFF (inclusive), the index returned is the largest value <em>k</em> such that:
218     * </p>
219     *
220     * <pre>
221     * (this.charAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &lt;= start)
222     * </pre>
223     *
224     * <p>
225     * is true. For other values of {@code searchChar}, it is the largest value <em>k</em> such that:
226     * <p>
227     *
228     * <pre>
229     * (this.codePointAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &lt;= start)
230     * </pre>
231     *
232     * <p>
233     * is true. In either case, if no such character occurs in {@code cs} at or before position {@code start}, then {@code -1} is returned.
234     * </p>
235     * <p>
236     * All indices are specified in {@code char} values (Unicode code units).
237     * </p>
238     *
239     * @param cs         the {@link CharSequence} to be processed.
240     * @param searchChar the char to be searched for.
241     * @param start      the start index, negative returns -1, beyond length starts at end.
242     * @return the index where the search char was found, -1 if not found.
243     * @since 3.6 updated to behave more like {@link String}
244     */
245    static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
246        if (cs instanceof String) {
247            return ((String) cs).lastIndexOf(searchChar, start);
248        }
249        final int sz = cs.length();
250        if (start < 0) {
251            return NOT_FOUND;
252        }
253        if (start >= sz) {
254            start = sz - 1;
255        }
256        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
257            for (int i = start; i >= 0; --i) {
258                if (cs.charAt(i) == searchChar) {
259                    return i;
260                }
261            }
262            return NOT_FOUND;
263        }
264        //supplementary characters (LANG1300)
265        //NOTE - we must do a forward traversal for this to avoid duplicating code points
266        if (searchChar <= Character.MAX_CODE_POINT) {
267            final char[] chars = Character.toChars(searchChar);
268            //make sure it's not the last index
269            if (start == sz - 1) {
270                return NOT_FOUND;
271            }
272            for (int i = start; i >= 0; i--) {
273                final char high = cs.charAt(i);
274                final char low = cs.charAt(i + 1);
275                if (chars[0] == high && chars[1] == low) {
276                    return i;
277                }
278            }
279        }
280        return NOT_FOUND;
281    }
282
283    /**
284     * Green implementation of regionMatches.
285     *
286     * @param cs the {@link CharSequence} to be processed
287     * @param ignoreCase whether or not to be case-insensitive
288     * @param thisStart the index to start on the {@code cs} CharSequence
289     * @param substring the {@link CharSequence} to be looked for
290     * @param start the index to start on the {@code substring} CharSequence
291     * @param length character length of the region
292     * @return whether the region matched
293     * @see String#regionMatches(boolean, int, String, int, int)
294     */
295    static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
296            final CharSequence substring, final int start, final int length)    {
297        if (cs instanceof String && substring instanceof String) {
298            return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
299        }
300        int index1 = thisStart;
301        int index2 = start;
302        int tmpLen = length;
303
304        // Extract these first so we detect NPEs the same as the java.lang.String version
305        final int srcLen = cs.length() - thisStart;
306        final int otherLen = substring.length() - start;
307
308        // Check for invalid parameters
309        if (thisStart < 0 || start < 0 || length < 0) {
310            return false;
311        }
312
313        // Check that the regions are long enough
314        if (srcLen < length || otherLen < length) {
315            return false;
316        }
317
318        while (tmpLen-- > 0) {
319            final char c1 = cs.charAt(index1++);
320            final char c2 = substring.charAt(index2++);
321
322            if (c1 == c2) {
323                continue;
324            }
325
326            if (!ignoreCase) {
327                return false;
328            }
329
330            // The real same check as in String#regionMatches(boolean, int, String, int, int):
331            final char u1 = Character.toUpperCase(c1);
332            final char u2 = Character.toUpperCase(c2);
333            if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
334                return false;
335            }
336        }
337
338        return true;
339    }
340
341    /**
342     * Returns a new {@link CharSequence} that is a subsequence of this
343     * sequence starting with the {@code char} value at the specified index.
344     *
345     * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}.
346     * The length (in {@code char}) of the returned sequence is {@code length() - start},
347     * so if {@code start == end} then an empty sequence is returned.</p>
348     *
349     * @param cs  the specified subsequence, null returns null
350     * @param start  the start index, inclusive, valid
351     * @return a new subsequence, may be null
352     * @throws IndexOutOfBoundsException if {@code start} is negative or if
353     *  {@code start} is greater than {@code length()}
354     */
355    public static CharSequence subSequence(final CharSequence cs, final int start) {
356        return cs == null ? null : cs.subSequence(start, cs.length());
357    }
358
359    /**
360     * Converts the given CharSequence to a char[].
361     *
362     * @param source the {@link CharSequence} to be processed.
363     * @return the resulting char array, never null.
364     * @since 3.11
365     */
366    public static char[] toCharArray(final CharSequence source) {
367        final int len = StringUtils.length(source);
368        if (len == 0) {
369            return ArrayUtils.EMPTY_CHAR_ARRAY;
370        }
371        if (source instanceof String) {
372            return ((String) source).toCharArray();
373        }
374        final char[] array = new char[len];
375        for (int i = 0; i < len; i++) {
376            array[i] = source.charAt(i);
377        }
378        return array;
379    }
380
381    /**
382     * {@link CharSequenceUtils} instances should NOT be constructed in
383     * standard programming.
384     *
385     * <p>This constructor is public to permit tools that require a JavaBean
386     * instance to operate.</p>
387     *
388     * @deprecated TODO Make private in 4.0.
389     */
390    @Deprecated
391    public CharSequenceUtils() {
392        // empty
393    }
394}