View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3;
18  
19  /**
20   * Operations on {@link CharSequence} that are
21   * {@code null} safe.
22   *
23   * @see CharSequence
24   * @since 3.0
25   */
26  public class CharSequenceUtils {
27  
28      private static final int NOT_FOUND = -1;
29  
30      static final int TO_STRING_LIMIT = 16;
31  
32      private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) {
33          for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
34              if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) {
35                  return false;
36              }
37          }
38          return true;
39      }
40  
41      /**
42       * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
43       *
44       * @param cs         the {@link CharSequence} to be processed
45       * @param searchChar the {@link CharSequence} to be searched for
46       * @param start      the start index
47       * @return the index where the search sequence was found, or {@code -1} if there is no such occurrence.
48       */
49      static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
50          if (cs == null || searchChar == null) {
51              return StringUtils.INDEX_NOT_FOUND;
52          }
53          if (cs instanceof String) {
54              return ((String) cs).indexOf(searchChar.toString(), start);
55          }
56          if (cs instanceof StringBuilder) {
57              return ((StringBuilder) cs).indexOf(searchChar.toString(), start);
58          }
59          if (cs instanceof StringBuffer) {
60              return ((StringBuffer) cs).indexOf(searchChar.toString(), start);
61          }
62          return cs.toString().indexOf(searchChar.toString(), start);
63  //        if (cs instanceof String && searchChar instanceof String) {
64  //            // TODO: Do we assume searchChar is usually relatively small;
65  //            //       If so then calling toString() on it is better than reverting to
66  //            //       the green implementation in the else block
67  //            return ((String) cs).indexOf((String) searchChar, start);
68  //        } else {
69  //            // TODO: Implement rather than convert to String
70  //            return cs.toString().indexOf(searchChar.toString(), start);
71  //        }
72      }
73  
74      /**
75       * Returns the index within {@code cs} of the first occurrence of the
76       * specified character, starting the search at the specified index.
77       * <p>
78       * If a character with value {@code searchChar} occurs in the
79       * character sequence represented by the {@code cs}
80       * object at an index no smaller than {@code start}, then
81       * the index of the first such occurrence is returned. For values
82       * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
83       * this is the smallest value <em>k</em> such that:
84       * </p>
85       * <blockquote><pre>
86       * (this.charAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &gt;= start)
87       * </pre></blockquote>
88       * is true. For other values of {@code searchChar}, it is the
89       * smallest value <em>k</em> such that:
90       * <blockquote><pre>
91       * (this.codePointAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &gt;= start)
92       * </pre></blockquote>
93       * <p>
94       * is true. In either case, if no such character occurs inm {@code cs}
95       * at or after position {@code start}, then
96       * {@code -1} is returned.
97       * </p>
98       * <p>
99       * There is no restriction on the value of {@code start}. If it
100      * is negative, it has the same effect as if it were zero: the entire
101      * {@link CharSequence} may be searched. If it is greater than
102      * the length of {@code cs}, it has the same effect as if it were
103      * equal to the length of {@code cs}: {@code -1} is returned.
104      * </p>
105      * <p>All indices are specified in {@code char} values
106      * (Unicode code units).
107      * </p>
108      *
109      * @param cs  the {@link CharSequence} to be processed, not null
110      * @param searchChar  the char to be searched for
111      * @param start  the start index, negative starts at the string start
112      * @return the index where the search char was found, -1 if not found
113      * @since 3.6 updated to behave more like {@link String}
114      */
115     static int indexOf(final CharSequence cs, final int searchChar, int start) {
116         if (cs instanceof String) {
117             return ((String) cs).indexOf(searchChar, start);
118         }
119         final int sz = cs.length();
120         if (start < 0) {
121             start = 0;
122         }
123         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
124             for (int i = start; i < sz; i++) {
125                 if (cs.charAt(i) == searchChar) {
126                     return i;
127                 }
128             }
129             return NOT_FOUND;
130         }
131         //supplementary characters (LANG1300)
132         if (searchChar <= Character.MAX_CODE_POINT) {
133             final char[] chars = Character.toChars(searchChar);
134             for (int i = start; i < sz - 1; i++) {
135                 final char high = cs.charAt(i);
136                 final char low = cs.charAt(i + 1);
137                 if (high == chars[0] && low == chars[1]) {
138                     return i;
139                 }
140             }
141         }
142         return NOT_FOUND;
143     }
144 
145     /**
146      * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
147      *
148      * @param cs the {@link CharSequence} to be processed
149      * @param searchChar the {@link CharSequence} to find
150      * @param start the start index
151      * @return the index where the search sequence was found
152      */
153     static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
154         if (searchChar == null || cs == null) {
155             return NOT_FOUND;
156         }
157         if (searchChar instanceof String) {
158             if (cs instanceof String) {
159                 return ((String) cs).lastIndexOf((String) searchChar, start);
160             }
161             if (cs instanceof StringBuilder) {
162                 return ((StringBuilder) cs).lastIndexOf((String) searchChar, start);
163             }
164             if (cs instanceof StringBuffer) {
165                 return ((StringBuffer) cs).lastIndexOf((String) searchChar, start);
166             }
167         }
168 
169         final int len1 = cs.length();
170         final int len2 = searchChar.length();
171 
172         if (start > len1) {
173             start = len1;
174         }
175 
176         if (start < 0 || len2 > len1) {
177             return NOT_FOUND;
178         }
179 
180         if (len2 == 0) {
181             return start;
182         }
183 
184         if (len2 <= TO_STRING_LIMIT) {
185             if (cs instanceof String) {
186                 return ((String) cs).lastIndexOf(searchChar.toString(), start);
187             }
188             if (cs instanceof StringBuilder) {
189                 return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
190             }
191             if (cs instanceof StringBuffer) {
192                 return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
193             }
194         }
195 
196         if (start + len2 > len1) {
197             start = len1 - len2;
198         }
199 
200         final char char0 = searchChar.charAt(0);
201 
202         int i = start;
203         while (true) {
204             while (cs.charAt(i) != char0) {
205                 i--;
206                 if (i < 0) {
207                     return NOT_FOUND;
208                 }
209             }
210             if (checkLaterThan1(cs, searchChar, len2, i)) {
211                 return i;
212             }
213             i--;
214             if (i < 0) {
215                 return NOT_FOUND;
216             }
217         }
218     }
219 
220     /**
221      * Returns the index within {@code cs} of the last occurrence of
222      * the specified character, searching backward starting at the
223      * specified index. For values of {@code searchChar} in the range
224      * from 0 to 0xFFFF (inclusive), the index returned is the largest
225      * value <em>k</em> such that:
226      * <blockquote><pre>
227      * (this.charAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &lt;= start)
228      * </pre></blockquote>
229      * is true. For other values of {@code searchChar}, it is the
230      * largest value <em>k</em> such that:
231      * <blockquote><pre>
232      * (this.codePointAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &lt;= start)
233      * </pre></blockquote>
234      * is true. In either case, if no such character occurs in {@code cs}
235      * at or before position {@code start}, then {@code -1} is returned.
236      *
237      * <p>
238      * All indices are specified in {@code char} values
239      * (Unicode code units).
240      * </p>
241      *
242      * @param cs  the {@link CharSequence} to be processed
243      * @param searchChar  the char to be searched for
244      * @param start  the start index, negative returns -1, beyond length starts at end
245      * @return the index where the search char was found, -1 if not found
246      * @since 3.6 updated to behave more like {@link String}
247      */
248     static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
249         if (cs instanceof String) {
250             return ((String) cs).lastIndexOf(searchChar, start);
251         }
252         final int sz = cs.length();
253         if (start < 0) {
254             return NOT_FOUND;
255         }
256         if (start >= sz) {
257             start = sz - 1;
258         }
259         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
260             for (int i = start; i >= 0; --i) {
261                 if (cs.charAt(i) == searchChar) {
262                     return i;
263                 }
264             }
265             return NOT_FOUND;
266         }
267         //supplementary characters (LANG1300)
268         //NOTE - we must do a forward traversal for this to avoid duplicating code points
269         if (searchChar <= Character.MAX_CODE_POINT) {
270             final char[] chars = Character.toChars(searchChar);
271             //make sure it's not the last index
272             if (start == sz - 1) {
273                 return NOT_FOUND;
274             }
275             for (int i = start; i >= 0; i--) {
276                 final char high = cs.charAt(i);
277                 final char low = cs.charAt(i + 1);
278                 if (chars[0] == high && chars[1] == low) {
279                     return i;
280                 }
281             }
282         }
283         return NOT_FOUND;
284     }
285 
286     /**
287      * Green implementation of regionMatches.
288      *
289      * @param cs the {@link CharSequence} to be processed
290      * @param ignoreCase whether or not to be case-insensitive
291      * @param thisStart the index to start on the {@code cs} CharSequence
292      * @param substring the {@link CharSequence} to be looked for
293      * @param start the index to start on the {@code substring} CharSequence
294      * @param length character length of the region
295      * @return whether the region matched
296      * @see String#regionMatches(boolean, int, String, int, int)
297      */
298     static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
299             final CharSequence substring, final int start, final int length)    {
300         if (cs instanceof String && substring instanceof String) {
301             return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
302         }
303         int index1 = thisStart;
304         int index2 = start;
305         int tmpLen = length;
306 
307         // Extract these first so we detect NPEs the same as the java.lang.String version
308         final int srcLen = cs.length() - thisStart;
309         final int otherLen = substring.length() - start;
310 
311         // Check for invalid parameters
312         if (thisStart < 0 || start < 0 || length < 0) {
313             return false;
314         }
315 
316         // Check that the regions are long enough
317         if (srcLen < length || otherLen < length) {
318             return false;
319         }
320 
321         while (tmpLen-- > 0) {
322             final char c1 = cs.charAt(index1++);
323             final char c2 = substring.charAt(index2++);
324 
325             if (c1 == c2) {
326                 continue;
327             }
328 
329             if (!ignoreCase) {
330                 return false;
331             }
332 
333             // The real same check as in String#regionMatches(boolean, int, String, int, int):
334             final char u1 = Character.toUpperCase(c1);
335             final char u2 = Character.toUpperCase(c2);
336             if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
337                 return false;
338             }
339         }
340 
341         return true;
342     }
343 
344     /**
345      * Returns a new {@link CharSequence} that is a subsequence of this
346      * sequence starting with the {@code char} value at the specified index.
347      *
348      * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}.
349      * The length (in {@code char}) of the returned sequence is {@code length() - start},
350      * so if {@code start == end} then an empty sequence is returned.</p>
351      *
352      * @param cs  the specified subsequence, null returns null
353      * @param start  the start index, inclusive, valid
354      * @return a new subsequence, may be null
355      * @throws IndexOutOfBoundsException if {@code start} is negative or if
356      *  {@code start} is greater than {@code length()}
357      */
358     public static CharSequence subSequence(final CharSequence cs, final int start) {
359         return cs == null ? null : cs.subSequence(start, cs.length());
360     }
361 
362     /**
363      * Converts the given CharSequence to a char[].
364      *
365      * @param source the {@link CharSequence} to be processed.
366      * @return the resulting char array, never null.
367      * @since 3.11
368      */
369     public static char[] toCharArray(final CharSequence source) {
370         final int len = StringUtils.length(source);
371         if (len == 0) {
372             return ArrayUtils.EMPTY_CHAR_ARRAY;
373         }
374         if (source instanceof String) {
375             return ((String) source).toCharArray();
376         }
377         final char[] array = new char[len];
378         for (int i = 0; i < len; i++) {
379             array[i] = source.charAt(i);
380         }
381         return array;
382     }
383 
384     /**
385      * {@link CharSequenceUtils} instances should NOT be constructed in
386      * standard programming.
387      *
388      * <p>This constructor is public to permit tools that require a JavaBean
389      * instance to operate.</p>
390      *
391      * @deprecated TODO Make private in 4.0.
392      */
393     @Deprecated
394     public CharSequenceUtils() {
395         // empty
396     }
397 }