View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3;
18  
19  /**
20   * Operations on {@link CharSequence} that are
21   * {@code null} safe.
22   *
23   * @see CharSequence
24   * @since 3.0
25   */
26  public class CharSequenceUtils {
27  
28      private static final int NOT_FOUND = -1;
29  
30      static final int TO_STRING_LIMIT = 16;
31  
32      private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) {
33          for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
34              if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) {
35                  return false;
36              }
37          }
38          return true;
39      }
40  
41      /**
42       * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
43       *
44       * @param cs         the {@link CharSequence} to be processed
45       * @param searchChar the {@link CharSequence} to be searched for
46       * @param start      the start index
47       * @return the index where the search sequence was found, or {@code -1} if there is no such occurrence.
48       */
49      static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
50          if (cs == null || searchChar == null) {
51              return StringUtils.INDEX_NOT_FOUND;
52          }
53          if (cs instanceof String) {
54              return ((String) cs).indexOf(searchChar.toString(), start);
55          }
56          if (cs instanceof StringBuilder) {
57              return ((StringBuilder) cs).indexOf(searchChar.toString(), start);
58          }
59          if (cs instanceof StringBuffer) {
60              return ((StringBuffer) cs).indexOf(searchChar.toString(), start);
61          }
62          return cs.toString().indexOf(searchChar.toString(), start);
63  //        if (cs instanceof String && searchChar instanceof String) {
64  //            // TODO: Do we assume searchChar is usually relatively small;
65  //            //       If so then calling toString() on it is better than reverting to
66  //            //       the green implementation in the else block
67  //            return ((String) cs).indexOf((String) searchChar, start);
68  //        } else {
69  //            // TODO: Implement rather than convert to String
70  //            return cs.toString().indexOf(searchChar.toString(), start);
71  //        }
72      }
73  
74      /**
75       * Returns the index within {@code cs} of the first occurrence of the specified character, starting the search at the specified index.
76       * <p>
77       * If a character with value {@code searchChar} occurs in the character sequence represented by the {@code cs} object at an index no smaller than
78       * {@code start}, then the index of the first such occurrence is returned. For values of {@code searchChar} in the range from 0 to 0xFFFF (inclusive), this
79       * is the smallest value <em>k</em> such that:
80       * </p>
81       *
82       * <pre>
83       * (this.charAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &gt;= start)
84       * </pre>
85       * <p>
86       * is true. For other values of {@code searchChar}, it is the smallest value <em>k</em> such that:
87       * </p>
88       *
89       * <pre>
90       * (this.codePointAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &gt;= start)
91       * </pre>
92       * <p>
93       * is true. In either case, if no such character occurs inm {@code cs} at or after position {@code start}, then {@code -1} is returned.
94       * </p>
95       * <p>
96       * There is no restriction on the value of {@code start}. If it is negative, it has the same effect as if it were zero: the entire {@link CharSequence} may
97       * be searched. If it is greater than the length of {@code cs}, it has the same effect as if it were equal to the length of {@code cs}: {@code -1} is
98       * returned.
99       * </p>
100      * <p>
101      * All indices are specified in {@code char} values (Unicode code units).
102      * </p>
103      *
104      * @param cs         the {@link CharSequence} to be processed, not null
105      * @param searchChar the char to be searched for
106      * @param start      the start index, negative starts at the string start
107      * @return the index where the search char was found, -1 if not found
108      * @since 3.6 updated to behave more like {@link String}
109      */
110     static int indexOf(final CharSequence cs, final int searchChar, int start) {
111         if (cs instanceof String) {
112             return ((String) cs).indexOf(searchChar, start);
113         }
114         final int sz = cs.length();
115         if (start < 0) {
116             start = 0;
117         }
118         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
119             for (int i = start; i < sz; i++) {
120                 if (cs.charAt(i) == searchChar) {
121                     return i;
122                 }
123             }
124             return NOT_FOUND;
125         }
126         //supplementary characters (LANG1300)
127         if (searchChar <= Character.MAX_CODE_POINT) {
128             final char[] chars = Character.toChars(searchChar);
129             for (int i = start; i < sz - 1; i++) {
130                 final char high = cs.charAt(i);
131                 final char low = cs.charAt(i + 1);
132                 if (high == chars[0] && low == chars[1]) {
133                     return i;
134                 }
135             }
136         }
137         return NOT_FOUND;
138     }
139 
140     /**
141      * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
142      *
143      * @param cs the {@link CharSequence} to be processed
144      * @param searchChar the {@link CharSequence} to find
145      * @param start the start index
146      * @return the index where the search sequence was found
147      */
148     static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
149         if (searchChar == null || cs == null) {
150             return NOT_FOUND;
151         }
152         if (searchChar instanceof String) {
153             if (cs instanceof String) {
154                 return ((String) cs).lastIndexOf((String) searchChar, start);
155             }
156             if (cs instanceof StringBuilder) {
157                 return ((StringBuilder) cs).lastIndexOf((String) searchChar, start);
158             }
159             if (cs instanceof StringBuffer) {
160                 return ((StringBuffer) cs).lastIndexOf((String) searchChar, start);
161             }
162         }
163 
164         final int len1 = cs.length();
165         final int len2 = searchChar.length();
166 
167         if (start > len1) {
168             start = len1;
169         }
170 
171         if (start < 0 || len2 > len1) {
172             return NOT_FOUND;
173         }
174 
175         if (len2 == 0) {
176             return start;
177         }
178 
179         if (len2 <= TO_STRING_LIMIT) {
180             if (cs instanceof String) {
181                 return ((String) cs).lastIndexOf(searchChar.toString(), start);
182             }
183             if (cs instanceof StringBuilder) {
184                 return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
185             }
186             if (cs instanceof StringBuffer) {
187                 return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
188             }
189         }
190 
191         if (start + len2 > len1) {
192             start = len1 - len2;
193         }
194 
195         final char char0 = searchChar.charAt(0);
196 
197         int i = start;
198         while (true) {
199             while (cs.charAt(i) != char0) {
200                 i--;
201                 if (i < 0) {
202                     return NOT_FOUND;
203                 }
204             }
205             if (checkLaterThan1(cs, searchChar, len2, i)) {
206                 return i;
207             }
208             i--;
209             if (i < 0) {
210                 return NOT_FOUND;
211             }
212         }
213     }
214 
215     /**
216      * Returns the index within {@code cs} of the last occurrence of the specified character, searching backward starting at the specified index. For values of
217      * {@code searchChar} in the range from 0 to 0xFFFF (inclusive), the index returned is the largest value <em>k</em> such that:
218      * </p>
219      *
220      * <pre>
221      * (this.charAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &lt;= start)
222      * </pre>
223      *
224      * <p>
225      * is true. For other values of {@code searchChar}, it is the largest value <em>k</em> such that:
226      * <p>
227      *
228      * <pre>
229      * (this.codePointAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &lt;= start)
230      * </pre>
231      *
232      * <p>
233      * is true. In either case, if no such character occurs in {@code cs} at or before position {@code start}, then {@code -1} is returned.
234      * </p>
235      * <p>
236      * All indices are specified in {@code char} values (Unicode code units).
237      * </p>
238      *
239      * @param cs         the {@link CharSequence} to be processed.
240      * @param searchChar the char to be searched for.
241      * @param start      the start index, negative returns -1, beyond length starts at end.
242      * @return the index where the search char was found, -1 if not found.
243      * @since 3.6 updated to behave more like {@link String}
244      */
245     static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
246         if (cs instanceof String) {
247             return ((String) cs).lastIndexOf(searchChar, start);
248         }
249         final int sz = cs.length();
250         if (start < 0) {
251             return NOT_FOUND;
252         }
253         if (start >= sz) {
254             start = sz - 1;
255         }
256         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
257             for (int i = start; i >= 0; --i) {
258                 if (cs.charAt(i) == searchChar) {
259                     return i;
260                 }
261             }
262             return NOT_FOUND;
263         }
264         //supplementary characters (LANG1300)
265         //NOTE - we must do a forward traversal for this to avoid duplicating code points
266         if (searchChar <= Character.MAX_CODE_POINT) {
267             final char[] chars = Character.toChars(searchChar);
268             //make sure it's not the last index
269             if (start == sz - 1) {
270                 return NOT_FOUND;
271             }
272             for (int i = start; i >= 0; i--) {
273                 final char high = cs.charAt(i);
274                 final char low = cs.charAt(i + 1);
275                 if (chars[0] == high && chars[1] == low) {
276                     return i;
277                 }
278             }
279         }
280         return NOT_FOUND;
281     }
282 
283     /**
284      * Green implementation of regionMatches.
285      *
286      * @param cs the {@link CharSequence} to be processed
287      * @param ignoreCase whether or not to be case-insensitive
288      * @param thisStart the index to start on the {@code cs} CharSequence
289      * @param substring the {@link CharSequence} to be looked for
290      * @param start the index to start on the {@code substring} CharSequence
291      * @param length character length of the region
292      * @return whether the region matched
293      * @see String#regionMatches(boolean, int, String, int, int)
294      */
295     static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
296             final CharSequence substring, final int start, final int length)    {
297         if (cs instanceof String && substring instanceof String) {
298             return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
299         }
300         int index1 = thisStart;
301         int index2 = start;
302         int tmpLen = length;
303 
304         // Extract these first so we detect NPEs the same as the java.lang.String version
305         final int srcLen = cs.length() - thisStart;
306         final int otherLen = substring.length() - start;
307 
308         // Check for invalid parameters
309         if (thisStart < 0 || start < 0 || length < 0) {
310             return false;
311         }
312 
313         // Check that the regions are long enough
314         if (srcLen < length || otherLen < length) {
315             return false;
316         }
317 
318         while (tmpLen-- > 0) {
319             final char c1 = cs.charAt(index1++);
320             final char c2 = substring.charAt(index2++);
321 
322             if (c1 == c2) {
323                 continue;
324             }
325 
326             if (!ignoreCase) {
327                 return false;
328             }
329 
330             // The real same check as in String#regionMatches(boolean, int, String, int, int):
331             final char u1 = Character.toUpperCase(c1);
332             final char u2 = Character.toUpperCase(c2);
333             if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
334                 return false;
335             }
336         }
337 
338         return true;
339     }
340 
341     /**
342      * Returns a new {@link CharSequence} that is a subsequence of this
343      * sequence starting with the {@code char} value at the specified index.
344      *
345      * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}.
346      * The length (in {@code char}) of the returned sequence is {@code length() - start},
347      * so if {@code start == end} then an empty sequence is returned.</p>
348      *
349      * @param cs  the specified subsequence, null returns null
350      * @param start  the start index, inclusive, valid
351      * @return a new subsequence, may be null
352      * @throws IndexOutOfBoundsException if {@code start} is negative or if
353      *  {@code start} is greater than {@code length()}
354      */
355     public static CharSequence subSequence(final CharSequence cs, final int start) {
356         return cs == null ? null : cs.subSequence(start, cs.length());
357     }
358 
359     /**
360      * Converts the given CharSequence to a char[].
361      *
362      * @param source the {@link CharSequence} to be processed.
363      * @return the resulting char array, never null.
364      * @since 3.11
365      */
366     public static char[] toCharArray(final CharSequence source) {
367         final int len = StringUtils.length(source);
368         if (len == 0) {
369             return ArrayUtils.EMPTY_CHAR_ARRAY;
370         }
371         if (source instanceof String) {
372             return ((String) source).toCharArray();
373         }
374         final char[] array = new char[len];
375         for (int i = 0; i < len; i++) {
376             array[i] = source.charAt(i);
377         }
378         return array;
379     }
380 
381     /**
382      * {@link CharSequenceUtils} instances should NOT be constructed in
383      * standard programming.
384      *
385      * <p>This constructor is public to permit tools that require a JavaBean
386      * instance to operate.</p>
387      *
388      * @deprecated TODO Make private in 4.0.
389      */
390     @Deprecated
391     public CharSequenceUtils() {
392         // empty
393     }
394 }