/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3;

19  /**
20   * <p>Operations on {@link CharSequence} that are
21   * {@code null} safe.</p>
22   *
23   * @see CharSequence
24   * @since 3.0
25   */
26  public class CharSequenceUtils {
27  
28      private static final int NOT_FOUND = -1;
29  
30      /**
31       * <p>{@code CharSequenceUtils} instances should NOT be constructed in
32       * standard programming. </p>
33       *
34       * <p>This constructor is public to permit tools that require a JavaBean
35       * instance to operate.</p>
36       */
37      public CharSequenceUtils() {
38      }
39  
40      //-----------------------------------------------------------------------
41      /**
42       * <p>Returns a new {@code CharSequence} that is a subsequence of this
43       * sequence starting with the {@code char} value at the specified index.</p>
44       *
45       * <p>This provides the {@code CharSequence} equivalent to {@link String#substring(int)}.
46       * The length (in {@code char}) of the returned sequence is {@code length() - start},
47       * so if {@code start == end} then an empty sequence is returned.</p>
48       *
49       * @param cs  the specified subsequence, null returns null
50       * @param start  the start index, inclusive, valid
51       * @return a new subsequence, may be null
52       * @throws IndexOutOfBoundsException if {@code start} is negative or if
53       *  {@code start} is greater than {@code length()}
54       */
55      public static CharSequence subSequence(final CharSequence cs, final int start) {
56          return cs == null ? null : cs.subSequence(start, cs.length());
57      }
58  
59      //-----------------------------------------------------------------------
60      /**
61       * Returns the index within {@code cs} of the first occurrence of the
62       * specified character, starting the search at the specified index.
63       * <p>
64       * If a character with value {@code searchChar} occurs in the
65       * character sequence represented by the {@code cs}
66       * object at an index no smaller than {@code start}, then
67       * the index of the first such occurrence is returned. For values
68       * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
69       * this is the smallest value <i>k</i> such that:
70       * <blockquote><pre>
71       * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
72       * </pre></blockquote>
73       * is true. For other values of {@code searchChar}, it is the
74       * smallest value <i>k</i> such that:
75       * <blockquote><pre>
76       * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
77       * </pre></blockquote>
78       * is true. In either case, if no such character occurs inm {@code cs}
79       * at or after position {@code start}, then
80       * {@code -1} is returned.
81       *
82       * <p>
83       * There is no restriction on the value of {@code start}. If it
84       * is negative, it has the same effect as if it were zero: the entire
85       * {@code CharSequence} may be searched. If it is greater than
86       * the length of {@code cs}, it has the same effect as if it were
87       * equal to the length of {@code cs}: {@code -1} is returned.
88       *
89       * <p>All indices are specified in {@code char} values
90       * (Unicode code units).
91       *
92       * @param cs  the {@code CharSequence} to be processed, not null
93       * @param searchChar  the char to be searched for
94       * @param start  the start index, negative starts at the string start
95       * @return the index where the search char was found, -1 if not found
96       * @since 3.6 updated to behave more like {@code String}
97       */
98      static int indexOf(final CharSequence cs, final int searchChar, int start) {
99          if (cs instanceof String) {
100             return ((String) cs).indexOf(searchChar, start);
101         }
102         final int sz = cs.length();
103         if (start < 0) {
104             start = 0;
105         }
106         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
107             for (int i = start; i < sz; i++) {
108                 if (cs.charAt(i) == searchChar) {
109                     return i;
110                 }
111             }
112             return NOT_FOUND;
113         }
114         //supplementary characters (LANG1300)
115         if (searchChar <= Character.MAX_CODE_POINT) {
116             final char[] chars = Character.toChars(searchChar);
117             for (int i = start; i < sz - 1; i++) {
118                 final char high = cs.charAt(i);
119                 final char low = cs.charAt(i + 1);
120                 if (high == chars[0] && low == chars[1]) {
121                     return i;
122                 }
123             }
124         }
125         return NOT_FOUND;
126     }
127 
128     /**
129      * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
130      *
131      * @param cs the {@code CharSequence} to be processed
132      * @param searchChar the {@code CharSequence} to be searched for
133      * @param start the start index
134      * @return the index where the search sequence was found
135      */
136     static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
137         if (cs instanceof String) {
138             return ((String) cs).indexOf(searchChar.toString(), start);
139         } else if (cs instanceof StringBuilder) {
140             return ((StringBuilder) cs).indexOf(searchChar.toString(), start);
141         } else if (cs instanceof StringBuffer) {
142             return ((StringBuffer) cs).indexOf(searchChar.toString(), start);
143         }
144         return cs.toString().indexOf(searchChar.toString(), start);
145 //        if (cs instanceof String && searchChar instanceof String) {
146 //            // TODO: Do we assume searchChar is usually relatively small;
147 //            //       If so then calling toString() on it is better than reverting to
148 //            //       the green implementation in the else block
149 //            return ((String) cs).indexOf((String) searchChar, start);
150 //        } else {
151 //            // TODO: Implement rather than convert to String
152 //            return cs.toString().indexOf(searchChar.toString(), start);
153 //        }
154     }
155 
156     /**
157      * Returns the index within {@code cs} of the last occurrence of
158      * the specified character, searching backward starting at the
159      * specified index. For values of {@code searchChar} in the range
160      * from 0 to 0xFFFF (inclusive), the index returned is the largest
161      * value <i>k</i> such that:
162      * <blockquote><pre>
163      * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
164      * </pre></blockquote>
165      * is true. For other values of {@code searchChar}, it is the
166      * largest value <i>k</i> such that:
167      * <blockquote><pre>
168      * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
169      * </pre></blockquote>
170      * is true. In either case, if no such character occurs in {@code cs}
171      * at or before position {@code start}, then {@code -1} is returned.
172      *
173      * <p>All indices are specified in {@code char} values
174      * (Unicode code units).
175      *
176      * @param cs  the {@code CharSequence} to be processed
177      * @param searchChar  the char to be searched for
178      * @param start  the start index, negative returns -1, beyond length starts at end
179      * @return the index where the search char was found, -1 if not found
180      * @since 3.6 updated to behave more like {@code String}
181      */
182     static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
183         if (cs instanceof String) {
184             return ((String) cs).lastIndexOf(searchChar, start);
185         }
186         final int sz = cs.length();
187         if (start < 0) {
188             return NOT_FOUND;
189         }
190         if (start >= sz) {
191             start = sz - 1;
192         }
193         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
194             for (int i = start; i >= 0; --i) {
195                 if (cs.charAt(i) == searchChar) {
196                     return i;
197                 }
198             }
199             return NOT_FOUND;
200         }
201         //supplementary characters (LANG1300)
202         //NOTE - we must do a forward traversal for this to avoid duplicating code points
203         if (searchChar <= Character.MAX_CODE_POINT) {
204             final char[] chars = Character.toChars(searchChar);
205             //make sure it's not the last index
206             if (start == sz - 1) {
207                 return NOT_FOUND;
208             }
209             for (int i = start; i >= 0; i--) {
210                 final char high = cs.charAt(i);
211                 final char low = cs.charAt(i + 1);
212                 if (chars[0] == high && chars[1] == low) {
213                     return i;
214                 }
215             }
216         }
217         return NOT_FOUND;
218     }
219 
220     static final int TO_STRING_LIMIT = 16;
221 
222     /**
223      * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
224      *
225      * @param cs the {@code CharSequence} to be processed
226      * @param searchChar the {@code CharSequence} to find
227      * @param start the start index
228      * @return the index where the search sequence was found
229      */
230     static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
231         if (searchChar == null || cs == null) {
232             return NOT_FOUND;
233         }
234         if (searchChar instanceof String) {
235             if (cs instanceof String) {
236                 return ((String) cs).lastIndexOf((String) searchChar, start);
237             } else if (cs instanceof StringBuilder) {
238                 return ((StringBuilder) cs).lastIndexOf((String) searchChar, start);
239             } else if (cs instanceof StringBuffer) {
240                 return ((StringBuffer) cs).lastIndexOf((String) searchChar, start);
241             }
242         }
243 
244         final int len1 = cs.length();
245         final int len2 = searchChar.length();
246 
247         if (start > len1) {
248             start = len1;
249         }
250 
251         if (start < 0 || len2 < 0 || len2 > len1) {
252             return NOT_FOUND;
253         }
254 
255         if (len2 == 0) {
256             return start;
257         }
258 
259         if (len2 <= TO_STRING_LIMIT) {
260             if (cs instanceof String) {
261                 return ((String) cs).lastIndexOf(searchChar.toString(), start);
262             } else if (cs instanceof StringBuilder) {
263                 return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
264             } else if (cs instanceof StringBuffer) {
265                 return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
266             }
267         }
268 
269         if (start + len2 > len1) {
270             start = len1 - len2;
271         }
272 
273         final char char0 = searchChar.charAt(0);
274 
275         int i = start;
276         while (true) {
277             while (cs.charAt(i) != char0) {
278                 i--;
279                 if (i < 0) {
280                     return NOT_FOUND;
281                 }
282             }
283             if (checkLaterThan1(cs, searchChar, len2, i)) {
284                 return i;
285             }
286             i--;
287             if (i < 0) {
288                 return NOT_FOUND;
289             }
290         }
291     }
292 
293     private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) {
294         for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
295             if (cs.charAt(start1 + i) != searchChar.charAt(i)
296                     ||
297                     cs.charAt(start1 + j) != searchChar.charAt(j)
298             ) {
299                 return false;
300             }
301         }
302         return true;
303     }
304 
305     /**
306      * Converts the given CharSequence to a char[].
307      *
308      * @param source the {@code CharSequence} to be processed.
309      * @return the resulting char array, never null.
310      * @since 3.11
311      */
312     public static char[] toCharArray(final CharSequence source) {
313         final int len = StringUtils.length(source);
314         if (len == 0) {
315             return ArrayUtils.EMPTY_CHAR_ARRAY;
316         }
317         if (source instanceof String) {
318             return ((String) source).toCharArray();
319         }
320         final char[] array = new char[len];
321         for (int i = 0; i < len; i++) {
322             array[i] = source.charAt(i);
323         }
324         return array;
325     }
326 
327     /**
328      * Green implementation of regionMatches.
329      *
330      * @param cs the {@code CharSequence} to be processed
331      * @param ignoreCase whether or not to be case insensitive
332      * @param thisStart the index to start on the {@code cs} CharSequence
333      * @param substring the {@code CharSequence} to be looked for
334      * @param start the index to start on the {@code substring} CharSequence
335      * @param length character length of the region
336      * @return whether the region matched
337      */
338     static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
339             final CharSequence substring, final int start, final int length)    {
340         if (cs instanceof String && substring instanceof String) {
341             return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
342         }
343         int index1 = thisStart;
344         int index2 = start;
345         int tmpLen = length;
346 
347         // Extract these first so we detect NPEs the same as the java.lang.String version
348         final int srcLen = cs.length() - thisStart;
349         final int otherLen = substring.length() - start;
350 
351         // Check for invalid parameters
352         if (thisStart < 0 || start < 0 || length < 0) {
353             return false;
354         }
355 
356         // Check that the regions are long enough
357         if (srcLen < length || otherLen < length) {
358             return false;
359         }
360 
361         while (tmpLen-- > 0) {
362             final char c1 = cs.charAt(index1++);
363             final char c2 = substring.charAt(index2++);
364 
365             if (c1 == c2) {
366                 continue;
367             }
368 
369             if (!ignoreCase) {
370                 return false;
371             }
372 
373             // The real same check as in String.regionMatches():
374             final char u1 = Character.toUpperCase(c1);
375             final char u2 = Character.toUpperCase(c2);
376             if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
377                 return false;
378             }
379         }
380 
381         return true;
382     }
383 }