View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3;
18  
/**
 * <p>Operations on {@link CharSequence} that are
 * {@code null} safe.</p>
 *
 * <p>The package-private helpers in this class mirror the corresponding
 * {@link String} methods for arbitrary {@code CharSequence} implementations.
 * When the arguments are already {@code String}s they delegate to the
 * {@code String} method directly.</p>
 *
 * @see CharSequence
 * @since 3.0
 */
public class CharSequenceUtils {

    /** Value returned by the search methods when nothing is found. */
    private static final int NOT_FOUND = -1;

    /**
     * <p>{@code CharSequenceUtils} instances should NOT be constructed in
     * standard programming. </p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public CharSequenceUtils() {
        super();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Returns a new {@code CharSequence} that is a subsequence of this
     * sequence starting with the {@code char} value at the specified index.</p>
     *
     * <p>This provides the {@code CharSequence} equivalent to {@link String#substring(int)}.
     * The length (in {@code char}) of the returned sequence is {@code length() - start},
     * so if {@code start == length()} then an empty sequence is returned.</p>
     *
     * @param cs  the sequence to take the subsequence from, null returns null
     * @param start  the start index, inclusive, valid
     * @return a new subsequence, may be null
     * @throws IndexOutOfBoundsException if {@code start} is negative or if
     *  {@code start} is greater than {@code length()}
     */
    public static CharSequence subSequence(final CharSequence cs, final int start) {
        return cs == null ? null : cs.subSequence(start, cs.length());
    }

    //-----------------------------------------------------------------------
    /**
     * Returns the index within <code>cs</code> of the first occurrence of the
     * specified character, starting the search at the specified index.
     * <p>
     * If a character with value <code>searchChar</code> occurs in the
     * character sequence represented by the <code>cs</code>
     * object at an index no smaller than <code>start</code>, then
     * the index of the first such occurrence is returned. For values
     * of <code>searchChar</code> in the range from 0 to 0xFFFF (inclusive),
     * this is the smallest value <i>k</i> such that:
     * <blockquote><pre>
     * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
     * </pre></blockquote>
     * is true. For other values of <code>searchChar</code>, it is the
     * smallest value <i>k</i> such that:
     * <blockquote><pre>
     * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
     * </pre></blockquote>
     * is true. In either case, if no such character occurs in <code>cs</code>
     * at or after position <code>start</code>, then
     * <code>-1</code> is returned.
     *
     * <p>
     * There is no restriction on the value of <code>start</code>. If it
     * is negative, it has the same effect as if it were zero: the entire
     * <code>CharSequence</code> may be searched. If it is greater than
     * the length of <code>cs</code>, it has the same effect as if it were
     * equal to the length of <code>cs</code>: <code>-1</code> is returned.
     *
     * <p>A <code>searchChar</code> that is not a valid code point (negative, or
     * greater than {@link Character#MAX_CODE_POINT}) is never found and yields
     * <code>-1</code>.
     *
     * <p>All indices are specified in <code>char</code> values
     * (Unicode code units).
     *
     * @param cs  the {@code CharSequence} to be processed, not null
     * @param searchChar  the char to be searched for
     * @param start  the start index, negative starts at the string start
     * @return the index where the search char was found, -1 if not found
     * @since 3.6 updated to behave more like <code>String</code>
     */
    static int indexOf(final CharSequence cs, final int searchChar, int start) {
        if (cs instanceof String) {
            return ((String) cs).indexOf(searchChar, start);
        }
        final int sz = cs.length();
        if (start < 0) {
            start = 0;
        }
        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            for (int i = start; i < sz; i++) {
                if (cs.charAt(i) == searchChar) {
                    return i;
                }
            }
            // A BMP (or negative) value can never match a surrogate pair. Stop here
            // rather than falling through, where Character.toChars would reject a
            // negative value with an IllegalArgumentException.
            return NOT_FOUND;
        }
        //supplementary characters (LANG1300)
        if (searchChar <= Character.MAX_CODE_POINT) {
            final char[] chars = Character.toChars(searchChar);
            // stop one short of the end: a surrogate pair needs two code units
            for (int i = start; i < sz - 1; i++) {
                final char high = cs.charAt(i);
                final char low = cs.charAt(i + 1);
                if (high == chars[0] && low == chars[1]) {
                    return i;
                }
            }
        }
        return NOT_FOUND;
    }

    /**
     * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
     *
     * <p>Both arguments are converted with {@code toString()} and the search is
     * delegated to {@link String#indexOf(String, int)}.</p>
     *
     * @param cs the {@code CharSequence} to be processed
     * @param searchChar the {@code CharSequence} to be searched for
     * @param start the start index
     * @return the index where the search sequence was found
     */
    static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
        // TODO: Implement directly on CharSequence rather than converting to String
        return cs.toString().indexOf(searchChar.toString(), start);
    }

    /**
     * Returns the index within <code>cs</code> of the last occurrence of
     * the specified character, searching backward starting at the
     * specified index. For values of <code>searchChar</code> in the range
     * from 0 to 0xFFFF (inclusive), the index returned is the largest
     * value <i>k</i> such that:
     * <blockquote><pre>
     * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
     * </pre></blockquote>
     * is true. For other values of <code>searchChar</code>, it is the
     * largest value <i>k</i> such that:
     * <blockquote><pre>
     * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
     * </pre></blockquote>
     * is true. In either case, if no such character occurs in <code>cs</code>
     * at or before position <code>start</code>, then <code>-1</code> is returned.
     *
     * <p>A <code>searchChar</code> that is not a valid code point (negative, or
     * greater than {@link Character#MAX_CODE_POINT}) is never found and yields
     * <code>-1</code>.
     *
     * <p>All indices are specified in <code>char</code> values
     * (Unicode code units).
     *
     * @param cs  the {@code CharSequence} to be processed
     * @param searchChar  the char to be searched for
     * @param start  the start index, negative returns -1, beyond length starts at end
     * @return the index where the search char was found, -1 if not found
     * @since 3.6 updated to behave more like <code>String</code>
     */
    static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
        if (cs instanceof String) {
            return ((String) cs).lastIndexOf(searchChar, start);
        }
        final int sz = cs.length();
        if (start < 0) {
            return NOT_FOUND;
        }
        if (start >= sz) {
            start = sz - 1;
        }
        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            for (int i = start; i >= 0; --i) {
                if (cs.charAt(i) == searchChar) {
                    return i;
                }
            }
            // A BMP (or negative) value can never match a surrogate pair. Stop here
            // rather than falling through, where Character.toChars would reject a
            // negative value with an IllegalArgumentException.
            return NOT_FOUND;
        }
        //supplementary characters (LANG1300)
        if (searchChar <= Character.MAX_CODE_POINT) {
            final char[] chars = Character.toChars(searchChar);
            // A pair occupies indices i and i + 1, so the last index at which one can
            // begin is sz - 2. Clamp the start there instead of giving up when start
            // is the last index; this matches String.lastIndexOf.
            for (int i = Math.min(start, sz - 2); i >= 0; i--) {
                final char high = cs.charAt(i);
                final char low = cs.charAt(i + 1);
                if (chars[0] == high && chars[1] == low) {
                    return i;
                }
            }
        }
        return NOT_FOUND;
    }

    /**
     * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
     *
     * <p>Both arguments are converted with {@code toString()} and the search is
     * delegated to {@link String#lastIndexOf(String, int)}.</p>
     *
     * @param cs the {@code CharSequence} to be processed
     * @param searchChar the {@code CharSequence} to be searched for
     * @param start the start index
     * @return the index where the search sequence was found
     */
    static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
        // TODO: Implement directly on CharSequence rather than converting to String
        return cs.toString().lastIndexOf(searchChar.toString(), start);
    }

    /**
     * Green implementation of toCharArray.
     *
     * <p>A {@code String} argument is handled by {@link String#toCharArray()};
     * any other sequence is copied one {@code char} at a time.</p>
     *
     * @param cs the {@code CharSequence} to be processed
     * @return the resulting char array
     */
    static char[] toCharArray(final CharSequence cs) {
        if (cs instanceof String) {
            return ((String) cs).toCharArray();
        }
        final int sz = cs.length();
        final char[] array = new char[sz];
        for (int i = 0; i < sz; i++) {
            array[i] = cs.charAt(i);
        }
        return array;
    }

    /**
     * Green implementation of regionMatches.
     *
     * <p>Returns {@code false} (rather than throwing) when any of
     * {@code thisStart}, {@code start} or {@code length} is negative, or when
     * either sequence is too short to contain the requested region.</p>
     *
     * @param cs the {@code CharSequence} to be processed
     * @param ignoreCase whether or not to be case insensitive
     * @param thisStart the index to start on the {@code cs} CharSequence
     * @param substring the {@code CharSequence} to be looked for
     * @param start the index to start on the {@code substring} CharSequence
     * @param length character length of the region
     * @return whether the region matched
     */
    static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
            final CharSequence substring, final int start, final int length)    {
        if (cs instanceof String && substring instanceof String) {
            return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
        }
        int index1 = thisStart;
        int index2 = start;
        int tmpLen = length;

        // Extract these first so we detect NPEs the same as the java.lang.String version
        final int srcLen = cs.length() - thisStart;
        final int otherLen = substring.length() - start;

        // Check for invalid parameters
        if (thisStart < 0 || start < 0 || length < 0) {
            return false;
        }

        // Check that the regions are long enough
        if (srcLen < length || otherLen < length) {
            return false;
        }

        while (tmpLen-- > 0) {
            final char c1 = cs.charAt(index1++);
            final char c2 = substring.charAt(index2++);

            if (c1 == c2) {
                continue;
            }

            if (!ignoreCase) {
                return false;
            }

            // The same check as in String.regionMatches(): compare the upper-cased
            // chars first, then the lower-cased form of those upper-cased chars.
            // Lower-casing the raw chars instead gives a different answer for pairs
            // such as U+0131 / U+0130.
            final char u1 = Character.toUpperCase(c1);
            final char u2 = Character.toUpperCase(c2);
            if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
                return false;
            }
        }

        return true;
    }
}
306 }