CharSequenceUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.lang3;

  18. /**
  19.  * Operations on {@link CharSequence} that are
  20.  * {@code null} safe.
  21.  *
  22.  * @see CharSequence
  23.  * @since 3.0
  24.  */
  25. public class CharSequenceUtils {

  26.     private static final int NOT_FOUND = -1;

  27.     static final int TO_STRING_LIMIT = 16;

  28.     private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) {
  29.         for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
  30.             if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) {
  31.                 return false;
  32.             }
  33.         }
  34.         return true;
  35.     }

  36.     /**
  37.      * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
  38.      *
  39.      * @param cs the {@link CharSequence} to be processed
  40.      * @param searchChar the {@link CharSequence} to be searched for
  41.      * @param start the start index
  42.      * @return the index where the search sequence was found
  43.      */
  44.     static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
  45.         if (cs instanceof String) {
  46.             return ((String) cs).indexOf(searchChar.toString(), start);
  47.         }
  48.         if (cs instanceof StringBuilder) {
  49.             return ((StringBuilder) cs).indexOf(searchChar.toString(), start);
  50.         }
  51.         if (cs instanceof StringBuffer) {
  52.             return ((StringBuffer) cs).indexOf(searchChar.toString(), start);
  53.         }
  54.         return cs.toString().indexOf(searchChar.toString(), start);
  55. //        if (cs instanceof String && searchChar instanceof String) {
  56. //            // TODO: Do we assume searchChar is usually relatively small;
  57. //            //       If so then calling toString() on it is better than reverting to
  58. //            //       the green implementation in the else block
  59. //            return ((String) cs).indexOf((String) searchChar, start);
  60. //        } else {
  61. //            // TODO: Implement rather than convert to String
  62. //            return cs.toString().indexOf(searchChar.toString(), start);
  63. //        }
  64.     }

  65.     /**
  66.      * Returns the index within {@code cs} of the first occurrence of the
  67.      * specified character, starting the search at the specified index.
  68.      * <p>
  69.      * If a character with value {@code searchChar} occurs in the
  70.      * character sequence represented by the {@code cs}
  71.      * object at an index no smaller than {@code start}, then
  72.      * the index of the first such occurrence is returned. For values
  73.      * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
  74.      * this is the smallest value <em>k</em> such that:
  75.      * </p>
  76.      * <blockquote><pre>
  77.      * (this.charAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &gt;= start)
  78.      * </pre></blockquote>
  79.      * is true. For other values of {@code searchChar}, it is the
  80.      * smallest value <em>k</em> such that:
  81.      * <blockquote><pre>
  82.      * (this.codePointAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &gt;= start)
  83.      * </pre></blockquote>
  84.      * <p>
  85.      * is true. In either case, if no such character occurs inm {@code cs}
  86.      * at or after position {@code start}, then
  87.      * {@code -1} is returned.
  88.      * </p>
  89.      * <p>
  90.      * There is no restriction on the value of {@code start}. If it
  91.      * is negative, it has the same effect as if it were zero: the entire
  92.      * {@link CharSequence} may be searched. If it is greater than
  93.      * the length of {@code cs}, it has the same effect as if it were
  94.      * equal to the length of {@code cs}: {@code -1} is returned.
  95.      * </p>
  96.      * <p>All indices are specified in {@code char} values
  97.      * (Unicode code units).
  98.      * </p>
  99.      *
  100.      * @param cs  the {@link CharSequence} to be processed, not null
  101.      * @param searchChar  the char to be searched for
  102.      * @param start  the start index, negative starts at the string start
  103.      * @return the index where the search char was found, -1 if not found
  104.      * @since 3.6 updated to behave more like {@link String}
  105.      */
  106.     static int indexOf(final CharSequence cs, final int searchChar, int start) {
  107.         if (cs instanceof String) {
  108.             return ((String) cs).indexOf(searchChar, start);
  109.         }
  110.         final int sz = cs.length();
  111.         if (start < 0) {
  112.             start = 0;
  113.         }
  114.         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
  115.             for (int i = start; i < sz; i++) {
  116.                 if (cs.charAt(i) == searchChar) {
  117.                     return i;
  118.                 }
  119.             }
  120.             return NOT_FOUND;
  121.         }
  122.         //supplementary characters (LANG1300)
  123.         if (searchChar <= Character.MAX_CODE_POINT) {
  124.             final char[] chars = Character.toChars(searchChar);
  125.             for (int i = start; i < sz - 1; i++) {
  126.                 final char high = cs.charAt(i);
  127.                 final char low = cs.charAt(i + 1);
  128.                 if (high == chars[0] && low == chars[1]) {
  129.                     return i;
  130.                 }
  131.             }
  132.         }
  133.         return NOT_FOUND;
  134.     }

  135.     /**
  136.      * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
  137.      *
  138.      * @param cs the {@link CharSequence} to be processed
  139.      * @param searchChar the {@link CharSequence} to find
  140.      * @param start the start index
  141.      * @return the index where the search sequence was found
  142.      */
  143.     static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
  144.         if (searchChar == null || cs == null) {
  145.             return NOT_FOUND;
  146.         }
  147.         if (searchChar instanceof String) {
  148.             if (cs instanceof String) {
  149.                 return ((String) cs).lastIndexOf((String) searchChar, start);
  150.             }
  151.             if (cs instanceof StringBuilder) {
  152.                 return ((StringBuilder) cs).lastIndexOf((String) searchChar, start);
  153.             }
  154.             if (cs instanceof StringBuffer) {
  155.                 return ((StringBuffer) cs).lastIndexOf((String) searchChar, start);
  156.             }
  157.         }

  158.         final int len1 = cs.length();
  159.         final int len2 = searchChar.length();

  160.         if (start > len1) {
  161.             start = len1;
  162.         }

  163.         if (start < 0 || len2 > len1) {
  164.             return NOT_FOUND;
  165.         }

  166.         if (len2 == 0) {
  167.             return start;
  168.         }

  169.         if (len2 <= TO_STRING_LIMIT) {
  170.             if (cs instanceof String) {
  171.                 return ((String) cs).lastIndexOf(searchChar.toString(), start);
  172.             }
  173.             if (cs instanceof StringBuilder) {
  174.                 return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
  175.             }
  176.             if (cs instanceof StringBuffer) {
  177.                 return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
  178.             }
  179.         }

  180.         if (start + len2 > len1) {
  181.             start = len1 - len2;
  182.         }

  183.         final char char0 = searchChar.charAt(0);

  184.         int i = start;
  185.         while (true) {
  186.             while (cs.charAt(i) != char0) {
  187.                 i--;
  188.                 if (i < 0) {
  189.                     return NOT_FOUND;
  190.                 }
  191.             }
  192.             if (checkLaterThan1(cs, searchChar, len2, i)) {
  193.                 return i;
  194.             }
  195.             i--;
  196.             if (i < 0) {
  197.                 return NOT_FOUND;
  198.             }
  199.         }
  200.     }

  201.     /**
  202.      * Returns the index within {@code cs} of the last occurrence of
  203.      * the specified character, searching backward starting at the
  204.      * specified index. For values of {@code searchChar} in the range
  205.      * from 0 to 0xFFFF (inclusive), the index returned is the largest
  206.      * value <em>k</em> such that:
  207.      * <blockquote><pre>
  208.      * (this.charAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &lt;= start)
  209.      * </pre></blockquote>
  210.      * is true. For other values of {@code searchChar}, it is the
  211.      * largest value <em>k</em> such that:
  212.      * <blockquote><pre>
  213.      * (this.codePointAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &lt;= start)
  214.      * </pre></blockquote>
  215.      * is true. In either case, if no such character occurs in {@code cs}
  216.      * at or before position {@code start}, then {@code -1} is returned.
  217.      *
  218.      * <p>
  219.      * All indices are specified in {@code char} values
  220.      * (Unicode code units).
  221.      * </p>
  222.      *
  223.      * @param cs  the {@link CharSequence} to be processed
  224.      * @param searchChar  the char to be searched for
  225.      * @param start  the start index, negative returns -1, beyond length starts at end
  226.      * @return the index where the search char was found, -1 if not found
  227.      * @since 3.6 updated to behave more like {@link String}
  228.      */
  229.     static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
  230.         if (cs instanceof String) {
  231.             return ((String) cs).lastIndexOf(searchChar, start);
  232.         }
  233.         final int sz = cs.length();
  234.         if (start < 0) {
  235.             return NOT_FOUND;
  236.         }
  237.         if (start >= sz) {
  238.             start = sz - 1;
  239.         }
  240.         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
  241.             for (int i = start; i >= 0; --i) {
  242.                 if (cs.charAt(i) == searchChar) {
  243.                     return i;
  244.                 }
  245.             }
  246.             return NOT_FOUND;
  247.         }
  248.         //supplementary characters (LANG1300)
  249.         //NOTE - we must do a forward traversal for this to avoid duplicating code points
  250.         if (searchChar <= Character.MAX_CODE_POINT) {
  251.             final char[] chars = Character.toChars(searchChar);
  252.             //make sure it's not the last index
  253.             if (start == sz - 1) {
  254.                 return NOT_FOUND;
  255.             }
  256.             for (int i = start; i >= 0; i--) {
  257.                 final char high = cs.charAt(i);
  258.                 final char low = cs.charAt(i + 1);
  259.                 if (chars[0] == high && chars[1] == low) {
  260.                     return i;
  261.                 }
  262.             }
  263.         }
  264.         return NOT_FOUND;
  265.     }

  266.     /**
  267.      * Green implementation of regionMatches.
  268.      *
  269.      * @param cs the {@link CharSequence} to be processed
  270.      * @param ignoreCase whether or not to be case-insensitive
  271.      * @param thisStart the index to start on the {@code cs} CharSequence
  272.      * @param substring the {@link CharSequence} to be looked for
  273.      * @param start the index to start on the {@code substring} CharSequence
  274.      * @param length character length of the region
  275.      * @return whether the region matched
  276.      */
  277.     static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
  278.             final CharSequence substring, final int start, final int length)    {
  279.         if (cs instanceof String && substring instanceof String) {
  280.             return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
  281.         }
  282.         int index1 = thisStart;
  283.         int index2 = start;
  284.         int tmpLen = length;

  285.         // Extract these first so we detect NPEs the same as the java.lang.String version
  286.         final int srcLen = cs.length() - thisStart;
  287.         final int otherLen = substring.length() - start;

  288.         // Check for invalid parameters
  289.         if (thisStart < 0 || start < 0 || length < 0) {
  290.             return false;
  291.         }

  292.         // Check that the regions are long enough
  293.         if (srcLen < length || otherLen < length) {
  294.             return false;
  295.         }

  296.         while (tmpLen-- > 0) {
  297.             final char c1 = cs.charAt(index1++);
  298.             final char c2 = substring.charAt(index2++);

  299.             if (c1 == c2) {
  300.                 continue;
  301.             }

  302.             if (!ignoreCase) {
  303.                 return false;
  304.             }

  305.             // The real same check as in String.regionMatches():
  306.             final char u1 = Character.toUpperCase(c1);
  307.             final char u2 = Character.toUpperCase(c2);
  308.             if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
  309.                 return false;
  310.             }
  311.         }

  312.         return true;
  313.     }

  314.     /**
  315.      * Returns a new {@link CharSequence} that is a subsequence of this
  316.      * sequence starting with the {@code char} value at the specified index.
  317.      *
  318.      * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}.
  319.      * The length (in {@code char}) of the returned sequence is {@code length() - start},
  320.      * so if {@code start == end} then an empty sequence is returned.</p>
  321.      *
  322.      * @param cs  the specified subsequence, null returns null
  323.      * @param start  the start index, inclusive, valid
  324.      * @return a new subsequence, may be null
  325.      * @throws IndexOutOfBoundsException if {@code start} is negative or if
  326.      *  {@code start} is greater than {@code length()}
  327.      */
  328.     public static CharSequence subSequence(final CharSequence cs, final int start) {
  329.         return cs == null ? null : cs.subSequence(start, cs.length());
  330.     }

  331.     /**
  332.      * Converts the given CharSequence to a char[].
  333.      *
  334.      * @param source the {@link CharSequence} to be processed.
  335.      * @return the resulting char array, never null.
  336.      * @since 3.11
  337.      */
  338.     public static char[] toCharArray(final CharSequence source) {
  339.         final int len = StringUtils.length(source);
  340.         if (len == 0) {
  341.             return ArrayUtils.EMPTY_CHAR_ARRAY;
  342.         }
  343.         if (source instanceof String) {
  344.             return ((String) source).toCharArray();
  345.         }
  346.         final char[] array = new char[len];
  347.         for (int i = 0; i < len; i++) {
  348.             array[i] = source.charAt(i);
  349.         }
  350.         return array;
  351.     }

  352.     /**
  353.      * {@link CharSequenceUtils} instances should NOT be constructed in
  354.      * standard programming.
  355.      *
  356.      * <p>This constructor is public to permit tools that require a JavaBean
  357.      * instance to operate.</p>
  358.      *
  359.      * @deprecated TODO Make private in 4.0.
  360.      */
  361.     @Deprecated
  362.     public CharSequenceUtils() {
  363.         // empty
  364.     }
  365. }