AbstractStringMatcher.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.text.matcher;

  18. import java.util.Arrays;

  19. /**
  20.  * A matcher that determines if a character array portion matches.
  21.  * <p>
  22.  * Thread=safe.
  23.  * </p>
  24.  *
  25.  * @since 1.3
  26.  */
  27. abstract class AbstractStringMatcher implements StringMatcher {

  28.     /**
  29.      * Matches all of the given matchers in order.
  30.      *
  31.      * @since 1.9
  32.      */
  33.     static final class AndStringMatcher extends AbstractStringMatcher {

  34.         /**
  35.          * Matchers in order.
  36.          */
  37.         private final StringMatcher[] stringMatchers;

  38.         /**
  39.          * Constructs a new initialized instance.
  40.          *
  41.          * @param stringMatchers Matchers in order. Never null since the {@link StringMatcherFactory} uses the
  42.          *        {@link NoneMatcher} instead.
  43.          */
  44.         AndStringMatcher(final StringMatcher... stringMatchers) {
  45.             this.stringMatchers = stringMatchers.clone();
  46.         }

  47.         @Override
  48.         public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
  49.             int total = 0;
  50.             int curStart = start;
  51.             for (final StringMatcher stringMatcher : stringMatchers) {
  52.                 if (stringMatcher != null) {
  53.                     final int len = stringMatcher.isMatch(buffer, curStart, bufferStart, bufferEnd);
  54.                     if (len == 0) {
  55.                         return 0;
  56.                     }
  57.                     total += len;
  58.                     curStart += len;
  59.                 }
  60.             }
  61.             return total;
  62.         }

  63.         @Override
  64.         public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
  65.             int total = 0;
  66.             int curStart = start;
  67.             for (final StringMatcher stringMatcher : stringMatchers) {
  68.                 if (stringMatcher != null) {
  69.                     final int len = stringMatcher.isMatch(buffer, curStart, bufferStart, bufferEnd);
  70.                     if (len == 0) {
  71.                         return 0;
  72.                     }
  73.                     total += len;
  74.                     curStart += len;
  75.                 }
  76.             }
  77.             return total;
  78.         }

  79.         @Override
  80.         public int size() {
  81.             int total = 0;
  82.             for (final StringMatcher stringMatcher : stringMatchers) {
  83.                 if (stringMatcher != null) {
  84.                     total += stringMatcher.size();
  85.                 }
  86.             }
  87.             return total;
  88.         }
  89.     }

  90.     /**
  91.      * Matches out of a set of characters.
  92.      * <p>
  93.      * Thread=safe.
  94.      * </p>
  95.      */
  96.     static final class CharArrayMatcher extends AbstractStringMatcher {

  97.         /** The string to match, as a character array, implementation treats as immutable. */
  98.         private final char[] chars;

  99.         /** The string to match. */
  100.         private final String string;

  101.         /**
  102.          * Constructs a matcher from a String.
  103.          *
  104.          * @param chars the string to match, must not be null
  105.          */
  106.         CharArrayMatcher(final char... chars) {
  107.             this.string = String.valueOf(chars);
  108.             this.chars = chars.clone();
  109.         }

  110.         /**
  111.          * Returns the number of matching characters, {@code 0} if there is no match.
  112.          *
  113.          * @param buffer the text content to match against, do not change
  114.          * @param start the starting position for the match, valid for buffer
  115.          * @param bufferStart unused
  116.          * @param bufferEnd the end index of the active buffer, valid for buffer
  117.          * @return The number of matching characters, zero for no match
  118.          */
  119.         @Override
  120.         public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
  121.             final int len = size();
  122.             if (start + len > bufferEnd) {
  123.                 return 0;
  124.             }
  125.             int j = start;
  126.             for (int i = 0; i < len; i++, j++) {
  127.                 if (chars[i] != buffer[j]) {
  128.                     return 0;
  129.                 }
  130.             }
  131.             return len;
  132.         }

  133.         /**
  134.          * Returns the number of matching characters, {@code 0} if there is no match.
  135.          *
  136.          * @param buffer the text content to match against, do not change
  137.          * @param start the starting position for the match, valid for buffer
  138.          * @param bufferStart unused
  139.          * @param bufferEnd the end index of the active buffer, valid for buffer
  140.          * @return The number of matching characters, zero for no match
  141.          */
  142.         @Override
  143.         public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
  144.             final int len = size();
  145.             if (start + len > bufferEnd) {
  146.                 return 0;
  147.             }
  148.             int j = start;
  149.             for (int i = 0; i < len; i++, j++) {
  150.                 if (chars[i] != buffer.charAt(j)) {
  151.                     return 0;
  152.                 }
  153.             }
  154.             return len;
  155.         }

  156.         /**
  157.          * Returns the size of the string to match given in the constructor.
  158.          *
  159.          * @since 1.9
  160.          */
  161.         @Override
  162.         public int size() {
  163.             return chars.length;
  164.         }

  165.         @Override
  166.         public String toString() {
  167.             return super.toString() + "[\"" + string + "\"]";
  168.         }

  169.     }

  170.     /**
  171.      * Matches a character.
  172.      * <p>
  173.      * Thread=safe.
  174.      * </p>
  175.      */
  176.     static final class CharMatcher extends AbstractStringMatcher {

  177.         /** The character to match. */
  178.         private final char ch;

  179.         /**
  180.          * Constructs a matcher for a single character.
  181.          *
  182.          * @param ch the character to match
  183.          */
  184.         CharMatcher(final char ch) {
  185.             this.ch = ch;
  186.         }

  187.         /**
  188.          * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
  189.          *
  190.          * @param buffer the text content to match against, do not change
  191.          * @param start the starting position for the match, valid for buffer
  192.          * @param bufferStart unused
  193.          * @param bufferEnd unused
  194.          * @return The number of matching characters, zero for no match
  195.          */
  196.         @Override
  197.         public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
  198.             return ch == buffer[start] ? 1 : 0;
  199.         }

  200.         /**
  201.          * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
  202.          *
  203.          * @param buffer the text content to match against, do not change
  204.          * @param start the starting position for the match, valid for buffer
  205.          * @param bufferStart unused
  206.          * @param bufferEnd unused
  207.          * @return The number of matching characters, zero for no match
  208.          */
  209.         @Override
  210.         public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
  211.             return ch == buffer.charAt(start) ? 1 : 0;
  212.         }

  213.         /**
  214.          * Returns 1.
  215.          *
  216.          * @since 1.9
  217.          */
  218.         @Override
  219.         public int size() {
  220.             return 1;
  221.         }

  222.         @Override
  223.         public String toString() {
  224.             return super.toString() + "['" + ch + "']";
  225.         }
  226.     }

  227.     /**
  228.      * Matches a set of characters.
  229.      * <p>
  230.      * Thread=safe.
  231.      * </p>
  232.      */
  233.     static final class CharSetMatcher extends AbstractStringMatcher {

  234.         /** The set of characters to match. */
  235.         private final char[] chars;

  236.         /**
  237.          * Constructs a matcher from a character array.
  238.          *
  239.          * @param chars the characters to match, must not be null
  240.          */
  241.         CharSetMatcher(final char[] chars) {
  242.             this.chars = chars.clone();
  243.             Arrays.sort(this.chars);
  244.         }

  245.         /**
  246.          * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
  247.          *
  248.          * @param buffer the text content to match against, do not change
  249.          * @param start the starting position for the match, valid for buffer
  250.          * @param bufferStart unused
  251.          * @param bufferEnd unused
  252.          * @return The number of matching characters, zero for no match
  253.          */
  254.         @Override
  255.         public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
  256.             return Arrays.binarySearch(chars, buffer[start]) >= 0 ? 1 : 0;
  257.         }

  258.         /**
  259.          * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
  260.          *
  261.          * @param buffer the text content to match against, do not change
  262.          * @param start the starting position for the match, valid for buffer
  263.          * @param bufferStart unused
  264.          * @param bufferEnd unused
  265.          * @return The number of matching characters, zero for no match
  266.          */
  267.         @Override
  268.         public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
  269.             return Arrays.binarySearch(chars, buffer.charAt(start)) >= 0 ? 1 : 0;
  270.         }

  271.         /**
  272.          * Returns 1.
  273.          *
  274.          * @since 1.9
  275.          */
  276.         @Override
  277.         public int size() {
  278.             return 1;
  279.         }

  280.         @Override
  281.         public String toString() {
  282.             return super.toString() + Arrays.toString(chars);
  283.         }

  284.     }

  285.     /**
  286.      * Matches nothing.
  287.      * <p>
  288.      * Thread=safe.
  289.      * </p>
  290.      */
  291.     static final class NoneMatcher extends AbstractStringMatcher {

  292.         /**
  293.          * Constructs a new instance of {@code NoMatcher}.
  294.          */
  295.         NoneMatcher() {
  296.         }

  297.         /**
  298.          * Always returns {@code 0}.
  299.          *
  300.          * @param buffer unused
  301.          * @param start unused
  302.          * @param bufferStart unused
  303.          * @param bufferEnd unused
  304.          * @return The number of matching characters, zero for no match
  305.          */
  306.         @Override
  307.         public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
  308.             return 0;
  309.         }

  310.         /**
  311.          * Always returns {@code 0}.
  312.          *
  313.          * @param buffer unused
  314.          * @param start unused
  315.          * @param bufferStart unused
  316.          * @param bufferEnd unused
  317.          * @return The number of matching characters, zero for no match
  318.          */
  319.         @Override
  320.         public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
  321.             return 0;
  322.         }

  323.         /**
  324.          * Returns 0.
  325.          *
  326.          * @since 1.9
  327.          */
  328.         @Override
  329.         public int size() {
  330.             return 0;
  331.         }

  332.     }

  333.     /**
  334.      * Matches whitespace as per trim().
  335.      * <p>
  336.      * Thread=safe.
  337.      * </p>
  338.      */
  339.     static final class TrimMatcher extends AbstractStringMatcher {

  340.         /**
  341.          * The space character.
  342.          */
  343.         private static final int SPACE_INT = 32;

  344.         /**
  345.          * Constructs a new instance of {@code TrimMatcher}.
  346.          */
  347.         TrimMatcher() {
  348.         }

  349.         /**
  350.          * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
  351.          *
  352.          * @param buffer the text content to match against, do not change
  353.          * @param start the starting position for the match, valid for buffer
  354.          * @param bufferStart unused
  355.          * @param bufferEnd unused
  356.          * @return The number of matching characters, zero for no match
  357.          */
  358.         @Override
  359.         public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
  360.             return buffer[start] <= SPACE_INT ? 1 : 0;
  361.         }

  362.         /**
  363.          * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
  364.          *
  365.          * @param buffer the text content to match against, do not change
  366.          * @param start the starting position for the match, valid for buffer
  367.          * @param bufferStart unused
  368.          * @param bufferEnd unused
  369.          * @return The number of matching characters, zero for no match
  370.          */
  371.         @Override
  372.         public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
  373.             return buffer.charAt(start) <= SPACE_INT ? 1 : 0;
  374.         }

  375.         /**
  376.          * Returns 1.
  377.          *
  378.          * @since 1.9
  379.          */
  380.         @Override
  381.         public int size() {
  382.             return 1;
  383.         }
  384.     }

  385.     /**
  386.      * Constructs a new instance.
  387.      */
  388.     protected AbstractStringMatcher() {
  389.     }

  390. //    /**
  391. //     * Validates indices for {@code bufferStart <= start < bufferEnd}.
  392. //     *
  393. //     * @param start the starting position for the match, valid in {@code buffer}.
  394. //     * @param bufferStart the first active index in the buffer, valid in {@code buffer}.
  395. //     * @param bufferEnd the end index (exclusive) of the active buffer, valid in {@code buffer}.
  396. //     */
  397. //    void validate(final int start, final int bufferStart, final int bufferEnd) {
  398. //        if (((bufferStart > start) || (start >= bufferEnd))) {
  399. //            throw new IndexOutOfBoundsException(
  400. //                String.format("bufferStart(%,d) <= start(%,d) < bufferEnd(%,d)", bufferStart, start, bufferEnd));
  401. //        }
  402. //    }

  403. }