StrMatcher.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.lang3.text;

  18. import java.util.Arrays;

  19. import org.apache.commons.lang3.ArraySorter;
  20. import org.apache.commons.lang3.ArrayUtils;
  21. import org.apache.commons.lang3.StringUtils;

  22. /**
  23.  * A matcher class that can be queried to determine if a character array
  24.  * portion matches.
  25.  * <p>
  26.  * This class comes complete with various factory methods.
  27.  * If these do not suffice, you can subclass and implement your own matcher.
  28.  * </p>
  29.  *
  30.  * @since 2.2
  31.  * @deprecated As of 3.6, use Apache Commons Text
  32.  * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html">
  33.  * StringMatcherFactory</a> instead
  34.  */
  35. @Deprecated
  36. public abstract class StrMatcher {

  37.     /**
  38.      * Class used to define a character for matching purposes.
  39.      */
  40.     static final class CharMatcher extends StrMatcher {
  41.         /** The character to match. */
  42.         private final char ch;

  43.         /**
  44.          * Constructor that creates a matcher that matches a single character.
  45.          *
  46.          * @param ch  the character to match
  47.          */
  48.         CharMatcher(final char ch) {
  49.             this.ch = ch;
  50.         }

  51.         /**
  52.          * Returns whether or not the given character matches.
  53.          *
  54.          * @param buffer  the text content to match against, do not change
  55.          * @param pos  the starting position for the match, valid for buffer
  56.          * @param bufferStart  the first active index in the buffer, valid for buffer
  57.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  58.          * @return the number of matching characters, zero for no match
  59.          */
  60.         @Override
  61.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  62.             return ch == buffer[pos] ? 1 : 0;
  63.         }
  64.     }
  65.     /**
  66.      * Class used to define a set of characters for matching purposes.
  67.      */
  68.     static final class CharSetMatcher extends StrMatcher {
  69.         /** The set of characters to match. */
  70.         private final char[] chars;

  71.         /**
  72.          * Constructor that creates a matcher from a character array.
  73.          *
  74.          * @param chars  the characters to match, must not be null
  75.          */
  76.         CharSetMatcher(final char[] chars) {
  77.             this.chars = ArraySorter.sort(chars.clone());
  78.         }

  79.         /**
  80.          * Returns whether or not the given character matches.
  81.          *
  82.          * @param buffer  the text content to match against, do not change
  83.          * @param pos  the starting position for the match, valid for buffer
  84.          * @param bufferStart  the first active index in the buffer, valid for buffer
  85.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  86.          * @return the number of matching characters, zero for no match
  87.          */
  88.         @Override
  89.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  90.             return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
  91.         }
  92.     }
  93.     /**
  94.      * Class used to match no characters.
  95.      */
  96.     static final class NoMatcher extends StrMatcher {

  97.         /**
  98.          * Constructs a new instance of {@link NoMatcher}.
  99.          */
  100.         NoMatcher() {
  101.         }

  102.         /**
  103.          * Always returns {@code false}.
  104.          *
  105.          * @param buffer  the text content to match against, do not change
  106.          * @param pos  the starting position for the match, valid for buffer
  107.          * @param bufferStart  the first active index in the buffer, valid for buffer
  108.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  109.          * @return the number of matching characters, zero for no match
  110.          */
  111.         @Override
  112.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  113.             return 0;
  114.         }
  115.     }
  116.     /**
  117.      * Class used to define a set of characters for matching purposes.
  118.      */
  119.     static final class StringMatcher extends StrMatcher {
  120.         /** The string to match, as a character array. */
  121.         private final char[] chars;

  122.         /**
  123.          * Constructor that creates a matcher from a String.
  124.          *
  125.          * @param str  the string to match, must not be null
  126.          */
  127.         StringMatcher(final String str) {
  128.             chars = str.toCharArray();
  129.         }

  130.         /**
  131.          * Returns whether or not the given text matches the stored string.
  132.          *
  133.          * @param buffer  the text content to match against, do not change
  134.          * @param pos  the starting position for the match, valid for buffer
  135.          * @param bufferStart  the first active index in the buffer, valid for buffer
  136.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  137.          * @return the number of matching characters, zero for no match
  138.          */
  139.         @Override
  140.         public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
  141.             final int len = chars.length;
  142.             if (pos + len > bufferEnd) {
  143.                 return 0;
  144.             }
  145.             for (int i = 0; i < chars.length; i++, pos++) {
  146.                 if (chars[i] != buffer[pos]) {
  147.                     return 0;
  148.                 }
  149.             }
  150.             return len;
  151.         }

  152.         @Override
  153.         public String toString() {
  154.             return super.toString() + ' ' + Arrays.toString(chars);
  155.         }

  156.     }
  157.     /**
  158.      * Class used to match whitespace as per trim().
  159.      */
  160.     static final class TrimMatcher extends StrMatcher {

  161.         /**
  162.          * Constructs a new instance of {@link TrimMatcher}.
  163.          */
  164.         TrimMatcher() {
  165.         }

  166.         /**
  167.          * Returns whether or not the given character matches.
  168.          *
  169.          * @param buffer  the text content to match against, do not change
  170.          * @param pos  the starting position for the match, valid for buffer
  171.          * @param bufferStart  the first active index in the buffer, valid for buffer
  172.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  173.          * @return the number of matching characters, zero for no match
  174.          */
  175.         @Override
  176.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  177.             return buffer[pos] <= 32 ? 1 : 0;
  178.         }
  179.     }
  180.     /**
  181.      * Matches the comma character.
  182.      */
  183.     private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
  184.     /**
  185.      * Matches the tab character.
  186.      */
  187.     private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
  188.     /**
  189.      * Matches the space character.
  190.      */
  191.     private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
  192.     /**
  193.      * Matches the same characters as StringTokenizer,
  194.      * namely space, tab, newline, formfeed.
  195.      */
  196.     private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

  197.     /**
  198.      * Matches the String trim() whitespace characters.
  199.      */
  200.     private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

  201.     /**
  202.      * Matches the double quote character.
  203.      */
  204.     private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

  205.     /**
  206.      * Matches the double quote character.
  207.      */
  208.     private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

  209.     /**
  210.      * Matches the single or double quote character.
  211.      */
  212.     private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

  213.     /**
  214.      * Matches no characters.
  215.      */
  216.     private static final StrMatcher NONE_MATCHER = new NoMatcher();

  217.     /**
  218.      * Constructor that creates a matcher from a character.
  219.      *
  220.      * @param ch  the character to match, must not be null
  221.      * @return a new Matcher for the given char
  222.      */
  223.     public static StrMatcher charMatcher(final char ch) {
  224.         return new CharMatcher(ch);
  225.     }

  226.     /**
  227.      * Constructor that creates a matcher from a set of characters.
  228.      *
  229.      * @param chars  the characters to match, null or empty matches nothing
  230.      * @return a new matcher for the given char[]
  231.      */
  232.     public static StrMatcher charSetMatcher(final char... chars) {
  233.         if (ArrayUtils.isEmpty(chars)) {
  234.             return NONE_MATCHER;
  235.         }
  236.         if (chars.length == 1) {
  237.             return new CharMatcher(chars[0]);
  238.         }
  239.         return new CharSetMatcher(chars);
  240.     }

  241.     /**
  242.      * Constructor that creates a matcher from a string representing a set of characters.
  243.      *
  244.      * @param chars  the characters to match, null or empty matches nothing
  245.      * @return a new Matcher for the given characters
  246.      */
  247.     public static StrMatcher charSetMatcher(final String chars) {
  248.         if (StringUtils.isEmpty(chars)) {
  249.             return NONE_MATCHER;
  250.         }
  251.         if (chars.length() == 1) {
  252.             return new CharMatcher(chars.charAt(0));
  253.         }
  254.         return new CharSetMatcher(chars.toCharArray());
  255.     }

  256.     /**
  257.      * Returns a matcher which matches the comma character.
  258.      *
  259.      * @return a matcher for a comma
  260.      */
  261.     public static StrMatcher commaMatcher() {
  262.         return COMMA_MATCHER;
  263.     }

  264.     /**
  265.      * Returns a matcher which matches the double quote character.
  266.      *
  267.      * @return a matcher for a double quote
  268.      */
  269.     public static StrMatcher doubleQuoteMatcher() {
  270.         return DOUBLE_QUOTE_MATCHER;
  271.     }

  272.     /**
  273.      * Matches no characters.
  274.      *
  275.      * @return a matcher that matches nothing
  276.      */
  277.     public static StrMatcher noneMatcher() {
  278.         return NONE_MATCHER;
  279.     }

  280.     /**
  281.      * Returns a matcher which matches the single or double quote character.
  282.      *
  283.      * @return a matcher for a single or double quote
  284.      */
  285.     public static StrMatcher quoteMatcher() {
  286.         return QUOTE_MATCHER;
  287.     }

  288.     /**
  289.      * Returns a matcher which matches the single quote character.
  290.      *
  291.      * @return a matcher for a single quote
  292.      */
  293.     public static StrMatcher singleQuoteMatcher() {
  294.         return SINGLE_QUOTE_MATCHER;
  295.     }

  296.     /**
  297.      * Returns a matcher which matches the space character.
  298.      *
  299.      * @return a matcher for a space
  300.      */
  301.     public static StrMatcher spaceMatcher() {
  302.         return SPACE_MATCHER;
  303.     }

  304.     /**
  305.      * Matches the same characters as StringTokenizer,
  306.      * namely space, tab, newline and formfeed.
  307.      *
  308.      * @return the split matcher
  309.      */
  310.     public static StrMatcher splitMatcher() {
  311.         return SPLIT_MATCHER;
  312.     }

  313.     /**
  314.      * Constructor that creates a matcher from a string.
  315.      *
  316.      * @param str  the string to match, null or empty matches nothing
  317.      * @return a new Matcher for the given String
  318.      */
  319.     public static StrMatcher stringMatcher(final String str) {
  320.         if (StringUtils.isEmpty(str)) {
  321.             return NONE_MATCHER;
  322.         }
  323.         return new StringMatcher(str);
  324.     }

  325.     /**
  326.      * Returns a matcher which matches the tab character.
  327.      *
  328.      * @return a matcher for a tab
  329.      */
  330.     public static StrMatcher tabMatcher() {
  331.         return TAB_MATCHER;
  332.     }

  333.     /**
  334.      * Matches the String trim() whitespace characters.
  335.      *
  336.      * @return the trim matcher
  337.      */
  338.     public static StrMatcher trimMatcher() {
  339.         return TRIM_MATCHER;
  340.     }

  341.     /**
  342.      * Constructs a new instance.
  343.      */
  344.     protected StrMatcher() {
  345.     }

  346.     /**
  347.      * Returns the number of matching characters, zero for no match.
  348.      * <p>
  349.      * This method is called to check for a match.
  350.      * The parameter {@code pos} represents the current position to be
  351.      * checked in the string {@code buffer} (a character array which must
  352.      * not be changed).
  353.      * The API guarantees that {@code pos} is a valid index for {@code buffer}.
  354.      * </p>
  355.      * <p>
  356.      * The matching code may check one character or many.
  357.      * It may check characters preceding {@code pos} as well as those after.
  358.      * </p>
  359.      * <p>
  360.      * It must return zero for no match, or a positive number if a match was found.
  361.      * The number indicates the number of characters that matched.
  362.      * </p>
  363.      *
  364.      * @param buffer  the text content to match against, do not change
  365.      * @param pos  the starting position for the match, valid for buffer
  366.      * @return the number of matching characters, zero for no match
  367.      * @since 2.4
  368.      */
  369.     public int isMatch(final char[] buffer, final int pos) {
  370.         return isMatch(buffer, pos, 0, buffer.length);
  371.     }

  372.     /**
  373.      * Returns the number of matching characters, zero for no match.
  374.      * <p>
  375.      * This method is called to check for a match.
  376.      * The parameter {@code pos} represents the current position to be
  377.      * checked in the string {@code buffer} (a character array which must
  378.      * not be changed).
  379.      * The API guarantees that {@code pos} is a valid index for {@code buffer}.
  380.      * </p>
  381.      * <p>
  382.      * The character array may be larger than the active area to be matched.
  383.      * Only values in the buffer between the specified indices may be accessed.
  384.      * </p>
  385.      * <p>
  386.      * The matching code may check one character or many.
  387.      * It may check characters preceding {@code pos} as well as those
  388.      * after, so long as no checks exceed the bounds specified.
  389.      * </p>
  390.      * <p>
  391.      * It must return zero for no match, or a positive number if a match was found.
  392.      * The number indicates the number of characters that matched.
  393.      * </p>
  394.      *
  395.      * @param buffer  the text content to match against, do not change
  396.      * @param pos  the starting position for the match, valid for buffer
  397.      * @param bufferStart  the first active index in the buffer, valid for buffer
  398.      * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
  399.      * @return the number of matching characters, zero for no match
  400.      */
  401.     public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

  402. }