StrMatcher.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text;

  18. import java.util.Arrays;

  19. import org.apache.commons.lang3.ArrayUtils;
  20. import org.apache.commons.text.matcher.StringMatcherFactory;

  21. /**
  22.  * A matcher class that can be queried to determine if a character array
  23.  * portion matches.
  24.  * <p>
  25.  * This class comes complete with various factory methods.
  26.  * If these do not suffice, you can subclass and implement your own matcher.
  27.  * </p>
  28.  *
  29.  * @since 1.0
  30.  * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
  31.  */
  32. @Deprecated
  33. public abstract class StrMatcher {

  34.     /**
  35.      * Class used to define a character for matching purposes.
  36.      */
  37.     private static final class CharMatcher extends StrMatcher {

  38.         /** The character to match. */
  39.         private final char ch;

  40.         /**
  41.          * Constructor that creates a matcher that matches a single character.
  42.          *
  43.          * @param ch  the character to match
  44.          */
  45.         private CharMatcher(final char ch) {
  46.             this.ch = ch;
  47.         }

  48.         /**
  49.          * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
  50.          *
  51.          * @param buffer  the text content to match against, do not change
  52.          * @param pos  the starting position for the match, valid for buffer
  53.          * @param bufferStart  the first active index in the buffer, valid for buffer
  54.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  55.          * @return The number of matching characters, or zero if there is no match
  56.          */
  57.         @Override
  58.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  59.             return ch == buffer[pos] ? 1 : 0;
  60.         }
  61.     }

  62.     /**
  63.      * Class used to define a set of characters for matching purposes.
  64.      */
  65.     private static final class CharSetMatcher extends StrMatcher {

  66.         /** The set of characters to match. */
  67.         private final char[] chars;

  68.         /**
  69.          * Constructor that creates a matcher from a character array.
  70.          *
  71.          * @param chars  the characters to match, must not be null
  72.          */
  73.         private CharSetMatcher(final char[] chars) {
  74.             this.chars = chars.clone();
  75.             Arrays.sort(this.chars);
  76.         }

  77.         /**
  78.          * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
  79.          *
  80.          * @param buffer  the text content to match against, do not change
  81.          * @param pos  the starting position for the match, valid for buffer
  82.          * @param bufferStart  the first active index in the buffer, valid for buffer
  83.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  84.          * @return The number of matching characters, or zero if there is no match
  85.          */
  86.         @Override
  87.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  88.             return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
  89.         }
  90.     }

  91.     /**
  92.      * Class used to match no characters.
  93.      */
  94.     private static final class NoMatcher extends StrMatcher {

  95.         /**
  96.          * Constructs a new instance of {@code NoMatcher}.
  97.          */
  98.         private NoMatcher() {
  99.         }

  100.         /**
  101.          * Always returns {@code 0}.
  102.          *
  103.          * @param buffer  the text content to match against, do not change
  104.          * @param pos  the starting position for the match, valid for buffer
  105.          * @param bufferStart  the first active index in the buffer, valid for buffer
  106.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  107.          * @return The number of matching characters, or zero if there is no match
  108.          */
  109.         @Override
  110.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  111.             return 0;
  112.         }
  113.     }

  114.     /**
  115.      * Class used to define a set of characters for matching purposes.
  116.      */
  117.     private static final class StringMatcher extends StrMatcher {

  118.         /** The string to match, as a character array. */
  119.         private final char[] chars;

  120.         /**
  121.          * Constructor that creates a matcher from a String.
  122.          *
  123.          * @param str  the string to match, must not be null
  124.          */
  125.         private StringMatcher(final String str) {
  126.             chars = str.toCharArray();
  127.         }

  128.         /**
  129.          * Returns the number of matching characters, or zero if there is no match.
  130.          *
  131.          * @param buffer  the text content to match against, do not change
  132.          * @param pos  the starting position for the match, valid for buffer
  133.          * @param bufferStart  the first active index in the buffer, valid for buffer
  134.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  135.          * @return The number of matching characters, or zero if there is no match
  136.          */
  137.         @Override
  138.         public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
  139.             final int len = chars.length;
  140.             if (pos + len > bufferEnd) {
  141.                 return 0;
  142.             }
  143.             for (int i = 0; i < chars.length; i++, pos++) {
  144.                 if (chars[i] != buffer[pos]) {
  145.                     return 0;
  146.                 }
  147.             }
  148.             return len;
  149.         }

  150.         @Override
  151.         public String toString() {
  152.             return super.toString() + ' ' + Arrays.toString(chars);
  153.         }

  154.     }

  155.     /**
  156.      * Class used to match whitespace as per trim().
  157.      */
  158.     private static final class TrimMatcher extends StrMatcher {

  159.         /**
  160.          * Constructs a new instance of {@code TrimMatcher}.
  161.          */
  162.         private TrimMatcher() {
  163.         }

  164.         /**
  165.          * Returns whether or not the given character matches.
  166.          *
  167.          * @param buffer  the text content to match against, do not change
  168.          * @param pos  the starting position for the match, valid for buffer
  169.          * @param bufferStart  the first active index in the buffer, valid for buffer
  170.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  171.          * @return The number of matching characters, or zero if there is no match
  172.          */
  173.         @Override
  174.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  175.             return buffer[pos] <= 32 ? 1 : 0;
  176.         }
  177.     }

  178.     /**
  179.      * Matches the comma character.
  180.      */
  181.     private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

  182.     /**
  183.      * Matches the tab character.
  184.      */
  185.     private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

  186.     /**
  187.      * Matches the space character.
  188.      */
  189.     private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

  190.     /**
  191.      * Matches the same characters as StringTokenizer,
  192.      * namely space, tab, newline, form feed.
  193.      */
  194.     private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

  195.     /**
  196.      * Matches the String trim() whitespace characters.
  197.      */
  198.     private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

  199.     /**
  200.      * Matches the double quote character.
  201.      */
  202.     private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

  203.     /**
  204.      * Matches the double quote character.
  205.      */
  206.     private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

  207.     /**
  208.      * Matches the single or double quote character.
  209.      */
  210.     private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

  211.     /**
  212.      * Matches no characters.
  213.      */
  214.     private static final StrMatcher NONE_MATCHER = new NoMatcher();

  215.     /**
  216.      * Creates a matcher from a character.
  217.      *
  218.      * @param ch  the character to match, must not be null
  219.      * @return a new Matcher for the given char
  220.      */
  221.     public static StrMatcher charMatcher(final char ch) {
  222.         return new CharMatcher(ch);
  223.     }

  224.     /**
  225.      * Creates a matcher from a set of characters.
  226.      *
  227.      * @param chars  the characters to match, null or empty matches nothing
  228.      * @return a new matcher for the given char[]
  229.      */
  230.     public static StrMatcher charSetMatcher(final char... chars) {
  231.         if (ArrayUtils.isEmpty(chars)) {
  232.             return NONE_MATCHER;
  233.         }
  234.         if (chars.length == 1) {
  235.             return new CharMatcher(chars[0]);
  236.         }
  237.         return new CharSetMatcher(chars);
  238.     }

  239.     /**
  240.      * Creates a matcher from a string representing a set of characters.
  241.      *
  242.      * @param chars  the characters to match, null or empty matches nothing
  243.      * @return a new Matcher for the given characters
  244.      */
  245.     public static StrMatcher charSetMatcher(final String chars) {
  246.         if (chars == null || chars.isEmpty()) {
  247.             return NONE_MATCHER;
  248.         }
  249.         if (chars.length() == 1) {
  250.             return new CharMatcher(chars.charAt(0));
  251.         }
  252.         return new CharSetMatcher(chars.toCharArray());
  253.     }

  254.     /**
  255.      * Returns a matcher which matches the comma character.
  256.      *
  257.      * @return a matcher for a comma
  258.      */
  259.     public static StrMatcher commaMatcher() {
  260.         return COMMA_MATCHER;
  261.     }

  262.     /**
  263.      * Returns a matcher which matches the double quote character.
  264.      *
  265.      * @return a matcher for a double quote
  266.      */
  267.     public static StrMatcher doubleQuoteMatcher() {
  268.         return DOUBLE_QUOTE_MATCHER;
  269.     }

  270.     /**
  271.      * Matches no characters.
  272.      *
  273.      * @return a matcher that matches nothing
  274.      */
  275.     public static StrMatcher noneMatcher() {
  276.         return NONE_MATCHER;
  277.     }

  278.     /**
  279.      * Returns a matcher which matches the single or double quote character.
  280.      *
  281.      * @return a matcher for a single or double quote
  282.      */
  283.     public static StrMatcher quoteMatcher() {
  284.         return QUOTE_MATCHER;
  285.     }

  286.     /**
  287.      * Returns a matcher which matches the single quote character.
  288.      *
  289.      * @return a matcher for a single quote
  290.      */
  291.     public static StrMatcher singleQuoteMatcher() {
  292.         return SINGLE_QUOTE_MATCHER;
  293.     }

  294.     /**
  295.      * Returns a matcher which matches the space character.
  296.      *
  297.      * @return a matcher for a space
  298.      */
  299.     public static StrMatcher spaceMatcher() {
  300.         return SPACE_MATCHER;
  301.     }

  302.     /**
  303.      * Matches the same characters as StringTokenizer,
  304.      * namely space, tab, newline and form feed.
  305.      *
  306.      * @return The split matcher
  307.      */
  308.     public static StrMatcher splitMatcher() {
  309.         return SPLIT_MATCHER;
  310.     }

  311.     /**
  312.      * Creates a matcher from a string.
  313.      *
  314.      * @param str  the string to match, null or empty matches nothing
  315.      * @return a new Matcher for the given String
  316.      */
  317.     public static StrMatcher stringMatcher(final String str) {
  318.         if (str == null || str.isEmpty()) {
  319.             return NONE_MATCHER;
  320.         }
  321.         return new StringMatcher(str);
  322.     }

  323.     /**
  324.      * Returns a matcher which matches the tab character.
  325.      *
  326.      * @return a matcher for a tab
  327.      */
  328.     public static StrMatcher tabMatcher() {
  329.         return TAB_MATCHER;
  330.     }

  331.     /**
  332.      * Matches the String trim() whitespace characters.
  333.      *
  334.      * @return The trim matcher
  335.      */
  336.     public static StrMatcher trimMatcher() {
  337.         return TRIM_MATCHER;
  338.     }

  339.     /**
  340.      * Constructs a new instance.
  341.      */
  342.     protected StrMatcher() {
  343.     }

  344.     /**
  345.      * Returns the number of matching characters, or zero if there is no match.
  346.      * <p>
  347.      * This method is called to check for a match.
  348.      * The parameter {@code pos} represents the current position to be
  349.      * checked in the string {@code buffer} (a character array which must
  350.      * not be changed).
  351.      * The API guarantees that {@code pos} is a valid index for {@code buffer}.
  352.      * </p>
  353.      * <p>
  354.      * The matching code may check one character or many.
  355.      * It may check characters preceding {@code pos} as well as those after.
  356.      * </p>
  357.      * <p>
  358.      * It must return zero for no match, or a positive number if a match was found.
  359.      * The number indicates the number of characters that matched.
  360.      * </p>
  361.      *
  362.      * @param buffer  the text content to match against, do not change
  363.      * @param pos  the starting position for the match, valid for buffer
  364.      * @return The number of matching characters, or zero if there is no match
  365.      */
  366.     public int isMatch(final char[] buffer, final int pos) {
  367.         return isMatch(buffer, pos, 0, buffer.length);
  368.     }

  369.     /**
  370.      * Returns the number of matching characters, or zero if there is no match.
  371.      * <p>
  372.      * This method is called to check for a match.
  373.      * The parameter {@code pos} represents the current position to be
  374.      * checked in the string {@code buffer} (a character array which must
  375.      * not be changed).
  376.      * The API guarantees that {@code pos} is a valid index for {@code buffer}.
  377.      * </p>
  378.      * <p>
  379.      * The character array may be larger than the active area to be matched.
  380.      * Only values in the buffer between the specified indices may be accessed.
  381.      * </p>
  382.      * <p>
  383.      * The matching code may check one character or many.
  384.      * It may check characters preceding {@code pos} as well as those
  385.      * after, so long as no checks exceed the bounds specified.
  386.      * </p>
  387.      * <p>
  388.      * It must return zero for no match, or a positive number if a match was found.
  389.      * The number indicates the number of characters that matched.
  390.      * </p>
  391.      *
  392.      * @param buffer  the text content to match against, do not change
  393.      * @param pos  the starting position for the match, valid for buffer
  394.      * @param bufferStart  the first active index in the buffer, valid for buffer
  395.      * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
  396.      * @return The number of matching characters, or zero if there is no match
  397.      */
  398.     public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

  399. }