StrMatcher.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text;

  18. import java.util.Arrays;

  19. /**
  20.  * A matcher class that can be queried to determine if a character array
  21.  * portion matches.
  22.  * <p>
  23.  * This class comes complete with various factory methods.
  24.  * If these do not suffice, you can subclass and implement your own matcher.
  25.  *
  26.  * @since 1.0
  27.  */
  28. public abstract class StrMatcher {

  29.     /**
  30.      * Matches the comma character.
  31.      */
  32.     private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
  33.     /**
  34.      * Matches the tab character.
  35.      */
  36.     private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
  37.     /**
  38.      * Matches the space character.
  39.      */
  40.     private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
  41.     /**
  42.      * Matches the same characters as StringTokenizer,
  43.      * namely space, tab, newline, formfeed.
  44.      */
  45.     private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
  46.     /**
  47.      * Matches the String trim() whitespace characters.
  48.      */
  49.     private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
  50.     /**
  51.      * Matches the double quote character.
  52.      */
  53.     private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
  54.     /**
  55.      * Matches the double quote character.
  56.      */
  57.     private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
  58.     /**
  59.      * Matches the single or double quote character.
  60.      */
  61.     private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
  62.     /**
  63.      * Matches no characters.
  64.      */
  65.     private static final StrMatcher NONE_MATCHER = new NoMatcher();

  66.     // -----------------------------------------------------------------------

  67.     /**
  68.      * Returns a matcher which matches the comma character.
  69.      *
  70.      * @return a matcher for a comma
  71.      */
  72.     public static StrMatcher commaMatcher() {
  73.         return COMMA_MATCHER;
  74.     }

  75.     /**
  76.      * Returns a matcher which matches the tab character.
  77.      *
  78.      * @return a matcher for a tab
  79.      */
  80.     public static StrMatcher tabMatcher() {
  81.         return TAB_MATCHER;
  82.     }

  83.     /**
  84.      * Returns a matcher which matches the space character.
  85.      *
  86.      * @return a matcher for a space
  87.      */
  88.     public static StrMatcher spaceMatcher() {
  89.         return SPACE_MATCHER;
  90.     }

  91.     /**
  92.      * Matches the same characters as StringTokenizer,
  93.      * namely space, tab, newline and formfeed.
  94.      *
  95.      * @return the split matcher
  96.      */
  97.     public static StrMatcher splitMatcher() {
  98.         return SPLIT_MATCHER;
  99.     }

  100.     /**
  101.      * Matches the String trim() whitespace characters.
  102.      *
  103.      * @return the trim matcher
  104.      */
  105.     public static StrMatcher trimMatcher() {
  106.         return TRIM_MATCHER;
  107.     }

  108.     /**
  109.      * Returns a matcher which matches the single quote character.
  110.      *
  111.      * @return a matcher for a single quote
  112.      */
  113.     public static StrMatcher singleQuoteMatcher() {
  114.         return SINGLE_QUOTE_MATCHER;
  115.     }

  116.     /**
  117.      * Returns a matcher which matches the double quote character.
  118.      *
  119.      * @return a matcher for a double quote
  120.      */
  121.     public static StrMatcher doubleQuoteMatcher() {
  122.         return DOUBLE_QUOTE_MATCHER;
  123.     }

  124.     /**
  125.      * Returns a matcher which matches the single or double quote character.
  126.      *
  127.      * @return a matcher for a single or double quote
  128.      */
  129.     public static StrMatcher quoteMatcher() {
  130.         return QUOTE_MATCHER;
  131.     }

  132.     /**
  133.      * Matches no characters.
  134.      *
  135.      * @return a matcher that matches nothing
  136.      */
  137.     public static StrMatcher noneMatcher() {
  138.         return NONE_MATCHER;
  139.     }

  140.     /**
  141.      * Constructor that creates a matcher from a character.
  142.      *
  143.      * @param ch  the character to match, must not be null
  144.      * @return a new Matcher for the given char
  145.      */
  146.     public static StrMatcher charMatcher(final char ch) {
  147.         return new CharMatcher(ch);
  148.     }

  149.     /**
  150.      * Constructor that creates a matcher from a set of characters.
  151.      *
  152.      * @param chars  the characters to match, null or empty matches nothing
  153.      * @return a new matcher for the given char[]
  154.      */
  155.     public static StrMatcher charSetMatcher(final char... chars) {
  156.         if (chars == null || chars.length == 0) {
  157.             return NONE_MATCHER;
  158.         }
  159.         if (chars.length == 1) {
  160.             return new CharMatcher(chars[0]);
  161.         }
  162.         return new CharSetMatcher(chars);
  163.     }

  164.     /**
  165.      * Constructor that creates a matcher from a string representing a set of characters.
  166.      *
  167.      * @param chars  the characters to match, null or empty matches nothing
  168.      * @return a new Matcher for the given characters
  169.      */
  170.     public static StrMatcher charSetMatcher(final String chars) {
  171.         if (chars == null || chars.length() == 0) {
  172.             return NONE_MATCHER;
  173.         }
  174.         if (chars.length() == 1) {
  175.             return new CharMatcher(chars.charAt(0));
  176.         }
  177.         return new CharSetMatcher(chars.toCharArray());
  178.     }

  179.     /**
  180.      * Constructor that creates a matcher from a string.
  181.      *
  182.      * @param str  the string to match, null or empty matches nothing
  183.      * @return a new Matcher for the given String
  184.      */
  185.     public static StrMatcher stringMatcher(final String str) {
  186.         if (str == null || str.length() == 0) {
  187.             return NONE_MATCHER;
  188.         }
  189.         return new StringMatcher(str);
  190.     }

  191.     //-----------------------------------------------------------------------
  192.     /**
  193.      * Constructor.
  194.      */
  195.     protected StrMatcher() {
  196.         super();
  197.     }

  198.     /**
  199.      * Returns the number of matching characters, zero for no match.
  200.      * <p>
  201.      * This method is called to check for a match.
  202.      * The parameter <code>pos</code> represents the current position to be
  203.      * checked in the string <code>buffer</code> (a character array which must
  204.      * not be changed).
  205.      * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
  206.      * <p>
  207.      * The character array may be larger than the active area to be matched.
  208.      * Only values in the buffer between the specified indices may be accessed.
  209.      * <p>
  210.      * The matching code may check one character or many.
  211.      * It may check characters preceding <code>pos</code> as well as those
  212.      * after, so long as no checks exceed the bounds specified.
  213.      * <p>
  214.      * It must return zero for no match, or a positive number if a match was found.
  215.      * The number indicates the number of characters that matched.
  216.      *
  217.      * @param buffer  the text content to match against, do not change
  218.      * @param pos  the starting position for the match, valid for buffer
  219.      * @param bufferStart  the first active index in the buffer, valid for buffer
  220.      * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
  221.      * @return the number of matching characters, zero for no match
  222.      */
  223.     public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

  224.     /**
  225.      * Returns the number of matching characters, zero for no match.
  226.      * <p>
  227.      * This method is called to check for a match.
  228.      * The parameter <code>pos</code> represents the current position to be
  229.      * checked in the string <code>buffer</code> (a character array which must
  230.      * not be changed).
  231.      * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
  232.      * <p>
  233.      * The matching code may check one character or many.
  234.      * It may check characters preceding <code>pos</code> as well as those after.
  235.      * <p>
  236.      * It must return zero for no match, or a positive number if a match was found.
  237.      * The number indicates the number of characters that matched.
  238.      *
  239.      * @param buffer  the text content to match against, do not change
  240.      * @param pos  the starting position for the match, valid for buffer
  241.      * @return the number of matching characters, zero for no match
  242.      */
  243.     public int isMatch(final char[] buffer, final int pos) {
  244.         return isMatch(buffer, pos, 0, buffer.length);
  245.     }

  246.     //-----------------------------------------------------------------------
  247.     /**
  248.      * Class used to define a set of characters for matching purposes.
  249.      */
  250.     static final class CharSetMatcher extends StrMatcher {
  251.         /** The set of characters to match. */
  252.         private final char[] chars;

  253.         /**
  254.          * Constructor that creates a matcher from a character array.
  255.          *
  256.          * @param chars  the characters to match, must not be null
  257.          */
  258.         CharSetMatcher(final char chars[]) {
  259.             super();
  260.             this.chars = chars.clone();
  261.             Arrays.sort(this.chars);
  262.         }

  263.         /**
  264.          * Returns whether or not the given character matches.
  265.          *
  266.          * @param buffer  the text content to match against, do not change
  267.          * @param pos  the starting position for the match, valid for buffer
  268.          * @param bufferStart  the first active index in the buffer, valid for buffer
  269.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  270.          * @return the number of matching characters, zero for no match
  271.          */
  272.         @Override
  273.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  274.             return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
  275.         }
  276.     }

  277.     //-----------------------------------------------------------------------
  278.     /**
  279.      * Class used to define a character for matching purposes.
  280.      */
  281.     static final class CharMatcher extends StrMatcher {
  282.         /** The character to match. */
  283.         private final char ch;

  284.         /**
  285.          * Constructor that creates a matcher that matches a single character.
  286.          *
  287.          * @param ch  the character to match
  288.          */
  289.         CharMatcher(final char ch) {
  290.             super();
  291.             this.ch = ch;
  292.         }

  293.         /**
  294.          * Returns whether or not the given character matches.
  295.          *
  296.          * @param buffer  the text content to match against, do not change
  297.          * @param pos  the starting position for the match, valid for buffer
  298.          * @param bufferStart  the first active index in the buffer, valid for buffer
  299.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  300.          * @return the number of matching characters, zero for no match
  301.          */
  302.         @Override
  303.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  304.             return ch == buffer[pos] ? 1 : 0;
  305.         }
  306.     }

  307.     //-----------------------------------------------------------------------
  308.     /**
  309.      * Class used to define a set of characters for matching purposes.
  310.      */
  311.     static final class StringMatcher extends StrMatcher {
  312.         /** The string to match, as a character array. */
  313.         private final char[] chars;

  314.         /**
  315.          * Constructor that creates a matcher from a String.
  316.          *
  317.          * @param str  the string to match, must not be null
  318.          */
  319.         StringMatcher(final String str) {
  320.             super();
  321.             chars = str.toCharArray();
  322.         }

  323.         /**
  324.          * Returns whether or not the given text matches the stored string.
  325.          *
  326.          * @param buffer  the text content to match against, do not change
  327.          * @param pos  the starting position for the match, valid for buffer
  328.          * @param bufferStart  the first active index in the buffer, valid for buffer
  329.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  330.          * @return the number of matching characters, zero for no match
  331.          */
  332.         @Override
  333.         public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
  334.             final int len = chars.length;
  335.             if (pos + len > bufferEnd) {
  336.                 return 0;
  337.             }
  338.             for (int i = 0; i < chars.length; i++, pos++) {
  339.                 if (chars[i] != buffer[pos]) {
  340.                     return 0;
  341.                 }
  342.             }
  343.             return len;
  344.         }
  345.        
  346.         @Override
  347.         public String toString() {
  348.             return super.toString() + ' ' + Arrays.toString(chars);
  349.         }

  350.     }

  351.     //-----------------------------------------------------------------------
  352.     /**
  353.      * Class used to match no characters.
  354.      */
  355.     static final class NoMatcher extends StrMatcher {

  356.         /**
  357.          * Constructs a new instance of <code>NoMatcher</code>.
  358.          */
  359.         NoMatcher() {
  360.             super();
  361.         }

  362.         /**
  363.          * Always returns <code>false</code>.
  364.          *
  365.          * @param buffer  the text content to match against, do not change
  366.          * @param pos  the starting position for the match, valid for buffer
  367.          * @param bufferStart  the first active index in the buffer, valid for buffer
  368.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  369.          * @return the number of matching characters, zero for no match
  370.          */
  371.         @Override
  372.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  373.             return 0;
  374.         }
  375.     }

  376.     //-----------------------------------------------------------------------
  377.     /**
  378.      * Class used to match whitespace as per trim().
  379.      */
  380.     static final class TrimMatcher extends StrMatcher {

  381.         /**
  382.          * Constructs a new instance of <code>TrimMatcher</code>.
  383.          */
  384.         TrimMatcher() {
  385.             super();
  386.         }

  387.         /**
  388.          * Returns whether or not the given character matches.
  389.          *
  390.          * @param buffer  the text content to match against, do not change
  391.          * @param pos  the starting position for the match, valid for buffer
  392.          * @param bufferStart  the first active index in the buffer, valid for buffer
  393.          * @param bufferEnd  the end index of the active buffer, valid for buffer
  394.          * @return the number of matching characters, zero for no match
  395.          */
  396.         @Override
  397.         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
  398.             return buffer[pos] <= 32 ? 1 : 0;
  399.         }
  400.     }

  401. }