View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3.text;
18  
19  import java.util.Arrays;
20  
21  import org.apache.commons.lang3.StringUtils;
22  
23  /**
24   * A matcher class that can be queried to determine if a character array
25   * portion matches.
26   * <p>
27   * This class comes complete with various factory methods.
28   * If these do not suffice, you can subclass and implement your own matcher.
29   *
30   * @since 2.2
31   * @deprecated as of 3.6, use commons-text
32   * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StrMatcher.html">
33   * StrMatcher</a> instead
34   */
35  @Deprecated
36  public abstract class StrMatcher {
37  
38      /**
39       * Matches the comma character.
40       */
41      private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
42      /**
43       * Matches the tab character.
44       */
45      private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
46      /**
47       * Matches the space character.
48       */
49      private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
50      /**
51       * Matches the same characters as StringTokenizer,
52       * namely space, tab, newline, formfeed.
53       */
54      private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
55      /**
56       * Matches the String trim() whitespace characters.
57       */
58      private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
59      /**
60       * Matches the double quote character.
61       */
62      private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
63      /**
64       * Matches the double quote character.
65       */
66      private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
67      /**
68       * Matches the single or double quote character.
69       */
70      private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
71      /**
72       * Matches no characters.
73       */
74      private static final StrMatcher NONE_MATCHER = new NoMatcher();
75  
76      // -----------------------------------------------------------------------
77  
78      /**
79       * Returns a matcher which matches the comma character.
80       *
81       * @return a matcher for a comma
82       */
83      public static StrMatcher commaMatcher() {
84          return COMMA_MATCHER;
85      }
86  
87      /**
88       * Returns a matcher which matches the tab character.
89       *
90       * @return a matcher for a tab
91       */
92      public static StrMatcher tabMatcher() {
93          return TAB_MATCHER;
94      }
95  
96      /**
97       * Returns a matcher which matches the space character.
98       *
99       * @return a matcher for a space
100      */
101     public static StrMatcher spaceMatcher() {
102         return SPACE_MATCHER;
103     }
104 
105     /**
106      * Matches the same characters as StringTokenizer,
107      * namely space, tab, newline and formfeed.
108      *
109      * @return the split matcher
110      */
111     public static StrMatcher splitMatcher() {
112         return SPLIT_MATCHER;
113     }
114 
115     /**
116      * Matches the String trim() whitespace characters.
117      *
118      * @return the trim matcher
119      */
120     public static StrMatcher trimMatcher() {
121         return TRIM_MATCHER;
122     }
123 
124     /**
125      * Returns a matcher which matches the single quote character.
126      *
127      * @return a matcher for a single quote
128      */
129     public static StrMatcher singleQuoteMatcher() {
130         return SINGLE_QUOTE_MATCHER;
131     }
132 
133     /**
134      * Returns a matcher which matches the double quote character.
135      *
136      * @return a matcher for a double quote
137      */
138     public static StrMatcher doubleQuoteMatcher() {
139         return DOUBLE_QUOTE_MATCHER;
140     }
141 
142     /**
143      * Returns a matcher which matches the single or double quote character.
144      *
145      * @return a matcher for a single or double quote
146      */
147     public static StrMatcher quoteMatcher() {
148         return QUOTE_MATCHER;
149     }
150 
151     /**
152      * Matches no characters.
153      *
154      * @return a matcher that matches nothing
155      */
156     public static StrMatcher noneMatcher() {
157         return NONE_MATCHER;
158     }
159 
160     /**
161      * Constructor that creates a matcher from a character.
162      *
163      * @param ch  the character to match, must not be null
164      * @return a new Matcher for the given char
165      */
166     public static StrMatcher charMatcher(final char ch) {
167         return new CharMatcher(ch);
168     }
169 
170     /**
171      * Constructor that creates a matcher from a set of characters.
172      *
173      * @param chars  the characters to match, null or empty matches nothing
174      * @return a new matcher for the given char[]
175      */
176     public static StrMatcher charSetMatcher(final char... chars) {
177         if (chars == null || chars.length == 0) {
178             return NONE_MATCHER;
179         }
180         if (chars.length == 1) {
181             return new CharMatcher(chars[0]);
182         }
183         return new CharSetMatcher(chars);
184     }
185 
186     /**
187      * Constructor that creates a matcher from a string representing a set of characters.
188      *
189      * @param chars  the characters to match, null or empty matches nothing
190      * @return a new Matcher for the given characters
191      */
192     public static StrMatcher charSetMatcher(final String chars) {
193         if (StringUtils.isEmpty(chars)) {
194             return NONE_MATCHER;
195         }
196         if (chars.length() == 1) {
197             return new CharMatcher(chars.charAt(0));
198         }
199         return new CharSetMatcher(chars.toCharArray());
200     }
201 
202     /**
203      * Constructor that creates a matcher from a string.
204      *
205      * @param str  the string to match, null or empty matches nothing
206      * @return a new Matcher for the given String
207      */
208     public static StrMatcher stringMatcher(final String str) {
209         if (StringUtils.isEmpty(str)) {
210             return NONE_MATCHER;
211         }
212         return new StringMatcher(str);
213     }
214 
215     //-----------------------------------------------------------------------
216     /**
217      * Constructor.
218      */
219     protected StrMatcher() {
220         super();
221     }
222 
223     /**
224      * Returns the number of matching characters, zero for no match.
225      * <p>
226      * This method is called to check for a match.
227      * The parameter <code>pos</code> represents the current position to be
228      * checked in the string <code>buffer</code> (a character array which must
229      * not be changed).
230      * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
231      * <p>
232      * The character array may be larger than the active area to be matched.
233      * Only values in the buffer between the specified indices may be accessed.
234      * <p>
235      * The matching code may check one character or many.
236      * It may check characters preceding <code>pos</code> as well as those
237      * after, so long as no checks exceed the bounds specified.
238      * <p>
239      * It must return zero for no match, or a positive number if a match was found.
240      * The number indicates the number of characters that matched.
241      *
242      * @param buffer  the text content to match against, do not change
243      * @param pos  the starting position for the match, valid for buffer
244      * @param bufferStart  the first active index in the buffer, valid for buffer
245      * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
246      * @return the number of matching characters, zero for no match
247      */
248     public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
249 
250     /**
251      * Returns the number of matching characters, zero for no match.
252      * <p>
253      * This method is called to check for a match.
254      * The parameter <code>pos</code> represents the current position to be
255      * checked in the string <code>buffer</code> (a character array which must
256      * not be changed).
257      * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
258      * <p>
259      * The matching code may check one character or many.
260      * It may check characters preceding <code>pos</code> as well as those after.
261      * <p>
262      * It must return zero for no match, or a positive number if a match was found.
263      * The number indicates the number of characters that matched.
264      *
265      * @param buffer  the text content to match against, do not change
266      * @param pos  the starting position for the match, valid for buffer
267      * @return the number of matching characters, zero for no match
268      * @since 2.4
269      */
270     public int isMatch(final char[] buffer, final int pos) {
271         return isMatch(buffer, pos, 0, buffer.length);
272     }
273 
274     //-----------------------------------------------------------------------
275     /**
276      * Class used to define a set of characters for matching purposes.
277      */
278     static final class CharSetMatcher extends StrMatcher {
279         /** The set of characters to match. */
280         private final char[] chars;
281 
282         /**
283          * Constructor that creates a matcher from a character array.
284          *
285          * @param chars  the characters to match, must not be null
286          */
287         CharSetMatcher(final char chars[]) {
288             super();
289             this.chars = chars.clone();
290             Arrays.sort(this.chars);
291         }
292 
293         /**
294          * Returns whether or not the given character matches.
295          *
296          * @param buffer  the text content to match against, do not change
297          * @param pos  the starting position for the match, valid for buffer
298          * @param bufferStart  the first active index in the buffer, valid for buffer
299          * @param bufferEnd  the end index of the active buffer, valid for buffer
300          * @return the number of matching characters, zero for no match
301          */
302         @Override
303         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
304             return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
305         }
306     }
307 
308     //-----------------------------------------------------------------------
309     /**
310      * Class used to define a character for matching purposes.
311      */
312     static final class CharMatcher extends StrMatcher {
313         /** The character to match. */
314         private final char ch;
315 
316         /**
317          * Constructor that creates a matcher that matches a single character.
318          *
319          * @param ch  the character to match
320          */
321         CharMatcher(final char ch) {
322             super();
323             this.ch = ch;
324         }
325 
326         /**
327          * Returns whether or not the given character matches.
328          *
329          * @param buffer  the text content to match against, do not change
330          * @param pos  the starting position for the match, valid for buffer
331          * @param bufferStart  the first active index in the buffer, valid for buffer
332          * @param bufferEnd  the end index of the active buffer, valid for buffer
333          * @return the number of matching characters, zero for no match
334          */
335         @Override
336         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
337             return ch == buffer[pos] ? 1 : 0;
338         }
339     }
340 
341     //-----------------------------------------------------------------------
342     /**
343      * Class used to define a set of characters for matching purposes.
344      */
345     static final class StringMatcher extends StrMatcher {
346         /** The string to match, as a character array. */
347         private final char[] chars;
348 
349         /**
350          * Constructor that creates a matcher from a String.
351          *
352          * @param str  the string to match, must not be null
353          */
354         StringMatcher(final String str) {
355             super();
356             chars = str.toCharArray();
357         }
358 
359         /**
360          * Returns whether or not the given text matches the stored string.
361          *
362          * @param buffer  the text content to match against, do not change
363          * @param pos  the starting position for the match, valid for buffer
364          * @param bufferStart  the first active index in the buffer, valid for buffer
365          * @param bufferEnd  the end index of the active buffer, valid for buffer
366          * @return the number of matching characters, zero for no match
367          */
368         @Override
369         public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
370             final int len = chars.length;
371             if (pos + len > bufferEnd) {
372                 return 0;
373             }
374             for (int i = 0; i < chars.length; i++, pos++) {
375                 if (chars[i] != buffer[pos]) {
376                     return 0;
377                 }
378             }
379             return len;
380         }
381 
382         @Override
383         public String toString() {
384             return super.toString() + ' ' + Arrays.toString(chars);
385         }
386 
387     }
388 
389     //-----------------------------------------------------------------------
390     /**
391      * Class used to match no characters.
392      */
393     static final class NoMatcher extends StrMatcher {
394 
395         /**
396          * Constructs a new instance of <code>NoMatcher</code>.
397          */
398         NoMatcher() {
399             super();
400         }
401 
402         /**
403          * Always returns <code>false</code>.
404          *
405          * @param buffer  the text content to match against, do not change
406          * @param pos  the starting position for the match, valid for buffer
407          * @param bufferStart  the first active index in the buffer, valid for buffer
408          * @param bufferEnd  the end index of the active buffer, valid for buffer
409          * @return the number of matching characters, zero for no match
410          */
411         @Override
412         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
413             return 0;
414         }
415     }
416 
417     //-----------------------------------------------------------------------
418     /**
419      * Class used to match whitespace as per trim().
420      */
421     static final class TrimMatcher extends StrMatcher {
422 
423         /**
424          * Constructs a new instance of <code>TrimMatcher</code>.
425          */
426         TrimMatcher() {
427             super();
428         }
429 
430         /**
431          * Returns whether or not the given character matches.
432          *
433          * @param buffer  the text content to match against, do not change
434          * @param pos  the starting position for the match, valid for buffer
435          * @param bufferStart  the first active index in the buffer, valid for buffer
436          * @param bufferEnd  the end index of the active buffer, valid for buffer
437          * @return the number of matching characters, zero for no match
438          */
439         @Override
440         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
441             return buffer[pos] <= 32 ? 1 : 0;
442         }
443     }
444 
445 }