View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text;
18  
19  import java.util.Arrays;
20  
21  import org.apache.commons.lang3.ArrayUtils;
22  import org.apache.commons.text.matcher.StringMatcherFactory;
23  
/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * </p>
 *
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Matcher that matches exactly one specific character.
     */
    private static final class CharMatcher extends StrMatcher {

        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        private CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns {@code 1} if the character at {@code pos} equals the configured
         * character, or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    /**
     * Matcher that matches any one character out of a set of characters.
     */
    private static final class CharSetMatcher extends StrMatcher {

        /** The set of characters to match, kept sorted so it can be binary searched. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * The array is copied, so the caller's array is neither retained nor reordered.
         *
         * @param chars  the characters to match, must not be null
         */
        private CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns {@code 1} if the character at {@code pos} is one of the configured
         * characters, or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    /**
     * Matcher that never matches anything.
     */
    private static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        private NoMatcher() {
        }

        /**
         * Always returns {@code 0}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return always zero, meaning no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    /**
     * Matcher that matches a complete string (a fixed sequence of characters).
     */
    private static final class StringMatcher extends StrMatcher {

        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        private StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns the length of the configured string if the buffer contains it starting
         * at {@code pos} and ending before {@code bufferEnd}, or zero if there is no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Compare against the remaining space rather than computing pos + len,
            // which could overflow int for very large buffers and match strings.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default string form followed by the characters being matched.
         *
         * @return a string describing this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    /**
     * Matcher that matches whitespace as per {@link String#trim()}, that is any
     * character whose code is less than or equal to 32 (the space character).
     */
    private static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        private TrimMatcher() {
        }

        /**
         * Returns {@code 1} if the character at {@code pos} has a code less than or
         * equal to 32, or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    /**
     * Creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A null or empty array yields the shared matcher that matches nothing, and a
     * single-element array yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * A null or empty string yields the shared matcher that matches nothing, and a
     * one-character string yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return The split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return The trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Constructs a new instance.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     * <p>
     * This overload treats the whole array as the active area: it delegates to
     * {@link #isMatch(char[], int, int, int)} with a start of {@code 0} and an end
     * of {@code buffer.length}.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

}