View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3.text;
18  
19  import java.util.Arrays;
20  
21  import org.apache.commons.lang3.StringUtils;
22  
/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 *
 * @since 2.2
 * @version $Id: StrMatcher.java 1436770 2013-01-22 07:09:45Z ggregory $
 */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Returns a matcher which matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Returns a matcher which matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher that matches a single character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A null or empty array yields the shared "match nothing" matcher, and a
     * single-element array yields a single-character matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * The characters of the string are handled exactly as by
     * {@link #charSetMatcher(char...)}.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null) {
            return NONE_MATCHER;
        }
        // The empty and single-character cases are handled by the char[] overload.
        return charSetMatcher(chars.toCharArray());
    }

    /**
     * Creates a matcher that matches an entire string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.length() == 0) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The whole of {@code buffer} is treated as the active area, i.e. this is
     * equivalent to {@code isMatch(buffer, pos, 0, buffer.length)}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied, so the caller's array is neither retained nor reordered.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            // Arrays.binarySearch in isMatch requires sorted input.
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the character at {@code pos} is in the set.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character is in the set, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns whether or not the character at {@code pos} is the stored character.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the length of the stored string if it occurs at {@code pos},
         *  zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Not enough active characters left for the whole string to fit.
            if (pos + len > bufferEnd) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns zero, i.e. never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return always zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns whether or not the character at {@code pos} is a space
         * or any character with a lower code.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character is {@code <= ' '}, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= ' ' ? 1 : 0;
        }
    }

}
436 }