View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3.text;
18  
19  import java.util.Arrays;
20  
21  import org.apache.commons.lang3.StringUtils;
22  
23  /**
24   * A matcher class that can be queried to determine if a character array
25   * portion matches.
26   * <p>
27   * This class comes complete with various factory methods.
28   * If these do not suffice, you can subclass and implement your own matcher.
29   *
30   * @since 2.2
31   */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Constructor that creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Constructor that creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher; the supplied
     * array is never retained or modified.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Constructor that creates a matcher from a string representing a set of characters.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new Matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.length() == 0) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Constructor that creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a new Matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.length() == 0) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
        super();
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter <code>pos</code> represents the current position to be
     * checked in the string <code>buffer</code> (a character array which must
     * not be changed).
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding <code>pos</code> as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter <code>pos</code> represents the current position to be
     * checked in the string <code>buffer</code> (a character array which must
     * not be changed).
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The whole of <code>buffer</code> is treated as the active area, i.e. this
     * delegates to the four-argument form with bounds <code>0</code> and
     * <code>buffer.length</code>.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding <code>pos</code> as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is cloned before sorting, so the caller's array is left untouched.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            super();
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at <code>pos</code> is in the set, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            // binarySearch returns a non-negative index only when the key is present
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            super();
            this.ch = ch;
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at <code>pos</code> equals the stored character, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string (a fixed sequence of characters) for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            super();
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string if it occurs at <code>pos</code>
         *  and fits before <code>bufferEnd</code>, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Compare against the remaining space by subtraction: "pos + len" could
            // overflow int for very large values, the difference cannot for valid bounds.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>NoMatcher</code>.
         */
        NoMatcher() {
            super();
        }

        /**
         * Always returns zero, i.e. never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return always zero (no match)
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>TrimMatcher</code>.
         */
        TrimMatcher() {
            super();
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at <code>pos</code> has a code of 32 (space) or less,
         *  zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            // 32 is the space character; String.trim() strips every code up to and including it
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}
435 }