View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text;
18  
19  import java.util.Arrays;
20  
21  /**
22   * A matcher class that can be queried to determine if a character array
23   * portion matches.
24   * <p>
25   * This class comes complete with various factory methods.
26   * If these do not suffice, you can subclass and implement your own matcher.
27   *
28   * @since 1.0
29   */
public abstract class StrMatcher {

    /** Shared matcher for the comma character. */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /** Shared matcher for the tab character. */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /** Shared matcher for the space character. */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Shared matcher for the token-splitting whitespace set:
     * space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /** Shared matcher for every character whose value is 32 (space) or lower. */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /** Shared matcher for the single quote character. */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /** Shared matcher for the double quote character. */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /** Shared matcher for either the single or the double quote character. */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /** Shared matcher that never reports a match. */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Gets the shared matcher for the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Gets the shared matcher for the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Gets the shared matcher for the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Gets the shared matcher for the token-splitting whitespace set:
     * space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Gets the shared matcher for the characters removed by String trim(),
     * that is every character whose value is 32 (space) or lower.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Gets the shared matcher for the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher for the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher for either the single or the double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher that never reports a match.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher for exactly one character.
     *
     * @param ch  the character to match
     * @return a new matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher that accepts any one of the supplied characters.
     * <p>
     * The cheapest suitable implementation is chosen: the shared "none"
     * matcher for no characters, a single-character matcher for one
     * character, and a set matcher otherwise.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null) {
            return NONE_MATCHER;
        }
        switch (chars.length) {
            case 0:
                return NONE_MATCHER;
            case 1:
                return new CharMatcher(chars[0]);
            default:
                return new CharSetMatcher(chars);
        }
    }

    /**
     * Creates a matcher that accepts any one of the characters contained in a string.
     * <p>
     * The cheapest suitable implementation is chosen: the shared "none"
     * matcher for no characters, a single-character matcher for one
     * character, and a set matcher otherwise.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() > 1) {
            return new CharSetMatcher(chars.toCharArray());
        }
        return new CharMatcher(chars.charAt(0));
    }

    /**
     * Creates a matcher that accepts one exact sequence of characters.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        final boolean nothingToMatch = str == null || str.isEmpty();
        return nothingToMatch ? NONE_MATCHER : new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor for use by subclasses.
     */
    protected StrMatcher() {
    }

    /**
     * Tests for a match at <code>pos</code> and reports how many characters matched.
     * <p>
     * <code>buffer</code> holds the text being examined and must not be modified.
     * Callers guarantee that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The array may be larger than the region that is actually in use;
     * implementations may only read characters inside the region delimited
     * by <code>bufferStart</code> and <code>bufferEnd</code>.
     * <p>
     * An implementation is free to inspect a single character or several,
     * before or after <code>pos</code>, provided it stays inside that region.
     * <p>
     * The result is zero when nothing matched, otherwise the (positive)
     * number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Tests for a match at <code>pos</code>, treating the entire array as the active region.
     * <p>
     * This is a convenience for
     * <code>isMatch(buffer, pos, 0, buffer.length)</code>; see that method
     * for the full contract.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts any single character drawn from a fixed set.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** Sorted private copy of the accepted characters, kept sorted for binary search. */
        private final char[] sortedChars;

        /**
         * Creates a matcher from a character array.
         * The array is copied, so the caller's array is neither retained nor reordered.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            final char[] copy = chars.clone();
            Arrays.sort(copy);
            this.sortedChars = copy;
        }

        /**
         * Checks whether the character at <code>pos</code> belongs to the set.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character is in the set, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            if (Arrays.binarySearch(sortedChars, buffer[pos]) < 0) {
                return 0;
            }
            return 1;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts one specific character.
     */
    static final class CharMatcher extends StrMatcher {
        /** The single accepted character. */
        private final char expected;

        /**
         * Creates a matcher for a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.expected = ch;
        }

        /**
         * Checks whether the character at <code>pos</code> is the accepted character.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character matches, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            if (buffer[pos] == expected) {
                return 1;
            }
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts one exact sequence of characters.
     */
    static final class StringMatcher extends StrMatcher {
        /** The sequence to look for, as a character array. */
        private final char[] target;

        /**
         * Creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            this.target = str.toCharArray();
        }

        /**
         * Checks whether the stored string occurs in <code>buffer</code> starting at <code>pos</code>.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the length of the stored string on a match, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int length = target.length;
            // Not enough active characters left for the whole string to fit.
            if (pos + length > bufferEnd) {
                return 0;
            }
            for (int offset = 0; offset < length; offset++) {
                if (buffer[pos + offset] != target[offset]) {
                    return 0;
                }
            }
            return length;
        }

        /**
         * Appends the characters being matched to the default object description.
         *
         * @return the default description followed by the target characters
         */
        @Override
        public String toString() {
            final String description = super.toString();
            return description + ' ' + Arrays.toString(target);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that never accepts anything.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>NoMatcher</code>.
         */
        NoMatcher() {
        }

        /**
         * Always reports no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return always 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts whitespace as defined by String trim():
     * every character whose value is 32 (space) or lower.
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>TrimMatcher</code>.
         */
        TrimMatcher() {
        }

        /**
         * Checks whether the character at <code>pos</code> is a space or any lower-valued character.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character is trimmable whitespace, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            if (buffer[pos] > ' ') {
                return 0;
            }
            return 1;
        }
    }

}
438 }