View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text;
18  
19  import java.util.Arrays;
20  
21  import org.apache.commons.text.matcher.StringMatcherFactory;
22  
/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * <p>
 * The shared instances returned by the no-argument factory methods hold no
 * mutable state after construction.
 *
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher; the
     * array is copied, so later changes to it do not affect the matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new Matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a new Matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, or zero if there is no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * <p>
     * This convenience overload treats the whole array as the active area,
     * delegating with {@code bufferStart = 0} and {@code bufferEnd = buffer.length}.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, or zero if there is no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            // Copy before sorting so the caller's array is neither retained nor reordered.
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the given character matches.
         * <p>
         * Only the character at {@code pos} is examined; the active-area
         * bounds are not consulted.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at {@code pos} is in the set, otherwise zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns whether or not the given character matches.
         * <p>
         * Only the character at {@code pos} is examined; the active-area
         * bounds are not consulted.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at {@code pos} equals the stored character, otherwise zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match a fixed sequence of characters (a string).
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         * <p>
         * The match is attempted forward from {@code pos}; if the stored
         * string would extend past {@code bufferEnd} there is no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the length of the stored string if it matches at {@code pos}, otherwise zero
         */
        @Override
        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Subtraction form: "pos + len" could overflow int and skip the guard.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters being matched.
         *
         * @return a string representation of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns zero, i.e. never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return always zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns whether or not the given character matches.
         * <p>
         * Only the character at {@code pos} is examined; the active-area
         * bounds are not consulted.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at {@code pos} is the space character or any lower code, otherwise zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            // String.trim() strips every char whose code is <= ' ' (U+0020).
            return buffer[pos] <= ' ' ? 1 : 0;
        }
    }

}
450 }