1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.text;
18
19 import java.util.Arrays;
20
/**
 * A matcher that can be queried to determine whether a portion of a
 * character array matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 *
 * @since 1.0
 */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Factory method that creates a matcher from a single character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Factory method that creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher; the supplied
     * array is copied, so later changes to it do not affect the matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]; the shared "none" matcher when
     *  the input is null or empty
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Factory method that creates a matcher from a string representing a set of characters.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters; the shared "none" matcher
     *  when the input is null or empty
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Factory method that creates a matcher from a string.
     * <p>
     * The whole string has to be present at the tested position for the
     * matcher to report a match.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String; the shared "none" matcher when
     *  the input is null or empty
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor, available to subclasses that implement their own matching.
     */
    protected StrMatcher() {
        super();
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * <p>
     * This convenience overload treats the whole array as the active area,
     * delegating with a start of zero and an end of {@code buffer.length}.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is cloned before being sorted, so the caller's array is
         * neither reordered nor shared with this matcher.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            super();
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character at {@code pos} is in the set, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            super();
            this.ch = ch;
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character at {@code pos} equals the stored character, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string (an exact sequence of characters) for
     * matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            super();
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string if it occurs at {@code pos}
         *  and ends before {@code bufferEnd}, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Compare against the remaining space rather than computing pos + len,
            // which could overflow int for a very long pattern.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters
         * this matcher looks for.
         *
         * @return a description of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
            super();
        }

        /**
         * Always returns zero, meaning no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return always zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
            super();
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character at {@code pos} has a code of 32 (space) or less, otherwise 0
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}