1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.text;
18
19 import java.util.Arrays;
20
21 import org.apache.commons.lang3.ArrayUtils;
22 import org.apache.commons.text.matcher.StringMatcherFactory;
23
24 /**
25 * A matcher class that can be queried to determine if a character array
26 * portion matches.
27 * <p>
28 * This class comes complete with various factory methods.
29 * If these do not suffice, you can subclass and implement your own matcher.
30 * </p>
31 *
32 * @since 1.0
33 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
34 */
35 @Deprecated
36 public abstract class StrMatcher {
37
38 /**
39 * Class used to define a character for matching purposes.
40 */
41 private static final class CharMatcher extends StrMatcher {
42
43 /** The character to match. */
44 private final char ch;
45
46 /**
47 * Constructor that creates a matcher that matches a single character.
48 *
49 * @param ch the character to match
50 */
51 private CharMatcher(final char ch) {
52 this.ch = ch;
53 }
54
55 /**
56 * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
57 *
58 * @param buffer the text content to match against, do not change
59 * @param pos the starting position for the match, valid for buffer
60 * @param bufferStart the first active index in the buffer, valid for buffer
61 * @param bufferEnd the end index of the active buffer, valid for buffer
62 * @return The number of matching characters, or zero if there is no match
63 */
64 @Override
65 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
66 return ch == buffer[pos] ? 1 : 0;
67 }
68 }
69
70 /**
71 * Class used to define a set of characters for matching purposes.
72 */
73 private static final class CharSetMatcher extends StrMatcher {
74
75 /** The set of characters to match. */
76 private final char[] chars;
77
78 /**
79 * Constructor that creates a matcher from a character array.
80 *
81 * @param chars the characters to match, must not be null
82 */
83 private CharSetMatcher(final char[] chars) {
84 this.chars = chars.clone();
85 Arrays.sort(this.chars);
86 }
87
88 /**
89 * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
90 *
91 * @param buffer the text content to match against, do not change
92 * @param pos the starting position for the match, valid for buffer
93 * @param bufferStart the first active index in the buffer, valid for buffer
94 * @param bufferEnd the end index of the active buffer, valid for buffer
95 * @return The number of matching characters, or zero if there is no match
96 */
97 @Override
98 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
99 return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
100 }
101 }
102
103 /**
104 * Class used to match no characters.
105 */
106 private static final class NoMatcher extends StrMatcher {
107
108 /**
109 * Constructs a new instance of {@code NoMatcher}.
110 */
111 private NoMatcher() {
112 }
113
114 /**
115 * Always returns {@code 0}.
116 *
117 * @param buffer the text content to match against, do not change
118 * @param pos the starting position for the match, valid for buffer
119 * @param bufferStart the first active index in the buffer, valid for buffer
120 * @param bufferEnd the end index of the active buffer, valid for buffer
121 * @return The number of matching characters, or zero if there is no match
122 */
123 @Override
124 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
125 return 0;
126 }
127 }
128
129 /**
130 * Class used to define a set of characters for matching purposes.
131 */
132 private static final class StringMatcher extends StrMatcher {
133
134 /** The string to match, as a character array. */
135 private final char[] chars;
136
137 /**
138 * Constructor that creates a matcher from a String.
139 *
140 * @param str the string to match, must not be null
141 */
142 private StringMatcher(final String str) {
143 chars = str.toCharArray();
144 }
145
146 /**
147 * Returns the number of matching characters, or zero if there is no match.
148 *
149 * @param buffer the text content to match against, do not change
150 * @param pos the starting position for the match, valid for buffer
151 * @param bufferStart the first active index in the buffer, valid for buffer
152 * @param bufferEnd the end index of the active buffer, valid for buffer
153 * @return The number of matching characters, or zero if there is no match
154 */
155 @Override
156 public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
157 final int len = chars.length;
158 if (pos + len > bufferEnd) {
159 return 0;
160 }
161 for (int i = 0; i < chars.length; i++, pos++) {
162 if (chars[i] != buffer[pos]) {
163 return 0;
164 }
165 }
166 return len;
167 }
168
169 @Override
170 public String toString() {
171 return super.toString() + ' ' + Arrays.toString(chars);
172 }
173
174 }
175
176 /**
177 * Class used to match whitespace as per trim().
178 */
179 private static final class TrimMatcher extends StrMatcher {
180
181 /**
182 * Constructs a new instance of {@code TrimMatcher}.
183 */
184 private TrimMatcher() {
185 }
186
187 /**
188 * Returns whether or not the given character matches.
189 *
190 * @param buffer the text content to match against, do not change
191 * @param pos the starting position for the match, valid for buffer
192 * @param bufferStart the first active index in the buffer, valid for buffer
193 * @param bufferEnd the end index of the active buffer, valid for buffer
194 * @return The number of matching characters, or zero if there is no match
195 */
196 @Override
197 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
198 return buffer[pos] <= 32 ? 1 : 0;
199 }
200 }
201
202 /**
203 * Matches the comma character.
204 */
205 private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
206
207 /**
208 * Matches the tab character.
209 */
210 private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
211
212 /**
213 * Matches the space character.
214 */
215 private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
216
217 /**
218 * Matches the same characters as StringTokenizer,
219 * namely space, tab, newline, form feed.
220 */
221 private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
222
223 /**
224 * Matches the String trim() whitespace characters.
225 */
226 private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
227
228 /**
229 * Matches the double quote character.
230 */
231 private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
232
233 /**
234 * Matches the double quote character.
235 */
236 private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
237
238 /**
239 * Matches the single or double quote character.
240 */
241 private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
242
243 /**
244 * Matches no characters.
245 */
246 private static final StrMatcher NONE_MATCHER = new NoMatcher();
247
248 /**
249 * Creates a matcher from a character.
250 *
251 * @param ch the character to match, must not be null
252 * @return a new Matcher for the given char
253 */
254 public static StrMatcher charMatcher(final char ch) {
255 return new CharMatcher(ch);
256 }
257
258 /**
259 * Creates a matcher from a set of characters.
260 *
261 * @param chars the characters to match, null or empty matches nothing
262 * @return a new matcher for the given char[]
263 */
264 public static StrMatcher charSetMatcher(final char... chars) {
265 if (ArrayUtils.isEmpty(chars)) {
266 return NONE_MATCHER;
267 }
268 if (chars.length == 1) {
269 return new CharMatcher(chars[0]);
270 }
271 return new CharSetMatcher(chars);
272 }
273
274 /**
275 * Creates a matcher from a string representing a set of characters.
276 *
277 * @param chars the characters to match, null or empty matches nothing
278 * @return a new Matcher for the given characters
279 */
280 public static StrMatcher charSetMatcher(final String chars) {
281 if (chars == null || chars.isEmpty()) {
282 return NONE_MATCHER;
283 }
284 if (chars.length() == 1) {
285 return new CharMatcher(chars.charAt(0));
286 }
287 return new CharSetMatcher(chars.toCharArray());
288 }
289
290 /**
291 * Returns a matcher which matches the comma character.
292 *
293 * @return a matcher for a comma
294 */
295 public static StrMatcher commaMatcher() {
296 return COMMA_MATCHER;
297 }
298
299 /**
300 * Returns a matcher which matches the double quote character.
301 *
302 * @return a matcher for a double quote
303 */
304 public static StrMatcher doubleQuoteMatcher() {
305 return DOUBLE_QUOTE_MATCHER;
306 }
307
308 /**
309 * Matches no characters.
310 *
311 * @return a matcher that matches nothing
312 */
313 public static StrMatcher noneMatcher() {
314 return NONE_MATCHER;
315 }
316
317 /**
318 * Returns a matcher which matches the single or double quote character.
319 *
320 * @return a matcher for a single or double quote
321 */
322 public static StrMatcher quoteMatcher() {
323 return QUOTE_MATCHER;
324 }
325
326 /**
327 * Returns a matcher which matches the single quote character.
328 *
329 * @return a matcher for a single quote
330 */
331 public static StrMatcher singleQuoteMatcher() {
332 return SINGLE_QUOTE_MATCHER;
333 }
334
335 /**
336 * Returns a matcher which matches the space character.
337 *
338 * @return a matcher for a space
339 */
340 public static StrMatcher spaceMatcher() {
341 return SPACE_MATCHER;
342 }
343
344 /**
345 * Matches the same characters as StringTokenizer,
346 * namely space, tab, newline and form feed.
347 *
348 * @return The split matcher
349 */
350 public static StrMatcher splitMatcher() {
351 return SPLIT_MATCHER;
352 }
353
354 /**
355 * Creates a matcher from a string.
356 *
357 * @param str the string to match, null or empty matches nothing
358 * @return a new Matcher for the given String
359 */
360 public static StrMatcher stringMatcher(final String str) {
361 if (str == null || str.isEmpty()) {
362 return NONE_MATCHER;
363 }
364 return new StringMatcher(str);
365 }
366
367 /**
368 * Returns a matcher which matches the tab character.
369 *
370 * @return a matcher for a tab
371 */
372 public static StrMatcher tabMatcher() {
373 return TAB_MATCHER;
374 }
375
376 /**
377 * Matches the String trim() whitespace characters.
378 *
379 * @return The trim matcher
380 */
381 public static StrMatcher trimMatcher() {
382 return TRIM_MATCHER;
383 }
384
385 /**
386 * Constructs a new instance.
387 */
388 protected StrMatcher() {
389 }
390
391 /**
392 * Returns the number of matching characters, or zero if there is no match.
393 * <p>
394 * This method is called to check for a match.
395 * The parameter {@code pos} represents the current position to be
396 * checked in the string {@code buffer} (a character array which must
397 * not be changed).
398 * The API guarantees that {@code pos} is a valid index for {@code buffer}.
399 * </p>
400 * <p>
401 * The matching code may check one character or many.
402 * It may check characters preceding {@code pos} as well as those after.
403 * </p>
404 * <p>
405 * It must return zero for no match, or a positive number if a match was found.
406 * The number indicates the number of characters that matched.
407 * </p>
408 *
409 * @param buffer the text content to match against, do not change
410 * @param pos the starting position for the match, valid for buffer
411 * @return The number of matching characters, or zero if there is no match
412 */
413 public int isMatch(final char[] buffer, final int pos) {
414 return isMatch(buffer, pos, 0, buffer.length);
415 }
416
417 /**
418 * Returns the number of matching characters, or zero if there is no match.
419 * <p>
420 * This method is called to check for a match.
421 * The parameter {@code pos} represents the current position to be
422 * checked in the string {@code buffer} (a character array which must
423 * not be changed).
424 * The API guarantees that {@code pos} is a valid index for {@code buffer}.
425 * </p>
426 * <p>
427 * The character array may be larger than the active area to be matched.
428 * Only values in the buffer between the specified indices may be accessed.
429 * </p>
430 * <p>
431 * The matching code may check one character or many.
432 * It may check characters preceding {@code pos} as well as those
433 * after, so long as no checks exceed the bounds specified.
434 * </p>
435 * <p>
436 * It must return zero for no match, or a positive number if a match was found.
437 * The number indicates the number of characters that matched.
438 * </p>
439 *
440 * @param buffer the text content to match against, do not change
441 * @param pos the starting position for the match, valid for buffer
442 * @param bufferStart the first active index in the buffer, valid for buffer
443 * @param bufferEnd the end index (exclusive) of the active buffer, valid for buffer
444 * @return The number of matching characters, or zero if there is no match
445 */
446 public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
447
448 }