1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.lang3.text;
18
19 import java.util.Arrays;
20
21 import org.apache.commons.lang3.ArraySorter;
22 import org.apache.commons.lang3.ArrayUtils;
23 import org.apache.commons.lang3.StringUtils;
24
25 /**
26 * A matcher class that can be queried to determine if a character array
27 * portion matches.
28 * <p>
29 * This class comes complete with various factory methods.
30 * If these do not suffice, you can subclass and implement your own matcher.
31 * </p>
32 *
33 * @since 2.2
34 * @deprecated As of <a href="https://commons.apache.org/proper/commons-lang/changes-report.html#a3.6">3.6</a>, use Apache Commons Text
35 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html">
36 * StringMatcherFactory</a>.
37 */
38 @Deprecated
39 public abstract class StrMatcher {
40
41 /**
42 * Class used to define a character for matching purposes.
43 */
44 static final class CharMatcher extends StrMatcher {
45 /** The character to match. */
46 private final char ch;
47
48 /**
49 * Constructor that creates a matcher that matches a single character.
50 *
51 * @param ch the character to match
52 */
53 CharMatcher(final char ch) {
54 this.ch = ch;
55 }
56
57 /**
58 * Tests whether or not the given character matches.
59 *
60 * @param buffer the text content to match against, do not change
61 * @param pos the starting position for the match, valid for buffer
62 * @param bufferStart the first active index in the buffer, valid for buffer
63 * @param bufferEnd the end index of the active buffer, valid for buffer
64 * @return the number of matching characters, zero for no match
65 */
66 @Override
67 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
68 return ch == buffer[pos] ? 1 : 0;
69 }
70 }
71 /**
72 * Class used to define a set of characters for matching purposes.
73 */
74 static final class CharSetMatcher extends StrMatcher {
75 /** The set of characters to match. */
76 private final char[] chars;
77
78 /**
79 * Constructor that creates a matcher from a character array.
80 *
81 * @param chars the characters to match, must not be null
82 */
83 CharSetMatcher(final char[] chars) {
84 this.chars = ArraySorter.sort(chars.clone());
85 }
86
87 /**
88 * Returns whether or not the given character matches.
89 *
90 * @param buffer the text content to match against, do not change
91 * @param pos the starting position for the match, valid for buffer
92 * @param bufferStart the first active index in the buffer, valid for buffer
93 * @param bufferEnd the end index of the active buffer, valid for buffer
94 * @return the number of matching characters, zero for no match
95 */
96 @Override
97 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
98 return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
99 }
100 }
101 /**
102 * Class used to match no characters.
103 */
104 static final class NoMatcher extends StrMatcher {
105
106 /**
107 * Constructs a new instance of {@link NoMatcher}.
108 */
109 NoMatcher() {
110 }
111
112 /**
113 * Always returns {@code false}.
114 *
115 * @param buffer the text content to match against, do not change
116 * @param pos the starting position for the match, valid for buffer
117 * @param bufferStart the first active index in the buffer, valid for buffer
118 * @param bufferEnd the end index of the active buffer, valid for buffer
119 * @return the number of matching characters, zero for no match
120 */
121 @Override
122 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
123 return 0;
124 }
125 }
126 /**
127 * Class used to define a set of characters for matching purposes.
128 */
129 static final class StringMatcher extends StrMatcher {
130 /** The string to match, as a character array. */
131 private final char[] chars;
132
133 /**
134 * Constructor that creates a matcher from a String.
135 *
136 * @param str the string to match, must not be null
137 */
138 StringMatcher(final String str) {
139 chars = str.toCharArray();
140 }
141
142 /**
143 * Tests whether or not the given text matches the stored string.
144 *
145 * @param buffer the text content to match against, do not change
146 * @param pos the starting position for the match, valid for buffer
147 * @param bufferStart the first active index in the buffer, valid for buffer
148 * @param bufferEnd the end index of the active buffer, valid for buffer
149 * @return the number of matching characters, zero for no match
150 */
151 @Override
152 public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
153 final int len = chars.length;
154 if (pos + len > bufferEnd) {
155 return 0;
156 }
157 for (int i = 0; i < chars.length; i++, pos++) {
158 if (chars[i] != buffer[pos]) {
159 return 0;
160 }
161 }
162 return len;
163 }
164
165 @Override
166 public String toString() {
167 return super.toString() + ' ' + Arrays.toString(chars);
168 }
169
170 }
171 /**
172 * Class used to match whitespace as per trim().
173 */
174 static final class TrimMatcher extends StrMatcher {
175
176 /**
177 * Constructs a new instance of {@link TrimMatcher}.
178 */
179 TrimMatcher() {
180 }
181
182 /**
183 * Tests whether or not the given character matches.
184 *
185 * @param buffer the text content to match against, do not change
186 * @param pos the starting position for the match, valid for buffer
187 * @param bufferStart the first active index in the buffer, valid for buffer
188 * @param bufferEnd the end index of the active buffer, valid for buffer
189 * @return the number of matching characters, zero for no match
190 */
191 @Override
192 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
193 return buffer[pos] <= 32 ? 1 : 0;
194 }
195 }
196 /**
197 * Matches the comma character.
198 */
199 private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
200 /**
201 * Matches the tab character.
202 */
203 private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
204 /**
205 * Matches the space character.
206 */
207 private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
208 /**
209 * Matches the same characters as StringTokenizer,
210 * namely space, tab, newline, formfeed.
211 */
212 private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
213
214 /**
215 * Matches the String trim() whitespace characters.
216 */
217 private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
218
219 /**
220 * Matches the double quote character.
221 */
222 private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
223
224 /**
225 * Matches the double quote character.
226 */
227 private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
228
229 /**
230 * Matches the single or double quote character.
231 */
232 private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
233
234 /**
235 * Matches no characters.
236 */
237 private static final StrMatcher NONE_MATCHER = new NoMatcher();
238
239 /**
240 * Creates a matcher from a character.
241 *
242 * @param ch the character to match, must not be null
243 * @return a new Matcher for the given char
244 */
245 public static StrMatcher charMatcher(final char ch) {
246 return new CharMatcher(ch);
247 }
248
249 /**
250 * Creates a matcher from a set of characters.
251 *
252 * @param chars the characters to match, null or empty matches nothing
253 * @return a new matcher for the given char[]
254 */
255 public static StrMatcher charSetMatcher(final char... chars) {
256 if (ArrayUtils.isEmpty(chars)) {
257 return NONE_MATCHER;
258 }
259 if (chars.length == 1) {
260 return new CharMatcher(chars[0]);
261 }
262 return new CharSetMatcher(chars);
263 }
264
265 /**
266 * Creates a matcher from a string representing a set of characters.
267 *
268 * @param chars the characters to match, null or empty matches nothing
269 * @return a new Matcher for the given characters
270 */
271 public static StrMatcher charSetMatcher(final String chars) {
272 if (StringUtils.isEmpty(chars)) {
273 return NONE_MATCHER;
274 }
275 if (chars.length() == 1) {
276 return new CharMatcher(chars.charAt(0));
277 }
278 return new CharSetMatcher(chars.toCharArray());
279 }
280
281 /**
282 * Gets the matcher for the comma character.
283 *
284 * @return the matcher for a comma
285 */
286 public static StrMatcher commaMatcher() {
287 return COMMA_MATCHER;
288 }
289
290 /**
291 * Gets the matcher for the double quote character.
292 *
293 * @return the matcher for a double quote
294 */
295 public static StrMatcher doubleQuoteMatcher() {
296 return DOUBLE_QUOTE_MATCHER;
297 }
298
299 /**
300 * Gets the matcher for no characters.
301 *
302 * @return the matcher that matches nothing
303 */
304 public static StrMatcher noneMatcher() {
305 return NONE_MATCHER;
306 }
307
308 /**
309 * Gets the matcher for the single or double quote character.
310 *
311 * @return the matcher for a single or double quote
312 */
313 public static StrMatcher quoteMatcher() {
314 return QUOTE_MATCHER;
315 }
316
317 /**
318 * Gets the matcher for the single quote character.
319 *
320 * @return the matcher for a single quote
321 */
322 public static StrMatcher singleQuoteMatcher() {
323 return SINGLE_QUOTE_MATCHER;
324 }
325
326 /**
327 * Gets the matcher for the space character.
328 *
329 * @return the matcher for a space
330 */
331 public static StrMatcher spaceMatcher() {
332 return SPACE_MATCHER;
333 }
334
335 /**
336 * Gets the matcher for the same characters as StringTokenizer,
337 * namely space, tab, newline and form-feed.
338 *
339 * @return the split matcher
340 */
341 public static StrMatcher splitMatcher() {
342 return SPLIT_MATCHER;
343 }
344
345 /**
346 * Creates a matcher for a string.
347 *
348 * @param str the string to match, null or empty matches nothing
349 * @return a new Matcher for the given String
350 */
351 public static StrMatcher stringMatcher(final String str) {
352 if (StringUtils.isEmpty(str)) {
353 return NONE_MATCHER;
354 }
355 return new StringMatcher(str);
356 }
357
358 /**
359 * Gets the matcher for the tab character.
360 *
361 * @return the matcher for a tab
362 */
363 public static StrMatcher tabMatcher() {
364 return TAB_MATCHER;
365 }
366
367 /**
368 * Gets the matcher to String trim() whitespace characters.
369 *
370 * @return the trim matcher
371 */
372 public static StrMatcher trimMatcher() {
373 return TRIM_MATCHER;
374 }
375
/**
 * Creates a new matcher; callable only by subclasses and classes in this package.
 */
protected StrMatcher() {
    // Nothing to initialize.
}
381
382 /**
383 * Tests whether the number of matching characters, zero for no match.
384 * <p>
385 * This method is called to check for a match.
386 * The parameter {@code pos} represents the current position to be
387 * checked in the string {@code buffer} (a character array which must
388 * not be changed).
389 * The API guarantees that {@code pos} is a valid index for {@code buffer}.
390 * </p>
391 * <p>
392 * The matching code may check one character or many.
393 * It may check characters preceding {@code pos} as well as those after.
394 * </p>
395 * <p>
396 * It must return zero for no match, or a positive number if a match was found.
397 * The number indicates the number of characters that matched.
398 * </p>
399 *
400 * @param buffer the text content to match against, do not change
401 * @param pos the starting position for the match, valid for buffer
402 * @return the number of matching characters, zero for no match
403 * @since 2.4
404 */
405 public int isMatch(final char[] buffer, final int pos) {
406 return isMatch(buffer, pos, 0, buffer.length);
407 }
408
409 /**
410 * Tests whether the number of matching characters, zero for no match.
411 * <p>
412 * This method is called to check for a match.
413 * The parameter {@code pos} represents the current position to be
414 * checked in the string {@code buffer} (a character array which must
415 * not be changed).
416 * The API guarantees that {@code pos} is a valid index for {@code buffer}.
417 * </p>
418 * <p>
419 * The character array may be larger than the active area to be matched.
420 * Only values in the buffer between the specified indices may be accessed.
421 * </p>
422 * <p>
423 * The matching code may check one character or many.
424 * It may check characters preceding {@code pos} as well as those
425 * after, so long as no checks exceed the bounds specified.
426 * </p>
427 * <p>
428 * It must return zero for no match, or a positive number if a match was found.
429 * The number indicates the number of characters that matched.
430 * </p>
431 *
432 * @param buffer the text content to match against, do not change
433 * @param pos the starting position for the match, valid for buffer
434 * @param bufferStart the first active index in the buffer, valid for buffer
435 * @param bufferEnd the end index (exclusive) of the active buffer, valid for buffer
436 * @return the number of matching characters, zero for no match
437 */
438 public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
439
440 }