001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang3.text;
018
019 import java.util.Arrays;
020
021 import org.apache.commons.lang3.StringUtils;
022
023 /**
024 * A matcher class that can be queried to determine if a character array
025 * portion matches.
026 * <p>
027 * This class comes complete with various factory methods.
028 * If these do not suffice, you can subclass and implement your own matcher.
029 *
030 * @since 2.2
031 * @version $Id: StrMatcher.java 1144925 2011-07-10 18:07:05Z ggregory $
032 */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Returns a matcher which matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Returns a matcher which matches the String trim() whitespace characters,
     * that is every character whose code is less than or equal to 32.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Factory method that creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new matcher for the given char
     */
    public static StrMatcher charMatcher(char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Factory method that creates a matcher from a set of characters.
     * <p>
     * A single-element set yields a single-character matcher; a null or
     * empty set yields the shared matcher that matches nothing.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Factory method that creates a matcher from a string representing a set of characters.
     * <p>
     * A one-character string yields a single-character matcher; a null or
     * empty string yields the shared matcher that matches nothing.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(String chars) {
        if (chars == null || chars.length() == 0) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Factory method that creates a matcher which matches a whole string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(String str) {
        // Same null/empty test as charSetMatcher(String), kept inline for consistency.
        if (str == null || str.length() == 0) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
        super();
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter <code>pos</code> represents the current position to be
     * checked in the string <code>buffer</code> (a character array which must
     * not be changed).
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding <code>pos</code> as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter <code>pos</code> represents the current position to be
     * checked in the string <code>buffer</code> (a character array which must
     * not be changed).
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The whole array is treated as the active area: this delegates to
     * {@link #isMatch(char[], int, int, int)} with a start of zero and an
     * end of <code>buffer.length</code>.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding <code>pos</code> as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(char[] buffer, int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied before sorting, so the caller's array is left untouched.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(char[] chars) {
            super();
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the character at <code>pos</code> is in the set.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return one if the character is in the set, zero for no match
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(char ch) {
            super();
            this.ch = ch;
        }

        /**
         * Returns whether or not the character at <code>pos</code> equals the stored character.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return one if the character matches, zero for no match
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string (a sequence of characters) for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(String str) {
            super();
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the text starting at <code>pos</code> matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the length of the stored string if it matches, zero for no match
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            final int len = chars.length;
            // Widen to long: with int arithmetic a very large pos wraps the sum
            // negative, slips past this guard and fails on the array access below
            // instead of reporting "no match" like any other past-the-end position.
            if ((long) pos + len > bufferEnd) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>NoMatcher</code>.
         */
        NoMatcher() {
            super();
        }

        /**
         * Always returns zero, meaning no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return zero, always
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>TrimMatcher</code>.
         */
        TrimMatcher() {
            super();
        }

        /**
         * Returns whether or not the character at <code>pos</code> has a code
         * less than or equal to 32 (the space character).
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return one if the character matches, zero for no match
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}