/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.Arrays;

/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 *
 * @since 1.0
 */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Constructor that creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Constructor that creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher; the array
     * passed in is never modified.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Constructor that creates a matcher from a string representing a set of characters.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new Matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Constructor that creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a new Matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * <p>
     * This is a convenience overload that treats the whole array as the
     * active area, i.e. it delegates with bounds {@code 0} and
     * {@code buffer.length}.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied before it is sorted, so the caller's array is
         * left untouched.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the number of matching characters, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the number of matching characters, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string (an exact sequence of characters) for
     * matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the number of matching characters, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Compare against the remaining room rather than computing pos + len,
            // which could overflow int for positions near the end of a very large buffer.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters
         * this matcher looks for.
         *
         * @return a string representation including the characters to match
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns zero, i.e. never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the number of matching characters, always zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns whether or not the given character matches, i.e. whether
         * its code is less than or equal to 32 (the space character).
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the number of matching characters, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}